From be85631471ebf8009ba4b20142fdca9599b3af93 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 4 Aug 2020 16:08:43 -0700 Subject: [PATCH 01/56] travis: fix spurious VM crashes Every few builds or so, a vmtest VM crashes after printing "x86: Booting SMP configuration:". After some difficult debugging, I determined that the crash happens in arch/x86/realmode/rm/trampoline_64.S (the code that initializes secondary CPUs) at the ljmp from startup_32 to startup_64. The real problem happens earlier in startup_32: movl $pa_trampoline_pgd, %eax movl %eax, %cr3 Sometimes, the store to CR3 "fails" and CR3 remains zero, which causes the later ljmp to triple fault. This can be reproduced by the following script: #!/bin/sh curl -L 'https://www.dropbox.com/sh/2mcf2xvg319qdaw/AABFKsISWRpndNZ1gz60O-qSa/x86_64/vmlinuz-5.8.0-rc7-vmtest1?dl=1' -o vmlinuz cat > commands.gdb << "EOF" set confirm off target remote :1234 # arch/x86/realmode/rm/trampoline_64.S:startup_32 after CR3 store. hbreak *0x9ae09 if $cr3 == 0 command info registers eax cr3 quit 1 end # kernel/smp.c:smp_init() after all CPUs have been brought up. If we get here, # the bug wasn't triggered. hbreak *0xffffffff81ed4484 command kill quit 0 end continue EOF while true; do qemu-system-x86_64 -cpu host -enable-kvm -smp 64 -m 128M \ -nodefaults -display none -serial file:/dev/stdout -no-reboot \ -kernel vmlinuz -append 'console=0,115200 panic=-1 nokaslr' \ -s -S & gdb -batch -x commands.gdb || exit 1 done This seems to be a problem with nested virtualization that was fixed by Linux kernel commit b4d185175bc1 ("KVM: VMX: give unrestricted guest full control of CR3") (in v4.17). Apparently, the Google Cloud hosts that Travis runs on are missing this fix. We obviously can't patch those hosts, but we can work around it. Disabling unrestricted guest support in the Travis VM causes CR3 stores in the nested vmtest VM to be emulated, bypassing the bug. 
Signed-off-by: Omar Sandoval --- .travis.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.travis.yml b/.travis.yml index 31f9a4dfd..71b06d744 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,16 @@ python: - '3.7' - '3.6' install: + # If the host is running a kernel without Linux kernel commit b4d185175bc1 + # ("KVM: VMX: give unrestricted guest full control of CR3") (in v4.17), then + # stores to CR3 in the nested guest can spuriously fail and cause it to + # crash. We can work around this by disabling unrestricted guest support. + - | + if grep -q '^flags\b.*\bvmx\b' /proc/cpuinfo; then + echo "options kvm_intel unrestricted_guest=N" | sudo tee /etc/modprobe.d/kvm-cr3-workaround.conf > /dev/null + sudo modprobe -r kvm_intel + sudo modprobe kvm_intel + fi # Upstream defaults to world-read-writeable /dev/kvm. Debian/Ubuntu override # this; see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=892945. We want # the upstream default. From b8aa2dcfc592f95f9ad6e8995fec09294581792f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Aug 2020 22:51:56 -0700 Subject: [PATCH 02/56] drgndoc: format None, True, and False as roles Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/format.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index efa162887..d59bb3b9b 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -10,6 +10,10 @@ from drgndoc.visitor import NodeVisitor +def _is_name_constant(node: ast.Constant) -> bool: + return node.value is None or node.value is True or node.value is False + + class _FormatVisitor(NodeVisitor): def __init__( self, @@ -53,11 +57,16 @@ def visit_Constant( if node.value is ...: self._parts.append("...") else: + obj = self._rst and _is_name_constant(node) quote = self._rst and not isinstance(node.value, (int, float)) - if quote: + if obj: + self._parts.append(":py:obj:`") + elif quote: 
self._parts.append("``") self._parts.append(repr(node.value)) - if quote: + if obj: + self._parts.append("`") + elif quote: self._parts.append("``") def _append_resolved_name(self, name: str) -> None: @@ -113,6 +122,10 @@ def visit_Attribute( name_stack.append(value.id) name_stack.reverse() self._append_resolved_name(".".join(name_stack)) + elif isinstance(value, ast.Constant) and _is_name_constant(value): + name_stack.append(repr(value.value)) + name_stack.reverse() + self._append_resolved_name(".".join(name_stack)) elif isinstance(value, ast.Constant) and not isinstance( value.value, (type(...), int, float) ): From 2345325ac1e4d18c6ca295b46a1861642045c075 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Aug 2020 23:18:42 -0700 Subject: [PATCH 03/56] drgndoc: handle implicit classmethods The __init_subclass__ and __class_getitem__ methods are always class methods even if not decorated as such, so format them accordingly. Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/ext.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 5d6779f75..7851754bf 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -199,7 +199,10 @@ def _run( if node.async_: contents.append(" :async:", sourcename) if resolved.class_: - if node.have_decorator("classmethod"): + if node.have_decorator("classmethod") or argument in ( + "__init_subclass__", + "__class_getitem__", + ): contents.append(" :classmethod:", sourcename) if node.have_decorator("staticmethod"): contents.append(" :staticmethod:", sourcename) From 66c5cc83a65ca74c131252d10fbbdb6838425ee9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 13 Aug 2020 14:47:59 -0700 Subject: [PATCH 04/56] Add IntegerLike type annotation Lots of interfaces in drgn transparently turn an integer Object into an int by using __index__(), so add an IntegerLike protocol for this and use it everywhere applicable. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 60 +++++++++++++++++++++++++++++------------ docs/api_reference.rst | 1 + drgn/__init__.py | 2 ++ libdrgn/python/module.c | 27 ++++++++++++++++++- 4 files changed, 72 insertions(+), 18 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 0dccb1251..7db4152b5 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -9,6 +9,7 @@ Don't use this module directly. Instead, use the drgn package. import enum import os +import sys from typing import ( Any, Callable, @@ -21,6 +22,24 @@ from typing import ( overload, ) +if sys.version_info < (3, 8): + from typing_extensions import Protocol +else: + from typing import Protocol + +# This is effectively typing.SupportsIndex without @typing.runtime_checkable +# (both of which are only available since Python 3.8), with a more +# self-explanatory name. +class IntegerLike(Protocol): + """ + An :class:`int` or integer-like object. + + Parameters annotated with this type expect an integer which may be given as + a Python :class:`int` or an :class:`Object` with integer type. + """ + + def __index__(self) -> int: ... + class Program: """ A ``Program`` represents a crashed or running program. It can be used to @@ -152,7 +171,7 @@ class Program: """ ... # address_or_name is positional-only. - def symbol(self, address_or_name: Union[int, str]) -> Symbol: + def symbol(self, address_or_name: Union[IntegerLike, str]) -> Symbol: """ Get the symbol containing the given address, or the global symbol with the given name. @@ -162,7 +181,12 @@ class Program: the given name """ ... - def stack_trace(self, thread: Union[Object, int]) -> StackTrace: + def stack_trace( + self, + # Object is already IntegerLike, but this explicitly documents that it + # can take non-integer Objects. + thread: Union[Object, IntegerLike], + ) -> StackTrace: """ Get the stack trace for the given thread in the program. @@ -216,7 +240,9 @@ class Program: :param lang: :attr:`Type.language` """ ... 
- def read(self, address: int, size: int, physical: bool = False) -> bytes: + def read( + self, address: IntegerLike, size: IntegerLike, physical: bool = False + ) -> bytes: """ Read *size* bytes of memory starting at *address* in the program. The address may be virtual (the default) or physical if the program @@ -236,11 +262,11 @@ class Program: :raises ValueError: if *size* is negative """ ... - def read_u8(self, address: int, physical: bool = False) -> int: ... - def read_u16(self, address: int, physical: bool = False) -> int: ... - def read_u32(self, address: int, physical: bool = False) -> int: ... - def read_u64(self, address: int, physical: bool = False) -> int: ... - def read_word(self, address: int, physical: bool = False) -> int: + def read_u8(self, address: IntegerLike, physical: bool = False) -> int: ... + def read_u16(self, address: IntegerLike, physical: bool = False) -> int: ... + def read_u32(self, address: IntegerLike, physical: bool = False) -> int: ... + def read_u64(self, address: IntegerLike, physical: bool = False) -> int: ... + def read_word(self, address: IntegerLike, physical: bool = False) -> int: """ Read an unsigned integer from the program's memory in the program's byte order. @@ -262,8 +288,8 @@ class Program: ... def add_memory_segment( self, - address: int, - size: int, + address: IntegerLike, + size: IntegerLike, read_fn: Callable[[int, int, int, bool], bytes], physical: bool = False, ) -> None: @@ -659,10 +685,10 @@ class Object: type: Union[str, Type, None] = None, value: Any = None, *, - address: Optional[int] = None, + address: Optional[IntegerLike] = None, byteorder: Optional[str] = None, - bit_offset: Optional[int] = None, - bit_field_size: Optional[int] = None, + bit_offset: Optional[IntegerLike] = None, + bit_field_size: Optional[IntegerLike] = None, ) -> None: ... prog_: Program """Program that this object is from.""" @@ -703,7 +729,7 @@ class Object: :param name: Attribute name. """ ... 
- def __getitem__(self, idx: Union[int, Object]) -> Object: + def __getitem__(self, idx: IntegerLike) -> Object: """ Implement ``self[idx]``. Get the array element at the given index. @@ -817,7 +843,7 @@ class Object: def format_( self, *, - columns: Optional[int] = None, + columns: Optional[IntegerLike] = None, dereference: Optional[bool] = None, symbolize: Optional[bool] = None, string: Optional[bool] = None, @@ -1040,7 +1066,7 @@ class StackTrace: default. """ - def __getitem__(self, idx: int) -> StackFrame: ... + def __getitem__(self, idx: IntegerLike) -> StackFrame: ... class StackFrame: """ @@ -1066,7 +1092,7 @@ class StackFrame: instruction instead of the return address. """ ... - def register(self, reg: Union[str, int, Register]) -> int: + def register(self, reg: Union[str, IntegerLike, Register]) -> int: """ Get the value of the given register at this stack frame. The register can be specified by name (e.g., ``'rax'``), number (see diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 0b57d0d07..55476a895 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -116,6 +116,7 @@ Miscellaneous .. drgndoc:: sizeof .. drgndoc:: execscript +.. 
drgndoc:: IntegerLike Exceptions ---------- diff --git a/drgn/__init__.py b/drgn/__init__.py index 69f87611d..eadf3f303 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -49,6 +49,7 @@ Architecture, FaultError, FindObjectFlags, + IntegerLike, Language, MissingDebugInfoError, NULL, @@ -99,6 +100,7 @@ "Architecture", "FaultError", "FindObjectFlags", + "IntegerLike", "Language", "MissingDebugInfoError", "NULL", diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 43f182213..837ff2340 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -148,6 +148,31 @@ static struct PyModuleDef drgnmodule = { drgn_methods, }; +/* + * These are for type checking and aren't strictly required at runtime, but + * adding them anyways results in better pydoc output and saves us from fiddling + * with typing.TYPE_CHECKING/forward references. + */ +static int add_type_aliases(PyObject *m) +{ + /* + * This should be a subclass of typing.Protocol, but that is only + * available since Python 3.8. + */ + PyObject *IntegerLike = PyType_FromSpec(&(PyType_Spec){ + .name = "_drgn.IntegerLike", + .flags = Py_TPFLAGS_DEFAULT, + .slots = (PyType_Slot []){{0, NULL}}, + }); + if (!IntegerLike) + return -1; + if (PyModule_AddObject(m, "IntegerLike", IntegerLike) == -1) { + Py_DECREF(IntegerLike); + return -1; + } + return 0; +} + DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) { PyObject *m; @@ -158,7 +183,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) if (!m) return NULL; - if (add_module_constants(m) == -1) + if (add_module_constants(m) == -1 || add_type_aliases(m) == -1) goto err; FaultError_type.tp_base = (PyTypeObject *)PyExc_Exception; From 2d49ef657b183e6e830161442aca132cb67ed259 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 13 Aug 2020 15:27:20 -0700 Subject: [PATCH 05/56] Add Path type alias Rather than duplicating Union[str, bytes, os.PathLike] everywhere, add an alias. 
Also make it explicitly os.PathLike[str] or os.PathLike[bytes] to get rid of some mypy --strict errors. Signed-off-by: Omar Sandoval --- _drgn.pyi | 14 +++++++++++--- docs/api_reference.rst | 1 + drgn/__init__.py | 2 ++ libdrgn/python/module.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 7db4152b5..573daed86 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -40,6 +40,14 @@ class IntegerLike(Protocol): def __index__(self) -> int: ... +Path = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] +""" +Filesystem path. + +Parameters annotated with this type accept a filesystem path as :class:`str`, +:class:`bytes`, or :class:`os.PathLike`. +""" + class Program: """ A ``Program`` represents a crashed or running program. It can be used to @@ -342,7 +350,7 @@ class Program: return an :class:`Object`. """ ... - def set_core_dump(self, path: Union[str, bytes, os.PathLike]) -> None: + def set_core_dump(self, path: Path) -> None: """ Set the program to a core dump. @@ -375,7 +383,7 @@ class Program: ... def load_debug_info( self, - paths: Optional[Iterable[Union[str, bytes, os.PathLike]]] = None, + paths: Optional[Iterable[Path]] = None, default: bool = False, main: bool = False, ) -> None: @@ -480,7 +488,7 @@ def filename_matches(haystack: Optional[str], needle: Optional[str]) -> bool: """ ... -def program_from_core_dump(path: Union[str, bytes, os.PathLike]) -> Program: +def program_from_core_dump(path: Path) -> Program: """ Create a :class:`Program` from a core dump file. The type of program (e.g., userspace or kernel) is determined automatically. diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 55476a895..f5cd5a0e3 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -117,6 +117,7 @@ Miscellaneous .. drgndoc:: sizeof .. drgndoc:: execscript .. drgndoc:: IntegerLike +.. 
drgndoc:: Path Exceptions ---------- diff --git a/drgn/__init__.py b/drgn/__init__.py index eadf3f303..8b50da892 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -55,6 +55,7 @@ NULL, Object, OutOfBoundsError, + Path, Platform, PlatformFlags, PrimitiveType, @@ -106,6 +107,7 @@ "NULL", "Object", "OutOfBoundsError", + "Path", "Platform", "PlatformFlags", "PrimitiveType", diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 837ff2340..52d24878d 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -170,6 +170,41 @@ static int add_type_aliases(PyObject *m) Py_DECREF(IntegerLike); return -1; } + + PyObject *os_module = PyImport_ImportModule("os"); + if (!os_module) + return -1; + PyObject *os_PathLike = PyObject_GetAttrString(os_module, "PathLike"); + Py_DECREF(os_module); + if (!os_PathLike) + return -1; + PyObject *item = Py_BuildValue("OOO", &PyUnicode_Type, &PyBytes_Type, + os_PathLike); + Py_DECREF(os_PathLike); + if (!item) + return -1; + + PyObject *typing_module = PyImport_ImportModule("typing"); + if (!typing_module) { + Py_DECREF(item); + return -1; + } + PyObject *typing_Union = PyObject_GetAttrString(typing_module, "Union"); + Py_DECREF(typing_module); + if (!typing_Union) { + Py_DECREF(item); + return -1; + } + + PyObject *Path = PyObject_GetItem(typing_Union, item); + Py_DECREF(typing_Union); + Py_DECREF(item); + if (!Path) + return -1; + if (PyModule_AddObject(m, "Path", Path) == -1) { + Py_DECREF(Path); + return -1; + } return 0; } From 4a3b8fb8e6ad8878ad7f6e70c613ff824e3caaf9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 13 Aug 2020 16:03:59 -0700 Subject: [PATCH 06/56] drgndoc: fix mypy --strict errors Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/docstrings.py | 2 +- docs/exts/drgndoc/ext.py | 6 +++--- docs/exts/drgndoc/parse.py | 32 ++++++++++++++++++++++---------- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index 
a5a6d7270..21076d29f 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -42,7 +42,7 @@ escapes.append(e) -def escape_string(s): +def escape_string(s: str) -> str: return "".join([escapes[c] for c in s.encode("utf-8")]) diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 7851754bf..6acf60f76 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -51,7 +51,7 @@ import sphinx.util.docutils import sphinx.util.logging import sphinx.util.nodes -from typing import List, cast +from typing import Any, Dict, List, cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode @@ -105,7 +105,7 @@ class DrgnDocDirective(sphinx.util.docutils.SphinxDirective): "exclude": docutils.parsers.rst.directives.unchanged, } - def run(self) -> List[docutils.nodes.Node]: + def run(self) -> Any: parts = [] py_module = self.env.ref_context.get("py:module") if py_module: @@ -278,7 +278,7 @@ def _run_module( del self.env.ref_context["py:module"] -def setup(app: sphinx.application.Sphinx) -> dict: +def setup(app: sphinx.application.Sphinx) -> Dict[str, Any]: app.connect("builder-inited", drgndoc_init) # List of modules or packages. app.add_config_value("drgndoc_paths", [], "env") diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index 4b272cfbc..178057af7 100644 --- a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -24,28 +24,40 @@ class _PreTransformer(ast.NodeTransformer): # Replace string forward references with the parsed expression. - def _visit_annotation(self, node): + @overload + def _visit_annotation(self, node: ast.expr) -> ast.expr: + ... + + @overload + def _visit_annotation(self, node: None) -> None: + ... 
+ + def _visit_annotation(self, node: Optional[ast.expr]) -> Optional[ast.expr]: if isinstance(node, ast.Constant) and isinstance(node.value, str): - node = self.visit(ast.parse(node.value, "", "eval")) + node = self.visit( + cast(ast.Expression, ast.parse(node.value, "", "eval")).body + ) return node - def visit_arg(self, node): - node = self.generic_visit(node) + def visit_arg(self, node: ast.arg) -> ast.arg: + node = cast(ast.arg, self.generic_visit(node)) node.annotation = self._visit_annotation(node.annotation) return node - def visit_FunctionDef(self, node): - node = self.generic_visit(node) + def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef: + node = cast(ast.FunctionDef, self.generic_visit(node)) node.returns = self._visit_annotation(node.returns) return node - def visit_AsyncFunctionDef(self, node): - node = self.generic_visit(node) + def visit_AsyncFunctionDef( + self, node: ast.AsyncFunctionDef + ) -> ast.AsyncFunctionDef: + node = cast(ast.AsyncFunctionDef, self.generic_visit(node)) node.returns = self._visit_annotation(node.returns) return node - def visit_AnnAssign(self, node): - node = self.generic_visit(node) + def visit_AnnAssign(self, node: ast.AnnAssign) -> ast.AnnAssign: + node = cast(ast.AnnAssign, self.generic_visit(node)) node.annotation = self._visit_annotation(node.annotation) return node From a270525f8be8d6f652d5308009d63667d7244faa Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 14 Aug 2020 16:46:18 -0700 Subject: [PATCH 07/56] drgndoc: save all modules and classes traversed to resolve name This will be used to support relative imports. 
Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/ext.py | 10 +-- docs/exts/drgndoc/format.py | 32 ++++----- docs/exts/drgndoc/namespace.py | 121 +++++++++++++-------------------- 3 files changed, 70 insertions(+), 93 deletions(-) diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 6acf60f76..3627f7e2b 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -170,17 +170,17 @@ def _run( ) sourcename = "" - if resolved.module and resolved.module.node.path: - sourcename = resolved.module.node.path + if resolved.modules and resolved.modules[-1].node.path: + sourcename = resolved.modules[-1].node.path if sourcename: self.env.note_dependency(sourcename) if isinstance(node, Class): directive = "py:class" elif isinstance(node, Function): - directive = "py:method" if resolved.class_ else "py:function" + directive = "py:method" if resolved.classes else "py:function" elif isinstance(node, Variable): - directive = "py:attribute" if resolved.class_ else "py:data" + directive = "py:attribute" if resolved.classes else "py:data" else: assert False, type(node).__name__ @@ -198,7 +198,7 @@ def _run( if isinstance(node, Function): if node.async_: contents.append(" :async:", sourcename) - if resolved.class_: + if resolved.classes: if node.have_decorator("classmethod") or argument in ( "__init_subclass__", "__class_getitem__", diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index d59bb3b9b..66071cd8e 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -19,15 +19,15 @@ def __init__( self, namespace: Namespace, substitutions: Sequence[Tuple[Pattern[str], Any]], - module: Optional[BoundNode[Module]], - class_: Optional[BoundNode[Class]], + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], context_module: Optional[str], context_class: Optional[str], ) -> None: self._namespace = namespace self._substitutions = substitutions - self._module = module - self._class = class_ + 
self._modules = modules + self._classes = classes self._context_module = context_module self._context_class = context_class self._parts: List[str] = [] @@ -74,7 +74,7 @@ def _append_resolved_name(self, name: str) -> None: self._parts.append(":py:obj:`") resolved = self._namespace.resolve_name_in_scope( - self._module, self._class, name + self._modules, self._classes, name ) if isinstance(resolved, ResolvedNode): target = resolved.qualified_name() @@ -231,8 +231,8 @@ def _add_class_info( visitor = _FormatVisitor( self._namespace, self._substitutions, - resolved.module, - resolved.class_, + resolved.modules, + resolved.classes, context_module, context_class, ) @@ -271,8 +271,8 @@ def _add_function_info( visitor = _FormatVisitor( self._namespace, self._substitutions, - resolved.module, - resolved.class_, + resolved.modules, + resolved.classes, context_module, context_class, ) @@ -333,7 +333,7 @@ def visit_arg( ] else: default = None - if i == 0 and resolved.class_ and not node.have_decorator("staticmethod"): + if i == 0 and resolved.classes and not node.have_decorator("staticmethod"): # Skip self for methods and cls for class methods. 
continue visit_arg(arg, default) @@ -384,8 +384,8 @@ def _add_variable_info( visitor = _FormatVisitor( self._namespace, self._substitutions, - resolved.module, - resolved.class_, + resolved.modules, + resolved.classes, context_module, context_class, ) @@ -402,10 +402,10 @@ def format( context_class: Optional[str] = None, rst: bool = True, ) -> Tuple[str, List[str]]: - if context_module is None and resolved.module: - context_module = resolved.module.name - if context_class is None and resolved.class_: - context_class = resolved.class_.name + if context_module is None and resolved.modules: + context_module = ".".join([module.name for module in resolved.modules]) + if context_class is None and resolved.classes: + context_module = ".".join([class_.name for class_ in resolved.classes]) node = resolved.node lines = node.docstring.splitlines() if node.docstring else [] diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index cf2602853..d76da7ed0 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -8,6 +8,7 @@ List, Mapping, Optional, + Sequence, TypeVar, Union, ) @@ -37,49 +38,46 @@ def __init__(self, name: str, node: NodeT_co) -> None: class ResolvedNode(Generic[NodeT_co]): def __init__( self, - module: Optional[BoundNode[Module]], - class_: Optional[BoundNode[Class]], + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], name: str, node: NodeT_co, ) -> None: - self.module = module - self.class_ = class_ + self.modules = modules + self.classes = classes self.name = name self.node = node def qualified_name(self) -> str: - return dot_join( - self.module.name if self.module else None, - self.class_.name if self.class_ else None, - self.name, + return ".".join( + itertools.chain( + (module.name for module in self.modules), + (class_.name for class_ in self.classes), + (self.name,), + ) ) def attrs(self) -> Iterator["ResolvedNode[Node]"]: if isinstance(self.node, Module): - module_name = 
dot_join(self.module.name if self.module else None, self.name) + modules = list(self.modules) + modules.append(BoundNode(self.name, self.node)) for attr, node in self.node.attrs.items(): - yield ResolvedNode(BoundNode(module_name, self.node), None, attr, node) + yield ResolvedNode(modules, self.classes, attr, node) elif isinstance(self.node, Class): - class_name = dot_join(self.class_.name if self.class_ else None, self.name) + classes = list(self.classes) + classes.append(BoundNode(self.name, self.node)) for attr, node in self.node.attrs.items(): - yield ResolvedNode( - self.module, BoundNode(class_name, self.node), attr, node - ) + yield ResolvedNode(self.modules, classes, attr, node) def attr(self, attr: str) -> "ResolvedNode[Node]": if isinstance(self.node, Module): - module_name = dot_join(self.module.name if self.module else None, self.name) - return ResolvedNode( - BoundNode(module_name, self.node), None, attr, self.node.attrs[attr] - ) + modules = list(self.modules) + modules.append(BoundNode(self.name, self.node)) + return ResolvedNode(modules, self.classes, attr, self.node.attrs[attr]) elif isinstance(self.node, Class): - class_name = dot_join(self.class_.name if self.class_ else None, self.name) - return ResolvedNode( - self.module, - BoundNode(class_name, self.node), - attr, - self.node.attrs[attr], - ) + classes = list(self.classes) + classes.append(BoundNode(self.name, self.node)) + return ResolvedNode(self.modules, classes, attr, self.node.attrs[attr]) else: raise KeyError(attr) @@ -91,30 +89,20 @@ class Namespace: def __init__(self, modules: Mapping[str, Module]) -> None: self.modules = modules + # NB: this modifies the passed lists. 
def _resolve_name( self, - module_name: Optional[str], - module: Optional[Module], - class_name: Optional[str], - class_: Optional[Class], + modules: List[BoundNode[Module]], + classes: List[BoundNode[Class]], name_components: List[str], ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]: - assert (module_name is None) == (module is None) - assert (class_name is None) == (class_ is None) - module_name_parts = [] - if module_name is not None: - module_name_parts.append(module_name) - class_name_parts = [] - if class_name is not None: - class_name_parts.append(class_name) - name_components.reverse() while name_components: attrs: Mapping[str, Node] - if class_: - attrs = class_.attrs - elif module: - attrs = module.attrs + if classes: + attrs = classes[-1].node.attrs + elif modules: + attrs = modules[-1].node.attrs else: attrs = self.modules name = name_components.pop() @@ -124,10 +112,8 @@ def _resolve_name( break if isinstance(node, (Import, ImportFrom)): - module_name_parts.clear() - class_name_parts.clear() - module = None - class_ = None + modules.clear() + classes.clear() if isinstance(node, Import): import_name = node.module elif isinstance(node, ImportFrom): @@ -138,51 +124,42 @@ def _resolve_name( name_components.extend(reversed(import_name.split("."))) elif name_components: if isinstance(node, Module): - assert not class_ - module = node - module_name_parts.append(name) + assert not classes + modules.append(BoundNode(name, node)) elif isinstance(node, Class): - class_ = node - class_name_parts.append(name) + classes.append(BoundNode(name, node)) else: break else: assert isinstance(node, (Module, Class, Function, Variable)) - return ResolvedNode( - BoundNode(".".join(module_name_parts), module) if module else None, - BoundNode(".".join(class_name_parts), class_) if class_ else None, - name, - node, - ) + return ResolvedNode(modules, classes, name, node) return ".".join( itertools.chain( - module_name_parts, class_name_parts, (name,), 
reversed(name_components) + (module.name for module in modules), + (class_.name for class_ in classes), + (name,), + reversed(name_components), ) ) def resolve_global_name( self, name: str ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]: - return self._resolve_name(None, None, None, None, name.split(".")) + return self._resolve_name([], [], name.split(".")) def resolve_name_in_scope( self, - module: Optional[BoundNode[Module]], - class_: Optional[BoundNode[Class]], + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], name: str, ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]: name_components = name.split(".") attr = name_components[0] - if class_ and attr in class_.node.attrs: - pass - elif module and attr in module.node.attrs: - class_ = None + if classes and attr in classes[-1].node.attrs: + classes = list(classes) + elif modules and attr in modules[-1].node.attrs: + classes = [] else: return name - return self._resolve_name( - module.name if module else None, - module.node if module else None, - class_.name if class_ else None, - class_.node if class_ else None, - name_components, - ) + modules = list(modules) + return self._resolve_name(modules, classes, name_components) From 192d35c60987c2d0ca0b7b9c815d4e00af2803c4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 14 Aug 2020 17:45:23 -0700 Subject: [PATCH 08/56] drgndoc: support relative imports Mainly for completeness, as I don't really like using them in my own projects. 
Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/namespace.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index d76da7ed0..d11a44460 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -112,16 +112,18 @@ def _resolve_name( break if isinstance(node, (Import, ImportFrom)): - modules.clear() classes.clear() if isinstance(node, Import): - import_name = node.module + modules.clear() elif isinstance(node, ImportFrom): - if node.module is None or node.level != 0: - raise NotImplementedError("TODO: relative imports") - import_name = node.module + if node.level >= len(modules): + # Relative import beyond top-level package. Bail. + break + # Absolute import is level 0, which clears the whole list. + del modules[-node.level :] name_components.append(node.name) - name_components.extend(reversed(import_name.split("."))) + if node.module is not None: + name_components.extend(reversed(node.module.split("."))) elif name_components: if isinstance(node, Module): assert not classes From f41cc7fb48d550d72de0f87022f6869f32d5c111 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 15 Aug 2020 10:27:02 -0700 Subject: [PATCH 09/56] drgndoc: recursively document names imported with alias The helpers implemented in C have Python wrappers only for the purpose of documentation. This is because drgndoc ignores all imports when recursively documenting attributes. However, mypy uses the convention that aliased imports (i.e., import ... as ... or from ... import ... as ...) are considered re-exported, so we can follow that convention and include aliased imports. (mypy also considers attributes in __all__ as re-exported, so we should probably follow that in the future, too, but for now aliased imports are enough.) This lets us get rid of the Python wrappers.
Signed-off-by: Omar Sandoval --- _drgn.pyi | 83 +++++++++++++++++++++++++++++---- docs/exts/drgndoc/ext.py | 28 +++++++---- docs/exts/drgndoc/parse.py | 14 ++++-- drgn/helpers/linux/boot.py | 24 ++-------- drgn/helpers/linux/idr.py | 12 +---- drgn/helpers/linux/pid.py | 37 ++------------- drgn/helpers/linux/radixtree.py | 12 +---- drgn/helpers/linux/sched.py | 16 +------ 8 files changed, 114 insertions(+), 112 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 573daed86..242e09525 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1671,11 +1671,78 @@ class OutOfBoundsError(Exception): _with_libkdumpfile: bool def _linux_helper_read_vm(prog, pgtable, address, size): ... -def _linux_helper_radix_tree_lookup(root, index): ... -def _linux_helper_idr_find(idr, id): ... -def _linux_helper_find_pid(ns, pid): ... -def _linux_helper_pid_task(pid, pid_type): ... -def _linux_helper_find_task(ns, pid): ... -def _linux_helper_task_state_to_char(task): ... -def _linux_helper_kaslr_offset(prog): ... -def _linux_helper_pgtable_l5_enabled(prog): ... +def _linux_helper_radix_tree_lookup(root, index): + """ + .. c:function:: void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) + + Look up the entry at a given index in a radix tree. If it is not found, + this returns a ``NULL`` object. + """ + ... + +def _linux_helper_idr_find(idr, id): + """ + .. c:function:: void *idr_find(struct idr *idr, unsigned long id) + + Look up the entry with the given id in an IDR. If it is not found, this + returns a ``NULL`` object. + """ + ... + +def _linux_helper_find_pid(ns, pid): + """ + .. c:function:: struct pid *find_pid(struct pid_namespace *ns, int nr) + + Return the ``struct pid *`` for the given PID number in the given + namespace. If given a :class:`Program` instead, the initial PID namespace + is used. + """ + ... + +def _linux_helper_pid_task(pid, pid_type): + """ + .. 
c:function:: struct task_struct *pid_task(struct pid *pid, enum pid_type pid_type) + + Return the ``struct task_struct *`` containing the given ``struct pid *`` + of the given type. + """ + ... + +def _linux_helper_find_task(prog_or_ns, pid): + """ + .. c:function:: struct task_struct *find_task(struct pid_namespace *ns, int pid) + + Return the task with the given PID in the given namespace. If given a + :class:`Program` instead, the initial PID namespace is used. + """ + ... + +def _linux_helper_task_state_to_char(task): + """ + .. c:function char task_state_to_char(struct task_struct *task) + + Get the state of the task as a character (e.g., ``'R'`` for running). See + `ps(1) + `_ for + a description of the process state codes. + + :rtype: str + """ + ... + +def _linux_helper_kaslr_offset(prog): + """ + .. c:function:: unsigned long kaslr_offset(void) + + Get the kernel address space layout randomization offset (zero if it is + disabled). + """ + ... + +def _linux_helper_pgtable_l5_enabled(prog): + """ + .. c:function:: bool pgtable_l5_enabled(void) + + Return whether 5-level paging is enabled. + """ + ... diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 3627f7e2b..ea63d85ac 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -124,22 +124,18 @@ def run(self) -> Any: self._run(name, "", resolved, docnode) return docnode.children - def _include_attr(self, attr: ResolvedNode[Node], attr_name: str) -> bool: + def _include_attr(self, attr_name: str) -> bool: """ Return whether the given recursive attribute should be documented. - We recursively include nodes that are: - 1. Not imports. - 2. Match the "include" pattern OR don't start with an underscore. + We recursively include nodes that: + 1. Match the "include" pattern OR don't start with an underscore. AND - 3. Do not match the "exclude" pattern. + 2. Do not match the "exclude" pattern. 
The "include" and "exclude" patterns are applied to the name relative to the object being documented by the directive. """ - if isinstance(attr.node, (Import, ImportFrom)): - return False - if not attr_name: return True @@ -158,8 +154,22 @@ def _run( resolved: ResolvedNode[Node], docnode: docutils.nodes.Node, ) -> None: - if not self._include_attr(resolved, attr_name): + if not self._include_attr(attr_name): return + + if isinstance(resolved.node, (Import, ImportFrom)): + # Only include imports that are explicitly aliased (i.e., import + # ... as ... or from ... import ... as ...). + # TODO: we should also include imports listed in __all__. + if not resolved.node.aliased: + return + imported = self.env.drgndoc_namespace.resolve_name_in_scope( + resolved.modules, resolved.classes, resolved.name + ) + if not isinstance(imported, ResolvedNode): + return + resolved = imported + resolved = cast(ResolvedNode[DocumentedNode], resolved) node = resolved.node diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index 178057af7..68f9dd867 100644 --- a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -135,15 +135,19 @@ def __init__( class Import: - def __init__(self, module: str) -> None: + def __init__(self, module: str, aliased: bool) -> None: self.module = module + self.aliased = aliased class ImportFrom: - def __init__(self, name: str, module: Optional[str], level: int) -> None: + def __init__( + self, name: str, module: Optional[str], level: int, aliased: bool + ) -> None: self.name = name self.module = module self.level = level + self.aliased = aliased Node = Union[Module, Class, Function, Variable, Import, ImportFrom] @@ -270,7 +274,7 @@ def visit_Import( else: name = alias.asname module_name = alias.name - self._attrs[name] = Import(module_name) + self._attrs[name] = Import(module_name, alias.asname is not None) def visit_ImportFrom( self, @@ -280,7 +284,9 @@ def visit_ImportFrom( ) -> None: for alias in node.names: name = alias.name if 
alias.asname is None else alias.asname - self._attrs[name] = ImportFrom(alias.name, node.module, node.level) + self._attrs[name] = ImportFrom( + alias.name, node.module, node.level, alias.asname is not None + ) def parse_source( diff --git a/drgn/helpers/linux/boot.py b/drgn/helpers/linux/boot.py index 7d064960d..b43984059 100644 --- a/drgn/helpers/linux/boot.py +++ b/drgn/helpers/linux/boot.py @@ -9,29 +9,13 @@ Linux kernel boot configuration. """ -from _drgn import _linux_helper_kaslr_offset, _linux_helper_pgtable_l5_enabled +from _drgn import ( + _linux_helper_kaslr_offset as kaslr_offset, + _linux_helper_pgtable_l5_enabled as pgtable_l5_enabled, +) __all__ = ( "kaslr_offset", "pgtable_l5_enabled", ) - - -def kaslr_offset(prog): - """ - .. c:function:: unsigned long kaslr_offset(void) - - Get the kernel address space layout randomization offset (zero if it is - disabled). - """ - return _linux_helper_kaslr_offset(prog) - - -def pgtable_l5_enabled(prog): - """ - .. c:function:: bool pgtable_l5_enabled(void) - - Return whether 5-level paging is enabled. - """ - return _linux_helper_pgtable_l5_enabled(prog) diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index 76bd521c5..c7d4b8757 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -12,7 +12,7 @@ """ from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup -from _drgn import _linux_helper_idr_find +from _drgn import _linux_helper_idr_find as idr_find __all__ = ( @@ -21,16 +21,6 @@ ) -def idr_find(idr, id): - """ - .. c:function:: void *idr_find(struct idr *idr, unsigned long id) - - Look up the entry with the given id in an IDR. If it is not found, this - returns a ``NULL`` object. - """ - return _linux_helper_idr_find(idr, id) - - def idr_for_each(idr): """ .. 
c:function:: idr_for_each(struct idr *idr) diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index 1ff744f19..059912301 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -13,9 +13,9 @@ from drgn.helpers.linux.idr import idr_find, idr_for_each from drgn.helpers.linux.list import hlist_for_each_entry from _drgn import ( - _linux_helper_find_pid, - _linux_helper_find_task, - _linux_helper_pid_task, + _linux_helper_find_pid as find_pid, + _linux_helper_find_task as find_task, + _linux_helper_pid_task as pid_task, ) __all__ = ( @@ -27,17 +27,6 @@ ) -def find_pid(prog_or_ns, nr): - """ - .. c:function:: struct pid *find_pid(struct pid_namespace *ns, int nr) - - Return the ``struct pid *`` for the given PID number in the given - namespace. If given a :class:`Program` instead, the initial PID namespace - is used. - """ - return _linux_helper_find_pid(prog_or_ns, nr) - - def for_each_pid(prog_or_ns): """ .. c:function:: for_each_pid(struct pid_namespace *ns) @@ -66,26 +55,6 @@ def for_each_pid(prog_or_ns): yield container_of(upid, "struct pid", f"numbers[{int(ns.level)}]") -def pid_task(pid, pid_type): - """ - .. c:function:: struct task_struct *pid_task(struct pid *pid, enum pid_type pid_type) - - Return the ``struct task_struct *`` containing the given ``struct pid *`` - of the given type. - """ - return _linux_helper_pid_task(pid, pid_type) - - -def find_task(prog_or_ns, pid): - """ - .. c:function:: struct task_struct *find_task(struct pid_namespace *ns, int pid) - - Return the task with the given PID in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. - """ - return _linux_helper_find_task(prog_or_ns, pid) - - def for_each_task(prog_or_ns): """ .. 
c:function:: for_each_task(struct pid_namespace *ns) diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index 2796c6bbc..9e1433c65 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -10,7 +10,7 @@ """ from drgn import Object, cast -from _drgn import _linux_helper_radix_tree_lookup +from _drgn import _linux_helper_radix_tree_lookup as radix_tree_lookup __all__ = ( @@ -38,16 +38,6 @@ def _radix_tree_root_node(root): return cast("struct xa_node *", node).read_(), 2 -def radix_tree_lookup(root, index): - """ - .. c:function:: void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) - - Look up the entry at a given index in a radix tree. If it is not found, - this returns a ``NULL`` object. - """ - return _linux_helper_radix_tree_lookup(root, index) - - def radix_tree_for_each(root): """ .. c:function:: radix_tree_for_each(struct radix_tree_root *root) diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index 85fdd74e9..88a9cf348 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -9,21 +9,7 @@ Linux CPU scheduler. """ -from _drgn import _linux_helper_task_state_to_char +from _drgn import _linux_helper_task_state_to_char as task_state_to_char __all__ = ("task_state_to_char",) - - -def task_state_to_char(task): - """ - .. c:function char task_state_to_char(struct task_struct *task) - - Get the state of the task as a character (e.g., ``'R'`` for running). See - `ps(1) - `_ for - a description of the process state codes. - - :rtype: str - """ - return _linux_helper_task_state_to_char(task) From 64a04a6c4fce65f9f8e60bf09d14e0c3c95f8169 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 16 Aug 2020 20:40:47 -0700 Subject: [PATCH 10/56] drgndoc: include attributes based on presence of docstring We can get rid of the :include: and :exclude: options by deciding solely based on whether a node has a docstring. 
Empty docstrings can be used to indicate nodes that should be included with no additional content. The __init__() method must now also have a docstring in order to be documented. Additionally, the directives are now fully formatted by the Formatter rather than being split between the Formatter and DrgnDocDirective. Signed-off-by: Omar Sandoval --- _drgn.pyi | 169 +++++++++++++++-------- docs/api_reference.rst | 2 - docs/exts/drgndoc/docstrings.py | 15 +- docs/exts/drgndoc/ext.py | 107 ++++---------- docs/exts/drgndoc/format.py | 237 +++++++++++++++++++++----------- 5 files changed, 302 insertions(+), 228 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 242e09525..4dffcb24d 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -56,15 +56,18 @@ class Program: The main functionality of a ``Program`` is looking up objects (i.e., variables, constants, or functions). This is usually done with the :meth:`[] <.__getitem__>` operator. - - This class can be constructed directly, but it is usually more convenient - to use one of the :ref:`api-program-constructors`. - - :param platform: The platform of the program, or ``None`` if it should be - determined automatically when a core dump or symbol file is added. """ - def __init__(self, platform: Optional[Platform] = None) -> None: ... + def __init__(self, platform: Optional[Platform] = None) -> None: + """ + This class can be constructed directly, but it is usually more + convenient to use one of the :ref:`api-program-constructors`. + + :param platform: The platform of the program, or ``None`` if it should + be determined automatically when a core dump or symbol file is + added. + """ + ... flags: ProgramFlags """Flags which apply to this program.""" @@ -270,10 +273,18 @@ class Program: :raises ValueError: if *size* is negative """ ... - def read_u8(self, address: IntegerLike, physical: bool = False) -> int: ... - def read_u16(self, address: IntegerLike, physical: bool = False) -> int: ... 
- def read_u32(self, address: IntegerLike, physical: bool = False) -> int: ... - def read_u64(self, address: IntegerLike, physical: bool = False) -> int: ... + def read_u8(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_u16(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_u32(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_u64(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... def read_word(self, address: IntegerLike, physical: bool = False) -> int: """ Read an unsigned integer from the program's memory in the program's @@ -467,9 +478,13 @@ class FindObjectFlags(enum.Flag): """ CONSTANT = ... + "" FUNCTION = ... + "" VARIABLE = ... + "" ANY = ... + "" def filename_matches(haystack: Optional[str], needle: Optional[str]) -> bool: """ @@ -518,15 +533,17 @@ class Platform: """ A ``Platform`` represents the environment (i.e., architecture and ABI) that a program runs on. - - :param arch: :attr:`Platform.arch` - :param flags: :attr:`Platform.flags`; if ``None``, default flags for the - architecture are used. """ def __init__( self, arch: Architecture, flags: Optional[PlatformFlags] = None - ) -> None: ... + ) -> None: + """ + :param arch: :attr:`Platform.arch` + :param flags: :attr:`Platform.flags`; if ``None``, default flags for + the architecture are used. + """ + ... arch: Architecture """Instruction set architecture of this platform.""" @@ -666,25 +683,6 @@ class Object: conflicting with structure, union, or class members. The attributes and methods always take precedence; use :meth:`member_()` if there is a conflict. - - Objects are usually obtained directly from a :class:`Program`, but they can - be constructed manually, as well (for example, if you got a variable - address from a log file). - - :param prog: The program to create this object in. - :param type: The type of the object. 
If omitted, this is deduced from - *value* according to the language's rules for literals. - :param value: The value of this object. See :meth:`value_()`. - :param address: The address of this object in the program. Either this or - *value* must be given, but not both. - :param byteorder: Byte order of the object. This should be ``'little'`` or - ``'big'``. The default is ``None``, which indicates the program byte - order. This must be ``None`` for primitive values. - :param bit_offset: Offset in bits from the object's address to the - beginning of the object. The default is ``None``, which means no - offset. This must be ``None`` for primitive values. - :param bit_field_size: Size in bits of this object if it is a bit field. - The default is ``None``, which means the object is not a bit field. """ def __init__( @@ -697,7 +695,29 @@ class Object: byteorder: Optional[str] = None, bit_offset: Optional[IntegerLike] = None, bit_field_size: Optional[IntegerLike] = None, - ) -> None: ... + ) -> None: + """ + Objects are usually obtained directly from a :class:`Program`, but they + can be constructed manually, as well (for example, if you got a + variable address from a log file). + + :param prog: The program to create this object in. + :param type: The type of the object. If omitted, this is deduced from + *value* according to the language's rules for literals. + :param value: The value of this object. See :meth:`value_()`. + :param address: The address of this object in the program. Either this + or *value* must be given, but not both. + :param byteorder: Byte order of the object. This should be ``'little'`` + or ``'big'``. The default is ``None``, which indicates the program + byte order. This must be ``None`` for primitive values. + :param bit_offset: Offset in bits from the object's address to the + beginning of the object. The default is ``None``, which means no + offset. This must be ``None`` for primitive values. 
+ :param bit_field_size: Size in bits of this object if it is a bit + field. The default is ``None``, which means the object is not a bit + field. + """ + ... prog_: Program """Program that this object is from.""" @@ -1257,15 +1277,6 @@ class Type: class TypeMember: """ A ``TypeMember`` represents a member of a structure, union, or class type. - - :param type: Type of the member. This may be a :class:`Type` or a callable - that takes no arguments and returns a :class:`Type`. - :param name: Name of the member. This may be ``None`` if the member is - unnamed. - :param bit_offset: Offset of the member from the beginning of the type - in bits. - :param bit_field_size: Size in bits of this member if it is a bit field, - zero otherwise. """ def __init__( @@ -1274,12 +1285,23 @@ class TypeMember: name: Optional[str] = None, bit_offset: int = 0, bit_field_size: int = 0, - ) -> None: ... + ) -> None: + """ + :param type: :attr:`TypeMember.type`; may also be a callable that + takes no arguments and returns a :class:`Type`. + :param name: :attr:`TypeMember.name` + :param bit_offset: :attr:`TypeMember.bit_offset` + :param bit_field_size: :attr:`TypeMember.bit_field_size` + """ + ... type: Type + """Member type.""" name: Optional[str] + """Member name, or ``None`` if the member is unnamed.""" bit_offset: int + """Offset of the member from the beginning of the type in bits.""" offset: int """ @@ -1288,6 +1310,7 @@ class TypeMember: """ bit_field_size: int + """Size in bits of this member if it is a bit field, zero otherwise.""" class TypeEnumerator: """ @@ -1300,15 +1323,19 @@ class TypeEnumerator: >>> name, value = prog.type('enum pid_type').enumerators[0] >>> value 0 - - :param name: Enumerator name. - :param value: Enumerator value. """ - def __init__(self, name: str, value: int) -> None: ... + def __init__(self, name: str, value: int) -> None: + """ + :param name: :attr:`TypeEnumerator.name` + :param value: :attr:`TypeEnumerator.value` + """ + ... 
name: str + "Enumerator name." value: int + "Enumerator value." def __len__(self) -> int: ... def __getitem__(self, idx: int) -> Any: ... def __iter__(self) -> Iterator[Any]: ... @@ -1316,19 +1343,22 @@ class TypeEnumerator: class TypeParameter: """ A ``TypeParameter`` represents a parameter of a function type. - - :param type: Type of the parameter. This may be a :class:`Type` or a callable - that takes no arguments and returns a :class:`Type`. - :param name: Name of the parameter. This may be ``None`` if the parameter is - unnamed. """ def __init__( self, type: Union[Type, Callable[[], Type]], name: Optional[str] = None - ) -> None: ... + ) -> None: + """ + :param type: :attr:`TypeParameter.type`; may also be a callable that + takes no arguments and returns a :class:`Type`. + :param name: :attr:`TypeParameter.name` + """ + ... type: Type + """Parameter type.""" name: Optional[str] + """Parameter name, or ``None`` if the parameter is unnamed.""" class TypeKind(enum.Enum): """A ``TypeKind`` represents a kind of type.""" @@ -1376,23 +1406,41 @@ class PrimitiveType(enum.Enum): """A ``PrimitiveType`` represents a primitive type known to drgn.""" C_VOID = ... + "" C_CHAR = ... + "" C_SIGNED_CHAR = ... + "" C_UNSIGNED_CHAR = ... + "" C_SHORT = ... + "" C_UNSIGNED_SHORT = ... + "" C_INT = ... + "" C_UNSIGNED_INT = ... + "" C_LONG = ... + "" C_UNSIGNED_LONG = ... + "" C_LONG_LONG = ... + "" C_UNSIGNED_LONG_LONG = ... + "" C_BOOL = ... + "" C_FLOAT = ... + "" C_DOUBLE = ... + "" C_LONG_DOUBLE = ... + "" C_SIZE_T = ... + "" C_PTRDIFF_T = ... + "" class Qualifiers(enum.Flag): """``Qualifiers`` are modifiers on types.""" @@ -1645,12 +1693,15 @@ class FaultError(Exception): """ This error is raised when a bad memory access is attempted (i.e., when accessing a memory address which is not valid in a program). - - :param address: Address that couldn't be accessed. """ - def __init__(self, address: int) -> None: ... 
+ def __init__(self, address: int) -> None: + """ + :param address: :attr:`FaultError.address` + """ + ... address: int + """Address that couldn't be accessed.""" class MissingDebugInfoError(Exception): """ diff --git a/docs/api_reference.rst b/docs/api_reference.rst index f5cd5a0e3..6c3ea7de9 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -7,7 +7,6 @@ Programs -------- .. drgndoc:: Program - :include: __getitem__ .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags @@ -57,7 +56,6 @@ Objects ------- .. drgndoc:: Object - :include: __getattribute__|__getitem__|__len__ .. drgndoc:: NULL .. drgndoc:: cast .. drgndoc:: reinterpret diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index 21076d29f..6a9c1881e 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -5,7 +5,7 @@ import argparse import functools import sys -from typing import cast +from typing import Union, cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode @@ -91,21 +91,18 @@ def escape_string(s: str) -> str: def aux(resolved: ResolvedNode[Node], name: str) -> None: node = resolved.node - if hasattr(node, "docstring"): + if getattr(node, "docstring", None) is not None: var_name = name.replace(".", "_") + "_DOC" if args.header: output_file.write("extern ") output_file.write(f"const char {var_name}[]") if not args.header: output_file.write(" =") - signature, lines = formatter.format( - cast(ResolvedNode[DocumentedNode], resolved), rst=False + lines = formatter.format( + cast(ResolvedNode[DocumentedNode], resolved), + name.rpartition(".")[2], + rst=False, ) - if signature: - lines[0:0] = [ - name.rpartition(".")[2] + signature, - "", - ] if lines: for i, line in enumerate(lines): output_file.write(f'\n\t"{escape_string(line)}') diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index ea63d85ac..9194c9a78 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py 
@@ -58,12 +58,10 @@ from drgndoc.parse import ( Class, DocumentedNode, - Function, Import, ImportFrom, Module, Node, - Variable, parse_paths, ) from drgndoc.util import dot_join @@ -100,10 +98,6 @@ class DrgnDocDirective(sphinx.util.docutils.SphinxDirective): required_arguments = 1 optional_arguments = 0 - option_spec = { - "include": docutils.parsers.rst.directives.unchanged, - "exclude": docutils.parsers.rst.directives.unchanged, - } def run(self) -> Any: parts = [] @@ -119,34 +113,14 @@ def run(self) -> Any: if not isinstance(resolved, ResolvedNode): logger.warning("name %r not found", resolved) return [] + if resolved.node.docstring is None: + logger.warning("name %r is not documented", resolved.qualified_name()) + return [] docnode = docutils.nodes.section() self._run(name, "", resolved, docnode) return docnode.children - def _include_attr(self, attr_name: str) -> bool: - """ - Return whether the given recursive attribute should be documented. - - We recursively include nodes that: - 1. Match the "include" pattern OR don't start with an underscore. - AND - 2. Do not match the "exclude" pattern. - - The "include" and "exclude" patterns are applied to the name relative - to the object being documented by the directive. - """ - if not attr_name: - return True - - dot = attr_name.rfind(".") - if dot + 1 < len(attr_name) and attr_name[dot + 1] == "_": - include_pattern = self.options.get("include") - if include_pattern is None or not re.fullmatch(include_pattern, attr_name): - return False - exclude_pattern = self.options.get("exclude") - return exclude_pattern is None or not re.fullmatch(exclude_pattern, attr_name) - def _run( self, top_name: str, @@ -154,9 +128,6 @@ def _run( resolved: ResolvedNode[Node], docnode: docutils.nodes.Node, ) -> None: - if not self._include_attr(attr_name): - return - if isinstance(resolved.node, (Import, ImportFrom)): # Only include imports that are explicitly aliased (i.e., import # ... as ... or from ... import ... as ...). 
@@ -172,58 +143,31 @@ def _run( resolved = cast(ResolvedNode[DocumentedNode], resolved) - node = resolved.node - if isinstance(node, Module): - directive = "py:module" + if isinstance(resolved.node, Module): return self._run_module( top_name, attr_name, cast(ResolvedNode[Module], resolved), docnode ) - sourcename = "" - if resolved.modules and resolved.modules[-1].node.path: - sourcename = resolved.modules[-1].node.path - if sourcename: - self.env.note_dependency(sourcename) - - if isinstance(node, Class): - directive = "py:class" - elif isinstance(node, Function): - directive = "py:method" if resolved.classes else "py:function" - elif isinstance(node, Variable): - directive = "py:attribute" if resolved.classes else "py:data" - else: - assert False, type(node).__name__ - - argument = (attr_name or top_name).rpartition(".")[2] - extra_argument, lines = self.env.drgndoc_formatter.format( + lines = self.env.drgndoc_formatter.format( resolved, + (attr_name or top_name).rpartition(".")[2], self.env.ref_context.get("py:module", ""), ".".join(self.env.ref_context.get("py:classes", ())), ) + if not lines: + # Not documented. Ignore it. + return - contents = docutils.statemachine.StringList() - contents.append( - f".. 
{directive}:: {argument}{extra_argument}", sourcename, - ) - if isinstance(node, Function): - if node.async_: - contents.append(" :async:", sourcename) - if resolved.classes: - if node.have_decorator("classmethod") or argument in ( - "__init_subclass__", - "__class_getitem__", - ): - contents.append(" :classmethod:", sourcename) - if node.have_decorator("staticmethod"): - contents.append(" :staticmethod:", sourcename) + sourcename = "" + if resolved.modules and resolved.modules[-1].node.path: + sourcename = resolved.modules[-1].node.path + if sourcename: + self.env.note_dependency(sourcename) + contents = docutils.statemachine.StringList(lines, sourcename) contents.append("", sourcename) - if lines: - for line in lines: - contents.append(" " + line, sourcename) - contents.append("", sourcename) self.state.nested_parse(contents, 0, docnode) - if isinstance(node, Class): + if isinstance(resolved.node, Class): for desc in reversed(docnode.children): if isinstance(desc, sphinx.addnodes.desc): break @@ -241,9 +185,10 @@ def _run( py_classes.append(resolved.name) self.env.ref_context["py:class"] = resolved.name for member in resolved.attrs(): - self._run( - top_name, dot_join(attr_name, member.name), member, desc_content - ) + if member.name != "__init__": + self._run( + top_name, dot_join(attr_name, member.name), member, desc_content + ) py_classes.pop() self.env.ref_context["py:class"] = py_classes[-1] if py_classes else None @@ -255,14 +200,16 @@ def _run_module( docnode: docutils.nodes.Node, ) -> None: node = resolved.node + if node.docstring is None: + # Not documented. Ignore it. 
+ return + sourcename = node.path or "" if sourcename: self.env.note_dependency(sourcename) - - contents = docutils.statemachine.StringList() - if node.docstring: - for line in node.docstring.splitlines(): - contents.append(line, sourcename) + contents = docutils.statemachine.StringList( + node.docstring.splitlines(), sourcename + ) sphinx.util.nodes.nested_parse_with_titles(self.state, contents, docnode) diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index 66071cd8e..6dd47c5bd 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -218,56 +218,14 @@ def __init__( self._namespace = namespace self._substitutions = substitutions - def _add_class_info( - self, - resolved: ResolvedNode[Class], - context_module: Optional[str], - context_class: Optional[str], - rst: bool, - lines: List[str], - ) -> str: - node = resolved.node - if node.bases: - visitor = _FormatVisitor( - self._namespace, - self._substitutions, - resolved.modules, - resolved.classes, - context_module, - context_class, - ) - bases = [visitor.visit(base, rst) for base in node.bases] - lines[0:0] = ["Bases: " + ", ".join(bases), ""] - - extra_argument = "" - try: - init = resolved.attr("__init__") - except KeyError: - pass - else: - if isinstance(init.node, Function): - init_context_class = resolved.name - if context_class: - init_context_class = context_class + "." 
+ init_context_class - extra_argument = self._add_function_info( - cast(ResolvedNode[Function], init), - context_module, - init_context_class, - rst, - False, - lines, - ) - return extra_argument - - def _add_function_info( + def _format_function_signature( self, resolved: ResolvedNode[Function], context_module: Optional[str], context_class: Optional[str], rst: bool, want_rtype: bool, - lines: List[str], - ) -> str: + ) -> Tuple[str, List[str]]: visitor = _FormatVisitor( self._namespace, self._substitutions, @@ -277,14 +235,15 @@ def _add_function_info( context_class, ) node = resolved.node + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() if rst: - if node.docstring is None: - want_rtype = False - + lines = [] params_need_type = set() params_have_type = set() - for line in lines: + for line in docstring_lines: + lines.append(" " + line) match = re.match(r":(param|type)\s+([a-zA-Z0-9_]+):", line) if match: if match.group(1) == "param": @@ -294,15 +253,17 @@ def _add_function_info( elif line.startswith(":rtype:"): want_rtype = False params_need_type -= params_have_type - lines.append("") + else: + lines = docstring_lines signature = ["("] need_comma = False + need_blank_line = bool(lines) def visit_arg( arg: ast.arg, default: Optional[ast.expr] = None, prefix: str = "" ) -> None: - nonlocal need_comma + nonlocal need_comma, need_blank_line if need_comma: signature.append(", ") if prefix: @@ -321,7 +282,10 @@ def visit_arg( need_comma = True if rst and arg.annotation and arg.arg in params_need_type: - lines.append(f":type {arg.arg}: {visitor.visit(arg.annotation)}") + if need_blank_line: + lines.append("") + need_blank_line = False + lines.append(f" :type {arg.arg}: {visitor.visit(arg.annotation)}") posonlyargs = getattr(node.args, "posonlyargs", []) num_posargs = len(posonlyargs) + len(node.args.args) @@ -359,27 +323,128 @@ def visit_arg( if want_rtype and node.returns: if rst: - lines.append(":rtype: " + 
visitor.visit(node.returns)) + if need_blank_line: + lines.append("") + need_blank_line = False + lines.append(" :rtype: " + visitor.visit(node.returns)) else: signature.append(" -> ") signature.append(visitor.visit(node.returns, False)) - return "".join(signature) + return "".join(signature), lines + + def _format_class( + self, + resolved: ResolvedNode[Class], + name: str, + context_module: Optional[str] = None, + context_class: Optional[str] = None, + rst: bool = True, + ) -> List[str]: + node = resolved.node + + signature = "" + signature_lines = None + try: + init = resolved.attr("__init__") + except KeyError: + pass + else: + if isinstance(init.node, Function) and init.node.docstring is not None: + init_context_class = resolved.name + if context_class: + init_context_class = context_class + "." + init_context_class + signature, signature_lines = self._format_function_signature( + cast(ResolvedNode[Function], init), + context_module, + init_context_class, + rst, + False, + ) + + if rst: + lines = [f".. 
py:class:: {name}{signature}"] + elif signature: + lines = [f"{name}{signature}"] + else: + lines = [] + + if node.bases: + visitor = _FormatVisitor( + self._namespace, + self._substitutions, + resolved.modules, + resolved.classes, + context_module, + context_class, + ) + bases = [visitor.visit(base, rst) for base in node.bases] + if lines: + lines.append("") + lines.append((" " if rst else "") + "Bases: " + ", ".join(bases)) + + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() + if docstring_lines: + if lines: + lines.append("") + if rst: + for line in docstring_lines: + lines.append(" " + line) + else: + lines.extend(docstring_lines) + + if signature_lines: + lines.append("") + lines.extend(signature_lines) + return lines - def _add_variable_info( + def _format_function( + self, + resolved: ResolvedNode[Function], + name: str, + context_module: Optional[str] = None, + context_class: Optional[str] = None, + rst: bool = True, + ) -> List[str]: + node = resolved.node + + signature, signature_lines = self._format_function_signature( + resolved, context_module, context_class, rst, True + ) + + if rst: + directive = "py:method" if resolved.classes else "py:function" + lines = [f".. 
{directive}:: {name}{signature}"] + if node.async_: + lines.append(" :async:") + if node.have_decorator("classmethod") or name in ( + "__init_subclass__", + "__class_getitem__", + ): + lines.append(" :classmethod:") + if node.have_decorator("staticmethod"): + lines.append(" :staticmethod:") + else: + lines = [f"{name}{signature}"] + if signature_lines: + lines.append("") + lines.extend(signature_lines) + return lines + + def _format_variable( self, resolved: ResolvedNode[Variable], + name: str, context_module: Optional[str], context_class: Optional[str], rst: bool, - lines: List[str], - ) -> None: - annotation = resolved.node.annotation - if not annotation: - return - for line in lines: - if line.startswith(":vartype:"): - return + ) -> List[str]: + node = resolved.node + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() + + have_vartype = any(line.startswith(":vartype:") for line in docstring_lines) visitor = _FormatVisitor( self._namespace, @@ -390,50 +455,66 @@ def _add_variable_info( context_class, ) if rst: - lines.append("") - lines.append(":vartype: " + visitor.visit(annotation)) + directive = "py:attribute" if resolved.classes else "py:data" + lines = [f".. 
{directive}:: {name}"] + if docstring_lines: + lines.append("") + for line in docstring_lines: + lines.append(" " + line) + if node.annotation and not have_vartype: + lines.append("") + lines.append(" :vartype: " + visitor.visit(node.annotation)) + return lines else: - lines[0:0] = [visitor.visit(annotation, False), ""] + if node.annotation and not have_vartype: + if docstring_lines: + docstring_lines.insert(0, "") + docstring_lines.insert(0, visitor.visit(node.annotation, False)) + return docstring_lines def format( self, resolved: ResolvedNode[DocumentedNode], + name: Optional[str] = None, context_module: Optional[str] = None, context_class: Optional[str] = None, rst: bool = True, - ) -> Tuple[str, List[str]]: + ) -> List[str]: + node = resolved.node + if node.docstring is None: + return [] + + if name is None: + name = resolved.name if context_module is None and resolved.modules: context_module = ".".join([module.name for module in resolved.modules]) if context_class is None and resolved.classes: context_module = ".".join([class_.name for class_ in resolved.classes]) - node = resolved.node - lines = node.docstring.splitlines() if node.docstring else [] - - signature = "" if isinstance(node, Class): - signature = self._add_class_info( + return self._format_class( cast(ResolvedNode[Class], resolved), + name, context_module, context_class, rst, - lines, ) elif isinstance(node, Function): - signature = self._add_function_info( + return self._format_function( cast(ResolvedNode[Function], resolved), + name, context_module, context_class, rst, - True, - lines, ) elif isinstance(node, Variable): - self._add_variable_info( + return self._format_variable( cast(ResolvedNode[Variable], resolved), + name, context_module, context_class, rst, - lines, ) - return signature, lines + else: + assert isinstance(node, Module) + return node.docstring.splitlines() From e4a2676cac48822ca43f17d39e414f5cb40a98e6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 18 Aug 2020 13:36:13 
-0700 Subject: [PATCH 11/56] drgndoc: support @typing.overload() One of the blockers for adding type annotations to helpers is that some helpers need to be overloaded, but drgndoc doesn't support that. This adds support. Each function now tracks all of its overloaded signatures, each of which may be documented separately. The formatted output (for functions/methods and classes with __init__()) combines all of the documented overloads. Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/docstrings.py | 2 +- docs/exts/drgndoc/ext.py | 2 +- docs/exts/drgndoc/format.py | 171 ++++++++++++++++++++------------ docs/exts/drgndoc/parse.py | 69 +++++++++---- 4 files changed, 158 insertions(+), 86 deletions(-) diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index 6a9c1881e..045fc0702 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -91,7 +91,7 @@ def escape_string(s: str) -> str: def aux(resolved: ResolvedNode[Node], name: str) -> None: node = resolved.node - if getattr(node, "docstring", None) is not None: + if node.has_docstring(): var_name = name.replace(".", "_") + "_DOC" if args.header: output_file.write("extern ") diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 9194c9a78..331af94d7 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -113,7 +113,7 @@ def run(self) -> Any: if not isinstance(resolved, ResolvedNode): logger.warning("name %r not found", resolved) return [] - if resolved.node.docstring is None: + if not resolved.node.has_docstring(): logger.warning("name %r is not documented", resolved.qualified_name()) return [] diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index 6dd47c5bd..489c57021 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -6,7 +6,14 @@ from typing import Any, List, Optional, Pattern, Sequence, Tuple, cast from drgndoc.namespace import BoundNode, Namespace, ResolvedNode -from drgndoc.parse
import Class, DocumentedNode, Function, Module, Variable +from drgndoc.parse import ( + Class, + DocumentedNode, + Function, + FunctionSignature, + Module, + Variable, +) from drgndoc.visitor import NodeVisitor @@ -220,7 +227,9 @@ def __init__( def _format_function_signature( self, - resolved: ResolvedNode[Function], + node: FunctionSignature, + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], context_module: Optional[str], context_class: Optional[str], rst: bool, @@ -229,12 +238,11 @@ def _format_function_signature( visitor = _FormatVisitor( self._namespace, self._substitutions, - resolved.modules, - resolved.classes, + modules, + classes, context_module, context_class, ) - node = resolved.node assert node.docstring is not None docstring_lines = node.docstring.splitlines() @@ -297,7 +305,7 @@ def visit_arg( ] else: default = None - if i == 0 and resolved.classes and not node.have_decorator("staticmethod"): + if i == 0 and classes and not node.has_decorator("staticmethod"): # Skip self for methods and cls for class methods. continue visit_arg(arg, default) @@ -343,60 +351,78 @@ def _format_class( ) -> List[str]: node = resolved.node - signature = "" - signature_lines = None + init_signatures = None try: init = resolved.attr("__init__") except KeyError: pass else: - if isinstance(init.node, Function) and init.node.docstring is not None: + if isinstance(init.node, Function): + init_signatures = [ + signature + for signature in init.node.signatures + if signature.docstring is not None + ] init_context_class = resolved.name if context_class: init_context_class = context_class + "." 
+ init_context_class + + lines = [] + for i, signature_node in enumerate(init_signatures or (None,)): + if i > 0: + lines.append("") + + signature_lines: Optional[List[str]] + if signature_node: signature, signature_lines = self._format_function_signature( - cast(ResolvedNode[Function], init), + signature_node, + init.modules, + init.classes, context_module, init_context_class, rst, False, ) + else: + signature = "" + signature_lines = None - if rst: - lines = [f".. py:class:: {name}{signature}"] - elif signature: - lines = [f"{name}{signature}"] - else: - lines = [] - - if node.bases: - visitor = _FormatVisitor( - self._namespace, - self._substitutions, - resolved.modules, - resolved.classes, - context_module, - context_class, - ) - bases = [visitor.visit(base, rst) for base in node.bases] - if lines: - lines.append("") - lines.append((" " if rst else "") + "Bases: " + ", ".join(bases)) - - assert node.docstring is not None - docstring_lines = node.docstring.splitlines() - if docstring_lines: - if lines: - lines.append("") if rst: - for line in docstring_lines: - lines.append(" " + line) - else: - lines.extend(docstring_lines) + lines.append(f".. 
py:class:: {name}{signature}") + if i > 0: + lines.append(" :noindex:") + elif signature: + lines.append(f"{name}{signature}") + + if i == 0: + if node.bases: + visitor = _FormatVisitor( + self._namespace, + self._substitutions, + resolved.modules, + resolved.classes, + context_module, + context_class, + ) + bases = [visitor.visit(base, rst) for base in node.bases] + if lines: + lines.append("") + lines.append((" " if rst else "") + "Bases: " + ", ".join(bases)) + + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() + if docstring_lines: + if lines: + lines.append("") + if rst: + for line in docstring_lines: + lines.append(" " + line) + else: + lines.extend(docstring_lines) - if signature_lines: - lines.append("") - lines.extend(signature_lines) + if signature_lines: + lines.append("") + lines.extend(signature_lines) return lines def _format_function( @@ -409,27 +435,43 @@ def _format_function( ) -> List[str]: node = resolved.node - signature, signature_lines = self._format_function_signature( - resolved, context_module, context_class, rst, True - ) + lines = [] + for i, signature_node in enumerate( + signature + for signature in node.signatures + if signature.docstring is not None + ): + if i > 0: + lines.append("") + signature, signature_lines = self._format_function_signature( + signature_node, + resolved.modules, + resolved.classes, + context_module, + context_class, + rst, + True, + ) - if rst: - directive = "py:method" if resolved.classes else "py:function" - lines = [f".. 
{directive}:: {name}{signature}"] - if node.async_: - lines.append(" :async:") - if node.have_decorator("classmethod") or name in ( - "__init_subclass__", - "__class_getitem__", - ): - lines.append(" :classmethod:") - if node.have_decorator("staticmethod"): - lines.append(" :staticmethod:") - else: - lines = [f"{name}{signature}"] - if signature_lines: - lines.append("") - lines.extend(signature_lines) + if rst: + directive = "py:method" if resolved.classes else "py:function" + lines.append(f".. {directive}:: {name}{signature}") + if i > 0: + lines.append(" :noindex:") + if node.async_: + lines.append(" :async:") + if signature_node.has_decorator("classmethod") or name in ( + "__init_subclass__", + "__class_getitem__", + ): + lines.append(" :classmethod:") + if signature_node.has_decorator("staticmethod"): + lines.append(" :staticmethod:") + else: + lines.append(f"{name}{signature}") + if signature_lines: + lines.append("") + lines.extend(signature_lines) return lines def _format_variable( @@ -481,7 +523,7 @@ def format( rst: bool = True, ) -> List[str]: node = resolved.node - if node.docstring is None: + if not node.has_docstring(): return [] if name is None: @@ -517,4 +559,5 @@ def format( ) else: assert isinstance(node, Module) + assert node.docstring is not None return node.docstring.splitlines() diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index 68f9dd867..0dcb766d3 100644 --- a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -91,6 +91,9 @@ def __init__( self.docstring = docstring self.attrs = attrs + def has_docstring(self) -> bool: + return self.docstring is not None + class Class: def __init__( @@ -103,29 +106,39 @@ def __init__( self.docstring = docstring self.attrs = attrs + def has_docstring(self) -> bool: + return self.docstring is not None -class Function: + +class FunctionSignature: def __init__( self, args: ast.arguments, - decorator_list: Sequence[ast.expr], returns: Optional[ast.expr], - async_: bool, + 
decorator_list: Sequence[ast.expr], docstring: Optional[str], ) -> None: self.args = args - self.decorator_list = decorator_list self.returns = returns - self.async_ = async_ + self.decorator_list = decorator_list self.docstring = docstring - def have_decorator(self, name: str) -> bool: + def has_decorator(self, name: str) -> bool: return any( isinstance(decorator, ast.Name) and decorator.id == name for decorator in self.decorator_list ) +class Function: + def __init__(self, async_: bool, signatures: Sequence[FunctionSignature]) -> None: + self.async_ = async_ + self.signatures = signatures + + def has_docstring(self) -> bool: + return any(signature.docstring is not None for signature in self.signatures) + + class Variable: def __init__( self, annotation: Optional[ast.expr], docstring: Optional[str] @@ -133,12 +146,18 @@ def __init__( self.annotation = annotation self.docstring = docstring + def has_docstring(self) -> bool: + return self.docstring is not None + class Import: def __init__(self, module: str, aliased: bool) -> None: self.module = module self.aliased = aliased + def has_docstring(self) -> bool: + return False + class ImportFrom: def __init__( @@ -149,6 +168,9 @@ def __init__( self.level = level self.aliased = aliased + def has_docstring(self) -> bool: + return False + Node = Union[Module, Class, Function, Variable, Import, ImportFrom] NonModuleNode = Union[Class, Function, Variable, Import, ImportFrom] @@ -192,27 +214,34 @@ def visit_ClassDef( self._attrs = attrs self._attrs[node.name] = class_node - def visit_FunctionDef( + def _visit_function( self, - node: ast.FunctionDef, + node: Union[ast.FunctionDef, ast.AsyncFunctionDef], parent: Optional[ast.AST], sibling: Optional[ast.AST], ) -> None: - self._attrs[node.name] = Function( - node.args, node.decorator_list, node.returns, False, ast.get_docstring(node) + signature = FunctionSignature( + node.args, node.returns, node.decorator_list, ast.get_docstring(node) ) + async_ = isinstance(node, 
ast.AsyncFunctionDef) + func = self._attrs.get(node.name) + # If we have a previous overload definition, we can add to it. + # Otherwise, we replace it. + if ( + func + and isinstance(func, Function) + and func.async_ == async_ + and func.signatures[-1].has_decorator("overload") + ): + signatures = list(func.signatures) + signatures.append(signature) + else: + signatures = [signature] + self._attrs[node.name] = Function(async_, signatures) # NB: we intentionally don't visit the function body. - def visit_AsyncFunctionDef( - self, - node: ast.AsyncFunctionDef, - parent: Optional[ast.AST], - sibling: Optional[ast.AST], - ) -> None: - self._attrs[node.name] = Function( - node.args, node.decorator_list, node.returns, True, ast.get_docstring(node) - ) - # NB: we intentionally don't visit the function body. + visit_FunctionDef = _visit_function + visit_AsyncFunctionDef = _visit_function def _add_assign( self, From 0cf3320a89b3ebbbfca8b76928cd46054a58aa30 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Aug 2020 11:58:17 -0700 Subject: [PATCH 12/56] Add type annotations to helpers Now that drgndoc can handle overloads and we have the IntegerLike and Path aliases, we can add type annotations to all helpers. There are also a couple of functional changes that snuck in here to make annotating easier. 
Signed-off-by: Omar Sandoval --- CONTRIBUTING.rst | 4 +- _drgn.pyi | 71 +++++++------ drgn/helpers/linux/block.py | 44 ++++---- drgn/helpers/linux/bpf.py | 35 +++--- drgn/helpers/linux/cgroup.py | 59 ++++++----- drgn/helpers/linux/cpumask.py | 38 +++---- drgn/helpers/linux/device.py | 38 +++---- drgn/helpers/linux/fs.py | 177 +++++++++++++++++-------------- drgn/helpers/linux/idr.py | 9 +- drgn/helpers/linux/kconfig.py | 6 +- drgn/helpers/linux/kernfs.py | 16 ++- drgn/helpers/linux/list.py | 114 +++++++++++--------- drgn/helpers/linux/list_nulls.py | 43 +++----- drgn/helpers/linux/mm.py | 174 +++++++++++++++++++++++------- drgn/helpers/linux/net.py | 16 +-- drgn/helpers/linux/percpu.py | 16 +-- drgn/helpers/linux/pid.py | 22 ++-- drgn/helpers/linux/radixtree.py | 16 +-- drgn/helpers/linux/rbtree.py | 101 +++++++++++------- drgn/helpers/linux/tcp.py | 9 +- drgn/helpers/linux/user.py | 17 +-- libdrgn/python/helpers.c | 2 +- 22 files changed, 581 insertions(+), 446 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 06c948848..82fc64dc4 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -74,8 +74,8 @@ Python Python code in drgn is formatted with `black `_. Code should be compatible with Python 3.6 and newer. -Type hints should be provided for all public interfaces other than helpers -(including the C extension) and most private interfaces. +Type hints should be provided for all interfaces (including helpers and the C +extension). Submitting PRs -------------- diff --git a/_drgn.pyi b/_drgn.pyi index 4dffcb24d..a0ea27bc0 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1721,79 +1721,82 @@ class OutOfBoundsError(Exception): _with_libkdumpfile: bool -def _linux_helper_read_vm(prog, pgtable, address, size): ... -def _linux_helper_radix_tree_lookup(root, index): +def _linux_helper_read_vm( + prog: Program, pgtable: Object, address: IntegerLike, size: IntegerLike +) -> bytes: ... 
+def _linux_helper_radix_tree_lookup(root: Object, index: IntegerLike) -> Object: """ - .. c:function:: void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) + Look up the entry at a given index in a radix tree. - Look up the entry at a given index in a radix tree. If it is not found, - this returns a ``NULL`` object. + :param root: ``struct radix_tree_root *`` + :param index: Entry index. + :return: ``void *`` found entry, or ``NULL`` if not found. """ ... -def _linux_helper_idr_find(idr, id): +def _linux_helper_idr_find(idr: Object, id: IntegerLike) -> Object: """ - .. c:function:: void *idr_find(struct idr *idr, unsigned long id) + Look up the entry with the given ID in an IDR. - Look up the entry with the given id in an IDR. If it is not found, this - returns a ``NULL`` object. + :param idr: ``struct idr *`` + :param id: Entry ID. + :return: ``void *`` found entry, or ``NULL`` if not found. """ ... -def _linux_helper_find_pid(ns, pid): +def _linux_helper_find_pid( + prog_or_ns: Union[Program, Object], pid: IntegerLike +) -> Object: """ - .. c:function:: struct pid *find_pid(struct pid_namespace *ns, int nr) + Return the ``struct pid *`` for the given PID number. - Return the ``struct pid *`` for the given PID number in the given - namespace. If given a :class:`Program` instead, the initial PID namespace - is used. + :param prog_or_ns: ``struct pid_namespace *`` object, or :class:`Program` + to use initial PID namespace. + :return: ``struct pid *`` """ ... -def _linux_helper_pid_task(pid, pid_type): +def _linux_helper_pid_task(pid: Object, pid_type: IntegerLike) -> Object: """ - .. c:function:: struct task_struct *pid_task(struct pid *pid, enum pid_type pid_type) - Return the ``struct task_struct *`` containing the given ``struct pid *`` of the given type. + + :param pid: ``struct pid *`` + :param pid_type: ``enum pid_type`` + :return: ``struct task_struct *`` """ ... 
-def _linux_helper_find_task(prog_or_ns, pid): +def _linux_helper_find_task( + prog_or_ns: Union[Program, Object], pid: IntegerLike +) -> Object: """ - .. c:function:: struct task_struct *find_task(struct pid_namespace *ns, int pid) + Return the task with the given PID. - Return the task with the given PID in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. + :param prog_or_ns: ``struct pid_namespace *`` object, or :class:`Program` + to use initial PID namespace. + :return: ``struct task_struct *`` """ ... -def _linux_helper_task_state_to_char(task): +def _linux_helper_task_state_to_char(task: Object) -> str: """ - .. c:function char task_state_to_char(struct task_struct *task) - Get the state of the task as a character (e.g., ``'R'`` for running). See `ps(1) `_ for a description of the process state codes. - :rtype: str + :param task: ``struct task_struct *`` """ ... -def _linux_helper_kaslr_offset(prog): +def _linux_helper_kaslr_offset(prog: Program) -> int: """ - .. c:function:: unsigned long kaslr_offset(void) - Get the kernel address space layout randomization offset (zero if it is disabled). """ ... -def _linux_helper_pgtable_l5_enabled(prog): - """ - .. c:function:: bool pgtable_l5_enabled(void) - - Return whether 5-level paging is enabled. - """ +def _linux_helper_pgtable_l5_enabled(prog: Program) -> bool: + """Return whether 5-level paging is enabled.""" ... diff --git a/drgn/helpers/linux/block.py b/drgn/helpers/linux/block.py index 061c8af1c..78ee2434b 100644 --- a/drgn/helpers/linux/block.py +++ b/drgn/helpers/linux/block.py @@ -10,7 +10,9 @@ (``struct hd_struct``). 
""" -from drgn import container_of +from typing import Iterator + +from drgn import Object, Program, container_of from drgn.helpers import escape_ascii_string from drgn.helpers.linux.device import MAJOR, MINOR, MKDEV from drgn.helpers.linux.list import list_for_each_entry @@ -27,34 +29,33 @@ ) -def disk_devt(disk): +def disk_devt(disk: Object) -> Object: """ - .. c:function:: dev_t disk_devt(struct gendisk *disk) - Get a disk's device number. + + :param disk: ``struct gendisk *`` + :return: ``dev_t`` """ - return MKDEV(disk.major, disk.first_minor) + return Object(disk.prog_, "dev_t", MKDEV(disk.major, disk.first_minor)) -def disk_name(disk): +def disk_name(disk: Object) -> bytes: """ - .. c:function:: char *disk_name(struct gendisk *disk) - Get the name of a disk (e.g., ``sda``). - :rtype: bytes + :param disk: ``struct gendisk *`` """ return disk.disk_name.string_() -def _for_each_block_device(prog): +def _for_each_block_device(prog: Program) -> Iterator[Object]: try: class_in_private = prog.cache["knode_class_in_device_private"] except KeyError: # We need a proper has_member(), but this is fine for now. class_in_private = any( member.name == "knode_class" - for member in prog.type("struct device_private").members + for member in prog.type("struct device_private").members # type: ignore[union-attr] ) prog.cache["knode_class_in_device_private"] = class_in_private devices = prog["block_class"].p.klist_devices.k_list.address_of_() @@ -67,7 +68,7 @@ def _for_each_block_device(prog): yield from list_for_each_entry("struct device", devices, "knode_class.n_node") -def for_each_disk(prog): +def for_each_disk(prog: Program) -> Iterator[Object]: """ Iterate over all disks in the system. 
@@ -79,7 +80,7 @@ def for_each_disk(prog): yield container_of(device, "struct gendisk", "part0.__dev") -def print_disks(prog): +def print_disks(prog: Program) -> None: """Print all of the disks in the system.""" for disk in for_each_disk(prog): major = disk.major.value_() @@ -88,27 +89,26 @@ def print_disks(prog): print(f"{major}:{minor} {name} ({disk.type_.type_name()})0x{disk.value_():x}") -def part_devt(part): +def part_devt(part: Object) -> Object: """ - .. c:function:: dev_t part_devt(struct hd_struct *part) - Get a partition's device number. + + :param part: ``struct hd_struct *`` + :return: ``dev_t`` """ return part.__dev.devt -def part_name(part): +def part_name(part: Object) -> bytes: """ - .. c:function:: char *part_name(struct hd_struct *part) - Get the name of a partition (e.g., ``sda1``). - :rtype: bytes + :param part: ``struct hd_struct *`` """ return part.__dev.kobj.name.string_() -def for_each_partition(prog): +def for_each_partition(prog: Program) -> Iterator[Object]: """ Iterate over all partitions in the system. @@ -118,7 +118,7 @@ def for_each_partition(prog): yield container_of(device, "struct hd_struct", "__dev") -def print_partitions(prog): +def print_partitions(prog: Program) -> None: """Print all of the partitions in the system.""" for part in for_each_partition(prog): devt = part_devt(part).value_() diff --git a/drgn/helpers/linux/bpf.py b/drgn/helpers/linux/bpf.py index 291c28cba..0c0c347e6 100644 --- a/drgn/helpers/linux/bpf.py +++ b/drgn/helpers/linux/bpf.py @@ -12,8 +12,9 @@ import itertools +from typing import Iterator -from drgn import cast +from drgn import IntegerLike, Object, Program, cast from drgn.helpers.linux.idr import idr_for_each from drgn.helpers.linux.list import list_for_each_entry @@ -26,11 +27,9 @@ ) -def bpf_map_for_each(prog): +def bpf_map_for_each(prog: Program) -> Iterator[Object]: """ - .. c:function:: bpf_map_for_each(prog) - - Iterate over all bpf maps. + Iterate over all BPF maps. 
:return: Iterator of ``struct bpf_map *`` objects. """ @@ -38,11 +37,9 @@ def bpf_map_for_each(prog): yield cast("struct bpf_map *", entry) -def bpf_prog_for_each(prog): +def bpf_prog_for_each(prog: Program) -> Iterator[Object]: """ - .. c:function:: bpf_prog_for_each(prog) - - Iterate over all bpf programs. + Iterate over all BPF programs. :return: Iterator of ``struct bpf_prog *`` objects. """ @@ -50,13 +47,15 @@ def bpf_prog_for_each(prog): yield cast("struct bpf_prog *", entry) -def cgroup_bpf_prog_for_each(cgrp, bpf_attach_type): +def cgroup_bpf_prog_for_each( + cgrp: Object, bpf_attach_type: IntegerLike +) -> Iterator[Object]: """ - .. c:function:: cgroup_bpf_prog_for_each(struct cgroup *cgrp, int bpf_attach_type) - - Iterate over all cgroup bpf programs of the given attach type attached to + Iterate over all cgroup BPF programs of the given attach type attached to the given cgroup. + :param cgrp: ``struct cgroup *`` + :param bpf_attach_type: ``enum bpf_attach_type`` :return: Iterator of ``struct bpf_prog *`` objects. """ progs_head = cgrp.bpf.progs[bpf_attach_type] @@ -66,13 +65,15 @@ def cgroup_bpf_prog_for_each(cgrp, bpf_attach_type): yield pl.prog -def cgroup_bpf_prog_for_each_effective(cgrp, bpf_attach_type): +def cgroup_bpf_prog_for_each_effective( + cgrp: Object, bpf_attach_type: IntegerLike +) -> Iterator[Object]: """ - .. c:function:: cgroup_bpf_prog_for_each(struct cgroup *cgrp, int bpf_attach_type) - - Iterate over all effective cgroup bpf programs of the given attach type for + Iterate over all effective cgroup BPF programs of the given attach type for the given cgroup. + :param cgrp: ``struct cgroup *`` + :param bpf_attach_type: ``enum bpf_attach_type`` :return: Iterator of ``struct bpf_prog *`` objects. 
""" prog_array_items = cgrp.bpf.effective[bpf_attach_type].items diff --git a/drgn/helpers/linux/cgroup.py b/drgn/helpers/linux/cgroup.py index c492136e0..b0e112a05 100644 --- a/drgn/helpers/linux/cgroup.py +++ b/drgn/helpers/linux/cgroup.py @@ -10,8 +10,9 @@ supported. """ +from typing import Callable, Iterator -from drgn import NULL, cast, container_of +from drgn import NULL, Object, cast, container_of from drgn.helpers.linux.kernfs import kernfs_name, kernfs_path from drgn.helpers.linux.list import list_for_each_entry @@ -27,22 +28,24 @@ ) -def sock_cgroup_ptr(skcd): +def sock_cgroup_ptr(skcd: Object) -> Object: """ - .. c:function:: struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) - Get the cgroup for a socket from the given ``struct sock_cgroup_data *`` (usually from ``struct sock::sk_cgrp_data``). + + :param skcd: ``struct sock_cgroup_data *`` + :return: ``struct cgroup *`` """ return cast("struct cgroup *", skcd.val) -def cgroup_parent(cgrp): +def cgroup_parent(cgrp: Object) -> Object: """ - .. c:function:: struct cgroup *cgroup_parent(struct cgroup *cgrp) - Return the parent cgroup of the given cgroup if it exists, ``NULL`` otherwise. + + :param cgrp: ``struct cgroup *`` + :return: ``struct cgroup *`` """ parent_css = cgrp.self.parent if parent_css: @@ -50,34 +53,32 @@ def cgroup_parent(cgrp): return NULL(cgrp.prog_, "struct cgroup *") -def cgroup_name(cgrp): +def cgroup_name(cgrp: Object) -> bytes: """ - .. c:function:: char *cgroup_name(struct cgroup *cgrp) - Get the name of the given cgroup. - :rtype: bytes + :param cgrp: ``struct cgroup *`` """ return kernfs_name(cgrp.kn) -def cgroup_path(cgrp): +def cgroup_path(cgrp: Object) -> bytes: """ - .. c:function:: char *cgroup_path(struct cgroup *cgrp) - Get the full path of the given cgroup. - :rtype: bytes + :param cgrp: ``struct cgroup *`` """ return kernfs_path(cgrp.kn) -def css_next_child(pos, parent): +def css_next_child(pos: Object, parent: Object) -> Object: """ - .. 
c:function:: struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent) - Get the next child (or ``NULL`` if there is none) of the given parent starting from the given position (``NULL`` to initiate traversal). + + :param pos: ``struct cgroup_subsys_state *`` + :param parent: ``struct cgroup_subsys_state *`` + :return: ``struct cgroup_subsys_state *`` """ if not pos: next_ = container_of( @@ -98,13 +99,15 @@ def css_next_child(pos, parent): return NULL(next_.prog_, "struct cgroup_subsys_state *") -def css_next_descendant_pre(pos, root): +def css_next_descendant_pre(pos: Object, root: Object) -> Object: """ - .. c:function:: struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *root) - Get the next pre-order descendant (or ``NULL`` if there is none) of the given css root starting from the given position (``NULL`` to initiate traversal). + + :param pos: ``struct cgroup_subsys_state *`` + :param root: ``struct cgroup_subsys_state *`` + :return: ``struct cgroup_subsys_state *`` """ # If first iteration, visit root. if not pos: @@ -126,7 +129,9 @@ def css_next_descendant_pre(pos, root): return NULL(root.prog_, "struct cgroup_subsys_state *") -def _css_for_each_impl(next_fn, css): +def _css_for_each_impl( + next_fn: Callable[[Object, Object], Object], css: Object +) -> Iterator[Object]: pos = NULL(css.prog_, "struct cgroup_subsys_state *") while True: pos = next_fn(pos, css) @@ -136,23 +141,21 @@ def _css_for_each_impl(next_fn, css): yield pos -def css_for_each_child(css): +def css_for_each_child(css: Object) -> Iterator[Object]: """ - .. c:function:: css_for_each_child(struct cgroup_subsys_state *css) - Iterate through children of the given css. + :param css: ``struct cgroup_subsys_state *`` :return: Iterator of ``struct cgroup_subsys_state *`` objects. 
""" return _css_for_each_impl(css_next_child, css) -def css_for_each_descendant_pre(css): +def css_for_each_descendant_pre(css: Object) -> Iterator[Object]: """ - .. c:function:: css_for_each_descendant_pre(struct cgroup_subsys_state *css) - Iterate through the given css's descendants in pre-order. + :param css: ``struct cgroup_subsys_state *`` :return: Iterator of ``struct cgroup_subsys_state *`` objects. """ return _css_for_each_impl(css_next_descendant_pre, css) diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index 4d9a55508..2547452b1 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -9,6 +9,10 @@ masks from :linux:`include/linux/cpumask.h`. """ +from typing import Iterator + +from drgn import Object, Program, sizeof + __all__ = ( "for_each_cpu", "for_each_online_cpu", @@ -17,45 +21,31 @@ ) -def for_each_cpu(mask): +def for_each_cpu(mask: Object) -> Iterator[int]: """ - .. c:function:: for_each_cpu(struct cpumask mask) - Iterate over all of the CPUs in the given mask. - :rtype: Iterator[int] + :param mask: ``struct cpumask`` """ bits = mask.bits - word_bits = 8 * bits.type_.type.size - for i in range(bits.type_.length): + word_bits = 8 * sizeof(bits.type_.type) + for i in range(bits.type_.length): # type: ignore word = bits[i].value_() for j in range(word_bits): if word & (1 << j): yield (word_bits * i) + j -def for_each_possible_cpu(prog): - """ - Iterate over all possible CPUs. - - :rtype: Iterator[int] - """ +def for_each_possible_cpu(prog: Program) -> Iterator[int]: + """Iterate over all possible CPUs.""" return for_each_cpu(prog["__cpu_possible_mask"]) -def for_each_online_cpu(prog): - """ - Iterate over all online CPUs. - - :rtype: Iterator[int] - """ +def for_each_online_cpu(prog: Program) -> Iterator[int]: + """Iterate over all online CPUs.""" return for_each_cpu(prog["__cpu_online_mask"]) -def for_each_present_cpu(prog): - """ - Iterate over all present CPUs. 
- - :rtype: Iterator[int] - """ +def for_each_present_cpu(prog: Program) -> Iterator[int]: + """Iterate over all present CPUs.""" return for_each_cpu(prog["__cpu_present_mask"]) diff --git a/drgn/helpers/linux/device.py b/drgn/helpers/linux/device.py index f339ec2d6..0efc10e4d 100644 --- a/drgn/helpers/linux/device.py +++ b/drgn/helpers/linux/device.py @@ -9,7 +9,9 @@ Linux devices, including the kernel encoding of ``dev_t``. """ -from drgn import Object, cast +import operator + +from drgn import IntegerLike __all__ = ( "MAJOR", @@ -23,37 +25,29 @@ _MINORMASK = (1 << _MINORBITS) - 1 -def MAJOR(dev): +def MAJOR(dev: IntegerLike) -> int: """ - .. c:function:: unsigned int MAJOR(dev_t dev) - Return the major ID of a kernel ``dev_t``. + + :param dev: ``dev_t`` object or :class:`int`. """ - major = dev >> _MINORBITS - if isinstance(major, Object): - return cast("unsigned int", major) - return major + return operator.index(dev) >> _MINORBITS -def MINOR(dev): +def MINOR(dev: IntegerLike) -> int: """ - .. c:function:: unsigned int MINOR(dev_t dev) - Return the minor ID of a kernel ``dev_t``. + + :param dev: ``dev_t`` object or :class:`int`. """ - minor = dev & _MINORMASK - if isinstance(minor, Object): - return cast("unsigned int", minor) - return minor + return operator.index(dev) & _MINORMASK -def MKDEV(major, minor): +def MKDEV(major: IntegerLike, minor: IntegerLike) -> int: """ - .. c:function:: dev_t MKDEV(unsigned int major, unsigned int minor) - Return a kernel ``dev_t`` from the major and minor IDs. + + :param major: Device major ID. + :param minor: Device minor ID.
""" - dev = (major << _MINORBITS) | minor - if isinstance(dev, Object): - return cast("dev_t", dev) - return dev + return (operator.index(major) << _MINORBITS) | operator.index(minor) diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 5d30a7205..4ca2b2ec9 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -10,8 +10,9 @@ """ import os +from typing import Iterator, Optional, Tuple, Union, overload -from drgn import Object, Program, container_of +from drgn import IntegerLike, Object, Path, Program, container_of, sizeof from drgn.helpers import escape_ascii_string from drgn.helpers.linux.list import ( hlist_empty, @@ -37,7 +38,7 @@ ) -def _follow_mount(mnt, dentry): +def _follow_mount(mnt: Object, dentry: Object) -> Tuple[Object, Object]: # DCACHE_MOUNTED is a macro, so we can't easily get the value. But, it # hasn't changed since v2.6.38, so let's hardcode it for now. DCACHE_MOUNTED = 0x10000 @@ -54,7 +55,9 @@ def _follow_mount(mnt, dentry): return mnt, dentry -def _follow_dotdot(mnt, dentry, root_mnt, root_dentry): +def _follow_dotdot( + mnt: Object, dentry: Object, root_mnt: Object, root_dentry: Object +) -> Tuple[Object, Object]: while dentry != root_dentry or mnt != root_mnt: d_parent = dentry.d_parent.read_() if dentry != d_parent: @@ -68,31 +71,33 @@ def _follow_dotdot(mnt, dentry, root_mnt, root_dentry): return _follow_mount(mnt, dentry) -def path_lookup(prog_or_root, path, allow_negative=False): +def path_lookup( + prog_or_root: Union[Program, Object], path: Path, allow_negative: bool = False +) -> Object: """ - .. c:function:: struct path path_lookup(struct path *root, const char *path, bool allow_negative) - - Look up the given path name relative to the given root directory. If given - a :class:`Program` instead of a ``struct path``, the initial root - filesystem is used. - - :param bool allow_negative: Whether to allow returning a negative dentry - (i.e., a dentry for a non-existent path). 
+ Look up the given path name. + + :param prog_or_root: ``struct path *`` object to use as root directory, or + :class:`Program` to use the initial root filesystem. + :param path: Path to look up. + :param allow_negative: Whether to allow returning a negative dentry (i.e., + a dentry for a non-existent path). + :return: ``struct path`` :raises Exception: if the dentry is negative and ``allow_negative`` is ``False``, or if the path is not present in the dcache. The latter does not necessarily mean that the path does not exist; it may be uncached. On a live system, you can make the kernel cache the path by accessing it (e.g., with :func:`open()` or :func:`os.stat()`): - >>> path_lookup(prog, '/usr/include/stdlib.h') - ... - Exception: could not find '/usr/include/stdlib.h' in dcache - >>> open('/usr/include/stdlib.h').close() - >>> path_lookup(prog, '/usr/include/stdlib.h') - (struct path){ - .mnt = (struct vfsmount *)0xffff8b70413cdca0, - .dentry = (struct dentry *)0xffff8b702ac2c480, - } + >>> path_lookup(prog, '/usr/include/stdlib.h') + ... + Exception: could not find '/usr/include/stdlib.h' in dcache + >>> open('/usr/include/stdlib.h').close() + >>> path_lookup(prog, '/usr/include/stdlib.h') + (struct path){ + .mnt = (struct vfsmount *)0xffff8b70413cdca0, + .dentry = (struct dentry *)0xffff8b702ac2c480, + } """ if isinstance(prog_or_root, Program): prog_or_root = prog_or_root["init_task"].fs.root @@ -125,16 +130,31 @@ def path_lookup(prog_or_root, path, allow_negative=False): ) -def d_path(path_or_vfsmnt, dentry=None): +@overload +def d_path(path: Object) -> bytes: + """ + Return the full path of a dentry given a ``struct path``. + + :param path: ``struct path`` or ``struct path *`` + """ + ... + + +@overload +def d_path(vfsmnt: Object, dentry: Object) -> bytes: """ - .. c:function:: char *d_path(struct path *path) - .. c:function:: char *d_path(struct vfsmount *vfsmnt, struct dentry *dentry) + Return the full path of a dentry given a mount and dentry.
- Return the full path of a dentry given a ``struct path *`` or a mount and a - dentry. + :param vfsmnt: ``struct vfsmount *`` + :param dentry: ``struct dentry *`` """ - type_name = str(path_or_vfsmnt.type_.type_name()) - if type_name == "struct path" or type_name == "struct path *": + ... + + +def d_path( # type: ignore # Need positional-only arguments. + path_or_vfsmnt: Object, dentry: Optional[Object] = None +) -> bytes: + if dentry is None: vfsmnt = path_or_vfsmnt.mnt dentry = path_or_vfsmnt.dentry.read_() else: @@ -144,7 +164,7 @@ def d_path(path_or_vfsmnt, dentry=None): d_op = dentry.d_op.read_() if d_op and d_op.d_dname: - return None + return b"[" + dentry.d_inode.i_sb.s_type.name.string_() + b"]" components = [] while True: @@ -167,11 +187,11 @@ def d_path(path_or_vfsmnt, dentry=None): return b"/" -def dentry_path(dentry): +def dentry_path(dentry: Object) -> bytes: """ - .. c:function:: char *dentry_path(struct dentry *dentry) - Return the path of a dentry from the root of its filesystem. + + :param dentry: ``struct dentry *`` """ components = [] while True: @@ -183,11 +203,12 @@ def dentry_path(dentry): return b"/".join(reversed(components)) -def inode_path(inode): +def inode_path(inode: Object) -> Optional[bytes]: """ - .. c:function:: char *inode_path(struct inode *inode) - Return any path of an inode from the root of its filesystem. + + :param inode: ``struct inode *`` + :return: Path, or ``None`` if the inode has no aliases. """ if hlist_empty(inode.i_dentry): return None @@ -196,14 +217,12 @@ def inode_path(inode): ) -def inode_paths(inode): +def inode_paths(inode: Object) -> Iterator[bytes]: """ - .. c:function:: inode_paths(struct inode *inode) - Return an iterator over all of the paths of an inode from the root of its filesystem. - :rtype: Iterator[bytes] + :param inode: ``struct inode *`` """ return ( dentry_path(dentry) @@ -213,55 +232,54 @@ def inode_paths(inode): ) -def mount_src(mnt): +def mount_src(mnt: Object) -> bytes: """ - .. 
c:function:: char *mount_src(struct mount *mnt) - Get the source device name for a mount. - :rtype: bytes + :param mnt: ``struct mount *`` """ return mnt.mnt_devname.string_() -def mount_dst(mnt): +def mount_dst(mnt: Object) -> bytes: """ - .. c:function:: char *mount_dst(struct mount *mnt) - Get the path of a mount point. - :rtype: bytes + :param mnt: ``struct mount *`` """ return d_path(mnt.mnt.address_of_(), mnt.mnt.mnt_root) -def mount_fstype(mnt): +def mount_fstype(mnt: Object) -> bytes: """ - .. c:function:: char *mount_fstype(struct mount *mnt) - Get the filesystem type of a mount. - :rtype: bytes + :param mnt: ``struct mount *`` """ sb = mnt.mnt.mnt_sb.read_() fstype = sb.s_type.name.string_() - subtype = sb.s_subtype.read_() - if subtype: - subtype = subtype.string_() + subtype_obj = sb.s_subtype.read_() + if subtype_obj: + subtype = subtype_obj.string_() if subtype: fstype += b"." + subtype return fstype -def for_each_mount(prog_or_ns, src=None, dst=None, fstype=None): +def for_each_mount( + prog_or_ns: Union[Program, Object], + src: Optional[Path] = None, + dst: Optional[Path] = None, + fstype: Optional[Union[str, bytes]] = None, +) -> Iterator[Object]: """ - .. c:function:: for_each_mount(struct mnt_namespace *ns, char *src, char *dst, char *fstype) - - Iterate over all of the mounts in a given namespace. If given a - :class:`Program` instead, the initial mount namespace is used. returned - mounts can be filtered by source, destination, or filesystem type, all of - which are encoded using :func:`os.fsencode()`. + Iterate over all of the mounts in a given namespace. + :param prog_or_ns: ``struct mnt_namespace *`` to iterate over, or + :class:`Program` to iterate over initial mount namespace. + :param src: Only include mounts with this source device name. + :param dst: Only include mounts with this destination path. + :param fstype: Only include mounts with this filesystem type. :return: Iterator of ``struct mount *`` objects. 
""" if isinstance(prog_or_ns, Program): @@ -283,10 +301,13 @@ def for_each_mount(prog_or_ns, src=None, dst=None, fstype=None): yield mnt -def print_mounts(prog_or_ns, src=None, dst=None, fstype=None): +def print_mounts( + prog_or_ns: Union[Program, Object], + src: Optional[Path] = None, + dst: Optional[Path] = None, + fstype: Optional[Union[str, bytes]] = None, +) -> None: """ - .. c:function:: print_mounts(struct mnt_namespace *ns, char *src, char *dst, char *fstype) - Print the mount table of a given namespace. The arguments are the same as :func:`for_each_mount()`. The output format is similar to ``/proc/mounts`` but prints the value of each ``struct mount *``. @@ -300,26 +321,26 @@ def print_mounts(prog_or_ns, src=None, dst=None, fstype=None): ) -def fget(task, fd): +def fget(task: Object, fd: IntegerLike) -> Object: """ - .. c:function:: struct file *fget(struct task_struct *task, int fd) - Return the kernel file descriptor of the fd of a given task. + + :param task: ``struct task_struct *`` + :param fd: File descriptor. + :return: ``struct file *`` """ return task.files.fdt.fd[fd] -def for_each_file(task): +def for_each_file(task: Object) -> Iterator[Tuple[int, Object]]: """ - .. c:function:: for_each_file(struct task_struct *task) - Iterate over all of the files open in a given task. + :param task: ``struct task_struct *`` :return: Iterator of (fd, ``struct file *``) tuples. - :rtype: Iterator[tuple[int, Object]] """ fdt = task.files.fdt.read_() - bits_per_long = 8 * fdt.open_fds.type_.type.size + bits_per_long = 8 * sizeof(fdt.open_fds.type_.type) for i in range((fdt.max_fds.value_() + bits_per_long - 1) // bits_per_long): word = fdt.open_fds[i].value_() for j in range(bits_per_long): @@ -329,15 +350,13 @@ def for_each_file(task): yield fd, file -def print_files(task): +def print_files(task: Object) -> None: """ - .. c:function:: print_files(struct task_struct *task) - Print the open files of a given task. 
+ + :param task: ``struct task_struct *`` """ for fd, file in for_each_file(task): path = d_path(file.f_path) - if path is None: - path = file.f_inode.i_sb.s_type.name.string_() - path = escape_ascii_string(path, escape_backslash=True) - print(f"{fd} {path} ({file.type_.type_name()})0x{file.value_():x}") + escaped_path = escape_ascii_string(path, escape_backslash=True) + print(f"{fd} {escaped_path} ({file.type_.type_name()})0x{file.value_():x}") diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index c7d4b8757..909d48b2f 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -11,6 +11,9 @@ IDRs were not based on radix trees. """ +from typing import Iterator, Tuple + +from drgn import Object from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup from _drgn import _linux_helper_idr_find as idr_find @@ -21,14 +24,12 @@ ) -def idr_for_each(idr): +def idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: """ - .. c:function:: idr_for_each(struct idr *idr) - Iterate over all of the entries in an IDR. + :param idr: ``struct idr *`` :return: Iterator of (index, ``void *``) tuples. - :rtype: Iterator[tuple[int, Object]] """ try: base = idr.idr_base.value_() diff --git a/drgn/helpers/linux/kconfig.py b/drgn/helpers/linux/kconfig.py index 5baf58b27..e26b3a45e 100644 --- a/drgn/helpers/linux/kconfig.py +++ b/drgn/helpers/linux/kconfig.py @@ -13,10 +13,12 @@ import types from typing import Mapping +from drgn import Program + __all__ = ("get_kconfig",) -def get_kconfig(prog) -> Mapping[str, str]: +def get_kconfig(prog: Program) -> Mapping[str, str]: """ Get the kernel build configuration as a mapping from the option name to the value. @@ -49,7 +51,7 @@ def get_kconfig(prog) -> Mapping[str, str]: ) # The data is delimited by the magic strings "IKCFG_ST" and "IKCFG_ED" # plus a NUL byte. 
- start = kernel_config_data.address_ + 8 + start = kernel_config_data.address_ + 8 # type: ignore[operator] size = len(kernel_config_data) - 17 data = prog.read(start, size) diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index 33c68d4a1..103ef7ca5 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -9,38 +9,36 @@ kernfs pseudo filesystem interface in :linux:`include/linux/kernfs.h`. """ +from drgn import Object + __all__ = ( "kernfs_name", "kernfs_path", ) -def kernfs_name(kn): +def kernfs_name(kn: Object) -> bytes: """ - .. c:function:: char *kernfs_name(struct kernfs_node *kn) - Get the name of the given kernfs node. - :rtype: bytes + :param kn: ``struct kernfs_node *`` """ if not kn: return b"(null)" return kn.name.string_() if kn.parent else b"/" -def _kernfs_root(kn): +def _kernfs_root(kn: Object) -> Object: if kn.parent: kn = kn.parent return kn.dir.root -def kernfs_path(kn): +def kernfs_path(kn: Object) -> bytes: """ - .. c:function:: char *kernfs_path(struct kernfs_node *kn) - Get full path of the given kernfs node. - :rtype: bytes + :param kn: ``struct kernfs_node *`` """ if not kn: return b"(null)" diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index d29e06f66..7ae9ee1d9 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -10,7 +10,9 @@ hlist_head``) in :linux:`include/linux/list.h`. """ -from drgn import NULL, container_of +from typing import Iterator, Union + +from drgn import NULL, Object, Type, container_of __all__ = ( @@ -31,47 +33,55 @@ ) -def list_empty(head): +def list_empty(head: Object) -> bool: """ - .. c:function:: bool list_empty(struct list_head *head) - Return whether a list is empty. + + :param head: ``struct list_head *`` """ head = head.read_() return head.next == head -def list_is_singular(head): +def list_is_singular(head: Object) -> bool: """ - .. 
c:function:: bool list_is_singular(struct list_head *head) - Return whether a list has only one element. + + :param head: ``struct list_head *`` """ head = head.read_() next = head.next return next != head and next == head.prev -def list_first_entry(head, type, member): +def list_first_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ - .. c:function:: type *list_first_entry(struct list_head *head, type, member) - Return the first entry in a list. The list is assumed to be non-empty. See also :func:`list_first_entry_or_null()`. + + :param head: ``struct list_head *`` + :param type: Entry type. + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(head.next, type, member) -def list_first_entry_or_null(head, type, member): +def list_first_entry_or_null( + head: Object, type: Union[str, Type], member: str +) -> Object: """ - .. c:function:: type *list_first_entry_or_null(struct list_head *head, type, member) - Return the first entry in a list or ``NULL`` if the list is empty. See also :func:`list_first_entry()`. + + :param head: ``struct list_head *`` + :param type: Entry type. + :param member: Name of list node member in entry type. + :return: ``type *`` """ head = head.read_() pos = head.next.read_() @@ -81,41 +91,47 @@ def list_first_entry_or_null(head, type, member): return container_of(pos, type, member) -def list_last_entry(head, type, member): +def list_last_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ - .. c:function:: type *list_last_entry(struct list_head *head, type, member) - Return the last entry in a list. The list is assumed to be non-empty. + + :param head: ``struct list_head *`` + :param type: Entry type. + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(head.prev, type, member) -def list_next_entry(pos, member): +def list_next_entry(pos: Object, member: str) -> Object: """ - .. 
c:function:: type *list_next_entry(type *pos, member) - Return the next entry in a list. + + :param pos: ``type*`` + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(getattr(pos, member).next, pos.type_.type, member) -def list_prev_entry(pos, member): +def list_prev_entry(pos: Object, member: str) -> Object: """ - .. c:function:: type *list_prev_entry(type *pos, member) - Return the previous entry in a list. + + :param pos: ``type*`` + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(getattr(pos, member).prev, pos.type_.type, member) -def list_for_each(head): +def list_for_each(head: Object) -> Iterator[Object]: """ - .. c:function:: list_for_each(struct list_head *head) - Iterate over all of the nodes in a list. + :param head: ``struct list_head *`` :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() @@ -125,12 +141,11 @@ def list_for_each(head): pos = pos.next.read_() -def list_for_each_reverse(head): +def list_for_each_reverse(head: Object) -> Iterator[Object]: """ - .. c:function:: list_for_each_reverse(struct list_head *head) - Iterate over all of the nodes in a list in reverse order. + :param head: ``struct list_head *`` :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() @@ -140,47 +155,48 @@ def list_for_each_reverse(head): pos = pos.prev.read_() -def list_for_each_entry(type, head, member): +def list_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: """ - .. c:function:: list_for_each_entry(type, struct list_head *head, member) - - Iterate over all of the entries in a list, given the type of the entry and - the ``struct list_head`` member in that type. + Iterate over all of the entries in a list. + :param type: Entry type. + :param head: ``struct list_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. 
""" for pos in list_for_each(head): yield container_of(pos, type, member) -def list_for_each_entry_reverse(type, head, member): +def list_for_each_entry_reverse( + type: str, head: Object, member: str +) -> Iterator[Object]: """ - .. c:function:: list_for_each_entry_reverse(type, struct list_head *head, member) - - Iterate over all of the entries in a list in reverse order, given the type - of the entry and the ``struct list_head`` member in that type. + Iterate over all of the entries in a list in reverse order. + :param type: Entry type. + :param head: ``struct list_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ for pos in list_for_each_reverse(head): yield container_of(pos, type, member) -def hlist_empty(head): +def hlist_empty(head: Object) -> bool: """ - .. c:function:: bool hlist_empty(struct hlist_head *head) - Return whether a hash list is empty. + + :param head: ``struct hlist_head *`` """ return not head.first -def hlist_for_each(head): +def hlist_for_each(head: Object) -> Iterator[Object]: """ - .. c:function:: hlist_for_each(struct hlist_head *head) - Iterate over all of the nodes in a hash list. + :param head: ``struct hlist_head *`` :return: Iterator of ``struct hlist_node *`` objects. """ pos = head.first.read_() @@ -189,13 +205,13 @@ def hlist_for_each(head): pos = pos.next.read_() -def hlist_for_each_entry(type, head, member): +def hlist_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: """ - .. c:function:: hlist_for_each_entry(type, struct hlist_head *head, member) - - Iterate over all of the entries in a has list, given the type of the entry - and the ``struct hlist_node`` member in that type. + Iterate over all of the entries in a hash list. + :param type: Entry type. + :param head: ``struct hlist_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. 
""" for pos in hlist_for_each(head): diff --git a/drgn/helpers/linux/list_nulls.py b/drgn/helpers/linux/list_nulls.py index 51a490762..f40b6c247 100644 --- a/drgn/helpers/linux/list_nulls.py +++ b/drgn/helpers/linux/list_nulls.py @@ -11,57 +11,48 @@ list is not a ``NULL`` pointer, but a "nulls" marker. """ -from drgn import container_of +from typing import Iterator + +from drgn import Object, container_of __all__ = ( "hlist_nulls_empty", - "hlist_nulls_entry", "hlist_nulls_for_each_entry", "is_a_nulls", ) -def is_a_nulls(pos): +def is_a_nulls(pos: Object) -> bool: """ - .. c:function:: bool is_a_nulls(struct hlist_nulls_node *pos) - Return whether a a pointer is a nulls marker. + + :param pos: ``struct hlist_nulls_node *`` """ return bool(pos.value_() & 1) -def hlist_nulls_empty(head): +def hlist_nulls_empty(head: Object) -> bool: """ - .. c:function:: bool hlist_nulls_empty(struct hlist_nulls_head *head) - Return whether a nulls hash list is empty. - """ - return is_a_nulls(head.first) - -def hlist_nulls_entry(pos, type, member): - """ - .. c:function:: type *hlist_nulls_entry(struct hlist_nulls_node *pos, type, member) - - Return an entry in a nulls hash list. - - The nulls hash list is assumed to be non-empty. + :param head: ``struct hlist_nulls_head *`` """ - return container_of(pos, type, member) + return is_a_nulls(head.first) -def hlist_nulls_for_each_entry(type, head, member): +def hlist_nulls_for_each_entry( + type: str, head: Object, member: str +) -> Iterator[Object]: """ - .. c:function:: hlist_nulls_for_each_entry(type, struct hlist_nulls_head *head, member) - - Iterate over all the entries in a nulls hash list specified by ``struct - hlist_nulls_head`` head, given the type of the entry and the ``struct - hlist_nulls_node`` member in that type. + Iterate over all the entries in a nulls hash list. + :param type: Entry type. + :param head: ``struct hlist_nulls_head *`` + :param member: Name of list node member in entry type. 
:return: Iterator of ``type *`` objects. """ pos = head.first while not is_a_nulls(pos): - yield hlist_nulls_entry(pos, type, member) + yield container_of(pos, type, member) pos = pos.next diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 684b2444f..59ba7fe3e 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -10,10 +10,11 @@ implemented. """ -from typing import List +import operator +from typing import Any, Iterator, List, Optional, Union, overload from _drgn import _linux_helper_read_vm -from drgn import Object, cast +from drgn import IntegerLike, Object, Program, cast __all__ = ( @@ -31,110 +32,193 @@ ) -def for_each_page(prog): +def for_each_page(prog: Program) -> Iterator[Object]: """ Iterate over all pages in the system. :return: Iterator of ``struct page *`` objects. """ vmemmap = prog["vmemmap"] - for i in range(prog["max_pfn"]): + for i in range(prog["max_pfn"].value_()): yield vmemmap + i -def page_to_pfn(page): +def page_to_pfn(page: Object) -> Object: """ - .. c:function:: unsigned long page_to_pfn(struct page *page) - Get the page frame number (PFN) of a page. + + :param page: ``struct page *`` + :return: ``unsigned long`` """ return cast("unsigned long", page - page.prog_["vmemmap"]) -def pfn_to_page(prog_or_pfn, pfn=None): +@overload +def pfn_to_page(pfn: Object) -> Object: + """ + Get the page with a page frame number (PFN) given as an :class:`.Object`. + + :param pfn: ``unsigned long`` + :return: ``struct page *`` + """ + ... + + +@overload +def pfn_to_page(prog: Program, pfn: IntegerLike) -> Object: """ - .. c:function:: struct page *pfn_to_page(unsigned long pfn) + Get the page with a page frame number (PFN) given as a :class:`.Program` + and an integer. - Get the page with the given page frame number (PFN). This can take the PFN - as an :class:`Object`, or a :class:`Program` and the PFN as an ``int``. + :param pfn: Page frame number. + :return: ``struct page *`` """ + ... 
+ + +def pfn_to_page( # type: ignore # Need positional-only arguments. + prog_or_pfn: Union[Program, Object], pfn: Optional[IntegerLike] = None +) -> Object: if pfn is None: + assert isinstance(prog_or_pfn, Object) prog = prog_or_pfn.prog_ pfn = prog_or_pfn else: + assert isinstance(prog_or_pfn, Program) prog = prog_or_pfn return prog["vmemmap"] + pfn -def virt_to_pfn(prog_or_addr, addr=None): +@overload +def virt_to_pfn(addr: Object) -> Object: """ - .. c:function:: unsigned long virt_to_pfn(void *addr) + Get the page frame number (PFN) of a directly mapped virtual address given + as an :class:`.Object`. - Get the page frame number (PFN) of a directly mapped virtual address. This - can take the address as an :class:`Object`, or a :class:`Program` and the - address as an ``int``. + :param addr: ``void *`` + :return: ``unsigned long`` """ + ... + + +@overload +def virt_to_pfn(prog: Program, addr: IntegerLike) -> Object: + """ + Get the page frame number (PFN) of a directly mapped virtual address given + as a :class:`.Program` and an integer. + + :param addr: Virtual address. + :return: ``unsigned long`` + """ + ... + + +def virt_to_pfn( # type: ignore # Need positional-only arguments. + prog_or_addr: Union[Program, Object], addr: Optional[IntegerLike] = None +) -> Object: if addr is None: + assert isinstance(prog_or_addr, Object) prog = prog_or_addr.prog_ - addr = prog_or_addr.value_() + addr = prog_or_addr else: + assert isinstance(prog_or_addr, Program) prog = prog_or_addr - return Object(prog, "unsigned long", value=(addr - prog["PAGE_OFFSET"]) >> 12) + return cast("unsigned long", (operator.index(addr) - prog["PAGE_OFFSET"]) >> 12) + +@overload +def pfn_to_virt(pfn: Object) -> Object: + """ + Get the directly mapped virtual address of a page frame number (PFN) given + as an :class:`.Object`. -def pfn_to_virt(prog_or_pfn, pfn=None): + :param pfn: ``unsigned long`` + :return: ``void *`` """ - .. c:function:: void *pfn_to_virt(unsigned long pfn) + ... 
+ - Get the directly mapped virtual address of the given page frame number - (PFN). This can take the PFN as an :class:`Object`, or a :class:`Program` - and the PFN as an ``int``. +@overload +def pfn_to_virt(prog: Program, pfn: IntegerLike) -> Object: """ + Get the directly mapped virtual address of a page frame number (PFN) given + as a :class:`.Program` and an integer. + + :param pfn: Page frame number. + :return: ``void *`` + """ + + +def pfn_to_virt( # type: ignore # Need positional-only arguments. + prog_or_pfn: Union[Program, Object], pfn: Optional[IntegerLike] = None +) -> Object: if pfn is None: + assert isinstance(prog_or_pfn, Object) prog = prog_or_pfn.prog_ - pfn = prog_or_pfn.value_() + pfn = prog_or_pfn else: + assert isinstance(prog_or_pfn, Program) prog = prog_or_pfn - return Object(prog, "void *", value=(pfn << 12) + prog["PAGE_OFFSET"]) + return cast("void *", (operator.index(pfn) << 12) + prog["PAGE_OFFSET"]) -def page_to_virt(page): +def page_to_virt(page: Object) -> Object: """ - .. c:function:: void *page_to_virt(struct page *page) - Get the directly mapped virtual address of a page. + + :param page: ``struct page *`` + :return: ``void *`` """ return pfn_to_virt(page_to_pfn(page)) -def virt_to_page(prog_or_addr, addr=None): +@overload +def virt_to_page(addr: Object) -> Object: """ - .. c:function:: struct page *virt_to_page(void *addr) + Get the page containing a directly mapped virtual address given as an + :class:`.Object`. - Get the page containing a directly mapped virtual address. This can take - the address as an :class:`Object`, or a :class:`Program` and the address as - an ``int``. + :param addr: ``void *`` + :return: ``struct page *`` """ - return pfn_to_page(virt_to_pfn(prog_or_addr, addr)) + ... -def access_process_vm(task, address, size) -> bytes: +@overload +def virt_to_page(prog: Program, addr: IntegerLike) -> Object: + """ + Get the page containing a directly mapped virtual address given as a + :class:`.Program` and an integer. 
+ + :param addr: Virtual address. + :return: ``struct page *`` """ - .. c:function:: char *access_process_vm(struct task_struct *task, void *address, size_t size) + ... + +def virt_to_page( # type: ignore # Need positional-only arguments. + prog_or_addr: Union[Program, Object], addr: Optional[IntegerLike] = None +) -> Object: + return pfn_to_page(virt_to_pfn(prog_or_addr, addr)) # type: ignore[arg-type] + + +def access_process_vm(task: Object, address: IntegerLike, size: IntegerLike) -> bytes: + """ Read memory from a task's virtual address space. >>> task = find_task(prog, 1490152) >>> access_process_vm(task, 0x7f8a62b56da0, 12) b'hello, world' + + :param task: ``struct task_struct *`` + :param address: Starting address. + :param size: Number of bytes to read. """ return _linux_helper_read_vm(task.prog_, task.mm.pgd, address, size) -def access_remote_vm(mm, address, size) -> bytes: +def access_remote_vm(mm: Object, address: IntegerLike, size: IntegerLike) -> bytes: """ - .. c:function:: char *access_remote_vm(struct mm_struct *mm, void *address, size_t size) - Read memory from a virtual address space. This is similar to :func:`access_process_vm()`, but it takes a ``struct mm_struct *`` instead of a ``struct task_struct *``. @@ -142,11 +226,15 @@ def access_remote_vm(mm, address, size) -> bytes: >>> task = find_task(prog, 1490152) >>> access_remote_vm(task.mm, 0x7f8a62b56da0, 12) b'hello, world' + + :param mm: ``struct mm_struct *`` + :param address: Starting address. + :param size: Number of bytes to read. """ return _linux_helper_read_vm(mm.prog_, mm.pgd, address, size) -def cmdline(task) -> List[bytes]: +def cmdline(task: Object) -> List[bytes]: """ Get the list of command line arguments of a task. 
@@ -157,6 +245,8 @@ def cmdline(task) -> List[bytes]: $ tr '\\0' ' ' < /proc/1495216/cmdline vim drgn/helpers/linux/mm.py + + :param task: ``struct task_struct *`` """ mm = task.mm.read_() arg_start = mm.arg_start.value_() @@ -164,7 +254,7 @@ def cmdline(task) -> List[bytes]: return access_remote_vm(mm, arg_start, arg_end - arg_start).split(b"\0")[:-1] -def environ(task) -> List[bytes]: +def environ(task: Object) -> List[bytes]: """ Get the list of environment variables of a task. @@ -177,6 +267,8 @@ def environ(task) -> List[bytes]: HOME=/root PATH=/usr/local/sbin:/usr/local/bin:/usr/bin LOGNAME=root + + :param task: ``struct task_struct *`` """ mm = task.mm.read_() env_start = mm.env_start.value_() diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 4a8e406e4..6f01ef7f9 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -9,6 +9,9 @@ Linux kernel networking subsystem. """ +from typing import Iterator + +from drgn import Object from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry from drgn.helpers.linux.tcp import sk_tcpstate @@ -19,26 +22,25 @@ ) -def sk_fullsock(sk): +def sk_fullsock(sk: Object) -> bool: """ - .. c:function:: bool sk_fullsock(struct sock *sk) - Check whether a socket is a full socket, i.e., not a time-wait or request socket. + + :param sk: ``struct sock *`` """ prog = sk.prog_ state = sk.__sk_common.skc_state.value_() return state != prog["TCP_SYN_RECV"] and state != prog["TCP_TIME_WAIT"] -def sk_nulls_for_each(head): +def sk_nulls_for_each(head: Object) -> Iterator[Object]: """ - .. c:function:: sk_nulls_for_each(struct hlist_nulls_head *head) - Iterate over all the entries in a nulls hash list of sockets specified by ``struct hlist_nulls_head`` head. - :return: Iterator of ``struct sock`` objects. + :param head: ``struct hlist_nulls_head *`` + :return: Iterator of ``struct sock *`` objects. 
""" for sk in hlist_nulls_for_each_entry( "struct sock", head, "__sk_common.skc_nulls_node" diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index 1b97d7b3e..60b566e2b 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -10,7 +10,7 @@ from :linux:`include/linux/percpu_counter.h`. """ -from drgn import Object +from drgn import IntegerLike, Object from drgn.helpers.linux.cpumask import for_each_online_cpu @@ -20,21 +20,23 @@ ) -def per_cpu_ptr(ptr, cpu): +def per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: """ - .. c:function:: type *per_cpu_ptr(type __percpu *ptr, int cpu) - Return the per-CPU pointer for a given CPU. + + :param ptr: ``type __percpu *`` + :param cpu: CPU number. + :return: ``type *`` """ offset = ptr.prog_["__per_cpu_offset"][cpu].value_() return Object(ptr.prog_, ptr.type_, value=ptr.value_() + offset) -def percpu_counter_sum(fbc): +def percpu_counter_sum(fbc: Object) -> int: """ - .. c:function:: s64 percpu_counter_sum(struct percpu_counter *fbc) - Return the sum of a per-CPU counter. + + :param fbc: ``struct percpu_counter *`` """ ret = fbc.count.value_() ptr = fbc.counters diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index 059912301..2b33870b0 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -9,7 +9,9 @@ IDs and processes. """ -from drgn import NULL, Program, cast, container_of +from typing import Iterator, Union + +from drgn import NULL, Object, Program, cast, container_of from drgn.helpers.linux.idr import idr_find, idr_for_each from drgn.helpers.linux.list import hlist_for_each_entry from _drgn import ( @@ -27,13 +29,12 @@ ) -def for_each_pid(prog_or_ns): +def for_each_pid(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: """ - .. c:function:: for_each_pid(struct pid_namespace *ns) - - Iterate over all of the PIDs in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. 
+ Iterate over all PIDs in a namespace. + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. :return: Iterator of ``struct pid *`` objects. """ if isinstance(prog_or_ns, Program): @@ -55,13 +56,12 @@ def for_each_pid(prog_or_ns): yield container_of(upid, "struct pid", f"numbers[{int(ns.level)}]") -def for_each_task(prog_or_ns): +def for_each_task(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: """ - .. c:function:: for_each_task(struct pid_namespace *ns) - - Iterate over all of the tasks visible in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. + Iterate over all of the tasks visible in a namespace. + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. :return: Iterator of ``struct task_struct *`` objects. """ if isinstance(prog_or_ns, Program): diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index 9e1433c65..7ac0da318 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -9,6 +9,8 @@ radix trees from :linux:`include/linux/radix-tree.h`. 
""" +from typing import Iterator, Tuple + from drgn import Object, cast from _drgn import _linux_helper_radix_tree_lookup as radix_tree_lookup @@ -21,15 +23,15 @@ _RADIX_TREE_ENTRY_MASK = 3 -def _is_internal_node(node, internal_node): +def _is_internal_node(node: Object, internal_node: int) -> bool: return (node.value_() & _RADIX_TREE_ENTRY_MASK) == internal_node -def _entry_to_node(node, internal_node): +def _entry_to_node(node: Object, internal_node: int) -> Object: return Object(node.prog_, node.type_, value=node.value_() & ~internal_node) -def _radix_tree_root_node(root): +def _radix_tree_root_node(root: Object) -> Tuple[Object, int]: try: node = root.xa_head except AttributeError: @@ -38,18 +40,16 @@ def _radix_tree_root_node(root): return cast("struct xa_node *", node).read_(), 2 -def radix_tree_for_each(root): +def radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: """ - .. c:function:: radix_tree_for_each(struct radix_tree_root *root) - Iterate over all of the entries in a radix tree. + :param root: ``struct radix_tree_root *`` :return: Iterator of (index, ``void *``) tuples. - :rtype: Iterator[tuple[int, Object]] """ node, RADIX_TREE_INTERNAL_NODE = _radix_tree_root_node(root) - def aux(node, index): + def aux(node: Object, index: int) -> Iterator[Tuple[int, Object]]: if _is_internal_node(node, RADIX_TREE_INTERNAL_NODE): parent = _entry_to_node(node, RADIX_TREE_INTERNAL_NODE) for i, slot in enumerate(parent.slots): diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index ee33e468b..88608b40c 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -9,6 +9,8 @@ red-black trees from :linux:`include/linux/rbtree.h`. """ +from typing import Callable, Iterator, TypeVar + from drgn import Object, NULL, container_of @@ -25,31 +27,33 @@ ) -def RB_EMPTY_NODE(node): +def RB_EMPTY_NODE(node: Object) -> bool: """ - .. 
c:function:: bool RB_EMPTY_NODE(struct rb_node *node) - Return whether a red-black tree node is empty, i.e., not inserted in a tree. + + :param node: ``struct rb_node *`` """ return node.__rb_parent_color.value_() == node.value_() -def rb_parent(node): +def rb_parent(node: Object) -> Object: """ - .. c:function:: struct rb_node *rb_parent(struct rb_node *node) - Return the parent node of a red-black tree node. + + :param node: ``struct rb_node *`` + :return: ``struct rb_node *`` """ return Object(node.prog_, node.type_, value=node.__rb_parent_color.value_() & ~3) -def rb_first(root): +def rb_first(root: Object) -> Object: """ - .. c:function:: struct rb_node *rb_first(struct rb_root *root) + Return the first node (in sort order) in a red-black tree, or ``NULL`` if + the tree is empty. - Return the first node (in sort order) in a red-black tree, or a ``NULL`` - object if the tree is empty. + :param root: ``struct rb_root *`` + :return: ``struct rb_node *`` """ node = root.rb_node.read_() if not node: @@ -61,12 +65,13 @@ def rb_first(root): node = next -def rb_last(root): +def rb_last(root: Object) -> Object: """ - .. c:function:: struct rb_node *rb_last(struct rb_root *root) + Return the last node (in sort order) in a red-black tree, or ``NULL`` if + the tree is empty. - Return the last node (in sort order) in a red-black tree, or a ``NULL`` - object if the tree is empty. + :param root: ``struct rb_root *`` + :return: ``struct rb_node *`` """ node = root.rb_node.read_() if not node: @@ -78,12 +83,13 @@ def rb_last(root): node = next -def rb_next(node): +def rb_next(node: Object) -> Object: """ - .. c:function:: struct rb_node *rb_next(struct rb_node *node) + Return the next node (in sort order) after a red-black node, or ``NULL`` if + the node is the last node in the tree or is empty. - Return the next node (in sort order) after a red-black node, or a ``NULL`` - object if the node is the last node in the tree or is empty. 
+ :param node: ``struct rb_node *`` + :return: ``struct rb_node *`` """ node = node.read_() @@ -106,12 +112,13 @@ def rb_next(node): return parent -def rb_prev(node): +def rb_prev(node: Object) -> Object: """ - .. c:function:: struct rb_node *rb_prev(struct rb_node *node) + Return the previous node (in sort order) before a red-black node, or + ``NULL`` if the node is the first node in the tree or is empty. - Return the previous node (in sort order) before a red-black node, or a - ``NULL`` object if the node is the first node in the tree or is empty. + :param node: ``struct rb_node *`` + :return: ``struct rb_node *`` """ node = node.read_() @@ -134,16 +141,15 @@ def rb_prev(node): return parent -def rbtree_inorder_for_each(root): +def rbtree_inorder_for_each(root: Object) -> Iterator[Object]: """ - .. c:function:: rbtree_inorder_for_each(struct rb_root *root) - Iterate over all of the nodes in a red-black tree, in sort order. + :param root: ``struct rb_root *`` :return: Iterator of ``struct rb_node *`` objects. """ - def aux(node): + def aux(node: Object) -> Iterator[Object]: if node: yield from aux(node.rb_left.read_()) yield node @@ -152,32 +158,45 @@ def aux(node): yield from aux(root.rb_node.read_()) -def rbtree_inorder_for_each_entry(type, root, member): +def rbtree_inorder_for_each_entry( + type: str, root: Object, member: str +) -> Iterator[Object]: """ - .. c:function:: rbtree_inorder_for_each_entry(type, struct rb_root *root, member) - - Iterate over all of the entries in a red-black tree, given the type of the - entry and the ``struct rb_node`` member in that type. The entries are - returned in sort order. + Iterate over all of the entries in a red-black tree in sorted order. + :param type: Entry type. + :param root: ``struct rb_root *`` + :param member: Name of red-black node member in entry type. :return: Iterator of ``type *`` objects. 
""" for node in rbtree_inorder_for_each(root): yield container_of(node, type, member) -def rb_find(type, root, member, key, cmp): - """ - .. c:function:: type *rb_find(type, struct rb_root *root, member, key_type key, int (*cmp)(key_type, type *)) +KeyType = TypeVar("KeyType") - Find an entry in a red-black tree, given a key and a comparator function - which takes the key and an entry. The comparator should return < 0 if the - key is less than the entry, > 0 if it is greater than the entry, or 0 if it - matches the entry. This returns a ``NULL`` object if no entry matches the - key. + +def rb_find( + type: str, + root: Object, + member: str, + key: KeyType, + cmp: Callable[[KeyType, Object], int], +) -> Object: + """ + Find an entry in a red-black tree given a key and a comparator function. Note that this function does not have an analogue in the Linux kernel source code, as tree searches are all open-coded. + + :param type: Entry type. + :param root: ``struct rb_root *`` + :param member: Name of red-black node member in entry type. + :param key: Key to find. + :param cmp: Callback taking key and entry that returns < 0 if the key is + less than the entry, > 0 if the key is greater than the entry, and 0 if + the key matches the entry. + :return: ``type *`` found entry, or ``NULL`` if not found. """ node = root.rb_node.read_() while node: @@ -189,4 +208,4 @@ def rb_find(type, root, member, key, cmp): node = node.rb_right.read_() else: return entry - return node + return NULL(root.prog_, type) diff --git a/drgn/helpers/linux/tcp.py b/drgn/helpers/linux/tcp.py index 63c32ced2..72c5387ff 100644 --- a/drgn/helpers/linux/tcp.py +++ b/drgn/helpers/linux/tcp.py @@ -9,15 +9,16 @@ protocol in the Linux kernel. """ -from drgn import cast +from drgn import Object, cast __all__ = ("sk_tcpstate",) -def sk_tcpstate(sk): +def sk_tcpstate(sk: Object) -> Object: """ - .. c:function:: enum TcpState sk_tcpstate(struct sock *sk) - Return the TCP protocol state of a socket. 
+ + :param sk: ``struct sock *`` + :return: TCP state enum value. """ return cast(sk.prog_["TCP_ESTABLISHED"].type_, sk.__sk_common.skc_state) diff --git a/drgn/helpers/linux/user.py b/drgn/helpers/linux/user.py index d6edfc57e..1f4d797af 100644 --- a/drgn/helpers/linux/user.py +++ b/drgn/helpers/linux/user.py @@ -10,8 +10,9 @@ """ import operator +from typing import Iterator, Union -from drgn import NULL, Object +from drgn import IntegerLike, NULL, Object, Program from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( @@ -20,18 +21,18 @@ ) -def _kuid_val(uid): +def _kuid_val(uid: Union[Object, IntegerLike]) -> int: if isinstance(uid, Object) and uid.type_.type_name() == "kuid_t": uid = uid.val return operator.index(uid) -def find_user(prog, uid): +def find_user(prog: Program, uid: Union[Object, IntegerLike]) -> Object: """ - .. c:function:: struct user_struct *find_user(kuid_t uid) + Return the user structure with the given UID. - Return the user structure with the given UID, which may be a ``kuid_t`` or - an integer. + :param uid: ``kuid_t`` object or integer. + :return: ``struct user_state *`` """ try: uidhashentry = prog.cache["uidhashentry"] @@ -41,7 +42,7 @@ def find_user(prog, uid): uidhash_bits = uidhash_sz.bit_length() - 1 uidhash_mask = uidhash_sz - 1 - def uidhashentry(uid): + def uidhashentry(uid: int) -> Object: hash = ((uid >> uidhash_bits) + uid) & uidhash_mask return uidhash_table + hash @@ -56,7 +57,7 @@ def uidhashentry(uid): return NULL(prog, "struct user_struct *") -def for_each_user(prog): +def for_each_user(prog: Program) -> Iterator[Object]: """ Iterate over all users in the system. 
diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 433e78310..ed244e9e3 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -141,7 +141,7 @@ static int prog_or_pid_ns_converter(PyObject *o, void *p) DrgnObject *drgnpy_linux_helper_find_pid(PyObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"ns", "pid", NULL}; + static char *keywords[] = {"prog_or_ns", "pid", NULL}; struct drgn_error *err; struct prog_or_ns_arg prog_or_ns; struct index_arg pid = {}; From 8c7c80e2f7b400789f7b4fa3c00f0f35272d2286 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Aug 2020 12:10:23 -0700 Subject: [PATCH 13/56] Fix mypy --strict warnings The remaining warnings are all no-any-return, which is hard to avoid in drgn. Signed-off-by: Omar Sandoval --- _drgn.pyi | 2 +- drgn/__init__.py | 9 +++++---- drgn/internal/cli.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index a0ea27bc0..d95ee6bde 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -433,7 +433,7 @@ class Program: This is equivalent to ``load_debug_info(None, True)``. """ ... - cache: dict + cache: Dict[Any, Any] """ Dictionary for caching program metadata. 
diff --git a/drgn/__init__.py b/drgn/__init__.py index 8b50da892..975c1f166 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -71,7 +71,7 @@ TypeKind, TypeMember, TypeParameter, - _with_libkdumpfile, + _with_libkdumpfile as _with_libkdumpfile, array_type, bool_type, cast, @@ -149,11 +149,12 @@ ) -try: +if sys.version_info >= (3, 8): _open_code = io.open_code -except AttributeError: +else: + from typing import BinaryIO - def _open_code(path): + def _open_code(path: str) -> BinaryIO: return open(path, "rb") diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index de6dd93c3..2ac79a07a 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -155,7 +155,7 @@ def main() -> None: if not isinstance(e, FileNotFoundError) and not args.quiet: print("could not read history:", str(e), file=sys.stderr) - def write_history_file(): + def write_history_file() -> None: try: readline.write_history_file(histfile) except OSError as e: From 4e770fb18a8b3d40b68680c6232df5c606e6df0b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Aug 2020 14:27:51 -0700 Subject: [PATCH 14/56] Format imports with isort Signed-off-by: Omar Sandoval --- CONTRIBUTING.rst | 8 ++++++-- docs/exts/drgndoc/docstrings.py | 1 - docs/exts/drgndoc/ext.py | 8 ++++---- docs/exts/drgndoc/namespace.py | 12 +----------- docs/exts/setuptools_config.py | 3 +-- drgn/__init__.py | 3 +-- drgn/helpers/linux/__init__.py | 1 - drgn/helpers/linux/boot.py | 1 - drgn/helpers/linux/bpf.py | 1 - drgn/helpers/linux/idr.py | 3 +-- drgn/helpers/linux/list.py | 1 - drgn/helpers/linux/list_nulls.py | 1 - drgn/helpers/linux/mm.py | 1 - drgn/helpers/linux/net.py | 1 - drgn/helpers/linux/percpu.py | 1 - drgn/helpers/linux/pid.py | 6 +++--- drgn/helpers/linux/radixtree.py | 3 +-- drgn/helpers/linux/rbtree.py | 3 +-- drgn/helpers/linux/sched.py | 1 - drgn/helpers/linux/user.py | 2 +- drgn/internal/cli.py | 3 ++- drgn/internal/rlcompleter.py | 3 +-- examples/linux/cgroup.py | 4 +--- examples/linux/fs_inodes.py | 4 
++-- examples/linux/lsmod.py | 1 - examples/linux/ps.py | 1 - examples/linux/tcp_sock.py | 1 - pyproject.toml | 5 +++++ scripts/generate_dwarf_constants.py | 1 - scripts/generate_primitive_type_spellings.py | 1 - setup.py | 10 ++++++---- tests/__init__.py | 1 - tests/dwarfwriter.py | 3 +-- tests/helpers/linux/test_block.py | 1 - tests/helpers/linux/test_cgroup.py | 1 - tests/helpers/linux/test_fs.py | 2 +- tests/helpers/linux/test_kconfig.py | 1 - tests/helpers/linux/test_pid.py | 7 +------ tests/helpers/linux/test_user.py | 1 - tests/helpers/linux/test_uts.py | 1 - tests/libdrgn.py | 3 +-- tests/test_dwarf.py | 3 +-- tests/test_language_c.py | 2 +- tests/test_lexer.py | 2 +- tests/test_python.py | 3 ++- tests/test_serialize.py | 1 - tests/test_type.py | 1 - tools/bpf_inspect.py | 7 +------ vmtest/manage.py | 4 ++-- vmtest/resolver.py | 1 - vmtest/vm.py | 1 - 51 files changed, 49 insertions(+), 92 deletions(-) create mode 100644 pyproject.toml diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 82fc64dc4..ba9eb43ed 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -71,8 +71,12 @@ drgn assumes some `implementation-defined behavior Python ^^^^^^ -Python code in drgn is formatted with `black `_. -Code should be compatible with Python 3.6 and newer. +Python code in drgn should be compatible with Python 3.6 and newer. + +Python code should be formatted with `black `_ +and `isort `_:: + + $ isort . && black . Type hints should be provided for all interfaces (including helpers and the C extension). 
diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index 045fc0702..be92ddc24 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -12,7 +12,6 @@ from drgndoc.parse import Class, DocumentedNode, Node, parse_paths from drgndoc.util import dot_join - escapes = [] for c in range(256): if c == 0: diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 331af94d7..d367bee87 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -40,18 +40,19 @@ file for the C extension itself (drgndoc.docstrings). """ +import os.path +import re +from typing import Any, Dict, List, cast + import docutils.nodes import docutils.parsers.rst.directives import docutils.statemachine -import os.path -import re import sphinx.addnodes import sphinx.application import sphinx.environment import sphinx.util.docutils import sphinx.util.logging import sphinx.util.nodes -from typing import Any, Dict, List, cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode @@ -66,7 +67,6 @@ ) from drgndoc.util import dot_join - logger = sphinx.util.logging.getLogger(__name__) diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index d11a44460..5bf92365d 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -2,16 +2,7 @@ # SPDX-License-Identifier: GPL-3.0+ import itertools -from typing import ( - Generic, - Iterator, - List, - Mapping, - Optional, - Sequence, - TypeVar, - Union, -) +from typing import Generic, Iterator, List, Mapping, Optional, Sequence, TypeVar, Union from drgndoc.parse import ( Class, @@ -25,7 +16,6 @@ ) from drgndoc.util import dot_join - NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True) diff --git a/docs/exts/setuptools_config.py b/docs/exts/setuptools_config.py index 0d45cc13c..f9acf072b 100644 --- a/docs/exts/setuptools_config.py +++ b/docs/exts/setuptools_config.py @@ -23,9 +23,8 @@ from __future__ 
import unicode_literals import os -import sys import subprocess - +import sys if "check_output" not in dir(subprocess): import subprocess32 as subprocess diff --git a/drgn/__init__.py b/drgn/__init__.py index 975c1f166..51c989d10 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -46,13 +46,13 @@ import types from _drgn import ( + NULL, Architecture, FaultError, FindObjectFlags, IntegerLike, Language, MissingDebugInfoError, - NULL, Object, OutOfBoundsError, Path, @@ -96,7 +96,6 @@ void_type, ) - __all__ = ( "Architecture", "FaultError", diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index cbdd6ee7a..701bb383d 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -33,7 +33,6 @@ import importlib import pkgutil - __all__ = [] for _module_info in pkgutil.iter_modules( __path__, # type: ignore[name-defined] # python/mypy#1422 diff --git a/drgn/helpers/linux/boot.py b/drgn/helpers/linux/boot.py index b43984059..427298627 100644 --- a/drgn/helpers/linux/boot.py +++ b/drgn/helpers/linux/boot.py @@ -14,7 +14,6 @@ _linux_helper_pgtable_l5_enabled as pgtable_l5_enabled, ) - __all__ = ( "kaslr_offset", "pgtable_l5_enabled", diff --git a/drgn/helpers/linux/bpf.py b/drgn/helpers/linux/bpf.py index 0c0c347e6..af7d083d6 100644 --- a/drgn/helpers/linux/bpf.py +++ b/drgn/helpers/linux/bpf.py @@ -18,7 +18,6 @@ from drgn.helpers.linux.idr import idr_for_each from drgn.helpers.linux.list import list_for_each_entry - __all__ = ( "bpf_map_for_each", "bpf_prog_for_each", diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index 909d48b2f..d5fa25d6b 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -13,10 +13,9 @@ from typing import Iterator, Tuple +from _drgn import _linux_helper_idr_find as idr_find from drgn import Object from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup -from _drgn import _linux_helper_idr_find as idr_find - __all__ = ( "idr_find", diff 
--git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index 7ae9ee1d9..58aa901ff 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -14,7 +14,6 @@ from drgn import NULL, Object, Type, container_of - __all__ = ( "hlist_empty", "hlist_for_each", diff --git a/drgn/helpers/linux/list_nulls.py b/drgn/helpers/linux/list_nulls.py index f40b6c247..a5161e3b8 100644 --- a/drgn/helpers/linux/list_nulls.py +++ b/drgn/helpers/linux/list_nulls.py @@ -15,7 +15,6 @@ from drgn import Object, container_of - __all__ = ( "hlist_nulls_empty", "hlist_nulls_for_each_entry", diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 59ba7fe3e..50f9c124e 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -16,7 +16,6 @@ from _drgn import _linux_helper_read_vm from drgn import IntegerLike, Object, Program, cast - __all__ = ( "access_process_vm", "access_remote_vm", diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 6f01ef7f9..7f1e15a79 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -15,7 +15,6 @@ from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry from drgn.helpers.linux.tcp import sk_tcpstate - __all__ = ( "sk_fullsock", "sk_nulls_for_each", diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index 60b566e2b..402ce801a 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -13,7 +13,6 @@ from drgn import IntegerLike, Object from drgn.helpers.linux.cpumask import for_each_online_cpu - __all__ = ( "per_cpu_ptr", "percpu_counter_sum", diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index 2b33870b0..fe816c492 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -11,14 +11,14 @@ from typing import Iterator, Union -from drgn import NULL, Object, Program, cast, container_of -from drgn.helpers.linux.idr import idr_find, idr_for_each -from drgn.helpers.linux.list import 
hlist_for_each_entry from _drgn import ( _linux_helper_find_pid as find_pid, _linux_helper_find_task as find_task, _linux_helper_pid_task as pid_task, ) +from drgn import NULL, Object, Program, cast, container_of +from drgn.helpers.linux.idr import idr_find, idr_for_each +from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( "find_pid", diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index 7ac0da318..b0aa67e97 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -11,9 +11,8 @@ from typing import Iterator, Tuple -from drgn import Object, cast from _drgn import _linux_helper_radix_tree_lookup as radix_tree_lookup - +from drgn import Object, cast __all__ = ( "radix_tree_for_each", diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index 88608b40c..2bbd9a980 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -11,8 +11,7 @@ from typing import Callable, Iterator, TypeVar -from drgn import Object, NULL, container_of - +from drgn import NULL, Object, container_of __all__ = ( "RB_EMPTY_NODE", diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index 88a9cf348..a48063991 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -11,5 +11,4 @@ from _drgn import _linux_helper_task_state_to_char as task_state_to_char - __all__ = ("task_state_to_char",) diff --git a/drgn/helpers/linux/user.py b/drgn/helpers/linux/user.py index 1f4d797af..d64d09974 100644 --- a/drgn/helpers/linux/user.py +++ b/drgn/helpers/linux/user.py @@ -12,7 +12,7 @@ import operator from typing import Iterator, Union -from drgn import IntegerLike, NULL, Object, Program +from drgn import NULL, IntegerLike, Object, Program from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index 2ac79a07a..05dc3e6aa 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -9,12 
+9,13 @@ import importlib import os import os.path -import pkg_resources import runpy import shutil import sys from typing import Any, Dict +import pkg_resources + import drgn diff --git a/drgn/internal/rlcompleter.py b/drgn/internal/rlcompleter.py index 0684276c2..f2321e856 100644 --- a/drgn/internal/rlcompleter.py +++ b/drgn/internal/rlcompleter.py @@ -4,12 +4,11 @@ """Improved readline completer""" import builtins -import re import keyword +import re import readline from typing import Any, Dict, List, Optional - _EXPR_RE = re.compile( r""" ( diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index 3d85bb1d1..ae17bf84c 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -3,11 +3,10 @@ """List the paths of all descendants of a cgroup v2""" +from contextlib import contextmanager import os import sys -from contextlib import contextmanager - from drgn import cast from drgn.helpers import enum_type_to_class from drgn.helpers.linux import ( @@ -18,7 +17,6 @@ find_task, ) - BpfAttachType = enum_type_to_class( prog.type("enum bpf_attach_type"), "BpfAttachType", diff --git a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 081c6854b..5e9d75cf4 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -3,12 +3,12 @@ """List the paths of all inodes cached in a given filesystem""" -from drgn.helpers.linux.fs import for_each_mount, inode_path -from drgn.helpers.linux.list import list_for_each_entry import os import sys import time +from drgn.helpers.linux.fs import for_each_mount, inode_path +from drgn.helpers.linux.list import list_for_each_entry if len(sys.argv) == 1: path = "/" diff --git a/examples/linux/lsmod.py b/examples/linux/lsmod.py index b7fef922a..b6e8ddf0c 100755 --- a/examples/linux/lsmod.py +++ b/examples/linux/lsmod.py @@ -5,7 +5,6 @@ from drgn.helpers.linux.list import list_for_each_entry - print("Module Size Used by") for mod in list_for_each_entry("struct module", 
prog["modules"].address_of_(), "list"): name = mod.name.string_().decode() diff --git a/examples/linux/ps.py b/examples/linux/ps.py index 45adfc7c8..5346db05c 100755 --- a/examples/linux/ps.py +++ b/examples/linux/ps.py @@ -5,7 +5,6 @@ from drgn.helpers.linux.pid import for_each_task - print("PID COMM") for task in for_each_task(prog): pid = task.pid.value_() diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index 438146908..68d8e84c0 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -19,7 +19,6 @@ sock_cgroup_ptr, ) - TcpState = enum_type_to_class( prog["TCP_ESTABLISHED"].type_, "TcpState", diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..5e9b80cdc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[tool.isort] +profile = "black" +known_first_party = ["drgn", "_drgn", "drgndoc", "vmtest"] +combine_as_imports = true +force_sort_within_sections = true diff --git a/scripts/generate_dwarf_constants.py b/scripts/generate_dwarf_constants.py index ae62a6300..f6a510b25 100755 --- a/scripts/generate_dwarf_constants.py +++ b/scripts/generate_dwarf_constants.py @@ -5,7 +5,6 @@ import keyword import re - prefixes = [ "DW_AT", "DW_ATE", diff --git a/scripts/generate_primitive_type_spellings.py b/scripts/generate_primitive_type_spellings.py index 03b943dad..7708efea5 100755 --- a/scripts/generate_primitive_type_spellings.py +++ b/scripts/generate_primitive_type_spellings.py @@ -5,7 +5,6 @@ import itertools import sys - SPELLINGS = [ ("DRGN_C_TYPE_VOID", ["void"]), ("DRGN_C_TYPE_CHAR", ["char"]), diff --git a/setup.py b/setup.py index f3c9ee6cf..f23283cab 100755 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: GPL-3.0+ # setuptools must be imported before distutils (see pypa/setuptools#2230). 
-from setuptools import setup, find_packages, Command +import setuptools # isort: skip import contextlib from distutils import log @@ -14,13 +14,15 @@ import os import os.path import re +import shlex +import subprocess +import sys + import pkg_resources +from setuptools import Command, find_packages, setup from setuptools.command.build_ext import build_ext as _build_ext from setuptools.command.egg_info import egg_info as _egg_info from setuptools.extension import Extension -import shlex -import subprocess -import sys from util import nproc, out_of_date diff --git a/tests/__init__.py b/tests/__init__.py index 96d7366ca..cb4f36510 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -26,7 +26,6 @@ union_type, ) - DEFAULT_LANGUAGE = Language.C diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 268a7ba44..8600f69dc 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -4,10 +4,9 @@ from collections import namedtuple import os.path +from tests.dwarf import DW_AT, DW_FORM, DW_TAG from tests.elf import ET, PT, SHT from tests.elfwriter import ElfSection, create_elf_file -from tests.dwarf import DW_AT, DW_FORM, DW_TAG - DwarfAttrib = namedtuple("DwarfAttrib", ["name", "form", "value"]) DwarfDie = namedtuple("DwarfAttrib", ["tag", "attribs", "children"]) diff --git a/tests/helpers/linux/test_block.py b/tests/helpers/linux/test_block.py index b77b77b5e..c0b374eaa 100644 --- a/tests/helpers/linux/test_block.py +++ b/tests/helpers/linux/test_block.py @@ -19,7 +19,6 @@ from drgn.helpers.linux.device import MAJOR, MINOR, MKDEV from tests.helpers.linux import LinuxHelperTestCase - LOOP_SET_FD = 0x4C00 LOOP_SET_STATUS64 = 0x4C04 LOOP_GET_STATUS64 = 0x4C05 diff --git a/tests/helpers/linux/test_cgroup.py b/tests/helpers/linux/test_cgroup.py index 686657af1..43cd4baa9 100644 --- a/tests/helpers/linux/test_cgroup.py +++ b/tests/helpers/linux/test_cgroup.py @@ -3,7 +3,6 @@ import os - from drgn.helpers.linux.cgroup import ( cgroup_name, cgroup_path, diff --git 
a/tests/helpers/linux/test_fs.py b/tests/helpers/linux/test_fs.py index dbcccb394..c1bc6a813 100644 --- a/tests/helpers/linux/test_fs.py +++ b/tests/helpers/linux/test_fs.py @@ -17,7 +17,7 @@ path_lookup, ) from drgn.helpers.linux.pid import find_task -from tests.helpers.linux import LinuxHelperTestCase, MS_BIND, mount, umount +from tests.helpers.linux import MS_BIND, LinuxHelperTestCase, mount, umount class TestFs(LinuxHelperTestCase): diff --git a/tests/helpers/linux/test_kconfig.py b/tests/helpers/linux/test_kconfig.py index 83d0d80ac..8993c9aaa 100644 --- a/tests/helpers/linux/test_kconfig.py +++ b/tests/helpers/linux/test_kconfig.py @@ -4,7 +4,6 @@ import os.path from drgn.helpers.linux.kconfig import get_kconfig - from tests.helpers.linux import LinuxHelperTestCase diff --git a/tests/helpers/linux/test_pid.py b/tests/helpers/linux/test_pid.py index 7fe870ed5..ae0c90fac 100644 --- a/tests/helpers/linux/test_pid.py +++ b/tests/helpers/linux/test_pid.py @@ -3,12 +3,7 @@ import os -from drgn.helpers.linux.pid import ( - find_pid, - find_task, - for_each_pid, - for_each_task, -) +from drgn.helpers.linux.pid import find_pid, find_task, for_each_pid, for_each_task from tests.helpers.linux import LinuxHelperTestCase diff --git a/tests/helpers/linux/test_user.py b/tests/helpers/linux/test_user.py index 47a9c0555..3ba1be632 100644 --- a/tests/helpers/linux/test_user.py +++ b/tests/helpers/linux/test_user.py @@ -6,7 +6,6 @@ import signal from drgn.helpers.linux.user import find_user, for_each_user - from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, diff --git a/tests/helpers/linux/test_uts.py b/tests/helpers/linux/test_uts.py index 033262ef0..6755b686b 100644 --- a/tests/helpers/linux/test_uts.py +++ b/tests/helpers/linux/test_uts.py @@ -4,7 +4,6 @@ import os import drgn - from tests.helpers.linux import LinuxHelperTestCase diff --git a/tests/libdrgn.py b/tests/libdrgn.py index 8138a945f..e5059bded 100644 --- a/tests/libdrgn.py +++ 
b/tests/libdrgn.py @@ -6,9 +6,8 @@ from enum import auto import os -import drgn import _drgn - +import drgn _drgn_pydll = ctypes.PyDLL(_drgn.__file__) _drgn_cdll = ctypes.CDLL(_drgn.__file__) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index ceecb5039..e6f564b0f 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -39,8 +39,7 @@ point_type, ) from tests.dwarf import DW_AT, DW_ATE, DW_FORM, DW_LANG, DW_TAG -from tests.dwarfwriter import compile_dwarf, DwarfDie, DwarfAttrib - +from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf bool_die = DwarfDie( DW_TAG.base_type, diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 3aee580a3..f2527f1ea 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -25,7 +25,7 @@ void_type, ) from tests import coord_type, point_type -from tests.libdrgn import C_TOKEN, drgn_lexer_c, Lexer +from tests.libdrgn import C_TOKEN, Lexer, drgn_lexer_c class TestPrettyPrintTypeName(unittest.TestCase): diff --git a/tests/test_lexer.py b/tests/test_lexer.py index c2ff473b7..db92b4481 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -3,7 +3,7 @@ import unittest -from tests.libdrgn import drgn_test_lexer_func, Lexer +from tests.libdrgn import Lexer, drgn_test_lexer_func class TestLexer(unittest.TestCase): diff --git a/tests/test_python.py b/tests/test_python.py index 44efd6af1..183495e6a 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,9 +1,10 @@ # Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0+ +import unittest + import _drgn import drgn -import unittest class TestModule(unittest.TestCase): diff --git a/tests/test_serialize.py b/tests/test_serialize.py index 04ae95023..eb52e05c9 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -5,7 +5,6 @@ from tests.libdrgn import deserialize_bits, serialize_bits - VALUE = 12345678912345678989 diff --git a/tests/test_type.py b/tests/test_type.py index cdd81b666..70b1a4273 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -26,7 +26,6 @@ union_type, void_type, ) - from tests import DEFAULT_LANGUAGE diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py index 8fd5f986e..09d263370 100755 --- a/tools/bpf_inspect.py +++ b/tools/bpf_inspect.py @@ -11,12 +11,7 @@ import sys from drgn.helpers import enum_type_to_class -from drgn.helpers.linux import ( - bpf_map_for_each, - bpf_prog_for_each, - hlist_for_each_entry, -) - +from drgn.helpers.linux import bpf_map_for_each, bpf_prog_for_each, hlist_for_each_entry BpfMapType = enum_type_to_class(prog.type("enum bpf_map_type"), "BpfMapType") BpfProgType = enum_type_to_class(prog.type("enum bpf_prog_type"), "BpfProgType") diff --git a/vmtest/manage.py b/vmtest/manage.py index d74cbd6d7..0426279ce 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -1,7 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0+ -import aiohttp import argparse import asyncio import difflib @@ -30,11 +29,12 @@ Tuple, ) import urllib.parse + +import aiohttp from yarl import URL from util import nproc - logger = logging.getLogger("asyncio") diff --git a/vmtest/resolver.py b/vmtest/resolver.py index 25ea539ec..2bbbaefe0 100644 --- a/vmtest/resolver.py +++ b/vmtest/resolver.py @@ -16,7 +16,6 @@ from util import KernelVersion - # This URL contains a mapping from file names to URLs where those files can be # downloaded. 
This is needed because the files under a Dropbox shared folder # have randomly-generated links. diff --git a/vmtest/vm.py b/vmtest/vm.py index de1d7e690..6a7109a8b 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -12,7 +12,6 @@ from util import nproc, out_of_date - # Script run as init in the virtual machine. This only depends on busybox. We # don't assume that any regular commands are built in (not even echo or test), # so we always explicitly run busybox. From 656d85f2fee228d56a50f52725135ea851638e4c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Aug 2020 12:21:32 -0700 Subject: [PATCH 15/56] travis: check Python code with black, isort, and mypy Signed-off-by: Omar Sandoval --- .travis.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 71b06d744..f7510a149 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,12 @@ install: # On systemd >= 238 we can use udevadm trigger -w and remove udevadm settle. - sudo udevadm trigger /dev/kvm - sudo udevadm settle -script: python setup.py test -K + - pip install black isort mypy +script: + - black --check --diff . + - isort --check --diff . + - mypy --strict --no-warn-return-any drgn _drgn.pyi + - python setup.py test -K addons: apt: From 7fb196cfbf845207b526ef7c50c168b63620bd8c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Aug 2020 17:46:26 -0700 Subject: [PATCH 16/56] vmtest: don't use onoatimehack on QEMU 5.1.0 As of QEMU commit a5804fcf7b22 ("9pfs: local: ignore O_NOATIME if we don't have permissions") (in v5.1.0), QEMU handles O_NOATIME sanely, so we don't need the LD_PRELOAD hack. Since we're adding a version check, make the multidevs check based on the version, too. 
Signed-off-by: Omar Sandoval --- vmtest/onoatimehack.c | 3 +++ vmtest/vm.py | 35 ++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/vmtest/onoatimehack.c b/vmtest/onoatimehack.c index dd5c4fa82..b4390bf08 100644 --- a/vmtest/onoatimehack.c +++ b/vmtest/onoatimehack.c @@ -8,6 +8,9 @@ * Overlayfs uses O_NOATIME, so overlayfs on top of 9pfs doesn't work. We work * around this with this LD_PRELOAD hack to remove O_NOATIME from open() and * fcntl() calls. + * + * As of QEMU 5.1.0, the 9pfs server falls back to removing O_NOATIME, so this + * isn't necessary on newer versions. */ #include diff --git a/vmtest/vm.py b/vmtest/vm.py index 6a7109a8b..6243abc42 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -4,6 +4,7 @@ import errno import os import os.path +import re import shlex import shutil import socket @@ -140,20 +141,23 @@ class LostVMError(Exception): def run_in_vm(command: str, *, vmlinuz: str, build_dir: str) -> int: + match = re.search( + "QEMU emulator version ([0-9]+(?:\.[0-9]+)*)", + subprocess.check_output( + ["qemu-system-x86_64", "-version"], universal_newlines=True + ), + ) + if not match: + raise Exception("could not determine QEMU version") + qemu_version = tuple(int(x) for x in match.group(1).split(".")) + # multidevs was added in QEMU 4.2.0. - if ( - "multidevs" - in subprocess.run( - ["qemu-system-x86_64", "-help"], - stdout=subprocess.PIPE, - universal_newlines=True, - ).stdout - ): - multidevs = ",multidevs=remap" - else: - multidevs = "" - - onoatimehack = _build_onoatimehack(build_dir) + multidevs = ",multidevs=remap" if qemu_version >= (4, 2) else "" + # QEMU's 9pfs O_NOATIME handling was fixed in 5.1.0. 
+ env = os.environ.copy() + if qemu_version < (5, 1): + onoatimehack_so = _build_onoatimehack(build_dir) + env["LD_PRELOAD"] = f"{onoatimehack_so}:{env.get('LD_PRELOAD', '')}" with tempfile.TemporaryDirectory(prefix="drgn-vmtest-") as temp_dir, socket.socket( socket.AF_UNIX @@ -199,10 +203,7 @@ def run_in_vm(command: str, *, vmlinuz: str, build_dir: str) -> int: f"rootfstype=9p rootflags=trans=virtio,cache=loose ro console=0,115200 panic=-1 init={init}", # fmt: on ], - env={ - **os.environ, - "LD_PRELOAD": f"{onoatimehack}:{os.getenv('LD_PRELOAD', '')}", - }, + env=env, ) as qemu: server_sock.settimeout(5) try: From 903a44d0dd18b4227f693ff5453b0b616608d1a3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Aug 2020 17:55:15 -0700 Subject: [PATCH 17/56] travis: upgrade to Ubuntu 20.04 This picks up a newer version of QEMU and lets us use udevadm trigger -w. Let's also explicitly add "os: linux" to silence the config validation. Signed-off-by: Omar Sandoval --- .travis.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index f7510a149..016e2c992 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ -dist: bionic +os: linux +dist: focal language: python python: @@ -21,9 +22,7 @@ install: # the upstream default. - echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /lib/udev/rules.d/99-fix-kvm.rules > /dev/null - sudo udevadm control --reload-rules - # On systemd >= 238 we can use udevadm trigger -w and remove udevadm settle. - - sudo udevadm trigger /dev/kvm - - sudo udevadm settle + - sudo udevadm trigger -w /dev/kvm - pip install black isort mypy script: - black --check --diff . 
From 6f6c5f272f6489eaea92355dd512b3eff5da7078 Mon Sep 17 00:00:00 2001 From: arsarwade <49537651+arsarwade@users.noreply.github.com> Date: Fri, 21 Aug 2020 10:24:52 -0700 Subject: [PATCH 18/56] libdrgn: export function drgn_object_init() (#70) drgn_object_init() is available in drgn.h file and seems to be a required call before calling drgn_program_find_object(). Without this, trying to call drgn_object_init() from an external C application results in undefined reference. Signed-off-by: Aditya Sarwade --- libdrgn/object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libdrgn/object.c b/libdrgn/object.c index 4f0d57c02..690d981fc 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -15,7 +15,8 @@ #include "type.h" #include "type_index.h" -void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog) +LIBDRGN_PUBLIC void drgn_object_init(struct drgn_object *obj, + struct drgn_program *prog) { obj->prog = prog; obj->type = drgn_void_type(drgn_program_language(prog)); From d40526d85de2d9990126b0f6b8c52f11dea48c15 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Aug 2020 18:07:31 -0700 Subject: [PATCH 19/56] scripts: add Python include header path to cscope Signed-off-by: Omar Sandoval --- scripts/cscope.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/cscope.sh b/scripts/cscope.sh index 52f0b252b..ac81aff76 100755 --- a/scripts/cscope.sh +++ b/scripts/cscope.sh @@ -1,3 +1,15 @@ -#!/bin/sh +#!/bin/bash -find libdrgn -name '*.[ch]' -o -name '*.[ch].in' | cscope -bq -i- +: ${PYTHON:=python3} +cscope_args=(-bq -i-) + +python_include="$("$PYTHON" -c 'import sysconfig; print(sysconfig.get_path("include"))' 2>/dev/null)" +if [[ -n $python_include ]] ; then + cscope_args+=("-I$python_include") +fi +python_platinclude="$("$PYTHON" -c 'import sysconfig; print(sysconfig.get_path("platinclude"))' 2>/dev/null)" +if [[ -n $python_platinclude && $python_platinclude != $python_include ]] ; then +
cscope_args+=("-I$python_platinclude") +fi + +find libdrgn -name '*.[ch]' -o -name '*.[ch].in' | cscope "${cscope_args[@]}" From 93e33513da40b3c84adc5c0df6ca6d2fc0135204 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Aug 2020 16:32:41 -0700 Subject: [PATCH 20/56] drgndoc: bring back :exclude: It's still useful to have an escape hatch for names we don't want documented. Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/ext.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index d367bee87..7d68e44cd 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -98,6 +98,9 @@ class DrgnDocDirective(sphinx.util.docutils.SphinxDirective): required_arguments = 1 optional_arguments = 0 + option_spec = { + "exclude": docutils.parsers.rst.directives.unchanged, + } def run(self) -> Any: parts = [] @@ -128,6 +131,10 @@ def _run( resolved: ResolvedNode[Node], docnode: docutils.nodes.Node, ) -> None: + exclude_pattern = self.options.get("exclude") + if exclude_pattern is not None and re.fullmatch(exclude_pattern, attr_name): + return + if isinstance(resolved.node, (Import, ImportFrom)): # Only include imports that are explicitly aliased (i.e., import # ... as ... or from ... import ... as ...). From b0f9403ebf027725af2ee787f70a57d81b429db8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Aug 2020 16:51:12 -0700 Subject: [PATCH 21/56] drgndoc: directly use name passed as argument to drgndoc directive E.g., drgndoc:: foo.bar() should emit py:method:: foo.bar() regardless of a previous py:module directive. 
Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/ext.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 7d68e44cd..b347f4f4f 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -121,13 +121,14 @@ def run(self) -> Any: return [] docnode = docutils.nodes.section() - self._run(name, "", resolved, docnode) + self._run(name, "", self.arguments[0], resolved, docnode) return docnode.children def _run( self, top_name: str, attr_name: str, + name: str, resolved: ResolvedNode[Node], docnode: docutils.nodes.Node, ) -> None: @@ -157,7 +158,7 @@ def _run( lines = self.env.drgndoc_formatter.format( resolved, - (attr_name or top_name).rpartition(".")[2], + name, self.env.ref_context.get("py:module", ""), ".".join(self.env.ref_context.get("py:classes", ())), ) @@ -194,7 +195,11 @@ def _run( for member in resolved.attrs(): if member.name != "__init__": self._run( - top_name, dot_join(attr_name, member.name), member, desc_content + top_name, + dot_join(attr_name, member.name), + member.name, + member, + desc_content, ) py_classes.pop() self.env.ref_context["py:class"] = py_classes[-1] if py_classes else None @@ -235,7 +240,9 @@ def _run_module( have_old_py_module = False self.env.ref_context["py:module"] = dot_join(top_name, attr_name) for attr in resolved.attrs(): - self._run(top_name, dot_join(attr_name, attr.name), attr, section) + self._run( + top_name, dot_join(attr_name, attr.name), attr.name, attr, section + ) if have_old_py_module: self.env.ref_context["py:module"] = old_py_module else: From a8d632b4c15d9b3851ba1e8ba4360dacf2d126d4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 21 Aug 2020 15:49:38 -0700 Subject: [PATCH 22/56] libdrgn/python: use F14 instead of PyDict for Program::objects Program::objects is used to store references to objects that must stay alive while the Program is alive. 
It is currently a PyDict where the keys are the object addresses as PyLong and the values are the objects themselves. This has two problems: 1. Allocating the key as a full object is obviously wasteful. 2. PyDict doesn't have an API for reserving capacity ahead of time, which we want for an upcoming change. Both of these are easily fixed by using our own hash table. Signed-off-by: Omar Sandoval --- libdrgn/python/drgnpy.h | 9 ++++++- libdrgn/python/program.c | 52 +++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 0dd47174e..53a41d159 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -12,6 +12,7 @@ #include "docstrings.h" #include "../drgn.h" +#include "../hash_table.h" #include "../program.h" /* These were added in Python 3.7. */ @@ -88,11 +89,17 @@ typedef struct { struct drgn_platform *platform; } Platform; +DEFINE_HASH_SET_TYPE(pyobjectp_set, PyObject *) + typedef struct { PyObject_HEAD struct drgn_program prog; - PyObject *objects; PyObject *cache; + /* + * Set of objects that we need to hold a reference to during the + * lifetime of the Program. 
+ */ + struct pyobjectp_set objects; } Program; typedef struct { diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 479654551..ad2886bf2 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -4,18 +4,15 @@ #include "drgnpy.h" #include "../vector.h" +DEFINE_HASH_TABLE_FUNCTIONS(pyobjectp_set, hash_pair_ptr_type, + hash_table_scalar_eq) + static int Program_hold_object(Program *prog, PyObject *obj) { - PyObject *key; - int ret; - - key = PyLong_FromVoidPtr(obj); - if (!key) + if (pyobjectp_set_insert(&prog->objects, &obj, NULL) == -1) return -1; - - ret = PyDict_SetItem(prog->objects, key, obj); - Py_DECREF(key); - return ret; + Py_INCREF(obj); + return 0; } static int Program_hold_type(Program *prog, DrgnType *type) @@ -66,15 +63,13 @@ int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, static Program *Program_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"platform", NULL}; - PyObject *platform_obj = NULL, *objects, *cache; - struct drgn_platform *platform; - Program *prog; - + static char *keywords[] = { "platform", NULL }; + PyObject *platform_obj = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:Program", keywords, &platform_obj)) return NULL; + struct drgn_platform *platform; if (!platform_obj || platform_obj == Py_None) { platform = NULL; } else if (PyObject_TypeCheck(platform_obj, &Platform_type)) { @@ -85,22 +80,17 @@ static Program *Program_new(PyTypeObject *subtype, PyObject *args, return NULL; } - objects = PyDict_New(); - if (!objects) - return NULL; - - cache = PyDict_New(); + PyObject *cache = PyDict_New(); if (!cache) return NULL; - prog = (Program *)Program_type.tp_alloc(&Program_type, 0); + Program *prog = (Program *)Program_type.tp_alloc(&Program_type, 0); if (!prog) { Py_DECREF(cache); - Py_DECREF(objects); return NULL; } - prog->objects = objects; prog->cache = cache; + pyobjectp_set_init(&prog->objects); drgn_program_init(&prog->prog, 
platform); return prog; } @@ -108,21 +98,33 @@ static Program *Program_new(PyTypeObject *subtype, PyObject *args, static void Program_dealloc(Program *self) { drgn_program_deinit(&self->prog); - Py_XDECREF(self->objects); + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&self->objects); it.entry; + it = pyobjectp_set_next(it)) + Py_DECREF(*it.entry); + pyobjectp_set_deinit(&self->objects); Py_XDECREF(self->cache); Py_TYPE(self)->tp_free((PyObject *)self); } static int Program_traverse(Program *self, visitproc visit, void *arg) { - Py_VISIT(self->objects); + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&self->objects); it.entry; + it = pyobjectp_set_next(it)) + Py_VISIT(*it.entry); Py_VISIT(self->cache); return 0; } static int Program_clear(Program *self) { - Py_CLEAR(self->objects); + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&self->objects); it.entry; + it = pyobjectp_set_next(it)) + Py_DECREF(*it.entry); + pyobjectp_set_deinit(&self->objects); + pyobjectp_set_init(&self->objects); Py_CLEAR(self->cache); return 0; } From d4e0771f87779016549d60d256439717ea91febd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 24 Aug 2020 18:01:25 -0700 Subject: [PATCH 23/56] libdrgn: return error from drgn_program_{is_little_endian,bswap,is_64_bit}() Most places that call these check has_platform and return an error, and those that don't can live with the extra check. 
Signed-off-by: Omar Sandoval --- libdrgn/arch_x86_64.c.in | 13 ++++--- libdrgn/object.c | 22 ++++-------- libdrgn/program.c | 73 ++++++++++++++++++++++------------------ libdrgn/program.h | 34 ++++++++++++++----- libdrgn/python/object.c | 22 +++++++++--- 5 files changed, 98 insertions(+), 66 deletions(-) diff --git a/libdrgn/arch_x86_64.c.in b/libdrgn/arch_x86_64.c.in index c6e4399ca..82cb49440 100644 --- a/libdrgn/arch_x86_64.c.in +++ b/libdrgn/arch_x86_64.c.in @@ -172,10 +172,13 @@ prstatus_set_initial_registers_x86_64(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "NT_PRSTATUS is truncated"); } + bool bswap; + struct drgn_error *err = drgn_program_bswap(prog, &bswap); + if (err) + return err; return set_initial_registers_from_struct_x86_64(thread, (char *)prstatus + 112, - size - 112, - drgn_program_bswap(prog)); + size - 112, bswap); } static inline struct drgn_error *read_register(struct drgn_object *reg_obj, @@ -462,9 +465,11 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it, static const uint64_t ADDRESS_MASK = UINT64_C(0xffffffffff000); struct drgn_program *prog = it->prog; struct pgtable_iterator_x86_64 *arch = (void *)it->arch; - struct drgn_error *err; - bool bswap = drgn_program_bswap(prog); int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level; + bool bswap; + struct drgn_error *err = drgn_program_bswap(prog, &bswap); + if (err) + return err; /* Find the lowest level with cached entries. 
*/ for (level = 0; level < levels; level++) { diff --git a/libdrgn/object.c b/libdrgn/object.c index 690d981fc..63c443c56 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -285,12 +285,7 @@ drgn_byte_order_to_little_endian(struct drgn_program *prog, *ret = true; return NULL; case DRGN_PROGRAM_ENDIAN: - if (!prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program byte order is not known"); - } - *ret = drgn_program_is_little_endian(prog); - return NULL; + return drgn_program_is_little_endian(prog, ret); default: return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "invalid byte order"); @@ -382,22 +377,17 @@ drgn_object_set_reference_internal(struct drgn_object *res, uint64_t bit_size, uint64_t address, uint64_t bit_offset, bool little_endian) { - struct drgn_error *err; - - if (!res->prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program word size is not known"); - } + bool is_64_bit; + struct drgn_error *err = drgn_program_is_64_bit(res->prog, &is_64_bit); + if (err) + return err; err = sanity_check_object(kind, type->bit_field_size, bit_size); if (err) return err; address += bit_offset / 8; - if (drgn_program_is_64_bit(res->prog)) - address &= UINT64_MAX; - else - address &= UINT32_MAX; + address &= is_64_bit ? 
UINT64_MAX : UINT32_MAX; bit_offset %= 8; if (bit_size > UINT64_MAX - bit_offset) { return drgn_error_format(DRGN_ERROR_OVERFLOW, diff --git a/libdrgn/program.c b/libdrgn/program.c index bbd45f369..e52b30973 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -738,25 +738,34 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, return err; } -static uint32_t get_prstatus_pid(struct drgn_program *prog, const char *data, - size_t size) +static struct drgn_error *get_prstatus_pid(struct drgn_program *prog, const char *data, + size_t size, uint32_t *ret) { + bool is_64_bit, bswap; + struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); + if (err) + return err; + err = drgn_program_bswap(prog, &bswap); + if (err) + return err; + + size_t offset = is_64_bit ? 32 : 24; uint32_t pr_pid; - memcpy(&pr_pid, data + (drgn_program_is_64_bit(prog) ? 32 : 24), - sizeof(pr_pid)); - if (drgn_program_bswap(prog)) + if (size < offset + sizeof(pr_pid)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "NT_PRSTATUS is truncated"); + } + memcpy(&pr_pid, data + offset, sizeof(pr_pid)); + if (bswap) pr_pid = bswap_32(pr_pid); - return pr_pid; + *ret = pr_pid; + return NULL; } struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, const char *data, size_t size) { - if (size < (drgn_program_is_64_bit(prog) ? 
36 : 28)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "NT_PRSTATUS is truncated"); - } if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { struct string *entry = drgn_prstatus_vector_append_entry(&prog->prstatus_vector); @@ -766,9 +775,12 @@ struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, entry->len = size; } else { struct drgn_prstatus_map_entry entry = { - .key = get_prstatus_pid(prog, data, size), .value = { data, size }, }; + struct drgn_error *err = get_prstatus_pid(prog, data, size, + &entry.key); + if (err) + return err; if (drgn_prstatus_map_insert(&prog->prstatus_map, &entry, NULL) == -1) return &drgn_enomem; @@ -863,21 +875,19 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, struct string *ret, uint32_t *tid_ret) { - struct drgn_error *err; - assert(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL); - err = drgn_program_cache_prstatus(prog); + struct drgn_error *err = drgn_program_cache_prstatus(prog); if (err) return err; if (cpu < prog->prstatus_vector.size) { *ret = prog->prstatus_vector.data[cpu]; - *tid_ret = get_prstatus_pid(prog, ret->str, ret->len); + return get_prstatus_pid(prog, ret->str, ret->len, tid_ret); } else { ret->str = NULL; ret->len = 0; + return NULL; } - return NULL; } struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, @@ -1069,18 +1079,16 @@ LIBDRGN_PUBLIC struct drgn_error * \ drgn_program_read_u##n(struct drgn_program *prog, uint64_t address, \ bool physical, uint##n##_t *ret) \ { \ - struct drgn_error *err; \ + bool bswap; \ + struct drgn_error *err = drgn_program_bswap(prog, &bswap); \ + if (err) \ + return err; \ uint##n##_t tmp; \ - \ - if (!prog->has_platform) { \ - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, \ - "program byte order is not known"); \ - } \ err = drgn_memory_reader_read(&prog->reader, &tmp, address, \ sizeof(tmp), physical); \ if (err) \ return err; \ - if (drgn_program_bswap(prog)) \ + if (bswap) \ tmp = 
bswap_##n(tmp); \ *ret = tmp; \ return NULL; \ @@ -1095,19 +1103,20 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_word(struct drgn_program *prog, uint64_t address, bool physical, uint64_t *ret) { - struct drgn_error *err; - - if (!prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program word size is not known"); - } - if (drgn_program_is_64_bit(prog)) { + bool is_64_bit, bswap; + struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); + if (err) + return err; + err = drgn_program_bswap(prog, &bswap); + if (err) + return err; + if (is_64_bit) { uint64_t tmp; err = drgn_memory_reader_read(&prog->reader, &tmp, address, sizeof(tmp), physical); if (err) return err; - if (drgn_program_bswap(prog)) + if (bswap) tmp = bswap_64(tmp); *ret = tmp; } else { @@ -1116,7 +1125,7 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, sizeof(tmp), physical); if (err) return err; - if (drgn_program_bswap(prog)) + if (bswap) tmp = bswap_32(tmp); *ret = tmp; } diff --git a/libdrgn/program.h b/libdrgn/program.h index 7efbdcf23..3ffb5bfc4 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -159,26 +159,42 @@ struct drgn_error *drgn_program_init_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid); -static inline bool drgn_program_is_little_endian(struct drgn_program *prog) +static inline struct drgn_error * +drgn_program_is_little_endian(struct drgn_program *prog, bool *ret) { - assert(prog->has_platform); - return prog->platform.flags & DRGN_PLATFORM_IS_LITTLE_ENDIAN; + if (!prog->has_platform) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "program byte order is not known"); + } + *ret = prog->platform.flags & DRGN_PLATFORM_IS_LITTLE_ENDIAN; + return NULL; } /** * Return whether a @ref drgn_program has a different endianness than the host * system. 
*/ -static inline bool drgn_program_bswap(struct drgn_program *prog) +static inline struct drgn_error * +drgn_program_bswap(struct drgn_program *prog, bool *ret) { - return (drgn_program_is_little_endian(prog) != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)); + bool is_little_endian; + struct drgn_error *err = drgn_program_is_little_endian(prog, + &is_little_endian); + if (err) + return err; + *ret = is_little_endian != (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__); + return NULL; } -static inline bool drgn_program_is_64_bit(struct drgn_program *prog) +static inline struct drgn_error * +drgn_program_is_64_bit(struct drgn_program *prog, bool *ret) { - assert(prog->has_platform); - return prog->platform.flags & DRGN_PLATFORM_IS_64_BIT; + if (!prog->has_platform) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "program word size is not known"); + } + *ret = prog->platform.flags & DRGN_PLATFORM_IS_64_BIT; + return NULL; } struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret); diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index 117c99c61..e5b8389a3 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -866,16 +866,28 @@ static PyObject *DrgnObject_repr(DrgnObject *self) Py_DECREF(tmp); } - if ((self->obj.is_reference || self->obj.kind == DRGN_OBJECT_BUFFER) && - self->obj.prog->has_platform) { + if (self->obj.is_reference || self->obj.kind == DRGN_OBJECT_BUFFER) { bool little_endian; - if (self->obj.is_reference) little_endian = self->obj.reference.little_endian; else little_endian = self->obj.value.little_endian; - if (little_endian != - drgn_program_is_little_endian(self->obj.prog) && + + bool print_byteorder; + if (self->obj.prog->has_platform) { + bool prog_little_endian; + err = drgn_program_is_little_endian(self->obj.prog, + &prog_little_endian); + if (err) { + set_drgn_error(err); + goto out; + } + print_byteorder = little_endian != prog_little_endian; + } else { + print_byteorder = true; + } + 
+ if (print_byteorder && append_format(parts, ", byteorder='%s'", little_endian ? "little" : "big") == -1) goto out; From 1c8181e22d57274c32844ddb592a7f1076772c8f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 14 Jul 2020 00:17:42 -0700 Subject: [PATCH 24/56] libdrgn: rearrange struct drgn_program members struct drgn_program has a bunch of state scattered around. Group it together more logically, even if it means sacrificing some padding here and there. Signed-off-by: Omar Sandoval --- libdrgn/program.h | 79 +++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/libdrgn/program.h b/libdrgn/program.h index 3ffb5bfc4..2a06e4f9d 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -62,38 +62,42 @@ struct drgn_dwarf_index; struct drgn_program { /** @privatesection */ - struct drgn_memory_reader reader; - struct drgn_type_index tindex; - struct drgn_object_index oindex; - struct drgn_memory_file_segment *file_segments; - /* Default language of the program. */ - const struct drgn_language *lang; + /* - * Valid iff flags & DRGN_PROGRAM_IS_LINUX_KERNEL. + * Memory/core dump. */ - struct vmcoreinfo vmcoreinfo; - /* Cached PAGE_OFFSET. */ - uint64_t page_offset; - /* Cached vmemmap. */ - uint64_t vmemmap; - /* Cached THREAD_SIZE. */ - uint64_t thread_size; + struct drgn_memory_reader reader; + /* Elf core dump or /proc/pid/mem file segments. */ + struct drgn_memory_file_segment *file_segments; + /* Elf core dump. Not valid for live programs or kdump files. */ + Elf *core; + /* File descriptor for ELF core dump, kdump file, or /proc/pid/mem. */ + int core_fd; + /* PID of live userspace program. */ + pid_t pid; #ifdef WITH_LIBKDUMPFILE kdump_ctx_t *kdump_ctx; #endif + /* - * Valid iff !(flags & DRGN_PROGRAM_IS_LIVE), unless the file - * was a kdump file. + * Debugging information. 
*/ - Elf *core; - int core_fd; - /* - * Valid iff - * (flags & (DRGN_PROGRAM_IS_LINUX_KERNEL | DRGN_PROGRAM_IS_LIVE)) == - * DRGN_PROGRAM_IS_LIVE. - */ - pid_t pid; + struct drgn_type_index tindex; + struct drgn_object_index oindex; struct drgn_dwarf_info_cache *_dicache; + + /* + * Program information. + */ + /* Default language of the program. */ + const struct drgn_language *lang; + struct drgn_platform platform; + bool has_platform; + enum drgn_program_flags flags; + + /* + * Stack traces. + */ union { /* * For the Linux kernel, PRSTATUS notes indexed by CPU. See @ref @@ -109,22 +113,29 @@ struct drgn_program { /* See @ref drgn_object_stack_trace_next_thread(). */ const struct drgn_object *stack_trace_obj; uint32_t stack_trace_tid; - enum drgn_program_flags flags; - struct drgn_platform platform; - bool has_platform; - bool attached_dwfl_state; bool prstatus_cached; + bool attached_dwfl_state; + /* - * Whether @ref drgn_program::pgtable_it is currently being used. Used - * to prevent address translation from recursing. + * Linux kernel-specific. */ - bool pgtable_it_in_use; - - /* Page table iterator for linux_helper_read_vm(). */ - struct pgtable_iterator *pgtable_it; + struct vmcoreinfo vmcoreinfo; + /* Cached PAGE_OFFSET. */ + uint64_t page_offset; + /* Cached vmemmap. */ + uint64_t vmemmap; + /* Cached THREAD_SIZE. */ + uint64_t thread_size; /* Cache for @ref linux_helper_task_state_to_char(). */ char *task_state_chars; uint64_t task_report; + /* Page table iterator for linux_helper_read_vm(). */ + struct pgtable_iterator *pgtable_it; + /* + * Whether @ref drgn_program::pgtable_it is currently being used. Used + * to prevent address translation from recursing. + */ + bool pgtable_it_in_use; }; /** Initialize a @ref drgn_program. 
*/ From c31208f69c1c81bf1bbc530882464c7f6fffcd40 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 22 Apr 2020 16:23:26 -0700 Subject: [PATCH 25/56] libdrgn: fold drgn_type_index into drgn_program This is preparation for associating types with a program. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 - libdrgn/dwarf_info_cache.c | 31 +- libdrgn/dwarf_info_cache.h | 9 +- libdrgn/language.h | 11 +- libdrgn/language_c.c | 179 +++++------ libdrgn/linux_kernel.c | 42 +-- libdrgn/object.c | 19 +- libdrgn/object_index.c | 7 + libdrgn/object_index.h | 5 +- libdrgn/program.c | 42 +-- libdrgn/program.h | 24 +- libdrgn/python/program.c | 4 +- libdrgn/type.c | 585 +++++++++++++++++++++++++++++++++- libdrgn/type.h | 223 +++++++++++-- libdrgn/type_index.c | 622 ------------------------------------- libdrgn/type_index.h | 323 ------------------- tests/test_program.py | 4 +- 17 files changed, 973 insertions(+), 1159 deletions(-) delete mode 100644 libdrgn/type_index.c delete mode 100644 libdrgn/type_index.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 426175991..22a341e1e 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -61,8 +61,6 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ symbol.h \ type.c \ type.h \ - type_index.c \ - type_index.h \ util.h \ vector.c \ vector.h diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/dwarf_info_cache.c index 536d99a2e..e9e22cdba 100644 --- a/libdrgn/dwarf_info_cache.c +++ b/libdrgn/dwarf_info_cache.c @@ -12,7 +12,7 @@ #include "hash_table.h" #include "object.h" #include "object_index.h" -#include "type_index.h" +#include "type.h" #include "vector.h" DEFINE_HASH_TABLE_FUNCTIONS(dwarf_type_map, hash_pair_ptr_type, @@ -1025,8 +1025,8 @@ drgn_pointer_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, if (err) return err; - return drgn_type_index_pointer_type(dicache->tindex, referenced_type, - lang, ret); + return drgn_program_pointer_type(dicache->prog, referenced_type, lang, + ret); } struct 
array_dimension { @@ -1129,19 +1129,18 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, do { dimension = array_dimension_vector_pop(&dimensions); if (dimension->is_complete) { - err = drgn_type_index_array_type(dicache->tindex, - dimension->length, - element_type, lang, - &type); + err = drgn_program_array_type(dicache->prog, + dimension->length, + element_type, lang, + &type); } else if (dimensions.size || !can_be_incomplete_array) { - err = drgn_type_index_array_type(dicache->tindex, 0, - element_type, lang, - &type); + err = drgn_program_array_type(dicache->prog, 0, + element_type, lang, + &type); } else { - err = drgn_type_index_incomplete_array_type(dicache->tindex, - element_type, - lang, - &type); + err = drgn_program_incomplete_array_type(dicache->prog, + element_type, + lang, &type); } if (err) goto out; @@ -1666,7 +1665,7 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, } struct drgn_error * -drgn_dwarf_info_cache_create(struct drgn_type_index *tindex, +drgn_dwarf_info_cache_create(struct drgn_program *prog, const Dwfl_Callbacks *dwfl_callbacks, struct drgn_dwarf_info_cache **ret) { @@ -1684,7 +1683,7 @@ drgn_dwarf_info_cache_create(struct drgn_type_index *tindex, dwarf_type_map_init(&dicache->map); dwarf_type_map_init(&dicache->cant_be_incomplete_array_map); dicache->depth = 0; - dicache->tindex = tindex; + dicache->prog = prog; *ret = dicache; return NULL; } diff --git a/libdrgn/dwarf_info_cache.h b/libdrgn/dwarf_info_cache.h index 2d8a871c3..123312e69 100644 --- a/libdrgn/dwarf_info_cache.h +++ b/libdrgn/dwarf_info_cache.h @@ -23,8 +23,7 @@ * Caching of DWARF debugging information. * * @ref drgn_dwarf_info_cache bridges the raw DWARF information indexed by @ref - * drgn_dwarf_index to the higher-level @ref drgn_type_index and @ref - * drgn_object_index. + * drgn_dwarf_index to higher-level type and object finders. 
* * @{ */ @@ -73,13 +72,13 @@ struct drgn_dwarf_info_cache { struct dwarf_type_map cant_be_incomplete_array_map; /** Current parsing recursion depth. */ int depth; - /** Type index. */ - struct drgn_type_index *tindex; + /** Program owning this cache. */ + struct drgn_program *prog; }; /** Create a @ref drgn_dwarf_info_cache. */ struct drgn_error * -drgn_dwarf_info_cache_create(struct drgn_type_index *tindex, +drgn_dwarf_info_cache_create(struct drgn_program *prog, const Dwfl_Callbacks *dwfl_callbacks, struct drgn_dwarf_info_cache **ret); diff --git a/libdrgn/language.h b/libdrgn/language.h index 9d129dcc1..89d54f131 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -12,9 +12,10 @@ #ifndef DRGN_LANGUAGE_H #define DRGN_LANGUAGE_H -#include "drgn.h" #include +#include "drgn.h" + /** * @ingroup Internals * @@ -28,15 +29,13 @@ * @{ */ -struct drgn_type_index; - typedef struct drgn_error *drgn_format_type_fn(struct drgn_qualified_type, char **); typedef struct drgn_error *drgn_format_object_fn(const struct drgn_object *, size_t, enum drgn_format_object_flags, char **); -typedef struct drgn_error *drgn_find_type_fn(struct drgn_type_index *tindex, +typedef struct drgn_error *drgn_find_type_fn(struct drgn_program *prog, const char *name, const char *filename, struct drgn_qualified_type *ret); @@ -78,10 +77,10 @@ struct drgn_language { /** Implement @ref drgn_format_object(). */ drgn_format_object_fn *format_object; /** - * Implement @ref drgn_type_index_find(). + * Implement @ref drgn_program_find_type(). * * This should parse @p name and call @ref - * drgn_type_index_find_parsed(). + * drgn_program_find_type_impl(). 
*/ drgn_find_type_fn *find_type; /** diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index df4f6800a..83b0a392f 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -18,7 +18,6 @@ #include "string_builder.h" #include "symbol.h" #include "type.h" -#include "type_index.h" static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, @@ -2111,7 +2110,7 @@ enum drgn_primitive_type c_parse_specifier_list(const char *s) } static struct drgn_error * -c_parse_specifier_qualifier_list(struct drgn_type_index *tindex, +c_parse_specifier_qualifier_list(struct drgn_program *prog, struct drgn_lexer *lexer, const char *filename, struct drgn_qualified_type *ret) { @@ -2206,17 +2205,17 @@ c_parse_specifier_qualifier_list(struct drgn_type_index *tindex, kind = DRGN_TYPE_ENUM; } else if (identifier) { if (strstartswith(identifier, "size_t")) { - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_SIZE_T, - &ret->type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_SIZE_T, + &ret->type); if (err) return err; ret->qualifiers = 0; goto out; } else if (strstartswith(identifier, "ptrdiff_t")) { - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_PTRDIFF_T, - &ret->type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_PTRDIFF_T, + &ret->type); if (err) return err; ret->qualifiers = 0; @@ -2229,15 +2228,15 @@ c_parse_specifier_qualifier_list(struct drgn_type_index *tindex, "expected type specifier"); } - err = drgn_type_index_find_parsed(tindex, kind, identifier, + err = drgn_program_find_type_impl(prog, kind, identifier, identifier_len, filename, ret); if (err) return err; } else { - err = drgn_type_index_find_primitive(tindex, - specifier_kind[specifier], - &ret->type); + err = drgn_program_find_primitive_type(prog, + specifier_kind[specifier], + &ret->type); if (err) return err; ret->qualifiers = 0; @@ -2259,7 +2258,7 @@ struct c_declarator { /* These functions don't free the declarator list on 
error. */ static struct drgn_error * -c_parse_abstract_declarator(struct drgn_type_index *tindex, +c_parse_abstract_declarator(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner); @@ -2289,7 +2288,7 @@ c_parse_optional_type_qualifier_list(struct drgn_lexer *lexer, } static struct drgn_error * -c_parse_pointer(struct drgn_type_index *tindex, struct drgn_lexer *lexer, +c_parse_pointer(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner) { struct drgn_error *err; @@ -2329,7 +2328,7 @@ c_parse_pointer(struct drgn_type_index *tindex, struct drgn_lexer *lexer, } static struct drgn_error * -c_parse_direct_abstract_declarator(struct drgn_type_index *tindex, +c_parse_direct_abstract_declarator(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner) @@ -2351,7 +2350,7 @@ c_parse_direct_abstract_declarator(struct drgn_type_index *tindex, if (token2.kind == C_TOKEN_ASTERISK || token2.kind == C_TOKEN_LPAREN || token2.kind == C_TOKEN_LBRACKET) { - err = c_parse_abstract_declarator(tindex, lexer, outer, + err = c_parse_abstract_declarator(prog, lexer, outer, inner); if (err) return err; @@ -2431,7 +2430,7 @@ c_parse_direct_abstract_declarator(struct drgn_type_index *tindex, } static struct drgn_error * -c_parse_abstract_declarator(struct drgn_type_index *tindex, +c_parse_abstract_declarator(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner) @@ -2443,7 +2442,7 @@ c_parse_abstract_declarator(struct drgn_type_index *tindex, if (err) return err; if (token.kind == C_TOKEN_ASTERISK) { - err = c_parse_pointer(tindex, lexer, outer, inner); + err = c_parse_pointer(prog, lexer, outer, inner); if (err) return err; @@ -2452,21 +2451,21 @@ c_parse_abstract_declarator(struct drgn_type_index *tindex, token.kind == C_TOKEN_LBRACKET) { struct c_declarator *tmp; - err = 
c_parse_direct_abstract_declarator(tindex, lexer, + err = c_parse_direct_abstract_declarator(prog, lexer, outer, &tmp); if (err) return err; } return NULL; } else { - return c_parse_direct_abstract_declarator(tindex, lexer, outer, + return c_parse_direct_abstract_declarator(prog, lexer, outer, inner); } } /* This always frees the declarator list regardless of success or failure. */ static struct drgn_error * -c_type_from_declarator(struct drgn_type_index *tindex, +c_type_from_declarator(struct drgn_program *prog, struct c_declarator *declarator, struct drgn_qualified_type *ret) { @@ -2475,21 +2474,20 @@ c_type_from_declarator(struct drgn_type_index *tindex, if (!declarator) return NULL; - err = c_type_from_declarator(tindex, declarator->next, ret); + err = c_type_from_declarator(prog, declarator->next, ret); if (err) { free(declarator); return err; } if (declarator->kind == C_TOKEN_ASTERISK) { - err = drgn_type_index_pointer_type(tindex, *ret, NULL, - &ret->type); + err = drgn_program_pointer_type(prog, *ret, NULL, &ret->type); } else if (declarator->is_complete) { - err = drgn_type_index_array_type(tindex, declarator->length, - *ret, NULL, &ret->type); + err = drgn_program_array_type(prog, declarator->length, *ret, + NULL, &ret->type); } else { - err = drgn_type_index_incomplete_array_type(tindex, *ret, NULL, - &ret->type); + err = drgn_program_incomplete_array_type(prog, *ret, NULL, + &ret->type); } if (!err) @@ -2498,7 +2496,7 @@ c_type_from_declarator(struct drgn_type_index *tindex, return err; } -struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, +struct drgn_error *c_find_type(struct drgn_program *prog, const char *name, const char *filename, struct drgn_qualified_type *ret) { @@ -2508,7 +2506,7 @@ struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, drgn_lexer_init(&lexer, drgn_lexer_c, name); - err = c_parse_specifier_qualifier_list(tindex, &lexer, filename, ret); + err = 
c_parse_specifier_qualifier_list(prog, &lexer, filename, ret); if (err) goto out; @@ -2522,8 +2520,7 @@ struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, if (err) return err; - err = c_parse_abstract_declarator(tindex, &lexer, &outer, - &inner); + err = c_parse_abstract_declarator(prog, &lexer, &outer, &inner); if (err) { while (outer) { struct c_declarator *next; @@ -2535,7 +2532,7 @@ struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, goto out; } - err = c_type_from_declarator(tindex, outer, ret); + err = c_type_from_declarator(prog, outer, ret); if (err) goto out; @@ -2580,11 +2577,10 @@ struct drgn_error *c_bit_offset(struct drgn_program *prog, struct drgn_member_value *member; struct drgn_qualified_type member_type; - err = drgn_type_index_find_member(&prog->tindex, - type, - token.value, - token.len, - &member); + err = drgn_program_find_member(prog, type, + token.value, + token.len, + &member); if (err) goto out; if (__builtin_add_overflow(bit_offset, @@ -2703,9 +2699,8 @@ struct drgn_error *c_integer_literal(struct drgn_object *res, uint64_t uvalue) bits = fls(uvalue); qualified_type.qualifiers = 0; for (i = 0; i < ARRAY_SIZE(types); i++) { - err = drgn_type_index_find_primitive(&res->prog->tindex, - types[i], - &qualified_type.type); + err = drgn_program_find_primitive_type(res->prog, types[i], + &qualified_type.type); if (err) return err; @@ -2729,9 +2724,8 @@ struct drgn_error *c_bool_literal(struct drgn_object *res, bool bvalue) struct drgn_error *err; struct drgn_qualified_type qualified_type; - err = drgn_type_index_find_primitive(&res->prog->tindex, - DRGN_C_TYPE_INT, - &qualified_type.type); + err = drgn_program_find_primitive_type(res->prog, DRGN_C_TYPE_INT, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; @@ -2743,9 +2737,8 @@ struct drgn_error *c_float_literal(struct drgn_object *res, double fvalue) struct drgn_error *err; struct drgn_qualified_type 
qualified_type; - err = drgn_type_index_find_primitive(&res->prog->tindex, - DRGN_C_TYPE_DOUBLE, - &qualified_type.type); + err = drgn_program_find_primitive_type(res->prog, DRGN_C_TYPE_DOUBLE, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; @@ -2800,7 +2793,7 @@ static bool c_can_represent_all_values(struct drgn_type *type1, return false; } -static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, +static struct drgn_error *c_integer_promotions(struct drgn_program *prog, struct drgn_object_type *type) { struct drgn_error *err; @@ -2850,8 +2843,8 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, */ if (primitive >= ARRAY_SIZE(c_integer_conversion_rank) || type->bit_field_size) { - err = drgn_type_index_find_primitive(tindex, DRGN_C_TYPE_INT, - &int_type); + err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT, + &int_type); if (err) return err; if (c_can_represent_all_values(int_type, 0, @@ -2862,9 +2855,9 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, return NULL; } - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_INT, - &int_type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_INT, + &int_type); if (err) return err; if (c_can_represent_all_values(int_type, 0, @@ -2886,16 +2879,16 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, * If int can represent all values of the original type, then the result * is int. Otherwise, the result is unsigned int. 
*/ - err = drgn_type_index_find_primitive(tindex, DRGN_C_TYPE_INT, - &int_type); + err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT, + &int_type); if (err) return err; if (c_can_represent_all_values(int_type, 0, type->underlying_type, 0)) { type->type = int_type; } else { - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_INT, - &type->type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_INT, + &type->type); if (err) return err; } @@ -2904,7 +2897,7 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, } static struct drgn_error * -c_corresponding_unsigned_type(struct drgn_type_index *tindex, +c_corresponding_unsigned_type(struct drgn_program *prog, enum drgn_primitive_type type, struct drgn_type **ret) { @@ -2914,23 +2907,23 @@ c_corresponding_unsigned_type(struct drgn_type_index *tindex, * handle them here. */ case DRGN_C_TYPE_INT: - return drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_INT, - ret); + return drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_INT, + ret); case DRGN_C_TYPE_LONG: - return drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - ret); + return drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + ret); case DRGN_C_TYPE_LONG_LONG: - return drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_LONG_LONG, - ret); + return drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG_LONG, + ret); default: UNREACHABLE(); } } -static struct drgn_error *c_common_real_type(struct drgn_type_index *tindex, +static struct drgn_error *c_common_real_type(struct drgn_program *prog, struct drgn_object_type *type1, struct drgn_object_type *type2, struct drgn_object_type *ret) @@ -2980,10 +2973,10 @@ static struct drgn_error *c_common_real_type(struct drgn_type_index *tindex, * Otherwise, the integer promotions are performed before applying the * following rules. 
*/ - err = c_integer_promotions(tindex, type1); + err = c_integer_promotions(prog, type1); if (err) return err; - err = c_integer_promotions(tindex, type2); + err = c_integer_promotions(prog, type2); if (err) return err; @@ -3103,7 +3096,7 @@ static struct drgn_error *c_common_real_type(struct drgn_type_index *tindex, * rank, then it must have greater size and thus be able to represent * all values of the unsigned integer type. */ - err = c_corresponding_unsigned_type(tindex, + err = c_corresponding_unsigned_type(prog, is_signed1 ? primitive1 : primitive2, &ret->type); if (err) @@ -3130,10 +3123,10 @@ static struct drgn_error *c_operand_type(const struct drgn_object *obj, *type_ret = drgn_object_type(obj); switch (drgn_type_kind(type_ret->underlying_type)) { case DRGN_TYPE_ARRAY: - err = drgn_type_index_pointer_type(&obj->prog->tindex, - drgn_type_type(type_ret->underlying_type), - drgn_type_language(type_ret->underlying_type), - &type_ret->type); + err = drgn_program_pointer_type(obj->prog, + drgn_type_type(type_ret->underlying_type), + drgn_type_language(type_ret->underlying_type), + &type_ret->type); if (err) return err; type_ret->underlying_type = type_ret->type; @@ -3144,10 +3137,9 @@ static struct drgn_error *c_operand_type(const struct drgn_object *obj, .qualifiers = type_ret->qualifiers, }; - err = drgn_type_index_pointer_type(&obj->prog->tindex, - function_type, - drgn_type_language(type_ret->underlying_type), - &type_ret->type); + err = drgn_program_pointer_type(obj->prog, function_type, + drgn_type_language(type_ret->underlying_type), + &type_ret->type); if (err) return err; type_ret->underlying_type = type_ret->type; @@ -3257,8 +3249,8 @@ struct drgn_error *c_op_cmp(const struct drgn_object *lhs, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, - &rhs_type, &type); + err = c_common_real_type(lhs->prog, &lhs_type, 
&rhs_type, + &type); if (err) return err; @@ -3299,8 +3291,8 @@ struct drgn_error *c_op_add(struct drgn_object *res, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, - &rhs_type, &type); + err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, + &type); if (err) return err; @@ -3330,9 +3322,9 @@ struct drgn_error *c_op_sub(struct drgn_object *res, if (lhs_pointer && rhs_pointer) { struct drgn_object_type type = {}; - err = drgn_type_index_find_primitive(&lhs->prog->tindex, - DRGN_C_TYPE_PTRDIFF_T, - &type.type); + err = drgn_program_find_primitive_type(lhs->prog, + DRGN_C_TYPE_PTRDIFF_T, + &type.type); if (err) return err; type.underlying_type = drgn_underlying_type(type.type); @@ -3351,8 +3343,8 @@ struct drgn_error *c_op_sub(struct drgn_object *res, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, - &rhs_type, &type); + err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, + &type); if (err) return err; @@ -3382,8 +3374,7 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ return drgn_error_binary_op("binary "#op, &lhs_type, \ &rhs_type); \ \ - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, &rhs_type, \ - &type); \ + err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, &type); \ if (err) \ return err; \ \ @@ -3416,10 +3407,10 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ return drgn_error_binary_op("binary " #op, &lhs_type, \ &rhs_type); \ \ - err = c_integer_promotions(&lhs->prog->tindex, &lhs_type); \ + err = c_integer_promotions(lhs->prog, &lhs_type); \ if (err) \ return err; \ - err = c_integer_promotions(&lhs->prog->tindex, &rhs_type); \ + err = c_integer_promotions(lhs->prog, &rhs_type); \ if (err) \ return err; \ \ @@ -3442,7 +3433,7 @@ 
struct drgn_error *c_op_##op_name(struct drgn_object *res, \ if (!drgn_type_is_##check(type.underlying_type)) \ return drgn_error_unary_op("unary " #op, &type); \ \ - err = c_integer_promotions(&obj->prog->tindex, &type); \ + err = c_integer_promotions(obj->prog, &type); \ if (err) \ return err; \ \ diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 964f541a3..94efffe24 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -286,18 +286,18 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, } } - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, prog->page_offset, 0); } else if (name_len == strlen("PAGE_SHIFT") && memcmp(name, "PAGE_SHIFT", name_len) == 0) { - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_INT, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_INT, + &qualified_type.type); if (err) return err; return drgn_object_set_signed(ret, qualified_type, @@ -305,9 +305,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, 0); } else if (name_len == strlen("PAGE_SIZE") && memcmp(name, "PAGE_SIZE", name_len) == 0) { - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, @@ -315,9 +315,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, 0); } else if (name_len == strlen("PAGE_MASK") && memcmp(name, "PAGE_MASK", name_len) == 0) { - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + 
err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, @@ -330,9 +330,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, err = linux_kernel_get_thread_size(prog, &thread_size); if (err) return err; - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, @@ -341,16 +341,16 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, memcmp(name, "UTS_RELEASE", name_len) == 0) { size_t len; - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_CHAR, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_CHAR, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = DRGN_QUALIFIER_CONST; len = strlen(prog->vmcoreinfo.osrelease); - err = drgn_type_index_array_type(&prog->tindex, len + 1, - qualified_type, NULL, - &qualified_type.type); + err = drgn_program_array_type(prog, len + 1, + qualified_type, NULL, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; diff --git a/libdrgn/object.c b/libdrgn/object.c index 63c443c56..8ca5ed697 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -13,7 +13,6 @@ #include "program.h" #include "serialize.h" #include "type.h" -#include "type_index.h" LIBDRGN_PUBLIC void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog) @@ -1291,9 +1290,9 @@ drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) "cannot take address of bit field"); } - err = drgn_type_index_pointer_type(&obj->prog->tindex, - drgn_object_qualified_type(obj), - NULL, &qualified_type.type); + err = drgn_program_pointer_type(obj->prog, + 
drgn_object_qualified_type(obj), NULL, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; @@ -1368,10 +1367,10 @@ struct drgn_error *drgn_object_member_dereference(struct drgn_object *res, obj->type); } - err = drgn_type_index_find_member(&obj->prog->tindex, - drgn_type_type(underlying_type).type, - member_name, strlen(member_name), - &member); + err = drgn_program_find_member(obj->prog, + drgn_type_type(underlying_type).type, + member_name, strlen(member_name), + &member); if (err) return err; @@ -1418,8 +1417,8 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, if (err) return err; - err = drgn_type_index_pointer_type(&obj->prog->tindex, qualified_type, - NULL, &result_type.type); + err = drgn_program_pointer_type(obj->prog, qualified_type, NULL, + &result_type.type); if (err) return err; result_type.qualifiers = 0; diff --git a/libdrgn/object_index.c b/libdrgn/object_index.c index 68f069b0c..b2fc6296a 100644 --- a/libdrgn/object_index.c +++ b/libdrgn/object_index.c @@ -41,6 +41,13 @@ drgn_object_index_add_finder(struct drgn_object_index *oindex, return NULL; } +void drgn_object_index_remove_finder(struct drgn_object_index *oindex) +{ + struct drgn_object_finder *finder = oindex->finders->next; + free(oindex->finders); + oindex->finders = finder; +} + struct drgn_error *drgn_object_index_find(struct drgn_object_index *oindex, const char *name, const char *filename, diff --git a/libdrgn/object_index.h b/libdrgn/object_index.h index fcd384795..022fbc222 100644 --- a/libdrgn/object_index.h +++ b/libdrgn/object_index.h @@ -41,7 +41,7 @@ struct drgn_object_finder { * Object index. * * A object index is used to find objects (variables, constants, and functions) - * by name. The types are found using callbacks which are registered with @ref + * by name. The objects are found using callbacks which are registered with @ref * drgn_object_index_add_finder(). @ref drgn_object_index_find() searches for an * object. 
*/ @@ -61,6 +61,9 @@ struct drgn_error * drgn_object_index_add_finder(struct drgn_object_index *oindex, drgn_object_find_fn fn, void *arg); +/** Remove the most recently added object finding callback. */ +void drgn_object_index_remove_finder(struct drgn_object_index *oindex); + /** * Find an object in a @ref drgn_object_index. * diff --git a/libdrgn/program.c b/libdrgn/program.c index e52b30973..0f0d724ed 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -25,7 +25,6 @@ #include "read.h" #include "string_builder.h" #include "symbol.h" -#include "type_index.h" #include "vector.h" DEFINE_VECTOR_FUNCTIONS(drgn_prstatus_vector) @@ -63,8 +62,6 @@ void drgn_program_set_platform(struct drgn_program *prog, if (!prog->has_platform) { prog->platform = *platform; prog->has_platform = true; - prog->tindex.word_size = - platform->flags & DRGN_PLATFORM_IS_64_BIT ? 8 : 4; } } @@ -73,7 +70,7 @@ void drgn_program_init(struct drgn_program *prog, { memset(prog, 0, sizeof(*prog)); drgn_memory_reader_init(&prog->reader); - drgn_type_index_init(&prog->tindex); + drgn_program_init_types(prog); drgn_object_index_init(&prog->oindex); prog->core_fd = -1; if (platform) @@ -92,7 +89,7 @@ void drgn_program_deinit(struct drgn_program *prog) free(prog->pgtable_it); drgn_object_index_deinit(&prog->oindex); - drgn_type_index_deinit(&prog->tindex); + drgn_program_deinit_types(prog); drgn_memory_reader_deinit(&prog->reader); free(prog->file_segments); @@ -139,13 +136,6 @@ drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, read_fn, arg, physical); } -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_add_type_finder(struct drgn_program *prog, drgn_type_find_fn fn, - void *arg) -{ - return drgn_type_index_add_finder(&prog->tindex, fn, arg); -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_add_object_finder(struct drgn_program *prog, drgn_object_find_fn fn, void *arg) @@ -552,21 +542,21 @@ static struct drgn_error *drgn_program_get_dindex(struct drgn_program *prog, 
else dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; - err = drgn_dwarf_info_cache_create(&prog->tindex, - dwfl_callbacks, &dicache); + err = drgn_dwarf_info_cache_create(prog, dwfl_callbacks, + &dicache); if (err) return err; - err = drgn_program_add_type_finder(prog, drgn_dwarf_type_find, - dicache); + err = drgn_program_add_object_finder(prog, + drgn_dwarf_object_find, + dicache); if (err) { drgn_dwarf_info_cache_destroy(dicache); return err; } - err = drgn_program_add_object_finder(prog, - drgn_dwarf_object_find, - dicache); + err = drgn_program_add_type_finder(prog, drgn_dwarf_type_find, + dicache); if (err) { - drgn_type_index_remove_finder(&prog->tindex); + drgn_object_index_remove_finder(&prog->oindex); drgn_dwarf_info_cache_destroy(dicache); return err; } @@ -1132,14 +1122,6 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, return NULL; } -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_find_type(struct drgn_program *prog, const char *name, - const char *filename, struct drgn_qualified_type *ret) -{ - return drgn_type_index_find(&prog->tindex, name, filename, - drgn_program_language(prog), ret); -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_find_object(struct drgn_program *prog, const char *name, const char *filename, @@ -1297,8 +1279,8 @@ drgn_program_member_info(struct drgn_program *prog, struct drgn_type *type, struct drgn_error *err; struct drgn_member_value *member; - err = drgn_type_index_find_member(&prog->tindex, type, member_name, - strlen(member_name), &member); + err = drgn_program_find_member(prog, type, member_name, + strlen(member_name), &member); if (err) return err; diff --git a/libdrgn/program.h b/libdrgn/program.h index 2a06e4f9d..386a90316 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -21,7 +21,7 @@ #include "memory_reader.h" #include "object_index.h" #include "platform.h" -#include "type_index.h" +#include "type.h" #include "vector.h" /** @@ -79,10 +79,30 @@ struct drgn_program { 
kdump_ctx_t *kdump_ctx; #endif + /* + * Types. + */ + /** Callbacks for finding types. */ + struct drgn_type_finder *type_finders; + /** Cache of primitive types. */ + struct drgn_type *primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; + struct drgn_type default_size_t; + struct drgn_type default_ptrdiff_t; + /** Cache of created pointer types. */ + struct drgn_pointer_type_table pointer_types; + /** Cache of created array types. */ + struct drgn_array_type_table array_types; + /** Cache for @ref drgn_program_find_member(). */ + struct drgn_member_map members; + /** + * Set of types which have been already cached in @ref + * drgn_program::members. + */ + struct drgn_type_set members_cached; + /* * Debugging information. */ - struct drgn_type_index tindex; struct drgn_object_index oindex; struct drgn_dwarf_info_cache *_dicache; diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index ad2886bf2..9601f71c6 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -620,8 +620,8 @@ static PyObject *Program_pointer_type(Program *self, PyObject *args, &referenced_type) == -1) return NULL; - err = drgn_type_index_pointer_type(&self->prog.tindex, referenced_type, - language, &qualified_type.type); + err = drgn_program_pointer_type(&self->prog, referenced_type, language, + &qualified_type.type); if (err) return set_drgn_error(err); qualified_type.qualifiers = qualifiers; diff --git a/libdrgn/type.c b/libdrgn/type.c index b9e537efd..2809ab10c 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -6,8 +6,8 @@ #include "internal.h" #include "hash_table.h" #include "language.h" +#include "program.h" #include "type.h" -#include "type_index.h" const char * const drgn_type_kind_spelling[] = { [DRGN_TYPE_VOID] = "void", @@ -25,7 +25,17 @@ const char * const drgn_type_kind_spelling[] = { [DRGN_TYPE_FUNCTION] = "function", }; -const char * const * const +/** + * Names of primitive types. 
+ * + * In some languages, like C, the same primitive type can be spelled in multiple + * ways. For example, "int" can also be spelled "signed int" or "int signed". + * + * This maps each @ref drgn_primitive_type to a ``NULL``-terminated array of the + * different ways to spell that type. The spelling at index zero is the + * preferred spelling. + */ +static const char * const * const drgn_primitive_type_spellings[DRGN_PRIMITIVE_TYPE_NUM] = { [DRGN_C_TYPE_VOID] = (const char * []){ "void", NULL, }, [DRGN_C_TYPE_CHAR] = (const char * []){ "char", NULL, }, @@ -93,7 +103,11 @@ drgn_primitive_type_spellings[DRGN_PRIMITIVE_TYPE_NUM] = { [DRGN_C_TYPE_PTRDIFF_T] = (const char * []){ "ptrdiff_t", NULL, }, }; -const enum drgn_type_kind +/** + * Mapping from a @ref drgn_type_primitive to the corresponding @ref + * drgn_type_kind. + */ +static const enum drgn_type_kind drgn_primitive_type_kind[DRGN_PRIMITIVE_TYPE_NUM + 1] = { [DRGN_C_TYPE_CHAR] = DRGN_TYPE_INT, [DRGN_C_TYPE_SIGNED_CHAR] = DRGN_TYPE_INT, @@ -133,6 +147,75 @@ drgn_primitive_type_is_signed(enum drgn_primitive_type primitive) } } +/* These functions compare the underlying type by reference, not by value. 
*/ + +static struct hash_pair +drgn_pointer_type_key_hash(const struct drgn_pointer_type_key *key) +{ + size_t hash; + + hash = hash_combine((uintptr_t)key->type, key->qualifiers); + hash = hash_combine(hash, (uintptr_t)key->lang); + return hash_pair_from_avalanching_hash(hash); +} + +static bool drgn_pointer_type_key_eq(const struct drgn_pointer_type_key *a, + const struct drgn_pointer_type_key *b) +{ + return (a->type == b->type && a->qualifiers == b->qualifiers && + a->lang == b->lang); +} + +DEFINE_HASH_TABLE_FUNCTIONS(drgn_pointer_type_table, drgn_pointer_type_key_hash, + drgn_pointer_type_key_eq) + +static struct hash_pair +drgn_array_type_key_hash(const struct drgn_array_type_key *key) +{ + size_t hash; + + hash = hash_combine((uintptr_t)key->type, key->qualifiers); + hash = hash_combine(hash, key->is_complete); + hash = hash_combine(hash, key->length); + hash = hash_combine(hash, (uintptr_t)key->lang); + return hash_pair_from_avalanching_hash(hash); +} + +static bool drgn_array_type_key_eq(const struct drgn_array_type_key *a, + const struct drgn_array_type_key *b) +{ + return (a->type == b->type && a->qualifiers == b->qualifiers && + a->is_complete == b->is_complete && a->length == b->length && + a->lang == b->lang); +} + +DEFINE_HASH_TABLE_FUNCTIONS(drgn_array_type_table, drgn_array_type_key_hash, + drgn_array_type_key_eq) + +static struct hash_pair drgn_member_hash_pair(const struct drgn_member_key *key) +{ + size_t hash; + if (key->name) + hash = cityhash_size_t(key->name, key->name_len); + else + hash = 0; + hash = hash_combine((uintptr_t)key->type, hash); + return hash_pair_from_avalanching_hash(hash); +} + +static bool drgn_member_eq(const struct drgn_member_key *a, + const struct drgn_member_key *b) +{ + return (a->type == b->type && a->name_len == b->name_len && + (!a->name_len || memcmp(a->name, b->name, a->name_len) == 0)); +} + +DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_hash_pair, + drgn_member_eq) + 
+DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, hash_pair_ptr_type, + hash_table_scalar_eq) + void drgn_type_thunk_free(struct drgn_type_thunk *thunk) { thunk->free_fn(thunk); @@ -948,3 +1031,499 @@ struct drgn_error *drgn_error_member_not_found(struct drgn_type *type, free(name); return err; } + +void drgn_program_init_types(struct drgn_program *prog) +{ + drgn_pointer_type_table_init(&prog->pointer_types); + drgn_array_type_table_init(&prog->array_types); + drgn_member_map_init(&prog->members); + drgn_type_set_init(&prog->members_cached); +} + +static void free_pointer_types(struct drgn_program *prog) +{ + struct drgn_pointer_type_table_iterator it; + + for (it = drgn_pointer_type_table_first(&prog->pointer_types); + it.entry; it = drgn_pointer_type_table_next(it)) + free(*it.entry); + drgn_pointer_type_table_deinit(&prog->pointer_types); +} + +static void free_array_types(struct drgn_program *prog) +{ + struct drgn_array_type_table_iterator it; + for (it = drgn_array_type_table_first(&prog->array_types); it.entry; + it = drgn_array_type_table_next(it)) + free(*it.entry); + drgn_array_type_table_deinit(&prog->array_types); +} + +void drgn_program_deinit_types(struct drgn_program *prog) +{ + drgn_member_map_deinit(&prog->members); + drgn_type_set_deinit(&prog->members_cached); + free_array_types(prog); + free_pointer_types(prog); + + struct drgn_type_finder *finder = prog->type_finders; + while (finder) { + struct drgn_type_finder *next = finder->next; + free(finder); + finder = next; + } +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_add_type_finder(struct drgn_program *prog, drgn_type_find_fn fn, + void *arg) +{ + struct drgn_type_finder *finder = malloc(sizeof(*finder)); + if (!finder) + return &drgn_enomem; + finder->fn = fn; + finder->arg = arg; + finder->next = prog->type_finders; + prog->type_finders = finder; + return NULL; +} + +struct drgn_error * +drgn_program_find_type_impl(struct drgn_program *prog, + enum drgn_type_kind kind, const char *name, + 
size_t name_len, const char *filename, + struct drgn_qualified_type *ret) +{ + struct drgn_type_finder *finder = prog->type_finders; + while (finder) { + struct drgn_error *err = + finder->fn(kind, name, name_len, filename, finder->arg, + ret); + if (!err) { + if (drgn_type_kind(ret->type) != kind) { + return drgn_error_create(DRGN_ERROR_TYPE, + "type find callback returned wrong kind of type"); + } + return NULL; + } + if (err != &drgn_not_found) + return err; + finder = finder->next; + } + return &drgn_not_found; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_find_type(struct drgn_program *prog, const char *name, + const char *filename, struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + err = drgn_program_language(prog)->find_type(prog, name, filename, ret); + if (err != &drgn_not_found) + return err; + + if (filename) { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s' in '%s'", name, + filename); + } else { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s'", name); + } +} + +/* Default long and unsigned long are 64 bits. */ +static struct drgn_type default_primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; +/* 32-bit versions of long and unsigned long. 
*/ +static struct drgn_type default_long_32bit; +static struct drgn_type default_unsigned_long_32bit; + +__attribute__((constructor(200))) +static void default_primitive_types_init(void) +{ + size_t i; + + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_CHAR], + drgn_primitive_type_spellings[DRGN_C_TYPE_CHAR][0], + 1, true, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SIGNED_CHAR], + drgn_primitive_type_spellings[DRGN_C_TYPE_SIGNED_CHAR][0], + 1, true, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_CHAR], + drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_CHAR][0], + 1, false, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SHORT], + drgn_primitive_type_spellings[DRGN_C_TYPE_SHORT][0], + 2, true, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_SHORT], + drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_SHORT][0], + 2, false, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_INT], + drgn_primitive_type_spellings[DRGN_C_TYPE_INT][0], 4, + true, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_INT], + drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_INT][0], + 4, false, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG], + drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], + 8, true, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG], + drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], + 8, false, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_LONG], + drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_LONG][0], + 8, true, &drgn_language_c); + drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG_LONG], + drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG_LONG][0], + 8, false, &drgn_language_c); + 
drgn_bool_type_init(&default_primitive_types[DRGN_C_TYPE_BOOL], + drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0], + 1, &drgn_language_c); + drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_FLOAT], + drgn_primitive_type_spellings[DRGN_C_TYPE_FLOAT][0], + 4, &drgn_language_c); + drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_DOUBLE], + drgn_primitive_type_spellings[DRGN_C_TYPE_DOUBLE][0], + 8, &drgn_language_c); + drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_DOUBLE], + drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_DOUBLE][0], + 16, &drgn_language_c); + for (i = 0; i < ARRAY_SIZE(default_primitive_types); i++) { + if (drgn_primitive_type_kind[i] == DRGN_TYPE_VOID || + i == DRGN_C_TYPE_SIZE_T || i == DRGN_C_TYPE_PTRDIFF_T) + continue; + assert(drgn_type_primitive(&default_primitive_types[i]) == i); + } + + drgn_int_type_init(&default_long_32bit, + drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], + 4, true, &drgn_language_c); + assert(drgn_type_primitive(&default_long_32bit) == + DRGN_C_TYPE_LONG); + + drgn_int_type_init(&default_unsigned_long_32bit, + drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], + 4, false, &drgn_language_c); + assert(drgn_type_primitive(&default_unsigned_long_32bit) == + DRGN_C_TYPE_UNSIGNED_LONG); +} + +struct drgn_error * +drgn_program_find_primitive_type(struct drgn_program *prog, + enum drgn_primitive_type type, + struct drgn_type **ret) +{ + struct drgn_error *err; + struct drgn_qualified_type qualified_type; + enum drgn_type_kind kind; + const char * const *spellings; + uint8_t word_size; + size_t i; + + if (prog->primitive_types[type]) { + *ret = prog->primitive_types[type]; + return NULL; + } + + kind = drgn_primitive_type_kind[type]; + if (kind == DRGN_TYPE_VOID) { + *ret = drgn_void_type(&drgn_language_c); + goto out; + } + + spellings = drgn_primitive_type_spellings[type]; + for (i = 0; spellings[i]; i++) { + err = drgn_program_find_type_impl(prog, kind, spellings[i], + 
strlen(spellings[i]), NULL, + &qualified_type); + if (!err && drgn_type_primitive(qualified_type.type) == type) { + *ret = qualified_type.type; + goto out; + } else if (err && err != &drgn_not_found) { + return err; + } + } + + if (!prog->has_platform) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "program word size is not known"); + } + word_size = drgn_program_is_64_bit(prog) ? 8 : 4; + + /* long and unsigned long default to the word size. */ + if (type == DRGN_C_TYPE_LONG || type == DRGN_C_TYPE_UNSIGNED_LONG) { + if (word_size == 4) { + *ret = (type == DRGN_C_TYPE_LONG ? + &default_long_32bit : + &default_unsigned_long_32bit); + goto out; + } + } + /* + * size_t and ptrdiff_t default to typedefs of whatever integer type + * matches the word size. + */ + if (type == DRGN_C_TYPE_SIZE_T || type == DRGN_C_TYPE_PTRDIFF_T) { + static enum drgn_primitive_type integer_types[2][3] = { + { + DRGN_C_TYPE_UNSIGNED_LONG, + DRGN_C_TYPE_UNSIGNED_LONG_LONG, + DRGN_C_TYPE_UNSIGNED_INT, + }, + { + DRGN_C_TYPE_LONG, + DRGN_C_TYPE_LONG_LONG, + DRGN_C_TYPE_INT, + }, + }; + + for (i = 0; i < 3; i++) { + enum drgn_primitive_type integer_type; + + integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; + err = drgn_program_find_primitive_type(prog, + integer_type, + &qualified_type.type); + if (err) + return err; + if (drgn_type_size(qualified_type.type) == word_size) { + qualified_type.qualifiers = 0; + *ret = (type == DRGN_C_TYPE_SIZE_T ? 
+ &prog->default_size_t : + &prog->default_ptrdiff_t); + drgn_typedef_type_init(*ret, spellings[0], + qualified_type, &drgn_language_c); + goto out; + } + } + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "no suitable integer type for %s", + spellings[0]); + } + + *ret = &default_primitive_types[type]; +out: + prog->primitive_types[type] = *ret; + return NULL; +} + +struct drgn_error * +drgn_program_pointer_type(struct drgn_program *prog, + struct drgn_qualified_type referenced_type, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + const struct drgn_pointer_type_key key = { + .type = referenced_type.type, + .qualifiers = referenced_type.qualifiers, + .lang = lang ? lang : drgn_type_language(referenced_type.type), + }; + struct drgn_pointer_type_table_iterator it; + struct drgn_type *type; + struct hash_pair hp; + + if (!prog->has_platform) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "program word size is not known"); + } + + hp = drgn_pointer_type_table_hash(&key); + it = drgn_pointer_type_table_search_hashed(&prog->pointer_types, &key, + hp); + if (it.entry) { + type = *it.entry; + goto out; + } + + type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + drgn_pointer_type_init(type, drgn_program_is_64_bit(prog) ? 8 : 4, + referenced_type, key.lang); + if (drgn_pointer_type_table_insert_searched(&prog->pointer_types, &type, + hp, NULL) == -1) { + free(type); + return &drgn_enomem; + } +out: + *ret = type; + return NULL; +} + +struct drgn_error * +drgn_program_array_type(struct drgn_program *prog, uint64_t length, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + const struct drgn_array_type_key key = { + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .is_complete = true, + .length = length, + .lang = lang ? 
lang : drgn_type_language(element_type.type), + }; + struct drgn_array_type_table_iterator it; + struct drgn_type *type; + struct hash_pair hp; + + hp = drgn_array_type_table_hash(&key); + it = drgn_array_type_table_search_hashed(&prog->array_types, &key, hp); + if (it.entry) { + type = *it.entry; + goto out; + } + + type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + drgn_array_type_init(type, length, element_type, key.lang); + if (drgn_array_type_table_insert_searched(&prog->array_types, &type, hp, + NULL) == -1) { + free(type); + return &drgn_enomem; + } +out: + *ret = type; + return NULL; +} + +struct drgn_error * +drgn_program_incomplete_array_type(struct drgn_program *prog, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + const struct drgn_array_type_key key = { + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .is_complete = false, + .lang = lang ? lang : drgn_type_language(element_type.type), + }; + struct drgn_array_type_table_iterator it; + struct drgn_type *type; + struct hash_pair hp; + + hp = drgn_array_type_table_hash(&key); + it = drgn_array_type_table_search_hashed(&prog->array_types, &key, hp); + if (it.entry) { + type = *it.entry; + goto out; + } + + type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + drgn_array_type_init_incomplete(type, element_type, key.lang); + if (drgn_array_type_table_insert_searched(&prog->array_types, &type, hp, + NULL) == -1) { + free(type); + return &drgn_enomem; + } +out: + *ret = type; + return NULL; +} + +static struct drgn_error * +drgn_program_cache_members(struct drgn_program *prog, + struct drgn_type *outer_type, + struct drgn_type *type, uint64_t bit_offset) +{ + if (!drgn_type_has_members(type)) + return NULL; + + struct drgn_type_member *members = drgn_type_members(type); + size_t num_members = drgn_type_num_members(type); + for (size_t i = 0; i < num_members; i++) { + struct drgn_type_member 
*member = &members[i]; + if (member->name) { + struct drgn_member_map_entry entry = { + .key = { + .type = outer_type, + .name = member->name, + .name_len = strlen(member->name), + }, + .value = { + .type = &member->type, + .bit_offset = + bit_offset + member->bit_offset, + .bit_field_size = + member->bit_field_size, + }, + }; + if (drgn_member_map_insert(&prog->members, &entry, + NULL) == -1) + return &drgn_enomem; + } else { + struct drgn_qualified_type member_type; + struct drgn_error *err = drgn_member_type(member, + &member_type); + if (err) + return err; + err = drgn_program_cache_members(prog, outer_type, + member_type.type, + bit_offset + + member->bit_offset); + if (err) + return err; + } + } + return NULL; +} + +struct drgn_error *drgn_program_find_member(struct drgn_program *prog, + struct drgn_type *type, + const char *member_name, + size_t member_name_len, + struct drgn_member_value **ret) +{ + const struct drgn_member_key key = { + .type = drgn_underlying_type(type), + .name = member_name, + .name_len = member_name_len, + }; + struct hash_pair hp = drgn_member_map_hash(&key); + struct drgn_member_map_iterator it = + drgn_member_map_search_hashed(&prog->members, &key, hp); + if (it.entry) { + *ret = &it.entry->value; + return NULL; + } + + /* + * Cache miss. One of the following is true: + * + * 1. The type isn't a structure, union, or class, which is a type + * error. + * 2. The type hasn't been cached, which means we need to cache it and + * check again. + * 3. The type has already been cached, which means the member doesn't + * exist. 
+ */ + if (!drgn_type_has_members(key.type)) { + return drgn_type_error("'%s' is not a structure, union, or class", + type); + } + struct hash_pair cached_hp = drgn_type_set_hash(&key.type); + if (drgn_type_set_search_hashed(&prog->members_cached, &key.type, + cached_hp).entry) + return drgn_error_member_not_found(type, member_name); + + struct drgn_error *err = drgn_program_cache_members(prog, key.type, + key.type, 0); + if (err) + return err; + + if (drgn_type_set_insert_searched(&prog->members_cached, &key.type, + cached_hp, NULL) == -1) + return &drgn_enomem; + + it = drgn_member_map_search_hashed(&prog->members, &key, hp); + if (it.entry) { + *ret = &it.entry->value; + return NULL; + } + + return drgn_error_member_not_found(type, member_name); +} diff --git a/libdrgn/type.h b/libdrgn/type.h index 4acae99e2..d92c11379 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -13,6 +13,7 @@ #define DRGN_TYPE_H #include "drgn.h" +#include "hash_table.h" #include "language.h" /** @@ -29,6 +30,92 @@ * @{ */ +/** Registered type finding callback in a @ref drgn_program. */ +struct drgn_type_finder { + /** The callback. */ + drgn_type_find_fn fn; + /** Argument to pass to @ref drgn_type_finder::fn. */ + void *arg; + /** Next callback to try. 
*/ + struct drgn_type_finder *next; +}; + +struct drgn_pointer_type_key { + struct drgn_type *type; + enum drgn_qualifiers qualifiers; + const struct drgn_language *lang; +}; + +static struct drgn_pointer_type_key +drgn_pointer_type_entry_to_key(struct drgn_type * const *entry) +{ + struct drgn_qualified_type referenced_type = drgn_type_type(*entry); + + return (struct drgn_pointer_type_key){ + .type = referenced_type.type, + .qualifiers = referenced_type.qualifiers, + .lang = drgn_type_language(*entry), + }; +} + +struct drgn_array_type_key { + struct drgn_type *type; + enum drgn_qualifiers qualifiers; + bool is_complete; + uint64_t length; + const struct drgn_language *lang; +}; + +static struct drgn_array_type_key +drgn_array_type_entry_to_key(struct drgn_type * const *entry) +{ + struct drgn_qualified_type element_type = drgn_type_type(*entry); + + return (struct drgn_array_type_key){ + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .is_complete = drgn_type_is_complete(*entry), + .length = drgn_type_length(*entry), + .lang = drgn_type_language(*entry), + }; +} + +DEFINE_HASH_TABLE_TYPE(drgn_pointer_type_table, struct drgn_type *, + drgn_pointer_type_entry_to_key) +DEFINE_HASH_TABLE_TYPE(drgn_array_type_table, struct drgn_type *, + drgn_array_type_entry_to_key) + +/** (type, member name) pair. */ +struct drgn_member_key { + struct drgn_type *type; + const char *name; + size_t name_len; +}; + +/** Type, offset, and bit field size of a type member. */ +struct drgn_member_value { + struct drgn_lazy_type *type; + uint64_t bit_offset, bit_field_size; +}; + +#ifdef DOXYGEN +/** + * @struct drgn_member_map + * + * Map of compound type members. + * + * The key is a @ref drgn_member_key, and the value is a @ref drgn_member_value. + * + * @struct drgn_type_set + * + * Set of types compared by address. 
+ */ +#else +DEFINE_HASH_MAP_TYPE(drgn_member_map, struct drgn_member_key, + struct drgn_member_value) +DEFINE_HASH_SET_TYPE(drgn_type_set, struct drgn_type *) +#endif + /** * @defgroup LazyTypes Lazy types * @@ -482,26 +569,6 @@ void drgn_function_type_init(struct drgn_type *type, /** Mapping from @ref drgn_type_kind to the spelling of that kind. */ extern const char * const drgn_type_kind_spelling[]; -/** - * Names of primitive types. - * - * In some languages, like C, the same primitive type can be spelled in multiple - * ways. For example, "int" can also be spelled "signed int" or "int signed". - * - * This maps each @ref drgn_primitive_type to a ``NULL``-terminated array of the - * different ways to spell that type. The spelling at index zero is the - * preferred spelling. - */ -extern const char * const * const -drgn_primitive_type_spellings[DRGN_PRIMITIVE_TYPE_NUM]; - -/** - * Mapping from a @ref drgn_type_primitive to the corresponding @ref - * drgn_type_kind. - */ -extern const enum drgn_type_kind -drgn_primitive_type_kind[DRGN_PRIMITIVE_TYPE_NUM + 1]; - /** * Parse the name of an unqualified primitive C type. * @@ -594,6 +661,122 @@ struct drgn_error *drgn_type_bit_size(struct drgn_type *type, /** Get the appropriate @ref drgn_object_kind for a @ref drgn_type. */ enum drgn_object_kind drgn_type_object_kind(struct drgn_type *type); +/** Initialize type-related fields in a @ref drgn_program. */ +void drgn_program_init_types(struct drgn_program *prog); +/** Deinitialize type-related fields in a @ref drgn_program. */ +void drgn_program_deinit_types(struct drgn_program *prog); + +/** + * Find a parsed type in a @ref drgn_program. + * + * This should only be called by implementations of @ref + * drgn_language::find_type() + * + * @param[in] kind Kind of type to find. Must be @ref DRGN_TYPE_STRUCT, @ref + * DRGN_TYPE_UNION, @ref DRGN_TYPE_CLASS, @ref DRGN_TYPE_ENUM, or @ref + * DRGN_TYPE_TYPEDEF. + * @param[in] name Name of the type. 
+ * @param[in] name_len Length of @p name in bytes. + * @param[in] filename See @ref drgn_program_find_type(). + * @param[out] ret Returned type. + * @return @c NULL on success, &@ref drgn_not_found if the type wasn't found, + * non-@c NULL on other error. + */ +struct drgn_error * +drgn_program_find_type_impl(struct drgn_program *prog, + enum drgn_type_kind kind, const char *name, + size_t name_len, const char *filename, + struct drgn_qualified_type *ret); + +/** Find a primitive type in a @ref drgn_program. */ +struct drgn_error * +drgn_program_find_primitive_type(struct drgn_program *prog, + enum drgn_primitive_type type, + struct drgn_type **ret); + +/** + * Create a pointer type. + * + * The created type is cached for the lifetime of the @ref drgn_program. If the + * same @p referenced_type and @p lang are passed, the same type will be + * returned. + * + * If this succeeds, @p referenced_type must remain valid until @p prog is + * destroyed. + * + * @param[in] referenced_type Type referenced by the pointer type. + * @param[in] lang Language of the pointer type. If @c NULL, the language of @p + * referenced_type is used. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error * +drgn_program_pointer_type(struct drgn_program *prog, + struct drgn_qualified_type referenced_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +/** + * Create an array type. + * + * The created type is cached for the lifetime of the @ref drgn_program. If the + * same @p length, @p element_type, and @p lang are passed, the same type will + * be returned. + * + * @param[in] length Number of elements in the array type. + * @param[in] element_type Type of an element in the array type. + * @param[in] lang Language of the array type. If @c NULL, the language of @p + * element_type is used. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +struct drgn_error * +drgn_program_array_type(struct drgn_program *prog, uint64_t length, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +/** + * Create an incomplete array type. + * + * The created type is cached for the lifetime of the @ref drgn_program. If the + * same @p element_type and @p lang are passed, the same type will be returned. + * + * If this succeeds, @p element_type must remain valid until @p prog is + * destroyed. + * + * @param[in] element_type Type of an element in the array type. + * @param[in] lang Language of the array type. If @c NULL, the language of @p + * element_type is used. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error * +drgn_program_incomplete_array_type(struct drgn_program *prog, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +/** + * Find the type, offset, and bit field size of a type member. + * + * This matches the members of the type itself as well as the members of any + * unnamed members of the type. + * + * This caches all members of @p type for subsequent calls. + * + * @param[in] type Compound type to search in. + * @param[in] member_name Name of member. + * @param[in] member_name_len Length of @p member_name + * @param[out] ret Returned member information. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error *drgn_program_find_member(struct drgn_program *prog, + struct drgn_type *type, + const char *member_name, + size_t member_name_len, + struct drgn_member_value **ret); + /** @} */ #endif /* DRGN_TYPE_H */ diff --git a/libdrgn/type_index.c b/libdrgn/type_index.c deleted file mode 100644 index 5b5113fef..000000000 --- a/libdrgn/type_index.c +++ /dev/null @@ -1,622 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
-// SPDX-License-Identifier: GPL-3.0+ - -#include -#include - -#include "internal.h" -#include "language.h" -#include "type_index.h" - -/* These functions compare the underlying type by reference, not by value. */ - -static struct hash_pair -drgn_pointer_type_key_hash(const struct drgn_pointer_type_key *key) -{ - size_t hash; - - hash = hash_combine((uintptr_t)key->type, key->qualifiers); - hash = hash_combine(hash, (uintptr_t)key->lang); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_pointer_type_key_eq(const struct drgn_pointer_type_key *a, - const struct drgn_pointer_type_key *b) -{ - return (a->type == b->type && a->qualifiers == b->qualifiers && - a->lang == b->lang); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_pointer_type_table, drgn_pointer_type_key_hash, - drgn_pointer_type_key_eq) - -static struct hash_pair -drgn_array_type_key_hash(const struct drgn_array_type_key *key) -{ - size_t hash; - - hash = hash_combine((uintptr_t)key->type, key->qualifiers); - hash = hash_combine(hash, key->is_complete); - hash = hash_combine(hash, key->length); - hash = hash_combine(hash, (uintptr_t)key->lang); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_array_type_key_eq(const struct drgn_array_type_key *a, - const struct drgn_array_type_key *b) -{ - return (a->type == b->type && a->qualifiers == b->qualifiers && - a->is_complete == b->is_complete && a->length == b->length && - a->lang == b->lang); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_array_type_table, drgn_array_type_key_hash, - drgn_array_type_key_eq) - -static struct hash_pair drgn_member_hash_pair(const struct drgn_member_key *key) -{ - size_t hash; - - if (key->name) - hash = cityhash_size_t(key->name, key->name_len); - else - hash = 0; - hash = hash_combine((uintptr_t)key->type, hash); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_member_eq(const struct drgn_member_key *a, - const struct drgn_member_key *b) -{ - return (a->type == b->type && 
a->name_len == b->name_len && - (!a->name_len || memcmp(a->name, b->name, a->name_len) == 0)); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_hash_pair, - drgn_member_eq) - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, hash_pair_ptr_type, - hash_table_scalar_eq) - -void drgn_type_index_init(struct drgn_type_index *tindex) -{ - tindex->finders = NULL; - memset(tindex->primitive_types, 0, sizeof(tindex->primitive_types)); - drgn_pointer_type_table_init(&tindex->pointer_types); - drgn_array_type_table_init(&tindex->array_types); - drgn_member_map_init(&tindex->members); - drgn_type_set_init(&tindex->members_cached); - tindex->word_size = 0; -} - -static void free_pointer_types(struct drgn_type_index *tindex) -{ - struct drgn_pointer_type_table_iterator it; - - for (it = drgn_pointer_type_table_first(&tindex->pointer_types); - it.entry; it = drgn_pointer_type_table_next(it)) - free(*it.entry); - drgn_pointer_type_table_deinit(&tindex->pointer_types); -} - -static void free_array_types(struct drgn_type_index *tindex) -{ - struct drgn_array_type_table_iterator it; - - for (it = drgn_array_type_table_first(&tindex->array_types); it.entry; - it = drgn_array_type_table_next(it)) - free(*it.entry); - drgn_array_type_table_deinit(&tindex->array_types); -} - -void drgn_type_index_deinit(struct drgn_type_index *tindex) -{ - struct drgn_type_finder *finder; - - drgn_member_map_deinit(&tindex->members); - drgn_type_set_deinit(&tindex->members_cached); - free_array_types(tindex); - free_pointer_types(tindex); - - finder = tindex->finders; - while (finder) { - struct drgn_type_finder *next = finder->next; - - free(finder); - finder = next; - } -} - -struct drgn_error *drgn_type_index_add_finder(struct drgn_type_index *tindex, - drgn_type_find_fn fn, void *arg) -{ - struct drgn_type_finder *finder; - - finder = malloc(sizeof(*finder)); - if (!finder) - return &drgn_enomem; - finder->fn = fn; - finder->arg = arg; - finder->next = tindex->finders; - tindex->finders = 
finder; - return NULL; -} - -void drgn_type_index_remove_finder(struct drgn_type_index *tindex) -{ - struct drgn_type_finder *finder; - - finder = tindex->finders->next; - free(tindex->finders); - tindex->finders = finder; -} - -/* Default long and unsigned long are 64 bits. */ -static struct drgn_type default_primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; -/* 32-bit versions of long and unsigned long. */ -static struct drgn_type default_long_32bit; -static struct drgn_type default_unsigned_long_32bit; - -__attribute__((constructor(200))) -static void default_primitive_types_init(void) -{ - size_t i; - - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_CHAR][0], - 1, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SIGNED_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_SIGNED_CHAR][0], - 1, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_CHAR][0], - 1, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SHORT], - drgn_primitive_type_spellings[DRGN_C_TYPE_SHORT][0], - 2, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_SHORT], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_SHORT][0], - 2, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_INT], - drgn_primitive_type_spellings[DRGN_C_TYPE_INT][0], 4, - true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_INT], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_INT][0], - 4, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], - 8, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG], - 
drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], - 8, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_LONG][0], - 8, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG_LONG][0], - 8, false, &drgn_language_c); - drgn_bool_type_init(&default_primitive_types[DRGN_C_TYPE_BOOL], - drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0], - 1, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_FLOAT], - drgn_primitive_type_spellings[DRGN_C_TYPE_FLOAT][0], - 4, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_DOUBLE], - drgn_primitive_type_spellings[DRGN_C_TYPE_DOUBLE][0], - 8, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_DOUBLE], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_DOUBLE][0], - 16, &drgn_language_c); - for (i = 0; i < ARRAY_SIZE(default_primitive_types); i++) { - if (drgn_primitive_type_kind[i] == DRGN_TYPE_VOID || - i == DRGN_C_TYPE_SIZE_T || i == DRGN_C_TYPE_PTRDIFF_T) - continue; - assert(drgn_type_primitive(&default_primitive_types[i]) == i); - } - - drgn_int_type_init(&default_long_32bit, - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], - 4, true, &drgn_language_c); - assert(drgn_type_primitive(&default_long_32bit) == - DRGN_C_TYPE_LONG); - - drgn_int_type_init(&default_unsigned_long_32bit, - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], - 4, false, &drgn_language_c); - assert(drgn_type_primitive(&default_unsigned_long_32bit) == - DRGN_C_TYPE_UNSIGNED_LONG); -} - -/* - * Like @ref drgn_type_index_find_parsed(), but returns - * &drgn_error_not_found instead of a more informative error message. 
- */ -static struct drgn_error * -drgn_type_index_find_parsed_internal(struct drgn_type_index *tindex, - enum drgn_type_kind kind, const char *name, - size_t name_len, const char *filename, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - struct drgn_type_finder *finder; - - finder = tindex->finders; - while (finder) { - err = finder->fn(kind, name, name_len, filename, finder->arg, - ret); - if (!err) { - if (drgn_type_kind(ret->type) != kind) { - return drgn_error_create(DRGN_ERROR_TYPE, - "type find callback returned wrong kind of type"); - } - return NULL; - } - if (err != &drgn_not_found) - return err; - finder = finder->next; - } - return &drgn_not_found; -} - -struct drgn_error * -drgn_type_index_find_primitive(struct drgn_type_index *tindex, - enum drgn_primitive_type type, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - enum drgn_type_kind kind; - const char * const *spellings; - size_t i; - - if (tindex->primitive_types[type]) { - *ret = tindex->primitive_types[type]; - return NULL; - } - - kind = drgn_primitive_type_kind[type]; - if (kind == DRGN_TYPE_VOID) { - *ret = drgn_void_type(&drgn_language_c); - goto out; - } - - spellings = drgn_primitive_type_spellings[type]; - for (i = 0; spellings[i]; i++) { - err = drgn_type_index_find_parsed_internal(tindex, kind, - spellings[i], - strlen(spellings[i]), - NULL, - &qualified_type); - if (!err && drgn_type_primitive(qualified_type.type) == type) { - *ret = qualified_type.type; - goto out; - } else if (err && err != &drgn_not_found) { - return err; - } - } - - /* long and unsigned long default to the word size. */ - if (type == DRGN_C_TYPE_LONG || type == DRGN_C_TYPE_UNSIGNED_LONG) { - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - if (tindex->word_size == 4) { - *ret = (type == DRGN_C_TYPE_LONG ? 
- &default_long_32bit : - &default_unsigned_long_32bit); - goto out; - } - } - /* - * size_t and ptrdiff_t default to typedefs of whatever integer type - * matches the word size. - */ - if (type == DRGN_C_TYPE_SIZE_T || type == DRGN_C_TYPE_PTRDIFF_T) { - static enum drgn_primitive_type integer_types[2][3] = { - { - DRGN_C_TYPE_UNSIGNED_LONG, - DRGN_C_TYPE_UNSIGNED_LONG_LONG, - DRGN_C_TYPE_UNSIGNED_INT, - }, - { - DRGN_C_TYPE_LONG, - DRGN_C_TYPE_LONG_LONG, - DRGN_C_TYPE_INT, - }, - }; - - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - for (i = 0; i < 3; i++) { - enum drgn_primitive_type integer_type; - - integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; - err = drgn_type_index_find_primitive(tindex, - integer_type, - &qualified_type.type); - if (err) - return err; - if (drgn_type_size(qualified_type.type) == - tindex->word_size) { - qualified_type.qualifiers = 0; - *ret = (type == DRGN_C_TYPE_SIZE_T ? - &tindex->default_size_t : - &tindex->default_ptrdiff_t); - drgn_typedef_type_init(*ret, spellings[0], - qualified_type, &drgn_language_c); - goto out; - } - } - return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, - "no suitable integer type for %s", - spellings[0]); - } - - *ret = &default_primitive_types[type]; - -out: - tindex->primitive_types[type] = *ret; - return NULL; -} - -struct drgn_error * -drgn_type_index_find_parsed(struct drgn_type_index *tindex, - enum drgn_type_kind kind, const char *name, - size_t name_len, const char *filename, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - int precision; - - err = drgn_type_index_find_parsed_internal(tindex, kind, name, name_len, - filename, ret); - if (err != &drgn_not_found) - return err; - - precision = name_len < INT_MAX ? 
(int)name_len : INT_MAX; - if (filename) { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find '%s %.*s' in '%s'", - drgn_type_kind_spelling[kind], precision, name, - filename); - } else { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find '%s %.*s'", - drgn_type_kind_spelling[kind], precision, name); - } -} - -struct drgn_error * -drgn_type_index_pointer_type(struct drgn_type_index *tindex, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_pointer_type_key key = { - .type = referenced_type.type, - .qualifiers = referenced_type.qualifiers, - .lang = lang ? lang : drgn_type_language(referenced_type.type), - }; - struct drgn_pointer_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - - hp = drgn_pointer_type_table_hash(&key); - it = drgn_pointer_type_table_search_hashed(&tindex->pointer_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_pointer_type_init(type, tindex->word_size, referenced_type, - key.lang); - if (drgn_pointer_type_table_insert_searched(&tindex->pointer_types, - &type, hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -struct drgn_error * -drgn_type_index_array_type(struct drgn_type_index *tindex, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_array_type_key key = { - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = true, - .length = length, - .lang = lang ? 
lang : drgn_type_language(element_type.type), - }; - struct drgn_array_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - hp = drgn_array_type_table_hash(&key); - it = drgn_array_type_table_search_hashed(&tindex->array_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_array_type_init(type, length, element_type, key.lang); - if (drgn_array_type_table_insert_searched(&tindex->array_types, &type, - hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -struct drgn_error * -drgn_type_index_incomplete_array_type(struct drgn_type_index *tindex, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_array_type_key key = { - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = false, - .lang = lang ? lang : drgn_type_language(element_type.type), - }; - struct drgn_array_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - hp = drgn_array_type_table_hash(&key); - it = drgn_array_type_table_search_hashed(&tindex->array_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_array_type_init_incomplete(type, element_type, key.lang); - if (drgn_array_type_table_insert_searched(&tindex->array_types, &type, - hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -static struct drgn_error * -drgn_type_index_cache_members(struct drgn_type_index *tindex, - struct drgn_type *outer_type, - struct drgn_type *type, uint64_t bit_offset) -{ - struct drgn_error *err; - struct drgn_type_member *members; - size_t num_members, i; - - if (!drgn_type_has_members(type)) - return NULL; - - members = drgn_type_members(type); - num_members = 
drgn_type_num_members(type); - for (i = 0; i < num_members; i++) { - struct drgn_type_member *member; - - member = &members[i]; - if (member->name) { - struct drgn_member_map_entry entry = { - .key = { - .type = outer_type, - .name = member->name, - .name_len = strlen(member->name), - }, - .value = { - .type = &member->type, - .bit_offset = - bit_offset + member->bit_offset, - .bit_field_size = - member->bit_field_size, - }, - }; - - if (drgn_member_map_insert(&tindex->members, &entry, - NULL) == -1) - return &drgn_enomem; - } else { - struct drgn_qualified_type member_type; - - err = drgn_member_type(member, &member_type); - if (err) - return err; - err = drgn_type_index_cache_members(tindex, outer_type, - member_type.type, - bit_offset + - member->bit_offset); - if (err) - return err; - } - } - return NULL; -} - -struct drgn_error *drgn_type_index_find_member(struct drgn_type_index *tindex, - struct drgn_type *type, - const char *member_name, - size_t member_name_len, - struct drgn_member_value **ret) -{ - struct drgn_error *err; - const struct drgn_member_key key = { - .type = drgn_underlying_type(type), - .name = member_name, - .name_len = member_name_len, - }; - struct hash_pair hp, cached_hp; - struct drgn_member_map_iterator it; - - hp = drgn_member_map_hash(&key); - it = drgn_member_map_search_hashed(&tindex->members, &key, hp); - if (it.entry) { - *ret = &it.entry->value; - return NULL; - } - - /* - * Cache miss. One of the following is true: - * - * 1. The type isn't a structure, union, or class, which is a type - * error. - * 2. The type hasn't been cached, which means we need to cache it and - * check again. - * 3. The type has already been cached, which means the member doesn't - * exist. 
- */ - if (!drgn_type_has_members(key.type)) { - return drgn_type_error("'%s' is not a structure, union, or class", - type); - } - cached_hp = drgn_type_set_hash(&key.type); - if (drgn_type_set_search_hashed(&tindex->members_cached, &key.type, - cached_hp).entry) - return drgn_error_member_not_found(type, member_name); - - err = drgn_type_index_cache_members(tindex, key.type, key.type, 0); - if (err) - return err; - - if (drgn_type_set_insert_searched(&tindex->members_cached, &key.type, - cached_hp, NULL) == -1) - return &drgn_enomem; - - it = drgn_member_map_search_hashed(&tindex->members, &key, hp); - if (it.entry) { - *ret = &it.entry->value; - return NULL; - } - - return drgn_error_member_not_found(type, member_name); -} diff --git a/libdrgn/type_index.h b/libdrgn/type_index.h deleted file mode 100644 index 46425ac2f..000000000 --- a/libdrgn/type_index.h +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -/** - * @file - * - * Type lookup and caching. - * - * See @ref TypeIndex. - */ - -#ifndef DRGN_TYPE_INDEX_H -#define DRGN_TYPE_INDEX_H - -#include - -#include "drgn.h" -#include "hash_table.h" -#include "language.h" -#include "type.h" - -/** - * @ingroup Internals - * - * @defgroup TypeIndex Type index - * - * Type lookup and caching. - * - * @ref drgn_type_index provides a common interface for finding types in a - * program. 
- * - * @{ - */ - -struct drgn_pointer_type_key { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - const struct drgn_language *lang; -}; - -static struct drgn_pointer_type_key -drgn_pointer_type_entry_to_key(struct drgn_type * const *entry) -{ - struct drgn_qualified_type referenced_type = drgn_type_type(*entry); - - return (struct drgn_pointer_type_key){ - .type = referenced_type.type, - .qualifiers = referenced_type.qualifiers, - .lang = drgn_type_language(*entry), - }; -} - -struct drgn_array_type_key { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - bool is_complete; - uint64_t length; - const struct drgn_language *lang; -}; - -static struct drgn_array_type_key -drgn_array_type_entry_to_key(struct drgn_type * const *entry) -{ - struct drgn_qualified_type element_type = drgn_type_type(*entry); - - return (struct drgn_array_type_key){ - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = drgn_type_is_complete(*entry), - .length = drgn_type_length(*entry), - .lang = drgn_type_language(*entry), - }; -} - -DEFINE_HASH_TABLE_TYPE(drgn_pointer_type_table, struct drgn_type *, - drgn_pointer_type_entry_to_key) -DEFINE_HASH_TABLE_TYPE(drgn_array_type_table, struct drgn_type *, - drgn_array_type_entry_to_key) - -/** (type, member name) pair. */ -struct drgn_member_key { - struct drgn_type *type; - const char *name; - size_t name_len; -}; - -/** Type, offset, and bit field size of a type member. */ -struct drgn_member_value { - struct drgn_lazy_type *type; - uint64_t bit_offset, bit_field_size; -}; - -#ifdef DOXYGEN -/** - * @struct drgn_member_map - * - * Map of compound type members. - * - * The key is a @ref drgn_member_key, and the value is a @ref drgn_member_value. - * - * @struct drgn_type_set - * - * Set of types compared by address. 
- */ -#else -DEFINE_HASH_MAP_TYPE(drgn_member_map, struct drgn_member_key, - struct drgn_member_value) -DEFINE_HASH_SET_TYPE(drgn_type_set, struct drgn_type *) -#endif - -/** Registered callback in a @ref drgn_type_index. */ -struct drgn_type_finder { - /** The callback. */ - drgn_type_find_fn fn; - /** Argument to pass to @ref drgn_type_finder::fn. */ - void *arg; - /** Next callback to try. */ - struct drgn_type_finder *next; -}; - -/** - * Type index. - * - * A type index is used to find types by name and cache the results. The types - * are found using callbacks which are registered with @ref - * drgn_type_index_add_finder(). - * - * @ref drgn_type_index_find() searches for a type. @ref - * drgn_type_index_pointer_type(), @ref drgn_type_index_array_type(), and @ref - * drgn_type_index_incomplete_array_type() create derived types. Any type - * returned by these is valid until the type index is destroyed with @ref - * drgn_type_index_destroy(). - */ -struct drgn_type_index { - /** Callbacks for finding types. */ - struct drgn_type_finder *finders; - /** Cache of primitive types. */ - struct drgn_type *primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; - struct drgn_type default_size_t; - struct drgn_type default_ptrdiff_t; - /** Cache of created pointer types. */ - struct drgn_pointer_type_table pointer_types; - /** Cache of created array types. */ - struct drgn_array_type_table array_types; - /** Cache for @ref drgn_type_index_find_member(). */ - struct drgn_member_map members; - /** - * Set of types which have been already cached in @ref - * drgn_type_index::members. - */ - struct drgn_type_set members_cached; - /** - * Size of a pointer in bytes. - * - * This is zero if it has not been set yet. - */ - uint8_t word_size; -}; - -/** - * Initialize a @ref drgn_type_index. - * - * @param[in] tindex Type index to initialize. - */ -void drgn_type_index_init(struct drgn_type_index *tindex); - -/** Deinitialize a @ref drgn_type_index. 
*/ -void drgn_type_index_deinit(struct drgn_type_index *tindex); - -/** @sa drgn_program_add_type_finder() */ -struct drgn_error *drgn_type_index_add_finder(struct drgn_type_index *tindex, - drgn_type_find_fn fn, void *arg); - -/** - * Remove the most recently added type finding callback. - * - * This must only be called if the type index hasn't been used since the finder - * was added. - */ -void drgn_type_index_remove_finder(struct drgn_type_index *tindex); - -/** Find a primitive type in a @ref drgn_type_index. */ -struct drgn_error * -drgn_type_index_find_primitive(struct drgn_type_index *tindex, - enum drgn_primitive_type type, - struct drgn_type **ret); - -/** - * Find a parsed type in a @ref drgn_type_index. - * - * This should only be called by implementations of @ref - * drgn_language::find_type(). - * - * @param[in] kind Kind of type to find. Must be @ref DRGN_TYPE_STRUCT, @ref - * DRGN_TYPE_UNION, @ref DRGN_TYPE_CLASS, @ref DRGN_TYPE_ENUM, or @ref - * DRGN_TYPE_TYPEDEF. - * @param[in] name Name of the type. - * @param[in] name_len Length of @p name in bytes. - * @param[in] filename See @ref drgn_type_index_find(). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_find_parsed(struct drgn_type_index *tindex, - enum drgn_type_kind kind, const char *name, - size_t name_len, const char *filename, - struct drgn_qualified_type *ret); - -/** - * Find a type in a @ref drgn_type_index. - * - * The returned type is valid for the lifetime of the @ref drgn_type_index. - * - * @param[in] tindex Type index. - * @param[in] name Name of the type. - * @param[in] filename Exact filename containing the type definition, or @c NULL - * for any definition. - * @param[in] lang Language to use to parse @p name. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static inline struct drgn_error * -drgn_type_index_find(struct drgn_type_index *tindex, const char *name, - const char *filename, const struct drgn_language *lang, - struct drgn_qualified_type *ret) -{ - return lang->find_type(tindex, name, filename, ret); -} - -/** - * Create a pointer type. - * - * The created type is cached for the lifetime of the @ref drgn_type_index. If - * the same @p referenced_type and @p lang are passed, the same type will be - * returned. - * - * If this succeeds, @p referenced_type must remain valid until @p tindex is - * destroyed. - * - * @param[in] tindex Type index. - * @param[in] referenced_type Type referenced by the pointer type. - * @param[in] lang Language of the pointer type. If @c NULL, the language of @p - * referenced_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_pointer_type(struct drgn_type_index *tindex, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Create an array type. - * - * The created type is cached for the lifetime of the @ref drgn_type_index. If - * the same @p length, @p element_type, and @p lang are passed, the same type - * will be returned. - * - * If this succeeds, @p element_type must remain valid until @p tindex is - * destroyed. - * - * @param[in] tindex Type index. - * @param[in] length Number of elements in the array type. - * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of the array type. If @c NULL, the language of @p - * element_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_array_type(struct drgn_type_index *tindex, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Create an incomplete array type. 
- * - * The created type is cached for the lifetime of the @ref drgn_type_index. If - * the same @p element_type and @p lang are passed, the same type will be - * returned. - * - * If this succeeds, @p element_type must remain valid until @p tindex is - * destroyed. - * - * @param[in] tindex Type index. - * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of the array type. If @c NULL, the language of @p - * element_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_incomplete_array_type(struct drgn_type_index *tindex, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Find the type, offset, and bit field size of a type member. - * - * This matches the members of the type itself as well as the members of any - * unnamed members of the type. - * - * This caches all members of @p type for subsequent calls. - * - * @param[in] tindex Type index. - * @param[in] type Compound type to search in. - * @param[in] member_name Name of member. - * @param[in] member_name_len Length of @p member_name - * @param[out] ret Returned member information. - * @return @c NULL on success, non-@c NULL on error. 
- */ -struct drgn_error *drgn_type_index_find_member(struct drgn_type_index *tindex, - struct drgn_type *type, - const char *member_name, - size_t member_name_len, - struct drgn_member_value **ret); - -/** @} */ - -#endif /* DRGN_TYPE_INDEX_H */ diff --git a/tests/test_program.py b/tests/test_program.py index 52c32f4ef..4e5a7eedf 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -91,11 +91,11 @@ def test_lookup_error(self): "foo.c", ) self.assertRaisesRegex( - LookupError, "^could not find 'typedef foo'$", prog.type, "foo" + LookupError, "^could not find 'foo'$", prog.type, "foo" ) self.assertRaisesRegex( LookupError, - "^could not find 'typedef foo' in 'foo.c'$", + "^could not find 'foo' in 'foo.c'$", prog.type, "foo", "foo.c", From a97f6c4fa2bbabc7c99d9e8544aa9adec404abd8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 15 Jul 2020 16:34:56 -0700 Subject: [PATCH 26/56] Associate types with program I originally envisioned types as dumb descriptors. This mostly works for C because in C, types are fairly simple. However, even then the drgn_program_member_info() API is awkward. You should be able to look up a member directly from a type, but we need the program for caching purposes. This has also held me back from adding offsetof() or has_member() APIs. Things get even messier with C++. C++ template parameters can be objects (e.g., template <int N>). Such parameters would best be represented by a drgn object, which we need a drgn program for. Static members are a similar case. So, let's reimagine types as being owned by a program. This has a few parts: 1. In libdrgn, simple types are now created by factory functions, drgn_foo_type_create(). 2. To handle their variable length fields, compound types, enum types, and function types are constructed with a "builder" API. 3. Simple types are deduplicated. 4. The Python type factory functions are replaced by methods of the Program class. 5. 
While we're changing the API, the parameters to pointer_type() and array_type() are reordered to be more logical (and to allow pointer_type() to take a default size of None for the program's default pointer size). 6. Likewise, the type factory methods take qualifiers as a keyword argument only. A big part of this change is updating the tests and splitting up large test cases into smaller ones in a few places. Signed-off-by: Omar Sandoval --- _drgn.pyi | 515 ++--- docs/api_reference.rst | 27 +- drgn/__init__.py | 26 - drgn/helpers/linux/list.py | 2 + libdrgn/drgn.h.in | 7 + libdrgn/dwarf_info_cache.c | 649 +++--- libdrgn/dwarf_info_cache.h | 2 - libdrgn/language.c | 14 - libdrgn/language.h | 4 +- libdrgn/language_c.c | 46 +- libdrgn/linux_kernel.c | 6 +- libdrgn/object.c | 46 +- libdrgn/program.h | 39 +- libdrgn/python/drgnpy.h | 97 +- libdrgn/python/module.c | 27 - libdrgn/python/object.c | 3 +- libdrgn/python/program.c | 88 +- libdrgn/python/type.c | 1369 +++++++------ libdrgn/type.c | 1286 +++++++----- libdrgn/type.h | 593 +++--- tests/__init__.py | 106 +- tests/test_dwarf.py | 3815 ++++++++++++++++++++++++------------ tests/test_language_c.py | 487 +++-- tests/test_object.py | 688 +++---- tests/test_program.py | 398 ++-- tests/test_type.py | 1009 ++++++---- 26 files changed, 6535 insertions(+), 4814 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index d95ee6bde..b593c7fa4 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -236,21 +236,6 @@ class Program: the given file """ ... - def pointer_type( - self, - type: Union[str, Type], - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, - ) -> Type: - """ - Create a pointer type which points to the given type. - - :param type: The referenced type. - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... 
def read( self, address: IntegerLike, size: IntegerLike, physical: bool = False ) -> bytes: @@ -455,6 +440,282 @@ class Program: else: return prog['bar'] """ + def void_type( + self, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new void type. It has kind :attr:`TypeKind.VOID`. + + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def int_type( + self, + name: str, + size: IntegerLike, + is_signed: bool, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new integer type. It has kind :attr:`TypeKind.INT`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param is_signed: :attr:`Type.is_signed` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def bool_type( + self, + name: str, + size: IntegerLike, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new boolean type. It has kind :attr:`TypeKind.BOOL`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def float_type( + self, + name: str, + size: IntegerLike, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new floating-point type. It has kind :attr:`TypeKind.FLOAT`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def complex_type( + self, + name: str, + size: IntegerLike, + type: Type, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new complex type. It has kind :attr:`TypeKind.COMPLEX`. 
+ + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param type: The corresponding real type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + @overload + def struct_type( + self, + tag: Optional[str], + size: IntegerLike, + members: Sequence[TypeMember], + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new structure type. It has kind :attr:`TypeKind.STRUCT`. + + :param tag: :attr:`Type.tag` + :param size: :attr:`Type.size` + :param members: :attr:`Type.members` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + @overload + def struct_type( + self, + tag: Optional[str], + size: None = None, + members: None = None, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete structure type.""" + ... + @overload + def union_type( + self, + tag: Optional[str], + size: IntegerLike, + members: Sequence[TypeMember], + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new union type. It has kind :attr:`TypeKind.UNION`. Otherwise, + this is the same as :meth:`struct_type()`. + """ + ... + @overload + def union_type( + self, + tag: Optional[str], + size: None = None, + members: None = None, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete union type.""" + ... + @overload + def class_type( + self, + tag: Optional[str], + size: IntegerLike, + members: Sequence[TypeMember], + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new class type. It has kind :attr:`TypeKind.CLASS`. Otherwise, + this is the same as :meth:`struct_type()`. + """ + ... 
+ @overload + def class_type( + self, + tag: Optional[str], + size: None = None, + members: None = None, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete class type.""" + ... + @overload + def enum_type( + self, + tag: Optional[str], + type: Type, + enumerators: Sequence[TypeEnumerator], + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new enumerated type. It has kind :attr:`TypeKind.ENUM`. + + :param tag: :attr:`Type.tag` + :param type: The compatible integer type (:attr:`Type.type`) + :param enumerators: :attr:`Type.enumerators` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + @overload + def enum_type( + self, + tag: Optional[str], + type: None = None, + enumerators: None = None, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete enumerated type.""" + ... + def typedef_type( + self, + name: str, + type: Type, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new typedef type. It has kind :attr:`TypeKind.TYPEDEF`. + + :param name: :attr:`Type.name` + :param type: The aliased type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def pointer_type( + self, + type: Type, + size: Optional[int] = None, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new pointer type. It has kind :attr:`TypeKind.POINTER`. + + You can usually use :meth:`Program.pointer_type()` instead. + + :param type: The referenced type (:attr:`Type.type`) + :param size: :attr:`Type.size`, or ``None`` to use the program's + default pointer size. 
+ :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def array_type( + self, + type: Type, + length: Optional[int] = None, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new array type. It has kind :attr:`TypeKind.ARRAY`. + + :param type: The element type (:attr:`Type.type`) + :param length: :attr:`Type.length` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... + def function_type( + self, + type: Type, + parameters: Sequence[TypeParameter], + is_variadic: bool = False, + *, + qualifiers: Optional[Qualifiers] = None, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new function type. It has kind :attr:`TypeKind.FUNCTION`. + + :param type: The return type (:attr:`Type.type`) + :param parameters: :attr:`Type.parameters` + :param is_variadic: :attr:`Type.is_variadic` + :param qualifiers: :attr:`Type.qualifiers` + :param lang: :attr:`Type.language` + """ + ... class ProgramFlags(enum.Flag): """ @@ -1169,6 +1430,9 @@ class Type: memory-intensive. """ + prog: Program + """Program that this type is from.""" + kind: TypeKind """Kind of this type.""" @@ -1457,227 +1721,6 @@ class Qualifiers(enum.Flag): ATOMIC = ... """Atomic type.""" -def void_type( - qualifiers: Optional[Qualifiers] = None, *, language: Optional[Language] = None -) -> Type: - """ - Create a new void type. It has kind :attr:`TypeKind.VOID`. - - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def int_type( - name: str, - size: int, - is_signed: bool, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new integer type. It has kind :attr:`TypeKind.INT`. 
- - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param is_signed: :attr:`Type.is_signed` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def bool_type( - name: str, - size: int, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new boolean type. It has kind :attr:`TypeKind.BOOL`. - - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def float_type( - name: str, - size: int, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new floating-point type. It has kind :attr:`TypeKind.FLOAT`. - - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def complex_type( - name: str, - size: int, - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new complex type. It has kind :attr:`TypeKind.COMPLEX`. - - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param type: The corresponding real type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def struct_type( - tag: Optional[str], - size: Optional[int] = None, - members: Optional[Sequence[TypeMember]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new structure type. It has kind :attr:`TypeKind.STRUCT`. - - :param tag: :attr:`Type.tag` - :param size: :attr:`Type.size`; ``None`` if this is an incomplete type. - :param members: :attr:`Type.members` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... 
- -def union_type( - tag: Optional[str], - size: Optional[int] = None, - members: Optional[Sequence[TypeMember]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new union type. It has kind :attr:`TypeKind.UNION`. Otherwise, - this is the same as :func:`struct_type()`. - """ - ... - -def class_type( - tag: Optional[str], - size: Optional[int] = None, - members: Optional[Sequence[TypeMember]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new class type. It has kind :attr:`TypeKind.CLASS`. Otherwise, - this is the same as :func:`struct_type()`. - """ - ... - -def enum_type( - tag: Optional[str], - type: Optional[Type] = None, - enumerators: Optional[Sequence[TypeEnumerator]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new enumerated type. It has kind :attr:`TypeKind.ENUM`. - - :param tag: :attr:`Type.tag` - :param type: The compatible integer type (:attr:`Type.type`) - :param enumerators: :attr:`Type.enumerators` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def typedef_type( - name: str, - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new typedef type. It has kind :attr:`TypeKind.TYPEDEF`. - - :param name: :attr:`Type.name` - :param type: The aliased type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def pointer_type( - size: int, - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new pointer type. It has kind :attr:`TypeKind.POINTER`, - - You can usually use :meth:`Program:pointer_type()` instead. 
- - :param size: :attr:`Type.size` - :param type: The referenced type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def array_type( - length: Optional[int], - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new array type. It has kind :attr:`TypeKind.ARRAY`. - - :param length: :attr:`Type.length` - :param type: The element type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def function_type( - type: Type, - parameters: Sequence[TypeParameter], - is_variadic: bool = False, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new function type. It has kind :attr:`TypeKind.FUNCTION`. - - :param type: The return type (:attr:`Type.type`) - :param parameters: :attr:`Type.parameters` - :param is_variadic: :attr:`Type.is_variadic` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - # type_or_obj is positional-only. def sizeof(type_or_obj: Union[Type, Object]) -> int: """ diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 6c3ea7de9..020c47369 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -7,6 +7,7 @@ Programs -------- .. drgndoc:: Program + :exclude: (void|int|bool|float|complex|struct|union|class|enum|typedef|pointer|array|function)_type .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags @@ -95,19 +96,19 @@ Type Constructors Custom drgn types can be created with the following factory functions. These can be used just like types obtained from :meth:`Program.type()`. -.. drgndoc:: void_type -.. drgndoc:: int_type -.. drgndoc:: bool_type -.. drgndoc:: float_type -.. drgndoc:: complex_type -.. drgndoc:: struct_type -.. drgndoc:: union_type -.. drgndoc:: class_type -.. drgndoc:: enum_type -.. 
drgndoc:: typedef_type -.. drgndoc:: pointer_type -.. drgndoc:: array_type -.. drgndoc:: function_type +.. drgndoc:: Program.void_type +.. drgndoc:: Program.int_type +.. drgndoc:: Program.bool_type +.. drgndoc:: Program.float_type +.. drgndoc:: Program.complex_type +.. drgndoc:: Program.struct_type +.. drgndoc:: Program.union_type +.. drgndoc:: Program.class_type +.. drgndoc:: Program.enum_type +.. drgndoc:: Program.typedef_type +.. drgndoc:: Program.pointer_type +.. drgndoc:: Program.array_type +.. drgndoc:: Program.function_type Miscellaneous ------------- diff --git a/drgn/__init__.py b/drgn/__init__.py index 51c989d10..0c9441560 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -72,28 +72,15 @@ TypeMember, TypeParameter, _with_libkdumpfile as _with_libkdumpfile, - array_type, - bool_type, cast, - class_type, - complex_type, container_of, - enum_type, filename_matches, - float_type, - function_type, host_platform, - int_type, - pointer_type, program_from_core_dump, program_from_kernel, program_from_pid, reinterpret, sizeof, - struct_type, - typedef_type, - union_type, - void_type, ) __all__ = ( @@ -122,29 +109,16 @@ "TypeKind", "TypeMember", "TypeParameter", - "array_type", - "bool_type", "cast", - "class_type", - "complex_type", "container_of", - "enum_type", "execscript", "filename_matches", - "float_type", - "function_type", "host_platform", - "int_type", - "pointer_type", "program_from_core_dump", "program_from_kernel", "program_from_pid", "reinterpret", "sizeof", - "struct_type", - "typedef_type", - "union_type", - "void_type", ) diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index 58aa901ff..dc3aace4f 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -85,6 +85,8 @@ def list_first_entry_or_null( head = head.read_() pos = head.next.read_() if pos == head: + if isinstance(type, str): + type = head.prog_.type(type) return NULL(head.prog_, head.prog_.pointer_type(type)) else: return container_of(pos, type, 
member) diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 9c3804e95..fd10a358a 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -425,6 +425,7 @@ struct drgn_type { enum drgn_primitive_type primitive; /* These are the qualifiers for the wrapped type, not this type. */ enum drgn_qualifiers qualifiers; + struct drgn_program *program; const struct drgn_language *language; /* * This mess of unions is used to make this as compact as possible. Use @@ -493,6 +494,12 @@ static inline bool drgn_type_is_complete(struct drgn_type *type) return type->_private.is_complete; } +static inline struct drgn_program * +drgn_type_program(struct drgn_type *type) +{ + return type->_private.program; +} + /** Get the language of a type. */ static inline const struct drgn_language * drgn_type_language(struct drgn_type *type) diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/dwarf_info_cache.c index e9e22cdba..d8d2d6f93 100644 --- a/libdrgn/dwarf_info_cache.c +++ b/libdrgn/dwarf_info_cache.c @@ -12,53 +12,19 @@ #include "hash_table.h" #include "object.h" #include "object_index.h" +#include "program.h" #include "type.h" #include "vector.h" DEFINE_HASH_TABLE_FUNCTIONS(dwarf_type_map, hash_pair_ptr_type, hash_table_scalar_eq) -DEFINE_VECTOR(drgn_type_member_vector, struct drgn_type_member) -DEFINE_VECTOR(drgn_type_enumerator_vector, struct drgn_type_enumerator) -DEFINE_VECTOR(drgn_type_parameter_vector, struct drgn_type_parameter) struct drgn_type_from_dwarf_thunk { struct drgn_type_thunk thunk; - struct drgn_dwarf_info_cache *dicache; Dwarf_Die die; bool can_be_incomplete_array; }; -static void drgn_dwarf_type_free(struct drgn_dwarf_type *dwarf_type) -{ - if (dwarf_type->should_free) { - struct drgn_type *type = dwarf_type->type; - - if (drgn_type_has_members(type)) { - struct drgn_type_member *members; - size_t num_members, i; - - members = drgn_type_members(type); - num_members = drgn_type_num_members(type); - for (i = 0; i < num_members; i++) - 
drgn_type_member_deinit(&members[i]); - free(members); - } - if (drgn_type_has_parameters(type)) { - struct drgn_type_parameter *parameters; - size_t num_parameters, i; - - parameters = drgn_type_parameters(type); - num_parameters = drgn_type_num_parameters(type); - for (i = 0; i < num_parameters; i++) - drgn_type_parameter_deinit(&parameters[i]); - free(parameters); - } - if (drgn_type_has_enumerators(type)) - free(drgn_type_enumerators(type)); - free(type); - } -} - /** * Return whether a DWARF DIE is little-endian. * @@ -193,10 +159,9 @@ static struct drgn_error * drgn_type_from_dwarf_thunk_evaluate_fn(struct drgn_type_thunk *thunk, struct drgn_qualified_type *ret) { - struct drgn_type_from_dwarf_thunk *t; - - t = container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk); - return drgn_type_from_dwarf_internal(t->dicache, &t->die, + struct drgn_type_from_dwarf_thunk *t = + container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk); + return drgn_type_from_dwarf_internal(thunk->prog->_dicache, &t->die, t->can_be_incomplete_array, NULL, ret); } @@ -211,29 +176,26 @@ drgn_lazy_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *parent_die, bool can_be_incomplete_array, const char *tag_name, struct drgn_lazy_type *ret) { - struct drgn_type_from_dwarf_thunk *thunk; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - Dwarf_Die type_die; - + Dwarf_Attribute attr_mem, *attr; if (!(attr = dwarf_attr_integrate(parent_die, DW_AT_type, &attr_mem))) { return drgn_error_format(DRGN_ERROR_OTHER, "%s is missing DW_AT_type", tag_name); } + Dwarf_Die type_die; if (!dwarf_formref_die(attr, &type_die)) { return drgn_error_format(DRGN_ERROR_OTHER, "%s has invalid DW_AT_type", tag_name); } - thunk = malloc(sizeof(*thunk)); + struct drgn_type_from_dwarf_thunk *thunk = malloc(sizeof(*thunk)); if (!thunk) return &drgn_enomem; + thunk->thunk.prog = dicache->prog; thunk->thunk.evaluate_fn = drgn_type_from_dwarf_thunk_evaluate_fn; thunk->thunk.free_fn = 
drgn_type_from_dwarf_thunk_free_fn; - thunk->dicache = dicache; thunk->die = type_die; thunk->can_be_incomplete_array = can_be_incomplete_array; drgn_lazy_type_init_thunk(ret, &thunk->thunk); @@ -280,7 +242,7 @@ drgn_type_from_dwarf_child(struct drgn_dwarf_info_cache *dicache, if (err) return err; } - ret->type = drgn_void_type(parent_lang); + ret->type = drgn_void_type(dicache->prog, parent_lang); ret->qualifiers = 0; return NULL; } else { @@ -305,48 +267,40 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, const struct drgn_language *lang, struct drgn_type **ret) { - struct drgn_error *err; - struct drgn_type *type; - Dwarf_Attribute attr; - Dwarf_Word encoding; - const char *name; - int size; - - name = dwarf_diename(die); + const char *name = dwarf_diename(die); if (!name) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_base_type has missing or invalid DW_AT_name"); } + Dwarf_Attribute attr; + Dwarf_Word encoding; if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || dwarf_formudata(&attr, &encoding)) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_base_type has missing or invalid DW_AT_encoding"); } - size = dwarf_bytesize(die); + int size = dwarf_bytesize(die); if (size == -1) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); } - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; switch (encoding) { case DW_ATE_boolean: - drgn_bool_type_init(type, name, size, lang); - break; + return drgn_bool_type_create(dicache->prog, name, size, lang, + ret); case DW_ATE_float: - drgn_float_type_init(type, name, size, lang); - break; + return drgn_float_type_create(dicache->prog, name, size, lang, + ret); case DW_ATE_signed: case DW_ATE_signed_char: - drgn_int_type_init(type, name, size, true, lang); - break; + return drgn_int_type_create(dicache->prog, name, size, true, + lang, ret); case DW_ATE_unsigned: case DW_ATE_unsigned_char: - 
drgn_int_type_init(type, name, size, false, lang); - break; + return drgn_int_type_create(dicache->prog, name, size, false, + lang, ret); /* * GCC also supports complex integer types, but DWARF 4 doesn't have an * encoding for that. GCC as of 8.2 emits DW_ATE_lo_user, but that's @@ -354,14 +308,14 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, * don't support it. */ case DW_ATE_complex_float: { - struct drgn_qualified_type real_type; Dwarf_Die child; - if (dwarf_type(die, &child)) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_base_type has missing or invalid DW_AT_type"); } - err = drgn_type_from_dwarf(dicache, &child, &real_type); + struct drgn_qualified_type real_type; + struct drgn_error *err = drgn_type_from_dwarf(dicache, &child, + &real_type); if (err) return err; if (drgn_type_kind(real_type.type) != DRGN_TYPE_FLOAT && @@ -369,16 +323,14 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_type of DW_ATE_complex_float is not a floating-point or integer type"); } - drgn_complex_type_init(type, name, size, real_type.type, lang); - break; + return drgn_complex_type_create(dicache->prog, name, size, + real_type.type, lang, ret); } default: return drgn_error_format(DRGN_ERROR_OTHER, "DW_TAG_base_type has unknown DWARF encoding 0x%llx", (unsigned long long)encoding); } - *ret = type; - return NULL; } /* @@ -530,21 +482,14 @@ parse_member_offset(Dwarf_Die *die, struct drgn_lazy_type *member_type, return NULL; } -static struct drgn_error *parse_member(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, - struct drgn_type_member *member, - bool little_endian) +static struct drgn_error * +parse_member(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, + bool little_endian, bool can_be_incomplete_array, + struct drgn_compound_type_builder *builder) { - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - struct 
drgn_lazy_type member_type; + Dwarf_Attribute attr_mem, *attr; const char *name; - uint64_t bit_offset; - uint64_t bit_field_size; - - attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem); - if (attr) { + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { name = dwarf_formstring(attr); if (!name) { return drgn_error_create(DRGN_ERROR_OTHER, @@ -554,10 +499,9 @@ static struct drgn_error *parse_member(struct drgn_dwarf_info_cache *dicache, name = NULL; } - attr = dwarf_attr_integrate(die, DW_AT_bit_size, &attr_mem); - if (attr) { + uint64_t bit_field_size; + if ((attr = dwarf_attr_integrate(die, DW_AT_bit_size, &attr_mem))) { Dwarf_Word bit_size; - if (dwarf_formudata(attr, &bit_size)) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_member has invalid DW_AT_bit_size"); @@ -567,29 +511,35 @@ static struct drgn_error *parse_member(struct drgn_dwarf_info_cache *dicache, bit_field_size = 0; } - err = drgn_lazy_type_from_dwarf(dicache, die, false, "DW_TAG_member", - &member_type); + struct drgn_lazy_type member_type; + struct drgn_error *err = drgn_lazy_type_from_dwarf(dicache, die, + can_be_incomplete_array, + "DW_TAG_member", + &member_type); if (err) return err; + uint64_t bit_offset; err = parse_member_offset(die, &member_type, bit_field_size, little_endian, &bit_offset); - if (err) { - drgn_lazy_type_deinit(&member_type); - return err; - } + if (err) + goto err; - drgn_type_member_init(member, member_type, name, bit_offset, - bit_field_size); + err = drgn_compound_type_builder_add_member(builder, member_type, name, + bit_offset, bit_field_size); + if (err) + goto err; return NULL; + +err: + drgn_lazy_type_deinit(&member_type); + return err; } static struct drgn_error * drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, - const struct drgn_language *lang, - enum drgn_type_kind kind, - struct drgn_type **ret, bool *should_free) + Dwarf_Die *die, const struct drgn_language *lang, + enum drgn_type_kind kind, 
struct drgn_type **ret) { struct drgn_error *err; @@ -634,66 +584,40 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, dw_tag_str); } if (declaration && tag) { - err = drgn_dwarf_info_cache_find_complete(dicache, - dw_tag, tag, ret); - if (!err) { - *should_free = false; - return NULL; - } else if (err->code != DRGN_ERROR_STOP) { + err = drgn_dwarf_info_cache_find_complete(dicache, dw_tag, tag, + ret); + if (!err || err->code != DRGN_ERROR_STOP) return err; - } } - *should_free = true; - struct drgn_type *type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - if (declaration) { - switch (kind) { - case DRGN_TYPE_STRUCT: - drgn_struct_type_init_incomplete(type, tag, lang); - break; - case DRGN_TYPE_UNION: - drgn_union_type_init_incomplete(type, tag, lang); - break; - case DRGN_TYPE_CLASS: - drgn_class_type_init_incomplete(type, tag, lang); - break; - default: - UNREACHABLE(); - } - *ret = type; - return NULL; + return drgn_incomplete_compound_type_create(dicache->prog, kind, + tag, lang, ret); } - struct drgn_type_member_vector members = VECTOR_INIT; int size = dwarf_bytesize(die); if (size == -1) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has missing or invalid DW_AT_byte_size", - dw_tag_str); - goto err; + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has missing or invalid DW_AT_byte_size", + dw_tag_str); } + struct drgn_compound_type_builder builder; + drgn_compound_type_builder_init(&builder, dicache->prog, kind); bool little_endian; dwarf_die_is_little_endian(die, false, &little_endian); - Dwarf_Die child; + Dwarf_Die member = {}, child; int r = dwarf_child(die, &child); while (r == 0) { if (dwarf_tag(&child) == DW_TAG_member) { - struct drgn_type_member *member = - drgn_type_member_vector_append_entry(&members); - if (!member) { - err = &drgn_enomem; - goto err; - } - err = parse_member(dicache, &child, member, - little_endian); - if (err) { - members.size--; - goto err; + if (member.addr) { + err = 
parse_member(dicache, &member, + little_endian, false, + &builder); + if (err) + goto err; } + member = child; } r = dwarf_siblingof(&child, &child); } @@ -702,165 +626,96 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, "libdw could not parse DIE children"); goto err; } - drgn_type_member_vector_shrink_to_fit(&members); - - if (kind == DRGN_TYPE_UNION) { - drgn_union_type_init(type, tag, size, members.data, - members.size, lang); - } else { - if (kind == DRGN_TYPE_STRUCT) { - drgn_struct_type_init(type, tag, size, members.data, - members.size, lang); - } else { - drgn_class_type_init(type, tag, size, members.data, - members.size, lang); - } - /* - * Flexible array members are only allowed as the last member of - * a structure with more than one named member. We defaulted - * can_be_incomplete_array to false in parse_member(), so fix it - * up. - */ - if (members.size > 1) { - struct drgn_type_member *member = - &drgn_type_members(type)[members.size - 1]; - /* - * The type may have already been evaluated if it's a - * bit field. Arrays can't be bit fields, so it's okay - * if we missed it. - */ - if (!drgn_lazy_type_is_evaluated(&member->type)) { - struct drgn_type_from_dwarf_thunk *thunk = - container_of(member->type.thunk, struct - drgn_type_from_dwarf_thunk, - thunk); - thunk->can_be_incomplete_array = true; - } - } + /* + * Flexible array members are only allowed as the last member of a + * structure with at least one other member. 
+ */ + if (member.addr) { + err = parse_member(dicache, &member, little_endian, + kind != DRGN_TYPE_UNION && + builder.members.size > 0, + &builder); + if (err) + goto err; } - *ret = type; + + err = drgn_compound_type_create(&builder, tag, size, lang, ret); + if (err) + goto err; return NULL; err: - for (size_t i = 0; i < members.size; i++) - drgn_type_member_deinit(&members.data[i]); - drgn_type_member_vector_deinit(&members); - free(type); + drgn_compound_type_builder_deinit(&builder); return err; } static struct drgn_error * -parse_enumerator(Dwarf_Die *die, struct drgn_type_enumerator *enumerator, +parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, bool *is_signed) { - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - const char *name; - int r; - - name = dwarf_diename(die); + const char *name = dwarf_diename(die); if (!name) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_enumerator has missing or invalid DW_AT_name"); } - attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem); - if (!attr) { + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_enumerator is missing DW_AT_const_value"); } - + struct drgn_error *err; if (attr->form == DW_FORM_sdata || attr->form == DW_FORM_implicit_const) { Dwarf_Sword svalue; - - r = dwarf_formsdata(attr, &svalue); - if (r == 0) { - drgn_type_enumerator_init_signed(enumerator, name, - svalue); - if (svalue < 0) - *is_signed = true; - } + if (dwarf_formsdata(attr, &svalue)) + goto invalid; + err = drgn_enum_type_builder_add_signed(builder, name, + svalue); + /* + * GCC before 7.1 didn't include DW_AT_encoding for + * DW_TAG_enumeration_type DIEs, so we have to guess the sign + * for enum_compatible_type_fallback(). 
+ */ + if (!err && svalue < 0) + *is_signed = true; } else { Dwarf_Word uvalue; - - r = dwarf_formudata(attr, &uvalue); - if (r == 0) { - drgn_type_enumerator_init_unsigned(enumerator, name, - uvalue); - } - } - if (r) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has invalid DW_AT_const_value"); + if (dwarf_formudata(attr, &uvalue)) + goto invalid; + err = drgn_enum_type_builder_add_unsigned(builder, name, + uvalue); } - return NULL; -} - -static struct drgn_type fallback_enum_compatible_types[2][4]; - -__attribute__((constructor(200))) -static void fallback_enum_compatible_types_init(void) -{ - unsigned int is_signed, shift; - - for (is_signed = 0; is_signed < 2; is_signed++) { - for (shift = 0; - shift < ARRAY_SIZE(fallback_enum_compatible_types[0]); - shift++) { - struct drgn_type *type; + return err; - type = &fallback_enum_compatible_types[is_signed][shift]; - drgn_int_type_init(type, "", 1 << shift, - is_signed, NULL); - } - } +invalid: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has invalid DW_AT_const_value"); } /* * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, * so we have to fabricate the compatible type. - * - * GCC before 7.1 didn't include DW_AT_encoding for DW_TAG_enumeration_type - * DIEs, either, so we also have to guess at the sign. 
*/ static struct drgn_error * enum_compatible_type_fallback(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, bool is_signed, + const struct drgn_language *lang, struct drgn_type **ret) { - int size; - - size = dwarf_bytesize(die); - switch (size) { - case 1: - *ret = &fallback_enum_compatible_types[is_signed][0]; - return NULL; - case 2: - *ret = &fallback_enum_compatible_types[is_signed][1]; - return NULL; - case 4: - *ret = &fallback_enum_compatible_types[is_signed][2]; - return NULL; - case 8: - *ret = &fallback_enum_compatible_types[is_signed][3]; - return NULL; - case -1: - *ret = NULL; + int size = dwarf_bytesize(die); + if (size == -1) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); - default: - *ret = NULL; - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has unsupported DW_AT_byte_size %d", - size); } + return drgn_int_type_create(dicache->prog, "", size, is_signed, + lang, ret); } static struct drgn_error * drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret, bool *should_free) + struct drgn_type **ret) { struct drgn_error *err; @@ -886,43 +741,23 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, err = drgn_dwarf_info_cache_find_complete(dicache, DW_TAG_enumeration_type, tag, ret); - if (!err) { - *should_free = false; - return NULL; - } else if (err->code != DRGN_ERROR_STOP) { + if (!err || err->code != DRGN_ERROR_STOP) return err; - } } - *should_free = true; - struct drgn_type *type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - if (declaration) { - drgn_enum_type_init_incomplete(type, tag, lang); - *ret = type; - return NULL; + return drgn_incomplete_enum_type_create(dicache->prog, tag, + lang, ret); } - struct drgn_type_enumerator_vector enumerators = - VECTOR_INIT; + struct drgn_enum_type_builder builder; + 
drgn_enum_type_builder_init(&builder, dicache->prog); bool is_signed = false; Dwarf_Die child; int r = dwarf_child(die, &child); while (r == 0) { - int tag; - - tag = dwarf_tag(&child); - if (tag == DW_TAG_enumerator) { - struct drgn_type_enumerator *enumerator; - - enumerator = drgn_type_enumerator_vector_append_entry(&enumerators); - if (!enumerator) { - err = &drgn_enomem; - goto err; - } - err = parse_enumerator(&child, enumerator, &is_signed); + if (dwarf_tag(&child) == DW_TAG_enumerator) { + err = parse_enumerator(&child, &builder, &is_signed); if (err) goto err; } @@ -933,7 +768,6 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, "libdw could not parse DIE children"); goto err; } - drgn_type_enumerator_vector_shrink_to_fit(&enumerators); struct drgn_type *compatible_type; r = dwarf_type(die, &child); @@ -943,7 +777,7 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, goto err; } else if (r) { err = enum_compatible_type_fallback(dicache, die, is_signed, - &compatible_type); + lang, &compatible_type); if (err) goto err; } else { @@ -960,14 +794,13 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } } - drgn_enum_type_init(type, tag, compatible_type, enumerators.data, - enumerators.size, lang); - *ret = type; + err = drgn_enum_type_create(&builder, tag, compatible_type, lang, ret); + if (err) + goto err; return NULL; err: - drgn_type_enumerator_vector_deinit(&enumerators); - free(type); + drgn_enum_type_builder_deinit(&builder); return err; } @@ -979,35 +812,25 @@ drgn_typedef_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, bool *is_incomplete_array_ret, struct drgn_type **ret) { - struct drgn_error *err; - struct drgn_type *type; - struct drgn_qualified_type aliased_type; - const char *name; - - name = dwarf_diename(die); + const char *name = dwarf_diename(die); if (!name) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_typedef has missing or 
invalid DW_AT_name"); } - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_typedef", true, - can_be_incomplete_array, - is_incomplete_array_ret, - &aliased_type); - if (err) { - free(type); + struct drgn_qualified_type aliased_type; + struct drgn_error *err = drgn_type_from_dwarf_child(dicache, die, + drgn_language_or_default(lang), + "DW_TAG_typedef", + true, + can_be_incomplete_array, + is_incomplete_array_ret, + &aliased_type); + if (err) return err; - } - drgn_typedef_type_init(type, name, aliased_type, lang); - *ret = type; - return NULL; + return drgn_typedef_type_create(dicache->prog, name, aliased_type, lang, + ret); } static struct drgn_error * @@ -1015,18 +838,34 @@ drgn_pointer_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, const struct drgn_language *lang, struct drgn_type **ret) { - struct drgn_error *err; struct drgn_qualified_type referenced_type; - - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_pointer_type", true, true, - NULL, &referenced_type); + struct drgn_error *err = drgn_type_from_dwarf_child(dicache, die, + drgn_language_or_default(lang), + "DW_TAG_pointer_type", + true, true, NULL, + &referenced_type); if (err) return err; - return drgn_program_pointer_type(dicache->prog, referenced_type, lang, - ret); + Dwarf_Attribute attr_mem, *attr; + uint64_t size; + if ((attr = dwarf_attr_integrate(die, DW_AT_byte_size, &attr_mem))) { + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_pointer_type has invalid DW_AT_byte_size"); + } + size = word; + } else { + uint8_t word_size; + err = drgn_program_word_size(dicache->prog, &word_size); + if (err) + return err; + size = word_size; + } + + return drgn_pointer_type_create(dicache->prog, referenced_type, size, + lang, ret); } struct array_dimension { @@ -1129,18 
+968,18 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, do { dimension = array_dimension_vector_pop(&dimensions); if (dimension->is_complete) { - err = drgn_program_array_type(dicache->prog, - dimension->length, - element_type, lang, - &type); + err = drgn_array_type_create(dicache->prog, + element_type, + dimension->length, lang, + &type); } else if (dimensions.size || !can_be_incomplete_array) { - err = drgn_program_array_type(dicache->prog, 0, - element_type, lang, - &type); + err = drgn_array_type_create(dicache->prog, + element_type, 0, lang, + &type); } else { - err = drgn_program_incomplete_array_type(dicache->prog, - element_type, - lang, &type); + err = drgn_incomplete_array_type_create(dicache->prog, + element_type, + lang, &type); } if (err) goto out; @@ -1158,16 +997,11 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, static struct drgn_error * parse_formal_parameter(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, - struct drgn_type_parameter *parameter) + struct drgn_function_type_builder *builder) { - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; + Dwarf_Attribute attr_mem, *attr; const char *name; - struct drgn_lazy_type parameter_type; - - attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem); - if (attr) { + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { name = dwarf_formstring(attr); if (!name) { return drgn_error_create(DRGN_ERROR_OTHER, @@ -1177,14 +1011,18 @@ parse_formal_parameter(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, name = NULL; } - err = drgn_lazy_type_from_dwarf(dicache, die, true, - "DW_TAG_formal_parameter", - &parameter_type); + struct drgn_lazy_type parameter_type; + struct drgn_error *err = drgn_lazy_type_from_dwarf(dicache, die, true, + "DW_TAG_formal_parameter", + &parameter_type); if (err) return err; - drgn_type_parameter_init(parameter, parameter_type, name); - return NULL; + err = 
drgn_function_type_builder_add_parameter(builder, parameter_type, + name); + if (err) + drgn_lazy_type_deinit(&parameter_type); + return err; } static struct drgn_error * @@ -1194,39 +1032,28 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, { struct drgn_error *err; - struct drgn_type *type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - const char *tag_name = dwarf_tag(die) == DW_TAG_subroutine_type ? "DW_TAG_subroutine_type" : "DW_TAG_subprogram"; - struct drgn_type_parameter_vector parameters = VECTOR_INIT; + struct drgn_function_type_builder builder; + drgn_function_type_builder_init(&builder, dicache->prog); bool is_variadic = false; Dwarf_Die child; int r = dwarf_child(die, &child); while (r == 0) { - int tag = dwarf_tag(&child); - if (tag == DW_TAG_formal_parameter) { + switch (dwarf_tag(&child)) { + case DW_TAG_formal_parameter: if (is_variadic) { err = drgn_error_format(DRGN_ERROR_OTHER, "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", tag_name); goto err; } - - struct drgn_type_parameter *parameter = - drgn_type_parameter_vector_append_entry(&parameters); - if (!parameter) { - err = &drgn_enomem; - goto err; - } - err = parse_formal_parameter(dicache, &child, parameter); - if (err) { - parameters.size--; + err = parse_formal_parameter(dicache, &child, &builder); + if (err) goto err; - } - } else if (tag == DW_TAG_unspecified_parameters) { + break; + case DW_TAG_unspecified_parameters: if (is_variadic) { err = drgn_error_format(DRGN_ERROR_OTHER, "%s has multiple DW_TAG_unspecified_parameters children", @@ -1234,6 +1061,9 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, goto err; } is_variadic = true; + break; + default: + break; } r = dwarf_siblingof(&child, &child); } @@ -1242,7 +1072,6 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, "libdw could not parse DIE children"); goto err; } - drgn_type_parameter_vector_shrink_to_fit(&parameters); struct 
drgn_qualified_type return_type; err = drgn_type_from_dwarf_child(dicache, die, @@ -1252,16 +1081,14 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, if (err) goto err; - drgn_function_type_init(type, return_type, parameters.data, - parameters.size, is_variadic, lang); - *ret = type; + err = drgn_function_type_create(&builder, return_type, is_variadic, + lang, ret); + if (err) + goto err; return NULL; err: - for (size_t i = 0; i < parameters.size; i++) - drgn_type_parameter_deinit(&parameters.data[i]); - drgn_type_parameter_vector_deinit(&parameters); - free(type); + drgn_function_type_builder_deinit(&builder); return err; } @@ -1271,27 +1098,22 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, bool *is_incomplete_array_ret, struct drgn_qualified_type *ret) { - struct drgn_error *err; - const struct drgn_language *lang; - struct hash_pair hp; - struct dwarf_type_map_entry entry = { - .key = die->addr, - }; - struct dwarf_type_map *map; - struct dwarf_type_map_iterator it; - if (dicache->depth >= 1000) { return drgn_error_create(DRGN_ERROR_RECURSION, "maximum DWARF type parsing depth exceeded"); } - hp = dwarf_type_map_hash(&entry.key); - it = dwarf_type_map_search_hashed(&dicache->map, &entry.key, hp); + struct dwarf_type_map_entry entry = { + .key = die->addr, + }; + struct hash_pair hp = dwarf_type_map_hash(&entry.key); + struct dwarf_type_map_iterator it = + dwarf_type_map_search_hashed(&dicache->map, &entry.key, hp); if (it.entry) { if (!can_be_incomplete_array && it.entry->value.is_incomplete_array) { - map = &dicache->cant_be_incomplete_array_map; - it = dwarf_type_map_search_hashed(map, &entry.key, hp); + it = dwarf_type_map_search_hashed(&dicache->cant_be_incomplete_array_map, + &entry.key, hp); } if (it.entry) { ret->type = it.entry->value.type; @@ -1300,7 +1122,8 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, } } - err = drgn_language_from_die(die, &lang); + const struct drgn_language *lang; + 
struct drgn_error *err = drgn_language_from_die(die, &lang); if (err) return err; @@ -1309,11 +1132,6 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, entry.value.is_incomplete_array = false; switch (dwarf_tag(die)) { case DW_TAG_const_type: - /* - * Qualified types share the struct drgn_type with the - * unqualified type. - */ - entry.value.should_free = false; err = drgn_type_from_dwarf_child(dicache, die, drgn_language_or_default(lang), "DW_TAG_const_type", true, @@ -1321,7 +1139,6 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, ret->qualifiers |= DRGN_QUALIFIER_CONST; break; case DW_TAG_restrict_type: - entry.value.should_free = false; err = drgn_type_from_dwarf_child(dicache, die, drgn_language_or_default(lang), "DW_TAG_restrict_type", true, @@ -1329,7 +1146,6 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; break; case DW_TAG_volatile_type: - entry.value.should_free = false; err = drgn_type_from_dwarf_child(dicache, die, drgn_language_or_default(lang), "DW_TAG_volatile_type", true, @@ -1337,7 +1153,6 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; break; case DW_TAG_atomic_type: - entry.value.should_free = false; err = drgn_type_from_dwarf_child(dicache, die, drgn_language_or_default(lang), "DW_TAG_atomic_type", true, @@ -1345,44 +1160,37 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; break; case DW_TAG_base_type: - entry.value.should_free = true; err = drgn_base_type_from_dwarf(dicache, die, lang, &ret->type); break; case DW_TAG_structure_type: err = drgn_compound_type_from_dwarf(dicache, die, lang, DRGN_TYPE_STRUCT, - &ret->type, - &entry.value.should_free); + &ret->type); break; case DW_TAG_union_type: err = drgn_compound_type_from_dwarf(dicache, die, lang, - DRGN_TYPE_UNION, &ret->type, - 
&entry.value.should_free); + DRGN_TYPE_UNION, + &ret->type); break; case DW_TAG_class_type: err = drgn_compound_type_from_dwarf(dicache, die, lang, - DRGN_TYPE_CLASS, &ret->type, - &entry.value.should_free); + DRGN_TYPE_CLASS, + &ret->type); break; case DW_TAG_enumeration_type: - err = drgn_enum_type_from_dwarf(dicache, die, lang, &ret->type, - &entry.value.should_free); + err = drgn_enum_type_from_dwarf(dicache, die, lang, &ret->type); break; case DW_TAG_typedef: - entry.value.should_free = true; err = drgn_typedef_type_from_dwarf(dicache, die, lang, can_be_incomplete_array, &entry.value.is_incomplete_array, &ret->type); break; case DW_TAG_pointer_type: - /* Pointer types are owned by the type index. */ - entry.value.should_free = false; - err = drgn_pointer_type_from_dwarf(dicache, die, lang, &ret->type); + err = drgn_pointer_type_from_dwarf(dicache, die, lang, + &ret->type); break; case DW_TAG_array_type: - /* Array types are owned by the type index. */ - entry.value.should_free = false; err = drgn_array_type_from_dwarf(dicache, die, lang, can_be_incomplete_array, &entry.value.is_incomplete_array, @@ -1390,7 +1198,6 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, break; case DW_TAG_subroutine_type: case DW_TAG_subprogram: - entry.value.should_free = true; err = drgn_function_type_from_dwarf(dicache, die, lang, &ret->type); break; @@ -1406,12 +1213,16 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, entry.value.type = ret->type; entry.value.qualifiers = ret->qualifiers; + struct dwarf_type_map *map; if (!can_be_incomplete_array && entry.value.is_incomplete_array) map = &dicache->cant_be_incomplete_array_map; else map = &dicache->map; if (dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { - drgn_dwarf_type_free(&entry.value); + /* + * This will "leak" the type we created, but it'll still be + * cleaned up when the program is freed. 
+ */ return &drgn_enomem; } if (is_incomplete_array_ret) @@ -1690,18 +1501,8 @@ drgn_dwarf_info_cache_create(struct drgn_program *prog, void drgn_dwarf_info_cache_destroy(struct drgn_dwarf_info_cache *dicache) { - struct dwarf_type_map_iterator it; - if (!dicache) return; - - for (it = dwarf_type_map_first(&dicache->map); it.entry; - it = dwarf_type_map_next(it)) - drgn_dwarf_type_free(&it.entry->value); - /* Arrays don't need to be freed, but typedefs do. */ - for (it = dwarf_type_map_first(&dicache->cant_be_incomplete_array_map); - it.entry; it = dwarf_type_map_next(it)) - drgn_dwarf_type_free(&it.entry->value); dwarf_type_map_deinit(&dicache->cant_be_incomplete_array_map); dwarf_type_map_deinit(&dicache->map); drgn_dwarf_index_deinit(&dicache->dindex); diff --git a/libdrgn/dwarf_info_cache.h b/libdrgn/dwarf_info_cache.h index 123312e69..7935782a8 100644 --- a/libdrgn/dwarf_info_cache.h +++ b/libdrgn/dwarf_info_cache.h @@ -39,8 +39,6 @@ struct drgn_dwarf_type { * drgn_type_from_dwarf_internal(). */ bool is_incomplete_array; - /** Whether we need to free @c type. 
*/ - bool should_free; }; DEFINE_HASH_MAP_TYPE(dwarf_type_map, const void *, struct drgn_dwarf_type); diff --git a/libdrgn/language.c b/libdrgn/language.c index e6219cd2d..09e7f6cc4 100644 --- a/libdrgn/language.c +++ b/libdrgn/language.c @@ -7,13 +7,6 @@ const struct drgn_language drgn_languages[] = { [DRGN_LANGUAGE_C] = { .name = "C", - .void_type = { - { - .kind = DRGN_TYPE_VOID, - .primitive = DRGN_C_TYPE_VOID, - .language = &drgn_language_c, - }, - }, .format_type_name = c_format_type_name, .format_type = c_format_type, .format_object = c_format_object, @@ -41,13 +34,6 @@ const struct drgn_language drgn_languages[] = { }, [DRGN_LANGUAGE_CPP] = { .name = "C++", - .void_type = { - { - .kind = DRGN_TYPE_VOID, - .primitive = DRGN_C_TYPE_VOID, - .language = &drgn_language_cpp, - }, - }, .format_type_name = c_format_type_name, .format_type = c_format_type, .format_object = c_format_object, diff --git a/libdrgn/language.h b/libdrgn/language.h index 89d54f131..89aef6972 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -68,8 +68,6 @@ typedef struct drgn_error *drgn_cmp_op(const struct drgn_object *lhs, struct drgn_language { /** Name of this programming language. */ const char *name; - /** Void type for this language. See @ref drgn_void_type(). */ - struct drgn_type void_type; /** Implement @ref drgn_format_type_name(). */ drgn_format_type_fn *format_type_name; /** Implement @ref drgn_format_type(). 
*/ @@ -163,8 +161,8 @@ enum { extern const struct drgn_language drgn_languages[DRGN_NUM_LANGUAGES]; -#define drgn_language_cpp drgn_languages[DRGN_LANGUAGE_CPP] #define drgn_language_c drgn_languages[DRGN_LANGUAGE_C] +#define drgn_language_cpp drgn_languages[DRGN_LANGUAGE_CPP] /** * Return flags that should be passed through when formatting an object diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 83b0a392f..02e712a3d 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -2481,13 +2481,21 @@ c_type_from_declarator(struct drgn_program *prog, } if (declarator->kind == C_TOKEN_ASTERISK) { - err = drgn_program_pointer_type(prog, *ret, NULL, &ret->type); + uint8_t word_size; + err = drgn_program_word_size(prog, &word_size); + if (!err) { + err = drgn_pointer_type_create(prog, *ret, word_size, + drgn_type_language(ret->type), + &ret->type); + } } else if (declarator->is_complete) { - err = drgn_program_array_type(prog, declarator->length, *ret, - NULL, &ret->type); + err = drgn_array_type_create(prog, *ret, declarator->length, + drgn_type_language(ret->type), + &ret->type); } else { - err = drgn_program_incomplete_array_type(prog, *ret, NULL, - &ret->type); + err = drgn_incomplete_array_type_create(prog, *ret, + drgn_type_language(ret->type), + &ret->type); } if (!err) @@ -3122,24 +3130,34 @@ static struct drgn_error *c_operand_type(const struct drgn_object *obj, *type_ret = drgn_object_type(obj); switch (drgn_type_kind(type_ret->underlying_type)) { - case DRGN_TYPE_ARRAY: - err = drgn_program_pointer_type(obj->prog, - drgn_type_type(type_ret->underlying_type), - drgn_type_language(type_ret->underlying_type), - &type_ret->type); + case DRGN_TYPE_ARRAY: { + uint8_t word_size; + err = drgn_program_word_size(obj->prog, &word_size); + if (err) + return err; + err = drgn_pointer_type_create(obj->prog, + drgn_type_type(type_ret->underlying_type), + word_size, + drgn_type_language(type_ret->underlying_type), + &type_ret->type); if (err) return err; 
type_ret->underlying_type = type_ret->type; break; + } case DRGN_TYPE_FUNCTION: { struct drgn_qualified_type function_type = { .type = type_ret->underlying_type, .qualifiers = type_ret->qualifiers, }; - - err = drgn_program_pointer_type(obj->prog, function_type, - drgn_type_language(type_ret->underlying_type), - &type_ret->type); + uint8_t word_size; + err = drgn_program_word_size(obj->prog, &word_size); + if (err) + return err; + err = drgn_pointer_type_create(obj->prog, function_type, + word_size, + drgn_type_language(type_ret->underlying_type), + &type_ret->type); if (err) return err; type_ret->underlying_type = type_ret->type; diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 94efffe24..ef6a4adcc 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -348,9 +348,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, return err; qualified_type.qualifiers = DRGN_QUALIFIER_CONST; len = strlen(prog->vmcoreinfo.osrelease); - err = drgn_program_array_type(prog, len + 1, - qualified_type, NULL, - &qualified_type.type); + err = drgn_array_type_create(prog, qualified_type, + len + 1, &drgn_language_c, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; diff --git a/libdrgn/object.c b/libdrgn/object.c index 8ca5ed697..7634ded8b 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -18,7 +18,7 @@ LIBDRGN_PUBLIC void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog) { obj->prog = prog; - obj->type = drgn_void_type(drgn_program_language(prog)); + obj->type = drgn_void_type(prog, NULL); obj->bit_size = 0; obj->qualifiers = 0; obj->kind = DRGN_OBJECT_NONE; @@ -1272,9 +1272,6 @@ UNARY_OP(not) LIBDRGN_PUBLIC struct drgn_error * drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) { - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - if (res->prog != obj->prog) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are 
from different programs"); @@ -1290,13 +1287,20 @@ drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) "cannot take address of bit field"); } - err = drgn_program_pointer_type(obj->prog, - drgn_object_qualified_type(obj), NULL, - &qualified_type.type); + struct drgn_qualified_type qualified_type = + drgn_object_qualified_type(obj); + uint8_t word_size; + struct drgn_error *err = drgn_program_word_size(obj->prog, &word_size); if (err) return err; - qualified_type.qualifiers = 0; - return drgn_object_set_unsigned(res, qualified_type, + struct drgn_qualified_type result_type; + err = drgn_pointer_type_create(obj->prog, qualified_type, word_size, + drgn_type_language(qualified_type.type), + &result_type.type); + if (err) + return err; + result_type.qualifiers = 0; + return drgn_object_set_unsigned(res, result_type, obj->reference.address, 0); } @@ -1388,11 +1392,6 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, struct drgn_qualified_type qualified_type, const char *member_designator) { - const struct drgn_language *lang = drgn_object_language(obj); - struct drgn_error *err; - uint64_t address, bit_offset; - struct drgn_qualified_type result_type; - if (res->prog != obj->prog) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); @@ -1404,8 +1403,12 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, obj->type); } - err = lang->bit_offset(obj->prog, qualified_type.type, - member_designator, &bit_offset); + const struct drgn_language *lang = drgn_object_language(obj); + uint64_t bit_offset; + struct drgn_error *err = lang->bit_offset(obj->prog, + qualified_type.type, + member_designator, + &bit_offset); if (err) return err; if (bit_offset % 8) { @@ -1413,12 +1416,19 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, "container_of() member is not byte-aligned"); } + uint64_t address; err = 
drgn_object_value_unsigned(obj, &address); if (err) return err; - err = drgn_program_pointer_type(obj->prog, qualified_type, NULL, - &result_type.type); + uint8_t word_size; + err = drgn_program_word_size(obj->prog, &word_size); + if (err) + return err; + struct drgn_qualified_type result_type; + err = drgn_pointer_type_create(obj->prog, qualified_type, word_size, + drgn_type_language(qualified_type.type), + &result_type.type); if (err) return err; result_type.qualifiers = 0; diff --git a/libdrgn/program.h b/libdrgn/program.h index 386a90316..bb01bd89f 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -20,6 +20,7 @@ #include "hash_table.h" #include "memory_reader.h" #include "object_index.h" +#include "language.h" #include "platform.h" #include "type.h" #include "vector.h" @@ -54,11 +55,11 @@ struct vmcoreinfo { bool pgtable_l5_enabled; }; +DEFINE_VECTOR_TYPE(drgn_typep_vector, struct drgn_type *) DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct string) DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct string) struct drgn_dwarf_info_cache; -struct drgn_dwarf_index; struct drgn_program { /** @privatesection */ @@ -84,14 +85,27 @@ struct drgn_program { */ /** Callbacks for finding types. */ struct drgn_type_finder *type_finders; + /** Void type for each language. */ + struct drgn_type void_types[DRGN_NUM_LANGUAGES]; /** Cache of primitive types. */ struct drgn_type *primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; - struct drgn_type default_size_t; - struct drgn_type default_ptrdiff_t; - /** Cache of created pointer types. */ - struct drgn_pointer_type_table pointer_types; - /** Cache of created array types. */ - struct drgn_array_type_table array_types; + /** Cache of deduplicated types. */ + struct drgn_dedupe_type_set dedupe_types; + /** + * List of created types that cannot be deduplicated. + * + * Complete structure, union, and class types, as well as function + * types, refer to lazily-evaluated types, so they cannot be easily + * deduplicated. 
+ * + * Complete enumerated types could be deduplicated, but it's probably + * not worth the effort of hashing and comparing long lists of + * enumerators. + * + * All other types, including incomplete structure, union, class, and + * enumerated types, are deduplicated. + */ + struct drgn_typep_vector created_types; /** Cache for @ref drgn_program_find_member(). */ struct drgn_member_map members; /** @@ -228,6 +242,17 @@ drgn_program_is_64_bit(struct drgn_program *prog, bool *ret) return NULL; } +static inline struct drgn_error * +drgn_program_word_size(struct drgn_program *prog, uint8_t *ret) +{ + bool is_64_bit; + struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); + if (err) + return err; + *ret = is_64_bit ? 8 : 4; + return NULL; +} + struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret); /** diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 53a41d159..1fd6f892b 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -43,28 +43,14 @@ typedef struct { } DrgnObject; typedef struct { - PyObject_VAR_HEAD + PyObject_HEAD + struct drgn_type *type; enum drgn_qualifiers qualifiers; /* - * This serves two purposes: it caches attributes which were previously - * converted from a struct drgn_type member, and it keeps a reference to - * any objects which are referenced internally by _type. For example, in - * order to avoid doing a strdup(), we can set the name of a type - * directly to PyUnicode_AsUTF8(s). This is only valid as long as s is - * alive, so we store it here. + * Cache of attributes which were previously converted from a struct + * drgn_type member or used to create the type. */ PyObject *attr_cache; - /* - * A Type object can wrap a struct drgn_type created elsewhere, or it - * can have an embedded struct drgn_type. In the latter case, type - * points to _type. 
- */ - struct drgn_type *type; - union { - struct drgn_type _type[0]; - /* An object which must be kept alive for type to be valid. */ - PyObject *parent; - }; } DrgnType; typedef struct { @@ -126,38 +112,31 @@ typedef struct { PyObject *value; } TypeEnumerator; -/* - * LazyType.obj is a tagged pointer to a PyObject. If the - * DRGNPY_LAZY_TYPE_UNEVALUATED flag is unset, then LazyType.obj is the - * evaluated Type. If it is set and LazyType.lazy_type is set, then LazyType.obj - * is the parent Type and LazyType.lazy_type must be evaluated and wrapped. If - * the flag is set and LazyType.lazy_type is not set, then LazyType.obj is a - * Python callable that should return the Type. - */ -enum { - DRGNPY_LAZY_TYPE_UNEVALUATED = 1, - DRGNPY_LAZY_TYPE_MASK = ~(uintptr_t)1, -}; -static_assert(alignof(PyObject) >= 2, "PyObject is not aligned"); - -#define LazyType_HEAD \ - PyObject_HEAD \ - uintptr_t obj; \ - struct drgn_lazy_type *lazy_type; - typedef struct { - LazyType_HEAD + PyObject_HEAD + enum { + /* obj is the evaluated Type. */ + DRGNPY_LAZY_TYPE_EVALUATED, + /* lazy_type must be evaluated and wrapped. */ + DRGNPY_LAZY_TYPE_UNEVALUATED, + /* obj is a Python callable that should return the Type. 
*/ + DRGNPY_LAZY_TYPE_CALLABLE, + } state; + union { + PyObject *obj; + struct drgn_lazy_type *lazy_type; + }; } LazyType; typedef struct { - LazyType_HEAD + LazyType lazy_type; PyObject *name; PyObject *bit_offset; PyObject *bit_field_size; } TypeMember; typedef struct { - LazyType_HEAD + LazyType lazy_type; PyObject *name; } TypeParameter; @@ -222,6 +201,8 @@ DrgnObject *DrgnObject_container_of(PyObject *self, PyObject *args, PyObject *Platform_wrap(const struct drgn_platform *platform); +int Program_hold_object(Program *prog, PyObject *obj); +bool Program_hold_reserve(Program *prog, size_t n); int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, struct drgn_qualified_type *ret); Program *program_from_core_dump(PyObject *self, PyObject *args, PyObject *kwds); @@ -230,29 +211,25 @@ Program *program_from_pid(PyObject *self, PyObject *args, PyObject *kwds); PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog); -static inline PyObject *DrgnType_parent(DrgnType *type) +static inline Program *DrgnType_prog(DrgnType *type) { - if (type->type == type->_type) - return (PyObject *)type; - else - return type->parent; + return container_of(drgn_type_program(type->type), Program, prog); } -PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type, - PyObject *parent); +PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type); int qualifiers_converter(PyObject *arg, void *result); -DrgnType *void_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *int_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *bool_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *float_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *complex_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *struct_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *union_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *class_type(PyObject *self, PyObject *args, PyObject 
*kwds); -DrgnType *enum_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *typedef_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *pointer_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *array_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *function_type(PyObject *self, PyObject *args, PyObject *kwds); +DrgnType *Program_void_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_int_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_bool_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_float_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_complex_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_struct_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_union_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_class_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_enum_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_typedef_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_pointer_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds); int append_string(PyObject *parts, const char *s); int append_format(PyObject *parts, const char *format, ...); diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 52d24878d..646793d62 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -88,33 +88,6 @@ static PyMethodDef drgn_methods[] = { METH_NOARGS, drgn_program_from_kernel_DOC}, {"program_from_pid", (PyCFunction)program_from_pid, METH_VARARGS | METH_KEYWORDS, drgn_program_from_pid_DOC}, - {"void_type", (PyCFunction)void_type, METH_VARARGS | METH_KEYWORDS, - drgn_void_type_DOC}, - {"int_type", (PyCFunction)int_type, 
METH_VARARGS | METH_KEYWORDS, - drgn_int_type_DOC}, - {"bool_type", (PyCFunction)bool_type, METH_VARARGS | METH_KEYWORDS, - drgn_bool_type_DOC}, - {"float_type", (PyCFunction)float_type, METH_VARARGS | METH_KEYWORDS, - drgn_float_type_DOC}, - {"complex_type", (PyCFunction)complex_type, - METH_VARARGS | METH_KEYWORDS, drgn_complex_type_DOC}, - {"struct_type", (PyCFunction)struct_type, METH_VARARGS | METH_KEYWORDS, - drgn_struct_type_DOC}, - {"union_type", (PyCFunction)union_type, METH_VARARGS | METH_KEYWORDS, - drgn_union_type_DOC}, - {"class_type", (PyCFunction)class_type, METH_VARARGS | METH_KEYWORDS, - drgn_class_type_DOC}, - {"enum_type", (PyCFunction)enum_type, METH_VARARGS | METH_KEYWORDS, - drgn_enum_type_DOC}, - {"typedef_type", (PyCFunction)typedef_type, - METH_VARARGS | METH_KEYWORDS, - drgn_typedef_type_DOC}, - {"pointer_type", (PyCFunction)pointer_type, - METH_VARARGS | METH_KEYWORDS, drgn_pointer_type_DOC}, - {"array_type", (PyCFunction)array_type, METH_VARARGS | METH_KEYWORDS, - drgn_array_type_DOC}, - {"function_type", (PyCFunction)function_type, - METH_VARARGS | METH_KEYWORDS, drgn_function_type_DOC}, {"_linux_helper_read_vm", (PyCFunction)drgnpy_linux_helper_read_vm, METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_radix_tree_lookup", diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index e5b8389a3..4e9b6d9b7 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -1037,8 +1037,7 @@ static Program *DrgnObject_get_prog(DrgnObject *self, void *arg) static PyObject *DrgnObject_get_type(DrgnObject *self, void *arg) { - return DrgnType_wrap(drgn_object_qualified_type(&self->obj), - (PyObject *)DrgnObject_prog(self)); + return DrgnType_wrap(drgn_object_qualified_type(&self->obj)); } static PyObject *DrgnObject_get_address(DrgnObject *self, void *arg) diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 9601f71c6..ada225a46 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -7,7 +7,7 @@ 
DEFINE_HASH_TABLE_FUNCTIONS(pyobjectp_set, hash_pair_ptr_type, hash_table_scalar_eq) -static int Program_hold_object(Program *prog, PyObject *obj) +int Program_hold_object(Program *prog, PyObject *obj) { if (pyobjectp_set_insert(&prog->objects, &obj, NULL) == -1) return -1; @@ -15,15 +15,10 @@ static int Program_hold_object(Program *prog, PyObject *obj) return 0; } -static int Program_hold_type(Program *prog, DrgnType *type) +bool Program_hold_reserve(Program *prog, size_t n) { - PyObject *parent; - - parent = DrgnType_parent(type); - if (parent && parent != (PyObject *)prog) - return Program_hold_object(prog, parent); - else - return 0; + return pyobjectp_set_reserve(&prog->objects, + pyobjectp_set_size(&prog->objects) + n); } int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, @@ -32,8 +27,11 @@ int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, struct drgn_error *err; if (PyObject_TypeCheck(type_obj, &DrgnType_type)) { - if (Program_hold_type(prog, (DrgnType *)type_obj) == -1) + if (DrgnType_prog((DrgnType *)type_obj) != prog) { + PyErr_SetString(PyExc_ValueError, + "type is from different program"); return -1; + } ret->type = ((DrgnType *)type_obj)->type; ret->qualifiers = ((DrgnType *)type_obj)->qualifiers; } else if (PyUnicode_Check(type_obj)) { @@ -241,8 +239,15 @@ static struct drgn_error *py_type_find_fn(enum drgn_type_kind kind, err = drgn_error_from_python(); goto out_type_obj; } - if (Program_hold_type((Program *)PyTuple_GET_ITEM(arg, 0), - (DrgnType *)type_obj) == -1) { + /* + * This check is also done in libdrgn, but we need it here because if + * the type isn't from this program, then there's no guarantee that it + * will remain valid after we decrement its reference count. 
+ */ + if (DrgnType_prog((DrgnType *)type_obj) != + (Program *)PyTuple_GET_ITEM(arg, 0)) { + PyErr_SetString(PyExc_ValueError, + "type find callback returned type from wrong program"); err = drgn_error_from_python(); goto out_type_obj; } @@ -596,36 +601,7 @@ static PyObject *Program_find_type(Program *self, PyObject *args, PyObject *kwds path_cleanup(&filename); if (err) return set_drgn_error(err); - return DrgnType_wrap(qualified_type, (PyObject *)self); -} - -static PyObject *Program_pointer_type(Program *self, PyObject *args, - PyObject *kwds) -{ - static char *keywords[] = {"type", "qualifiers", "language", NULL}; - struct drgn_error *err; - PyObject *referenced_type_obj; - struct drgn_qualified_type referenced_type; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - struct drgn_qualified_type qualified_type; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&$O&:pointer_type", - keywords, &referenced_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) - return NULL; - - if (Program_type_arg(self, referenced_type_obj, false, - &referenced_type) == -1) - return NULL; - - err = drgn_program_pointer_type(&self->prog, referenced_type, language, - &qualified_type.type); - if (err) - return set_drgn_error(err); - qualified_type.qualifiers = qualifiers; - return DrgnType_wrap(qualified_type, (PyObject *)self); + return DrgnType_wrap(qualified_type); } static DrgnObject *Program_find_object(Program *self, const char *name, @@ -914,8 +890,6 @@ static PyMethodDef Program_methods[] = { #undef METHOD_READ_U {"type", (PyCFunction)Program_find_type, METH_VARARGS | METH_KEYWORDS, drgn_Program_type_DOC}, - {"pointer_type", (PyCFunction)Program_pointer_type, - METH_VARARGS | METH_KEYWORDS, drgn_Program_pointer_type_DOC}, {"object", (PyCFunction)Program_object, METH_VARARGS | METH_KEYWORDS, drgn_Program_object_DOC}, {"constant", (PyCFunction)Program_constant, @@ -928,6 +902,32 @@ static PyMethodDef Program_methods[] = 
{ METH_VARARGS | METH_KEYWORDS, drgn_Program_stack_trace_DOC}, {"symbol", (PyCFunction)Program_symbol, METH_O, drgn_Program_symbol_DOC}, + {"void_type", (PyCFunction)Program_void_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_void_type_DOC}, + {"int_type", (PyCFunction)Program_int_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_int_type_DOC}, + {"bool_type", (PyCFunction)Program_bool_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_bool_type_DOC}, + {"float_type", (PyCFunction)Program_float_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_float_type_DOC}, + {"complex_type", (PyCFunction)Program_complex_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_complex_type_DOC}, + {"struct_type", (PyCFunction)Program_struct_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_struct_type_DOC}, + {"union_type", (PyCFunction)Program_union_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_union_type_DOC}, + {"class_type", (PyCFunction)Program_class_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_class_type_DOC}, + {"enum_type", (PyCFunction)Program_enum_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_enum_type_DOC}, + {"typedef_type", (PyCFunction)Program_typedef_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_typedef_type_DOC}, + {"pointer_type", (PyCFunction)Program_pointer_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_pointer_type_DOC}, + {"array_type", (PyCFunction)Program_array_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_array_type_DOC}, + {"function_type", (PyCFunction)Program_function_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_function_type_DOC}, {}, }; diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 649dcfff2..86997772c 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -9,132 +9,40 @@ static const char *drgn_type_kind_str(struct drgn_type *type) return drgn_type_kind_spelling[drgn_type_kind(type)]; } -static DrgnType *DrgnType_new(enum drgn_qualifiers qualifiers) +DRGNPY_PUBLIC PyObject 
*DrgnType_wrap(struct drgn_qualified_type qualified_type) { - DrgnType *type_obj; - - type_obj = (DrgnType *)DrgnType_type.tp_alloc(&DrgnType_type, 1); - if (!type_obj) - return NULL; - type_obj->qualifiers = qualifiers; - type_obj->attr_cache = PyDict_New(); - if (!type_obj->attr_cache) { - Py_DECREF(type_obj); - return NULL; - } - type_obj->type = type_obj->_type; - return type_obj; -} - -DRGNPY_PUBLIC PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type, - PyObject *parent) -{ - DrgnType *type_obj; - - type_obj = (DrgnType *)DrgnType_type.tp_alloc(&DrgnType_type, 0); + DrgnType *type_obj = (DrgnType *)DrgnType_type.tp_alloc(&DrgnType_type, + 0); if (!type_obj) return NULL; + type_obj->type = qualified_type.type; type_obj->qualifiers = qualified_type.qualifiers; + Py_INCREF(DrgnType_prog(type_obj)); type_obj->attr_cache = PyDict_New(); if (!type_obj->attr_cache) { Py_DECREF(type_obj); return NULL; } - type_obj->type = qualified_type.type; - if (parent) { - Py_INCREF(parent); - type_obj->parent = parent; - } return (PyObject *)type_obj; } -static DrgnType *LazyType_get_borrowed(LazyType *self) -{ - if (unlikely(self->obj & DRGNPY_LAZY_TYPE_UNEVALUATED)) { - PyObject *obj; - PyObject *type; - - obj = (PyObject *)(self->obj & DRGNPY_LAZY_TYPE_MASK); - if (self->lazy_type) { - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - bool clear = false; - - /* Avoid the thread state overhead if we can. 
*/ - if (!drgn_lazy_type_is_evaluated(self->lazy_type)) - clear = set_drgn_in_python(); - err = drgn_lazy_type_evaluate(self->lazy_type, - &qualified_type); - if (clear) - clear_drgn_in_python(); - if (err) { - set_drgn_error(err); - return NULL; - } - type = DrgnType_wrap(qualified_type, obj); - if (!type) - return NULL; - } else { - type = PyObject_CallObject(obj, NULL); - if (!type) - return NULL; - if (!PyObject_TypeCheck(type, &DrgnType_type)) { - Py_DECREF(type); - PyErr_SetString(PyExc_TypeError, - "type callable must return Type"); - return NULL; - } - } - Py_DECREF(obj); - self->obj = (uintptr_t)type; - } - return (DrgnType *)self->obj; -} - -static DrgnType *LazyType_get(LazyType *self, void *arg) -{ - DrgnType *ret; - - ret = LazyType_get_borrowed(self); - Py_XINCREF(ret); - return ret; -} - -struct py_type_thunk { - struct drgn_type_thunk thunk; - LazyType *lazy_type; -}; - -static struct drgn_error * -py_type_thunk_evaluate_fn(struct drgn_type_thunk *thunk, - struct drgn_qualified_type *ret) +static inline struct drgn_qualified_type DrgnType_unwrap(DrgnType *type) { - struct py_type_thunk *t = container_of(thunk, struct py_type_thunk, thunk); - PyGILState_STATE gstate; - struct drgn_error *err = NULL; - DrgnType *type; - - gstate = PyGILState_Ensure(); - type = LazyType_get_borrowed(t->lazy_type); - if (!type) { - err = drgn_error_from_python(); - goto out; - } - ret->type = type->type; - ret->qualifiers = type->qualifiers; -out: - PyGILState_Release(gstate); - return err; + return (struct drgn_qualified_type){ + .type = type->type, + .qualifiers = type->qualifiers, + }; } -static void py_type_thunk_free_fn(struct drgn_type_thunk *thunk) +static PyObject *DrgnType_get_ptr(DrgnType *self, void *arg) { - free(container_of(thunk, struct py_type_thunk, thunk)); + return PyLong_FromVoidPtr(self->type); } -static PyObject *DrgnType_get_ptr(DrgnType *self, void *arg) +static Program *DrgnType_get_prog(DrgnType *self, void *arg) { - return 
PyLong_FromVoidPtr(self->type); + Py_INCREF(DrgnType_prog(self)); + return DrgnType_prog(self); } static PyObject *DrgnType_get_kind(DrgnType *self) @@ -232,12 +140,10 @@ static PyObject *DrgnType_get_type(DrgnType *self) drgn_type_kind_str(self->type)); } if (drgn_type_kind(self->type) == DRGN_TYPE_ENUM && - !drgn_type_is_complete(self->type)) { + !drgn_type_is_complete(self->type)) Py_RETURN_NONE; - } else { - return DrgnType_wrap(drgn_type_type(self->type), - (PyObject *)self); - } + else + return DrgnType_wrap(drgn_type_type(self->type)); } static PyObject *DrgnType_get_members(DrgnType *self) @@ -270,9 +176,8 @@ static PyObject *DrgnType_get_members(DrgnType *self) if (!item) goto err; PyTuple_SET_ITEM(members_obj, i, (PyObject *)item); - Py_INCREF(self); - item->obj = (uintptr_t)self | DRGNPY_LAZY_TYPE_UNEVALUATED; - item->lazy_type = &member->type; + item->lazy_type.state = DRGNPY_LAZY_TYPE_UNEVALUATED; + item->lazy_type.lazy_type = &member->type; if (member->name) { item->name = PyUnicode_FromString(member->name); if (!item->name) @@ -370,9 +275,8 @@ static PyObject *DrgnType_get_parameters(DrgnType *self) if (!item) goto err; PyTuple_SET_ITEM(parameters_obj, i, (PyObject *)item); - Py_INCREF(self); - item->obj = (uintptr_t)self | DRGNPY_LAZY_TYPE_UNEVALUATED; - item->lazy_type = &parameter->type; + item->lazy_type.state = DRGNPY_LAZY_TYPE_UNEVALUATED; + item->lazy_type.lazy_type = &parameter->type; if (parameter->name) { item->name = PyUnicode_FromString(parameter->name); if (!item->name) @@ -452,6 +356,7 @@ static PyGetSetDef DrgnType_getset[] = { "This is used for testing.\n" "\n" ":vartype: int"}, + {"prog", (getter)DrgnType_get_prog, NULL, drgn_Type_prog_DOC}, {"kind", (getter)DrgnType_getter, NULL, drgn_Type_kind_DOC, &DrgnType_attr_kind}, {"primitive", (getter)DrgnType_getter, NULL, drgn_Type_primitive_DOC, @@ -483,103 +388,32 @@ static PyGetSetDef DrgnType_getset[] = { {}, }; -static int type_arg(PyObject *arg, struct drgn_qualified_type *qualified_type, - 
DrgnType *type_obj) -{ - Py_INCREF(arg); - if (!PyObject_IsInstance(arg, (PyObject *)&DrgnType_type)) { - Py_DECREF(arg); - PyErr_SetString(PyExc_TypeError, "type must be Type"); - return -1; - } - - if (type_obj) { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_type.id, arg) == -1) { - Py_DECREF(arg); - return -1; - } - } - qualified_type->type = ((DrgnType *)arg)->type; - qualified_type->qualifiers = ((DrgnType *)arg)->qualifiers; - Py_DECREF(arg); - return 0; -} - -static int lazy_type_from_py(struct drgn_lazy_type *lazy_type, LazyType *obj) -{ - if (obj->obj & DRGNPY_LAZY_TYPE_UNEVALUATED) { - struct py_type_thunk *thunk; - - thunk = malloc(sizeof(*thunk)); - if (!thunk) { - PyErr_NoMemory(); - return -1; - } - thunk->thunk.evaluate_fn = py_type_thunk_evaluate_fn; - thunk->thunk.free_fn = py_type_thunk_free_fn; - thunk->lazy_type = obj; - drgn_lazy_type_init_thunk(lazy_type, &thunk->thunk); - } else { - DrgnType *type = (DrgnType *)obj->obj; - - drgn_lazy_type_init_evaluated(lazy_type, type->type, - type->qualifiers); - } - return 0; -} - static void DrgnType_dealloc(DrgnType *self) { - if (self->type != self->_type) { - Py_XDECREF(self->parent); - } else if (drgn_type_is_complete(self->type)) { - if (drgn_type_has_members(self->type)) { - struct drgn_type_member *members; - size_t num_members, i; - - members = drgn_type_members(self->type); - num_members = drgn_type_num_members(self->type); - for (i = 0; i < num_members; i++) - drgn_type_member_deinit(&members[i]); - free(members); - } - if (drgn_type_has_parameters(self->type)) { - struct drgn_type_parameter *parameters; - size_t num_parameters, i; - - parameters = drgn_type_parameters(self->type); - num_parameters = drgn_type_num_parameters(self->type); - for (i = 0; i < num_parameters; i++) - drgn_type_parameter_deinit(&parameters[i]); - free(parameters); - } - if (drgn_type_has_enumerators(self->type)) - free(drgn_type_enumerators(self->type)); - } Py_XDECREF(self->attr_cache); + if 
(self->type) + Py_DECREF(DrgnType_prog(self)); Py_TYPE(self)->tp_free((PyObject *)self); } static int DrgnType_traverse(DrgnType *self, visitproc visit, void *arg) { - if (self->type != self->_type) - Py_VISIT(self->parent); Py_VISIT(self->attr_cache); + if (self->type) + Py_VISIT(DrgnType_prog(self)); return 0; } static int DrgnType_clear(DrgnType *self) { - if (self->type != self->_type) - Py_CLEAR(self->parent); Py_CLEAR(self->attr_cache); + if (self->type) { + Py_DECREF(DrgnType_prog(self)); + self->type = NULL; + } return 0; } -#undef visit_type_thunks -#undef visit_lazy_type - static int append_field(PyObject *parts, bool *first, const char *format, ...) { va_list ap; @@ -677,7 +511,7 @@ static PyObject *DrgnType_repr(DrgnType *self) if (!parts) return NULL; - if (append_format(parts, "%s_type(", + if (append_format(parts, "prog.%s_type(", drgn_type_kind_str(self->type)) == -1) goto out; if (append_member(parts, self, &first, name) == -1) @@ -694,14 +528,34 @@ static PyObject *DrgnType_repr(DrgnType *self) goto join; } - if (append_member(parts, self, &first, size) == -1) - goto out_repr_leave; - if (append_member(parts, self, &first, length) == -1) + if (drgn_type_kind(self->type) != DRGN_TYPE_POINTER && + append_member(parts, self, &first, size) == -1) goto out_repr_leave; if (append_member(parts, self, &first, is_signed) == -1) goto out_repr_leave; if (append_member(parts, self, &first, type) == -1) goto out_repr_leave; + if (drgn_type_kind(self->type) == DRGN_TYPE_POINTER) { + bool print_size; + if (drgn_type_program(self->type)->has_platform) { + uint8_t word_size; + struct drgn_error *err = + drgn_program_word_size(drgn_type_program(self->type), + &word_size); + if (err) { + set_drgn_error(err); + goto out_repr_leave; + } + print_size = drgn_type_size(self->type) != word_size; + } else { + print_size = true; + } + if (print_size && + append_member(parts, self, &first, size) == -1) + goto out_repr_leave; + } + if (append_member(parts, self, &first, 
length) == -1) + goto out_repr_leave; if (append_member(parts, self, &first, members) == -1) goto out_repr_leave; if (append_member(parts, self, &first, enumerators) == -1) @@ -723,6 +577,15 @@ static PyObject *DrgnType_repr(DrgnType *self) } Py_DECREF(obj); } + if (drgn_type_language(self->type) != + drgn_program_language(drgn_type_program(self->type))) { + PyObject *obj = DrgnType_get_language(self, NULL); + if (append_field(parts, &first, "language=%R", obj) == -1) { + Py_DECREF(obj); + goto out_repr_leave; + } + Py_DECREF(obj); + } if (append_string(parts, ")") == -1) goto out_repr_leave; @@ -742,38 +605,25 @@ static PyObject *DrgnType_repr(DrgnType *self) static PyObject *DrgnType_str(DrgnType *self) { - struct drgn_qualified_type qualified_type = { - .type = self->type, - .qualifiers = self->qualifiers, - }; - struct drgn_error *err; - PyObject *ret; char *str; - - err = drgn_format_type(qualified_type, &str); + struct drgn_error *err = drgn_format_type(DrgnType_unwrap(self), &str); if (err) return set_drgn_error(err); - ret = PyUnicode_FromString(str); + PyObject *ret = PyUnicode_FromString(str); free(str); return ret; } static PyObject *DrgnType_type_name(DrgnType *self) { - struct drgn_qualified_type qualified_type = { - .type = self->type, - .qualifiers = self->qualifiers, - }; - struct drgn_error *err; - PyObject *ret; char *str; - - err = drgn_format_type_name(qualified_type, &str); + struct drgn_error *err = drgn_format_type_name(DrgnType_unwrap(self), + &str); if (err) return set_drgn_error(err); - ret = PyUnicode_FromString(str); + PyObject *ret = PyUnicode_FromString(str); free(str); return ret; } @@ -802,43 +652,34 @@ static PyObject *DrgnType_qualified(DrgnType *self, PyObject *args, { static char *keywords[] = { "qualifiers", NULL, }; unsigned char qualifiers; - struct drgn_qualified_type qualified_type; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:qualified", keywords, qualifiers_converter, &qualifiers)) return NULL; - qualified_type.type 
= self->type; - qualified_type.qualifiers = qualifiers; - return DrgnType_wrap(qualified_type, DrgnType_parent(self)); + struct drgn_qualified_type qualified_type = { + .type = self->type, + .qualifiers = qualifiers, + }; + return DrgnType_wrap(qualified_type); } static PyObject *DrgnType_unqualified(DrgnType *self) { - struct drgn_qualified_type qualified_type; - - qualified_type.type = self->type; - qualified_type.qualifiers = 0; - return DrgnType_wrap(qualified_type, DrgnType_parent(self)); + struct drgn_qualified_type qualified_type = { .type = self->type }; + return DrgnType_wrap(qualified_type); } static PyObject *DrgnType_richcompare(DrgnType *self, PyObject *other, int op) { - struct drgn_error *err; - struct drgn_qualified_type qualified_type1, qualified_type2; - bool clear; - bool ret; - if (!PyObject_TypeCheck(other, &DrgnType_type) || (op != Py_EQ && op != Py_NE)) Py_RETURN_NOTIMPLEMENTED; - clear = set_drgn_in_python(); - qualified_type1.type = self->type; - qualified_type1.qualifiers = self->qualifiers; - qualified_type2.type = ((DrgnType *)other)->type; - qualified_type2.qualifiers = ((DrgnType *)other)->qualifiers; - err = drgn_qualified_type_eq(qualified_type1, qualified_type2, &ret); + bool clear = set_drgn_in_python(); + bool ret; + struct drgn_error *err = drgn_qualified_type_eq(DrgnType_unwrap(self), + DrgnType_unwrap((DrgnType *)other), + &ret); if (clear) clear_drgn_in_python(); if (err) @@ -867,8 +708,6 @@ PyTypeObject DrgnType_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn.Type", .tp_basicsize = sizeof(DrgnType), - /* The "item" of a Type object is an optional struct drgn_type. 
*/ - .tp_itemsize = sizeof(struct drgn_type), .tp_dealloc = (destructor)DrgnType_dealloc, .tp_repr = (reprfunc)DrgnType_repr, .tp_str = (reprfunc)DrgnType_str, @@ -982,6 +821,58 @@ PyTypeObject TypeEnumerator_type = { .tp_new = (newfunc)TypeEnumerator_new, }; +static DrgnType *LazyType_get_borrowed(LazyType *self) +{ + if (unlikely(self->state != DRGNPY_LAZY_TYPE_EVALUATED)) { + PyObject *type; + if (self->state == DRGNPY_LAZY_TYPE_UNEVALUATED) { + bool clear = false; + /* Avoid the thread state overhead if we can. */ + if (!drgn_lazy_type_is_evaluated(self->lazy_type)) + clear = set_drgn_in_python(); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = + drgn_lazy_type_evaluate(self->lazy_type, + &qualified_type); + if (clear) + clear_drgn_in_python(); + if (err) + return set_drgn_error(err); + type = DrgnType_wrap(qualified_type); + if (!type) + return NULL; + } else { /* (self->state == DRGNPY_LAZY_TYPE_CALLABLE) */ + type = PyObject_CallObject(self->obj, NULL); + if (!type) + return NULL; + if (!PyObject_TypeCheck(type, &DrgnType_type)) { + Py_DECREF(type); + PyErr_SetString(PyExc_TypeError, + "type callable must return Type"); + return NULL; + } + Py_DECREF(self->obj); + } + self->state = DRGNPY_LAZY_TYPE_EVALUATED; + self->obj = type; + } + return (DrgnType *)self->obj; +} + +static DrgnType *LazyType_get(LazyType *self, void *arg) +{ + DrgnType *ret = LazyType_get_borrowed(self); + Py_XINCREF(ret); + return ret; +} + +static void LazyType_dealloc(LazyType *self) +{ + if (self->state != DRGNPY_LAZY_TYPE_UNEVALUATED) + Py_XDECREF(self->obj); + Py_TYPE(self)->tp_free((PyObject *)self); +} + static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { @@ -989,7 +880,7 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, "type", "name", "bit_offset", "bit_field_size", NULL }; PyObject *type_arg, *name = Py_None, *bit_offset = NULL, *bit_field_size = NULL; - uintptr_t obj; + int type_state; 
TypeMember *member; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO!O!:TypeMember", @@ -999,9 +890,9 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, return NULL; if (PyCallable_Check(type_arg)) { - obj = (uintptr_t)type_arg | DRGNPY_LAZY_TYPE_UNEVALUATED; + type_state = DRGNPY_LAZY_TYPE_CALLABLE; } else if (PyObject_TypeCheck(type_arg, &DrgnType_type)) { - obj = (uintptr_t)type_arg; + type_state = DRGNPY_LAZY_TYPE_EVALUATED; } else { PyErr_SetString(PyExc_TypeError, "TypeMember type must be type or callable returning Type"); @@ -1018,8 +909,9 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, if (!member) return NULL; + member->lazy_type.state = type_state; Py_INCREF(type_arg); - member->obj = obj; + member->lazy_type.obj = type_arg; Py_INCREF(name); member->name = name; @@ -1052,8 +944,7 @@ static void TypeMember_dealloc(TypeMember *self) Py_XDECREF(self->bit_field_size); Py_XDECREF(self->bit_offset); Py_XDECREF(self->name); - Py_XDECREF((PyObject *)(self->obj & DRGNPY_LAZY_TYPE_MASK)); - Py_TYPE(self)->tp_free((PyObject *)self); + LazyType_dealloc((LazyType *)self); } static PyObject *TypeMember_get_offset(TypeMember *self, void *arg) @@ -1163,7 +1054,7 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, { static char *keywords[] = {"type", "name", NULL}; PyObject *type_arg, *name = Py_None; - uintptr_t obj; + int type_state; TypeParameter *parameter; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:TypeParameter", @@ -1171,9 +1062,9 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, return NULL; if (PyCallable_Check(type_arg)) { - obj = (uintptr_t)type_arg | DRGNPY_LAZY_TYPE_UNEVALUATED; + type_state = DRGNPY_LAZY_TYPE_CALLABLE; } else if (PyObject_TypeCheck(type_arg, &DrgnType_type)) { - obj = (uintptr_t)type_arg; + type_state = DRGNPY_LAZY_TYPE_EVALUATED; } else { PyErr_SetString(PyExc_TypeError, "TypeParameter type must be type or callable 
returning Type"); @@ -1188,8 +1079,9 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, parameter = (TypeParameter *)subtype->tp_alloc(subtype, 0); if (parameter) { + parameter->lazy_type.state = type_state; Py_INCREF(type_arg); - parameter->obj = obj; + parameter->lazy_type.obj = type_arg; Py_INCREF(name); parameter->name = name; } @@ -1199,8 +1091,7 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, static void TypeParameter_dealloc(TypeParameter *self) { Py_XDECREF(self->name); - Py_XDECREF((PyObject *)(self->obj & DRGNPY_LAZY_TYPE_MASK)); - Py_TYPE(self)->tp_free((PyObject *)self); + LazyType_dealloc((LazyType *)self); } static PyObject *TypeParameter_repr(TypeParameter *self) @@ -1272,55 +1163,65 @@ PyTypeObject TypeParameter_type = { .tp_new = (newfunc)TypeParameter_new, }; -DrgnType *void_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_void_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { "qualifiers", "language", NULL, }; + static char *keywords[] = { "qualifiers", "language", NULL }; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - struct drgn_qualified_type qualified_type; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&$O&:void_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$O&O&:void_type", keywords, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; - qualified_type.type = drgn_void_type(language); - qualified_type.qualifiers = qualifiers; - return (DrgnType *)DrgnType_wrap(qualified_type, NULL); + struct drgn_qualified_type qualified_type = { + .type = drgn_void_type(&self->prog, language), + .qualifiers = qualifiers, + }; + return (DrgnType *)DrgnType_wrap(qualified_type); } -DrgnType *int_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_int_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "size", 
"is_signed", "qualifiers", "language", NULL, + "name", "size", "is_signed", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - unsigned long size; + struct index_arg size = {}; int is_signed; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!kp|O&$O&:int_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&p|$O&O&:int_type", keywords, &PyUnicode_Type, &name_obj, - &size, &is_signed, + index_converter, &size, &is_signed, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_int_type_init(type_obj->type, name, size, is_signed, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_int_type_create(&self->prog, name, + size.uvalue, is_signed, + language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + return NULL; - if (drgn_type_name(type_obj->type) == name && + if (drgn_type_name(qualified_type.type) == name && _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); @@ -1330,36 +1231,45 @@ DrgnType *int_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -DrgnType *bool_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_bool_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "size", "qualifiers", "language", NULL, + "name", "size", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject 
*name_obj; - const char *name; - unsigned long size; + struct index_arg size = {}; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!k|O&$O&:bool_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&|$O&O&:bool_type", keywords, &PyUnicode_Type, &name_obj, - &size, qualifiers_converter, - &qualifiers, language_converter, - &language)) + index_converter, &size, + qualifiers_converter, &qualifiers, + language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_bool_type_init(type_obj->type, name, size, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_bool_type_create(&self->prog, name, + size.uvalue, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + return NULL; - if (drgn_type_name(type_obj->type) == name && + if (drgn_type_name(qualified_type.type) == name && _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); @@ -1369,36 +1279,46 @@ DrgnType *bool_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -DrgnType *float_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_float_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "size", "qualifiers", "language", NULL, + "name", "size", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - unsigned long size; + struct index_arg size = {}; unsigned char qualifiers = 0; const struct 
drgn_language *language = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!k|O&$O&:float_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&|$O&O&:float_type", keywords, &PyUnicode_Type, &name_obj, - &size, qualifiers_converter, - &qualifiers, language_converter, - &language)) + index_converter, &size, + qualifiers_converter, &qualifiers, + language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_float_type_init(type_obj->type, name, size, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_float_type_create(&self->prog, name, + size.uvalue, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + return NULL; - if (drgn_type_name(type_obj->type) == name && + if (drgn_type_name(qualified_type.type) == name && _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); @@ -1408,61 +1328,65 @@ DrgnType *float_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -DrgnType *complex_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_complex_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { "name", "size", "type", "qualifiers", NULL, }; - DrgnType *type_obj; + static char *keywords[] = { + "name", "size", "type", "qualifiers", "language", NULL + }; PyObject *name_obj; - const char *name; - unsigned long size; - PyObject *real_type_obj; - struct drgn_type *real_type; + struct index_arg size = {}; + DrgnType *real_type_obj; unsigned char qualifiers = 0; const 
struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!kO|O&$O&:complex_type", - keywords, &PyUnicode_Type, &name_obj, - &size, &real_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O!O&O!|$O&O&:complex_type", keywords, + &PyUnicode_Type, &name_obj, + index_converter, &size, &DrgnType_type, + &real_type_obj, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - if (!PyObject_TypeCheck(real_type_obj, &DrgnType_type)) { - PyErr_SetString(PyExc_TypeError, - "complex_type() real type must be Type"); - return NULL; - } - real_type = ((DrgnType *)real_type_obj)->type; + struct drgn_type *real_type = real_type_obj->type; if (drgn_type_kind(real_type) != DRGN_TYPE_FLOAT && drgn_type_kind(real_type) != DRGN_TYPE_INT) { PyErr_SetString(PyExc_ValueError, "complex_type() real type must be floating-point or integer type"); return NULL; } - if (((DrgnType *)real_type_obj)->qualifiers) { + if (real_type_obj->qualifiers) { PyErr_SetString(PyExc_ValueError, "complex_type() real type must be unqualified"); return NULL; } - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_complex_type_init(type_obj->type, name, size, real_type, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_complex_type_create(&self->prog, name, + size.uvalue, + real_type, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); - if (drgn_type_name(type_obj->type) == name && - _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, - name_obj) == -1) { - Py_DECREF(type_obj); + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = 
(DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) return NULL; - } + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, - real_type_obj) == -1) { + (PyObject *)real_type_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, + name_obj) == -1) { Py_DECREF(type_obj); return NULL; } @@ -1470,55 +1394,128 @@ DrgnType *complex_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -static int unpack_member(struct drgn_type_member *members, - PyObject *cached_members_obj, size_t i) +struct py_type_thunk { + struct drgn_type_thunk thunk; + LazyType *lazy_type; +}; + +static struct drgn_error * +py_type_thunk_evaluate_fn(struct drgn_type_thunk *thunk, + struct drgn_qualified_type *ret) { - TypeMember *item; - const char *name; - unsigned long long bit_offset, bit_field_size; - struct drgn_lazy_type member_type; + struct py_type_thunk *t = container_of(thunk, struct py_type_thunk, thunk); + PyGILState_STATE gstate = PyGILState_Ensure(); + DrgnType *type = LazyType_get_borrowed(t->lazy_type); + struct drgn_error *err; + if (type) { + ret->type = type->type; + ret->qualifiers = type->qualifiers; + err = NULL; + } else { + err = drgn_error_from_python(); + } + PyGILState_Release(gstate); + return err; +} + +static void py_type_thunk_free_fn(struct drgn_type_thunk *thunk) +{ + free(container_of(thunk, struct py_type_thunk, thunk)); +} - item = (TypeMember *)PyTuple_GET_ITEM(cached_members_obj, i); +static int lazy_type_from_py(struct drgn_lazy_type *lazy_type, LazyType *obj, + struct drgn_program *prog, bool *can_cache) +{ + if (obj->state == DRGNPY_LAZY_TYPE_EVALUATED) { + DrgnType *type = (DrgnType *)obj->obj; + drgn_lazy_type_init_evaluated(lazy_type, type->type, + type->qualifiers); + } else { + struct py_type_thunk *thunk = malloc(sizeof(*thunk)); + if (!thunk) { + PyErr_NoMemory(); + return -1; + } + thunk->thunk.prog = prog; + thunk->thunk.evaluate_fn = py_type_thunk_evaluate_fn; + thunk->thunk.free_fn 
= py_type_thunk_free_fn; + thunk->lazy_type = obj; + drgn_lazy_type_init_thunk(lazy_type, &thunk->thunk); + /* + * We created a new thunk, so we can't reuse the passed + * LazyType. Don't cache the container so we create a new one + * when it's accessed. + */ + *can_cache = false; + } + return 0; +} + +static int unpack_member(struct drgn_compound_type_builder *builder, + PyObject *item, bool *can_cache) +{ if (!PyObject_TypeCheck((PyObject *)item, &TypeMember_type)) { PyErr_SetString(PyExc_TypeError, "member must be TypeMember"); return -1; } + TypeMember *member = (TypeMember *)item; - if (item->name == Py_None) { + const char *name; + if (member->name == Py_None) { name = NULL; } else { - name = PyUnicode_AsUTF8(item->name); + name = PyUnicode_AsUTF8(member->name); if (!name) return -1; } - bit_offset = PyLong_AsUnsignedLongLong(item->bit_offset); + unsigned long long bit_offset = + PyLong_AsUnsignedLongLong(member->bit_offset); if (bit_offset == (unsigned long long)-1 && PyErr_Occurred()) return -1; - bit_field_size = PyLong_AsUnsignedLongLong(item->bit_field_size); + unsigned long long bit_field_size = + PyLong_AsUnsignedLongLong(member->bit_field_size); if (bit_field_size == (unsigned long long)-1 && PyErr_Occurred()) return -1; - if (lazy_type_from_py(&member_type, (LazyType *)item) == -1) + struct drgn_lazy_type member_type; + if (lazy_type_from_py(&member_type, (LazyType *)member, + builder->prog, can_cache) == -1) return -1; - drgn_type_member_init(&members[i], member_type, name, bit_offset, - bit_field_size); + struct drgn_error *err = + drgn_compound_type_builder_add_member(builder, member_type, + name, bit_offset, + bit_field_size); + if (err) { + drgn_lazy_type_deinit(&member_type); + set_drgn_error(err); + return -1; + } return 0; } -static DrgnType *compound_type(PyObject *tag_obj, PyObject *size_obj, - PyObject *members_obj, - enum drgn_qualifiers qualifiers, - const struct drgn_language *language, - enum drgn_type_kind kind) +#define 
compound_type_arg_format "O|O&O$O&O&" + +static DrgnType *Program_compound_type(Program *self, PyObject *args, + PyObject *kwds, const char *arg_format, + enum drgn_type_kind kind) { - const char *tag; - DrgnType *type_obj = NULL; - unsigned long long size; - PyObject *cached_members_obj = NULL; - struct drgn_type_member *members = NULL; - size_t num_members; + static char *keywords[] = { + "tag", "size", "members", "qualifiers", "language", NULL + }; + PyObject *tag_obj; + struct index_arg size = { .allow_none = true, .is_none = true }; + PyObject *members_obj = Py_None; + unsigned char qualifiers = 0; + const struct drgn_language *language = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_format, keywords, + &tag_obj, index_converter, &size, + &members_obj, qualifiers_converter, + &qualifiers, language_converter, + &language)) + return NULL; + const char *tag; if (tag_obj == Py_None) { tag = NULL; } else if (PyUnicode_Check(tag_obj)) { @@ -1532,236 +1529,169 @@ static DrgnType *compound_type(PyObject *tag_obj, PyObject *size_obj, return NULL; } + PyObject *cached_members; + bool can_cache_members = true; + struct drgn_qualified_type qualified_type; + struct drgn_error *err; if (members_obj == Py_None) { - if (size_obj != Py_None) { + if (!size.is_none) { PyErr_Format(PyExc_ValueError, "incomplete %s type must not have size", drgn_type_kind_spelling[kind]); return NULL; } - } else { - size_t i; - if (size_obj == Py_None) { + if (!Program_hold_reserve(self, tag_obj != Py_None)) + return NULL; + + err = drgn_incomplete_compound_type_create(&self->prog, kind, + tag, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + cached_members = NULL; + } else { + if (size.is_none) { PyErr_Format(PyExc_ValueError, "%s type must have size", drgn_type_kind_spelling[kind]); return NULL; } - size = PyLong_AsUnsignedLongLong(size_obj); - if (size == (unsigned long long)-1) - return NULL; - if (!PySequence_Check(members_obj)) { 
PyErr_SetString(PyExc_TypeError, "members must be sequence or None"); return NULL; } - cached_members_obj = PySequence_Tuple(members_obj); - if (!cached_members_obj) + cached_members = PySequence_Tuple(members_obj); + if (!cached_members) return NULL; - num_members = PyTuple_GET_SIZE(cached_members_obj); - members = malloc_array(num_members, - sizeof(struct drgn_type_member)); - if (!members) - goto err; + size_t num_members = PyTuple_GET_SIZE(cached_members); + + struct drgn_compound_type_builder builder; + drgn_compound_type_builder_init(&builder, &self->prog, kind); + for (size_t i = 0; i < num_members; i++) { + if (unpack_member(&builder, + PyTuple_GET_ITEM(cached_members, i), + &can_cache_members) == -1) + goto err_builder; + } - for (i = 0; i < num_members; i++) { - if (unpack_member(members, cached_members_obj, i) == -1) - goto err; + if (!Program_hold_reserve(self, 1 + (tag_obj != Py_None))) + goto err_builder; + + err = drgn_compound_type_create(&builder, tag, size.uvalue, + language, &qualified_type.type); + if (err) { + set_drgn_error(err); +err_builder: + drgn_compound_type_builder_deinit(&builder); + goto err_members; } + + Program_hold_object(self, cached_members); } - type_obj = DrgnType_new(qualifiers); + if (tag_obj != Py_None && drgn_type_tag(qualified_type.type) == tag) + Program_hold_object(self, tag_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) - goto err; + goto err_members; if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_tag.id, - tag_obj) == -1) - goto err; - - if (members_obj == Py_None) { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_members.id, Py_None) == -1) - goto err; + tag_obj) == -1 || + (can_cache_members && + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_members.id, + cached_members ? 
+ cached_members : Py_None) == -1)) + goto err_type; + Py_XDECREF(cached_members); - switch (kind) { - case DRGN_TYPE_STRUCT: - drgn_struct_type_init_incomplete(type_obj->type, tag, - language); - break; - case DRGN_TYPE_UNION: - drgn_union_type_init_incomplete(type_obj->type, tag, - language); - break; - case DRGN_TYPE_CLASS: - drgn_class_type_init_incomplete(type_obj->type, tag, - language); - break; - default: - UNREACHABLE(); - } - } else { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_members.id, - cached_members_obj) == -1) - goto err; - Py_DECREF(cached_members_obj); - - switch (kind) { - case DRGN_TYPE_STRUCT: - drgn_struct_type_init(type_obj->type, tag, size, - members, num_members, language); - break; - case DRGN_TYPE_UNION: - drgn_union_type_init(type_obj->type, tag, size, members, - num_members, language); - break; - case DRGN_TYPE_CLASS: - drgn_class_type_init(type_obj->type, tag, size, members, - num_members, language); - break; - default: - UNREACHABLE(); - } - } return type_obj; -err: - Py_XDECREF(type_obj); - free(members); - Py_XDECREF(cached_members_obj); +err_type: + Py_DECREF(type_obj); +err_members: + Py_XDECREF(cached_members); return NULL; } -DrgnType *struct_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_struct_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { - "tag", "size", "members", "qualifiers", "language", NULL, - }; - PyObject *tag_obj; - PyObject *size_obj = Py_None; - PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:struct_type", - keywords, &tag_obj, &size_obj, - &members_obj, qualifiers_converter, - &qualifiers, language_converter, - &language)) - return NULL; - - return compound_type(tag_obj, size_obj, members_obj, qualifiers, - language, DRGN_TYPE_STRUCT); + return Program_compound_type(self, args, kwds, + compound_type_arg_format 
":struct_type", + DRGN_TYPE_STRUCT); } -DrgnType *union_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_union_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { - "tag", "size", "members", "qualifiers", "language", NULL, - }; - PyObject *tag_obj; - PyObject *size_obj = Py_None; - PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:union_type", - keywords, &tag_obj, &size_obj, - &members_obj, qualifiers_converter, - &qualifiers, language_converter, - &language)) - return NULL; - - return compound_type(tag_obj, size_obj, members_obj, qualifiers, - language, DRGN_TYPE_UNION); + return Program_compound_type(self, args, kwds, + compound_type_arg_format ":union_type", + DRGN_TYPE_UNION); } -DrgnType *class_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_class_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { - "tag", "size", "members", "qualifiers", "language", NULL, - }; - PyObject *tag_obj; - PyObject *size_obj = Py_None; - PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:class_type", - keywords, &tag_obj, &size_obj, - &members_obj, qualifiers_converter, - &qualifiers, language_converter, - &language)) - return NULL; - - return compound_type(tag_obj, size_obj, members_obj, qualifiers, - language, DRGN_TYPE_CLASS); + return Program_compound_type(self, args, kwds, + compound_type_arg_format ":class_type", + DRGN_TYPE_CLASS); } -static int unpack_enumerator(struct drgn_type_enumerator *enumerators, - PyObject *cached_enumerators_obj, - size_t i, bool is_signed) +static int unpack_enumerator(struct drgn_enum_type_builder *builder, + PyObject *item, bool is_signed) { - TypeEnumerator *item; - const char *name; - - item = 
(TypeEnumerator *)PyTuple_GET_ITEM(cached_enumerators_obj, i); - if (!PyObject_TypeCheck((PyObject *)item, &TypeEnumerator_type)) { + if (!PyObject_TypeCheck(item, &TypeEnumerator_type)) { PyErr_SetString(PyExc_TypeError, "enumerator must be TypeEnumerator"); return -1; } + TypeEnumerator *enumerator = (TypeEnumerator *)item; - name = PyUnicode_AsUTF8(item->name); + const char *name = PyUnicode_AsUTF8(enumerator->name); if (!name) return -1; + struct drgn_error *err; if (is_signed) { - long long svalue; - - svalue = PyLong_AsLongLong(item->value); + long long svalue = PyLong_AsLongLong(enumerator->value); if (svalue == -1 && PyErr_Occurred()) return -1; - drgn_type_enumerator_init_signed(&enumerators[i], name, - svalue); + err = drgn_enum_type_builder_add_signed(builder, name, svalue); } else { - unsigned long long uvalue; - - uvalue = PyLong_AsUnsignedLongLong(item->value); + unsigned long long uvalue = + PyLong_AsUnsignedLongLong(enumerator->value); if (uvalue == (unsigned long long)-1 && PyErr_Occurred()) return -1; - drgn_type_enumerator_init_unsigned(&enumerators[i], name, - uvalue); + err = drgn_enum_type_builder_add_unsigned(builder, name, + uvalue); + } + if (err) { + set_drgn_error(err); + return -1; } return 0; } -DrgnType *enum_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_enum_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "tag", "type", "enumerators", "qualifiers", "language", NULL, + "tag", "type", "enumerators", "qualifiers", "language", NULL }; - DrgnType *type_obj = NULL; PyObject *tag_obj; - const char *tag; PyObject *compatible_type_obj = Py_None; - struct drgn_type *compatible_type; PyObject *enumerators_obj = Py_None; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - PyObject *cached_enumerators_obj = NULL; - struct drgn_type_enumerator *enumerators = NULL; - size_t num_enumerators; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:enum_type", + 
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO$O&O&:enum_type", keywords, &tag_obj, &compatible_type_obj, &enumerators_obj, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; + const char *tag; if (tag_obj == Py_None) { tag = NULL; } else if (PyUnicode_Check(tag_obj)) { @@ -1774,316 +1704,369 @@ DrgnType *enum_type(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (compatible_type_obj == Py_None) { - compatible_type = NULL; - } else if (PyObject_TypeCheck(compatible_type_obj, &DrgnType_type)) { - compatible_type = ((DrgnType *)compatible_type_obj)->type; - if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { - PyErr_SetString(PyExc_ValueError, - "enum_type() compatible type must be integer type"); - return NULL; - } - if (((DrgnType *)compatible_type_obj)->qualifiers) { - PyErr_SetString(PyExc_ValueError, - "enum_type() compatible type must be unqualified"); - return NULL; - } - } else { + if (compatible_type_obj != Py_None && + !PyObject_TypeCheck(compatible_type_obj, &DrgnType_type)) { PyErr_SetString(PyExc_TypeError, "enum_type() compatible type must be Type or None"); return NULL; } + PyObject *cached_enumerators; + struct drgn_qualified_type qualified_type; + struct drgn_error *err; if (enumerators_obj == Py_None) { - if (compatible_type) { + if (compatible_type_obj != Py_None) { PyErr_SetString(PyExc_ValueError, "incomplete enum type must not have compatible type"); return NULL; } - num_enumerators = 0; - } else { - bool is_signed; - size_t i; - if (!compatible_type) { + if (!Program_hold_reserve(self, tag_obj != Py_None)) + return NULL; + + err = drgn_incomplete_enum_type_create(&self->prog, tag, + language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + cached_enumerators = NULL; + } else { + if (compatible_type_obj == Py_None) { PyErr_SetString(PyExc_ValueError, "enum type must have compatible type"); return NULL; } + struct drgn_type *compatible_type = + ((DrgnType 
*)compatible_type_obj)->type; + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + PyErr_SetString(PyExc_ValueError, + "enum_type() compatible type must be integer type"); + return NULL; + } + if (((DrgnType *)compatible_type_obj)->qualifiers) { + PyErr_SetString(PyExc_ValueError, + "enum_type() compatible type must be unqualified"); + return NULL; + } + if (!PySequence_Check(enumerators_obj)) { PyErr_SetString(PyExc_TypeError, "enumerators must be sequence or None"); return NULL; } - cached_enumerators_obj = PySequence_Tuple(enumerators_obj); - if (!cached_enumerators_obj) + cached_enumerators = PySequence_Tuple(enumerators_obj); + if (!cached_enumerators) return NULL; - - num_enumerators = PyTuple_GET_SIZE(cached_enumerators_obj); - enumerators = malloc_array(num_enumerators, - sizeof(struct drgn_type_enumerator)); - if (!enumerators) - goto err; - is_signed = drgn_type_is_signed(compatible_type); - for (i = 0; i < num_enumerators; i++) { - if (unpack_enumerator(enumerators, - cached_enumerators_obj, i, + size_t num_enumerators = PyTuple_GET_SIZE(cached_enumerators); + + struct drgn_enum_type_builder builder; + drgn_enum_type_builder_init(&builder, &self->prog); + bool is_signed = drgn_type_is_signed(compatible_type); + for (size_t i = 0; i < num_enumerators; i++) { + if (unpack_enumerator(&builder, + PyTuple_GET_ITEM(cached_enumerators, i), is_signed) == -1) - goto err; + goto err_builder; + } + + if (!Program_hold_reserve(self, 1 + (tag_obj != Py_None))) + goto err_builder; + + err = drgn_enum_type_create(&builder, tag, compatible_type, + language, &qualified_type.type); + if (err) { + set_drgn_error(err); +err_builder: + drgn_enum_type_builder_deinit(&builder); + goto err_enumerators; } + + Program_hold_object(self, cached_enumerators); } - type_obj = DrgnType_new(qualifiers); + if (tag_obj != Py_None && drgn_type_tag(qualified_type.type) == tag) + Program_hold_object(self, tag_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = 
(DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) - goto err; + goto err_enumerators; if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_tag.id, - tag_obj) == -1) - goto err; - if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, - compatible_type_obj) == -1) - goto err; - - if (enumerators_obj == Py_None) { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_enumerators.id, - Py_None) == -1) - goto err; + tag_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + compatible_type_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, + &DrgnType_attr_enumerators.id, + cached_enumerators ? + cached_enumerators : Py_None) == -1) + goto err_type; + Py_XDECREF(cached_enumerators); - drgn_enum_type_init_incomplete(type_obj->type, tag, language); - } else { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_enumerators.id, - cached_enumerators_obj) == -1) - goto err; - Py_DECREF(cached_enumerators_obj); - - drgn_enum_type_init(type_obj->type, tag, compatible_type, - enumerators, num_enumerators, language); - } return type_obj; -err: - Py_XDECREF(type_obj); - free(enumerators); - Py_XDECREF(cached_enumerators_obj); +err_type: + Py_DECREF(type_obj); +err_enumerators: + Py_XDECREF(cached_enumerators); return NULL; } -DrgnType *typedef_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_typedef_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "type", "qualifiers", "language", NULL, + "name", "type", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - PyObject *aliased_type_obj; - struct drgn_qualified_type aliased_type; + DrgnType *aliased_type_obj; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|O&$O&:typedef_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!|$O&O&:typedef_type", keywords, 
&PyUnicode_Type, &name_obj, - &aliased_type_obj, + &DrgnType_type, &aliased_type_obj, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - if (type_arg(aliased_type_obj, &aliased_type, type_obj) == -1) { - Py_DECREF(type_obj); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_typedef_type_create(&self->prog, name, + DrgnType_unwrap(aliased_type_obj), + language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) return NULL; - } - if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)aliased_type_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); return NULL; } - drgn_typedef_type_init(type_obj->type, name, aliased_type, language); return type_obj; } -DrgnType *pointer_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_pointer_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "size", "type", "qualifiers", "language", NULL, + "type", "size", "qualifiers", "language", NULL }; - DrgnType *type_obj; - unsigned long size; - PyObject *referenced_type_obj; - struct drgn_qualified_type referenced_type; + DrgnType *referenced_type_obj; + struct index_arg size = { .allow_none = true, .is_none = true }; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "kO|O&$O&:pointer_type", - keywords, 
&size, &referenced_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O&$O&O&:pointer_type", + keywords, &DrgnType_type, + &referenced_type_obj, index_converter, + &size, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; - type_obj = DrgnType_new(qualifiers); + if (size.is_none) { + uint8_t word_size; + struct drgn_error *err = drgn_program_word_size(&self->prog, + &word_size); + if (err) + return set_drgn_error(err); + size.uvalue = word_size; + } + + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_pointer_type_create(&self->prog, + DrgnType_unwrap(referenced_type_obj), + size.uvalue, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) return NULL; - if (type_arg(referenced_type_obj, &referenced_type, type_obj) == -1) { + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)referenced_type_obj) == -1) { Py_DECREF(type_obj); return NULL; } - drgn_pointer_type_init(type_obj->type, size, referenced_type, language); return type_obj; } -DrgnType *array_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "length", "type", "qualifiers", "language", NULL, + "type", "length", "qualifiers", "language", NULL }; - DrgnType *type_obj; - PyObject *length_obj; - unsigned long long length; - PyObject *element_type_obj; - struct drgn_qualified_type element_type; + DrgnType *element_type_obj; + struct index_arg length = { .allow_none = true, .is_none = true }; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O&$O&:array_type", - keywords, &length_obj, - &element_type_obj, - 
qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O&$O&O&:array_type", + keywords, &DrgnType_type, + &element_type_obj, index_converter, + &length, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; - if (length_obj == Py_None) { - length = 0; + struct drgn_qualified_type qualified_type; + struct drgn_error *err; + if (length.is_none) { + err = drgn_incomplete_array_type_create(&self->prog, + DrgnType_unwrap(element_type_obj), + language, + &qualified_type.type); } else { - if (!PyLong_Check(length_obj)) { - PyErr_SetString(PyExc_TypeError, - "length must be integer or None"); - return NULL; - } - length = PyLong_AsUnsignedLongLong(length_obj); - if (length == (unsigned long long)-1 && PyErr_Occurred()) - return NULL; + err = drgn_array_type_create(&self->prog, + DrgnType_unwrap(element_type_obj), + length.uvalue, language, + &qualified_type.type); } - - type_obj = DrgnType_new(qualifiers); + if (err) + return set_drgn_error(err); + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) return NULL; - if (type_arg(element_type_obj, &element_type, type_obj) == -1) { + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)element_type_obj) == -1) { Py_DECREF(type_obj); return NULL; } - if (length_obj == Py_None) { - drgn_array_type_init_incomplete(type_obj->type, element_type, - language); - } else { - drgn_array_type_init(type_obj->type, length, element_type, - language); - } return type_obj; } -static int unpack_parameter(struct drgn_type_parameter *parameters, - PyObject *cached_parameters_obj, size_t i) +static int unpack_parameter(struct drgn_function_type_builder *builder, + PyObject *item, bool *can_cache) { - TypeParameter *item; - const char *name; - struct drgn_lazy_type parameter_type; - - item = (TypeParameter *)PyTuple_GET_ITEM(cached_parameters_obj, i); - if 
(!PyObject_TypeCheck((PyObject *)item, &TypeParameter_type)) { - PyErr_SetString(PyExc_TypeError, "parameter must be TypeParameter"); + if (!PyObject_TypeCheck(item, &TypeParameter_type)) { + PyErr_SetString(PyExc_TypeError, + "parameter must be TypeParameter"); return -1; } + TypeParameter *parameter = (TypeParameter *)item; - if (item->name == Py_None) { + const char *name; + if (parameter->name == Py_None) { name = NULL; } else { - name = PyUnicode_AsUTF8(item->name); + name = PyUnicode_AsUTF8(parameter->name); if (!name) return -1; } - if (lazy_type_from_py(&parameter_type, (LazyType *)item) == -1) + struct drgn_lazy_type parameter_type; + if (lazy_type_from_py(&parameter_type, (LazyType *)parameter, + builder->prog, can_cache) == -1) + return -1; + struct drgn_error *err = + drgn_function_type_builder_add_parameter(builder, + parameter_type, name); + if (err) { + drgn_lazy_type_deinit(&parameter_type); + set_drgn_error(err); return -1; - drgn_type_parameter_init(&parameters[i], parameter_type, name); + } return 0; } -DrgnType *function_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { "type", "parameters", "is_variadic", "qualifiers", "language", NULL, }; - DrgnType *type_obj = NULL; - PyObject *return_type_obj; - struct drgn_qualified_type return_type; - PyObject *parameters_obj, *cached_parameters_obj = NULL; - struct drgn_type_parameter *parameters = NULL; - size_t num_parameters, i; + DrgnType *return_type_obj; + PyObject *parameters_obj; int is_variadic = 0; unsigned char qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|pO&$O&:function_type", - keywords, &return_type_obj, - &parameters_obj, &is_variadic, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|p$O&O&:function_type", + keywords, &DrgnType_type, + &return_type_obj,
&parameters_obj, + &is_variadic, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; if (!PySequence_Check(parameters_obj)) { PyErr_SetString(PyExc_TypeError, "parameters must be sequence"); return NULL; } - cached_parameters_obj = PySequence_Tuple(parameters_obj); - if (!cached_parameters_obj) + + PyObject *cached_parameters = PySequence_Tuple(parameters_obj); + if (!cached_parameters) return NULL; + size_t num_parameters = PyTuple_GET_SIZE(cached_parameters); + bool can_cache_parameters = true; - num_parameters = PyTuple_GET_SIZE(cached_parameters_obj); - parameters = malloc_array(num_parameters, - sizeof(struct drgn_type_parameter)); - if (!parameters) - goto err; - for (i = 0; i < num_parameters; i++) { - if (unpack_parameter(parameters, cached_parameters_obj, i) == -1) - goto err; + struct drgn_function_type_builder builder; + drgn_function_type_builder_init(&builder, &self->prog); + for (size_t i = 0; i < num_parameters; i++) { + if (unpack_parameter(&builder, + PyTuple_GET_ITEM(cached_parameters, i), + &can_cache_parameters) == -1) + goto err_builder; } - type_obj = DrgnType_new(qualifiers); - if (!type_obj) - goto err; + if (!Program_hold_reserve(self, 1)) + goto err_builder; - if (type_arg(return_type_obj, &return_type, type_obj) == -1) - goto err; + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_function_type_create(&builder, + DrgnType_unwrap(return_type_obj), + is_variadic, + language, + &qualified_type.type); + if (err) { + set_drgn_error(err); +err_builder: + drgn_function_type_builder_deinit(&builder); + goto err_parameters; + } + + Program_hold_object(self, cached_parameters); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + goto err_parameters; - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_parameters.id, - cached_parameters_obj) == -1) - goto err; - Py_DECREF(cached_parameters_obj); + if
(_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)return_type_obj) == -1 || + (can_cache_parameters && + _PyDict_SetItemId(type_obj->attr_cache, + &DrgnType_attr_parameters.id, + cached_parameters) == -1)) + goto err_type; + Py_DECREF(cached_parameters); - drgn_function_type_init(type_obj->type, return_type, parameters, - num_parameters, is_variadic, language); return type_obj; -err: - Py_XDECREF(type_obj); - free(parameters); - Py_XDECREF(cached_parameters_obj); +err_type: + Py_DECREF(type_obj); +err_parameters: + Py_DECREF(cached_parameters); return NULL; } diff --git a/libdrgn/type.c b/libdrgn/type.c index 2809ab10c..764f50786 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -147,51 +147,6 @@ drgn_primitive_type_is_signed(enum drgn_primitive_type primitive) } } -/* These functions compare the underlying type by reference, not by value. */ - -static struct hash_pair -drgn_pointer_type_key_hash(const struct drgn_pointer_type_key *key) -{ - size_t hash; - - hash = hash_combine((uintptr_t)key->type, key->qualifiers); - hash = hash_combine(hash, (uintptr_t)key->lang); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_pointer_type_key_eq(const struct drgn_pointer_type_key *a, - const struct drgn_pointer_type_key *b) -{ - return (a->type == b->type && a->qualifiers == b->qualifiers && - a->lang == b->lang); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_pointer_type_table, drgn_pointer_type_key_hash, - drgn_pointer_type_key_eq) - -static struct hash_pair -drgn_array_type_key_hash(const struct drgn_array_type_key *key) -{ - size_t hash; - - hash = hash_combine((uintptr_t)key->type, key->qualifiers); - hash = hash_combine(hash, key->is_complete); - hash = hash_combine(hash, key->length); - hash = hash_combine(hash, (uintptr_t)key->lang); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_array_type_key_eq(const struct drgn_array_type_key *a, - const struct drgn_array_type_key *b) -{ - return (a->type == 
b->type && a->qualifiers == b->qualifiers && - a->is_complete == b->is_complete && a->length == b->length && - a->lang == b->lang); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_array_type_table, drgn_array_type_key_hash, - drgn_array_type_key_eq) - static struct hash_pair drgn_member_hash_pair(const struct drgn_member_key *key) { size_t hash; @@ -216,27 +171,24 @@ DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_hash_pair, DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, hash_pair_ptr_type, hash_table_scalar_eq) -void drgn_type_thunk_free(struct drgn_type_thunk *thunk) -{ - thunk->free_fn(thunk); -} - struct drgn_error *drgn_lazy_type_evaluate(struct drgn_lazy_type *lazy_type, - struct drgn_qualified_type *qualified_type) + struct drgn_qualified_type *ret) { if (drgn_lazy_type_is_evaluated(lazy_type)) { - qualified_type->type = lazy_type->type; - qualified_type->qualifiers = lazy_type->qualifiers; + ret->type = lazy_type->type; + ret->qualifiers = lazy_type->qualifiers; } else { - struct drgn_error *err; struct drgn_type_thunk *thunk_ptr = lazy_type->thunk; struct drgn_type_thunk thunk = *thunk_ptr; - - err = thunk.evaluate_fn(thunk_ptr, qualified_type); + struct drgn_error *err = thunk.evaluate_fn(thunk_ptr, ret); if (err) return err; - drgn_lazy_type_init_evaluated(lazy_type, qualified_type->type, - qualified_type->qualifiers); + if (drgn_type_program(ret->type) != thunk.prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + drgn_lazy_type_init_evaluated(lazy_type, ret->type, + ret->qualifiers); thunk.free_fn(thunk_ptr); } return NULL; @@ -248,6 +200,19 @@ void drgn_lazy_type_deinit(struct drgn_lazy_type *lazy_type) drgn_type_thunk_free(lazy_type->thunk); } +static inline struct drgn_error * +drgn_lazy_type_check_prog(struct drgn_lazy_type *lazy_type, + struct drgn_program *prog) +{ + if ((drgn_lazy_type_is_evaluated(lazy_type) ? 
+ drgn_type_program(lazy_type->type) : + lazy_type->thunk->prog) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + return NULL; +} + LIBDRGN_PUBLIC struct drgn_error * drgn_member_type(struct drgn_type_member *member, struct drgn_qualified_type *ret) @@ -262,272 +227,605 @@ drgn_parameter_type(struct drgn_type_parameter *parameter, return drgn_lazy_type_evaluate(&parameter->type, ret); } -void drgn_int_type_init(struct drgn_type *type, const char *name, uint64_t size, - bool is_signed, const struct drgn_language *lang) +static struct hash_pair drgn_type_dedupe_hash(struct drgn_type * const *entry) { - enum drgn_primitive_type primitive; + struct drgn_type *type = *entry; + size_t hash = hash_combine(drgn_type_kind(type), + (uintptr_t)drgn_type_language(type)); + /* + * We don't dedupe complete compound or enumerated types, and typedefs + * inherit is_complete from the aliased type, so is_complete can only + * differ for otherwise equal array types. We implicitly include that in + * the hash with the is_complete check below, so we don't need to hash + * it explicitly.
+ */ + if (drgn_type_has_name(type)) { + const char *name = drgn_type_name(type); + hash = hash_combine(hash, cityhash_size_t(name, strlen(name))); + } + if (drgn_type_has_size(type)) + hash = hash_combine(hash, drgn_type_size(type)); + if (drgn_type_has_is_signed(type)) + hash = hash_combine(hash, drgn_type_is_signed(type)); + const char *tag; + if (drgn_type_has_tag(type) && (tag = drgn_type_tag(type))) + hash = hash_combine(hash, cityhash_size_t(tag, strlen(tag))); + if (drgn_type_has_type(type)) { + struct drgn_qualified_type qualified_type = + drgn_type_type(type); + hash = hash_combine(hash, (uintptr_t)qualified_type.type); + hash = hash_combine(hash, qualified_type.qualifiers); + } + if (drgn_type_has_length(type) && drgn_type_is_complete(type)) + hash = hash_combine(hash, drgn_type_length(type)); + return hash_pair_from_avalanching_hash(hash); +} - assert(name); - type->_private.kind = DRGN_TYPE_INT; - type->_private.is_complete = true; - primitive = c_parse_specifier_list(name); - if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_INT && - (primitive == DRGN_C_TYPE_CHAR || - is_signed == drgn_primitive_type_is_signed(primitive))) { - type->_private.primitive = primitive; - type->_private.name = - drgn_primitive_type_spellings[primitive][0]; - } else { - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; +static bool drgn_type_dedupe_eq(struct drgn_type * const *entry_a, + struct drgn_type * const *entry_b) +{ + struct drgn_type *a = *entry_a; + struct drgn_type *b = *entry_b; + + if (drgn_type_kind(a) != drgn_type_kind(b) || + drgn_type_language(a) != drgn_type_language(b) || + drgn_type_is_complete(a) != drgn_type_is_complete(b)) + return false; + if (drgn_type_has_name(a) && + strcmp(drgn_type_name(a), drgn_type_name(b)) != 0) + return false; + if (drgn_type_has_size(a) && drgn_type_size(a) != drgn_type_size(b)) + return false; + if (drgn_type_has_is_signed(a) && + drgn_type_is_signed(a) != drgn_type_is_signed(b)) + 
return false; + if (drgn_type_has_tag(a)) { + const char *tag_a = drgn_type_tag(a); + const char *tag_b = drgn_type_tag(b); + if ((!tag_a != !tag_b) || (tag_a && strcmp(tag_a, tag_b) != 0)) + return false; } - type->_private.size = size; - type->_private.is_signed = is_signed; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_has_type(a)) { + struct drgn_qualified_type type_a = drgn_type_type(a); + struct drgn_qualified_type type_b = drgn_type_type(b); + if (type_a.type != type_b.type || + type_a.qualifiers != type_b.qualifiers) + return false; + } + if (drgn_type_has_length(a) && + drgn_type_length(a) != drgn_type_length(b)) + return false; + return true; } -void drgn_bool_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang) +/* + * We don't deduplicate complete compound types, complete enumerated types, or + * function types, so the hash and comparison functions ignore members, + * enumerators, parameters, and is_variadic. 
+ */ +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dedupe_type_set, drgn_type_dedupe_hash, + drgn_type_dedupe_eq) + +DEFINE_VECTOR_FUNCTIONS(drgn_typep_vector) + +static struct drgn_error *find_or_create_type(struct drgn_type *key, + struct drgn_type **ret) { - assert(name); - type->_private.kind = DRGN_TYPE_BOOL; - type->_private.is_complete = true; - if (c_parse_specifier_list(name) == DRGN_C_TYPE_BOOL) { - type->_private.primitive = DRGN_C_TYPE_BOOL; - type->_private.name = - drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0]; - } else { - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; + struct drgn_program *prog = key->_private.program; + struct hash_pair hp = drgn_dedupe_type_set_hash(&key); + struct drgn_dedupe_type_set_iterator it = + drgn_dedupe_type_set_search_hashed(&prog->dedupe_types, &key, + hp); + if (it.entry) { + *ret = *it.entry; + return NULL; } - type->_private.size = size; - type->_private.language = drgn_language_or_default(lang); + + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + + *type = *key; + if (!drgn_dedupe_type_set_insert_searched(&prog->dedupe_types, &type, + hp, NULL)) { + free(type); + return &drgn_enomem; + } + *ret = type; + return NULL; } -void drgn_float_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang) +struct drgn_type *drgn_void_type(struct drgn_program *prog, + const struct drgn_language *lang) { - enum drgn_primitive_type primitive; + if (!lang) + lang = drgn_program_language(prog); + return &prog->void_types[lang - drgn_languages]; +} - assert(name); - type->_private.kind = DRGN_TYPE_FLOAT; - type->_private.is_complete = true; - primitive = c_parse_specifier_list(name); - if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_FLOAT) { - type->_private.primitive = primitive; - type->_private.name = - drgn_primitive_type_spellings[primitive][0]; - } else { - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - 
type->_private.name = name; +struct drgn_error *drgn_int_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + enum drgn_primitive_type primitive = c_parse_specifier_list(name); + if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_INT && + (primitive == DRGN_C_TYPE_CHAR || + is_signed == drgn_primitive_type_is_signed(primitive))) + name = drgn_primitive_type_spellings[primitive][0]; + else + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_INT, + .is_complete = true, + .primitive = primitive, + .name = name, + .size = size, + .is_signed = is_signed, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +struct drgn_error *drgn_bool_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + enum drgn_primitive_type primitive = c_parse_specifier_list(name); + if (primitive == DRGN_C_TYPE_BOOL) + name = drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0]; + else + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_BOOL, + .is_complete = true, + .primitive = primitive, + .name = name, + .size = size, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +struct drgn_error *drgn_float_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + enum drgn_primitive_type primitive = c_parse_specifier_list(name); + if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_FLOAT) + name = drgn_primitive_type_spellings[primitive][0]; + else + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_FLOAT, + .is_complete = true, + .primitive = primitive, + .name = name, + .size = size, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +struct drgn_error *drgn_complex_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + struct drgn_type *real_type, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + if (drgn_type_program(real_type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); } - type->_private.size = size; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_kind(real_type) != DRGN_TYPE_FLOAT && + drgn_type_kind(real_type) != DRGN_TYPE_INT) { + return drgn_error_create(DRGN_ERROR_TYPE, + "real type of complex type must be floating-point or integer type"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_COMPLEX, + .is_complete = true, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .name = name, + .size = size, + .type = real_type, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_complex_type_init(struct drgn_type *type, const char *name, - uint64_t size, struct drgn_type *real_type, - const struct drgn_language *lang) +DEFINE_VECTOR_FUNCTIONS(drgn_type_member_vector) + +void drgn_compound_type_builder_init(struct drgn_compound_type_builder *builder, + struct drgn_program *prog, + enum drgn_type_kind kind) { - assert(name); - assert(real_type); - assert(drgn_type_kind(real_type) == DRGN_TYPE_FLOAT || - drgn_type_kind(real_type) == DRGN_TYPE_INT); - type->_private.kind = DRGN_TYPE_COMPLEX; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; - type->_private.size = size; - type->_private.type = real_type; - type->_private.qualifiers = 0; - type->_private.language = drgn_language_or_default(lang); + assert(kind == DRGN_TYPE_STRUCT || + kind == DRGN_TYPE_UNION || + kind == DRGN_TYPE_CLASS); + builder->prog = prog; + builder->kind = kind; + drgn_type_member_vector_init(&builder->members); } -void drgn_struct_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang) +void +drgn_compound_type_builder_deinit(struct drgn_compound_type_builder *builder) { - type->_private.kind = DRGN_TYPE_STRUCT; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = size; - type->_private.members = members; - type->_private.num_members = num_members; - type->_private.language = drgn_language_or_default(lang); + for (size_t i = 0; i < builder->members.size; i++) + drgn_lazy_type_deinit(&builder->members.data[i].type); + drgn_type_member_vector_deinit(&builder->members); } -void drgn_struct_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * 
+drgn_compound_type_builder_add_member(struct drgn_compound_type_builder *builder, + struct drgn_lazy_type type, + const char *name, uint64_t bit_offset, + uint64_t bit_field_size) { - type->_private.kind = DRGN_TYPE_STRUCT; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = 0; - type->_private.members = NULL; - type->_private.num_members = 0; - type->_private.language = drgn_language_or_default(lang); + struct drgn_error *err = drgn_lazy_type_check_prog(&type, + builder->prog); + if (err) + return err; + struct drgn_type_member *member = + drgn_type_member_vector_append_entry(&builder->members); + if (!member) + return &drgn_enomem; + member->type = type; + member->name = name; + member->bit_offset = bit_offset; + member->bit_field_size = bit_field_size; + return NULL; } -void drgn_union_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang) +struct drgn_error * +drgn_compound_type_create(struct drgn_compound_type_builder *builder, + const char *tag, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_UNION; + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + if (!drgn_typep_vector_append(&builder->prog->created_types, &type)) { + free(type); + return &drgn_enomem; + } + + drgn_type_member_vector_shrink_to_fit(&builder->members); + + type->_private.kind = builder->kind; type->_private.is_complete = true; type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; type->_private.tag = tag; type->_private.size = size; - type->_private.members = members; - type->_private.num_members = num_members; - type->_private.language = drgn_language_or_default(lang); + type->_private.members = builder->members.data; + type->_private.num_members = builder->members.size; + type->_private.program = 
builder->prog; + type->_private.language = + lang ? lang : drgn_program_language(builder->prog); + *ret = type; + return NULL; } -void drgn_union_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * +drgn_incomplete_compound_type_create(struct drgn_program *prog, + enum drgn_type_kind kind, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + assert(kind == DRGN_TYPE_STRUCT || + kind == DRGN_TYPE_UNION || + kind == DRGN_TYPE_CLASS); + struct drgn_type key = { + { + .kind = kind, + .is_complete = false, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .tag = tag, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +DEFINE_VECTOR_FUNCTIONS(drgn_type_enumerator_vector) + +void drgn_enum_type_builder_init(struct drgn_enum_type_builder *builder, + struct drgn_program *prog) { - type->_private.kind = DRGN_TYPE_UNION; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = 0; - type->_private.members = NULL; - type->_private.num_members = 0; - type->_private.language = drgn_language_or_default(lang); + builder->prog = prog; + drgn_type_enumerator_vector_init(&builder->enumerators); } -void drgn_class_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang) +void drgn_enum_type_builder_deinit(struct drgn_enum_type_builder *builder) { - type->_private.kind = DRGN_TYPE_CLASS; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = size; - type->_private.members = members; - type->_private.num_members = num_members; - type->_private.language = drgn_language_or_default(lang); + drgn_type_enumerator_vector_deinit(&builder->enumerators); } 
-void drgn_class_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * +drgn_enum_type_builder_add_signed(struct drgn_enum_type_builder *builder, + const char *name, int64_t svalue) { - type->_private.kind = DRGN_TYPE_CLASS; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = 0; - type->_private.members = NULL; - type->_private.num_members = 0; - type->_private.language = drgn_language_or_default(lang); + struct drgn_type_enumerator *enumerator = + drgn_type_enumerator_vector_append_entry(&builder->enumerators); + if (!enumerator) + return &drgn_enomem; + enumerator->name = name; + enumerator->svalue = svalue; + return NULL; +} + +struct drgn_error * +drgn_enum_type_builder_add_unsigned(struct drgn_enum_type_builder *builder, + const char *name, uint64_t uvalue) +{ + struct drgn_type_enumerator *enumerator = + drgn_type_enumerator_vector_append_entry(&builder->enumerators); + if (!enumerator) + return &drgn_enomem; + enumerator->name = name; + enumerator->uvalue = uvalue; + return NULL; } -void drgn_enum_type_init(struct drgn_type *type, const char *tag, - struct drgn_type *compatible_type, - struct drgn_type_enumerator *enumerators, - size_t num_enumerators, - const struct drgn_language *lang) +struct drgn_error *drgn_enum_type_create(struct drgn_enum_type_builder *builder, + const char *tag, + struct drgn_type *compatible_type, + const struct drgn_language *lang, + struct drgn_type **ret) { - assert(drgn_type_kind(compatible_type) == DRGN_TYPE_INT); + if (drgn_type_program(compatible_type) != builder->prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + return drgn_error_create(DRGN_ERROR_TYPE, + "compatible type of enum type must be integer type"); + } + + struct drgn_type *type = 
malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + if (!drgn_typep_vector_append(&builder->prog->created_types, &type)) { + free(type); + return &drgn_enomem; + } + + drgn_type_enumerator_vector_shrink_to_fit(&builder->enumerators); + type->_private.kind = DRGN_TYPE_ENUM; type->_private.is_complete = true; type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; type->_private.tag = tag; type->_private.type = compatible_type; type->_private.qualifiers = 0; - type->_private.enumerators = enumerators; - type->_private.num_enumerators = num_enumerators; - type->_private.language = drgn_language_or_default(lang); + type->_private.enumerators = builder->enumerators.data; + type->_private.num_enumerators = builder->enumerators.size; + type->_private.program = builder->prog; + type->_private.language = + lang ? lang : drgn_program_language(builder->prog); + *ret = type; + return NULL; } -void drgn_enum_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * +drgn_incomplete_enum_type_create(struct drgn_program *prog, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_ENUM; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.type = NULL; - type->_private.qualifiers = 0; - type->_private.enumerators = NULL; - type->_private.num_enumerators = 0; - type->_private.language = drgn_language_or_default(lang); + struct drgn_type key = { + { + .kind = DRGN_TYPE_ENUM, + .is_complete = false, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .tag = tag, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_typedef_type_init(struct drgn_type *type, const char *name, - struct drgn_qualified_type aliased_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_typedef_type_create(struct drgn_program *prog, const char *name, + struct drgn_qualified_type aliased_type, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_TYPEDEF; - type->_private.is_complete = drgn_type_is_complete(aliased_type.type); + if (drgn_type_program(aliased_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + enum drgn_primitive_type primitive; if (strcmp(name, "size_t") == 0) - type->_private.primitive = DRGN_C_TYPE_SIZE_T; + primitive = DRGN_C_TYPE_SIZE_T; else if (strcmp(name, "ptrdiff_t") == 0) - type->_private.primitive = DRGN_C_TYPE_PTRDIFF_T; + primitive = DRGN_C_TYPE_PTRDIFF_T; else - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; - type->_private.type = aliased_type.type; - type->_private.qualifiers = aliased_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_TYPEDEF, + .is_complete = drgn_type_is_complete(aliased_type.type), + .primitive = primitive, + .name = name, + .type = aliased_type.type, + .qualifiers = aliased_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_pointer_type_init(struct drgn_type *type, uint64_t size, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_pointer_type_create(struct drgn_program *prog, + struct drgn_qualified_type referenced_type, + uint64_t size, const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_POINTER; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.size = size; - type->_private.type = referenced_type.type; - type->_private.qualifiers = referenced_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_program(referenced_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_POINTER, + .is_complete = true, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .size = size, + .type = referenced_type.type, + .qualifiers = referenced_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_array_type_init(struct drgn_type *type, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + uint64_t length, const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_ARRAY; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.length = length; - type->_private.type = element_type.type; - type->_private.qualifiers = element_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_program(element_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_ARRAY, + .is_complete = true, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .length = length, + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_array_type_init_incomplete(struct drgn_type *type, - struct drgn_qualified_type element_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_incomplete_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_ARRAY; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.length = 0; - type->_private.type = element_type.type; - type->_private.qualifiers = element_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_program(element_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_ARRAY, + .is_complete = false, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_function_type_init(struct drgn_type *type, - struct drgn_qualified_type return_type, - struct drgn_type_parameter *parameters, - size_t num_parameters, bool is_variadic, - const struct drgn_language *lang) +DEFINE_VECTOR_FUNCTIONS(drgn_type_parameter_vector) + +void drgn_function_type_builder_init(struct drgn_function_type_builder *builder, + struct drgn_program *prog) { + builder->prog = prog; + drgn_type_parameter_vector_init(&builder->parameters); +} + +void +drgn_function_type_builder_deinit(struct drgn_function_type_builder *builder) +{ + for (size_t i = 0; i < builder->parameters.size; i++) + drgn_lazy_type_deinit(&builder->parameters.data[i].type); + drgn_type_parameter_vector_deinit(&builder->parameters); +} + +struct drgn_error * +drgn_function_type_builder_add_parameter(struct drgn_function_type_builder *builder, + struct drgn_lazy_type type, + const char *name) +{ + struct drgn_error *err = drgn_lazy_type_check_prog(&type, + builder->prog); + if (err) + return err; + struct drgn_type_parameter *parameter = + drgn_type_parameter_vector_append_entry(&builder->parameters); + if (!parameter) + return &drgn_enomem; + parameter->type = type; + parameter->name = name; + return NULL; +} + +struct drgn_error * +drgn_function_type_create(struct drgn_function_type_builder *builder, + struct drgn_qualified_type return_type, + bool is_variadic, const struct drgn_language *lang, + struct drgn_type **ret) +{ + if (drgn_type_program(return_type.type) != builder->prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + if (!drgn_typep_vector_append(&builder->prog->created_types, &type)) { + free(type); + return &drgn_enomem; + } + + drgn_type_parameter_vector_shrink_to_fit(&builder->parameters); + type->_private.kind = DRGN_TYPE_FUNCTION; 
type->_private.is_complete = true; type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; type->_private.type = return_type.type; type->_private.qualifiers = return_type.qualifiers; - type->_private.parameters = parameters; - type->_private.num_parameters = num_parameters; + type->_private.parameters = builder->parameters.data; + type->_private.num_parameters = builder->parameters.size; type->_private.is_variadic = is_variadic; - type->_private.language = drgn_language_or_default(lang); + type->_private.program = builder->prog; + type->_private.language = + lang ? lang : drgn_program_language(builder->prog); + *ret = type; + return NULL; } struct drgn_type_pair { @@ -692,8 +990,6 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, int *depth, bool *ret) { struct drgn_error *err; - struct drgn_type_pair pair = { a, b }; - struct hash_pair hp; if (*depth >= 1000) { return drgn_error_create(DRGN_ERROR_RECURSION, @@ -713,7 +1009,8 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, * Cache this comparison so that we don't do it again. We insert the * cache entry before doing the comparison in order to break cycles. */ - hp = drgn_type_pair_set_hash(&pair); + struct drgn_type_pair pair = { a, b }; + struct hash_pair hp = drgn_type_pair_set_hash(&pair); switch (drgn_type_pair_set_insert_hashed(cache, &pair, hp, NULL)) { case 1: /* These types haven't been compared yet. */ @@ -734,18 +1031,48 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, (*depth)++; if (drgn_type_kind(a) != drgn_type_kind(b) || - drgn_type_language(a) != drgn_type_language(b) || drgn_type_is_complete(a) != drgn_type_is_complete(b)) goto out_false; + switch (drgn_type_kind(a)) { + /* + * These types are uniquely deduplicated, so if their pointers did not + * compare equal then they are not equal. 
+ */ + case DRGN_TYPE_VOID: + case DRGN_TYPE_INT: + case DRGN_TYPE_BOOL: + case DRGN_TYPE_FLOAT: + case DRGN_TYPE_COMPLEX: + goto out_false; + /* These types are uniquely deduplicated only if incomplete. */ + case DRGN_TYPE_STRUCT: + case DRGN_TYPE_UNION: + case DRGN_TYPE_CLASS: + case DRGN_TYPE_ENUM: + if (!drgn_type_is_complete(a)) + goto out_false; + break; + /* + * These types are not uniquely deduplicated because they can refer to + * types that are not deduplicated. + */ + case DRGN_TYPE_TYPEDEF: + case DRGN_TYPE_POINTER: + case DRGN_TYPE_ARRAY: + case DRGN_TYPE_FUNCTION: + break; + } + + if (drgn_type_language(a) != drgn_type_language(b)) + goto out_false; + if (drgn_type_has_name(a) && strcmp(drgn_type_name(a), drgn_type_name(b)) != 0) goto out_false; if (drgn_type_has_tag(a)) { - const char *tag_a, *tag_b; - - tag_a = drgn_type_tag(a); - tag_b = drgn_type_tag(b); + const char *tag_a = drgn_type_tag(a); + const char *tag_b = drgn_type_tag(b); if ((!tag_a != !tag_b) || (tag_a && strcmp(tag_a, tag_b) != 0)) goto out_false; } @@ -754,14 +1081,10 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, if (drgn_type_has_length(a) && drgn_type_length(a) != drgn_type_length(b)) goto out_false; - if (drgn_type_has_is_signed(a) && - drgn_type_is_signed(a) != drgn_type_is_signed(b)) - goto out_false; + assert(!drgn_type_has_is_signed(a)); if (drgn_type_has_type(a)) { - struct drgn_qualified_type type_a, type_b; - - type_a = drgn_type_type(a); - type_b = drgn_type_type(b); + struct drgn_qualified_type type_a = drgn_type_type(a); + struct drgn_qualified_type type_b = drgn_type_type(b); err = drgn_qualified_type_eq_impl(&type_a, &type_b, cache, depth, ret); if (err || !*ret) @@ -797,6 +1120,10 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, LIBDRGN_PUBLIC struct drgn_error *drgn_type_eq(struct drgn_type *a, struct drgn_type *b, bool *ret) { + if (drgn_type_program(a) != drgn_type_program(b)) { + return 
drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "types are from different programs"); + } struct drgn_type_pair_set cache = HASH_TABLE_INIT; int depth = 0; struct drgn_error *err = drgn_type_eq_impl(a, b, &cache, &depth, ret); @@ -1034,37 +1361,54 @@ struct drgn_error *drgn_error_member_not_found(struct drgn_type *type, void drgn_program_init_types(struct drgn_program *prog) { - drgn_pointer_type_table_init(&prog->pointer_types); - drgn_array_type_table_init(&prog->array_types); + for (size_t i = 0; i < ARRAY_SIZE(prog->void_types); i++) { + struct drgn_type *type = &prog->void_types[i]; + type->_private.kind = DRGN_TYPE_VOID; + type->_private.is_complete = false; + type->_private.primitive = DRGN_C_TYPE_VOID; + type->_private.program = prog; + type->_private.language = &drgn_languages[i]; + } + drgn_dedupe_type_set_init(&prog->dedupe_types); + drgn_typep_vector_init(&prog->created_types); drgn_member_map_init(&prog->members); drgn_type_set_init(&prog->members_cached); } -static void free_pointer_types(struct drgn_program *prog) -{ - struct drgn_pointer_type_table_iterator it; - - for (it = drgn_pointer_type_table_first(&prog->pointer_types); - it.entry; it = drgn_pointer_type_table_next(it)) - free(*it.entry); - drgn_pointer_type_table_deinit(&prog->pointer_types); -} - -static void free_array_types(struct drgn_program *prog) -{ - struct drgn_array_type_table_iterator it; - for (it = drgn_array_type_table_first(&prog->array_types); it.entry; - it = drgn_array_type_table_next(it)) - free(*it.entry); - drgn_array_type_table_deinit(&prog->array_types); -} - void drgn_program_deinit_types(struct drgn_program *prog) { drgn_member_map_deinit(&prog->members); drgn_type_set_deinit(&prog->members_cached); - free_array_types(prog); - free_pointer_types(prog); + + for (size_t i = 0; i < prog->created_types.size; i++) { + struct drgn_type *type = prog->created_types.data[i]; + if (drgn_type_has_members(type)) { + struct drgn_type_member *members = + drgn_type_members(type); 
+ size_t num_members = drgn_type_num_members(type); + for (size_t j = 0; j < num_members; j++) + drgn_lazy_type_deinit(&members[j].type); + free(members); + } + if (drgn_type_has_enumerators(type)) + free(drgn_type_enumerators(type)); + if (drgn_type_has_parameters(type)) { + struct drgn_type_parameter *parameters = + drgn_type_parameters(type); + size_t num_parameters = drgn_type_num_parameters(type); + for (size_t j = 0; j < num_parameters; j++) + drgn_lazy_type_deinit(&parameters[j].type); + free(parameters); + } + free(type); + } + drgn_typep_vector_deinit(&prog->created_types); + + for (struct drgn_dedupe_type_set_iterator it = + drgn_dedupe_type_set_first(&prog->dedupe_types); + it.entry; it = drgn_dedupe_type_set_next(it)) + free(*it.entry); + drgn_dedupe_type_set_deinit(&prog->dedupe_types); struct drgn_type_finder *finder = prog->type_finders; while (finder) { @@ -1100,6 +1444,10 @@ drgn_program_find_type_impl(struct drgn_program *prog, finder->fn(kind, name, name_len, filename, finder->arg, ret); if (!err) { + if (drgn_type_program(ret->type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type find callback returned type from wrong program"); + } if (drgn_type_kind(ret->type) != kind) { return drgn_error_create(DRGN_ERROR_TYPE, "type find callback returned wrong kind of type"); @@ -1132,80 +1480,53 @@ drgn_program_find_type(struct drgn_program *prog, const char *name, } } -/* Default long and unsigned long are 64 bits. */ -static struct drgn_type default_primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; -/* 32-bit versions of long and unsigned long. */ -static struct drgn_type default_long_32bit; -static struct drgn_type default_unsigned_long_32bit; +/* + * size_t and ptrdiff_t default to typedefs of whatever integer type matches the + * word size. 
+ */ +static struct drgn_error * +default_size_t_or_ptrdiff_t(struct drgn_program *prog, + enum drgn_primitive_type type, + struct drgn_type **ret) +{ + static const enum drgn_primitive_type integer_types[2][3] = { + { + DRGN_C_TYPE_UNSIGNED_LONG, + DRGN_C_TYPE_UNSIGNED_LONG_LONG, + DRGN_C_TYPE_UNSIGNED_INT, + }, + { + DRGN_C_TYPE_LONG, + DRGN_C_TYPE_LONG_LONG, + DRGN_C_TYPE_INT, + }, + }; + struct drgn_error *err; + uint8_t word_size; -__attribute__((constructor(200))) -static void default_primitive_types_init(void) -{ - size_t i; + err = drgn_program_word_size(prog, &word_size); + if (err) + return err; + for (size_t i = 0; i < ARRAY_SIZE(integer_types[0]); i++) { + enum drgn_primitive_type integer_type; + struct drgn_qualified_type qualified_type; - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_CHAR][0], - 1, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SIGNED_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_SIGNED_CHAR][0], - 1, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_CHAR][0], - 1, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SHORT], - drgn_primitive_type_spellings[DRGN_C_TYPE_SHORT][0], - 2, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_SHORT], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_SHORT][0], - 2, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_INT], - drgn_primitive_type_spellings[DRGN_C_TYPE_INT][0], 4, - true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_INT], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_INT][0], - 4, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], - 8, 
true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], - 8, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_LONG][0], - 8, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG_LONG][0], - 8, false, &drgn_language_c); - drgn_bool_type_init(&default_primitive_types[DRGN_C_TYPE_BOOL], - drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0], - 1, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_FLOAT], - drgn_primitive_type_spellings[DRGN_C_TYPE_FLOAT][0], - 4, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_DOUBLE], - drgn_primitive_type_spellings[DRGN_C_TYPE_DOUBLE][0], - 8, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_DOUBLE], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_DOUBLE][0], - 16, &drgn_language_c); - for (i = 0; i < ARRAY_SIZE(default_primitive_types); i++) { - if (drgn_primitive_type_kind[i] == DRGN_TYPE_VOID || - i == DRGN_C_TYPE_SIZE_T || i == DRGN_C_TYPE_PTRDIFF_T) - continue; - assert(drgn_type_primitive(&default_primitive_types[i]) == i); + integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; + err = drgn_program_find_primitive_type(prog, integer_type, + &qualified_type.type); + if (err) + return err; + if (drgn_type_size(qualified_type.type) == word_size) { + qualified_type.qualifiers = 0; + return drgn_typedef_type_create(prog, + drgn_primitive_type_spellings[type][0], + qualified_type, + &drgn_language_c, ret); + } } - - drgn_int_type_init(&default_long_32bit, - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], - 4, true, &drgn_language_c); - assert(drgn_type_primitive(&default_long_32bit) == - DRGN_C_TYPE_LONG); - - 
drgn_int_type_init(&default_unsigned_long_32bit, - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], - 4, false, &drgn_language_c); - assert(drgn_type_primitive(&default_unsigned_long_32bit) == - DRGN_C_TYPE_UNSIGNED_LONG); + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "no suitable integer type for %s", + drgn_primitive_type_spellings[type][0]); } struct drgn_error * @@ -1217,7 +1538,6 @@ drgn_program_find_primitive_type(struct drgn_program *prog, struct drgn_qualified_type qualified_type; enum drgn_type_kind kind; const char * const *spellings; - uint8_t word_size; size_t i; if (prog->primitive_types[type]) { @@ -1227,7 +1547,7 @@ drgn_program_find_primitive_type(struct drgn_program *prog, kind = drgn_primitive_type_kind[type]; if (kind == DRGN_TYPE_VOID) { - *ret = drgn_void_type(&drgn_language_c); + *ret = drgn_void_type(prog, &drgn_language_c); goto out; } @@ -1244,184 +1564,82 @@ drgn_program_find_primitive_type(struct drgn_program *prog, } } - if (!prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program word size is not known"); - } - word_size = drgn_program_is_64_bit(prog) ? 
8 : 4; - + switch (type) { + case DRGN_C_TYPE_CHAR: + case DRGN_C_TYPE_SIGNED_CHAR: + err = drgn_int_type_create(prog, spellings[0], 1, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_CHAR: + err = drgn_int_type_create(prog, spellings[0], 1, false, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_SHORT: + err = drgn_int_type_create(prog, spellings[0], 2, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_SHORT: + err = drgn_int_type_create(prog, spellings[0], 2, false, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_INT: + err = drgn_int_type_create(prog, spellings[0], 4, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_INT: + err = drgn_int_type_create(prog, spellings[0], 4, false, + &drgn_language_c, ret); + break; /* long and unsigned long default to the word size. */ - if (type == DRGN_C_TYPE_LONG || type == DRGN_C_TYPE_UNSIGNED_LONG) { - if (word_size == 4) { - *ret = (type == DRGN_C_TYPE_LONG ? - &default_long_32bit : - &default_unsigned_long_32bit); - goto out; - } - } - /* - * size_t and ptrdiff_t default to typedefs of whatever integer type - * matches the word size. - */ - if (type == DRGN_C_TYPE_SIZE_T || type == DRGN_C_TYPE_PTRDIFF_T) { - static enum drgn_primitive_type integer_types[2][3] = { - { - DRGN_C_TYPE_UNSIGNED_LONG, - DRGN_C_TYPE_UNSIGNED_LONG_LONG, - DRGN_C_TYPE_UNSIGNED_INT, - }, - { - DRGN_C_TYPE_LONG, - DRGN_C_TYPE_LONG_LONG, - DRGN_C_TYPE_INT, - }, - }; - - for (i = 0; i < 3; i++) { - enum drgn_primitive_type integer_type; - - integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; - err = drgn_program_find_primitive_type(prog, - integer_type, - &qualified_type.type); - if (err) - return err; - if (drgn_type_size(qualified_type.type) == word_size) { - qualified_type.qualifiers = 0; - *ret = (type == DRGN_C_TYPE_SIZE_T ? 
- &prog->default_size_t : - &prog->default_ptrdiff_t); - drgn_typedef_type_init(*ret, spellings[0], - qualified_type, &drgn_language_c); - goto out; - } - } - return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, - "no suitable integer type for %s", - spellings[0]); - } - - *ret = &default_primitive_types[type]; -out: - prog->primitive_types[type] = *ret; - return NULL; -} - -struct drgn_error * -drgn_program_pointer_type(struct drgn_program *prog, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_pointer_type_key key = { - .type = referenced_type.type, - .qualifiers = referenced_type.qualifiers, - .lang = lang ? lang : drgn_type_language(referenced_type.type), - }; - struct drgn_pointer_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - if (!prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program word size is not known"); - } - - hp = drgn_pointer_type_table_hash(&key); - it = drgn_pointer_type_table_search_hashed(&prog->pointer_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_pointer_type_init(type, drgn_program_is_64_bit(prog) ? 8 : 4, - referenced_type, key.lang); - if (drgn_pointer_type_table_insert_searched(&prog->pointer_types, &type, - hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -struct drgn_error * -drgn_program_array_type(struct drgn_program *prog, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_array_type_key key = { - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = true, - .length = length, - .lang = lang ? 
lang : drgn_type_language(element_type.type), - }; - struct drgn_array_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - hp = drgn_array_type_table_hash(&key); - it = drgn_array_type_table_search_hashed(&prog->array_types, &key, hp); - if (it.entry) { - type = *it.entry; - goto out; - } + case DRGN_C_TYPE_LONG: + case DRGN_C_TYPE_UNSIGNED_LONG: { + uint8_t word_size; - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_array_type_init(type, length, element_type, key.lang); - if (drgn_array_type_table_insert_searched(&prog->array_types, &type, hp, - NULL) == -1) { - free(type); - return &drgn_enomem; + err = drgn_program_word_size(prog, &word_size); + if (err) + break; + err = drgn_int_type_create(prog, spellings[0], word_size, + type == DRGN_C_TYPE_LONG, + &drgn_language_c, ret); + break; } -out: - *ret = type; - return NULL; -} - -struct drgn_error * -drgn_program_incomplete_array_type(struct drgn_program *prog, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_array_type_key key = { - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = false, - .lang = lang ? 
lang : drgn_type_language(element_type.type), - }; - struct drgn_array_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - hp = drgn_array_type_table_hash(&key); - it = drgn_array_type_table_search_hashed(&prog->array_types, &key, hp); - if (it.entry) { - type = *it.entry; - goto out; + case DRGN_C_TYPE_LONG_LONG: + err = drgn_int_type_create(prog, spellings[0], 8, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_LONG_LONG: + err = drgn_int_type_create(prog, spellings[0], 8, false, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_BOOL: + err = drgn_bool_type_create(prog, spellings[0], 1, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_FLOAT: + err = drgn_float_type_create(prog, spellings[0], 4, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_DOUBLE: + err = drgn_float_type_create(prog, spellings[0], 8, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_LONG_DOUBLE: + err = drgn_float_type_create(prog, spellings[0], 16, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_SIZE_T: + case DRGN_C_TYPE_PTRDIFF_T: + err = default_size_t_or_ptrdiff_t(prog, type, ret); + break; + default: + UNREACHABLE(); } + if (err) + return err; + assert(drgn_type_primitive(*ret) == type); - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_array_type_init_incomplete(type, element_type, key.lang); - if (drgn_array_type_table_insert_searched(&prog->array_types, &type, hp, - NULL) == -1) { - free(type); - return &drgn_enomem; - } out: - *ret = type; + prog->primitive_types[type] = *ret; return NULL; } diff --git a/libdrgn/type.h b/libdrgn/type.h index d92c11379..50b22563e 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -15,6 +15,7 @@ #include "drgn.h" #include "hash_table.h" #include "language.h" +#include "vector.h" /** * @ingroup Internals @@ -40,50 +41,7 @@ struct drgn_type_finder { struct drgn_type_finder *next; }; -struct drgn_pointer_type_key { - struct drgn_type *type; - enum 
drgn_qualifiers qualifiers; - const struct drgn_language *lang; -}; - -static struct drgn_pointer_type_key -drgn_pointer_type_entry_to_key(struct drgn_type * const *entry) -{ - struct drgn_qualified_type referenced_type = drgn_type_type(*entry); - - return (struct drgn_pointer_type_key){ - .type = referenced_type.type, - .qualifiers = referenced_type.qualifiers, - .lang = drgn_type_language(*entry), - }; -} - -struct drgn_array_type_key { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - bool is_complete; - uint64_t length; - const struct drgn_language *lang; -}; - -static struct drgn_array_type_key -drgn_array_type_entry_to_key(struct drgn_type * const *entry) -{ - struct drgn_qualified_type element_type = drgn_type_type(*entry); - - return (struct drgn_array_type_key){ - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = drgn_type_is_complete(*entry), - .length = drgn_type_length(*entry), - .lang = drgn_type_language(*entry), - }; -} - -DEFINE_HASH_TABLE_TYPE(drgn_pointer_type_table, struct drgn_type *, - drgn_pointer_type_entry_to_key) -DEFINE_HASH_TABLE_TYPE(drgn_array_type_table, struct drgn_type *, - drgn_array_type_entry_to_key) +DEFINE_HASH_SET_TYPE(drgn_dedupe_type_set, struct drgn_type *) /** (type, member name) pair. */ struct drgn_member_key { @@ -139,6 +97,8 @@ DEFINE_HASH_SET_TYPE(drgn_type_set, struct drgn_type *) * like @c container_of(). */ struct drgn_type_thunk { + /** Program owning this thunk. */ + struct drgn_program *prog; /** * Callback to evaluate this thunk to a @ref drgn_qualified_type. * @@ -161,7 +121,10 @@ struct drgn_type_thunk { * * @param[in] thunk Thunk to free. */ -void drgn_type_thunk_free(struct drgn_type_thunk *thunk); +static inline void drgn_type_thunk_free(struct drgn_type_thunk *thunk) +{ + thunk->free_fn(thunk); +} /** * Create a @ref drgn_lazy_type from a @ref drgn_type_thunk. 
@@ -215,11 +178,11 @@ static inline bool drgn_lazy_type_is_evaluated(struct drgn_lazy_type *lazy_type) * remains in a valid, unevaluated state. * * @param[in] lazy_type Lazy type to evaluate. - * @param[out] qualified_type Evaluated type. + * @param[out] ret Evaluated type. * @return @c NULL on success, non-@c NULL on error. */ struct drgn_error *drgn_lazy_type_evaluate(struct drgn_lazy_type *lazy_type, - struct drgn_qualified_type *qualified_type); + struct drgn_qualified_type *ret); /** * Free a @ref drgn_lazy_type. @@ -238,14 +201,13 @@ void drgn_lazy_type_deinit(struct drgn_lazy_type *lazy_type); * * Creating type descriptors. * - * libdrgn does not provide a way to allocate a @ref drgn_type. Instead, a type - * can be allocated in any way deemed appropriate (e.g., with @c malloc(), on - * the stack, embedded in another structure). These helpers initialize an - * allocated type. + * These functions create type descriptors. They are valid for the lifetime of + * the program that owns them. * - * Note that structure, union, enumerated, and function types end with a - * variable-length array. The caller must allocate the necessary number of - * elements. + * A few kinds of types have variable-length fields: structure, union, and class + * types have members, enumerated types have enumerators, and function types + * have parameters. These fields are constructed with a @em builder before + * creating the type. * * @{ */ @@ -253,316 +215,364 @@ void drgn_lazy_type_deinit(struct drgn_lazy_type *lazy_type); /** * Get the void type for the given @ref drgn_language. * - * The void type does not have any fields, so there is a single type - * descriptor per language to represent it. + * The void type does not have any fields, so a program has a single type + * descriptor per language to represent it. This function cannot fail. + * + * @param[in] prog Program owning type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. 
*/ -static inline struct drgn_type * -drgn_void_type(const struct drgn_language *lang) -{ - return (struct drgn_type *)&drgn_language_or_default(lang)->void_type; -} +struct drgn_type *drgn_void_type(struct drgn_program *prog, + const struct drgn_language *lang); /** - * Initialize an integer type. + * Create an integer type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. * @param[in] is_signed Whether the type is signed. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_int_type_init(struct drgn_type *type, const char *name, uint64_t size, - bool is_signed, const struct drgn_language *lang); +struct drgn_error *drgn_int_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a boolean type. + * Create a boolean type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
*/ -void drgn_bool_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang); +struct drgn_error *drgn_bool_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a floating-point type. + * Create a floating-point type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_float_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang); +struct drgn_error *drgn_float_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a complex type. + * Create a complex type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. - * @param[in] real_type The corresponding real type. It must not be @c NULL and - * must be a floating-point or integer type. - * @param[in] lang Language of this type. + * @param[in] real_type Corresponding real type. Must not be @c NULL and must be + * a floating-point or integer type. 
+ * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_complex_type_init(struct drgn_type *type, const char *name, - uint64_t size, struct drgn_type *real_type, - const struct drgn_language *lang); +struct drgn_error *drgn_complex_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + struct drgn_type *real_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +DEFINE_VECTOR_TYPE(drgn_type_member_vector, struct drgn_type_member) + +/** Builder for members of a structure, union, or class type. */ +struct drgn_compound_type_builder { + struct drgn_program *prog; + enum drgn_type_kind kind; + struct drgn_type_member_vector members; +}; /** - * Initialize a member of a type. + * Initialize a @ref drgn_compound_type_builder. * - * @param[out] members Member to initialize. - * @param[in] member_type See @ref drgn_type_member::type. - * @param[in] name See @ref drgn_type_member::name. - * @param[in] bit_offset See @ref drgn_type_member::bit_offset. - * @param[in] bit_field_size See @ref drgn_type_member::bit_field_size. + * @param[in] kind One of @ref DRGN_TYPE_STRUCT, @ref DRGN_TYPE_UNION, or @ref + * DRGN_TYPE_CLASS. */ -static inline void drgn_type_member_init(struct drgn_type_member *member, - struct drgn_lazy_type member_type, - const char *name, uint64_t bit_offset, - uint64_t bit_field_size) -{ - member->type = member_type; - member->name = name; - member->bit_offset = bit_offset; - member->bit_field_size = bit_field_size; -} +void drgn_compound_type_builder_init(struct drgn_compound_type_builder *builder, + struct drgn_program *prog, + enum drgn_type_kind kind); /** - * Free a member of a type. + * Deinitialize a @ref drgn_compound_type_builder. * - * This only frees @ref drgn_type_member::type. + * Don't call this if @ref drgn_compound_type_create() succeeded. 
+ */ +void +drgn_compound_type_builder_deinit(struct drgn_compound_type_builder *builder); + +/** + * Add a @ref drgn_type_member to a @ref drgn_compound_type_builder. * - * @param[out] member Member to free. + * On success, @p builder takes ownership of @p type. */ -static inline void drgn_type_member_deinit(struct drgn_type_member *member) -{ - drgn_lazy_type_deinit(&member->type); -} +struct drgn_error * +drgn_compound_type_builder_add_member(struct drgn_compound_type_builder *builder, + struct drgn_lazy_type type, + const char *name, uint64_t bit_offset, + uint64_t bit_field_size); /** - * Initialize a structure type. + * Create a structure, union, or class type. * - * @param[out] type Type to initialize. - * @param[in] tag Name of the type. This string is not copied. It may be @c NULL - * if the type is anonymous. + * On success, this takes ownership of @p builder. + * + * @param[in] builder Builder containing members. @c type and @c name of each + * member must remain valid for the lifetime of @c builder->prog. + * @param[in] tag Name of the type. Not copied; must remain valid for the + * lifetime of @c builder->prog. May be @c NULL if the type is anonymous. * @param[in] size Size of the type in bytes. - * @param[in] members Members of the type. - * @param[in] num_members The number of members in the type. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @c builder->prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_struct_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang); +struct drgn_error * +drgn_compound_type_create(struct drgn_compound_type_builder *builder, + const char *tag, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an incomplete structure type. 
+ * Create an incomplete structure, union, or class type. * * @c size and @c num_members are set to zero and @c is_complete is set to @c * false. * - * @param[out] type Type to initialize. - * @param[in] tag Name of the type. This string is not copied. It may be @c NULL - * if the type is anonymous. - * @param[in] lang Language of this type. + * @param[in] prog Program owning type. + * @param[in] kind One of @ref DRGN_TYPE_STRUCT, @ref DRGN_TYPE_UNION, or @ref + * DRGN_TYPE_CLASS. + * @param[in] tag Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. May be @c NULL if the type is anonymous. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_struct_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +struct drgn_error * +drgn_incomplete_compound_type_create(struct drgn_program *prog, + enum drgn_type_kind kind, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret); -/** - * Initialize a union type. - * - * @sa drgn_struct_type_init(). - */ -void drgn_union_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang); +DEFINE_VECTOR_TYPE(drgn_type_enumerator_vector, struct drgn_type_enumerator) -/** - * Initialize an incomplete union type. - * - * @sa drgn_struct_type_init_incomplete(). - */ -void drgn_union_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +/** Builder for enumerators of an enumerated type. */ +struct drgn_enum_type_builder { + struct drgn_program *prog; + struct drgn_type_enumerator_vector enumerators; +}; -/** - * Initialize a class type. - * - * @sa drgn_struct_type_init(). 
- */ -void drgn_class_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang); +/** Initialize a @ref drgn_enum_type_builder. */ +void drgn_enum_type_builder_init(struct drgn_enum_type_builder *builder, + struct drgn_program *prog); /** - * Initialize an incomplete class type. + * Deinitialize a @ref drgn_enum_type_builder. * - * @sa drgn_struct_type_init_incomplete(). + * Don't call this if @ref drgn_enum_type_create() succeeded. */ -void drgn_class_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +void drgn_enum_type_builder_deinit(struct drgn_enum_type_builder *builder); /** - * Initialize a signed enumerator of a type. - * - * @param[out] enumerator Enumerator to initialize. - * @param[in] name See @ref drgn_type_enumerator::name. - * @param[in] svalue See @ref drgn_type_enumerator::svalue. + * Add a @ref drgn_type_enumerator with a signed value to a @ref + * drgn_enum_type_builder. */ -static inline void -drgn_type_enumerator_init_signed(struct drgn_type_enumerator *enumerator, - const char *name, int64_t svalue) -{ - enumerator->name = name; - enumerator->svalue = svalue; -} +struct drgn_error * +drgn_enum_type_builder_add_signed(struct drgn_enum_type_builder *builder, + const char *name, int64_t svalue); /** - * Initialize an unsigned enumerator of a type. - * - * @param[out] enumerator Enumerator to initialize. - * @param[in] name See @ref drgn_type_enumerator::name. - * @param[in] uvalue See @ref drgn_type_enumerator::uvalue. + * Add a @ref drgn_type_enumerator with an unsigned value to a @ref + * drgn_enum_type_builder. 
*/ -static inline void -drgn_type_enumerator_init_unsigned(struct drgn_type_enumerator *enumerator, - const char *name, uint64_t uvalue) -{ - enumerator->name = name; - enumerator->uvalue = uvalue; -} +struct drgn_error * +drgn_enum_type_builder_add_unsigned(struct drgn_enum_type_builder *builder, + const char *name, uint64_t uvalue); /** - * Initialize an enumerated type. + * Create an enumerated type. + * + * On success, this takes ownership of @p builder. * - * @param[out] type Type to initialize. + * @param[in] builder Builder containing enumerators. @c name of each enumerator + * must remain valid for the lifetime of @c builder->prog. * @param[in] tag Name of the type. This string is not copied. It may be @c NULL * if the type is anonymous. - * @param[in] compatible_type Type compatible with this enumerated type. It must - * be an integer type. - * @param[in] enumerators Enumerators of the type. - * @param[in] num_enumerators The number of enumerators in the type. - * @param[in] lang Language of this type. + * @param[in] compatible_type Type compatible with this enumerated type. Must be + * an integer type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @c builder->prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_enum_type_init(struct drgn_type *type, const char *tag, - struct drgn_type *compatible_type, - struct drgn_type_enumerator *enumerators, - size_t num_enumerators, - const struct drgn_language *lang); +struct drgn_error *drgn_enum_type_create(struct drgn_enum_type_builder *builder, + const char *tag, + struct drgn_type *compatible_type, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an incomplete enumerated type. + * Create an incomplete enumerated type. * * @c compatible_type is set to @c NULL and @c num_enumerators is set to zero. * - * @param[out] type Type to initialize. - * @param[in] tag Name of the type. 
This string is not copied. It may be @c NULL - * if the type is anonymous. - * @param[in] lang Language of this type. + * @param[in] prog Program owning type. + * @param[in] tag Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. May be @c NULL if the type is anonymous. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_enum_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +struct drgn_error * +drgn_incomplete_enum_type_create(struct drgn_program *prog, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a typedef type. + * Create a typedef type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] aliased_type Type aliased by the typedef. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_typedef_type_init(struct drgn_type *type, const char *name, - struct drgn_qualified_type aliased_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_typedef_type_create(struct drgn_program *prog, const char *name, + struct drgn_qualified_type aliased_type, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a pointer type. + * Create a pointer type. * - * @param[out] type Type to initialize. - * @param[in] size Size of the type in bytes. + * @param[in] prog Program owning type. 
* @param[in] referenced_type Type referenced by the pointer type. - * @param[in] lang Language of this type. + * @param[in] size Size of the type in bytes. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_pointer_type_init(struct drgn_type *type, uint64_t size, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_pointer_type_create(struct drgn_program *prog, + struct drgn_qualified_type referenced_type, + uint64_t size, const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an array type. + * Create an array type. * - * @param[out] type Type to initialize. - * @param[in] length Number of elements in the array type. + * @param[in] prog Program owning type. * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of this type. + * @param[in] length Number of elements in the array type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_array_type_init(struct drgn_type *type, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + uint64_t length, const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an incomplete array type. + * Create an incomplete array type. * * @c length is set to zero. * - * @param[out] type Type to initialize. + * @param[in] prog Program owning type. * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. 
+ * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_array_type_init_incomplete(struct drgn_type *type, - struct drgn_qualified_type element_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_incomplete_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +DEFINE_VECTOR_TYPE(drgn_type_parameter_vector, struct drgn_type_parameter) + +/** Builder for parameters of a function type. */ +struct drgn_function_type_builder { + struct drgn_program *prog; + struct drgn_type_parameter_vector parameters; +}; + +/** Initialize a @ref drgn_function_type_builder. */ +void drgn_function_type_builder_init(struct drgn_function_type_builder *builder, + struct drgn_program *prog); /** - * Initialize a parameter of a type. + * Deinitialize a @ref drgn_function_type_builder. * - * @param[out] parameter Parameter to initialize. - * @param[in] parameter_type See @ref drgn_type_parameter::type. - * @param[in] name See @ref drgn_type_parameter::name. + * Don't call this if @ref drgn_function_type_create() succeeded. */ -static inline void -drgn_type_parameter_init(struct drgn_type_parameter *parameter, - struct drgn_lazy_type parameter_type, const char *name) -{ - parameter->type = parameter_type; - parameter->name = name; -} +void +drgn_function_type_builder_deinit(struct drgn_function_type_builder *builder); /** - * Free a parameter of a type. - * - * This only frees @ref drgn_type_parameter::type. + * Add a @ref drgn_type_parameter to a @ref drgn_function_type_builder. * - * @param[out] parameter Parameter to free. + * On success, @p builder takes ownership of @p type. 
*/ -static inline void drgn_type_parameter_deinit(struct drgn_type_parameter *parameter) -{ - drgn_lazy_type_deinit(&parameter->type); -} +struct drgn_error * +drgn_function_type_builder_add_parameter(struct drgn_function_type_builder *builder, + struct drgn_lazy_type type, + const char *name); /** - * Initialize a function type. + * Create a function type. + * + * On success, this takes ownership of @p builder. * - * @param[out] type Type to initialize. + * @param[in] builder Builder containing parameters. @c type and @c name of each + * parameter must remain valid for the lifetime of @c builder->prog. * @param[in] return_type Type returned by the function type. - * @param[in] parameters Parameters of the function type. - * @param[in] num_parameters The number of parameters accepted by the function - * type. * @param[in] is_variadic Whether the function type is variadic. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @c builder->prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_function_type_init(struct drgn_type *type, - struct drgn_qualified_type return_type, - struct drgn_type_parameter *parameters, - size_t num_parameters, bool is_variadic, - const struct drgn_language *lang); +struct drgn_error * +drgn_function_type_create(struct drgn_function_type_builder *builder, + struct drgn_qualified_type return_type, + bool is_variadic, const struct drgn_language *lang, + struct drgn_type **ret); /** @} */ @@ -694,69 +704,6 @@ drgn_program_find_primitive_type(struct drgn_program *prog, enum drgn_primitive_type type, struct drgn_type **ret); -/** - * Create a pointer type. - * - * The created type is cached for the lifetime of the @ref drgn_program. If the - * same @p referenced_type and @p lang are passed, the same type will be - * returned. - * - * If this succeeds, @p referenced_type must remain valid until @p prog is - * destroyed.
- * - * @param[in] referenced_type Type referenced by the pointer type. - * @param[in] lang Language of the pointer type. If @c NULL, the language of @p - * referenced_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_program_pointer_type(struct drgn_program *prog, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Create an array type. - * - * The created type is cached for the lifetime of the @ref drgn_program. If the - * same @p length, @p element_type, and @p lang are passed, the same type will - * be returned. - * - * @param[in] length Number of elements in the array type. - * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of the array type. If @c NULL, the language of @p - * element_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_program_array_type(struct drgn_program *prog, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Create an incomplete array type. - * - * The created type is cached for the lifetime of the @ref drgn_program. If the - * same @p element_type and @p lang are passed, the same type will be returned. - * - * If this succeeds, @p element_type must remain valid until @p prog is - * destroyed. - * - * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of the array type. If @c NULL, the language of @p - * element_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -struct drgn_error * -drgn_program_incomplete_array_type(struct drgn_program *prog, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret); - /** * Find the type, offset, and bit field size of a type member. * diff --git a/tests/__init__.py b/tests/__init__.py index cb4f36510..4e95c4123 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,54 +17,11 @@ TypeEnumerator, TypeKind, TypeMember, - class_type, - enum_type, - float_type, - int_type, - struct_type, - typedef_type, - union_type, ) DEFAULT_LANGUAGE = Language.C -coord_type = class_type( - "coord", - 12, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), - ), -) -point_type = struct_type( - "point", - 8, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - ), -) -line_segment_type = struct_type( - "line_segment", 16, (TypeMember(point_type, "a"), TypeMember(point_type, "b", 64)) -) -option_type = union_type( - "option", - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(float_type("float", 4), "f"), - ), -) -color_type = enum_type( - "color", - int_type("unsigned int", 4, False), - (TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), TypeEnumerator("BLUE", 2)), -) -pid_type = typedef_type("pid_t", int_type("int", 4, True)) - - MOCK_32BIT_PLATFORM = Platform(Architecture.UNKNOWN, PlatformFlags.IS_LITTLE_ENDIAN) MOCK_PLATFORM = Platform( Architecture.UNKNOWN, PlatformFlags.IS_64_BIT | PlatformFlags.IS_LITTLE_ENDIAN @@ -145,10 +102,9 @@ def mock_object_find(prog, name, flags, filename): return prog -class ObjectTestCase(unittest.TestCase): +class TestCase(unittest.TestCase): def setUp(self): super().setUp() - self.prog = mock_program() # For testing, we want to compare the raw objects rather than using the # language's equality operator. 
def object_equality_func(a, b, msg=None): @@ -216,3 +172,63 @@ def long(self, value): def double(self, value): return Object(self.prog, "double", value=value) + + +class MockProgramTestCase(TestCase): + def setUp(self): + super().setUp() + self.types = [] + self.objects = [] + self.prog = mock_program(types=self.types, objects=self.objects) + self.coord_type = self.prog.class_type( + "coord", + 12, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), + ), + ) + self.point_type = self.prog.struct_type( + "point", + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), + ) + self.line_segment_type = self.prog.struct_type( + "line_segment", + 16, + (TypeMember(self.point_type, "a"), TypeMember(self.point_type, "b", 64)), + ) + self.option_type = self.prog.union_type( + "option", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "i"), + TypeMember(self.prog.float_type("float", 4), "f"), + ), + ) + self.color_type = self.prog.enum_type( + "color", + self.prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ) + self.pid_type = self.prog.typedef_type( + "pid_t", self.prog.int_type("int", 4, True) + ) + + def add_memory_segment(self, buf, virt_addr=None, phys_addr=None): + if virt_addr is not None: + self.prog.add_memory_segment( + virt_addr, len(buf), functools.partial(mock_memory_read, buf), + ) + if phys_addr is not None: + self.prog.add_memory_segment( + phys_addr, len(buf), functools.partial(mock_memory_read, buf), True, + ) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index e6f564b0f..e355e453a 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -16,28 +16,8 @@ TypeEnumerator, TypeMember, TypeParameter, - array_type, - class_type, - complex_type, - 
enum_type, - float_type, - function_type, - int_type, - pointer_type, - struct_type, - typedef_type, - union_type, - void_type, -) -from tests import ( - DEFAULT_LANGUAGE, - ObjectTestCase, - color_type, - coord_type, - option_type, - pid_type, - point_type, ) +from tests import DEFAULT_LANGUAGE, TestCase from tests.dwarf import DW_AT, DW_ATE, DW_FORM, DW_LANG, DW_TAG from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf @@ -209,336 +189,632 @@ def dwarf_program(*args, **kwds): return prog -class TestTypes(unittest.TestCase): - @staticmethod - def type_from_dwarf(dies, *args, **kwds): - if isinstance(dies, DwarfDie): - dies = (dies,) - dies = tuple(dies) + ( - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "__TEST__"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], - ), - ) - prog = dwarf_program(dies, *args, **kwds) - return prog.type("__TEST__").type +def test_type_dies(dies): + if isinstance(dies, DwarfDie): + dies = (dies,) + return tuple(dies) + ( + DwarfDie( + DW_TAG.typedef, + [ + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ], + ), + ) - def assertFromDwarf(self, dies, type, *args, **kwds): - self.assertEqual(self.type_from_dwarf(dies, *args, **kwds), type) +class TestTypes(TestCase): def test_unknown_tag(self): - die = DwarfDie(0x9999, ()) + prog = dwarf_program(test_type_dies(DwarfDie(0x9999, ()))) self.assertRaisesRegex( - Exception, "unknown DWARF type tag 0x9999", self.type_from_dwarf, die + Exception, "unknown DWARF type tag 0x9999", prog.type, "TEST" ) - def test_bad_base_type(self): - die = DwarfDie( - DW_TAG.base_type, - [ - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), - DwarfAttrib(DW_AT.name, DW_FORM.string, "bad egg"), - ], + def test_base_type_missing_byte_size(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, 
DW_ATE.signed), + DwarfAttrib(DW_AT.name, DW_FORM.string, "bad egg"), + ), + ) + ) ) - - byte_size = die.attribs.pop(0) self.assertRaisesRegex( Exception, "DW_TAG_base_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - die, + prog.type, + "TEST", ) - die.attribs.insert(0, byte_size) - encoding = die.attribs.pop(1) + def test_base_type_missing_encoding(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.name, DW_FORM.string, "bad egg"), + ), + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_base_type has missing or invalid DW_AT_encoding", - self.type_from_dwarf, - die, + prog.type, + "TEST", ) - die.attribs.insert(1, encoding) - del die.attribs[2] + def test_base_type_missing_name(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + ), + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_base_type has missing or invalid DW_AT_name", - self.type_from_dwarf, - die, + prog.type, + "TEST", ) def test_complex(self): - dies = [ - DwarfDie( - DW_TAG.base_type, + prog = dwarf_program( + test_type_dies( ( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 16), - DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.complex_float), - DwarfAttrib(DW_AT.name, DW_FORM.string, "double _Complex"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ), - ), - double_die, - ] - self.assertFromDwarf( - dies, complex_type("double _Complex", 16, float_type("double", 8)) + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 16), + DwarfAttrib( + DW_AT.encoding, DW_FORM.data1, DW_ATE.complex_float + ), + DwarfAttrib(DW_AT.name, DW_FORM.string, "double _Complex"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + double_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + 
prog.complex_type("double _Complex", 16, prog.float_type("double", 8)), ) def test_unknown_base_type_encoding(self): - die = DwarfDie( - DW_TAG.base_type, - ( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.encoding, DW_FORM.data1, 99), - DwarfAttrib(DW_AT.name, DW_FORM.string, "magic int"), - ), - ) - self.assertRaisesRegex( - Exception, "unknown DWARF encoding", self.type_from_dwarf, die + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, 99), + DwarfAttrib(DW_AT.name, DW_FORM.string, "magic int"), + ), + ) + ) ) + self.assertRaisesRegex(Exception, "unknown DWARF encoding", prog.type, "TEST") - def test_qualifiers(self): - dies = [ - DwarfDie(DW_TAG.const_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)],), - int_die, - ] - self.assertFromDwarf(dies, int_type("int", 4, True, Qualifiers.CONST)) - - del dies[0].attribs[0] - self.assertFromDwarf(dies, void_type(Qualifiers.CONST)) + def test_qualifier(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.const_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + ) - dies = [ - DwarfDie(DW_TAG.const_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)],), - DwarfDie(DW_TAG.restrict_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)],), - DwarfDie(DW_TAG.volatile_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)],), - DwarfDie(DW_TAG.atomic_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 4)],), - int_die, - ] - self.assertFromDwarf( - dies, - int_type( + def test_multiple_qualifiers(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.const_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + DwarfDie( + DW_TAG.restrict_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + DwarfDie( + DW_TAG.volatile_type, + 
(DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + ), + DwarfDie( + DW_TAG.atomic_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 4),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.int_type( "int", 4, True, - Qualifiers.CONST + qualifiers=Qualifiers.CONST | Qualifiers.RESTRICT | Qualifiers.VOLATILE | Qualifiers.ATOMIC, ), ) - del dies[3].attribs[0] - self.assertFromDwarf( - dies, - void_type( - Qualifiers.CONST + def test_qualifier_void(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.const_type, ()))) + self.assertEqual( + prog.type("TEST").type, prog.void_type(qualifiers=Qualifiers.CONST) + ) + + def test_multiple_qualifiers_void(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.const_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + DwarfDie( + DW_TAG.restrict_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + DwarfDie( + DW_TAG.volatile_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + ), + DwarfDie(DW_TAG.atomic_type, ()), + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.void_type( + qualifiers=Qualifiers.CONST | Qualifiers.RESTRICT | Qualifiers.VOLATILE - | Qualifiers.ATOMIC + | Qualifiers.ATOMIC, ), ) def test_struct(self): - dies = [ - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - ], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + 
DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ), + ) + + def test_struct_anonymous(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), ), - int_die, - ] - - self.assertFromDwarf(dies, point_type) + ) - tag = dies[0].attribs.pop(0) - self.assertFromDwarf( - dies, struct_type(None, point_type.size, point_type.members) + def test_struct_no_members(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0),), + ) + ) ) - dies[0].attribs.insert(0, tag) + self.assertEqual(prog.type("TEST").type, prog.struct_type(None, 0, ())) - children = list(dies[0].children) - dies[0].children.clear() - self.assertFromDwarf(dies, 
struct_type("point", point_type.size, ())) - size = dies[0].attribs.pop(1) - dies[0].attribs.append( - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True) + def test_struct_incomplete(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), + ) + ) ) - self.assertFromDwarf(dies, struct_type("point")) - del dies[0].attribs[-1] - dies[0].attribs.insert(1, size) - dies[0].children.extend(children) + self.assertEqual(prog.type("TEST").type, prog.struct_type("point")) - name = dies[0].children[0].attribs.pop(0) - self.assertFromDwarf( - dies, - struct_type( + def test_struct_unnamed_member(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( "point", - point_type.size, + 8, ( - TypeMember(int_type("int", 4, True), None, 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(prog.int_type("int", 4, True), None), + TypeMember(prog.int_type("int", 4, True), "y", 32), ), ), ) - dies[0].children[0].attribs.insert(0, name) - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_structure_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, + def test_struct_member_missing_type(self): + prog = dwarf_program( + 
test_type_dies( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + ), + ), + ), + ), + ) ) - dies[0].attribs[0] = tag - - size = dies[0].attribs.pop(1) self.assertRaisesRegex( - Exception, - "DW_TAG_structure_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_member is missing DW_AT_type", prog.type, "TEST" ) - dies[0].attribs.insert(1, size) - name = dies[0].children[0].attribs.pop(0) - dies[0].children[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) + def test_struct_member_invalid_type(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.string, "foo"), + ), + ), + ), + ), + ) + ) self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_name", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_member has invalid DW_AT_type", prog.type, "TEST" ) - dies[0].children[0].attribs[0] = name - location = dies[0].children[0].attribs[1] - dies[0].children[0].attribs[1] = DwarfAttrib( - DW_AT.data_member_location, DW_FORM.string, "foo" + def test_struct_member_invalid_location(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.string, + "foo", + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) ) self.assertRaisesRegex( Exception, "DW_TAG_member has invalid DW_AT_data_member_location", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs[1] = location - type_ = 
dies[0].children[0].attribs.pop(2) + def test_struct_missing_size(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.structure_type, ()))) self.assertRaisesRegex( - Exception, "DW_TAG_member is missing DW_AT_type", self.type_from_dwarf, dies + Exception, + "DW_TAG_structure_type has missing or invalid DW_AT_byte_size", + prog.type, + "TEST", ) - dies[0].children[0].attribs.insert( - 2, DwarfAttrib(DW_AT.type, DW_FORM.string, "foo") + + def test_struct_invalid_name(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), + ), + ) + ) ) self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_type", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_structure_type has invalid DW_AT_name", prog.type, "TEST" ) - dies[0].children[0].attribs[2] = type_ def test_incomplete_to_complete(self): - dies = [ - DwarfDie( - DW_TAG.pointer_type, - [ - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), - ], - ), - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "foo.c"), - ], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - 
DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), - ], + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "foo.c"), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) ), - int_die, - ] - self.assertFromDwarf(dies, pointer_type(8, point_type)) + ) - # Ambiguous incomplete type. 
- dies.append( - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "bar.c"), - ], - [ + def test_incomplete_to_complete_ambiguous(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "b"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), - ], + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "foo.c"), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + ), + ), + int_die, + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "bar.c"), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, 
DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "b"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + ), + ), + ) ) ) - type_ = pointer_type(8, struct_type("point")) - self.assertFromDwarf(dies, type_) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.struct_type("point")) + ) def test_filename(self): dies = list(base_type_dies) + [ @@ -604,12 +880,20 @@ def test_filename(self): ), ] - other_point_type = struct_type( + point_type = lambda prog: prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + other_point_type = lambda prog: prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "a"), - TypeMember(int_type("int", 4, True), "b", 32), + TypeMember(prog.int_type("int", 4, True), "a"), + TypeMember(prog.int_type("int", 4, True), "b", 32), ), ) @@ -617,13 +901,14 @@ def test_filename(self): for dir in ["", "src", "usr/src", "/usr/src"]: with self.subTest(dir=dir): self.assertEqual( - prog.type("struct point", os.path.join(dir, "foo.c")), point_type + prog.type("struct point", os.path.join(dir, "foo.c")), + point_type(prog), ) for dir in ["", "bar", "src/bar", "usr/src/bar", "/usr/src/bar"]: with self.subTest(dir=dir): self.assertEqual( prog.type("struct point", os.path.join(dir, "baz.c")), - other_point_type, + other_point_type(prog), ) dies[len(base_type_dies)].attribs[-1] = DwarfAttrib( @@ -636,406 +921,717 @@ def test_filename(self): for dir in ["xy", "src/xy", "usr/src/xy", "/usr/src/xy"]: with self.subTest(dir=dir): self.assertEqual( - prog.type("struct point", os.path.join(dir, "foo.h")), point_type + prog.type("struct point", os.path.join(dir, "foo.h")), + point_type(prog), ) for dir in ["ab", "include/ab", "usr/include/ab", "/usr/include/ab"]: with self.subTest(dir=dir): self.assertEqual( 
prog.type("struct point", os.path.join(dir, "foo.h")), - other_point_type, + other_point_type(prog), ) for filename in [None, "foo.h"]: with self.subTest(filename=filename): self.assertIn( - prog.type("struct point", filename), (point_type, other_point_type) + prog.type("struct point", filename), + (point_type(prog), other_point_type(prog)), ) - def test_bit_field(self): - dies = [ + def test_bit_field_data_bit_offset(self): + dies = ( DwarfDie( DW_TAG.structure_type, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - ], - [ + ), + ( DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), DwarfAttrib(DW_AT.data_bit_offset, DW_FORM.data1, 32), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), DwarfAttrib(DW_AT.data_bit_offset, DW_FORM.data1, 44), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), - ], + ), ), int_die, - ] - - t = struct_type( - "point", - 8, - [ - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32, 12), - TypeMember(int_type("int", 4, True), "z", 44, 20), - ], ) - # With DW_AT_data_bit_offset. - self.assertFromDwarf(dies, t, little_endian=True) - self.assertFromDwarf(dies, t, little_endian=False) - - # With DW_AT_bit_offset on big-endian. 
- dies[0].children[1].attribs[2] = DwarfAttrib( - DW_AT.bit_offset, DW_FORM.data1, 32 - ) - dies[0].children[2].attribs[2] = DwarfAttrib( - DW_AT.bit_offset, DW_FORM.data1, 44 - ) - self.assertFromDwarf(dies, t, little_endian=False) + for little_endian in [True, False]: + prog = dwarf_program(test_type_dies(dies), little_endian=little_endian) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + [ + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), + ], + ), + ) - # With DW_AT_data_member_location and DW_AT_bit_offset on big-endian. - dies[0].children[1].attribs.append( - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4) - ) - dies[0].children[1].attribs[2] = DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0) - dies[0].children[2].attribs.append( - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4) + def test_bit_field_bit_offset_big_endian(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 32), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 44), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ), + little_endian=False, ) - dies[0].children[2].attribs[2] = 
DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 4) - - # With DW_AT_data_member_location and DW_AT_bit_offset on little-endian. - dies[0].children[1].attribs[2] = DwarfAttrib( - DW_AT.bit_offset, DW_FORM.data1, 20 + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + [ + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), + ], + ), ) - dies[0].children[2].attribs[2] = DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0) - self.assertFromDwarf(dies, t, little_endian=True) - # With DW_AT_data_member_location, DW_AT_bit_offset, and - # DW_AT_byte_size on little-endian. - dies[0].children[1].attribs.append( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4) - ) - dies[0].children[2].attribs.append( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4) + def test_bit_field_data_member_location_and_bit_offset_big_endian(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 12), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ), 
+ little_endian=False, ) - self.assertFromDwarf(dies, t, little_endian=True) - - def test_union(self): - dies = [ - DwarfDie( - DW_TAG.union_type, + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "option"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), ], - [ - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), + ), + ) + + def test_bit_field_data_member_location_and_bit_offset_little_endian(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "f"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 20), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + [ + 
TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), ], ), - int_die, - float_die, - ] - - self.assertFromDwarf(dies, option_type) - - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_union_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, ) - dies[0].attribs[0] = tag - size = dies[0].attribs.pop(1) - self.assertRaisesRegex( - Exception, - "DW_TAG_union_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + def test_bit_field_data_member_location_and_bit_offset_with_byte_size_little_endian( + self, + ): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 20), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) ) - dies[0].attribs.insert(1, size) - - def test_class(self): - dies = [ - DwarfDie( - 
DW_TAG.class_type, + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "coord"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 12), + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), ], - [ - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), + ), + ) + + def test_union(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.union_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "option"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "f"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + ), ), - ], + int_die, + float_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.union_type( + "option", + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.float_type("float", 4), "f"), + ), ), - int_die, - ] - - self.assertFromDwarf(dies, coord_type) - - tag = dies[0].attribs.pop(0) - self.assertFromDwarf( - dies, class_type(None, coord_type.size, coord_type.members) ) - dies[0].attribs.insert(0, tag) - children = list(dies[0].children) - dies[0].children.clear() - self.assertFromDwarf(dies, 
class_type("coord", coord_type.size, ())) - size = dies[0].attribs.pop(1) - dies[0].attribs.append( - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True) + def test_class(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.class_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "coord"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 12), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 8 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) ) - self.assertFromDwarf(dies, class_type("coord")) - del dies[0].attribs[-1] - dies[0].attribs.insert(1, size) - dies[0].children.extend(children) - - name = dies[0].children[0].attribs.pop(0) - self.assertFromDwarf( - dies, - class_type( + self.assertEqual( + prog.type("TEST").type, + prog.class_type( "coord", - coord_type.size, + 12, ( - TypeMember(int_type("int", 4, True), None, 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + TypeMember(prog.int_type("int", 4, True), "z", 64), ), ), ) - dies[0].children[0].attribs.insert(0, name) - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_class_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, + def test_lazy_cycle(self): + prog = dwarf_program( + 
test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "next"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ) + ) ) - dies[0].attribs[0] = tag - - size = dies[0].attribs.pop(1) - self.assertRaisesRegex( - Exception, - "DW_TAG_class_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + type_ = prog.struct_type( + "foo", 8, (TypeMember(lambda: prog.pointer_type(type_), "next"),) ) - dies[0].attribs.insert(1, size) + self.assertEqual(prog.type("TEST").type, type_) - name = dies[0].children[0].attribs.pop(0) - dies[0].children[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_name", - self.type_from_dwarf, - dies, - ) - dies[0].children[0].attribs[0] = name - - location = dies[0].children[0].attribs[1] - dies[0].children[0].attribs[1] = DwarfAttrib( - DW_AT.data_member_location, DW_FORM.string, "foo" - ) - self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_data_member_location", - self.type_from_dwarf, - dies, + def test_infinite_cycle(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ) + ) ) - dies[0].children[0].attribs[1] = location + self.assertRaisesRegex(Exception, "maximum.*depth exceeded", prog.type, "TEST") - type_ = dies[0].children[0].attribs.pop(2) - self.assertRaisesRegex( - Exception, "DW_TAG_member is missing DW_AT_type", self.type_from_dwarf, dies - ) - 
dies[0].children[0].attribs.insert( - 2, DwarfAttrib(DW_AT.type, DW_FORM.string, "foo") - ) - self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_type", - self.type_from_dwarf, - dies, + def test_enum(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), + ), + ), + unsigned_int_die, + ) + ) ) - dies[0].children[0].attribs[2] = type_ - - def test_lazy_cycle(self): - dies = [ - DwarfDie( - DW_TAG.structure_type, + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), ), + ), + ) + + def test_enum_anonymous(self): + prog = dwarf_program( + test_type_dies( ( DwarfDie( - DW_TAG.member, + DW_TAG.enumeration_type, ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "next"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + 
DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), ), ), - ), - ), - DwarfDie( - DW_TAG.pointer_type, + unsigned_int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( + None, + prog.int_type("unsigned int", 4, False), ( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), ), ), - ] - - type_ = struct_type( - "foo", 8, (TypeMember(lambda: pointer_type(8, type_), "next"),) ) - self.assertFromDwarf(dies, type_) - def test_infinite_cycle(self): - dies = [ - DwarfDie( - DW_TAG.pointer_type, - [ - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], - ), - ] - self.assertRaisesRegex( - Exception, "maximum.*depth exceeded", self.type_from_dwarf, dies + def test_enum_no_enumerators(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + unsigned_int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type("color", prog.int_type("unsigned int", 4, False), ()), ) - def test_enum(self): - dies = [ - DwarfDie( - DW_TAG.enumeration_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ], - [ - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), - ], + def test_enum_incomplete(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + 
DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), - ], + ) + ) + ) + self.assertEqual(prog.type("TEST").type, prog.enum_type("color")) + + def test_enum_old_gcc(self): + # GCC < 5.1 + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), - ], + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), ), - ], + ), + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), ), - unsigned_int_die, - double_die, - ] + ) - self.assertFromDwarf(dies, color_type) - - tag = dies[0].attribs.pop(0) - self.assertFromDwarf( - dies, enum_type(None, color_type.type, color_type.enumerators) - ) - dies[0].attribs.insert(0, tag) - - children = list(dies[0].children) - dies[0].children.clear() - self.assertFromDwarf(dies, enum_type("color", color_type.type, ())) - type_ = dies[0].attribs.pop(1) - dies[0].attribs.append( - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True) - ) - self.assertFromDwarf(dies, enum_type("color")) - del dies[0].attribs[-1] - dies[0].attribs.insert(1, type_) - dies[0].children.extend(children) - 
- # A la GCC before 5.1. - del dies[0].attribs[1] - self.assertFromDwarf( - dies, - enum_type("color", int_type("", 4, False), color_type.enumerators), - ) - for i, child in enumerate(dies[0].children): - child.attribs[1] = DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, -i) - self.assertFromDwarf( - dies, - enum_type( + def test_enum_old_gcc_signed(self): + # GCC < 5.1 + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, -1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, -2), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( "color", - int_type("", 4, True), + prog.int_type("", 4, True), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", -1), @@ -1044,60 +1640,148 @@ def test_enum(self): ), ) - dies[0].attribs.insert(1, DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)) + def test_enum_compatible_type_not_integer(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + float_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_AT_type of DW_TAG_enumeration_type is not an integer type", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - del dies[0].attribs[1] - size = dies[0].attribs.pop(1) + def test_enum_missing_compatible_type_and_byte_size(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.enumeration_type, ()))) self.assertRaisesRegex( Exception, 
"DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].attribs.insert(1, size) - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) + def test_enum_invalid_name(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + unsigned_int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_enumeration_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].attribs[0] = tag - name = dies[0].children[0].attribs.pop(0) + def test_enum_enumerator_missing_name(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + (DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0),), + ), + ), + ), + unsigned_int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_enumerator has missing or invalid DW_AT_name", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs.insert(0, name) - const_value = dies[0].children[0].attribs.pop(1) + def test_enum_enumerator_missing_const_value(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "FOO"),), + ), + ), + ), + unsigned_int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_enumerator is missing DW_AT_const_value", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs.insert( - 1, DwarfAttrib(DW_AT.const_value, 
DW_FORM.string, "asdf") + + def test_enum_enumerator_invalid_const_value(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "FOO"), + DwarfAttrib( + DW_AT.const_value, DW_FORM.string, "FOO" + ), + ), + ), + ), + ), + unsigned_int_die, + ) + ) ) self.assertRaisesRegex( Exception, "DW_TAG_enumerator has invalid DW_AT_const_value", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs[1] = const_value def test_tagged_by_name(self): prog = dwarf_program( @@ -1206,342 +1890,615 @@ def test_tagged_by_name(self): ), ], ), - ) + ) + ) + + self.assertEqual( + prog.type("struct point"), + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ), + ) + self.assertRaisesRegex(LookupError, "could not find", prog.type, "union point") + self.assertEqual( + prog.type("union option"), + prog.union_type( + "option", + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.float_type("float", 4), "f"), + ), + ), ) - - self.assertEqual(prog.type("struct point"), point_type) - self.assertRaisesRegex(LookupError, "could not find", prog.type, "union point") - self.assertEqual(prog.type("union option"), option_type) self.assertRaisesRegex( LookupError, "could not find", prog.type, "struct option" ) - self.assertEqual(prog.type("enum color"), color_type) + self.assertEqual( + prog.type("enum color"), + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ), + ) self.assertRaisesRegex(LookupError, "could not find", prog.type, "struct color") def test_typedef(self): - dies = [ - DwarfDie( - DW_TAG.typedef, - 
[ - DwarfAttrib(DW_AT.name, DW_FORM.string, "INT"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), - int_die, - ] - self.assertFromDwarf(dies, typedef_type("INT", int_type("int", 4, True))) + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "INT"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.typedef_type("INT", prog.int_type("int", 4, True)), + ) - dies[0].attribs.pop(0) + def test_typedef_missing_name(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.typedef, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_typedef has missing or invalid DW_AT_name", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - def test_void_typedef(self): - dies = [ - DwarfDie( - DW_TAG.typedef, [DwarfAttrib(DW_AT.name, DW_FORM.string, "VOID"),], - ), - ] - self.assertFromDwarf(dies, typedef_type("VOID", void_type())) - - dies[0].attribs.pop(0) - self.assertRaisesRegex( - Exception, - "DW_TAG_typedef has missing or invalid DW_AT_name", - self.type_from_dwarf, - dies, + def test_typedef_void(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.typedef, (DwarfAttrib(DW_AT.name, DW_FORM.string, "VOID"),), + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.typedef_type("VOID", prog.void_type()) ) def test_typedef_by_name(self): prog = dwarf_program( - base_type_dies - + ( + ( DwarfDie( DW_TAG.typedef, ( DwarfAttrib(DW_AT.name, DW_FORM.string, "pid_t"), - DwarfAttrib( - DW_AT.type, DW_FORM.ref4, base_type_dies.index(int_die) - ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), ), ), + int_die, ) ) - self.assertEqual(prog.type("pid_t"), pid_type) + self.assertEqual( + prog.type("pid_t"), + prog.typedef_type("pid_t", prog.int_type("int", 4, True)), + ) def test_pointer(self): - dies = [ - 
DwarfDie(DW_TAG.pointer_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),],), - int_die, - ] - self.assertFromDwarf(dies, pointer_type(8, int_type("int", 4, True))) + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.pointer_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.int_type("int", 4, True)) + ) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.int_type("int", 4, True), 8) + ) + + def test_pointer_explicit_size(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.int_type("int", 4, True), 4) + ) - del dies[0].attribs[0] - self.assertFromDwarf(dies, pointer_type(8, void_type())) + def test_pointer_void(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.pointer_type, ()))) + self.assertEqual(prog.type("TEST").type, prog.pointer_type(prog.void_type())) def test_array(self): - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), ), - ], - ), - int_die, - ] - self.assertFromDwarf(dies, array_type(2, int_type("int", 4, True))) + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True), 2) + ) - dies[0].children.append( - DwarfDie( - DW_TAG.subrange_type, [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)] - ), + def test_array_two_dimensional(self): + prog = dwarf_program( + test_type_dies( + ( + 
DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), + ), + int_die, + ) + ) ) - self.assertFromDwarf( - dies, array_type(2, array_type(3, int_type("int", 4, True))) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 3), 2), ) - dies[0].children.append( - DwarfDie( - DW_TAG.subrange_type, [DwarfAttrib(DW_AT.count, DW_FORM.data1, 4)] - ), + def test_array_three_dimensional(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 4),), + ), + ), + ), + int_die, + ) + ) ) - self.assertFromDwarf( - dies, array_type(2, array_type(3, array_type(4, int_type("int", 4, True)))) + self.assertEqual( + prog.type("TEST").type, + prog.array_type( + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 4), 3), 2 + ), ) - del dies[0].attribs[0] + def test_array_missing_type(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + int_die, + ) + ) + ) self.assertRaisesRegex( - Exception, - "DW_TAG_array_type is missing DW_AT_type", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_array_type is missing DW_AT_type", prog.type, "TEST" ) - def test_zero_length_array(self): - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + def test_array_zero_length_count(self): + prog = 
dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], - ), - int_die, - ] - self.assertFromDwarf(dies, array_type(0, int_type("int", 4, True))) + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True), 0) + ) - dies[0].children[0].attribs[0] = DwarfAttrib( - DW_AT.upper_bound, DW_FORM.sdata, -1 + def test_array_zero_length_upper_bound(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.sdata, -1),), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True), 0) ) - self.assertFromDwarf(dies, array_type(0, int_type("int", 4, True))) - def test_incomplete_array(self): - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [DwarfDie(DW_TAG.subrange_type, [])], - ), - int_die, - ] - self.assertFromDwarf(dies, array_type(None, int_type("int", 4, True))) + def test_incomplete_array_no_subrange(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True)) + ) - del dies[0].children[0] - self.assertFromDwarf(dies, array_type(None, int_type("int", 4, True))) + def test_incomplete_array_empty_subrange(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + (DwarfDie(DW_TAG.subrange_type, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + 
prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True)) + ) def test_incomplete_array_of_array(self): - # int [3][] - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ - DwarfDie(DW_TAG.subrange_type, []), + prog = dwarf_program( + test_type_dies( + # int [3][] + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie(DW_TAG.subrange_type, ()), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), ), - ], - ), - int_die, - ] - self.assertFromDwarf( - dies, array_type(None, array_type(3, int_type("int", 4, True))) + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 3)), ) def test_array_of_zero_length_array(self): - # int [3][0] - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + # int [3][0] + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 0), 3), + ) + + def test_array_of_zero_length_array_old_gcc(self): + # GCC < 9.0 + prog = dwarf_program( + test_type_dies( + # int [3][0] + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + DwarfDie(DW_TAG.subrange_type, ()), + ), ), - ], - ), 
- int_die, - ] - - type_ = array_type(3, array_type(0, int_type("int", 4, True))) - self.assertFromDwarf(dies, type_) - - # GCC < 9.0. - del dies[0].children[1].attribs[0] - self.assertFromDwarf(dies, type_) + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 0), 3), + ) def test_array_of_zero_length_array_typedef(self): - dies = [ - # ZARRAY [3] - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + ( + # ZARRAY [3] DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), + ), + # typedef int ZARRAY[0]; + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), ), - ], - ), - # typedef int ZARRAY[0]; - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], - ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)], - [ DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + 3, ), - int_die, - ] - - type_ = array_type( - 3, typedef_type("ZARRAY", array_type(0, int_type("int", 4, True))) ) - self.assertFromDwarf(dies, type_) + def test_array_of_zero_length_array_typedef_old_gcc(self): # GCC actually squashes arrays of typedef arrays into one array type, # but let's handle it 
like GCC < 9.0 anyways. - del dies[2].children[0] - self.assertFromDwarf(dies, type_) + prog = dwarf_program( + test_type_dies( + ( + # ZARRAY [3] + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), + ), + # typedef int ZARRAY[0]; + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + (DwarfDie(DW_TAG.subrange_type, (),),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + 3, + ), + ) def test_flexible_array_member(self): # struct { # int i; # int a[]; # }; - dies = [ - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + ), + ), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - ], + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),) ), - ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)],), - int_die, - ] - - self.assertFromDwarf( - dies, - 
struct_type( + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( None, 4, ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(None, int_type("int", 4, True)), "a", 32), + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.array_type(prog.int_type("int", 4, True)), "a", 32), ), ), ) def test_typedef_flexible_array_member(self): - dies = [ - # struct { - # int i; - # FARRAY a; - # }; - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( + # struct { + # int i; + # FARRAY a; + # }; DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + ), + ), + ), ), + # typedef int FARRAY[]; DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - ], + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "FARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), ), - ], - ), - # typedef int FARRAY[]; - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "FARRAY"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)],), - int_die, - ] - - self.assertFromDwarf( - dies, - struct_type( + DwarfDie( + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),) + ), + int_die, + ) + ) + ) + self.assertEqual( + 
prog.type("TEST").type, + prog.struct_type( None, 4, ( - TypeMember(int_type("int", 4, True), "i"), + TypeMember(prog.int_type("int", 4, True), "i"), TypeMember( - typedef_type( - "FARRAY", array_type(None, int_type("int", 4, True)) + prog.typedef_type( + "FARRAY", prog.array_type(prog.int_type("int", 4, True)) ), "a", 32, @@ -1554,303 +2511,602 @@ def test_zero_length_array_only_member(self): # struct { # int a[0]; # }; - dies = [ - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), - ], - ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], - [ DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + (TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"),), ), - int_die, - ] - - type_ = struct_type( - None, 4, (TypeMember(array_type(0, int_type("int", 4, True)), "a"),) ) - self.assertFromDwarf(dies, type_) + def test_zero_length_array_only_member_old_gcc(self): # GCC < 9.0. 
- del dies[1].children[0].attribs[0] - self.assertFromDwarf(dies, type_) + # struct { + # int a[0]; + # }; + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + (DwarfDie(DW_TAG.subrange_type, (),),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + (TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"),), + ), + ) def test_typedef_zero_length_array_only_member(self): - dies = [ + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + # struct foo { + # ZARRAY a; + # }; + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + # typedef int ZARRAY[0]; + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), + ), + DwarfDie( + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "foo", + 4, + ( + TypeMember( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + "a", + ), + ), + ), + ) + + def test_typedef_zero_length_array_only_member_old_gcc(self): + # GCC < 9.0. 
+ dies = ( DwarfDie( # struct foo { # ZARRAY a; # }; DW_TAG.structure_type, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ], - [ + ), + ( DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), - ], + ), ), # typedef int ZARRAY[0]; DwarfDie( DW_TAG.typedef, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], - [ + ), + ), + DwarfDie(DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),)), + int_die, + ) + + prog = dwarf_program(test_type_dies(dies)) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "foo", + 4, + ( + TypeMember( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + "a", + ), + ), + ), + ) + # Although the ZARRAY type must be a zero-length array in the context + # of the structure, it could still be an incomplete array if used + # elsewhere. + self.assertEqual( + prog.type("ZARRAY"), + prog.typedef_type("ZARRAY", prog.array_type(prog.int_type("int", 4, True))), + ) + + # Make sure it still works if we parse the array type first. 
+ prog = dwarf_program(test_type_dies(dies)) + self.assertEqual( + prog.type("ZARRAY"), + prog.typedef_type("ZARRAY", prog.array_type(prog.int_type("int", 4, True))), + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "foo", + 4, + ( + TypeMember( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + "a", + ), + ), + ), + ) + + def test_zero_length_array_not_last_member(self): + # struct { + # int a[0]; + # int i; + # }; + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + ), + ), DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)],), - int_die, - ] - - type_ = struct_type( - "foo", - 4, - ( - TypeMember( - typedef_type("ZARRAY", array_type(0, int_type("int", 4, True))), "a" + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + ( + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + TypeMember(prog.int_type("int", 4, True), "i"), ), ), ) - self.assertFromDwarf(dies, type_) - - farray_zarray = typedef_type( - "ZARRAY", array_type(None, int_type("int", 4, True)) - ) + def test_zero_length_array_not_last_member_old_gcc(self): # GCC < 9.0. 
- del dies[1].children[0] - prog = dwarf_program(dies) - self.assertEqual(prog.type("struct foo"), type_) - # Although the ZARRAY type must be a zero-length array in the context - # of the structure, it could still be an incomplete array if used - # elsewhere. - self.assertEqual(prog.type("ZARRAY"), farray_zarray) - - # Make sure it still works if we parse the array type first. - prog = dwarf_program(dies) - self.assertEqual(prog.type("ZARRAY"), farray_zarray) - self.assertEqual(prog.type("struct foo"), type_) - - def test_zero_length_array_not_last_member(self): # struct { # int a[0]; # int i; # }; - dies = [ - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + ), ), - ], - ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], - [ DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + (DwarfDie(DW_TAG.subrange_type, ()),), ), - ], - ), - int_die, - ] - - type_ = struct_type( - None, - 4, - ( - TypeMember(array_type(0, int_type("int", 4, True)), "a"), - TypeMember(int_type("int", 4, True), "i"), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + ( + TypeMember(prog.array_type(prog.int_type("int", 4, 
True), 0), "a"), + TypeMember(prog.int_type("int", 4, True), "i"), + ), ), ) - self.assertFromDwarf(dies, type_) - - # GCC < 9.0. - del dies[1].children[0].attribs[0] - self.assertFromDwarf(dies, type_) def test_zero_length_array_in_union(self): # union { # int i; # int a[0]; # }; - dies = [ - DwarfDie( - DW_TAG.union_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.union_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.union_type( + None, + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + ), ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], - [ + ) + + def test_zero_length_array_in_union_old_gcc(self): + # GCC < 9.0. 
+ # union { + # int i; + # int a[0]; + # }; + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.union_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), - ], - ), - int_die, - ] - - type_ = union_type( - None, - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(0, int_type("int", 4, True)), "a"), + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + (DwarfDie(DW_TAG.subrange_type, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.union_type( + None, + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + ), ), ) - self.assertFromDwarf(dies, type_) - - # GCC < 9.0. 
- del dies[1].children[0].attribs[0] - self.assertFromDwarf(dies, type_) def test_pointer_size(self): prog = dwarf_program(base_type_dies, bits=32) - self.assertEqual(prog.type("int *"), pointer_type(4, int_type("int", 4, True))) + self.assertEqual( + prog.type("int *"), prog.pointer_type(prog.int_type("int", 4, True), 4) + ) - def test_function(self): + def test_function_no_parameters(self): + # int foo(void) + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type(prog.int_type("int", 4, True), (), False), + ) + + def test_function_void_return(self): + # void foo(void) + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.subroutine_type, ()))) + self.assertEqual( + prog.type("TEST").type, prog.function_type(prog.void_type(), (), False), + ) + + def test_function_unnamed_parameter(self): # int foo(char) - dies = [ - DwarfDie( - DW_TAG.subroutine_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.formal_parameter, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + ), ), - ], - ), - int_die, - char_die, - ] - self.assertFromDwarf( - dies, - function_type( - int_type("int", 4, True), - (TypeParameter(int_type("char", 1, True)),), + int_die, + char_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("char", 1, True)),), False, ), ) + def test_function_named_parameter(self): # int foo(char c) - dies[0].children[0].attribs.append(DwarfAttrib(DW_AT.name, DW_FORM.string, "c")) - self.assertFromDwarf( - dies, - function_type( - int_type("int", 4, True), - 
(TypeParameter(int_type("char", 1, True), "c"),), + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.formal_parameter, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + DwarfAttrib(DW_AT.name, DW_FORM.string, "c"), + ), + ), + ), + ), + int_die, + char_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("char", 1, True), "c"),), False, ), ) + def test_function_unspecified_parameters(self): + # int foo() + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + (DwarfDie(DW_TAG.unspecified_parameters, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type(prog.int_type("int", 4, True), (), True), + ) + + def test_function_variadic(self): # int foo(char, ...) - del dies[0].children[0].attribs[-1] - dies[0].children.append(DwarfDie(DW_TAG.unspecified_parameters, [])) - self.assertFromDwarf( - dies, - function_type( - int_type("int", 4, True), - (TypeParameter(int_type("char", 1, True)),), + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + DwarfDie(DW_TAG.unspecified_parameters, ()), + ), + ), + int_die, + char_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("char", 1, True)),), True, ), ) - # int foo() - del dies[0].children[0] - self.assertFromDwarf(dies, function_type(int_type("int", 4, True), (), True)) - - # int foo(void) - del dies[0].children[0] - self.assertFromDwarf(dies, function_type(int_type("int", 4, True), (), False)) - - # void foo(void) - del dies[0].attribs[0] - 
self.assertFromDwarf(dies, function_type(void_type(), (), False)) - - def test_incomplete_array_parameter(self): + def test_function_incomplete_array_parameter(self): # void foo(int []) # Note that in C, this is equivalent to void foo(int *), so GCC and # Clang emit the DWARF for the latter. - dies = [ - DwarfDie( - DW_TAG.subroutine_type, - [], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.formal_parameter, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], + DW_TAG.subroutine_type, + (), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + ), ), - ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)],), - int_die, - ] - self.assertFromDwarf( - dies, - function_type( - void_type(), - (TypeParameter(array_type(None, int_type("int", 4, True))),), + DwarfDie( + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.void_type(), + (TypeParameter(prog.array_type(prog.int_type("int", 4, True))),), False, ), ) @@ -1858,132 +3114,151 @@ def test_incomplete_array_parameter(self): def test_language(self): for name, lang in DW_LANG.__members__.items(): if re.fullmatch("C[0-9]*", name): - self.assertFromDwarf( - (int_die,), - int_type("int", 4, True, language=Language.C), - lang=lang, + prog = dwarf_program(test_type_dies(int_die), lang=lang) + self.assertEqual( + prog.type("TEST").type, + prog.int_type("int", 4, True, language=Language.C), ) - - self.assertFromDwarf( - (int_die,), - int_type("int", 4, True, language=DEFAULT_LANGUAGE), - lang=DW_LANG.BLISS, + prog = dwarf_program(test_type_dies(int_die), lang=DW_LANG.BLISS) + self.assertEqual( + prog.type("TEST").type, + prog.int_type("int", 4, True, language=DEFAULT_LANGUAGE), ) -class TestObjects(ObjectTestCase): - def test_constant(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.enumeration_type, - [ - 
DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ], - [ - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), - ], - ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), - ], +class TestObjects(TestCase): + def test_constant_signed_enum(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), - ], + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), ), - ], - ), - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib( - DW_AT.location, - DW_FORM.exprloc, - b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), ), - ], - ), - ] - - type_ = enum_type( - "color", - int_type("int", 4, True), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), + ), + 
) ) - prog = dwarf_program(dies) - self.assertEqual(prog["BLUE"], Object(prog, type_, value=2)) - - dies[0] = unsigned_int_die - type_ = enum_type( + type_ = prog.enum_type( "color", - int_type("unsigned int", 4, False), + prog.int_type("int", 4, True), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), TypeEnumerator("BLUE", 2), ), ) - prog = dwarf_program(dies) - self.assertEqual(prog["GREEN"], Object(prog, type_, value=1)) + self.assertEqual( + prog.object("RED", FindObjectFlags.CONSTANT), Object(prog, type_, value=0) + ) + self.assertEqual(prog["BLUE"], Object(prog, type_, value=2)) - del dies[1].attribs[0] - type_ = enum_type( - None, - int_type("unsigned int", 4, False), + def test_constant_unsigned_enum(self): + prog = dwarf_program( ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), + unsigned_int_die, + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "FLAG"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data2, 1 << 12), + ), + ), + ), + ), + ) ) - prog = dwarf_program(dies) self.assertEqual( - prog.object("RED", FindObjectFlags.CONSTANT), Object(prog, type_, value=0) + prog["FLAG"], + Object( + prog, + prog.enum_type( + None, + prog.int_type("unsigned int", 4, False), + (TypeEnumerator("FLAG", 4096),), + ), + 4096, + ), ) def test_function(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.subprogram, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "abs"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), - ], - [ + prog = dwarf_program( + test_type_dies( + ( + int_die, DwarfDie( - DW_TAG.formal_parameter, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0)], + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "abs"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + 
DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), + ), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ), + ), ), - ], + ) + ) + ) + self.assertEqual( + prog["abs"], + Object( + prog, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("int", 1, True)),), + False, + ), + address=0x7FC3EB9B1C30, ), - ] - type_ = function_type( - int_type("int", 4, True), (TypeParameter(int_type("int", 1, True)),), False ) - - prog = dwarf_program(dies) - self.assertEqual(prog["abs"], Object(prog, type_, address=0x7FC3EB9B1C30)) self.assertEqual(prog.object("abs", FindObjectFlags.FUNCTION), prog["abs"]) self.assertRaisesRegex( LookupError, @@ -1993,33 +3268,42 @@ def test_function(self): FindObjectFlags.VARIABLE, ) - del dies[1].attribs[2] - prog = dwarf_program(dies) + def test_function_no_address(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "abort"),), + ) + ) + ) self.assertRaisesRegex( - LookupError, "could not find address", prog.object, "abs" + LookupError, "could not find address", prog.object, "abort" ) def test_variable(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib( - DW_AT.location, - DW_FORM.exprloc, - b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), ), - ], - ), - ] - - prog = dwarf_program(dies) + ) + ) + ) self.assertEqual( prog["x"], - Object(prog, int_type("int", 4, True), address=0xFFFFFFFF01020304), + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), ) 
self.assertEqual(prog.object("x", FindObjectFlags.VARIABLE), prog["x"]) self.assertRaisesRegex( @@ -2030,17 +3314,44 @@ def test_variable(self): FindObjectFlags.CONSTANT, ) - del dies[1].attribs[2] - prog = dwarf_program(dies) + def test_variable_no_address(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ) + ) + ) self.assertRaisesRegex( LookupError, "could not find address or value", prog.object, "x" ) - dies[1].attribs.insert(2, DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xe0")) - prog = dwarf_program(dies) + def test_variable_unimplemented_location(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xe0"), + ), + ), + ) + ) + ) self.assertRaisesRegex(Exception, "unimplemented operation", prog.object, "x") - def test_const_signed(self): + def test_variable_const_signed(self): for form in ( DW_FORM.data1, DW_FORM.data2, @@ -2048,23 +3359,27 @@ def test_const_signed(self): DW_FORM.data8, DW_FORM.sdata, ): - dies = [ - int_die, - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1,), - ], - ), - ] - prog = dwarf_program(dies) + + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, form, 1,), + ), + ), + ) + ) + ) self.assertEqual( - prog["x"], Object(prog, int_type("int", 4, True), 1), + prog["x"], Object(prog, prog.int_type("int", 4, True), 1), ) - def test_const_unsigned(self): + def test_variable_const_unsigned(self): for form in ( DW_FORM.data1, 
DW_FORM.data2, @@ -2072,80 +3387,98 @@ def test_const_unsigned(self): DW_FORM.data8, DW_FORM.udata, ): - dies = [ - unsigned_int_die, - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1), - ], - ), - ] - prog = dwarf_program(dies) + prog = dwarf_program( + test_type_dies( + ( + unsigned_int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, form, 1), + ), + ), + ) + ) + ) self.assertEqual( - prog["x"], Object(prog, int_type("unsigned int", 4, False), 1), + prog["x"], Object(prog, prog.int_type("unsigned int", 4, False), 1), ) - def test_const_block(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - ], - [ + def test_variable_const_block(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], - ), - ], - ), - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib( - DW_AT.const_value, - DW_FORM.block1, - b"\x01\x00\x00\x00\x02\x00\x00\x00", + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + 
DW_AT.const_value, + DW_FORM.block1, + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ), + ), ), - ], - ), - ] - prog = dwarf_program(dies) + ) + ) + ) self.assertEqual( - prog["p"], Object(prog, point_type, {"x": 1, "y": 2}), + prog["p"], + Object(prog, prog.array_type(prog.int_type("int", 4, True), 2), [1, 2],), ) - dies[2].attribs[2] = DwarfAttrib( - DW_AT.const_value, DW_FORM.block1, b"\x01\x00\x00\x00\x02\x00\x00", + def test_variable_const_block_too_small(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.const_value, + DW_FORM.block1, + b"\x01\x00\x00\x00\x02\x00\x00", + ), + ), + ), + ) + ) ) - prog = dwarf_program(dies) self.assertRaisesRegex(Exception, "too small", prog.variable, "p") def test_not_found(self): - prog = dwarf_program([int_die]) + prog = dwarf_program(int_die) self.assertRaisesRegex(LookupError, "could not find", prog.object, "y") -class TestProgram(unittest.TestCase): +class TestProgram(TestCase): def test_language(self): dies = ( DwarfDie( diff --git a/tests/test_language_c.py b/tests/test_language_c.py index f2527f1ea..0606fbb12 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -5,30 +5,12 @@ import operator import unittest -from drgn import ( - Qualifiers, - TypeEnumerator, - TypeMember, - TypeParameter, - array_type, - bool_type, - class_type, - complex_type, - enum_type, - float_type, - function_type, - int_type, - pointer_type, - struct_type, - typedef_type, - union_type, - void_type, -) -from tests import coord_type, point_type +from drgn import Qualifiers, TypeEnumerator, TypeMember, TypeParameter +from tests import MockProgramTestCase from tests.libdrgn import C_TOKEN, 
Lexer, drgn_lexer_c -class TestPrettyPrintTypeName(unittest.TestCase): +class TestPrettyPrintTypeName(MockProgramTestCase): def assertTypeName(self, type, expected, same_as_definition=False): self.assertEqual(type.type_name(), expected) if same_as_definition: @@ -58,120 +40,175 @@ def assertQualifiedTypeName(self, expected, same_as_definition, constructor, *ar ) def test_void(self): - self.assertQualifiedTypeName("void", True, void_type) + self.assertQualifiedTypeName("void", True, self.prog.void_type) def test_int(self): - self.assertQualifiedTypeName("int", True, int_type, "int", 4, True) + self.assertQualifiedTypeName("int", True, self.prog.int_type, "int", 4, True) def test_bool(self): - self.assertQualifiedTypeName("_Bool", True, bool_type, "_Bool", 1) + self.assertQualifiedTypeName("_Bool", True, self.prog.bool_type, "_Bool", 1) def test_float(self): - self.assertQualifiedTypeName("float", True, float_type, "float", 4) + self.assertQualifiedTypeName("float", True, self.prog.float_type, "float", 4) def test_complex(self): self.assertQualifiedTypeName( "double _Complex", True, - complex_type, + self.prog.complex_type, "double _Complex", 16, - float_type("double", 8), + self.prog.float_type("double", 8), ) def test_struct(self): - self.assertQualifiedTypeName("struct point", True, struct_type, "point") - self.assertQualifiedTypeName("struct ", False, struct_type, None) + self.assertQualifiedTypeName( + "struct point", True, self.prog.struct_type, "point" + ) + self.assertQualifiedTypeName( + "struct ", False, self.prog.struct_type, None + ) def test_union(self): - self.assertQualifiedTypeName("union option", True, union_type, "option"), - self.assertQualifiedTypeName("union ", False, union_type, None) + self.assertQualifiedTypeName( + "union option", True, self.prog.union_type, "option" + ), + self.assertQualifiedTypeName( + "union ", False, self.prog.union_type, None + ) def test_class(self): - self.assertQualifiedTypeName("class coord", True, class_type, 
"coord") - self.assertQualifiedTypeName("class ", False, class_type, None) + self.assertQualifiedTypeName("class coord", True, self.prog.class_type, "coord") + self.assertQualifiedTypeName( + "class ", False, self.prog.class_type, None + ) def test_enum(self): self.assertQualifiedTypeName( - "enum color", True, enum_type, "color", None, None + "enum color", True, self.prog.enum_type, "color", None, None ), self.assertQualifiedTypeName( - "enum ", False, enum_type, None, None, None + "enum ", False, self.prog.enum_type, None, None, None ) def test_typedef(self): self.assertQualifiedTypeName( - "bool", False, typedef_type, "bool", bool_type("_Bool", 1) + "bool", + False, + self.prog.typedef_type, + "bool", + self.prog.bool_type("_Bool", 1), ) def test_pointer(self): - self.assertTypeName(pointer_type(8, void_type()), "void *", True) - t = pointer_type(8, void_type(Qualifiers.VOLATILE)) + self.assertTypeName( + self.prog.pointer_type(self.prog.void_type()), "void *", True + ) + t = self.prog.pointer_type(self.prog.void_type(qualifiers=Qualifiers.VOLATILE)) self.assertTypeName(t, "volatile void *", True) - t = pointer_type(8, void_type(Qualifiers.VOLATILE), Qualifiers.CONST) + t = self.prog.pointer_type( + self.prog.void_type(qualifiers=Qualifiers.VOLATILE), + qualifiers=Qualifiers.CONST, + ) self.assertTypeName(t, "volatile void * const", True) - t = pointer_type(8, t) + t = self.prog.pointer_type(t) self.assertTypeName(t, "volatile void * const *", True) def test_array(self): - i = int_type("int", 4, True) - self.assertTypeName(array_type(None, i), "int []", True) - self.assertTypeName(array_type(2, i), "int [2]", True) - self.assertTypeName(array_type(2, array_type(3, i)), "int [2][3]", True) + i = self.prog.int_type("int", 4, True) + self.assertTypeName(self.prog.array_type(i), "int []", True) + self.assertTypeName(self.prog.array_type(i, 2), "int [2]", True) self.assertTypeName( - array_type(2, array_type(3, array_type(4, i))), "int [2][3][4]", True + 
self.prog.array_type(self.prog.array_type(i, 3), 2), "int [2][3]", True + ) + self.assertTypeName( + self.prog.array_type( + self.prog.array_type(self.prog.array_type(i, 4), 3), 2 + ), + "int [2][3][4]", + True, ) def test_array_of_pointers(self): self.assertTypeName( - array_type(2, array_type(3, pointer_type(8, int_type("int", 4, True)))), + self.prog.array_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 3 + ), + 2, + ), "int *[2][3]", True, ) def test_pointer_to_array(self): self.assertTypeName( - pointer_type(8, array_type(2, int_type("int", 4, True))), "int (*)[2]", True + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ), + "int (*)[2]", + True, ) def test_pointer_to_pointer_to_array(self): self.assertTypeName( - pointer_type(8, pointer_type(8, array_type(2, int_type("int", 4, True)))), + self.prog.pointer_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ) + ), "int (**)[2]", True, ) def test_pointer_to_array_of_pointers(self): self.assertTypeName( - pointer_type(8, array_type(2, pointer_type(8, int_type("int", 4, True)))), + self.prog.pointer_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 2 + ) + ), "int *(*)[2]", True, ) def test_array_of_pointers_to_array(self): self.assertTypeName( - array_type(2, pointer_type(8, array_type(3, int_type("int", 4, True)))), + self.prog.array_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3) + ), + 2, + ), "int (*[2])[3]", True, ) def test_pointer_to_function(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type(8, function_type(i, (TypeParameter(i),), False)), + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), False) + ), "int (*)(int)", True, ) self.assertTypeName( - pointer_type(8, function_type(i, (TypeParameter(i, 
"x"),), False)), + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i, "x"),), False) + ), "int (*)(int x)", True, ) self.assertTypeName( - pointer_type( - 8, - function_type( - i, (TypeParameter(i), TypeParameter(float_type("float", 4),)), False + self.prog.pointer_type( + self.prog.function_type( + i, + ( + TypeParameter(i), + TypeParameter(self.prog.float_type("float", 4),), + ), + False, ), ), "int (*)(int, float)", @@ -179,19 +216,22 @@ def test_pointer_to_function(self): ) def test_pointer_to_function_returning_pointer(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, function_type(pointer_type(8, i), (TypeParameter(i),), False) + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i), (TypeParameter(i),), False + ) ), "int *(*)(int)", True, ) self.assertTypeName( - pointer_type( - 8, - function_type( - pointer_type(8, i), (TypeParameter(pointer_type(8, i)),), False + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i), + (TypeParameter(self.prog.pointer_type(i)),), + False, ), ), "int *(*)(int *)", @@ -199,12 +239,13 @@ def test_pointer_to_function_returning_pointer(self): ) def test_pointer_to_function_returning_pointer_to_const(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, - function_type( - pointer_type(8, int_type("int", 4, True, Qualifiers.CONST)), + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), (TypeParameter(i),), False, ), @@ -214,12 +255,13 @@ def test_pointer_to_function_returning_pointer_to_const(self): ) def test_pointer_to_function_returning_const_pointer(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, - function_type( - pointer_type(8, i, Qualifiers.CONST), 
(TypeParameter(i),), False + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i, qualifiers=Qualifiers.CONST), + (TypeParameter(i),), + False, ), ), "int * const (*)(int)", @@ -227,34 +269,38 @@ def test_pointer_to_function_returning_const_pointer(self): ) def test_const_pointer_to_function_returning_pointer(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, - function_type(pointer_type(8, i), (TypeParameter(i),), False), - Qualifiers.CONST, + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i), (TypeParameter(i),), False + ), + qualifiers=Qualifiers.CONST, ), "int *(* const)(int)", True, ) def test_array_of_pointers_to_functions(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - array_type( - 4, pointer_type(8, function_type(i, (TypeParameter(i),), False)) + self.prog.array_type( + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), False) + ), + 4, ), "int (*[4])(int)", True, ) def test_array_of_const_pointers_to_functions(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - array_type( - None, - pointer_type( - 8, function_type(i, (TypeParameter(i),), False), Qualifiers.CONST + self.prog.array_type( + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), False), + qualifiers=Qualifiers.CONST, ), ), "int (* const [])(int)", @@ -262,40 +308,47 @@ def test_array_of_const_pointers_to_functions(self): ) def test_pointer_to_variadic_function(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type(8, function_type(i, (TypeParameter(i),), True)), + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), True) + ), "int (*)(int, ...)", True, ) def test_pointer_to_function_with_no_parameters(self): self.assertTypeName( - 
pointer_type(8, function_type(int_type("int", 4, True), (), False)), + self.prog.pointer_type( + self.prog.function_type(self.prog.int_type("int", 4, True), (), False) + ), "int (*)(void)", True, ) def test_pointer_to_function_with_no_parameter_specification(self): self.assertTypeName( - pointer_type(8, function_type(int_type("int", 4, True), (), True)), + self.prog.pointer_type( + self.prog.function_type(self.prog.int_type("int", 4, True), (), True) + ), "int (*)()", True, ) def test_function(self): self.assertTypeName( - function_type(int_type("int", 4, True), (), False), "int (void)" + self.prog.function_type(self.prog.int_type("int", 4, True), (), False), + "int (void)", ) -class TestPrettyPrintType(unittest.TestCase): +class TestPrettyPrintType(MockProgramTestCase): def assertPrettyPrint(self, type, expected): self.assertEqual(str(type), expected) def test_struct(self): self.assertPrettyPrint( - point_type, + self.point_type, """\ struct point { int x; @@ -303,13 +356,9 @@ def test_struct(self): }""", ) - line_segment = struct_type( - "line_segment", - 16, - (TypeMember(point_type, "a", 0), TypeMember(point_type, "b", 8)), - ) + def test_struct_member(self): self.assertPrettyPrint( - line_segment, + self.line_segment_type, """\ struct line_segment { struct point a; @@ -317,16 +366,16 @@ def test_struct(self): }""", ) - anonymous_point = struct_type( - None, - 8, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 4), - ), - ) + def test_anonymous_struct(self): self.assertPrettyPrint( - anonymous_point, + self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), + ), """\ struct { int x; @@ -334,15 +383,25 @@ def test_struct(self): }""", ) + def test_anonymous_struct_member(self): # Member with anonymous struct type. 
- line_segment = struct_type( - "line_segment", - 16, - (TypeMember(anonymous_point, "a", 0), TypeMember(anonymous_point, "b", 8),), + anonymous_struct = self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), ) - self.assertPrettyPrint( - line_segment, + self.prog.struct_type( + "line_segment", + 16, + ( + TypeMember(anonymous_struct, "a", 0), + TypeMember(anonymous_struct, "b", 64), + ), + ), """\ struct line_segment { struct { @@ -356,17 +415,25 @@ def test_struct(self): }""", ) - # Unnamed member. - point3 = struct_type( - "point3", - 0, - ( - TypeMember(anonymous_point, None, 0), - TypeMember(int_type("int", 4, True), "z", 8), - ), - ) + def test_struct_unnamed_member(self): self.assertPrettyPrint( - point3, + self.prog.struct_type( + "point3", + 0, + ( + TypeMember( + self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), + ), + ), """\ struct point3 { struct { @@ -378,16 +445,15 @@ def test_struct(self): ) def test_bit_field(self): - point = struct_type( - "point", - 4, - ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 4, 8), - ), - ) self.assertPrettyPrint( - point, + self.prog.struct_type( + "point", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 4, 8), + ), + ), """\ struct point { int x : 4; @@ -396,16 +462,20 @@ def test_bit_field(self): ) def test_union(self): - t = union_type( - "foo", - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(4, int_type("unsigned char", 1, False)), "a"), - ), - ) self.assertPrettyPrint( - t, + self.prog.union_type( + "foo", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "i"), + TypeMember( 
+ self.prog.array_type( + self.prog.int_type("unsigned char", 1, False), 4 + ), + "a", + ), + ), + ), """\ union foo { int i; @@ -413,17 +483,22 @@ def test_union(self): }""", ) - t = union_type( - "foo", - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(4, int_type("unsigned char", 1, False)), "a"), - ), - Qualifiers.CONST, - ) + def test_union_qualified(self): self.assertPrettyPrint( - t, + self.prog.union_type( + "foo", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "i"), + TypeMember( + self.prog.array_type( + self.prog.int_type("unsigned char", 1, False), 4 + ), + "a", + ), + ), + qualifiers=Qualifiers.CONST, + ), """\ const union foo { int i; @@ -433,7 +508,7 @@ def test_union(self): def test_class(self): self.assertPrettyPrint( - coord_type, + self.coord_type, """\ class coord { int x; @@ -443,17 +518,8 @@ class coord { ) def test_enum(self): - t = enum_type( - "color", - int_type("unsigned int", 4, False), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), - ) self.assertPrettyPrint( - t, + self.color_type, """\ enum color { RED = 0, @@ -462,18 +528,9 @@ def test_enum(self): }""", ) - t = enum_type( - "color", - int_type("unsigned int", 4, False), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), - Qualifiers.CONST, - ) + def test_enum_qualified(self): self.assertPrettyPrint( - t, + self.color_type.qualified(Qualifiers.CONST), """\ const enum color { RED = 0, @@ -482,17 +539,17 @@ def test_enum(self): }""", ) - t = enum_type( - None, - int_type("int", 4, True), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", -1), - TypeEnumerator("BLUE", -2), - ), - ) + def test_enum_anonymous(self): self.assertPrettyPrint( - t, + self.prog.enum_type( + None, + self.prog.int_type("int", 4, True), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", -1), + TypeEnumerator("BLUE", -2), + ), + ), """\ enum { RED = 0, @@ -503,34 +560,47 
@@ def test_enum(self): def test_typedef(self): self.assertPrettyPrint( - typedef_type("INT", int_type("int", 4, True)), "typedef int INT" + self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)), + "typedef int INT", ) + + def test_typedef_const(self): self.assertPrettyPrint( - typedef_type("CINT", int_type("int", 4, True, Qualifiers.CONST)), + self.prog.typedef_type( + "CINT", self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), "typedef const int CINT", ) + + def test_const_typedef(self): self.assertPrettyPrint( - typedef_type("INT", int_type("int", 4, True), Qualifiers.CONST), + self.prog.typedef_type( + "INT", self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ), "const typedef int INT", ) + + def test_typedef_pointer(self): self.assertPrettyPrint( - typedef_type("string", pointer_type(8, int_type("char", 1, True))), + self.prog.typedef_type( + "string", self.prog.pointer_type(self.prog.int_type("char", 1, True)) + ), "typedef char *string", ) - t = typedef_type( - "Point", - struct_type( - None, - 8, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 4), + def test_typedef_struct(self): + self.assertPrettyPrint( + self.prog.typedef_type( + "Point", + self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), ), ), - ) - self.assertPrettyPrint( - t, """\ typedef struct { int x; @@ -538,9 +608,12 @@ def test_typedef(self): } Point""", ) - def test_function_typedef(self): + def test_typedef_function(self): self.assertPrettyPrint( - typedef_type("fn", function_type(int_type("int", 4, True), (), False)), + self.prog.typedef_type( + "fn", + self.prog.function_type(self.prog.int_type("int", 4, True), (), False), + ), "typedef int fn(void)", ) @@ -549,10 +622,16 @@ def test_function_no_name(self): ValueError, "function must have name", str, - struct_type( + self.prog.struct_type( 
"foo", 8, - (TypeMember(function_type(int_type("int", 4, True), (), False), None),), + ( + TypeMember( + self.prog.function_type( + self.prog.int_type("int", 4, True), (), False + ) + ), + ), ), ) diff --git a/tests/test_object.py b/tests/test_object.py index c07dce858..c3fb7550c 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -13,43 +13,21 @@ Type, TypeEnumerator, TypeMember, - array_type, cast, container_of, - enum_type, - float_type, - function_type, - int_type, - pointer_type, reinterpret, sizeof, - struct_type, - typedef_type, - union_type, - void_type, -) -from tests import ( - MockMemorySegment, - ObjectTestCase, - color_type, - coord_type, - line_segment_type, - mock_program, - option_type, - pid_type, - point_type, ) +from tests import MockMemorySegment, MockProgramTestCase, mock_program -class TestInit(ObjectTestCase): +class TestInit(MockProgramTestCase): def test_type_stays_alive(self): - obj = Object(self.prog, int_type("int", 4, True), value=0) - self.assertEqual(obj.type_, int_type("int", 4, True)) + obj = Object(self.prog, self.prog.int_type("int", 4, True), value=0) + self.assertEqual(obj.type_, self.prog.int_type("int", 4, True)) type_ = obj.type_ del obj - self.assertEqual(type_, int_type("int", 4, True)) - del self.prog - self.assertEqual(type_, int_type("int", 4, True)) + self.assertEqual(type_, self.prog.int_type("int", 4, True)) def test_type(self): self.assertRaisesRegex( @@ -132,16 +110,13 @@ def test_bit_offset(self): ) -class TestReference(ObjectTestCase): +class TestReference(MockProgramTestCase): def test_basic(self): - prog = mock_program( - segments=[ - MockMemorySegment((1000).to_bytes(4, "little"), virt_addr=0xFFFF0000), - ] - ) - obj = Object(prog, "int", address=0xFFFF0000) - self.assertIs(obj.prog_, prog) - self.assertEqual(obj.type_, prog.type("int")) + self.add_memory_segment((1000).to_bytes(4, "little"), virt_addr=0xFFFF0000) + + obj = Object(self.prog, "int", address=0xFFFF0000) + self.assertIs(obj.prog_, 
self.prog) + self.assertEqual(obj.type_, self.prog.type("int")) self.assertEqual(obj.address_, 0xFFFF0000) self.assertEqual(obj.byteorder_, "little") self.assertEqual(obj.bit_offset_, 0) @@ -149,9 +124,9 @@ def test_basic(self): self.assertEqual(obj.value_(), 1000) self.assertEqual(repr(obj), "Object(prog, 'int', address=0xffff0000)") - self.assertEqual(obj.read_(), Object(prog, "int", value=1000)) + self.assertEqual(obj.read_(), Object(self.prog, "int", value=1000)) - obj = Object(prog, "int", address=0xFFFF0000, byteorder="big") + obj = Object(self.prog, "int", address=0xFFFF0000, byteorder="big") self.assertEqual(obj.byteorder_, "big") self.assertEqual(obj.value_(), -402456576) self.assertEqual( @@ -159,7 +134,7 @@ def test_basic(self): ) self.assertEqual(sizeof(obj), 4) - obj = Object(prog, "unsigned int", address=0xFFFF0000, bit_field_size=4) + obj = Object(self.prog, "unsigned int", address=0xFFFF0000, bit_field_size=4) self.assertEqual(obj.bit_offset_, 0) self.assertEqual(obj.bit_field_size_, 4) self.assertEqual(obj.value_(), 8) @@ -170,7 +145,11 @@ def test_basic(self): self.assertRaises(TypeError, sizeof, obj) obj = Object( - prog, "unsigned int", address=0xFFFF0000, bit_field_size=4, bit_offset=4 + self.prog, + "unsigned int", + address=0xFFFF0000, + bit_field_size=4, + bit_offset=4, ) self.assertEqual(obj.bit_offset_, 4) self.assertEqual(obj.bit_field_size_, 4) @@ -245,33 +224,32 @@ def test_read_float(self): self.assertEqual(obj.value_(), expected) def test_struct(self): - segment = ( - (99).to_bytes(4, "little") - + (-1).to_bytes(4, "little", signed=True) - + (12345).to_bytes(4, "little") - + (0).to_bytes(4, "little") - ) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type], + self.add_memory_segment( + ( + (99).to_bytes(4, "little") + + (-1).to_bytes(4, "little", signed=True) + + (12345).to_bytes(4, "little") + + (0).to_bytes(4, "little") + ), + virt_addr=0xFFFF0000, ) - - obj = Object(prog, 
"struct point", address=0xFFFF0000) + self.types.append(self.point_type) + obj = Object(self.prog, "struct point", address=0xFFFF0000) self.assertEqual(obj.value_(), {"x": 99, "y": -1}) self.assertEqual(sizeof(obj), 8) - type_ = struct_type( + type_ = self.prog.struct_type( "foo", 16, ( - TypeMember(point_type, "point"), + TypeMember(self.point_type, "point"), TypeMember( - struct_type( + self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "bar"), - TypeMember(int_type("int", 4, True), "baz", 32), + TypeMember(self.prog.int_type("int", 4, True), "bar"), + TypeMember(self.prog.int_type("int", 4, True), "baz", 32), ), ), None, @@ -279,7 +257,7 @@ def test_struct(self): ), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) self.assertEqual( obj.value_(), {"point": {"x": 99, "y": -1}, "bar": 12345, "baz": 0} ) @@ -288,24 +266,21 @@ def test_array(self): segment = bytearray() for i in range(10): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - - obj = Object(prog, "int [5]", address=0xFFFF0000) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + obj = Object(self.prog, "int [5]", address=0xFFFF0000) self.assertEqual(obj.value_(), [0, 1, 2, 3, 4]) self.assertEqual(sizeof(obj), 20) - obj = Object(prog, "int [2][5]", address=0xFFFF0000) + obj = Object(self.prog, "int [2][5]", address=0xFFFF0000) self.assertEqual(obj.value_(), [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) - obj = Object(prog, "int [2][2][2]", address=0xFFFF0000) + obj = Object(self.prog, "int [2][2][2]", address=0xFFFF0000) self.assertEqual(obj.value_(), [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]) def test_void(self): - obj = Object(self.prog, void_type(), address=0) + obj = Object(self.prog, self.prog.void_type(), address=0) self.assertIs(obj.prog_, self.prog) - self.assertEqual(obj.type_, void_type()) + self.assertEqual(obj.type_, self.prog.void_type()) 
self.assertEqual(obj.address_, 0) self.assertEqual(obj.byteorder_, "little") self.assertEqual(obj.bit_offset_, 0) @@ -319,9 +294,15 @@ def test_void(self): self.assertRaises(TypeError, sizeof, obj) def test_function(self): - obj = Object(self.prog, function_type(void_type(), (), False), address=0) + obj = Object( + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0, + ) self.assertIs(obj.prog_, self.prog) - self.assertEqual(obj.type_, function_type(void_type(), (), False)) + self.assertEqual( + obj.type_, self.prog.function_type(self.prog.void_type(), (), False) + ) self.assertEqual(obj.address_, 0) self.assertEqual(obj.byteorder_, "little") self.assertEqual(obj.bit_offset_, 0) @@ -337,7 +318,7 @@ def test_function(self): def test_incomplete(self): # It's valid to create references with incomplete type, but not to read # from them. - obj = Object(self.prog, struct_type("foo"), address=0) + obj = Object(self.prog, self.prog.struct_type("foo"), address=0) self.assertRaisesRegex( TypeError, "cannot read object with incomplete structure type", obj.value_ ) @@ -346,7 +327,7 @@ def test_incomplete(self): ) self.assertRaises(TypeError, sizeof, obj) - obj = Object(self.prog, union_type("foo"), address=0) + obj = Object(self.prog, self.prog.union_type("foo"), address=0) self.assertRaisesRegex( TypeError, "cannot read object with incomplete union type", obj.value_ ) @@ -354,7 +335,7 @@ def test_incomplete(self): TypeError, "cannot read object with incomplete union type", obj.read_ ) - obj = Object(self.prog, enum_type("foo"), address=0) + obj = Object(self.prog, self.prog.enum_type("foo"), address=0) self.assertRaisesRegex( TypeError, "cannot read object with incomplete enumerated type", obj.value_ ) @@ -362,7 +343,11 @@ def test_incomplete(self): TypeError, "cannot read object with incomplete enumerated type", obj.read_ ) - obj = Object(self.prog, array_type(None, int_type("int", 4, True)), address=0) + obj = Object( + self.prog, + 
self.prog.array_type(self.prog.int_type("int", 4, True)), + address=0, + ) self.assertRaisesRegex( TypeError, "cannot read object with incomplete array type", obj.value_ ) @@ -371,7 +356,7 @@ def test_incomplete(self): ) -class TestValue(ObjectTestCase): +class TestValue(MockProgramTestCase): def test_positional(self): self.assertEqual(Object(self.prog, "int", 1), Object(self.prog, "int", value=1)) @@ -495,73 +480,90 @@ def test_float(self): ) def test_enum(self): - self.assertEqual(Object(self.prog, color_type, value=0).value_(), 0) + self.assertEqual(Object(self.prog, self.color_type, value=0).value_(), 0) - def test_incomplete(self): + def test_incomplete_struct(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete structure type", Object, self.prog, - struct_type("foo"), + self.prog.struct_type("foo"), value={}, ) + def test_incomplete_union(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete union type", Object, self.prog, - union_type("foo"), + self.prog.union_type("foo"), value={}, ) + def test_incomplete_class(self): + self.assertRaisesRegex( + TypeError, + "cannot create object with incomplete class type", + Object, + self.prog, + self.prog.class_type("foo"), + value={}, + ) + + def test_incomplete_enum(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete enumerated type", Object, self.prog, - enum_type("foo"), + self.prog.enum_type("foo"), value=0, ) + def test_incomplete_array(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete array type", Object, self.prog, - array_type(None, int_type("int", 4, True)), + self.prog.array_type(self.prog.int_type("int", 4, True)), value=[], ) def test_compound(self): - obj = Object(self.prog, point_type, value={"x": 100, "y": -5}) + obj = Object(self.prog, self.point_type, value={"x": 100, "y": -5}) self.assertEqual(obj.x, Object(self.prog, "int", value=100)) self.assertEqual(obj.y, Object(self.prog, "int", 
value=-5)) self.assertEqual( - Object(self.prog, point_type, value={}), - Object(self.prog, point_type, value={"x": 0, "y": 0}), + Object(self.prog, self.point_type, value={}), + Object(self.prog, self.point_type, value={"x": 0, "y": 0}), ) value = { "a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}, } - obj = Object(self.prog, line_segment_type, value=value) - self.assertEqual(obj.a, Object(self.prog, point_type, value={"x": 1, "y": 2})) - self.assertEqual(obj.b, Object(self.prog, point_type, value={"x": 3, "y": 4})) + obj = Object(self.prog, self.line_segment_type, value=value) + self.assertEqual( + obj.a, Object(self.prog, self.point_type, value={"x": 1, "y": 2}) + ) + self.assertEqual( + obj.b, Object(self.prog, self.point_type, value={"x": 3, "y": 4}) + ) self.assertEqual(obj.value_(), value) - invalid_struct = struct_type( + invalid_struct = self.prog.struct_type( "foo", 4, ( - TypeMember(int_type("short", 2, True), "a"), + TypeMember(self.prog.int_type("short", 2, True), "a"), # Straddles the end of the structure. - TypeMember(int_type("int", 4, True), "b", 16), + TypeMember(self.prog.int_type("int", 4, True), "b", 16), # Beyond the end of the structure. 
- TypeMember(int_type("int", 4, True), "c", 32), + TypeMember(self.prog.int_type("int", 4, True), "c", 32), ), ) @@ -588,7 +590,7 @@ def test_compound(self): "must be dictionary or mapping", Object, self.prog, - point_type, + self.point_type, value=1, ) self.assertRaisesRegex( @@ -596,18 +598,23 @@ def test_compound(self): "member key must be string", Object, self.prog, - point_type, + self.point_type, value={0: 0}, ) self.assertRaisesRegex( - TypeError, "must be number", Object, self.prog, point_type, value={"x": []} + TypeError, + "must be number", + Object, + self.prog, + self.point_type, + value={"x": []}, ) self.assertRaisesRegex( LookupError, "has no member 'z'", Object, self.prog, - point_type, + self.point_type, value={"z": 999}, ) @@ -617,8 +624,11 @@ def test_pointer(self): self.assertEqual(obj.value_(), 0xFFFF0000) self.assertEqual(repr(obj), "Object(prog, 'int *', value=0xffff0000)") + def test_pointer_typedef(self): obj = Object( - self.prog, typedef_type("INTP", self.prog.type("int *")), value=0xFFFF0000 + self.prog, + self.prog.typedef_type("INTP", self.prog.type("int *")), + value=0xFFFF0000, ) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 0xFFFF0000) @@ -647,7 +657,7 @@ def test_array(self): ) -class TestConversions(ObjectTestCase): +class TestConversions(MockProgramTestCase): def test_bool(self): self.assertTrue(Object(self.prog, "int", value=-1)) self.assertFalse(Object(self.prog, "int", value=0)) @@ -667,7 +677,7 @@ def test_bool(self): TypeError, "cannot convert 'struct point' to bool", bool, - Object(self.prog, point_type, address=0), + Object(self.prog, self.point_type, address=0), ) def test_int(self): @@ -720,7 +730,7 @@ def test_index(self): ) -class TestInvalidBitField(ObjectTestCase): +class TestInvalidBitField(MockProgramTestCase): def test_integer(self): self.assertRaisesRegex( ValueError, @@ -785,7 +795,7 @@ def test_reference(self): "bit field must be integer", Object, self.prog, - point_type, + self.point_type, 
address=0, bit_field_size=4, ) @@ -794,20 +804,22 @@ def test_reference(self): "bit field must be integer", Object, self.prog, - point_type, + self.point_type, value={}, bit_field_size=4, ) def test_member(self): - type_ = struct_type("foo", 8, (TypeMember(point_type, "p", 0, 4),)) + type_ = self.prog.struct_type( + "foo", 8, (TypeMember(self.point_type, "p", 0, 4),) + ) obj = Object(self.prog, type_, address=0) self.assertRaisesRegex( ValueError, "bit field must be integer", obj.member_, "p" ) -class TestCLiteral(ObjectTestCase): +class TestCLiteral(MockProgramTestCase): def test_int(self): self.assertEqual(Object(self.prog, value=1), Object(self.prog, "int", value=1)) self.assertEqual( @@ -863,7 +875,7 @@ class Foo: ) -class TestCIntegerPromotion(ObjectTestCase): +class TestCIntegerPromotion(MockProgramTestCase): def test_conversion_rank_less_than_int(self): self.assertEqual(+self.bool(False), self.int(0)) @@ -890,11 +902,11 @@ def test_conversion_rank_less_than_int(self): # If short is the same size as int, then int can't represent all of the # values of unsigned short. 
self.assertEqual( - +Object(self.prog, int_type("short", 4, True), value=1), + +Object(self.prog, self.prog.int_type("short", 4, True), value=1), Object(self.prog, "int", value=1), ) self.assertEqual( - +Object(self.prog, int_type("unsigned short", 4, False), value=2), + +Object(self.prog, self.prog.int_type("unsigned short", 4, False), value=2), Object(self.prog, "unsigned int", value=2), ) @@ -930,20 +942,20 @@ def test_conversion_rank_greater_than_int(self): def test_extended_integer(self): self.assertEqual( - +Object(self.prog, int_type("byte", 1, True), value=1), + +Object(self.prog, self.prog.int_type("byte", 1, True), value=1), Object(self.prog, "int", value=1), ) self.assertEqual( - +Object(self.prog, int_type("ubyte", 1, False), value=-1), + +Object(self.prog, self.prog.int_type("ubyte", 1, False), value=-1), Object(self.prog, "int", value=0xFF), ) self.assertEqual( - +Object(self.prog, int_type("qword", 8, True), value=1), - Object(self.prog, int_type("qword", 8, True), value=1), + +Object(self.prog, self.prog.int_type("qword", 8, True), value=1), + Object(self.prog, self.prog.int_type("qword", 8, True), value=1), ) self.assertEqual( - +Object(self.prog, int_type("qword", 8, False), value=1), - Object(self.prog, int_type("qword", 8, False), value=1), + +Object(self.prog, self.prog.int_type("qword", 8, False), value=1), + Object(self.prog, self.prog.int_type("qword", 8, False), value=1), ) def test_bit_field(self): @@ -996,11 +1008,11 @@ def test_bit_field(self): def test_enum(self): # Enums should be converted to their compatible type and then promoted. 
self.assertEqual( - +Object(self.prog, color_type, value=1), + +Object(self.prog, self.color_type, value=1), Object(self.prog, "unsigned int", value=1), ) - type_ = enum_type( + type_ = self.prog.enum_type( "color", self.prog.type("unsigned long long"), ( @@ -1014,7 +1026,7 @@ def test_enum(self): Object(self.prog, "unsigned long long", value=1), ) - type_ = enum_type( + type_ = self.prog.enum_type( "color", self.prog.type("char"), ( @@ -1028,13 +1040,13 @@ def test_enum(self): ) def test_typedef(self): - type_ = typedef_type("SHORT", self.prog.type("short")) + type_ = self.prog.typedef_type("SHORT", self.prog.type("short")) self.assertEqual( +Object(self.prog, type_, value=5), Object(self.prog, "int", value=5) ) # Typedef should be preserved if the type wasn't promoted. - type_ = typedef_type("self.int", self.prog.type("int")) + type_ = self.prog.typedef_type("self.int", self.prog.type("int")) self.assertEqual( +Object(self.prog, type_, value=5), Object(self.prog, type_, value=5) ) @@ -1047,7 +1059,7 @@ def test_non_integer(self): ) -class TestCCommonRealType(ObjectTestCase): +class TestCCommonRealType(MockProgramTestCase): def assertCommonRealType(self, lhs, rhs, expected, commutative=True): if isinstance(lhs, (str, Type)): obj1 = Object(self.prog, lhs, value=1) @@ -1076,7 +1088,7 @@ def test_float(self): self.assertCommonRealType("double", "double", "double") # Floating type not in the standard. 
- float64 = float_type("float64", 8) + float64 = self.prog.float_type("float64", 8) self.assertCommonRealType(float64, "long long", float64) self.assertCommonRealType(float64, "float", float64) self.assertCommonRealType(float64, "double", float64) @@ -1127,8 +1139,8 @@ def test_same_sign(self): "unsigned long long", "unsigned long", "unsigned long long" ) - int64 = int_type("int64", 8, True) - qword = int_type("qword", 8, True) + int64 = self.prog.int_type("int64", 8, True) + qword = self.prog.int_type("qword", 8, True) self.assertCommonRealType("long", int64, "long") self.assertCommonRealType(int64, qword, qword, commutative=False) self.assertCommonRealType(qword, int64, int64, commutative=False) @@ -1139,8 +1151,8 @@ def test_unsigned_greater_rank(self): self.assertCommonRealType("unsigned long long", "long", "unsigned long long") self.assertCommonRealType("unsigned long long", "int", "unsigned long long") - int64 = int_type("int64", 8, True) - uint64 = int_type("uint64", 8, False) + int64 = self.prog.int_type("int64", 8, True) + uint64 = self.prog.int_type("uint64", 8, False) self.assertCommonRealType(uint64, "int", uint64) self.assertCommonRealType("unsigned long", int64, "unsigned long") @@ -1148,8 +1160,8 @@ def test_signed_can_represent_unsigned(self): self.assertCommonRealType("long", "unsigned int", "long") self.assertCommonRealType("long long", "unsigned int", "long long") - int64 = int_type("int64", 8, True) - weirduint = int_type("weirduint", 6, False) + int64 = self.prog.int_type("int64", 8, True) + weirduint = self.prog.int_type("weirduint", 6, False) self.assertCommonRealType(int64, "unsigned int", int64) self.assertCommonRealType("long", weirduint, "long") @@ -1158,19 +1170,19 @@ def test_corresponding_unsigned(self): self.assertCommonRealType("long long", "unsigned long", "unsigned long long") def test_enum(self): - self.assertCommonRealType(color_type, color_type, "unsigned int") + self.assertCommonRealType(self.color_type, self.color_type, 
"unsigned int") def test_typedef(self): - type_ = typedef_type("INT", self.prog.type("int")) + type_ = self.prog.typedef_type("INT", self.prog.type("int")) self.assertCommonRealType(type_, type_, type_) self.assertCommonRealType("int", type_, type_, commutative=False) self.assertCommonRealType(type_, "int", "int", commutative=False) - type_ = typedef_type("LONG", self.prog.type("long")) + type_ = self.prog.typedef_type("LONG", self.prog.type("long")) self.assertCommonRealType(type_, "int", type_) -class TestCOperators(ObjectTestCase): +class TestCOperators(MockProgramTestCase): def test_cast_array(self): obj = Object(self.prog, "int []", address=0xFFFF0000) self.assertEqual( @@ -1189,7 +1201,9 @@ def test_cast_array(self): def test_cast_function(self): func = Object( - self.prog, function_type(void_type(), (), False), address=0xFFFF0000 + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, ) self.assertEqual( cast("void *", func), Object(self.prog, "void *", value=0xFFFF0000) @@ -1315,7 +1329,9 @@ def test_ptr_relational(self): self.assertRaises(TypeError, operator.lt, ptr0, self.int(1)) func = Object( - self.prog, function_type(void_type(), (), False), address=0xFFFF0000 + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, ) self.assertTrue(func == func) self.assertTrue(func == ptr0) @@ -1331,7 +1347,9 @@ def test_ptr_relational(self): self.assertRaises( TypeError, operator.eq, - Object(self.prog, struct_type("foo", None, None), address=0xFFFF0000), + Object( + self.prog, self.prog.struct_type("foo", None, None), address=0xFFFF0000 + ), ptr0, ) @@ -1519,39 +1537,51 @@ def test_not(self): def test_container_of(self): obj = Object(self.prog, "int *", value=0xFFFF000C) - container_of(obj, point_type, "x") + container_of(obj, self.point_type, "x") self.assertEqual( - container_of(obj, point_type, "x"), - Object(self.prog, pointer_type(8, point_type), value=0xFFFF000C), + 
container_of(obj, self.point_type, "x"), + Object( + self.prog, self.prog.pointer_type(self.point_type), value=0xFFFF000C + ), ) self.assertEqual( - container_of(obj, point_type, "y"), - Object(self.prog, pointer_type(8, point_type), value=0xFFFF0008), + container_of(obj, self.point_type, "y"), + Object( + self.prog, self.prog.pointer_type(self.point_type), value=0xFFFF0008 + ), ) self.assertEqual( - container_of(obj, line_segment_type, "a.x"), - Object(self.prog, pointer_type(8, line_segment_type), value=0xFFFF000C), + container_of(obj, self.line_segment_type, "a.x"), + Object( + self.prog, + self.prog.pointer_type(self.line_segment_type), + value=0xFFFF000C, + ), ) self.assertEqual( - container_of(obj, line_segment_type, "b.x"), - Object(self.prog, pointer_type(8, line_segment_type), value=0xFFFF0004), + container_of(obj, self.line_segment_type, "b.x"), + Object( + self.prog, + self.prog.pointer_type(self.line_segment_type), + value=0xFFFF0004, + ), ) - polygon_type = struct_type( - "polygon", 0, (TypeMember(array_type(None, point_type), "points"),) + polygon_type = self.prog.struct_type( + "polygon", 0, (TypeMember(self.prog.array_type(self.point_type), "points"),) ) self.assertEqual( container_of(obj, polygon_type, "points[3].x"), - Object(self.prog, pointer_type(8, polygon_type), value=0xFFFEFFF4), + Object(self.prog, self.prog.pointer_type(polygon_type), value=0xFFFEFFF4), ) - small_point_type = struct_type( + small_point_type = self.prog.struct_type( "small_point", 1, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 4, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 4, 4), ), ) self.assertRaisesRegex( @@ -1568,7 +1598,7 @@ def test_container_of(self): r"container_of\(\) argument must be a pointer", container_of, obj[0], - point_type, + self.point_type, "x", ) @@ -1581,12 +1611,14 @@ def test_container_of(self): "x", ), - type_ = struct_type( 
+ type_ = self.prog.struct_type( "foo", 16, ( - TypeMember(array_type(8, int_type("int", 4, True)), "arr"), - TypeMember(point_type, "point", 256), + TypeMember( + self.prog.array_type(self.prog.int_type("int", 4, True), 8), "arr" + ), + TypeMember(self.point_type, "point", 256), ), ) syntax_errors = [ @@ -1604,7 +1636,7 @@ def test_container_of(self): ) -class TestCPretty(ObjectTestCase): +class TestCPretty(MockProgramTestCase): def test_int(self): obj = Object(self.prog, "int", value=99) self.assertEqual(str(obj), "(int)99") @@ -1628,7 +1660,7 @@ def test_char(self): self.assertEqual( Object( self.prog, - typedef_type("uint8_t", self.prog.type("unsigned char")), + self.prog.typedef_type("uint8_t", self.prog.type("unsigned char")), value=65, ).format_(char=True), "(uint8_t)65", @@ -1645,13 +1677,17 @@ def test_float(self): self.assertEqual(str(Object(self.prog, "float", value=0.5)), "(float)0.5") def test_typedef(self): - type_ = typedef_type("INT", int_type("int", 4, True)) + type_ = self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)) self.assertEqual(str(Object(self.prog, type_, value=99)), "(INT)99") - type_ = typedef_type("INT", int_type("int", 4, True), Qualifiers.CONST) + type_ = self.prog.typedef_type( + "INT", self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ) self.assertEqual(str(Object(self.prog, type_, value=99)), "(const INT)99") - type_ = typedef_type("CINT", int_type("int", 4, True, Qualifiers.CONST)) + type_ = self.prog.typedef_type( + "CINT", self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ) self.assertEqual(str(Object(self.prog, type_, value=99)), "(CINT)99") def test_struct(self): @@ -1661,12 +1697,10 @@ def test_struct(self): + (12345).to_bytes(4, "little", signed=True) + (0).to_bytes(4, "little", signed=True) ) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type], - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + 
self.types.append(self.point_type) - obj = Object(prog, "struct point", address=0xFFFF0000) + obj = Object(self.prog, "struct point", address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -1700,18 +1734,18 @@ def test_struct(self): "(struct point){ (int)99, (int)-1 }", ) - type_ = struct_type( + type_ = self.prog.struct_type( "foo", 16, ( - TypeMember(point_type, "point"), + TypeMember(self.point_type, "point"), TypeMember( - struct_type( + self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "bar"), - TypeMember(int_type("int", 4, True), "baz", 32), + TypeMember(self.prog.int_type("int", 4, True), "bar"), + TypeMember(self.prog.int_type("int", 4, True), "baz", 32), ), ), None, @@ -1719,7 +1753,7 @@ def test_struct(self): ), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) expected = """\ (struct foo){ .point = (struct point){ @@ -1732,36 +1766,36 @@ def test_struct(self): self.assertEqual(str(obj), expected) self.assertEqual(str(obj.read_()), expected) - segment = ( - (99).to_bytes(8, "little") - + (-1).to_bytes(8, "little", signed=True) - + (12345).to_bytes(8, "little", signed=True) - + (0).to_bytes(8, "little", signed=True) - ) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] + self.add_memory_segment( + ( + (99).to_bytes(8, "little") + + (-1).to_bytes(8, "little", signed=True) + + (12345).to_bytes(8, "little", signed=True) + + (0).to_bytes(8, "little", signed=True) + ), + virt_addr=0xFFFF8000, ) - type_ = struct_type( + type_ = self.prog.struct_type( "foo", 32, ( TypeMember( - struct_type( + self.prog.struct_type( "long_point", 16, ( - TypeMember(int_type("long", 8, True), "x"), - TypeMember(int_type("long", 8, True), "y", 64), + TypeMember(self.prog.int_type("long", 8, True), "x"), + TypeMember(self.prog.int_type("long", 8, True), "y", 64), ), ), "point", ), - TypeMember(int_type("long", 8, True), "bar", 128), - TypeMember(int_type("long", 8, 
True), "baz", 192), + TypeMember(self.prog.int_type("long", 8, True), "bar", 128), + TypeMember(self.prog.int_type("long", 8, True), "baz", 192), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF8000) expected = """\ (struct foo){ .point = (struct long_point){ @@ -1774,10 +1808,10 @@ def test_struct(self): self.assertEqual(str(obj), expected) self.assertEqual(str(obj.read_()), expected) - type_ = struct_type("foo", 0, ()) - self.assertEqual(str(Object(prog, type_, address=0)), "(struct foo){}") + type_ = self.prog.struct_type("foo", 0, ()) + self.assertEqual(str(Object(self.prog, type_, address=0)), "(struct foo){}") - obj = Object(prog, point_type, value={"x": 1}) + obj = Object(self.prog, self.point_type, value={"x": 1}) self.assertEqual( obj.format_(implicit_members=False), """\ @@ -1792,7 +1826,7 @@ def test_struct(self): (int)1, }""", ) - obj = Object(prog, point_type, value={"y": 1}) + obj = Object(self.prog, self.point_type, value={"y": 1}) self.assertEqual( obj.format_(implicit_members=False), """\ @@ -1810,22 +1844,23 @@ def test_struct(self): ) def test_bit_field(self): - segment = b"\x07\x10\x5e\x5f\x1f\0\0\0" - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - - type_ = struct_type( + self.add_memory_segment(b"\x07\x10\x5e\x5f\x1f\0\0\0", virt_addr=0xFFFF0000) + type_ = self.prog.struct_type( "bits", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True, Qualifiers.CONST), "y", 4, 28), - TypeMember(int_type("int", 4, True), "z", 32, 5), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + "y", + 4, + 28, + ), + TypeMember(self.prog.int_type("int", 4, True), "z", 32, 5), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -1841,13 +1876,10 @@ def 
test_bit_field(self): self.assertEqual(str(obj.z), "(int)-1") def test_union(self): - segment = b"\0\0\x80?" - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[option_type], - ) + self.add_memory_segment(b"\0\0\x80?", virt_addr=0xFFFF0000) + self.types.append(self.option_type) self.assertEqual( - str(Object(prog, "union option", address=0xFFFF0000)), + str(Object(self.prog, "union option", address=0xFFFF0000)), """\ (union option){ .i = (int)1065353216, @@ -1856,71 +1888,62 @@ def test_union(self): ) def test_enum(self): - self.assertEqual(str(Object(self.prog, color_type, value=0)), "(enum color)RED") self.assertEqual( - str(Object(self.prog, color_type, value=1)), "(enum color)GREEN" + str(Object(self.prog, self.color_type, value=0)), "(enum color)RED" + ) + self.assertEqual( + str(Object(self.prog, self.color_type, value=1)), "(enum color)GREEN" + ) + self.assertEqual( + str(Object(self.prog, self.color_type, value=4)), "(enum color)4" ) - self.assertEqual(str(Object(self.prog, color_type, value=4)), "(enum color)4") - obj = Object(self.prog, enum_type("color"), address=0) + obj = Object(self.prog, self.prog.enum_type("color"), address=0) self.assertRaisesRegex(TypeError, "cannot format incomplete enum", str, obj) def test_pointer(self): - prog = mock_program( - segments=[ - MockMemorySegment((99).to_bytes(4, "little"), virt_addr=0xFFFF0000), - ] - ) - obj = Object(prog, "int *", value=0xFFFF0000) + self.add_memory_segment((99).to_bytes(4, "little"), virt_addr=0xFFFF0000) + obj = Object(self.prog, "int *", value=0xFFFF0000) self.assertEqual(str(obj), "*(int *)0xffff0000 = 99") self.assertEqual(obj.format_(dereference=False), "(int *)0xffff0000") self.assertEqual( - str(Object(prog, "int *", value=0x7FFFFFFF)), "(int *)0x7fffffff" + str(Object(self.prog, "int *", value=0x7FFFFFFF)), "(int *)0x7fffffff" ) def test_void_pointer(self): - prog = mock_program( - segments=[ - MockMemorySegment((99).to_bytes(8, "little"), 
virt_addr=0xFFFF0000), - ] - ) + self.add_memory_segment((99).to_bytes(4, "little"), virt_addr=0xFFFF0000) self.assertEqual( - str(Object(prog, "void *", value=0xFFFF0000)), "(void *)0xffff0000" + str(Object(self.prog, "void *", value=0xFFFF0000)), "(void *)0xffff0000" ) def test_pointer_typedef(self): - prog = mock_program( - segments=[ - MockMemorySegment( - (0xFFFF00F0).to_bytes(8, "little"), virt_addr=0xFFFF0000 - ), - ] + self.add_memory_segment( + (0xFFFF00F0).to_bytes(8, "little"), virt_addr=0xFFFF0000 + ) + type_ = self.prog.typedef_type( + "HANDLE", + self.prog.pointer_type(self.prog.pointer_type(self.prog.void_type())), ) - type_ = typedef_type("HANDLE", pointer_type(8, pointer_type(8, void_type()))) self.assertEqual( - str(Object(prog, type_, value=0xFFFF0000)), + str(Object(self.prog, type_, value=0xFFFF0000)), "*(HANDLE)0xffff0000 = 0xffff00f0", ) # TODO: test symbolize. def test_c_string(self): - prog = mock_program( - segments=[ - MockMemorySegment(b"hello\0", virt_addr=0xFFFF0000), - MockMemorySegment(b"unterminated", virt_addr=0xFFFF0010), - MockMemorySegment(b'"escape\tme\\\0', virt_addr=0xFFFF0020), - ] - ) + self.add_memory_segment(b"hello\0", virt_addr=0xFFFF0000) + self.add_memory_segment(b"unterminated", virt_addr=0xFFFF0010) + self.add_memory_segment(b'"escape\tme\\\0', virt_addr=0xFFFF0020) - obj = Object(prog, "char *", value=0xFFFF0000) + obj = Object(self.prog, "char *", value=0xFFFF0000) self.assertEqual(str(obj), '(char *)0xffff0000 = "hello"') self.assertEqual(obj.format_(string=False), "*(char *)0xffff0000 = 104") - self.assertEqual(str(Object(prog, "char *", value=0x0)), "(char *)0x0") + self.assertEqual(str(Object(self.prog, "char *", value=0x0)), "(char *)0x0") self.assertEqual( - str(Object(prog, "char *", value=0xFFFF0010)), "(char *)0xffff0010" + str(Object(self.prog, "char *", value=0xFFFF0010)), "(char *)0xffff0010" ) self.assertEqual( - str(Object(prog, "char *", value=0xFFFF0020)), + str(Object(self.prog, "char *", 
value=0xFFFF0020)), r'(char *)0xffff0020 = "\"escape\tme\\"', ) @@ -1928,10 +1951,8 @@ def test_basic_array(self): segment = bytearray() for i in range(5): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - obj = Object(prog, "int [5]", address=0xFFFF0000) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + obj = Object(self.prog, "int [5]", address=0xFFFF0000) self.assertEqual(str(obj), "(int [5]){ 0, 1, 2, 3, 4 }") self.assertEqual( @@ -2012,10 +2033,8 @@ def test_nested_array(self): segment = bytearray() for i in range(10): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - obj = Object(prog, "int [2][5]", address=0xFFFF0000) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + obj = Object(self.prog, "int [2][5]", address=0xFFFF0000) self.assertEqual( str(obj), "(int [2][5]){ { 0, 1, 2, 3, 4 }, { 5, 6, 7, 8, 9 } }" @@ -2096,14 +2115,18 @@ def test_array_member(self): segment = bytearray() for i in range(5): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) - type_ = struct_type( - None, 20, (TypeMember(array_type(5, int_type("int", 4, True)), "arr"),) + type_ = self.prog.struct_type( + None, + 20, + ( + TypeMember( + self.prog.array_type(self.prog.int_type("int", 4, True), 5), "arr" + ), + ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) self.assertEqual( str(obj), @@ -2142,12 +2165,10 @@ def test_array_of_struct(self): segment = bytearray() for i in range(1, 5): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type], - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + 
self.types.append(self.point_type) - obj = Object(prog, "struct point [2]", address=0xFFFF0000) + obj = Object(self.prog, "struct point [2]", address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -2169,12 +2190,11 @@ def test_zero_length_array(self): def test_array_zeroes(self): segment = bytearray(16) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type, struct_type("empty", 0, ()),], - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + self.types.append(self.point_type) + self.types.append(self.prog.struct_type("empty", 0, ())) - obj = Object(prog, "int [2]", address=0xFFFF0000) + obj = Object(self.prog, "int [2]", address=0xFFFF0000) self.assertEqual(str(obj), "(int [2]){}") self.assertEqual(obj.format_(implicit_elements=True), "(int [2]){ 0, 0 }") segment[:4] = (99).to_bytes(4, "little") @@ -2183,7 +2203,7 @@ def test_array_zeroes(self): segment[4:8] = (99).to_bytes(4, "little") self.assertEqual(str(obj), "(int [2]){ 0, 99 }") - obj = Object(prog, "struct point [2]", address=0xFFFF0000) + obj = Object(self.prog, "struct point [2]", address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -2195,16 +2215,14 @@ def test_array_zeroes(self): }""", ) - obj = Object(prog, "struct empty [2]", address=0) + obj = Object(self.prog, "struct empty [2]", address=0) self.assertEqual(str(obj), "(struct empty [2]){}") def test_char_array(self): segment = bytearray(16) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) - obj = Object(prog, "char [4]", address=0xFFFF0000) + obj = Object(self.prog, "char [4]", address=0xFFFF0000) segment[:16] = b"hello, world\0\0\0\0" self.assertEqual(str(obj), '(char [4])"hell"') self.assertEqual(obj.format_(string=False), "(char [4]){ 104, 101, 108, 108 }") @@ -2214,29 +2232,26 @@ def test_char_array(self): self.assertEqual(str(obj.read_()), str(obj)) self.assertEqual( - str(Object(prog, 
"char [0]", address=0xFFFF0000)), "(char [0]){}" + str(Object(self.prog, "char [0]", address=0xFFFF0000)), "(char [0]){}" ) self.assertEqual( - str(Object(prog, "char []", address=0xFFFF0000)), "(char []){}" + str(Object(self.prog, "char []", address=0xFFFF0000)), "(char []){}" ) def test_function(self): obj = Object( - self.prog, function_type(void_type(), (), False), address=0xFFFF0000 + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, ) self.assertEqual(str(obj), "(void (void))0xffff0000") -class TestGenericOperators(ObjectTestCase): +class TestGenericOperators(MockProgramTestCase): def setUp(self): super().setUp() - self.prog = mock_program( - segments=[ - MockMemorySegment( - b"".join(i.to_bytes(4, "little") for i in range(4)), - virt_addr=0xFFFF0000, - ), - ] + self.add_memory_segment( + b"".join(i.to_bytes(4, "little") for i in range(4)), virt_addr=0xFFFF0000, ) def test_len(self): @@ -2303,14 +2318,14 @@ def test_cast_primitive_value(self): TypeError, "cannot convert 'int' to 'struct point'", cast, - point_type, + self.point_type, Object(self.prog, "int", value=1), ) def test_cast_compound_value(self): - obj = Object(self.prog, point_type, address=0xFFFF0000).read_() - self.assertEqual(cast(point_type, obj), obj) - const_point_type = point_type.qualified(Qualifiers.CONST) + obj = Object(self.prog, self.point_type, address=0xFFFF0000).read_() + self.assertEqual(cast(self.point_type, obj), obj) + const_point_type = self.point_type.qualified(Qualifiers.CONST) self.assertEqual( cast(const_point_type, obj), Object(self.prog, const_point_type, address=0xFFFF0000).read_(), @@ -2319,7 +2334,7 @@ def test_cast_compound_value(self): TypeError, "cannot convert 'struct point' to 'enum color'", cast, - color_type, + self.color_type, obj, ) @@ -2342,39 +2357,43 @@ def test_reinterpret_reference(self): ) def test_reinterpret_value(self): - segment = (1).to_bytes(4, "little") + (2).to_bytes(4, "little") - prog = mock_program( - 
segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[ - point_type, - struct_type( - "foo", 8, (TypeMember(int_type("long", 8, True), "counter"),) - ), - ], + self.types.append(self.point_type) + self.types.append( + self.prog.struct_type( + "foo", 8, (TypeMember(self.prog.int_type("long", 8, True), "counter"),) + ), ) - obj = Object(prog, "struct point", address=0xFFFF0000).read_() + obj = Object(self.prog, "struct point", address=0xFFFF0008).read_() self.assertEqual( reinterpret("struct foo", obj), - Object(prog, "struct foo", address=0xFFFF0000).read_(), + Object(self.prog, "struct foo", address=0xFFFF0008).read_(), ) self.assertEqual( reinterpret(obj.type_, obj, byteorder="big"), - Object(prog, "struct point", address=0xFFFF0000, byteorder="big").read_(), + Object( + self.prog, "struct point", address=0xFFFF0008, byteorder="big" + ).read_(), ) - self.assertEqual(reinterpret("int", obj), Object(prog, "int", value=1)) + self.assertEqual(reinterpret("int", obj), Object(self.prog, "int", value=2)) def test_member(self): - reference = Object(self.prog, point_type, address=0xFFFF0000) + reference = Object(self.prog, self.point_type, address=0xFFFF0000) unnamed_reference = Object( self.prog, - struct_type( + self.prog.struct_type( "point", 8, - (TypeMember(struct_type(None, 8, point_type.members), None),), + ( + TypeMember( + self.prog.struct_type(None, 8, self.point_type.members), None + ), + ), ), address=0xFFFF0000, ) - ptr = Object(self.prog, pointer_type(8, point_type), value=0xFFFF0000) + ptr = Object( + self.prog, self.prog.pointer_type(self.point_type), value=0xFFFF0000 + ) for obj in [reference, unnamed_reference, ptr]: self.assertEqual( obj.member_("x"), Object(self.prog, "int", address=0xFFFF0000) @@ -2403,34 +2422,38 @@ def test_member(self): self.assertRaisesRegex(AttributeError, "no attribute", getattr, obj, "x") def test_bit_field_member(self): - segment = b"\x07\x10\x5e\x5f\x1f\0\0\0" - prog = mock_program( - 
segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - - type_ = struct_type( + self.add_memory_segment(b"\x07\x10\x5e\x5f\x1f\0\0\0", virt_addr=0xFFFF8000) + type_ = self.prog.struct_type( "bits", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True, Qualifiers.CONST), "y", 4, 28), - TypeMember(int_type("int", 4, True), "z", 32, 5), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + "y", + 4, + 28, + ), + TypeMember(self.prog.int_type("int", 4, True), "z", 32, 5), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF8000) self.assertEqual( obj.x, Object( - prog, int_type("int", 4, True), address=0xFFFF0000, bit_field_size=4 + self.prog, + self.prog.int_type("int", 4, True), + address=0xFFFF8000, + bit_field_size=4, ), ) self.assertEqual( obj.y, Object( - prog, - int_type("int", 4, True, Qualifiers.CONST), - address=0xFFFF0000, + self.prog, + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + address=0xFFFF8000, bit_field_size=28, bit_offset=4, ), @@ -2438,29 +2461,30 @@ def test_bit_field_member(self): self.assertEqual( obj.z, Object( - prog, int_type("int", 4, True), address=0xFFFF0004, bit_field_size=5 + self.prog, + self.prog.int_type("int", 4, True), + address=0xFFFF8004, + bit_field_size=5, ), ) def test_member_out_of_bounds(self): obj = Object( - self.prog, struct_type("foo", 4, point_type.members), address=0xFFFF0000 + self.prog, + self.prog.struct_type("foo", 4, self.point_type.members), + address=0xFFFF0000, ).read_() self.assertRaisesRegex(OutOfBoundsError, "out of bounds", getattr, obj, "y") def test_string(self): - prog = mock_program( - segments=[ - MockMemorySegment( - b"\x00\x00\xff\xff\x00\x00\x00\x00", virt_addr=0xFFFEFFF8 - ), - MockMemorySegment(b"hello\0world\0", virt_addr=0xFFFF0000), - ] + self.add_memory_segment( + 
b"\x00\x00\xff\xff\x00\x00\x00\x00", virt_addr=0xFFFEFFF8 ) + self.add_memory_segment(b"hello\0world\0", virt_addr=0xFFFF0000) strings = [ - (Object(prog, "char *", address=0xFFFEFFF8), b"hello"), - (Object(prog, "char [2]", address=0xFFFF0000), b"he"), - (Object(prog, "char [8]", address=0xFFFF0000), b"hello"), + (Object(self.prog, "char *", address=0xFFFEFFF8), b"hello"), + (Object(self.prog, "char [2]", address=0xFFFF0000), b"he"), + (Object(self.prog, "char [8]", address=0xFFFF0000), b"hello"), ] for obj, expected in strings: with self.subTest(obj=obj): @@ -2468,10 +2492,10 @@ def test_string(self): self.assertEqual(obj.read_().string_(), expected) strings = [ - Object(prog, "char []", address=0xFFFF0000), - Object(prog, "int []", address=0xFFFF0000), - Object(prog, "int [2]", address=0xFFFF0000), - Object(prog, "int *", value=0xFFFF0000), + Object(self.prog, "char []", address=0xFFFF0000), + Object(self.prog, "int []", address=0xFFFF0000), + Object(self.prog, "int [2]", address=0xFFFF0000), + Object(self.prog, "int *", value=0xFFFF0000), ] for obj in strings: self.assertEqual(obj.string_(), b"hello") @@ -2479,16 +2503,16 @@ def test_string(self): self.assertRaisesRegex( TypeError, "must be an array or pointer", - Object(prog, "int", value=1).string_, + Object(self.prog, "int", value=1).string_, ) -class TestSpecialMethods(ObjectTestCase): +class TestSpecialMethods(MockProgramTestCase): def test_dir(self): obj = Object(self.prog, "int", value=0) self.assertEqual(dir(obj), sorted(object.__dir__(obj))) - obj = Object(self.prog, point_type, address=0xFFFF0000) + obj = Object(self.prog, self.point_type, address=0xFFFF0000) self.assertEqual(dir(obj), sorted(object.__dir__(obj) + ["x", "y"])) self.assertEqual(dir(obj.address_of_()), dir(obj)) diff --git a/tests/test_program.py b/tests/test_program.py index 4e5a7eedf..6ad9d81e7 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -5,7 +5,6 @@ import itertools import os import tempfile -import unittest 
import unittest.mock from drgn import ( @@ -18,15 +17,8 @@ Program, ProgramFlags, Qualifiers, - array_type, - bool_type, - float_type, - function_type, + TypeKind, host_platform, - int_type, - pointer_type, - typedef_type, - void_type, ) from tests import ( DEFAULT_LANGUAGE, @@ -34,12 +26,9 @@ MOCK_PLATFORM, MockMemorySegment, MockObject, - ObjectTestCase, - color_type, + MockProgramTestCase, + TestCase, mock_program, - option_type, - pid_type, - point_type, ) from tests.elf import ET, PT from tests.elfwriter import ElfSection, create_elf_file @@ -90,15 +79,9 @@ def test_lookup_error(self): "foo", "foo.c", ) + self.assertRaisesRegex(LookupError, "^could not find 'foo'$", prog.type, "foo") self.assertRaisesRegex( - LookupError, "^could not find 'foo'$", prog.type, "foo" - ) - self.assertRaisesRegex( - LookupError, - "^could not find 'foo' in 'foo.c'$", - prog.type, - "foo", - "foo.c", + LookupError, "^could not find 'foo' in 'foo.c'$", prog.type, "foo", "foo.c", ) self.assertRaisesRegex( LookupError, "^could not find variable 'foo'$", prog.variable, "foo" @@ -118,15 +101,6 @@ def test_lookup_error(self): def test_flags(self): self.assertIsInstance(mock_program().flags, ProgramFlags) - def test_pointer_type(self): - prog = mock_program() - self.assertEqual(prog.pointer_type(prog.type("int")), prog.type("int *")) - self.assertEqual(prog.pointer_type("int"), prog.type("int *")) - self.assertEqual( - prog.pointer_type(prog.type("int"), Qualifiers.CONST), - prog.type("int * const"), - ) - def test_debug_info(self): Program().load_debug_info([]) @@ -134,7 +108,7 @@ def test_language(self): self.assertEqual(Program().language, DEFAULT_LANGUAGE) -class TestMemory(unittest.TestCase): +class TestMemory(TestCase): def test_simple_read(self): data = b"hello, world" prog = mock_program(segments=[MockMemorySegment(data, 0xFFFF0000, 0xA0)]) @@ -353,24 +327,37 @@ def test_invalid_read_fn(self): ) -class TestTypes(unittest.TestCase): +class TestTypes(MockProgramTestCase): def 
test_invalid_finder(self): - self.assertRaises(TypeError, mock_program().add_type_finder, "foo") + self.assertRaises(TypeError, self.prog.add_type_finder, "foo") - prog = mock_program() - prog.add_type_finder(lambda kind, name, filename: "foo") - self.assertRaises(TypeError, prog.type, "int") + self.prog.add_type_finder(lambda kind, name, filename: "foo") + self.assertRaises(TypeError, self.prog.type, "int") + + def test_finder_different_program(self): + def finder(kind, name, filename): + if kind == TypeKind.TYPEDEF and name == "foo": + prog = Program() + return prog.typedef_type("foo", prog.void_type()) + else: + return None + + self.prog.add_type_finder(finder) + self.assertRaisesRegex( + ValueError, + "type find callback returned type from wrong program", + self.prog.type, + "foo", + ) def test_wrong_kind(self): - prog = mock_program() - prog.add_type_finder(lambda kind, name, filename: void_type()) - self.assertRaises(TypeError, prog.type, "int") + self.prog.add_type_finder(lambda kind, name, filename: self.prog.void_type()) + self.assertRaises(TypeError, self.prog.type, "int") def test_not_found(self): - prog = mock_program() - self.assertRaises(LookupError, prog.type, "struct foo") - prog.add_type_finder(lambda kind, name, filename: None) - self.assertRaises(LookupError, prog.type, "struct foo") + self.assertRaises(LookupError, self.prog.type, "struct foo") + self.prog.add_type_finder(lambda kind, name, filename: None) + self.assertRaises(LookupError, self.prog.type, "struct foo") def test_default_primitive_types(self): def spellings(tokens, num_optional=0): @@ -382,96 +369,110 @@ def spellings(tokens, num_optional=0): prog = mock_program( MOCK_PLATFORM if word_size == 8 else MOCK_32BIT_PLATFORM ) - self.assertEqual(prog.type("_Bool"), bool_type("_Bool", 1)) - self.assertEqual(prog.type("char"), int_type("char", 1, True)) + self.assertEqual(prog.type("_Bool"), prog.bool_type("_Bool", 1)) + self.assertEqual(prog.type("char"), prog.int_type("char", 1, True)) 
for spelling in spellings(["signed", "char"]): - self.assertEqual(prog.type(spelling), int_type("signed char", 1, True)) + self.assertEqual( + prog.type(spelling), prog.int_type("signed char", 1, True) + ) for spelling in spellings(["unsigned", "char"]): self.assertEqual( - prog.type(spelling), int_type("unsigned char", 1, False) + prog.type(spelling), prog.int_type("unsigned char", 1, False) ) for spelling in spellings(["short", "signed", "int"], 2): - self.assertEqual(prog.type(spelling), int_type("short", 2, True)) + self.assertEqual(prog.type(spelling), prog.int_type("short", 2, True)) for spelling in spellings(["short", "unsigned", "int"], 1): self.assertEqual( - prog.type(spelling), int_type("unsigned short", 2, False) + prog.type(spelling), prog.int_type("unsigned short", 2, False) ) for spelling in spellings(["int", "signed"], 1): - self.assertEqual(prog.type(spelling), int_type("int", 4, True)) + self.assertEqual(prog.type(spelling), prog.int_type("int", 4, True)) for spelling in spellings(["unsigned", "int"]): self.assertEqual( - prog.type(spelling), int_type("unsigned int", 4, False) + prog.type(spelling), prog.int_type("unsigned int", 4, False) ) for spelling in spellings(["long", "signed", "int"], 2): - self.assertEqual(prog.type(spelling), int_type("long", word_size, True)) + self.assertEqual( + prog.type(spelling), prog.int_type("long", word_size, True) + ) for spelling in spellings(["long", "unsigned", "int"], 1): self.assertEqual( - prog.type(spelling), int_type("unsigned long", word_size, False) + prog.type(spelling), + prog.int_type("unsigned long", word_size, False), ) for spelling in spellings(["long", "long", "signed", "int"], 2): - self.assertEqual(prog.type(spelling), int_type("long long", 8, True)) + self.assertEqual( + prog.type(spelling), prog.int_type("long long", 8, True) + ) for spelling in spellings(["long", "long", "unsigned", "int"], 1): self.assertEqual( - prog.type(spelling), int_type("unsigned long long", 8, False) + 
prog.type(spelling), prog.int_type("unsigned long long", 8, False) ) - self.assertEqual(prog.type("float"), float_type("float", 4)) - self.assertEqual(prog.type("double"), float_type("double", 8)) + self.assertEqual(prog.type("float"), prog.float_type("float", 4)) + self.assertEqual(prog.type("double"), prog.float_type("double", 8)) for spelling in spellings(["long", "double"]): - self.assertEqual(prog.type(spelling), float_type("long double", 16)) + self.assertEqual( + prog.type(spelling), prog.float_type("long double", 16) + ) self.assertEqual( prog.type("size_t"), - typedef_type("size_t", int_type("unsigned long", word_size, False)), + prog.typedef_type( + "size_t", prog.int_type("unsigned long", word_size, False) + ), ) self.assertEqual( prog.type("ptrdiff_t"), - typedef_type("ptrdiff_t", int_type("long", word_size, True)), + prog.typedef_type("ptrdiff_t", prog.int_type("long", word_size, True)), ) def test_primitive_type(self): - prog = mock_program( - types=[int_type("long", 4, True), int_type("unsigned long", 4, True),] - ) - self.assertEqual(prog.type("long"), int_type("long", 4, True)) + self.types.append(self.prog.int_type("long", 4, True)) + self.assertEqual(self.prog.type("long"), self.prog.int_type("long", 4, True)) + + def test_primitive_type_invalid(self): # unsigned long with signed=True isn't valid, so it should be ignored. + self.types.append(self.prog.int_type("unsigned long", 4, True)) self.assertEqual( - prog.type("unsigned long"), int_type("unsigned long", 8, False) + self.prog.type("unsigned long"), + self.prog.int_type("unsigned long", 8, False), ) def test_size_t_and_ptrdiff_t(self): # 64-bit architecture with 4-byte long/unsigned long. 
- prog = mock_program( - types=[int_type("long", 4, True), int_type("unsigned long", 4, False),] - ) + types = [] + prog = mock_program(types=types) + types.append(prog.int_type("long", 4, True)) + types.append(prog.int_type("unsigned long", 4, False)) self.assertEqual( - prog.type("size_t"), typedef_type("size_t", prog.type("unsigned long long")) + prog.type("size_t"), + prog.typedef_type("size_t", prog.type("unsigned long long")), ) self.assertEqual( - prog.type("ptrdiff_t"), typedef_type("ptrdiff_t", prog.type("long long")) + prog.type("ptrdiff_t"), + prog.typedef_type("ptrdiff_t", prog.type("long long")), ) # 32-bit architecture with 8-byte long/unsigned long. - prog = mock_program( - MOCK_32BIT_PLATFORM, - types=[int_type("long", 8, True), int_type("unsigned long", 8, False),], - ) + types = [] + prog = mock_program(MOCK_32BIT_PLATFORM, types=types) + types.append(prog.int_type("long", 8, True)) + types.append(prog.int_type("unsigned long", 8, False)) self.assertEqual( - prog.type("size_t"), typedef_type("size_t", prog.type("unsigned int")) + prog.type("size_t"), prog.typedef_type("size_t", prog.type("unsigned int")) ) self.assertEqual( - prog.type("ptrdiff_t"), typedef_type("ptrdiff_t", prog.type("int")) + prog.type("ptrdiff_t"), prog.typedef_type("ptrdiff_t", prog.type("int")) ) # Nonsense sizes. 
- prog = mock_program( - types=[ - int_type("int", 1, True), - int_type("unsigned int", 1, False), - int_type("long", 1, True), - int_type("unsigned long", 1, False), - int_type("long long", 2, True), - int_type("unsigned long long", 2, False), - ] - ) + types = [] + prog = mock_program(types=types) + types.append(prog.int_type("int", 1, True)) + types.append(prog.int_type("unsigned int", 1, False)) + types.append(prog.int_type("long", 1, True)) + types.append(prog.int_type("unsigned long", 1, False)) + types.append(prog.int_type("long long", 2, True)) + types.append(prog.int_type("unsigned long long", 2, False)) self.assertRaisesRegex( ValueError, "no suitable integer type for size_t", prog.type, "size_t" ) @@ -480,159 +481,232 @@ def test_size_t_and_ptrdiff_t(self): ) def test_tagged_type(self): - prog = mock_program(types=[point_type, option_type, color_type]) - self.assertEqual(prog.type("struct point"), point_type) - self.assertEqual(prog.type("union option"), option_type) - self.assertEqual(prog.type("enum color"), color_type) + self.types.append(self.point_type) + self.types.append(self.option_type) + self.types.append(self.color_type) + self.assertEqual(self.prog.type("struct point"), self.point_type) + self.assertEqual(self.prog.type("union option"), self.option_type) + self.assertEqual(self.prog.type("enum color"), self.color_type) def test_typedef(self): - prog = mock_program(types=[pid_type]) - self.assertEqual(prog.type("pid_t"), pid_type) + self.types.append(self.pid_type) + self.assertEqual(self.prog.type("pid_t"), self.pid_type) def test_pointer(self): - prog = mock_program() - self.assertEqual(prog.type("int *"), pointer_type(8, int_type("int", 4, True))) self.assertEqual( - prog.type("const int *"), - pointer_type(8, int_type("int", 4, True, Qualifiers.CONST)), + self.prog.type("int *"), + self.prog.pointer_type(self.prog.int_type("int", 4, True)), ) + + def test_pointer_to_const(self): self.assertEqual( - prog.type("int * const"), - 
pointer_type(8, int_type("int", 4, True), Qualifiers.CONST), + self.prog.type("const int *"), + self.prog.pointer_type( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), ) + + def test_const_pointer(self): self.assertEqual( - prog.type("int **"), - pointer_type(8, pointer_type(8, int_type("int", 4, True))), + self.prog.type("int * const"), + self.prog.pointer_type( + self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ), + ) + + def test_pointer_to_pointer(self): + self.assertEqual( + self.prog.type("int **"), + self.prog.pointer_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)) + ), ) self.assertEqual( - prog.type("int *((*))"), - pointer_type(8, pointer_type(8, int_type("int", 4, True))), + self.prog.type("int *((*))"), self.prog.type("int **"), ) + + def test_pointer_to_const_pointer(self): self.assertEqual( - prog.type("int * const *"), - pointer_type( - 8, pointer_type(8, int_type("int", 4, True), Qualifiers.CONST) + self.prog.type("int * const *"), + self.prog.pointer_type( + self.prog.pointer_type( + self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ) ), ) def test_array(self): - prog = mock_program() self.assertEqual( - prog.type("int []"), array_type(None, int_type("int", 4, True)) + self.prog.type("int [20]"), + self.prog.array_type(self.prog.int_type("int", 4, True), 20), ) + + def test_array_hexadecimal(self): self.assertEqual( - prog.type("int [20]"), array_type(20, int_type("int", 4, True)) + self.prog.type("int [0x20]"), + self.prog.array_type(self.prog.int_type("int", 4, True), 32), ) + + def test_array_octal(self): self.assertEqual( - prog.type("int [0x20]"), array_type(32, int_type("int", 4, True)) + self.prog.type("int [020]"), + self.prog.array_type(self.prog.int_type("int", 4, True), 16), ) + + def test_incomplete_array(self): self.assertEqual( - prog.type("int [020]"), array_type(16, int_type("int", 4, True)) + self.prog.type("int []"), + 
self.prog.array_type(self.prog.int_type("int", 4, True)), ) + + def test_array_two_dimensional(self): self.assertEqual( - prog.type("int [2][3]"), - array_type(2, array_type(3, int_type("int", 4, True))), + self.prog.type("int [2][3]"), + self.prog.array_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3), 2 + ), ) + + def test_array_three_dimensional(self): self.assertEqual( - prog.type("int [2][3][4]"), - array_type(2, array_type(3, array_type(4, int_type("int", 4, True)))), + self.prog.type("int [2][3][4]"), + self.prog.array_type( + self.prog.array_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 4), 3 + ), + 2, + ), ) def test_array_of_pointers(self): - prog = mock_program() self.assertEqual( - prog.type("int *[2][3]"), - array_type(2, array_type(3, pointer_type(8, int_type("int", 4, True)))), + self.prog.type("int *[2][3]"), + self.prog.array_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 3 + ), + 2, + ), ) def test_pointer_to_array(self): - prog = mock_program() self.assertEqual( - prog.type("int (*)[2]"), - pointer_type(8, array_type(2, int_type("int", 4, True))), + self.prog.type("int (*)[2]"), + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ), ) + + def test_pointer_to_two_dimensional_array(self): self.assertEqual( - prog.type("int (*)[2][3]"), - pointer_type(8, array_type(2, array_type(3, int_type("int", 4, True)))), + self.prog.type("int (*)[2][3]"), + self.prog.pointer_type( + self.prog.array_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3), 2 + ) + ), ) def test_pointer_to_pointer_to_array(self): - prog = mock_program() self.assertEqual( - prog.type("int (**)[2]"), - pointer_type(8, pointer_type(8, array_type(2, int_type("int", 4, True)))), + self.prog.type("int (**)[2]"), + self.prog.pointer_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ) + ), ) def 
test_pointer_to_array_of_pointers(self): - prog = mock_program() self.assertEqual( - prog.type("int *(*)[2]"), - pointer_type(8, array_type(2, pointer_type(8, int_type("int", 4, True)))), + self.prog.type("int *(*)[2]"), + self.prog.pointer_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 2 + ) + ), ) self.assertEqual( - prog.type("int *((*)[2])"), - pointer_type(8, array_type(2, pointer_type(8, int_type("int", 4, True)))), + self.prog.type("int *((*)[2])"), self.prog.type("int *(*)[2]"), ) def test_array_of_pointers_to_array(self): - prog = mock_program() self.assertEqual( - prog.type("int (*[2])[3]"), - array_type(2, pointer_type(8, array_type(3, int_type("int", 4, True)))), + self.prog.type("int (*[2])[3]"), + self.prog.array_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3) + ), + 2, + ), ) -class TestObjects(ObjectTestCase): +class TestObjects(MockProgramTestCase): def test_invalid_finder(self): - self.assertRaises(TypeError, mock_program().add_object_finder, "foo") + self.assertRaises(TypeError, self.prog.add_object_finder, "foo") - prog = mock_program() - prog.add_object_finder(lambda prog, name, flags, filename: "foo") - self.assertRaises(TypeError, prog.object, "foo") + self.prog.add_object_finder(lambda prog, name, flags, filename: "foo") + self.assertRaises(TypeError, self.prog.object, "foo") def test_not_found(self): - prog = mock_program() - self.assertRaises(LookupError, prog.object, "foo") - prog.add_object_finder(lambda prog, name, flags, filename: None) - self.assertRaises(LookupError, prog.object, "foo") - self.assertFalse("foo" in prog) + self.assertRaises(LookupError, self.prog.object, "foo") + self.prog.add_object_finder(lambda prog, name, flags, filename: None) + self.assertRaises(LookupError, self.prog.object, "foo") + self.assertFalse("foo" in self.prog) def test_constant(self): - mock_obj = MockObject("PAGE_SIZE", int_type("int", 4, True), value=4096) - 
prog = mock_program(objects=[mock_obj]) + self.objects.append( + MockObject("PAGE_SIZE", self.prog.int_type("int", 4, True), value=4096) + ) self.assertEqual( - prog["PAGE_SIZE"], Object(prog, int_type("int", 4, True), value=4096) + self.prog["PAGE_SIZE"], + Object(self.prog, self.prog.int_type("int", 4, True), value=4096), ) self.assertEqual( - prog.object("PAGE_SIZE", FindObjectFlags.CONSTANT), prog["PAGE_SIZE"] + self.prog.object("PAGE_SIZE", FindObjectFlags.CONSTANT), + self.prog["PAGE_SIZE"], ) - self.assertTrue("PAGE_SIZE" in prog) + self.assertTrue("PAGE_SIZE" in self.prog) def test_function(self): - mock_obj = MockObject( - "func", function_type(void_type(), (), False), address=0xFFFF0000 + self.objects.append( + MockObject( + "func", + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, + ) ) - prog = mock_program(objects=[mock_obj]) self.assertEqual( - prog["func"], - Object(prog, function_type(void_type(), (), False), address=0xFFFF0000), + self.prog["func"], + Object( + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, + ), ) - self.assertEqual(prog.object("func", FindObjectFlags.FUNCTION), prog["func"]) - self.assertTrue("func" in prog) + self.assertEqual( + self.prog.object("func", FindObjectFlags.FUNCTION), self.prog["func"] + ) + self.assertTrue("func" in self.prog) def test_variable(self): - mock_obj = MockObject("counter", int_type("int", 4, True), address=0xFFFF0000) - prog = mock_program(objects=[mock_obj]) + self.objects.append( + MockObject( + "counter", self.prog.int_type("int", 4, True), address=0xFFFF0000 + ) + ) self.assertEqual( - prog["counter"], Object(prog, int_type("int", 4, True), address=0xFFFF0000) + self.prog["counter"], + Object(self.prog, self.prog.int_type("int", 4, True), address=0xFFFF0000), ) self.assertEqual( - prog.object("counter", FindObjectFlags.VARIABLE), prog["counter"] + self.prog.object("counter", FindObjectFlags.VARIABLE), self.prog["counter"] 
) - self.assertTrue("counter" in prog) + self.assertTrue("counter" in self.prog) -class TestCoreDump(unittest.TestCase): +class TestCoreDump(TestCase): def test_not_core_dump(self): prog = Program() self.assertRaisesRegex( diff --git a/tests/test_type.py b/tests/test_type.py index 70b1a4273..cf9b440d2 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -1,46 +1,34 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0+ -import unittest +import operator from drgn import ( Language, PrimitiveType, + Program, Qualifiers, TypeEnumerator, TypeKind, TypeMember, TypeParameter, - array_type, - bool_type, - class_type, - complex_type, - enum_type, - float_type, - function_type, - int_type, - pointer_type, sizeof, - struct_type, - typedef_type, - union_type, - void_type, ) -from tests import DEFAULT_LANGUAGE +from tests import DEFAULT_LANGUAGE, MockProgramTestCase -class TestType(unittest.TestCase): +class TestType(MockProgramTestCase): def test_void(self): - t = void_type() + t = self.prog.void_type() self.assertEqual(t.kind, TypeKind.VOID) self.assertEqual(t.primitive, PrimitiveType.C_VOID) self.assertEqual(t.language, DEFAULT_LANGUAGE) - self.assertEqual(t, void_type()) + self.assertEqual(t, self.prog.void_type()) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "void_type()") + self.assertEqual(repr(t), "prog.void_type()") def test_int(self): - t = int_type("int", 4, True) + t = self.prog.int_type("int", 4, True) self.assertEqual(t.kind, TypeKind.INT) self.assertEqual(t.primitive, PrimitiveType.C_INT) self.assertEqual(t.language, DEFAULT_LANGUAGE) @@ -49,21 +37,21 @@ def test_int(self): self.assertTrue(t.is_signed) self.assertTrue(t.is_complete()) - self.assertEqual(t, int_type("int", 4, True)) - self.assertNotEqual(t, int_type("long", 4, True)) - self.assertNotEqual(t, int_type("int", 2, True)) - self.assertNotEqual(t, int_type("int", 4, False)) + self.assertEqual(t, self.prog.int_type("int", 4, True)) + 
self.assertNotEqual(t, self.prog.int_type("long", 4, True)) + self.assertNotEqual(t, self.prog.int_type("int", 2, True)) + self.assertNotEqual(t, self.prog.int_type("int", 4, False)) - self.assertEqual(repr(t), "int_type(name='int', size=4, is_signed=True)") + self.assertEqual(repr(t), "prog.int_type(name='int', size=4, is_signed=True)") self.assertEqual(sizeof(t), 4) - self.assertRaises(TypeError, int_type, None, 4, True) + self.assertRaises(TypeError, self.prog.int_type, None, 4, True) - self.assertIsNone(int_type("my_int", 4, True).primitive) - self.assertIsNone(int_type("int", 4, False).primitive) + self.assertIsNone(self.prog.int_type("my_int", 4, True).primitive) + self.assertIsNone(self.prog.int_type("int", 4, False).primitive) def test_bool(self): - t = bool_type("_Bool", 1) + t = self.prog.bool_type("_Bool", 1) self.assertEqual(t.kind, TypeKind.BOOL) self.assertEqual(t.primitive, PrimitiveType.C_BOOL) self.assertEqual(t.language, DEFAULT_LANGUAGE) @@ -71,87 +59,109 @@ def test_bool(self): self.assertEqual(t.size, 1) self.assertTrue(t.is_complete()) - self.assertEqual(t, bool_type("_Bool", 1)) - self.assertNotEqual(t, bool_type("bool", 1)) - self.assertNotEqual(t, bool_type("_Bool", 2)) + self.assertEqual(t, self.prog.bool_type("_Bool", 1)) + self.assertNotEqual(t, self.prog.bool_type("bool", 1)) + self.assertNotEqual(t, self.prog.bool_type("_Bool", 2)) - self.assertEqual(repr(t), "bool_type(name='_Bool', size=1)") + self.assertEqual(repr(t), "prog.bool_type(name='_Bool', size=1)") self.assertEqual(sizeof(t), 1) - self.assertRaises(TypeError, bool_type, None, 1) + self.assertRaises(TypeError, self.prog.bool_type, None, 1) def test_float(self): - t = float_type("float", 4) + t = self.prog.float_type("float", 4) self.assertEqual(t.primitive, PrimitiveType.C_FLOAT) self.assertEqual(t.kind, TypeKind.FLOAT) self.assertEqual(t.name, "float") self.assertEqual(t.size, 4) self.assertTrue(t.is_complete()) - self.assertEqual(t, float_type("float", 4)) - 
self.assertNotEqual(t, float_type("double", 4)) - self.assertNotEqual(t, float_type("float", 8)) + self.assertEqual(t, self.prog.float_type("float", 4)) + self.assertNotEqual(t, self.prog.float_type("double", 4)) + self.assertNotEqual(t, self.prog.float_type("float", 8)) - self.assertEqual(repr(t), "float_type(name='float', size=4)") + self.assertEqual(repr(t), "prog.float_type(name='float', size=4)") self.assertEqual(sizeof(t), 4) - self.assertRaises(TypeError, float_type, None, 4) + self.assertRaises(TypeError, self.prog.float_type, None, 4) def test_complex(self): - t = complex_type("double _Complex", 16, float_type("double", 8)) + t = self.prog.complex_type( + "double _Complex", 16, self.prog.float_type("double", 8) + ) self.assertEqual(t.kind, TypeKind.COMPLEX) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.name, "double _Complex") self.assertEqual(t.size, 16) - self.assertEqual(t.type, float_type("double", 8)) + self.assertEqual(t.type, self.prog.float_type("double", 8)) self.assertTrue(t.is_complete()) self.assertEqual( - t, complex_type("double _Complex", 16, float_type("double", 8)) + t, + self.prog.complex_type( + "double _Complex", 16, self.prog.float_type("double", 8) + ), ) self.assertNotEqual( - t, complex_type("float _Complex", 16, float_type("double", 8)) + t, + self.prog.complex_type( + "float _Complex", 16, self.prog.float_type("double", 8) + ), ) self.assertNotEqual( - t, complex_type("double _Complex", 32, float_type("double", 8)) + t, + self.prog.complex_type( + "double _Complex", 32, self.prog.float_type("double", 8) + ), ) self.assertNotEqual( - t, complex_type("double _Complex", 16, float_type("float", 4)) + t, + self.prog.complex_type( + "double _Complex", 16, self.prog.float_type("float", 4) + ), ) self.assertEqual( repr(t), - "complex_type(name='double _Complex', size=16, type=float_type(name='double', size=8))", + "prog.complex_type(name='double _Complex', size=16, 
type=prog.float_type(name='double', size=8))", ) self.assertEqual(sizeof(t), 16) - self.assertRaises(TypeError, complex_type, None, 16, float_type("double", 8)) - self.assertRaises(TypeError, complex_type, "double _Complex", 16, None) + self.assertRaises( + TypeError, + self.prog.complex_type, + None, + 16, + self.prog.float_type("double", 8), + ) + self.assertRaises( + TypeError, self.prog.complex_type, "double _Complex", 16, None + ) self.assertRaisesRegex( ValueError, "must be floating-point or integer type", - complex_type, + self.prog.complex_type, "double _Complex", 16, - void_type(), + self.prog.void_type(), ) self.assertRaisesRegex( ValueError, "must be unqualified", - complex_type, + self.prog.complex_type, "double _Complex", 16, - float_type("double", 8, Qualifiers.CONST), + self.prog.float_type("double", 8, qualifiers=Qualifiers.CONST), ) def test_struct(self): - t = struct_type( + t = self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ) self.assertEqual(t.kind, TypeKind.STRUCT) @@ -162,111 +172,111 @@ def test_struct(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), ), ) self.assertTrue(t.is_complete()) self.assertEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # Different tag. 
self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "pt", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # Different size. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 16, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # One is anonymous. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # Different members. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("long", 8, True), "x", 0), - TypeMember(int_type("long", 8, True), "y", 64), + TypeMember(self.prog.int_type("long", 8, True), "x", 0), + TypeMember(self.prog.int_type("long", 8, True), "y", 64), ), ), ) # Different number of members. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # One member is anonymous. 
self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), None, 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), None, 32), ), ), ) # One is incomplete. - self.assertNotEqual(t, struct_type("point")) + self.assertNotEqual(t, self.prog.struct_type("point")) self.assertEqual( repr(t), - "struct_type(tag='point', size=8, members=(TypeMember(type=int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32)))", + "prog.struct_type(tag='point', size=8, members=(TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32)))", ) self.assertEqual(sizeof(t), 8) - t = struct_type( + t = self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ) self.assertEqual(t.kind, TypeKind.STRUCT) @@ -276,77 +286,79 @@ def test_struct(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), ), ) self.assertTrue(t.is_complete()) - t = struct_type("color", 0, ()) + t = self.prog.struct_type("color", 0, ()) self.assertEqual(t.kind, TypeKind.STRUCT) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertEqual(t.size, 0) self.assertEqual(t.members, ()) self.assertTrue(t.is_complete()) - self.assertEqual(repr(t), "struct_type(tag='color', size=0, members=())") + self.assertEqual(repr(t), 
"prog.struct_type(tag='color', size=0, members=())") - t = struct_type("color") + t = self.prog.struct_type("color") self.assertEqual(t.kind, TypeKind.STRUCT) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "struct_type(tag='color', size=None, members=None)") + self.assertEqual( + repr(t), "prog.struct_type(tag='color', size=None, members=None)" + ) - t = struct_type(None, None, None) + t = self.prog.struct_type(None, None, None) self.assertEqual(t.kind, TypeKind.STRUCT) self.assertIsNone(t.primitive) self.assertEqual(t.tag, None) self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "struct_type(tag=None, size=None, members=None)") + self.assertEqual(repr(t), "prog.struct_type(tag=None, size=None, members=None)") - self.assertRaises(TypeError, struct_type, 4) + self.assertRaises(TypeError, self.prog.struct_type, 4) self.assertRaisesRegex( - ValueError, "must not have size", struct_type, "point", 8, None + ValueError, "must not have size", self.prog.struct_type, "point", 8, None ) self.assertRaisesRegex( - ValueError, "must have size", struct_type, "point", None, () + ValueError, "must have size", self.prog.struct_type, "point", None, () ) self.assertRaisesRegex( - TypeError, "must be sequence or None", struct_type, "point", 8, 4 + TypeError, "must be sequence or None", self.prog.struct_type, "point", 8, 4 ) self.assertRaisesRegex( - TypeError, "must be TypeMember", struct_type, "point", 8, (4,) + TypeError, "must be TypeMember", self.prog.struct_type, "point", 8, (4,) ) # Bit size. 
- t = struct_type( + t = self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), ), ) self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), ), ) def test_union(self): - t = union_type( + t = self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ) self.assertEqual(t.kind, TypeKind.UNION) @@ -357,111 +369,111 @@ def test_union(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 0), ), ) self.assertTrue(t.is_complete()) self.assertEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # Different tag. self.assertNotEqual( t, - union_type( + self.prog.union_type( "pt", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # Different size. 
self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 8, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # One is anonymous. self.assertNotEqual( t, - union_type( + self.prog.union_type( None, 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # Different members. self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("long", 8, True), "x"), - TypeMember(int_type("unsigned long", 8, False), "y"), + TypeMember(self.prog.int_type("long", 8, True), "x"), + TypeMember(self.prog.int_type("unsigned long", 8, False), "y"), ), ), ) # Different number of members. self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), - TypeMember(float_type("float", 4), "z"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.float_type("float", 4), "z"), ), ), ) # One member is anonymous. self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False),), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False),), ), ), ) # One is incomplete. 
- self.assertNotEqual(t, union_type("option")) + self.assertNotEqual(t, self.prog.union_type("option")) self.assertEqual( repr(t), - "union_type(tag='option', size=4, members=(TypeMember(type=int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=int_type(name='unsigned int', size=4, is_signed=False), name='y', bit_offset=0)))", + "prog.union_type(tag='option', size=4, members=(TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=prog.int_type(name='unsigned int', size=4, is_signed=False), name='y', bit_offset=0)))", ) self.assertEqual(sizeof(t), 4) - t = union_type( + t = self.prog.union_type( None, 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ) self.assertEqual(t.kind, TypeKind.UNION) @@ -471,78 +483,80 @@ def test_union(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 0), ), ) self.assertTrue(t.is_complete()) - t = union_type("color", 0, ()) + t = self.prog.union_type("color", 0, ()) self.assertEqual(t.kind, TypeKind.UNION) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertEqual(t.size, 0) self.assertEqual(t.members, ()) self.assertTrue(t.is_complete()) - self.assertEqual(repr(t), "union_type(tag='color', size=0, members=())") + self.assertEqual(repr(t), "prog.union_type(tag='color', size=0, members=())") - t = union_type("color") + t = self.prog.union_type("color") self.assertEqual(t.kind, TypeKind.UNION) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertIsNone(t.size) self.assertIsNone(t.members) 
self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "union_type(tag='color', size=None, members=None)") + self.assertEqual( + repr(t), "prog.union_type(tag='color', size=None, members=None)" + ) - t = union_type(None, None, None) + t = self.prog.union_type(None, None, None) self.assertEqual(t.kind, TypeKind.UNION) self.assertIsNone(t.primitive) self.assertEqual(t.tag, None) self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "union_type(tag=None, size=None, members=None)") + self.assertEqual(repr(t), "prog.union_type(tag=None, size=None, members=None)") - self.assertRaises(TypeError, union_type, 4) + self.assertRaises(TypeError, self.prog.union_type, 4) self.assertRaisesRegex( - ValueError, "must not have size", union_type, "option", 8, None + ValueError, "must not have size", self.prog.union_type, "option", 8, None ) self.assertRaisesRegex( - ValueError, "must have size", union_type, "option", None, () + ValueError, "must have size", self.prog.union_type, "option", None, () ) self.assertRaisesRegex( - TypeError, "must be sequence or None", union_type, "option", 8, 4 + TypeError, "must be sequence or None", self.prog.union_type, "option", 8, 4 ) self.assertRaisesRegex( - TypeError, "must be TypeMember", union_type, "option", 8, (4,) + TypeError, "must be TypeMember", self.prog.union_type, "option", 8, (4,) ) # Bit size. 
- t = union_type( + t = self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 4), ), ) self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 4), ), ) def test_class(self): - t = class_type( + t = self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ) self.assertEqual(t.kind, TypeKind.CLASS) @@ -553,118 +567,118 @@ def test_class(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), - TypeMember(int_type("int", 4, True), "z", 64, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 0), ), ) self.assertTrue(t.is_complete()) self.assertEqual( t, - class_type( + self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # Different tag. 
self.assertNotEqual( t, - class_type( + self.prog.class_type( "crd", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # Different size. self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 16, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # One is anonymous. self.assertNotEqual( t, - class_type( + self.prog.class_type( None, 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # Different members. self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 12, ( - TypeMember(int_type("long", 8, True), "x", 0), - TypeMember(int_type("long", 8, True), "y", 64), - TypeMember(int_type("long", 8, True), "z", 128), + TypeMember(self.prog.int_type("long", 8, True), "x", 0), + TypeMember(self.prog.int_type("long", 8, True), "y", 64), + TypeMember(self.prog.int_type("long", 8, True), "z", 128), ), ), ) # Different number of members. 
self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # One member is anonymous. self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), None, 32, 0), - TypeMember(int_type("int", 4, True), "z", 64, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), None, 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 0), ), ), ) # One is incomplete. - self.assertNotEqual(t, class_type("coord")) + self.assertNotEqual(t, self.prog.class_type("coord")) self.assertEqual( repr(t), - "class_type(tag='coord', size=12, members=(TypeMember(type=int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32), TypeMember(type=int_type(name='int', size=4, is_signed=True), name='z', bit_offset=64)))", + "prog.class_type(tag='coord', size=12, members=(TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32), TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='z', bit_offset=64)))", ) self.assertEqual(sizeof(t), 12) - t = class_type( + t = self.prog.class_type( None, 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ) self.assertEqual(t.kind, 
TypeKind.CLASS) @@ -674,77 +688,79 @@ def test_class(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), - TypeMember(int_type("int", 4, True), "z", 64, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 0), ), ) self.assertTrue(t.is_complete()) - t = class_type("color", 0, ()) + t = self.prog.class_type("color", 0, ()) self.assertEqual(t.kind, TypeKind.CLASS) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertEqual(t.size, 0) self.assertEqual(t.members, ()) self.assertTrue(t.is_complete()) - self.assertEqual(repr(t), "class_type(tag='color', size=0, members=())") + self.assertEqual(repr(t), "prog.class_type(tag='color', size=0, members=())") - t = class_type("color") + t = self.prog.class_type("color") self.assertEqual(t.kind, TypeKind.CLASS) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "class_type(tag='color', size=None, members=None)") + self.assertEqual( + repr(t), "prog.class_type(tag='color', size=None, members=None)" + ) - t = class_type(None, None, None) + t = self.prog.class_type(None, None, None) self.assertEqual(t.kind, TypeKind.CLASS) self.assertIsNone(t.primitive) self.assertEqual(t.tag, None) self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "class_type(tag=None, size=None, members=None)") + self.assertEqual(repr(t), "prog.class_type(tag=None, size=None, members=None)") - self.assertRaises(TypeError, class_type, 4) + self.assertRaises(TypeError, self.prog.class_type, 4) self.assertRaisesRegex( - ValueError, "must not have size", class_type, "coord", 12, None + ValueError, "must not have size", 
self.prog.class_type, "coord", 12, None ) self.assertRaisesRegex( - ValueError, "must have size", class_type, "coord", None, () + ValueError, "must have size", self.prog.class_type, "coord", None, () ) self.assertRaisesRegex( - TypeError, "must be sequence or None", class_type, "coord", 12, 4 + TypeError, "must be sequence or None", self.prog.class_type, "coord", 12, 4 ) self.assertRaisesRegex( - TypeError, "must be TypeMember", class_type, "coord", 12, (4,) + TypeError, "must be TypeMember", self.prog.class_type, "coord", 12, (4,) ) # Bit size. - t = class_type( + t = self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), - TypeMember(int_type("int", 4, True), "z", 64, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 4), ), ) self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), - TypeMember(int_type("int", 4, True), "z", 64, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 4), ), ) def test_enum(self): - t = enum_type( + t = self.prog.enum_type( "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -755,7 +771,7 @@ def test_enum(self): self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.tag, "color") - self.assertEqual(t.type, int_type("unsigned int", 4, False)) + self.assertEqual(t.type, self.prog.int_type("unsigned int", 4, False)) self.assertEqual( t.enumerators, ( @@ -768,9 +784,9 @@ def test_enum(self): self.assertEqual( t, - enum_type( + self.prog.enum_type( "color", - 
int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -781,9 +797,9 @@ def test_enum(self): # Different tag. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "COLOR", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -794,9 +810,9 @@ def test_enum(self): # One is anonymous. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( None, - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -807,9 +823,9 @@ def test_enum(self): # Different compatible type. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "color", - int_type("int", 4, True), + self.prog.int_type("int", 4, True), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -820,9 +836,9 @@ def test_enum(self): # Different enumerators. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("YELLOW", 1), @@ -833,22 +849,22 @@ def test_enum(self): # Different number of enumerators. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), (TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1)), ), ) # One is incomplete. 
- self.assertNotEqual(t, enum_type("color")) + self.assertNotEqual(t, self.prog.enum_type("color")) self.assertEqual( repr(t), - "enum_type(tag='color', type=int_type(name='unsigned int', size=4, is_signed=False), enumerators=(TypeEnumerator('RED', 0), TypeEnumerator('GREEN', 1), TypeEnumerator('BLUE', 2)))", + "prog.enum_type(tag='color', type=prog.int_type(name='unsigned int', size=4, is_signed=False), enumerators=(TypeEnumerator('RED', 0), TypeEnumerator('GREEN', 1), TypeEnumerator('BLUE', 2)))", ) self.assertEqual(sizeof(t), 4) - t = enum_type("color", None, None) + t = self.prog.enum_type("color", None, None) self.assertEqual(t.kind, TypeKind.ENUM) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") @@ -856,245 +872,335 @@ def test_enum(self): self.assertIsNone(t.enumerators) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "enum_type(tag='color', type=None, enumerators=None)") + self.assertEqual( + repr(t), "prog.enum_type(tag='color', type=None, enumerators=None)" + ) # A type with no enumerators isn't valid in C, but we allow it. 
- t = enum_type("color", int_type("unsigned int", 4, False), ()) + t = self.prog.enum_type( + "color", self.prog.int_type("unsigned int", 4, False), () + ) self.assertEqual(t.kind, TypeKind.ENUM) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") - self.assertEqual(t.type, int_type("unsigned int", 4, False)) + self.assertEqual(t.type, self.prog.int_type("unsigned int", 4, False)) self.assertEqual(t.enumerators, ()) self.assertTrue(t.is_complete()) self.assertEqual( repr(t), - "enum_type(tag='color', type=int_type(name='unsigned int', size=4, is_signed=False), enumerators=())", + "prog.enum_type(tag='color', type=prog.int_type(name='unsigned int', size=4, is_signed=False), enumerators=())", ) - self.assertRaisesRegex(TypeError, "must be Type", enum_type, "color", 4, ()) self.assertRaisesRegex( - ValueError, "must be integer type", enum_type, "color", void_type(), () + TypeError, "must be Type", self.prog.enum_type, "color", 4, () + ) + self.assertRaisesRegex( + ValueError, + "must be integer type", + self.prog.enum_type, + "color", + self.prog.void_type(), + (), ) self.assertRaisesRegex( ValueError, "must be unqualified", - enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, True, Qualifiers.CONST), + self.prog.int_type("unsigned int", 4, True, qualifiers=Qualifiers.CONST), (), ) self.assertRaisesRegex( ValueError, "must not have compatible type", - enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), None, ) self.assertRaisesRegex( - ValueError, "must have compatible type", enum_type, "color", None, () + ValueError, + "must have compatible type", + self.prog.enum_type, + "color", + None, + (), ) self.assertRaisesRegex( TypeError, "must be sequence or None", - enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), 4, ) self.assertRaisesRegex( TypeError, "must be TypeEnumerator", - 
enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), (4,), ) def test_typedef(self): - t = typedef_type("INT", int_type("int", 4, True)) + t = self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)) self.assertEqual(t.kind, TypeKind.TYPEDEF) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.name, "INT") - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - self.assertEqual(t, typedef_type("INT", int_type("int", 4, True))) - # Qualified type argument. - self.assertEqual(t, typedef_type("INT", int_type("int", 4, True))) + self.assertEqual( + t, self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)) + ) # Different name. - self.assertNotEqual(t, typedef_type("integer", int_type("int", 4, True))) + self.assertNotEqual( + t, self.prog.typedef_type("integer", self.prog.int_type("int", 4, True)) + ) # Different type. 
self.assertNotEqual( - t, typedef_type("integer", int_type("unsigned int", 4, False)) + t, + self.prog.typedef_type( + "integer", self.prog.int_type("unsigned int", 4, False) + ), ) self.assertNotEqual( - t, typedef_type("INT", int_type("int", 4, True, Qualifiers.CONST)) + t, + self.prog.typedef_type( + "INT", self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), ) self.assertEqual( repr(t), - "typedef_type(name='INT', type=int_type(name='int', size=4, is_signed=True))", + "prog.typedef_type(name='INT', type=prog.int_type(name='int', size=4, is_signed=True))", ) self.assertEqual(sizeof(t), 4) - t = typedef_type("VOID", void_type()) + t = self.prog.typedef_type("VOID", self.prog.void_type()) self.assertFalse(t.is_complete()) - self.assertRaises(TypeError, typedef_type, None, int_type("int", 4, True)) - self.assertRaises(TypeError, typedef_type, "INT", 4) + self.assertRaises( + TypeError, self.prog.typedef_type, None, self.prog.int_type("int", 4, True) + ) + self.assertRaises(TypeError, self.prog.typedef_type, "INT", 4) self.assertEqual( - typedef_type("size_t", int_type("unsigned long", 8, False)).primitive, + self.prog.typedef_type( + "size_t", self.prog.int_type("unsigned long", 8, False) + ).primitive, PrimitiveType.C_SIZE_T, ) self.assertEqual( - typedef_type("ptrdiff_t", int_type("long", 8, True)).primitive, + self.prog.typedef_type( + "ptrdiff_t", self.prog.int_type("long", 8, True) + ).primitive, PrimitiveType.C_PTRDIFF_T, ) def test_pointer(self): - t = pointer_type(8, int_type("int", 4, True)) + t = self.prog.pointer_type(self.prog.int_type("int", 4, True), 8) self.assertEqual(t.kind, TypeKind.POINTER) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.size, 8) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - self.assertEqual(t, pointer_type(8, int_type("int", 4, True))) - # Qualified type 
argument. - self.assertEqual(t, pointer_type(8, int_type("int", 4, True))) + self.assertEqual( + t, self.prog.pointer_type(self.prog.int_type("int", 4, True), 8) + ) + # Default size. + self.assertEqual(t, self.prog.pointer_type(self.prog.int_type("int", 4, True))) + self.assertEqual( + t, self.prog.pointer_type(self.prog.int_type("int", 4, True), None) + ) # Different size. - self.assertNotEqual(t, pointer_type(4, int_type("int", 4, True))) + self.assertNotEqual( + t, self.prog.pointer_type(self.prog.int_type("int", 4, True), 4) + ) # Different type. - self.assertNotEqual(t, pointer_type(8, void_type())) - self.assertNotEqual(t, pointer_type(8, void_type(Qualifiers.CONST))) + self.assertNotEqual(t, self.prog.pointer_type(self.prog.void_type(), 8)) + self.assertNotEqual( + t, + self.prog.pointer_type(self.prog.void_type(qualifiers=Qualifiers.CONST), 8), + ) self.assertEqual( repr(t), - "pointer_type(size=8, type=int_type(name='int', size=4, is_signed=True))", + "prog.pointer_type(type=prog.int_type(name='int', size=4, is_signed=True))", ) + self.assertEqual( + repr(self.prog.pointer_type(self.prog.int_type("int", 4, True), 4)), + "prog.pointer_type(type=prog.int_type(name='int', size=4, is_signed=True), size=4)", + ) + self.assertEqual(sizeof(t), 8) - self.assertRaises(TypeError, pointer_type, None, int_type("int", 4, True)) - self.assertRaises(TypeError, pointer_type, 8, 4) + self.assertRaises(TypeError, self.prog.pointer_type, 4) def test_array(self): - t = array_type(10, int_type("int", 4, True)) + t = self.prog.array_type(self.prog.int_type("int", 4, True), 10) self.assertEqual(t.kind, TypeKind.ARRAY) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.length, 10) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - self.assertEqual(t, array_type(10, int_type("int", 4, True))) - # Qualified type argument. 
- self.assertEqual(t, array_type(10, int_type("int", 4, True))) + self.assertEqual( + t, self.prog.array_type(self.prog.int_type("int", 4, True), 10) + ) # Different length. - self.assertNotEqual(t, array_type(4, int_type("int", 4, True))) + self.assertNotEqual( + t, self.prog.array_type(self.prog.int_type("int", 4, True), 4) + ) # Different type. - self.assertNotEqual(t, array_type(10, void_type())) - self.assertNotEqual(t, array_type(10, void_type(Qualifiers.CONST))) + self.assertNotEqual(t, self.prog.array_type(self.prog.void_type(), 10)) + self.assertNotEqual( + t, + self.prog.array_type(self.prog.void_type(qualifiers=Qualifiers.CONST), 10), + ) self.assertEqual( repr(t), - "array_type(length=10, type=int_type(name='int', size=4, is_signed=True))", + "prog.array_type(type=prog.int_type(name='int', size=4, is_signed=True), length=10)", ) self.assertEqual(sizeof(t), 40) - t = array_type(0, int_type("int", 4, True)) + t = self.prog.array_type(self.prog.int_type("int", 4, True), 0) self.assertEqual(t.kind, TypeKind.ARRAY) self.assertIsNone(t.primitive) self.assertEqual(t.length, 0) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - t = array_type(None, int_type("int", 4, True)) + t = self.prog.array_type(self.prog.int_type("int", 4, True)) self.assertEqual(t.kind, TypeKind.ARRAY) self.assertIsNone(t.primitive) self.assertIsNone(t.length) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertFalse(t.is_complete()) - self.assertRaises(TypeError, array_type, 10, 4) + self.assertRaises(TypeError, self.prog.array_type, 10, 4) def test_function(self): - t = function_type(void_type(), (TypeParameter(int_type("int", 4, True), "n"),)) + t = self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), + ) self.assertEqual(t.kind, TypeKind.FUNCTION) 
self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) - self.assertEqual(t.type, void_type()) - self.assertEqual(t.parameters, (TypeParameter(int_type("int", 4, True), "n"),)) + self.assertEqual(t.type, self.prog.void_type()) + self.assertEqual( + t.parameters, (TypeParameter(self.prog.int_type("int", 4, True), "n"),) + ) self.assertFalse(t.is_variadic) self.assertTrue(t.is_complete()) self.assertEqual( t, - function_type(void_type(), (TypeParameter(int_type("int", 4, True), "n"),)), + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), + ), ) # Different return type. self.assertNotEqual( t, - function_type( - int_type("int", 4, True), - (TypeParameter(int_type("int", 4, True), "n"),), + self.prog.function_type( + self.prog.int_type("int", 4, True), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), ), ) # Different parameter name. self.assertNotEqual( t, - function_type(void_type(), (TypeParameter(int_type("int", 4, True), "x"),)), + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "x"),), + ), ) # Unnamed parameter. self.assertNotEqual( - t, function_type(void_type(), (TypeParameter(int_type("int", 4, True),),)) + t, + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True),),), + ), ) # Different number of parameters. self.assertNotEqual( t, - function_type( - void_type(), + self.prog.function_type( + self.prog.void_type(), ( - TypeParameter(int_type("int", 4, True), "n"), - TypeParameter(pointer_type(8, void_type()), "p"), + TypeParameter(self.prog.int_type("int", 4, True), "n"), + TypeParameter( + self.prog.pointer_type(self.prog.void_type(), 8), "p" + ), ), ), ) # One is variadic. 
self.assertNotEqual( t, - function_type( - void_type(), (TypeParameter(int_type("int", 4, True), "n"),), True + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), + True, ), ) self.assertEqual( repr(t), - "function_type(type=void_type(), parameters=(TypeParameter(type=int_type(name='int', size=4, is_signed=True), name='n'),), is_variadic=False)", + "prog.function_type(type=prog.void_type(), parameters=(TypeParameter(type=prog.int_type(name='int', size=4, is_signed=True), name='n'),), is_variadic=False)", ) self.assertRaises(TypeError, sizeof, t) - self.assertFalse(function_type(void_type(), (), False).is_variadic) - self.assertTrue(function_type(void_type(), (), True).is_variadic) + self.assertFalse( + self.prog.function_type(self.prog.void_type(), (), False).is_variadic + ) + self.assertTrue( + self.prog.function_type(self.prog.void_type(), (), True).is_variadic + ) - self.assertRaisesRegex(TypeError, "must be Type", function_type, None, ()) self.assertRaisesRegex( - TypeError, "must be sequence", function_type, void_type(), None + TypeError, "must be _drgn\.Type", self.prog.function_type, None, () ) self.assertRaisesRegex( - TypeError, "must be TypeParameter", function_type, void_type(), (4,) + TypeError, + "must be sequence", + self.prog.function_type, + self.prog.void_type(), + None, + ) + self.assertRaisesRegex( + TypeError, + "must be TypeParameter", + self.prog.function_type, + self.prog.void_type(), + (4,), ) def test_cycle(self): - t1 = struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t1), "next"),)) - t2 = struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t2), "next"),)) + t1 = self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t1), "next"),) + ) + t2 = self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t2), "next"),) + ) t3, t4 = ( - struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t4), "next"),)), - 
struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t3), "next"),)), + self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t4), "next"),) + ), + self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t3), "next"),) + ), ) self.assertEqual(t1, t2) self.assertEqual(t2, t3) @@ -1102,90 +1208,205 @@ def test_cycle(self): self.assertEqual( repr(t1), - "struct_type(tag='foo', size=8, members=(TypeMember(type=pointer_type(size=8, type=struct_type(tag='foo', ...)), name='next', bit_offset=0),))", + "prog.struct_type(tag='foo', size=8, members=(TypeMember(type=prog.pointer_type(type=prog.struct_type(tag='foo', ...)), name='next', bit_offset=0),))", ) def test_cycle2(self): - t1 = struct_type( + t1 = self.prog.struct_type( "list_head", 16, ( - TypeMember(lambda: pointer_type(8, t1), "next"), - TypeMember(lambda: pointer_type(8, t1), "prev", 8), + TypeMember(lambda: self.prog.pointer_type(t1), "next"), + TypeMember(lambda: self.prog.pointer_type(t1), "prev", 8), ), ) - t2 = struct_type( + t2 = self.prog.struct_type( "list_head", 16, ( - TypeMember(lambda: pointer_type(8, t2), "next"), - TypeMember(lambda: pointer_type(8, t2), "prev", 8), + TypeMember(lambda: self.prog.pointer_type(t2), "next"), + TypeMember(lambda: self.prog.pointer_type(t2), "prev", 8), ), ) self.assertEqual(t1, t2) self.assertEqual( repr(t1), - "struct_type(tag='list_head', size=16, members=(TypeMember(type=pointer_type(size=8, type=struct_type(tag='list_head', ...)), name='next', bit_offset=0), TypeMember(type=pointer_type(size=8, type=struct_type(tag='list_head', ...)), name='prev', bit_offset=8)))", + "prog.struct_type(tag='list_head', size=16, members=(TypeMember(type=prog.pointer_type(type=prog.struct_type(tag='list_head', ...)), name='next', bit_offset=0), TypeMember(type=prog.pointer_type(type=prog.struct_type(tag='list_head', ...)), name='prev', bit_offset=8)))", ) def test_infinite(self): - f = lambda: struct_type("foo", 0, (TypeMember(f, 
"next"),)) + f = lambda: self.prog.struct_type("foo", 0, (TypeMember(f, "next"),)) self.assertEqual( repr(f()), - "struct_type(tag='foo', size=0, members=(TypeMember(type=struct_type(tag='foo', ...), name='next', bit_offset=0),))", + "prog.struct_type(tag='foo', size=0, members=(TypeMember(type=prog.struct_type(tag='foo', ...), name='next', bit_offset=0),))", ) with self.assertRaisesRegex(RecursionError, "maximum.*depth"): f() == f() def test_bad_thunk(self): - t1 = struct_type( + t1 = self.prog.struct_type( "foo", 16, (TypeMember(lambda: exec('raise Exception("test")'), "bar"),) ) with self.assertRaisesRegex(Exception, "test"): t1.members[0].type - t1 = struct_type("foo", 16, (TypeMember(lambda: 0, "bar"),)) + t1 = self.prog.struct_type("foo", 16, (TypeMember(lambda: 0, "bar"),)) with self.assertRaisesRegex(TypeError, "type callable must return Type"): t1.members[0].type def test_qualifiers(self): - self.assertEqual(void_type().qualifiers, Qualifiers(0)) + self.assertEqual(self.prog.void_type().qualifiers, Qualifiers(0)) - t = void_type(Qualifiers.CONST | Qualifiers.VOLATILE) + t = self.prog.void_type(qualifiers=Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual(t.qualifiers, Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual( - repr(t), "void_type(qualifiers=<Qualifiers.VOLATILE|CONST: 3>)" + repr(t), "prog.void_type(qualifiers=<Qualifiers.VOLATILE|CONST: 3>)" ) - self.assertEqual(t.qualified(Qualifiers.ATOMIC), void_type(Qualifiers.ATOMIC)) - self.assertEqual(t.unqualified(), void_type()) + self.assertEqual( + t.qualified(Qualifiers.ATOMIC), + self.prog.void_type(qualifiers=Qualifiers.ATOMIC), + ) + self.assertEqual(t.unqualified(), self.prog.void_type()) self.assertEqual(t.qualified(Qualifiers(0)), t.unqualified()) - self.assertRaisesRegex(TypeError, "expected Qualifiers or None", void_type, 1.5) + self.assertRaisesRegex( + TypeError, + "expected Qualifiers or None", + self.prog.void_type, + qualifiers=1.5, + ) def test_language(self): - self.assertEqual(void_type(language=None).language, DEFAULT_LANGUAGE) - 
self.assertEqual(void_type(language=Language.C).language, Language.C) + self.assertEqual(self.prog.void_type(language=None).language, DEFAULT_LANGUAGE) + self.assertEqual(self.prog.void_type(language=Language.C).language, Language.C) self.assertEqual( - int_type("int", 4, True, language=Language.CPP).language, Language.CPP + self.prog.int_type("int", 4, True, language=Language.CPP).language, + Language.CPP, ) self.assertNotEqual( - int_type("int", 4, True, language=Language.C), - int_type("int", 4, True, language=Language.CPP), + self.prog.int_type("int", 4, True, language=Language.C), + self.prog.int_type("int", 4, True, language=Language.CPP), + ) + + def test_language_repr(self): + self.assertEqual( + repr(self.prog.void_type(language=Language.CPP)), + "prog.void_type(language=Language.CPP)", ) def test_cmp(self): - self.assertEqual(void_type(), void_type()) - self.assertEqual(void_type(Qualifiers.CONST), void_type(Qualifiers.CONST)) - self.assertNotEqual(void_type(), void_type(Qualifiers.CONST)) - self.assertNotEqual(void_type(), int_type("int", 4, True)) - self.assertNotEqual(void_type(), 1) - self.assertNotEqual(1, void_type()) + self.assertEqual(self.prog.void_type(), self.prog.void_type()) + self.assertEqual( + self.prog.void_type(qualifiers=Qualifiers.CONST), + self.prog.void_type(qualifiers=Qualifiers.CONST), + ) + self.assertNotEqual( + self.prog.void_type(), self.prog.void_type(qualifiers=Qualifiers.CONST) + ) + self.assertNotEqual(self.prog.void_type(), self.prog.int_type("int", 4, True)) + self.assertNotEqual(self.prog.void_type(), 1) + self.assertNotEqual(1, self.prog.void_type()) + + def test_different_programs_compare(self): + self.assertRaisesRegex( + ValueError, + "types are from different programs", + operator.eq, + self.prog.void_type(), + Program().void_type(), + ) + + def test_different_programs_complex(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.complex_type, + "double _Complex", + 16, + 
Program().float_type("double", 8), + ) + + def test_different_programs_compound(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.struct_type, + None, + 4, + (TypeMember(Program().int_type("int", 4, True)),), + ) + + def test_different_programs_compound_callback(self): + with self.assertRaisesRegex(ValueError, "type is from different program"): + self.prog.struct_type( + None, 4, (TypeMember(lambda: Program().int_type("int", 4, True)),) + ).members[0].type + + def test_different_programs_enum(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.enum_type, + None, + Program().int_type("int", 4, True), + (), + ) + + def test_different_programs_typedef(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.typedef_type, + "INT", + Program().int_type("int", 4, True), + ) + def test_different_programs_pointer(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.pointer_type, + Program().int_type("int", 4, True), + ) -class TestTypeEnumerator(unittest.TestCase): + def test_different_programs_array(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.pointer_type, + Program().int_type("int", 4, True), + ) + + def test_different_programs_function_return(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.function_type, + Program().int_type("int", 4, True), + (), + ) + + def test_different_programs_function_parameter(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.function_type, + self.prog.void_type(), + (TypeParameter(Program().int_type("int", 4, True)),), + ) + + def test_different_programs_function_parameter_callback(self): + with self.assertRaisesRegex(ValueError, "type is from different program"): + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(lambda: 
Program().int_type("int", 4, True)),), + ).parameters[0].type + + +class TestTypeEnumerator(MockProgramTestCase): def test_init(self): e = TypeEnumerator("a", 1) self.assertEqual(e.name, "a") @@ -1211,128 +1432,142 @@ def test_cmp(self): self.assertNotEqual(TypeEnumerator("b", 1), TypeEnumerator("a", 1)) -class TestTypeMember(unittest.TestCase): +class TestTypeMember(MockProgramTestCase): def test_init(self): - m = TypeMember(void_type()) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type()) + self.assertEqual(m.type, self.prog.void_type()) self.assertIsNone(m.name) self.assertEqual(m.bit_offset, 0) self.assertEqual(m.offset, 0) self.assertEqual(m.bit_field_size, 0) - m = TypeMember(void_type(), "foo") - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type(), "foo") + self.assertEqual(m.type, self.prog.void_type()) self.assertEqual(m.name, "foo") self.assertEqual(m.bit_offset, 0) self.assertEqual(m.offset, 0) self.assertEqual(m.bit_field_size, 0) - m = TypeMember(void_type(), "foo", 8) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type(), "foo", 8) + self.assertEqual(m.type, self.prog.void_type()) self.assertEqual(m.name, "foo") self.assertEqual(m.bit_offset, 8) self.assertEqual(m.offset, 1) self.assertEqual(m.bit_field_size, 0) - m = TypeMember(void_type(), "foo", 9, 7) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type(), "foo", 9, 7) + self.assertEqual(m.type, self.prog.void_type()) self.assertEqual(m.name, "foo") self.assertEqual(m.bit_offset, 9) self.assertRaises(ValueError, getattr, m, "offset") self.assertEqual(m.bit_field_size, 7) self.assertRaises(TypeError, TypeMember, None) - self.assertRaises(TypeError, TypeMember, void_type(), 1) - self.assertRaises(TypeError, TypeMember, void_type(), "foo", None) - self.assertRaises(TypeError, TypeMember, void_type(), "foo", 0, None) + self.assertRaises(TypeError, TypeMember, self.prog.void_type(), 1) + 
self.assertRaises(TypeError, TypeMember, self.prog.void_type(), "foo", None) + self.assertRaises(TypeError, TypeMember, self.prog.void_type(), "foo", 0, None) def test_callable(self): - m = TypeMember(void_type) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type) + self.assertEqual(m.type, self.prog.void_type()) - m = TypeMember(lambda: int_type("int", 4, True)) - self.assertEqual(m.type, int_type("int", 4, True)) + m = TypeMember(lambda: self.prog.int_type("int", 4, True)) + self.assertEqual(m.type, self.prog.int_type("int", 4, True)) m = TypeMember(lambda: None) self.assertRaises(TypeError, getattr, m, "type") def test_repr(self): - m = TypeMember(type=void_type, name="foo") + m = TypeMember(type=self.prog.void_type, name="foo") self.assertEqual( - repr(m), "TypeMember(type=void_type(), name='foo', bit_offset=0)" + repr(m), "TypeMember(type=prog.void_type(), name='foo', bit_offset=0)" ) - m = TypeMember(type=void_type, bit_field_size=4) + m = TypeMember(type=self.prog.void_type, bit_field_size=4) self.assertEqual( repr(m), - "TypeMember(type=void_type(), name=None, bit_offset=0, bit_field_size=4)", + "TypeMember(type=prog.void_type(), name=None, bit_offset=0, bit_field_size=4)", ) m = TypeMember(lambda: None) self.assertRaises(TypeError, repr, m) def test_cmp(self): - self.assertEqual(TypeMember(void_type()), TypeMember(void_type(), None, 0, 0)) self.assertEqual( - TypeMember(bit_offset=9, bit_field_size=7, type=void_type, name="foo"), - TypeMember(void_type(), "foo", 9, 7), + TypeMember(self.prog.void_type()), + TypeMember(self.prog.void_type(), None, 0, 0), + ) + self.assertEqual( + TypeMember( + bit_offset=9, bit_field_size=7, type=self.prog.void_type, name="foo" + ), + TypeMember(self.prog.void_type(), "foo", 9, 7), ) self.assertNotEqual( - TypeMember(int_type("int", 4, True)), TypeMember(void_type(), None, 0, 0) + TypeMember(self.prog.int_type("int", 4, True)), + TypeMember(self.prog.void_type(), None, 0, 0), ) self.assertNotEqual( 
- TypeMember(void_type(), "foo"), TypeMember(void_type(), None, 0, 0) + TypeMember(self.prog.void_type(), "foo"), + TypeMember(self.prog.void_type(), None, 0, 0), ) self.assertNotEqual( - TypeMember(void_type(), bit_offset=8), TypeMember(void_type(), None, 0, 0) + TypeMember(self.prog.void_type(), bit_offset=8), + TypeMember(self.prog.void_type(), None, 0, 0), ) self.assertNotEqual( - TypeMember(void_type(), bit_field_size=8), - TypeMember(void_type(), None, 0, 0), + TypeMember(self.prog.void_type(), bit_field_size=8), + TypeMember(self.prog.void_type(), None, 0, 0), ) -class TestTypeParameter(unittest.TestCase): +class TestTypeParameter(MockProgramTestCase): def test_init(self): - p = TypeParameter(void_type()) - self.assertEqual(p.type, void_type()) + p = TypeParameter(self.prog.void_type()) + self.assertEqual(p.type, self.prog.void_type()) self.assertIsNone(p.name) - p = TypeParameter(void_type(), "foo") - self.assertEqual(p.type, void_type()) + p = TypeParameter(self.prog.void_type(), "foo") + self.assertEqual(p.type, self.prog.void_type()) self.assertEqual(p.name, "foo") self.assertRaises(TypeError, TypeParameter, None) - self.assertRaises(TypeError, TypeParameter, void_type(), 1) + self.assertRaises(TypeError, TypeParameter, self.prog.void_type(), 1) def test_callable(self): - p = TypeParameter(void_type) - self.assertEqual(p.type, void_type()) + p = TypeParameter(self.prog.void_type) + self.assertEqual(p.type, self.prog.void_type()) - p = TypeParameter(lambda: int_type("int", 4, True)) - self.assertEqual(p.type, int_type("int", 4, True)) + p = TypeParameter(lambda: self.prog.int_type("int", 4, True)) + self.assertEqual(p.type, self.prog.int_type("int", 4, True)) p = TypeParameter(lambda: None) self.assertRaises(TypeError, getattr, p, "type") def test_repr(self): - p = TypeParameter(type=void_type, name="foo") - self.assertEqual(repr(p), "TypeParameter(type=void_type(), name='foo')") + p = TypeParameter(type=self.prog.void_type, name="foo") + 
self.assertEqual(repr(p), "TypeParameter(type=prog.void_type(), name='foo')") - p = TypeParameter(type=void_type) - self.assertEqual(repr(p), "TypeParameter(type=void_type(), name=None)") + p = TypeParameter(type=self.prog.void_type) + self.assertEqual(repr(p), "TypeParameter(type=prog.void_type(), name=None)") p = TypeParameter(lambda: None) self.assertRaises(TypeError, repr, p) def test_cmp(self): - self.assertEqual(TypeParameter(void_type()), TypeParameter(void_type(), None)) self.assertEqual( - TypeParameter(name="foo", type=void_type), TypeParameter(void_type(), "foo") + TypeParameter(self.prog.void_type()), + TypeParameter(self.prog.void_type(), None), + ) + self.assertEqual( + TypeParameter(name="foo", type=self.prog.void_type), + TypeParameter(self.prog.void_type(), "foo"), ) self.assertNotEqual( - TypeParameter(int_type("int", 4, True)), TypeParameter(void_type(), None) + TypeParameter(self.prog.int_type("int", 4, True)), + TypeParameter(self.prog.void_type(), None), ) self.assertNotEqual( - TypeParameter(void_type(), "foo"), TypeParameter(void_type(), None) + TypeParameter(self.prog.void_type(), "foo"), + TypeParameter(self.prog.void_type(), None), ) From e49a87a3d77823e188b231c243c9f3a7ab0fe77b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Aug 2020 11:26:39 -0700 Subject: [PATCH 27/56] libdrgn: remove struct drgn_object::prog We can get it via the type now. 
Signed-off-by: Omar Sandoval --- libdrgn/arch_x86_64.c.in | 4 +- libdrgn/drgn.h.in | 8 ++- libdrgn/language_c.c | 65 +++++++++++--------- libdrgn/linux_kernel_helpers.c | 71 +++++++++++----------- libdrgn/object.c | 94 +++++++++++++++-------------- libdrgn/program.c | 2 +- libdrgn/python/drgnpy.h | 2 +- libdrgn/python/object.c | 105 +++++++++++++++++---------------- libdrgn/stack_trace.c | 6 +- 9 files changed, 191 insertions(+), 166 deletions(-) diff --git a/libdrgn/arch_x86_64.c.in b/libdrgn/arch_x86_64.c.in index 82cb49440..1bec2e2c1 100644 --- a/libdrgn/arch_x86_64.c.in +++ b/libdrgn/arch_x86_64.c.in @@ -208,7 +208,7 @@ set_initial_registers_inactive_task_frame(Dwfl_Thread *thread, Dwarf_Word dwarf_regs[5]; uint64_t sp; - drgn_object_init(&reg_obj, frame_obj->prog); + drgn_object_init(&reg_obj, drgn_object_program(frame_obj)); err = read_register(&reg_obj, frame_obj, "bx", &dwarf_regs[0]); if (err) @@ -263,7 +263,7 @@ linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread, const struct drgn_object *task_obj) { struct drgn_error *err; - struct drgn_program *prog = task_obj->prog; + struct drgn_program *prog = drgn_object_program(task_obj); struct drgn_object sp_obj; drgn_object_init(&sp_obj, prog); diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index fd10a358a..1742212b2 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -1635,8 +1635,6 @@ static inline bool drgn_value_is_inline(uint64_t bit_size, uint64_t bit_offset) * provided functions. */ struct drgn_object { - /** Program that this object belongs to. */ - struct drgn_program *prog; /** Type of this object. */ struct drgn_type *type; /** @@ -1734,6 +1732,12 @@ void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog); */ void drgn_object_deinit(struct drgn_object *obj); +/** Get the program that a @ref drgn_object is from. 
*/ +static inline struct drgn_program * +drgn_object_program(const struct drgn_object *obj) +{ + return drgn_type_program(obj->type); +} /** Get the language of a @ref drgn_object from its type. */ static inline const struct drgn_language * diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 02e712a3d..a13f5d3cd 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1159,7 +1159,7 @@ c_format_compound_object(const struct drgn_object *obj, new->member < new->end) { struct drgn_object member; - drgn_object_init(&member, obj->prog); + drgn_object_init(&member, drgn_object_program(obj)); do { struct drgn_qualified_type member_type; bool zero; @@ -1187,7 +1187,7 @@ c_format_compound_object(const struct drgn_object *obj, return err; } - err = c_format_initializer(obj->prog, &iter.iter, indent, + err = c_format_initializer(drgn_object_program(obj), &iter.iter, indent, one_line_columns, multi_line_columns, flags & DRGN_FORMAT_OBJECT_MEMBERS_SAME_LINE, sb); @@ -1291,7 +1291,7 @@ c_format_pointer_object(const struct drgn_object *obj, return err; have_symbol = ((flags & DRGN_FORMAT_OBJECT_SYMBOLIZE) && - drgn_program_find_symbol_by_address_internal(obj->prog, + drgn_program_find_symbol_by_address_internal(drgn_object_program(obj), uvalue, NULL, &sym)); @@ -1316,12 +1316,12 @@ c_format_pointer_object(const struct drgn_object *obj, return &drgn_enomem; if (c_string) { - err = c_format_string(&obj->prog->reader, uvalue, UINT64_MAX, - sb); + err = c_format_string(&drgn_object_program(obj)->reader, uvalue, + UINT64_MAX, sb); } else { struct drgn_object dereferenced; - drgn_object_init(&dereferenced, obj->prog); + drgn_object_init(&dereferenced, drgn_object_program(obj)); err = drgn_object_dereference(&dereferenced, obj); if (err) { drgn_object_deinit(&dereferenced); @@ -1451,7 +1451,7 @@ c_format_array_object(const struct drgn_object *obj, if ((flags & DRGN_FORMAT_OBJECT_STRING) && iter.length && is_character_type(iter.element_type.type)) { if 
(obj->is_reference) { - return c_format_string(&obj->prog->reader, + return c_format_string(&drgn_object_program(obj)->reader, obj->reference.address, iter.length, sb); } else { @@ -1491,7 +1491,7 @@ c_format_array_object(const struct drgn_object *obj, iter.length) { struct drgn_object element; - drgn_object_init(&element, obj->prog); + drgn_object_init(&element, drgn_object_program(obj)); do { bool zero; @@ -1515,8 +1515,9 @@ c_format_array_object(const struct drgn_object *obj, if (err) return err; } - return c_format_initializer(obj->prog, &iter.iter, indent, - one_line_columns, multi_line_columns, + return c_format_initializer(drgn_object_program(obj), &iter.iter, + indent, one_line_columns, + multi_line_columns, flags & DRGN_FORMAT_OBJECT_ELEMENTS_SAME_LINE, sb); } @@ -2707,7 +2708,8 @@ struct drgn_error *c_integer_literal(struct drgn_object *res, uint64_t uvalue) bits = fls(uvalue); qualified_type.qualifiers = 0; for (i = 0; i < ARRAY_SIZE(types); i++) { - err = drgn_program_find_primitive_type(res->prog, types[i], + err = drgn_program_find_primitive_type(drgn_object_program(res), + types[i], &qualified_type.type); if (err) return err; @@ -2732,7 +2734,8 @@ struct drgn_error *c_bool_literal(struct drgn_object *res, bool bvalue) struct drgn_error *err; struct drgn_qualified_type qualified_type; - err = drgn_program_find_primitive_type(res->prog, DRGN_C_TYPE_INT, + err = drgn_program_find_primitive_type(drgn_object_program(res), + DRGN_C_TYPE_INT, &qualified_type.type); if (err) return err; @@ -2745,7 +2748,8 @@ struct drgn_error *c_float_literal(struct drgn_object *res, double fvalue) struct drgn_error *err; struct drgn_qualified_type qualified_type; - err = drgn_program_find_primitive_type(res->prog, DRGN_C_TYPE_DOUBLE, + err = drgn_program_find_primitive_type(drgn_object_program(res), + DRGN_C_TYPE_DOUBLE, &qualified_type.type); if (err) return err; @@ -3132,10 +3136,11 @@ static struct drgn_error *c_operand_type(const struct drgn_object *obj, switch 
(drgn_type_kind(type_ret->underlying_type)) { case DRGN_TYPE_ARRAY: { uint8_t word_size; - err = drgn_program_word_size(obj->prog, &word_size); + err = drgn_program_word_size(drgn_object_program(obj), + &word_size); if (err) return err; - err = drgn_pointer_type_create(obj->prog, + err = drgn_pointer_type_create(drgn_object_program(obj), drgn_type_type(type_ret->underlying_type), word_size, drgn_type_language(type_ret->underlying_type), @@ -3151,11 +3156,12 @@ static struct drgn_error *c_operand_type(const struct drgn_object *obj, .qualifiers = type_ret->qualifiers, }; uint8_t word_size; - err = drgn_program_word_size(obj->prog, &word_size); + err = drgn_program_word_size(drgn_object_program(obj), + &word_size); if (err) return err; - err = drgn_pointer_type_create(obj->prog, function_type, - word_size, + err = drgn_pointer_type_create(drgn_object_program(obj), + function_type, word_size, drgn_type_language(type_ret->underlying_type), &type_ret->type); if (err) @@ -3267,8 +3273,8 @@ struct drgn_error *c_op_cmp(const struct drgn_object *lhs, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, - &type); + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, + &rhs_type, &type); if (err) return err; @@ -3309,8 +3315,8 @@ struct drgn_error *c_op_add(struct drgn_object *res, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, - &type); + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, + &rhs_type, &type); if (err) return err; @@ -3340,7 +3346,7 @@ struct drgn_error *c_op_sub(struct drgn_object *res, if (lhs_pointer && rhs_pointer) { struct drgn_object_type type = {}; - err = drgn_program_find_primitive_type(lhs->prog, + err = 
drgn_program_find_primitive_type(drgn_object_program(lhs), DRGN_C_TYPE_PTRDIFF_T, &type.type); if (err) @@ -3361,8 +3367,8 @@ struct drgn_error *c_op_sub(struct drgn_object *res, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, - &type); + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, + &rhs_type, &type); if (err) return err; @@ -3392,7 +3398,8 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ return drgn_error_binary_op("binary "#op, &lhs_type, \ &rhs_type); \ \ - err = c_common_real_type(lhs->prog, &lhs_type, &rhs_type, &type); \ + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, \ + &rhs_type, &type); \ if (err) \ return err; \ \ @@ -3425,10 +3432,10 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ return drgn_error_binary_op("binary " #op, &lhs_type, \ &rhs_type); \ \ - err = c_integer_promotions(lhs->prog, &lhs_type); \ + err = c_integer_promotions(drgn_object_program(lhs), &lhs_type); \ if (err) \ return err; \ - err = c_integer_promotions(lhs->prog, &rhs_type); \ + err = c_integer_promotions(drgn_object_program(lhs), &rhs_type); \ if (err) \ return err; \ \ @@ -3451,7 +3458,7 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ if (!drgn_type_is_##check(type.underlying_type)) \ return drgn_error_unary_op("unary " #op, &type); \ \ - err = c_integer_promotions(obj->prog, &type); \ + err = c_integer_promotions(drgn_object_program(obj), &type); \ if (err) \ return err; \ \ diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index dedaed776..83c6a0bb0 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -108,14 +108,15 @@ linux_helper_radix_tree_lookup(struct drgn_object *res, struct drgn_member_info member; struct drgn_qualified_type node_type; - drgn_object_init(&node, res->prog); - 
drgn_object_init(&tmp, res->prog); + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); /* node = root->xa_head */ err = drgn_object_member_dereference(&node, root, "xa_head"); if (!err) { - err = drgn_program_find_type(res->prog, "struct xa_node *", - NULL, &node_type); + err = drgn_program_find_type(drgn_object_program(res), + "struct xa_node *", NULL, + &node_type); if (err) goto out; RADIX_TREE_INTERNAL_NODE = 2; @@ -125,14 +126,14 @@ linux_helper_radix_tree_lookup(struct drgn_object *res, err = drgn_object_member_dereference(&node, root, "rnode"); if (err) goto out; - err = drgn_program_find_type(res->prog, "void *", NULL, - &node_type); + err = drgn_program_find_type(drgn_object_program(res), "void *", + NULL, &node_type); if (err) goto out; err = drgn_object_cast(&node, node_type, &node); if (err) goto out; - err = drgn_program_find_type(res->prog, + err = drgn_program_find_type(drgn_object_program(res), "struct radix_tree_node *", NULL, &node_type); if (err) @@ -142,7 +143,7 @@ linux_helper_radix_tree_lookup(struct drgn_object *res, goto out; } - err = drgn_program_member_info(res->prog, + err = drgn_program_member_info(drgn_object_program(res), drgn_type_type(node_type.type).type, "slots", &member); if (err) @@ -204,7 +205,7 @@ struct drgn_error *linux_helper_idr_find(struct drgn_object *res, struct drgn_error *err; struct drgn_object tmp; - drgn_object_init(&tmp, res->prog); + drgn_object_init(&tmp, drgn_object_program(res)); /* id -= idr->idr_base */ err = drgn_object_member_dereference(&tmp, idr, "idr_base"); @@ -255,29 +256,29 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, union drgn_value ns_level, pidhash_shift; uint64_t i; - err = drgn_program_find_type(res->prog, "struct pid *", NULL, - &pidp_type); + err = drgn_program_find_type(drgn_object_program(res), "struct pid *", + NULL, &pidp_type); if (err) return err; - err = drgn_program_find_type(res->prog, "struct 
upid", NULL, - &upid_type); + err = drgn_program_find_type(drgn_object_program(res), "struct upid", + NULL, &upid_type); if (err) return err; - err = drgn_program_member_info(res->prog, upid_type.type, "pid_chain", - &pid_chain_member); + err = drgn_program_member_info(drgn_object_program(res), upid_type.type, + "pid_chain", &pid_chain_member); if (err) return err; - err = drgn_program_member_info(res->prog, upid_type.type, "nr", - &nr_member); + err = drgn_program_member_info(drgn_object_program(res), upid_type.type, + "nr", &nr_member); if (err) return err; - err = drgn_program_member_info(res->prog, upid_type.type, "ns", - &ns_member); + err = drgn_program_member_info(drgn_object_program(res), upid_type.type, + "ns", &ns_member); if (err) return err; - drgn_object_init(&node, res->prog); - drgn_object_init(&tmp, res->prog); + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); err = drgn_object_read(&tmp, ns); if (err) @@ -293,7 +294,8 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, goto out; /* i = 1 << pidhash_shift */ - err = drgn_program_find_object(res->prog, "pidhash_shift", NULL, + err = drgn_program_find_object(drgn_object_program(res), + "pidhash_shift", NULL, DRGN_FIND_OBJECT_ANY, &tmp); if (err) goto out; @@ -383,7 +385,7 @@ struct drgn_error *linux_helper_find_pid(struct drgn_object *res, struct drgn_error *err; struct drgn_object tmp; - drgn_object_init(&tmp, res->prog); + drgn_object_init(&tmp, drgn_object_program(res)); /* (struct pid *)idr_find(&ns->idr, pid) */ err = drgn_object_member_dereference(&tmp, ns, "idr"); @@ -396,14 +398,16 @@ struct drgn_error *linux_helper_find_pid(struct drgn_object *res, err = linux_helper_idr_find(&tmp, &tmp, pid); if (err) goto out; - err = drgn_program_find_type(res->prog, "struct pid *", NULL, + err = drgn_program_find_type(drgn_object_program(res), + "struct pid *", NULL, &qualified_type); if (err) goto out; err = 
drgn_object_cast(res, qualified_type, &tmp); } else if (err->code == DRGN_ERROR_LOOKUP) { drgn_error_destroy(err); - err = drgn_program_find_object(res->prog, "pid_hash", NULL, + err = drgn_program_find_object(drgn_object_program(res), + "pid_hash", NULL, DRGN_FIND_OBJECT_ANY, &tmp); if (err) goto out; @@ -425,9 +429,10 @@ struct drgn_error *linux_helper_pid_task(struct drgn_object *res, struct drgn_object first; char member[64]; - drgn_object_init(&first, res->prog); + drgn_object_init(&first, drgn_object_program(res)); - err = drgn_program_find_type(res->prog, "struct task_struct *", NULL, + err = drgn_program_find_type(drgn_object_program(res), + "struct task_struct *", NULL, &task_structp_type); if (err) goto out; @@ -482,14 +487,14 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, struct drgn_object pid_type_obj; union drgn_value pid_type; - drgn_object_init(&pid_obj, res->prog); - drgn_object_init(&pid_type_obj, res->prog); + drgn_object_init(&pid_obj, drgn_object_program(res)); + drgn_object_init(&pid_type_obj, drgn_object_program(res)); err = linux_helper_find_pid(&pid_obj, ns, pid); if (err) goto out; - err = drgn_program_find_object(res->prog, "PIDTYPE_PID", NULL, - DRGN_FIND_OBJECT_CONSTANT, + err = drgn_program_find_object(drgn_object_program(res), "PIDTYPE_PID", + NULL, DRGN_FIND_OBJECT_CONSTANT, &pid_type_obj); if (err) goto out; @@ -506,7 +511,7 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, static struct drgn_error *cache_task_state_chars(struct drgn_object *tmp) { struct drgn_error *err; - struct drgn_program *prog = tmp->prog; + struct drgn_program *prog = drgn_object_program(tmp); struct drgn_object task_state_array; uint64_t length; size_t i; @@ -582,7 +587,7 @@ linux_helper_task_state_to_char(const struct drgn_object *task, char *ret) { static const uint64_t TASK_NOLOAD = 0x400; struct drgn_error *err; - struct drgn_program *prog = task->prog; + struct drgn_program *prog = drgn_object_program(task); 
struct drgn_object tmp; union drgn_value task_state, exit_state; uint64_t state; diff --git a/libdrgn/object.c b/libdrgn/object.c index 7634ded8b..498cd8eb6 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -17,7 +17,6 @@ LIBDRGN_PUBLIC void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog) { - obj->prog = prog; obj->type = drgn_void_type(prog, NULL); obj->bit_size = 0; obj->qualifiers = 0; @@ -356,8 +355,8 @@ drgn_object_set_buffer(struct drgn_object *res, enum drgn_object_kind kind; uint64_t bit_size; - err = drgn_byte_order_to_little_endian(res->prog, byte_order, - &little_endian); + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &little_endian); if (err) return err; @@ -377,7 +376,8 @@ drgn_object_set_reference_internal(struct drgn_object *res, uint64_t bit_offset, bool little_endian) { bool is_64_bit; - struct drgn_error *err = drgn_program_is_64_bit(res->prog, &is_64_bit); + struct drgn_error *err = + drgn_program_is_64_bit(drgn_object_program(res), &is_64_bit); if (err) return err; @@ -413,8 +413,8 @@ drgn_object_set_reference(struct drgn_object *res, enum drgn_object_kind kind; uint64_t bit_size; - err = drgn_byte_order_to_little_endian(res->prog, byte_order, - &little_endian); + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &little_endian); if (err) return err; @@ -433,7 +433,7 @@ drgn_object_copy(struct drgn_object *res, const struct drgn_object *obj) if (res == obj) return NULL; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -527,7 +527,7 @@ drgn_object_slice(struct drgn_object *res, const struct drgn_object *obj, enum drgn_object_kind kind; uint64_t bit_size; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from 
different programs"); } @@ -590,8 +590,8 @@ drgn_object_read_reference(const struct drgn_object *obj, if (!buf) return &drgn_enomem; } - err = drgn_memory_reader_read(&obj->prog->reader, buf, - obj->reference.address, size, + err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, + buf, obj->reference.address, size, false); if (err) { if (buf != value->ibuf) @@ -607,8 +607,8 @@ drgn_object_read_reference(const struct drgn_object *obj, char buf[9]; assert(size <= sizeof(buf)); - err = drgn_memory_reader_read(&obj->prog->reader, buf, - obj->reference.address, size, + err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, + buf, obj->reference.address, size, false); if (err) return err; @@ -627,7 +627,7 @@ drgn_object_read(struct drgn_object *res, const struct drgn_object *obj) if (obj->is_reference) { union drgn_value value; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -817,8 +817,8 @@ drgn_object_read_c_string(const struct drgn_object *obj, char **ret) obj->type); } - return drgn_program_read_c_string(obj->prog, address, false, max_size, - ret); + return drgn_program_read_c_string(drgn_object_program(obj), address, + false, max_size, ret); } LIBDRGN_PUBLIC struct drgn_error * @@ -931,7 +931,7 @@ drgn_compound_object_is_zero(const struct drgn_object *obj, struct drgn_type_member *members; size_t num_members, i; - drgn_object_init(&member, obj->prog); + drgn_object_init(&member, drgn_object_program(obj)); members = drgn_type_members(underlying_type); num_members = drgn_type_num_members(underlying_type); for (i = 0; i < num_members; i++) { @@ -972,7 +972,7 @@ drgn_array_object_is_zero(const struct drgn_object *obj, if (err) return err; - drgn_object_init(&element, obj->prog); + drgn_object_init(&element, drgn_object_program(obj)); length = drgn_type_length(underlying_type); for (i = 0; i < length; i++) { 
err = drgn_object_slice(&element, obj, element_type, @@ -1065,7 +1065,7 @@ drgn_object_cast(struct drgn_object *res, { const struct drgn_language *lang = drgn_type_language(qualified_type.type); - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1084,13 +1084,13 @@ drgn_object_reinterpret(struct drgn_object *res, enum drgn_object_kind kind; uint64_t bit_size; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } - err = drgn_byte_order_to_little_endian(res->prog, byte_order, - &little_endian); + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &little_endian); if (err) return err; @@ -1123,24 +1123,24 @@ drgn_object_reinterpret(struct drgn_object *res, LIBDRGN_PUBLIC struct drgn_error * drgn_object_integer_literal(struct drgn_object *res, uint64_t uvalue) { - const struct drgn_language *lang = drgn_program_language(res->prog); - + const struct drgn_language *lang = + drgn_program_language(drgn_object_program(res)); return lang->integer_literal(res, uvalue); } LIBDRGN_PUBLIC struct drgn_error * drgn_object_bool_literal(struct drgn_object *res, bool bvalue) { - const struct drgn_language *lang = drgn_program_language(res->prog); - + const struct drgn_language *lang = + drgn_program_language(drgn_object_program(res)); return lang->bool_literal(res, bvalue); } LIBDRGN_PUBLIC struct drgn_error * drgn_object_float_literal(struct drgn_object *res, double fvalue) { - const struct drgn_language *lang = drgn_program_language(res->prog); - + const struct drgn_language *lang = + drgn_program_language(drgn_object_program(res)); return lang->float_literal(res, fvalue); } @@ -1157,8 +1157,7 @@ LIBDRGN_PUBLIC struct drgn_error *drgn_object_cmp(const struct drgn_object *lhs, int 
*ret) { const struct drgn_language *lang = drgn_object_language(lhs); - - if (lhs->prog != rhs->prog) { + if (drgn_object_program(lhs) != drgn_object_program(rhs)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1224,7 +1223,8 @@ drgn_object_##op_name(struct drgn_object *res, const struct drgn_object *lhs, \ { \ const struct drgn_language *lang = drgn_object_language(lhs); \ \ - if (lhs->prog != res->prog || rhs->prog != res->prog) { \ + if (drgn_object_program(lhs) != drgn_object_program(res) || \ + drgn_object_program(rhs) != drgn_object_program(res)) { \ return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, \ "objects are from different programs");\ } \ @@ -1253,7 +1253,7 @@ drgn_object_##op_name(struct drgn_object *res, const struct drgn_object *obj) \ { \ const struct drgn_language *lang = drgn_object_language(obj); \ \ - if (res->prog != obj->prog) { \ + if (drgn_object_program(res) != drgn_object_program(obj)) { \ return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, \ "objects are from different programs");\ } \ @@ -1272,7 +1272,7 @@ UNARY_OP(not) LIBDRGN_PUBLIC struct drgn_error * drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) { - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1290,11 +1290,13 @@ drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) struct drgn_qualified_type qualified_type = drgn_object_qualified_type(obj); uint8_t word_size; - struct drgn_error *err = drgn_program_word_size(obj->prog, &word_size); + struct drgn_error *err = + drgn_program_word_size(drgn_object_program(obj), &word_size); if (err) return err; struct drgn_qualified_type result_type; - err = drgn_pointer_type_create(obj->prog, qualified_type, word_size, + err = drgn_pointer_type_create(drgn_object_program(obj), 
qualified_type, + word_size, drgn_type_language(qualified_type.type), &result_type.type); if (err) @@ -1311,12 +1313,13 @@ drgn_object_subscript(struct drgn_object *res, const struct drgn_object *obj, struct drgn_error *err; struct drgn_element_info element; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } - err = drgn_program_element_info(obj->prog, obj->type, &element); + err = drgn_program_element_info(drgn_object_program(obj), obj->type, + &element); if (err) return err; @@ -1338,13 +1341,13 @@ drgn_object_member(struct drgn_object *res, const struct drgn_object *obj, struct drgn_error *err; struct drgn_member_info member; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } - err = drgn_program_member_info(obj->prog, obj->type, member_name, - &member); + err = drgn_program_member_info(drgn_object_program(obj), obj->type, + member_name, &member); if (err) return err; return drgn_object_slice(res, obj, member.qualified_type, @@ -1360,7 +1363,7 @@ struct drgn_error *drgn_object_member_dereference(struct drgn_object *res, struct drgn_member_value *member; struct drgn_qualified_type qualified_type; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1371,7 +1374,7 @@ struct drgn_error *drgn_object_member_dereference(struct drgn_object *res, obj->type); } - err = drgn_program_find_member(obj->prog, + err = drgn_program_find_member(drgn_object_program(obj), drgn_type_type(underlying_type).type, member_name, strlen(member_name), &member); @@ -1392,7 +1395,7 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, struct 
drgn_qualified_type qualified_type, const char *member_designator) { - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1405,7 +1408,7 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, const struct drgn_language *lang = drgn_object_language(obj); uint64_t bit_offset; - struct drgn_error *err = lang->bit_offset(obj->prog, + struct drgn_error *err = lang->bit_offset(drgn_object_program(obj), qualified_type.type, member_designator, &bit_offset); @@ -1422,11 +1425,12 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, return err; uint8_t word_size; - err = drgn_program_word_size(obj->prog, &word_size); + err = drgn_program_word_size(drgn_object_program(obj), &word_size); if (err) return err; struct drgn_qualified_type result_type; - err = drgn_pointer_type_create(obj->prog, qualified_type, word_size, + err = drgn_pointer_type_create(drgn_object_program(obj), qualified_type, + word_size, drgn_type_language(qualified_type.type), &result_type.type); if (err) diff --git a/libdrgn/program.c b/libdrgn/program.c index 0f0d724ed..6f858ada8 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1128,7 +1128,7 @@ drgn_program_find_object(struct drgn_program *prog, const char *name, enum drgn_find_object_flags flags, struct drgn_object *ret) { - if (ret && ret->prog != prog) { + if (ret && drgn_object_program(ret) != prog) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "object is from wrong program"); } diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 1fd6f892b..fce638da2 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -191,7 +191,7 @@ static inline DrgnObject *DrgnObject_alloc(Program *prog) } static inline Program *DrgnObject_prog(DrgnObject *obj) { - return container_of(obj->obj.prog, Program, prog); + return 
container_of(drgn_object_program(&obj->obj), Program, prog); } PyObject *DrgnObject_NULL(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *cast(PyObject *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index 4e9b6d9b7..73301d764 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -312,7 +312,8 @@ static int buffer_object_from_value(struct drgn_object *res, uint64_t bit_size, size; char *buf; - err = drgn_byte_order_to_little_endian(res->prog, byte_order, + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &value.little_endian); if (err) { set_drgn_error(err); @@ -354,9 +355,9 @@ static int buffer_object_from_value(struct drgn_object *res, memset(buf, 0, size); value.bit_offset = bit_offset; - if (serialize_py_object(res->prog, buf, bit_offset + bit_size, - bit_offset, value_obj, &type, - value.little_endian) == -1) { + if (serialize_py_object(drgn_object_program(res), buf, + bit_offset + bit_size, bit_offset, value_obj, + &type, value.little_endian) == -1) { if (buf != value.ibuf) free(buf); return -1; @@ -590,7 +591,7 @@ static PyObject *DrgnObject_compound_value(struct drgn_object *obj, if (!dict) return NULL; - drgn_object_init(&member, obj->prog); + drgn_object_init(&member, drgn_object_program(obj)); members = drgn_type_members(underlying_type); num_members = drgn_type_num_members(underlying_type); for (i = 0; i < num_members; i++) { @@ -662,7 +663,7 @@ static PyObject *DrgnObject_array_value(struct drgn_object *obj, if (!list) return NULL; - drgn_object_init(&element, obj->prog); + drgn_object_init(&element, drgn_object_program(obj)); for (i = 0; i < length; i++) { PyObject *element_value; @@ -874,9 +875,9 @@ static PyObject *DrgnObject_repr(DrgnObject *self) little_endian = self->obj.value.little_endian; bool print_byteorder; - if (self->obj.prog->has_platform) { + if (drgn_object_program(&self->obj)->has_platform) { bool prog_little_endian; - err 
= drgn_program_is_little_endian(self->obj.prog, + err = drgn_program_is_little_endian(drgn_object_program(&self->obj), &prog_little_endian); if (err) { set_drgn_error(err); @@ -1086,53 +1087,55 @@ static int DrgnObject_binary_operand(PyObject *self, PyObject *other, } else { *obj = tmp; /* If self isn't a DrgnObject, then other must be. */ - drgn_object_init(tmp, ((DrgnObject *)other)->obj.prog); + drgn_object_init(tmp, + drgn_object_program(&((DrgnObject *)other)->obj)); return DrgnObject_literal(tmp, self); } } -#define DrgnObject_BINARY_OP(op) \ -static PyObject *DrgnObject_##op(PyObject *left, PyObject *right) \ -{ \ - struct drgn_error *err; \ - struct drgn_object *lhs, lhs_tmp, *rhs, rhs_tmp; \ - DrgnObject *res = NULL; \ - int ret; \ - \ - ret = DrgnObject_binary_operand(left, right, &lhs, &lhs_tmp); \ - if (ret) \ - goto out; \ - ret = DrgnObject_binary_operand(right, left, &rhs, &rhs_tmp); \ - if (ret) \ - goto out_lhs; \ - \ - res = DrgnObject_alloc(container_of(lhs->prog, Program, prog)); \ - if (!res) { \ - ret = -1; \ - goto out_rhs; \ - } \ - \ - err = drgn_object_##op(&res->obj, lhs, rhs); \ - if (err) { \ - set_drgn_error(err); \ - Py_DECREF(res); \ - ret = -1; \ - goto out_rhs; \ - } \ - \ -out_rhs: \ - if (rhs == &rhs_tmp) \ - drgn_object_deinit(&rhs_tmp); \ -out_lhs: \ - if (lhs == &lhs_tmp) \ - drgn_object_deinit(&lhs_tmp); \ -out: \ - if (ret == -1) \ - return NULL; \ - else if (ret) \ - Py_RETURN_NOTIMPLEMENTED; \ - else \ - return (PyObject *)res; \ +#define DrgnObject_BINARY_OP(op) \ +static PyObject *DrgnObject_##op(PyObject *left, PyObject *right) \ +{ \ + struct drgn_error *err; \ + struct drgn_object *lhs, lhs_tmp, *rhs, rhs_tmp; \ + DrgnObject *res = NULL; \ + int ret; \ + \ + ret = DrgnObject_binary_operand(left, right, &lhs, &lhs_tmp); \ + if (ret) \ + goto out; \ + ret = DrgnObject_binary_operand(right, left, &rhs, &rhs_tmp); \ + if (ret) \ + goto out_lhs; \ + \ + res = DrgnObject_alloc(container_of(drgn_object_program(lhs), Program, 
\ + prog)); \ + if (!res) { \ + ret = -1; \ + goto out_rhs; \ + } \ + \ + err = drgn_object_##op(&res->obj, lhs, rhs); \ + if (err) { \ + set_drgn_error(err); \ + Py_DECREF(res); \ + ret = -1; \ + goto out_rhs; \ + } \ + \ +out_rhs: \ + if (rhs == &rhs_tmp) \ + drgn_object_deinit(&rhs_tmp); \ +out_lhs: \ + if (lhs == &lhs_tmp) \ + drgn_object_deinit(&lhs_tmp); \ +out: \ + if (ret == -1) \ + return NULL; \ + else if (ret) \ + Py_RETURN_NOTIMPLEMENTED; \ + else \ + return (PyObject *)res; \ } DrgnObject_BINARY_OP(add) DrgnObject_BINARY_OP(sub) diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 4224e9a05..a5266a13e 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -553,8 +553,10 @@ drgn_object_stack_trace(const struct drgn_object *obj, err = drgn_object_read_integer(obj, &value); if (err) return err; - return drgn_get_stack_trace(obj->prog, value.uvalue, NULL, ret); + return drgn_get_stack_trace(drgn_object_program(obj), + value.uvalue, NULL, ret); } else { - return drgn_get_stack_trace(obj->prog, 0, obj, ret); + return drgn_get_stack_trace(drgn_object_program(obj), 0, obj, + ret); } } From 2fc514f2a410bc3e9583cdca7a97660990bf382d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Aug 2020 17:25:30 -0700 Subject: [PATCH 28/56] libdrgn/python: add Qualifiers.NONE and stop using Optional[Qualifiers] I originally did it this way because pydoc doesn't handle non-trivial defaults in signature very well (see commit 67a16a09b8d8 ("tests: test that Python documentation renders")). drgndoc doesn't generate signature for pydoc anymore, though, so we don't need to worry about it and can clean up the typing. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 39 ++++++++++++++++-------------- libdrgn/build-aux/gen_constants.py | 37 +++++++++++++++++++++------- libdrgn/python/drgnpy.h | 1 - libdrgn/python/type.c | 30 +++++++++++------------ tests/test_type.py | 5 +--- 5 files changed, 64 insertions(+), 48 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index b593c7fa4..116c0d963 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -443,7 +443,7 @@ class Program: def void_type( self, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -459,7 +459,7 @@ class Program: size: IntegerLike, is_signed: bool, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -477,7 +477,7 @@ class Program: name: str, size: IntegerLike, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -494,7 +494,7 @@ class Program: name: str, size: IntegerLike, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -512,7 +512,7 @@ class Program: size: IntegerLike, type: Type, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -532,7 +532,7 @@ class Program: size: IntegerLike, members: Sequence[TypeMember], *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -552,7 +552,7 @@ class Program: size: None = None, members: None = None, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete structure type.""" @@ -564,7 +564,7 @@ class Program: size: IntegerLike, members: 
Sequence[TypeMember], *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -579,7 +579,7 @@ class Program: size: None = None, members: None = None, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete union type.""" @@ -591,7 +591,7 @@ class Program: size: IntegerLike, members: Sequence[TypeMember], *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -606,7 +606,7 @@ class Program: size: None = None, members: None = None, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete class type.""" @@ -618,7 +618,7 @@ class Program: type: Type, enumerators: Sequence[TypeEnumerator], *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -638,7 +638,7 @@ class Program: type: None = None, enumerators: None = None, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete enumerated type.""" @@ -648,7 +648,7 @@ class Program: name: str, type: Type, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -665,7 +665,7 @@ class Program: type: Type, size: Optional[int] = None, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -685,7 +685,7 @@ class Program: type: Type, length: Optional[int] = None, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: 
Optional[Language] = None, ) -> Type: """ @@ -703,7 +703,7 @@ class Program: parameters: Sequence[TypeParameter], is_variadic: bool = False, *, - qualifiers: Optional[Qualifiers] = None, + qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ @@ -1525,7 +1525,7 @@ class Type: is always ``True``. """ ... - def qualified(self, qualifiers: Optional[Qualifiers]) -> Type: + def qualified(self, qualifiers: Qualifiers) -> Type: """ Get a copy of this type with different qualifiers. @@ -1709,6 +1709,9 @@ class PrimitiveType(enum.Enum): class Qualifiers(enum.Flag): """``Qualifiers`` are modifiers on types.""" + NONE = ... + """No qualifiers.""" + CONST = ... """Constant type.""" diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 354ae78f5..5a589c483 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -6,8 +6,14 @@ import sys -def gen_constant_class(drgn_h, output_file, class_name, enum_class, regex): - matches = re.findall(r"^\s*(" + regex + r")\s*[=,]", drgn_h, flags=re.MULTILINE) +def gen_constant_class(drgn_h, output_file, class_name, enum_class, constants, regex): + constants = list(constants) + constants.extend( + ("_".join(groups[1:]), groups[0]) + for groups in re.findall( + r"^\s*(" + regex + r")\s*[=,]", drgn_h, flags=re.MULTILINE + ) + ) output_file.write( f""" static int add_{class_name}(PyObject *m, PyObject *enum_module) @@ -15,15 +21,15 @@ def gen_constant_class(drgn_h, output_file, class_name, enum_class, regex): PyObject *tmp, *item; int ret = -1; - tmp = PyList_New({len(matches)}); + tmp = PyList_New({len(constants)}); if (!tmp) goto out; """ ) - for i, groups in enumerate(matches): + for i, (name, value) in enumerate(constants): output_file.write( f"""\ - item = Py_BuildValue("sk", "{'_'.join(groups[1:])}", {groups[0]}); + item = Py_BuildValue("sk", "{name}", {value}); if (!item) goto out; PyList_SET_ITEM(tmp, {i}, item); @@ -72,23 +78,30 
@@ def gen_constants(input_file, output_file): """ ) gen_constant_class( - drgn_h, output_file, "Architecture", "Enum", r"DRGN_ARCH_([a-zA-Z0-9_]+)" + drgn_h, output_file, "Architecture", "Enum", (), r"DRGN_ARCH_([a-zA-Z0-9_]+)" ) gen_constant_class( drgn_h, output_file, "FindObjectFlags", "Flag", + (), r"DRGN_FIND_OBJECT_([a-zA-Z0-9_]+)", ) gen_constant_class( - drgn_h, output_file, "PrimitiveType", "Enum", r"DRGN_(C)_TYPE_([a-zA-Z0-9_]+)" + drgn_h, + output_file, + "PrimitiveType", + "Enum", + (), + r"DRGN_(C)_TYPE_([a-zA-Z0-9_]+)", ) gen_constant_class( drgn_h, output_file, "PlatformFlags", "Flag", + (), r"DRGN_PLATFORM_([a-zA-Z0-9_]+)(?type), Program, prog); } PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type); -int qualifiers_converter(PyObject *arg, void *result); DrgnType *Program_void_type(Program *self, PyObject *args, PyObject *kwds); DrgnType *Program_int_type(Program *self, PyObject *args, PyObject *kwds); DrgnType *Program_bool_type(Program *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 86997772c..93e03cc7e 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -633,17 +633,15 @@ static PyObject *DrgnType_is_complete(DrgnType *self) return PyBool_FromLong(drgn_type_is_complete(self->type)); } -int qualifiers_converter(PyObject *o, void *p) +static int qualifiers_converter(PyObject *o, void *p) { struct enum_arg arg = { .type = Qualifiers_class, .value = 0, - .allow_none = true, }; - if (!enum_converter(o, &arg)) return 0; - *(unsigned char *)p = arg.value; + *(enum drgn_qualifiers *)p = arg.value; return 1; } @@ -651,7 +649,7 @@ static PyObject *DrgnType_qualified(DrgnType *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { "qualifiers", NULL, }; - unsigned char qualifiers; + enum drgn_qualifiers qualifiers; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:qualified", keywords, qualifiers_converter, &qualifiers)) return NULL; @@ -1166,7 +1164,7 @@ 
PyTypeObject TypeParameter_type = { DrgnType *Program_void_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { "qualifiers", "language", NULL }; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$O&O&:void_type", keywords, qualifiers_converter, @@ -1189,7 +1187,7 @@ DrgnType *Program_int_type(Program *self, PyObject *args, PyObject *kwds) PyObject *name_obj; struct index_arg size = {}; int is_signed; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&p|$O&O&:int_type", keywords, &PyUnicode_Type, &name_obj, @@ -1238,7 +1236,7 @@ DrgnType *Program_bool_type(Program *self, PyObject *args, PyObject *kwds) }; PyObject *name_obj; struct index_arg size = {}; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&|$O&O&:bool_type", keywords, &PyUnicode_Type, &name_obj, @@ -1286,7 +1284,7 @@ DrgnType *Program_float_type(Program *self, PyObject *args, PyObject *kwds) }; PyObject *name_obj; struct index_arg size = {}; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&|$O&O&:float_type", @@ -1336,7 +1334,7 @@ DrgnType *Program_complex_type(Program *self, PyObject *args, PyObject *kwds) PyObject *name_obj; struct index_arg size = {}; DrgnType *real_type_obj; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&O!|$O&O&:complex_type", keywords, @@ -1506,7 +1504,7 @@ static DrgnType *Program_compound_type(Program *self, PyObject *args, PyObject *tag_obj; struct index_arg size = { .allow_none = 
true, .is_none = true }; PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_format, keywords, &tag_obj, index_converter, &size, @@ -1682,7 +1680,7 @@ DrgnType *Program_enum_type(Program *self, PyObject *args, PyObject *kwds) PyObject *tag_obj; PyObject *compatible_type_obj = Py_None; PyObject *enumerators_obj = Py_None; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO$O&O&:enum_type", keywords, &tag_obj, @@ -1820,7 +1818,7 @@ DrgnType *Program_typedef_type(Program *self, PyObject *args, PyObject *kwds) }; PyObject *name_obj; DrgnType *aliased_type_obj; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!|$O&O&:typedef_type", @@ -1871,7 +1869,7 @@ DrgnType *Program_pointer_type(Program *self, PyObject *args, PyObject *kwds) }; DrgnType *referenced_type_obj; struct index_arg size = { .allow_none = true, .is_none = true }; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O&$O&O&:pointer_type", keywords, &DrgnType_type, @@ -1918,7 +1916,7 @@ DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds) }; DrgnType *element_type_obj; struct index_arg length = { .allow_none = true, .is_none = true }; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O&$O&O&:array_type", keywords, &DrgnType_type, @@ -2000,7 +1998,7 @@ DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds) DrgnType *return_type_obj; PyObject *parameters_obj; int 
is_variadic = 0; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|p$O&O&:function_type", keywords, &DrgnType_type, diff --git a/tests/test_type.py b/tests/test_type.py index cf9b440d2..3510dfd46 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -1271,10 +1271,7 @@ def test_qualifiers(self): self.assertEqual(t.qualified(Qualifiers(0)), t.unqualified()) self.assertRaisesRegex( - TypeError, - "expected Qualifiers or None", - self.prog.void_type, - qualifiers=1.5, + TypeError, "expected Qualifiers", self.prog.void_type, qualifiers=1.5 ) def test_language(self): From e96d9fd3fde0804223785339ac854de8d78512bd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Aug 2020 17:32:49 -0700 Subject: [PATCH 29/56] libdrgn/python: don't allow None for Program.object() flags Similar to the previous commit, this was to work around pydoc issues that we don't have anymore. Signed-off-by: Omar Sandoval --- _drgn.pyi | 6 ++---- libdrgn/python/program.c | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 116c0d963..f6f79c558 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -165,16 +165,14 @@ class Program: def object( self, name: str, - flags: Optional[FindObjectFlags] = None, + flags: FindObjectFlags = FindObjectFlags.ANY, filename: Optional[str] = None, ) -> Object: """ Get the object (variable, constant, or function) with the given name. :param name: The object name. - :param flags: Flags indicating what kind of object to look for. If this - is ``None`` or not given, it defaults to - :attr:`FindObjectFlags.ANY`. + :param flags: Flags indicating what kind of object to look for. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. 
:raises LookupError: if no objects with the given name are found in diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index ada225a46..96b04b0b1 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -637,7 +637,6 @@ static DrgnObject *Program_object(Program *self, PyObject *args, struct enum_arg flags = { .type = FindObjectFlags_class, .value = DRGN_FIND_OBJECT_ANY, - .allow_none = true, }; struct path_arg filename = {.allow_none = true}; From 36068a0ea842845e5455b418f40ffd0c91de49d7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Aug 2020 22:15:04 -0700 Subject: [PATCH 30/56] Fix trailing commas for Black v20.8b1 Black was recently changed to treat a trailing comma as an indicator to put each item/argument on its own line. We have a bunch of places where something previously had to be split into multiple lines, then was edited to fit on one line, but Black kept the trailing comma. Now this update wants to unnecessarily split it back up. For now, let's get rid of these commas. Hopefully in the future Black has a way to opt out of this. 
Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/format.py | 8 ++------ drgn/helpers/linux/fs.py | 2 +- setup.py | 2 +- tests/__init__.py | 4 ++-- tests/dwarfwriter.py | 4 ++-- tests/elfwriter.py | 2 +- tests/helpers/linux/test_uts.py | 6 ++---- tests/test_dwarf.py | 22 ++++++++++------------ tests/test_language_c.py | 2 +- tests/test_object.py | 2 +- tests/test_program.py | 16 ++++++---------- tests/test_type.py | 4 ++-- vmtest/manage.py | 2 +- vmtest/resolver.py | 4 ++-- 14 files changed, 34 insertions(+), 46 deletions(-) diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index 489c57021..fec1be9cb 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -190,9 +190,7 @@ def visit_Tuple( for i, elt in enumerate(node.elts): if i > 0: self._parts.append(", ") - self._visit( - elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None, - ) + self._visit(elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None) if len(node.elts) == 1: self._parts.append(",") if parens: @@ -208,9 +206,7 @@ def visit_List( for i, elt in enumerate(node.elts): if i > 0: self._parts.append(", ") - self._visit( - elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None, - ) + self._visit(elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None) if self._rst: self._parts.append("\\") self._parts.append("]") diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 4ca2b2ec9..27a922118 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -126,7 +126,7 @@ def path_lookup( return Object( mnt.prog_, "struct path", - value={"mnt": mnt.mnt.address_of_(), "dentry": dentry,}, + value={"mnt": mnt.mnt.address_of_(), "dentry": dentry}, ) diff --git a/setup.py b/setup.py index f23283cab..1826fa807 100755 --- a/setup.py +++ b/setup.py @@ -304,7 +304,7 @@ def get_version(): "egg_info": egg_info, "test": test, }, - entry_points={"console_scripts": ["drgn=drgn.internal.cli:main"],}, + 
entry_points={"console_scripts": ["drgn=drgn.internal.cli:main"]}, python_requires=">=3.6", author="Omar Sandoval", author_email="osandov@osandov.com", diff --git a/tests/__init__.py b/tests/__init__.py index 4e95c4123..b9b1aaee1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -226,9 +226,9 @@ def setUp(self): def add_memory_segment(self, buf, virt_addr=None, phys_addr=None): if virt_addr is not None: self.prog.add_memory_segment( - virt_addr, len(buf), functools.partial(mock_memory_read, buf), + virt_addr, len(buf), functools.partial(mock_memory_read, buf) ) if phys_addr is not None: self.prog.add_memory_segment( - phys_addr, len(buf), functools.partial(mock_memory_read, buf), True, + phys_addr, len(buf), functools.partial(mock_memory_read, buf), True ) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 8600f69dc..b9dfcc3e6 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -210,7 +210,7 @@ def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): return create_elf_file( ET.EXEC, [ - ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b"",), + ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), ElfSection( name=".debug_abbrev", sh_type=SHT.PROGBITS, @@ -226,7 +226,7 @@ def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): sh_type=SHT.PROGBITS, data=_compile_debug_line(cu_die, little_endian), ), - ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0",), + ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0"), ], little_endian=little_endian, bits=bits, diff --git a/tests/elfwriter.py b/tests/elfwriter.py index b96246eb7..52a0caaa9 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -52,7 +52,7 @@ def create_elf_file( phdr_struct = struct.Struct(endian + "8I") e_machine = 3 if little_endian else 8 # EM_386 or EM_MIPS - shstrtab = ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=bytearray(1),) + shstrtab = ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=bytearray(1)) tmp 
= [shstrtab] tmp.extend(sections) sections = tmp diff --git a/tests/helpers/linux/test_uts.py b/tests/helpers/linux/test_uts.py index 6755b686b..0f4db8f96 100644 --- a/tests/helpers/linux/test_uts.py +++ b/tests/helpers/linux/test_uts.py @@ -10,12 +10,10 @@ class TestUts(LinuxHelperTestCase): def test_uts_release(self): self.assertEqual( - self.prog["UTS_RELEASE"].string_().decode(), os.uname().release, + self.prog["UTS_RELEASE"].string_().decode(), os.uname().release ) def test_uts_release_no_debug_info(self): prog = drgn.Program() prog.set_kernel() - self.assertEqual( - prog["UTS_RELEASE"].string_().decode(), os.uname().release, - ) + self.assertEqual(prog["UTS_RELEASE"].string_().decode(), os.uname().release) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index e355e453a..1771ff703 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1958,7 +1958,7 @@ def test_typedef_missing_name(self): test_type_dies( ( DwarfDie( - DW_TAG.typedef, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + DW_TAG.typedef, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) ), int_die, ) @@ -1975,7 +1975,7 @@ def test_typedef_void(self): prog = dwarf_program( test_type_dies( DwarfDie( - DW_TAG.typedef, (DwarfAttrib(DW_AT.name, DW_FORM.string, "VOID"),), + DW_TAG.typedef, (DwarfAttrib(DW_AT.name, DW_FORM.string, "VOID"),) ) ) ) @@ -2196,7 +2196,7 @@ def test_incomplete_array_no_subrange(self): test_type_dies( ( DwarfDie( - DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) ), int_die, ) @@ -2377,7 +2377,7 @@ def test_array_of_zero_length_array_typedef_old_gcc(self): DwarfDie( DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), - (DwarfDie(DW_TAG.subrange_type, (),),), + (DwarfDie(DW_TAG.subrange_type, ()),), ), int_die, ) @@ -2574,7 +2574,7 @@ def test_zero_length_array_only_member_old_gcc(self): DwarfDie( DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), - 
(DwarfDie(DW_TAG.subrange_type, (),),), + (DwarfDie(DW_TAG.subrange_type, ()),), ), int_die, ) @@ -2965,7 +2965,7 @@ def test_function_void_return(self): # void foo(void) prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.subroutine_type, ()))) self.assertEqual( - prog.type("TEST").type, prog.function_type(prog.void_type(), (), False), + prog.type("TEST").type, prog.function_type(prog.void_type(), (), False) ) def test_function_unnamed_parameter(self): @@ -3369,15 +3369,13 @@ def test_variable_const_signed(self): ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1,), + DwarfAttrib(DW_AT.const_value, form, 1), ), ), ) ) ) - self.assertEqual( - prog["x"], Object(prog, prog.int_type("int", 4, True), 1), - ) + self.assertEqual(prog["x"], Object(prog, prog.int_type("int", 4, True), 1)) def test_variable_const_unsigned(self): for form in ( @@ -3403,7 +3401,7 @@ def test_variable_const_unsigned(self): ) ) self.assertEqual( - prog["x"], Object(prog, prog.int_type("unsigned int", 4, False), 1), + prog["x"], Object(prog, prog.int_type("unsigned int", 4, False), 1) ) def test_variable_const_block(self): @@ -3438,7 +3436,7 @@ def test_variable_const_block(self): ) self.assertEqual( prog["p"], - Object(prog, prog.array_type(prog.int_type("int", 4, True), 2), [1, 2],), + Object(prog, prog.array_type(prog.int_type("int", 4, True), 2), [1, 2]), ) def test_variable_const_block_too_small(self): diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 0606fbb12..fe4090887 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -206,7 +206,7 @@ def test_pointer_to_function(self): i, ( TypeParameter(i), - TypeParameter(self.prog.float_type("float", 4),), + TypeParameter(self.prog.float_type("float", 4)), ), False, ), diff --git a/tests/test_object.py b/tests/test_object.py index c3fb7550c..1180b81c8 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -2251,7 +2251,7 
@@ class TestGenericOperators(MockProgramTestCase): def setUp(self): super().setUp() self.add_memory_segment( - b"".join(i.to_bytes(4, "little") for i in range(4)), virt_addr=0xFFFF0000, + b"".join(i.to_bytes(4, "little") for i in range(4)), virt_addr=0xFFFF0000 ) def test_len(self): diff --git a/tests/test_program.py b/tests/test_program.py index 6ad9d81e7..b97c86a33 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -81,7 +81,7 @@ def test_lookup_error(self): ) self.assertRaisesRegex(LookupError, "^could not find 'foo'$", prog.type, "foo") self.assertRaisesRegex( - LookupError, "^could not find 'foo' in 'foo.c'$", prog.type, "foo", "foo.c", + LookupError, "^could not find 'foo' in 'foo.c'$", prog.type, "foo", "foo.c" ) self.assertRaisesRegex( LookupError, "^could not find variable 'foo'$", prog.variable, "foo" @@ -138,7 +138,7 @@ def test_read_unsigned(self): self.assertEqual(prog.read_word(0xA0, True), value) prog = mock_program( - MOCK_32BIT_PLATFORM, segments=[MockMemorySegment(data, 0xFFFF0000, 0xA0)], + MOCK_32BIT_PLATFORM, segments=[MockMemorySegment(data, 0xFFFF0000, 0xA0)] ) def test_bad_address(self): @@ -521,9 +521,7 @@ def test_pointer_to_pointer(self): self.prog.pointer_type(self.prog.int_type("int", 4, True)) ), ) - self.assertEqual( - self.prog.type("int *((*))"), self.prog.type("int **"), - ) + self.assertEqual(self.prog.type("int *((*))"), self.prog.type("int **")) def test_pointer_to_const_pointer(self): self.assertEqual( @@ -626,9 +624,7 @@ def test_pointer_to_array_of_pointers(self): ) ), ) - self.assertEqual( - self.prog.type("int *((*)[2])"), self.prog.type("int *(*)[2]"), - ) + self.assertEqual(self.prog.type("int *((*)[2])"), self.prog.type("int *(*)[2]")) def test_array_of_pointers_to_array(self): self.assertEqual( @@ -738,7 +734,7 @@ def test_simple(self): with tempfile.NamedTemporaryFile() as f: f.write( create_elf_file( - ET.CORE, [ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=data,),] + ET.CORE, 
[ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=data)] ) ) f.flush() @@ -755,7 +751,7 @@ def test_physical(self): ET.CORE, [ ElfSection( - p_type=PT.LOAD, vaddr=0xFFFF0000, paddr=0xA0, data=data, + p_type=PT.LOAD, vaddr=0xFFFF0000, paddr=0xA0, data=data ), ], ) diff --git a/tests/test_type.py b/tests/test_type.py index 3510dfd46..735a395b7 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -455,7 +455,7 @@ def test_union(self): 4, ( TypeMember(self.prog.int_type("int", 4, True), "x"), - TypeMember(self.prog.int_type("unsigned int", 4, False),), + TypeMember(self.prog.int_type("unsigned int", 4, False)), ), ), ) @@ -1130,7 +1130,7 @@ def test_function(self): t, self.prog.function_type( self.prog.void_type(), - (TypeParameter(self.prog.int_type("int", 4, True),),), + (TypeParameter(self.prog.int_type("int", 4, True)),), ), ) # Different number of parameters. diff --git a/vmtest/manage.py b/vmtest/manage.py index 0426279ce..3b069af23 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -296,7 +296,7 @@ async def _upload_file_obj(self, file: BinaryIO, commit: Dict[str, Any]) -> None params = {} else: params = { - "cursor": {"offset": offset, "session_id": session_id,}, + "cursor": {"offset": offset, "session_id": session_id}, } if last: endpoint = "upload_session/finish" diff --git a/vmtest/resolver.py b/vmtest/resolver.py index 2bbbaefe0..23965d9af 100644 --- a/vmtest/resolver.py +++ b/vmtest/resolver.py @@ -53,10 +53,10 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: def _resolve_build(self, path: str) -> ResolvedKernel: release = subprocess.check_output( - ["make", "-s", "kernelrelease"], universal_newlines=True, cwd=path, + ["make", "-s", "kernelrelease"], universal_newlines=True, cwd=path ).strip() vmlinuz = subprocess.check_output( - ["make", "-s", "image_name"], universal_newlines=True, cwd=path, + ["make", "-s", "image_name"], universal_newlines=True, cwd=path ).strip() return ResolvedKernel( release=release, From 
ff96c75da05c4c760b4ad11dff120a6a0cd8d346 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Aug 2020 13:54:39 -0700 Subject: [PATCH 31/56] helpers: translate task_state_to_char() to Python Commit 326107f05431 ("libdrgn: add task_state_to_char() helper") implemented task_state_to_char() in libdrgn so that it could be used in commit 4780c7a26621 ("libdrgn: stack_trace: prohibit unwinding stack of running tasks"). As of commit eea542254600 ("libdrgn: make Linux kernel stack unwinding more robust"), it is no longer used in libdrgn, so we can translate it to Python. This removes a bunch of code and is more useful as an example. Signed-off-by: Omar Sandoval --- _drgn.pyi | 11 --- drgn/helpers/linux/sched.py | 46 ++++++++++++- libdrgn/helpers.h | 3 - libdrgn/linux_kernel_helpers.c | 119 --------------------------------- libdrgn/program.c | 1 - libdrgn/program.h | 3 - libdrgn/python/helpers.c | 18 ----- libdrgn/python/module.c | 3 - 8 files changed, 45 insertions(+), 159 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index f6f79c558..467f67ca4 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1823,17 +1823,6 @@ def _linux_helper_find_task( """ ... -def _linux_helper_task_state_to_char(task: Object) -> str: - """ - Get the state of the task as a character (e.g., ``'R'`` for running). See - `ps(1) - <http://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES>`_ for - a description of the process state codes. - - :param task: ``struct task_struct *`` - """ - ... - def _linux_helper_kaslr_offset(prog: Program) -> int: """ Get the kernel address space layout randomization offset (zero if it is diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index a48063991..3d5276867 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -9,6 +9,50 @@ Linux CPU scheduler.
""" -from _drgn import _linux_helper_task_state_to_char as task_state_to_char +from drgn import Object __all__ = ("task_state_to_char",) + +_TASK_NOLOAD = 0x400 + + +def task_state_to_char(task: Object) -> str: + """ + Get the state of the task as a character (e.g., ``'R'`` for running). See + `ps(1) + <http://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES>`_ for + a description of the process state codes. + + :param task: ``struct task_struct *`` + """ + prog = task.prog_ + task_state_chars: str + TASK_REPORT: int + try: + task_state_chars, TASK_REPORT = prog.cache["task_state_to_char"] + except KeyError: + task_state_array = prog["task_state_array"] + # Walk through task_state_array backwards looking for the largest state + # that we know is in TASK_REPORT, then populate the task state mapping. + chars = None + for i in range(len(task_state_array) - 1, -1, -1): + c: int = task_state_array[i][0].value_() + if chars is None and c in b"RSDTtXZP": + chars = bytearray(i + 1) + TASK_REPORT = (1 << i) - 1 + if chars is not None: + chars[i] = c + if chars is None: + raise Exception("could not parse task_state_array") + task_state_chars = chars.decode("ascii") + prog.cache["task_state_to_char"] = task_state_chars, TASK_REPORT + task_state = task.state.value_() + exit_state = task.exit_state.value_() + state = (task_state | exit_state) & TASK_REPORT + char = task_state_chars[state.bit_length()] + # States beyond TASK_REPORT are special. As of Linux v5.8, TASK_IDLE is the + # only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD.
+ if char == "D" and (task_state & ~state) == _TASK_NOLOAD: + return "I" + else: + return char diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index e287d19e4..0f844a74f 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -37,7 +37,4 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, const struct drgn_object *ns, uint64_t pid); -struct drgn_error * -linux_helper_task_state_to_char(const struct drgn_object *task, char *ret); - #endif /* DRGN_HELPERS_H */ diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 83c6a0bb0..a04454aa6 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -507,122 +507,3 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, drgn_object_deinit(&pid_obj); return err; } - -static struct drgn_error *cache_task_state_chars(struct drgn_object *tmp) -{ - struct drgn_error *err; - struct drgn_program *prog = drgn_object_program(tmp); - struct drgn_object task_state_array; - uint64_t length; - size_t i; - char *task_state_chars = NULL; - int64_t task_report = 0; - - drgn_object_init(&task_state_array, prog); - - err = drgn_program_find_object(prog, "task_state_array", NULL, - DRGN_FIND_OBJECT_ANY, &task_state_array); - if (err) - goto out; - - if (drgn_type_kind(task_state_array.type) != DRGN_TYPE_ARRAY) { - err = drgn_error_create(DRGN_ERROR_TYPE, - "task_state_array is not an array"); - goto out; - } - length = drgn_type_length(task_state_array.type); - if (length == 0 || length >= 64) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "task_state_array length is invalid"); - goto out; - } - - /* - * Walk through task_state_array backwards looking for the largest state - * that we know is in TASK_REPORT. 
- */ - for (i = length; i--; ) { - union drgn_value value; - char c; - - err = drgn_object_subscript(tmp, &task_state_array, i); - if (err) - goto out; - err = drgn_object_dereference(tmp, tmp); - if (err) - goto out; - err = drgn_object_read_integer(tmp, &value); - if (err) - goto out; - c = value.uvalue; - if (!task_state_chars && strchr("RSDTtXZP", c)) { - task_state_chars = malloc(i + 1); - if (!task_state_chars) { - err = &drgn_enomem; - goto out; - } - task_report = (UINT64_C(1) << i) - 1; - } - if (task_state_chars) - task_state_chars[i] = c; - } - if (!task_state_chars) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "could not parse task_state_array"); - goto out; - } - - prog->task_state_chars = task_state_chars; - prog->task_report = task_report; - task_state_chars = NULL; - err = NULL; -out: - free(task_state_chars); - drgn_object_deinit(&task_state_array); - return err; -} - -struct drgn_error * -linux_helper_task_state_to_char(const struct drgn_object *task, char *ret) -{ - static const uint64_t TASK_NOLOAD = 0x400; - struct drgn_error *err; - struct drgn_program *prog = drgn_object_program(task); - struct drgn_object tmp; - union drgn_value task_state, exit_state; - uint64_t state; - - drgn_object_init(&tmp, prog); - - if (!prog->task_state_chars) { - err = cache_task_state_chars(&tmp); - if (err) - goto out; - } - - err = drgn_object_member_dereference(&tmp, task, "state"); - if (err) - goto out; - err = drgn_object_read_integer(&tmp, &task_state); - if (err) - goto out; - err = drgn_object_member_dereference(&tmp, task, "exit_state"); - if (err) - goto out; - err = drgn_object_read_integer(&tmp, &exit_state); - if (err) - goto out; - - state = (task_state.uvalue | exit_state.uvalue) & prog->task_report; - *ret = prog->task_state_chars[fls(state)]; - /* - * States beyond TASK_REPORT are special. As of Linux v5.3, TASK_IDLE is - * the only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. 
- */ - if (*ret == 'D' && (task_state.uvalue & ~state) == TASK_NOLOAD) - *ret = 'I'; - err = NULL; -out: - drgn_object_deinit(&tmp); - return err; -} diff --git a/libdrgn/program.c b/libdrgn/program.c index 6f858ada8..776b2aca9 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -79,7 +79,6 @@ void drgn_program_init(struct drgn_program *prog, void drgn_program_deinit(struct drgn_program *prog) { - free(prog->task_state_chars); if (prog->prstatus_cached) { if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) drgn_prstatus_vector_deinit(&prog->prstatus_vector); diff --git a/libdrgn/program.h b/libdrgn/program.h index bb01bd89f..01d6c1eb7 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -160,9 +160,6 @@ struct drgn_program { uint64_t vmemmap; /* Cached THREAD_SIZE. */ uint64_t thread_size; - /* Cache for @ref linux_helper_task_state_to_char(). */ - char *task_state_chars; - uint64_t task_report; /* Page table iterator for linux_helper_read_vm(). */ struct pgtable_iterator *pgtable_it; /* diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index ed244e9e3..8bfd69bfa 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -219,24 +219,6 @@ DrgnObject *drgnpy_linux_helper_find_task(PyObject *self, PyObject *args, return res; } -PyObject *drgnpy_linux_helper_task_state_to_char(PyObject *self, PyObject *args, - PyObject *kwds) -{ - static char *keywords[] = {"task", NULL}; - struct drgn_error *err; - DrgnObject *task; - char c; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!:task_state_to_char", - keywords, &DrgnObject_type, &task)) - return NULL; - - err = linux_helper_task_state_to_char(&task->obj, &c); - if (err) - return set_drgn_error(err); - return PyUnicode_FromStringAndSize(&c, 1); -} - PyObject *drgnpy_linux_helper_kaslr_offset(PyObject *self, PyObject *args, PyObject *kwds) diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 646793d62..48b741e63 100644 --- a/libdrgn/python/module.c +++ 
b/libdrgn/python/module.c @@ -101,9 +101,6 @@ static PyMethodDef drgn_methods[] = { METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_find_task", (PyCFunction)drgnpy_linux_helper_find_task, METH_VARARGS | METH_KEYWORDS}, - {"_linux_helper_task_state_to_char", - (PyCFunction)drgnpy_linux_helper_task_state_to_char, - METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_kaslr_offset", (PyCFunction)drgnpy_linux_helper_kaslr_offset, METH_VARARGS | METH_KEYWORDS}, From 5ce80016c5cc89acca4fdcdcf7b0c297cd232595 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Aug 2020 14:16:19 -0700 Subject: [PATCH 32/56] docs: use a different example for execscript() We have a real task_state_to_char() helper now, so we shouldn't use a half-baked implementation of it as an example. Signed-off-by: Omar Sandoval --- drgn/__init__.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drgn/__init__.py b/drgn/__init__.py index 0c9441560..2898a5075 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -154,40 +154,35 @@ def execscript(path: str, *args: str) -> None: added back to the calling context. This is most useful for executing scripts from interactive mode. For - example, you could have a script named ``tasks.py``: + example, you could have a script named ``exe.py``: .. code-block:: python3 + \"\"\"Get all tasks executing a given file.\"\"\" + import sys - \"\"\" - Get all tasks in a given state. - \"\"\" + from drgn.helpers.linux.fs import d_path + from drgn.helpers.linux.pid import find_task - # From include/linux/sched.h. 
- def task_state_index(task): - task_state = task.state.value_() - if task_state == 0x402: # TASK_IDLE - return 8 + def task_exe_path(task): + if task.mm: + return d_path(task.mm.exe_file.f_path).decode() else: - state = (task_state | task.exit_state.value_()) & 0x7f - return state.bit_length() - - def task_state_to_char(task): - return 'RSDTtXZPI'[task_state_index(task)] + return None tasks = [ task for task in for_each_task(prog) - if task_state_to_char(task) == sys.argv[1] + if task_exe_path(task) == sys.argv[1] ] Then, you could execute it and use the defined variables and functions: - >>> execscript('tasks.py', 'R') - >>> tasks[0].comm - (char [16])"python3" - >>> task_state_to_char(find_task(prog, 1)) - 'S' + >>> execscript('exe.py', '/usr/bin/bash') + >>> tasks[0].pid + (pid_t)358442 + >>> task_exe_path(find_task(prog, 357954)) + '/usr/bin/vim' :param path: File path of the script. :param args: Zero or more additional arguments to pass to the script. This From d4b171112845ca28a3637f8a843cb5a6b81f21aa Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Aug 2020 16:29:03 -0700 Subject: [PATCH 33/56] setup.py: add 5.9 to vmtest kernels Signed-off-by: Omar Sandoval --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1826fa807..c7db3efc9 100755 --- a/setup.py +++ b/setup.py @@ -128,7 +128,7 @@ def run(self): class test(Command): description = "run unit tests after in-place build" - KERNELS = ["5.8", "5.7", "5.6", "5.5", "5.4", "4.19", "4.14", "4.9", "4.4"] + KERNELS = ["5.9", "5.8", "5.7", "5.6", "5.5", "5.4", "4.19", "4.14", "4.9", "4.4"] user_options = [ ( From 85c4b368204637d0710c5311d5105bacd1573444 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 25 Jun 2020 13:23:28 -0700 Subject: [PATCH 34/56] libdrgn: dwarf_index: fix leak when parsing bad line number program header If we fail to read an include directory in read_file_name_table(), we need to free the directory hashes. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index b78d84e3b..0e8ae28cf 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -1726,8 +1726,10 @@ read_file_name_table(struct drgn_dwarf_index *dindex, for (;;) { const char *path; size_t path_len; - if (!read_string(&ptr, end, &path, &path_len)) - return drgn_eof(); + if (!read_string(&ptr, end, &path, &path_len)) { + err = drgn_eof(); + goto out; + } if (!path_len) break; From 2252bef1a7694b2a034d6353360ee04188112167 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 24 Jun 2020 17:19:45 -0700 Subject: [PATCH 35/56] libdrgn: dwarf_index: rename TAG_FLAG_* and TAG_MASK to DIE_FLAG_* This is more clear: although these flags happen to be encoded with the DWARF tag, they are flags regarding the DIE. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 0e8ae28cf..70a5232da 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -180,9 +180,9 @@ static const char * const section_name[DRGN_DWARF_INDEX_NUM_SECTIONS] = { * over. The next few instructions mean that the corresponding attribute can be * skipped over. The remaining instructions indicate that the corresponding * attribute should be parsed. Finally, every sequence of instructions - * corresponding to a DIE is terminated by a zero byte followed by a bitmask of - * TAG_FLAG_* bits combined with the DWARF tag (which may be set to zero if the - * tag is not of interest). + * corresponding to a DIE is terminated by a zero byte followed by the DIE + * flags, which are a bitmask of flags combined with the DWARF tag (which may be + * set to zero if the tag is not of interest); see DIE_FLAG_*. 
*/ enum { INSN_MAX_SKIP = 229, @@ -216,12 +216,11 @@ enum { }; enum { - /* Maximum number of bits used by the tags we care about. */ - TAG_BITS = 6, - TAG_MASK = (1 << TAG_BITS) - 1, + /* Mask of tags that we care about. */ + DIE_FLAG_TAG_MASK = 0x3f, /* The remaining bits can be used for other purposes. */ - TAG_FLAG_DECLARATION = 0x40, - TAG_FLAG_CHILDREN = 0x80, + DIE_FLAG_DECLARATION = 0x40, + DIE_FLAG_CHILDREN = 0x80, }; DEFINE_VECTOR(uint8_vector, uint8_t) @@ -1397,6 +1396,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, case DW_TAG_enumerator: /* Functions. */ case DW_TAG_subprogram: + /* If adding anything here, make sure it fits in DIE_FLAG_TAG_MASK. */ should_index = true; break; default: @@ -1413,7 +1413,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, if (!read_u8(ptr, end, &children)) return drgn_eof(); if (children) - die_flags |= TAG_FLAG_CHILDREN; + die_flags |= DIE_FLAG_CHILDREN; for (;;) { uint64_t name, form; @@ -1516,7 +1516,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, * zero, but in practice, GCC always uses * DW_FORM_flag_present. 
*/ - die_flags |= TAG_FLAG_DECLARATION; + die_flags |= DIE_FLAG_DECLARATION; } else if (name == DW_AT_specification && should_index) { switch (form) { case DW_FORM_ref1: @@ -2089,14 +2089,14 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, goto out; } - tag = die.flags & TAG_MASK; + tag = die.flags & DIE_FLAG_TAG_MASK; if (tag == DW_TAG_compile_unit || tag == DW_TAG_partial_unit) { if (depth == 0 && die.stmt_list != SIZE_MAX && (err = read_file_name_table(dindex, cu, die.stmt_list, &file_name_table))) goto out; - } else if (tag && !(die.flags & TAG_FLAG_DECLARATION)) { + } else if (tag && !(die.flags & DIE_FLAG_DECLARATION)) { uint64_t file_name_hash; /* @@ -2144,7 +2144,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, } next: - if (die.flags & TAG_FLAG_CHILDREN) { + if (die.flags & DIE_FLAG_CHILDREN) { if (die.sibling) ptr = die.sibling; else From c8f84c57fbfd86ac3292ad55ad92764daf425a38 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 25 Jun 2020 11:02:43 -0700 Subject: [PATCH 36/56] libdrgn: dwarf_index: use size_t instead of uint64_t where appropriate The CU unit length and DIE offset are both limited by the size of the mapped debugging information, i.e., size_t. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 70a5232da..2b0f677a1 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -253,7 +253,7 @@ struct compilation_unit { Dwfl_Module *module; Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS]; const char *ptr; - uint64_t unit_length; + size_t unit_length; uint64_t debug_abbrev_offset; uint8_t address_size; bool is_64_bit; @@ -287,7 +287,7 @@ struct drgn_dwarf_index_die { */ size_t next; Dwfl_Module *module; - uint64_t offset; + size_t offset; }; /* @@ -1169,7 +1169,8 @@ static struct drgn_error *read_compilation_unit_header(const char *ptr, return drgn_eof(); cu->is_64_bit = tmp == UINT32_C(0xffffffff); if (cu->is_64_bit) { - if (!read_u64(&ptr, end, cu->bswap, &cu->unit_length)) + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &cu->unit_length)) return drgn_eof(); } else { cu->unit_length = tmp; @@ -1791,7 +1792,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint64_t tag, uint64_t file_name_hash, Dwfl_Module *module, - uint64_t offset) + size_t offset) { struct drgn_dwarf_index_die *die; @@ -1809,7 +1810,7 @@ static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint64_t tag, static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, const char *name, uint64_t tag, uint64_t file_name_hash, - Dwfl_Module *module, uint64_t offset) + Dwfl_Module *module, size_t offset) { struct drgn_error *err; struct drgn_dwarf_index_die_map_entry entry = { @@ -2062,7 +2063,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, const char *debug_str_buffer = section_ptr(debug_str, 0); const char *debug_str_end = section_end(debug_str); unsigned int depth = 0; - uint64_t enum_die_offset = 0; + size_t enum_die_offset = 0; if ((err = 
read_abbrev_table(section_ptr(debug_abbrev, cu->debug_abbrev_offset), @@ -2073,7 +2074,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, struct die die = { .stmt_list = SIZE_MAX, }; - uint64_t die_offset = ptr - debug_info_buffer; + size_t die_offset = ptr - debug_info_buffer; uint64_t tag; err = read_die(cu, &abbrev, &ptr, end, debug_str_buffer, From ea9f3f31145782cc21c46712c097aeb7bf544ca2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 24 Jun 2020 17:51:00 -0700 Subject: [PATCH 37/56] libdrgn: dwarf_index: don't worry about tag of CU DIE As a small simplification, we can take commit 9bb2ccecb735 ("Enable DWARF indexing to work with partial units") further and not look at the tag of the top-level DIE at all. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 2b0f677a1..6356bf2d7 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -1404,12 +1404,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, should_index = false; break; } - - if (should_index || tag == DW_TAG_compile_unit || - tag == DW_TAG_partial_unit) - die_flags = tag; - else - die_flags = 0; + die_flags = should_index ? 
tag : 0; if (!read_u8(ptr, end, &children)) return drgn_eof(); @@ -1466,8 +1461,6 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, break; } } else if (name == DW_AT_stmt_list && - (tag == DW_TAG_compile_unit || - tag == DW_TAG_partial_unit) && cu->sections[SECTION_DEBUG_LINE]) { switch (form) { case DW_FORM_data4: @@ -2090,14 +2083,14 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, goto out; } - tag = die.flags & DIE_FLAG_TAG_MASK; - if (tag == DW_TAG_compile_unit || tag == DW_TAG_partial_unit) { - if (depth == 0 && die.stmt_list != SIZE_MAX && + if (depth == 0) { + if (die.stmt_list != SIZE_MAX && (err = read_file_name_table(dindex, cu, die.stmt_list, &file_name_table))) goto out; - } else if (tag && !(die.flags & DIE_FLAG_DECLARATION)) { + } else if ((tag = die.flags & DIE_FLAG_TAG_MASK) && + !(die.flags & DIE_FLAG_DECLARATION)) { uint64_t file_name_hash; /* From 53ba7262cdcab73af9ae93e5ca577e7987ee80ea Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 24 Jun 2020 17:26:57 -0700 Subject: [PATCH 38/56] libdrgn: dwarf_index: handle DW_AT_declaration with DW_FORM_flag We currently assume that if DW_AT_declaration is present, it is true. This seems to be true in practice, and I see no reason to ever use DW_FORM_flag with a value of zero. There's no performance hit to handle it, though, so we might as well. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 6356bf2d7..d31e49e49 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -185,7 +185,7 @@ static const char * const section_name[DRGN_DWARF_INDEX_NUM_SECTIONS] = { * set to zero if the tag is not of interest); see DIE_FLAG_*. 
*/ enum { - INSN_MAX_SKIP = 229, + INSN_MAX_SKIP = 228, ATTRIB_BLOCK1, ATTRIB_BLOCK2, ATTRIB_BLOCK4, @@ -207,6 +207,7 @@ enum { ATTRIB_DECL_FILE_DATA4, ATTRIB_DECL_FILE_DATA8, ATTRIB_DECL_FILE_UDATA, + ATTRIB_DECLARATION_FLAG, ATTRIB_SPECIFICATION_REF1, ATTRIB_SPECIFICATION_REF2, ATTRIB_SPECIFICATION_REF4, @@ -1504,13 +1505,24 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, default: break; } - } else if (name == DW_AT_declaration) { - /* - * In theory, this could be DW_FORM_flag with a value of - * zero, but in practice, GCC always uses - * DW_FORM_flag_present. - */ - die_flags |= DIE_FLAG_DECLARATION; + } else if (name == DW_AT_declaration && should_index) { + switch (form) { + case DW_FORM_flag: + insn = ATTRIB_DECLARATION_FLAG; + goto append_insn; + case DW_FORM_flag_present: + /* + * This could be an instruction, but as long as + * we have a free DIE flag bit, we might as well + * use it. + */ + die_flags |= DIE_FLAG_DECLARATION; + break; + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown attribute form %" PRIu64 " for DW_AT_declaration", + form); + } } else if (name == DW_AT_specification && should_index) { switch (form) { case DW_FORM_ref1: @@ -1870,6 +1882,7 @@ struct die { size_t stmt_list; size_t decl_file; const char *specification; + bool declaration; uint8_t flags; }; @@ -2000,6 +2013,14 @@ static struct drgn_error *read_die(struct compilation_unit *cu, &die->decl_file))) return err; break; + case ATTRIB_DECLARATION_FLAG: { + uint8_t flag; + if (!read_u8(ptr, end, &flag)) + return drgn_eof(); + if (flag) + die->declaration = true; + break; + } case ATTRIB_SPECIFICATION_REF1: if (!read_u8_into_size_t(ptr, end, &tmp)) return drgn_eof(); @@ -2036,6 +2057,8 @@ static struct drgn_error *read_die(struct compilation_unit *cu, } die->flags = *insnp; + if (die->flags & DIE_FLAG_DECLARATION) + die->declaration = true; return NULL; } @@ -2090,7 +2113,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index 
*dindex, &file_name_table))) goto out; } else if ((tag = die.flags & DIE_FLAG_TAG_MASK) && - !(die.flags & DIE_FLAG_DECLARATION)) { + !die.declaration) { uint64_t file_name_hash; /* From 94e7b1f92c21ba4f879277a088ba6f2363db2f6f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 26 May 2020 15:48:54 -0700 Subject: [PATCH 39/56] libdrgn: dwarf_index: avoid copying CUs for one thread In read_cus(), the master thread can use the final CUs vector directly and the rest of the threads can merge their private vectors in. This consistently shaves a few milliseconds off of startup. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 6 +++++- libdrgn/dwarf_index.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index d31e49e49..a428164b1 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -1307,6 +1307,7 @@ static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, #pragma omp parallel { + int thread_num = omp_get_thread_num(); struct compilation_unit_vector cus = VECTOR_INIT; #pragma omp for schedule(dynamic) @@ -1317,7 +1318,10 @@ static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, if (err) continue; - module_err = read_module_cus(unindexed[i], &cus, &name); + module_err = read_module_cus(unindexed[i], + thread_num == 0 ? 
+ all_cus : &cus, + &name); if (module_err) { #pragma omp critical(drgn_read_cus) if (err) { diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index da7a4f9f2..7ab0364b1 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -20,6 +20,10 @@ #ifdef _OPENMP #include <omp.h> #else +static inline int omp_get_thread_num(void) +{ + return 0; +} typedef struct {} omp_lock_t; #define omp_init_lock(lock) do {} while (0) #define omp_destroy_lock(lock) do {} while (0) From 9ce9094ee0f3ece79b231d41e420d41802d2c16f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 30 Jun 2020 12:22:00 -0700 Subject: [PATCH 40/56] libdrgn: dwarf_index: don't copy sections into each CU I originally copied the sections into each compilation unit to avoid a pointer indirection, but performance-wise it's a wash, so we might as well save the memory. This will be more important when we keep the CUs after indexing. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 182 +++++++++++++++++++----------------------- libdrgn/dwarf_index.h | 5 ++ 2 files changed, 87 insertions(+), 100 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index a428164b1..5e637efeb 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -159,21 +159,6 @@ const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = { .section_address = drgn_dwfl_section_address, }; -enum { - SECTION_DEBUG_INFO, - SECTION_DEBUG_ABBREV, - SECTION_DEBUG_STR, - SECTION_DEBUG_LINE, - DRGN_DWARF_INDEX_NUM_SECTIONS, -}; - -static const char * const section_name[DRGN_DWARF_INDEX_NUM_SECTIONS] = { - [SECTION_DEBUG_INFO] = ".debug_info", - [SECTION_DEBUG_ABBREV] = ".debug_abbrev", - [SECTION_DEBUG_STR] = ".debug_str", - [SECTION_DEBUG_LINE] = ".debug_line", -}; - /* * The DWARF abbreviation table gets translated into a series of instructions.
* An instruction <= INSN_MAX_SKIP indicates a number of bytes to be skipped @@ -251,8 +236,7 @@ static void abbrev_table_deinit(struct abbrev_table *abbrev) } struct compilation_unit { - Dwfl_Module *module; - Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS]; + struct drgn_dwfl_module_userdata *userdata; const char *ptr; size_t unit_length; uint64_t debug_abbrev_offset; @@ -754,6 +738,7 @@ struct drgn_error *drgn_dwarf_index_report_elf(struct drgn_dwarf_index *dindex, userdata->fd = fd; userdata->elf = elf; userdata->state = DRGN_DWARF_MODULE_NEW; + userdata->module = dwfl_module; *userdatap = userdata; if (new_ret) *new_ret = true; @@ -823,6 +808,7 @@ static int drgn_dwarf_index_report_dwfl_module(Dwfl_Module *dwfl_module, userdata->path = NULL; userdata->fd = -1; userdata->elf = NULL; + userdata->module = dwfl_module; if (module->state == DRGN_DWARF_MODULE_INDEXED) { /* * We've already indexed this module. Don't index it again, but @@ -1103,59 +1089,89 @@ static struct drgn_error *apply_elf_relocations(Elf *elf) return NULL; } -static struct drgn_error *get_debug_sections(Elf *elf, Elf_Data **sections) +static struct drgn_error * +get_debug_sections(struct drgn_dwfl_module_userdata *userdata, + bool *bswap_ret) { struct drgn_error *err; - size_t shstrndx; - Elf_Scn *scn = NULL; - size_t i; - Elf_Data *debug_str; + if (userdata->elf) { + err = apply_elf_relocations(userdata->elf); + if (err) + return err; + } + + /* + * Note: not dwfl_module_getelf(), because then libdwfl applies + * ELF relocations to all sections, not just debug sections. 
+ */ + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(userdata->module, &bias); + if (!dwarf) + return drgn_error_libdwfl(); + + Elf *elf = dwarf_getelf(dwarf); + if (!elf) + return drgn_error_libdw(); + + size_t shstrndx; if (elf_getshdrstrndx(elf, &shstrndx)) return drgn_error_libelf(); + userdata->debug_info = NULL; + userdata->debug_abbrev = NULL; + userdata->debug_str = NULL; + userdata->debug_line = NULL; + Elf_Scn *scn = NULL; while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - const char *scnname; - - shdr = gelf_getshdr(scn, &shdr_mem); + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); if (!shdr) return drgn_error_libelf(); if (shdr->sh_type == SHT_NOBITS || (shdr->sh_flags & SHF_GROUP)) continue; - scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); if (!scnname) continue; - for (i = 0; i < DRGN_DWARF_INDEX_NUM_SECTIONS; i++) { - if (sections[i]) - continue; - - if (strcmp(scnname, section_name[i]) != 0) - continue; - - err = read_elf_section(scn, §ions[i]); - if (err) - return err; - } + Elf_Data **sectionp; + if (!userdata->debug_info && strcmp(scnname, ".debug_info") == 0) + sectionp = &userdata->debug_info; + else if (!userdata->debug_abbrev && strcmp(scnname, ".debug_abbrev") == 0) + sectionp = &userdata->debug_abbrev; + else if (!userdata->debug_str && strcmp(scnname, ".debug_str") == 0) + sectionp = &userdata->debug_str; + else if (!userdata->debug_line && strcmp(scnname, ".debug_line") == 0) + sectionp = &userdata->debug_line; + else + continue; + err = read_elf_section(scn, sectionp); + if (err) + return err; } - for (i = 0; i < DRGN_DWARF_INDEX_NUM_SECTIONS; i++) { - if (i != SECTION_DEBUG_LINE && !sections[i]) { - return drgn_error_format(DRGN_ERROR_OTHER, - "no %s section", - section_name[i]); - } + if (!userdata->debug_info) { + return drgn_error_create(DRGN_ERROR_OTHER, + "no .debug_info section"); + } else if 
(!userdata->debug_abbrev) { + return drgn_error_create(DRGN_ERROR_OTHER, + "no .debug_abbrev section"); + } else if (!userdata->debug_str) { + return drgn_error_create(DRGN_ERROR_OTHER, + "no .debug_str section"); } - debug_str = sections[SECTION_DEBUG_STR]; - if (debug_str->d_size == 0 || - ((char *)debug_str->d_buf)[debug_str->d_size - 1] != '\0') { + if (userdata->debug_str->d_size == 0 || + ((char *)userdata->debug_str->d_buf)[userdata->debug_str->d_size - 1]) { return drgn_error_create(DRGN_ERROR_OTHER, ".debug_str is not null terminated"); } + + *bswap_ret = (elf_getident(elf, NULL)[EI_DATA] != + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? + ELFDATA2LSB : ELFDATA2MSB)); return NULL; } @@ -1203,54 +1219,25 @@ static struct drgn_error *read_compilation_unit_header(const char *ptr, DEFINE_VECTOR(compilation_unit_vector, struct compilation_unit) static struct drgn_error * -read_dwfl_module_cus(Dwfl_Module *dwfl_module, - struct drgn_dwfl_module_userdata *userdata, +read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, struct compilation_unit_vector *cus) { struct drgn_error *err; - Dwarf *dwarf; - Dwarf_Addr bias; - Elf *elf; - Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS] = {}; - bool bswap; - const char *ptr, *end; - - if (userdata->elf) { - err = apply_elf_relocations(userdata->elf); - if (err) - return err; - } - - /* - * Note: not dwfl_module_getelf(), because then libdwfl applies - * ELF relocations to all sections, not just debug sections. - */ - dwarf = dwfl_module_getdwarf(dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdwfl(); - elf = dwarf_getelf(dwarf); - if (!elf) - return drgn_error_libdw(); - - err = get_debug_sections(elf, sections); + bool bswap; + err = get_debug_sections(userdata, &bswap); if (err) return err; - bswap = (elf_getident(elf, NULL)[EI_DATA] != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? 
- ELFDATA2LSB : ELFDATA2MSB)); - - ptr = section_ptr(sections[SECTION_DEBUG_INFO], 0); - end = section_end(sections[SECTION_DEBUG_INFO]); + const char *ptr = section_ptr(userdata->debug_info, 0); + const char *end = section_end(userdata->debug_info); while (ptr < end) { struct compilation_unit *cu; cu = compilation_unit_vector_append_entry(cus); if (!cu) return &drgn_enomem; - cu->module = dwfl_module; - memcpy(cu->sections, sections, sizeof(cu->sections)); + cu->userdata = userdata; cu->ptr = ptr; cu->bswap = bswap; err = read_compilation_unit_header(ptr, end, cu); @@ -1266,20 +1253,14 @@ static struct drgn_error *read_module_cus(struct drgn_dwarf_module *module, struct compilation_unit_vector *cus, const char **name_ret) { - struct drgn_error *err; const size_t orig_cus_size = cus->size; - size_t i; - - for (i = 0; i < module->dwfl_modules.size; i++) { - Dwfl_Module *dwfl_module; + for (size_t i = 0; i < module->dwfl_modules.size; i++) { void **userdatap; - struct drgn_dwfl_module_userdata *userdata; - - dwfl_module = module->dwfl_modules.data[i]; - *name_ret = dwfl_module_info(dwfl_module, &userdatap, NULL, - NULL, NULL, NULL, NULL, NULL); - userdata = *userdatap; - err = read_dwfl_module_cus(dwfl_module, userdata, cus); + *name_ret = dwfl_module_info(module->dwfl_modules.data[i], + &userdatap, NULL, NULL, NULL, NULL, + NULL, NULL); + struct drgn_dwfl_module_userdata *userdata = *userdatap; + struct drgn_error *err = read_dwfl_module_cus(userdata, cus); if (err) { /* * Ignore the error unless we have no more Dwfl_Modules @@ -1466,7 +1447,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, break; } } else if (name == DW_AT_stmt_list && - cu->sections[SECTION_DEBUG_LINE]) { + cu->userdata->debug_line) { switch (form) { case DW_FORM_data4: insn = ATTRIB_STMT_LIST_LINEPTR4; @@ -1724,7 +1705,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, */ static const uint64_t siphash_key[2]; struct drgn_error *err; - Elf_Data *debug_line 
= cu->sections[SECTION_DEBUG_LINE]; + Elf_Data *debug_line = cu->userdata->debug_line; const char *ptr = section_ptr(debug_line, stmt_list); const char *end = section_end(debug_line); @@ -2073,13 +2054,13 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, struct drgn_error *err; struct abbrev_table abbrev = ABBREV_TABLE_INIT; struct uint64_vector file_name_table = VECTOR_INIT; - Elf_Data *debug_abbrev = cu->sections[SECTION_DEBUG_ABBREV]; + Elf_Data *debug_abbrev = cu->userdata->debug_abbrev; const char *debug_abbrev_end = section_end(debug_abbrev); const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; const char *end = &cu->ptr[(cu->is_64_bit ? 12 : 4) + cu->unit_length]; - Elf_Data *debug_info = cu->sections[SECTION_DEBUG_INFO]; + Elf_Data *debug_info = cu->userdata->debug_info; const char *debug_info_buffer = section_ptr(debug_info, 0); - Elf_Data *debug_str = cu->sections[SECTION_DEBUG_STR]; + Elf_Data *debug_str = cu->userdata->debug_str; const char *debug_str_buffer = section_ptr(debug_str, 0); const char *debug_str_end = section_end(debug_str); unsigned int depth = 0; @@ -2158,7 +2139,8 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, else file_name_hash = 0; if ((err = index_die(dindex, die.name, tag, - file_name_hash, cu->module, + file_name_hash, + cu->userdata->module, die_offset))) goto out; } diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 7ab0364b1..c721acc88 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -137,6 +137,11 @@ struct drgn_dwfl_module_userdata { Elf *elf; int fd; enum drgn_dwarf_module_state state; + Dwfl_Module *module; + Elf_Data *debug_info; + Elf_Data *debug_abbrev; + Elf_Data *debug_str; + Elf_Data *debug_line; }; DEFINE_VECTOR_TYPE(drgn_dwarf_module_vector, struct drgn_dwarf_module *) From 0b4ab1772b62fd7e9c0d78bfc0e84a9591f8d3e1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 30 Jun 2020 16:46:01 -0700 Subject: [PATCH 41/56] libdrgn: dwarf_index: 
store DIE indices as uint32_t It's very unlikely that we'll ever index more than 4 billion DIEs in a single shard, so we can shrink the index a bit by using uint32_t indices (and uint8_t tag). Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 33 +++++++++++++++++---------------- libdrgn/dwarf_index.h | 4 ++-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 5e637efeb..2bedcdccd 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -264,13 +264,13 @@ static inline const char *section_end(Elf_Data *data) * name and tag. */ struct drgn_dwarf_index_die { - uint64_t tag; - uint64_t file_name_hash; /* * The next DIE with the same name (as an index into - * drgn_dwarf_index_shard::dies), or SIZE_MAX if this is the last DIE. + * drgn_dwarf_index_shard::dies), or UINT32_MAX if this is the last DIE. */ - size_t next; + uint32_t next; + uint8_t tag; + uint64_t file_name_hash; Dwfl_Module *module; size_t offset; }; @@ -1780,25 +1780,26 @@ read_file_name_table(struct drgn_dwarf_index *dindex, return err; } -static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint64_t tag, +static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint8_t tag, uint64_t file_name_hash, Dwfl_Module *module, size_t offset) { - struct drgn_dwarf_index_die *die; - - die = drgn_dwarf_index_die_vector_append_entry(&shard->dies); + if (shard->dies.size == UINT32_MAX) + return false; + struct drgn_dwarf_index_die *die = + drgn_dwarf_index_die_vector_append_entry(&shard->dies); if (!die) return false; + die->next = UINT32_MAX; die->tag = tag; die->file_name_hash = file_name_hash; die->module = module; die->offset = offset; - die->next = SIZE_MAX; return true; } static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, - const char *name, uint64_t tag, + const char *name, uint8_t tag, uint64_t file_name_hash, Dwfl_Module *module, size_t offset) { @@ -1844,7 +1845,7 @@ static struct 
drgn_error *index_die(struct drgn_dwarf_index *dindex, goto out; } - if (die->next == SIZE_MAX) + if (die->next == UINT32_MAX) break; die = &shard->dies.data[die->next]; } @@ -2076,7 +2077,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, .stmt_list = SIZE_MAX, }; size_t die_offset = ptr - debug_info_buffer; - uint64_t tag; + uint8_t tag; err = read_die(cu, &abbrev, &ptr, end, debug_str_buffer, debug_str_end, &die); @@ -2203,9 +2204,9 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) */ for (index = 0; index < shard->dies.size; i++) { die = &shard->dies.data[index]; - if (die->next != SIZE_MAX && + if (die->next != UINT32_MAX && die->next >= shard->dies.size) - die->next = SIZE_MAX; + die->next = UINT32_MAX; } /* Finally, delete the new entries in the map. */ @@ -2357,7 +2358,7 @@ void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, shard = &dindex->shards[it->shard]; map_it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &key, hp); - it->index = map_it.entry ? map_it.entry->value : SIZE_MAX; + it->index = map_it.entry ? 
map_it.entry->value : UINT32_MAX; it->any_name = false; } else { it->index = 0; @@ -2421,7 +2422,7 @@ drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, for (;;) { struct drgn_dwarf_index_shard *shard; - if (it->index == SIZE_MAX) + if (it->index == UINT32_MAX) return &drgn_stop; shard = &dindex->shards[it->shard]; diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index c721acc88..28d4aa202 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -63,7 +63,7 @@ extern const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks; extern const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks; struct drgn_dwarf_index_die; -DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, size_t) +DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, uint32_t) DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) struct drgn_dwarf_index_shard { @@ -347,7 +347,7 @@ struct drgn_dwarf_index_iterator { const uint64_t *tags; size_t num_tags; size_t shard; - size_t index; + uint32_t index; bool any_name; }; From 507977664c83ae4f78228d23ad03c73dfadf1b1f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Jul 2020 11:36:02 -0700 Subject: [PATCH 42/56] libdrgn: dwarf_index: store abbreviation and file name tables in CU This is preparation for the next change where we'll need to do two passes over the CUs. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 167 +++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 84 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 2bedcdccd..5fc7bd0be 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -213,28 +213,6 @@ DEFINE_VECTOR(uint8_vector, uint8_t) DEFINE_VECTOR(uint32_vector, uint32_t) DEFINE_VECTOR(uint64_vector, uint64_t) -struct abbrev_table { - /* - * This is indexed on the DWARF abbreviation code minus one.
It maps the - * abbreviation code to an index in insns where the instruction stream - * for that code begins. - * - * Technically, abbreviation codes don't have to be sequential. In - * practice, GCC seems to always generate sequential codes starting at - * one, so we can get away with a flat array. - */ - struct uint32_vector decls; - struct uint8_vector insns; -}; - -#define ABBREV_TABLE_INIT { VECTOR_INIT, VECTOR_INIT } - -static void abbrev_table_deinit(struct abbrev_table *abbrev) -{ - uint8_vector_deinit(&abbrev->insns); - uint32_vector_deinit(&abbrev->decls); -} - struct compilation_unit { struct drgn_dwfl_module_userdata *userdata; const char *ptr; @@ -243,6 +221,20 @@ struct compilation_unit { uint8_t address_size; bool is_64_bit; bool bswap; + /* + * This is indexed on the DWARF abbreviation code minus one. It maps the + * abbreviation code to an index in abbrev_insns where the instruction + * stream for that code begins. + * + * Technically, abbreviation codes don't have to be sequential. In + * practice, GCC and Clang seem to always generate sequential codes + * starting at one, so we can get away with a flat array. + */ + uint32_t *abbrev_decls; + size_t num_abbrev_decls; + uint8_t *abbrev_insns; + uint64_t *file_name_hashes; + size_t num_file_names; }; static inline const char *section_ptr(Elf_Data *data, size_t offset) @@ -1243,6 +1235,11 @@ read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, err = read_compilation_unit_header(ptr, end, cu); if (err) return err; + cu->abbrev_decls = NULL; + cu->num_abbrev_decls = 0; + cu->abbrev_insns = NULL; + cu->file_name_hashes = NULL; + cu->num_file_names = 0; ptr += (cu->is_64_bit ? 
12 : 4) + cu->unit_length; } @@ -1337,38 +1334,34 @@ static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, } static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, - const struct compilation_unit *cu, - struct abbrev_table *abbrev) + struct compilation_unit *cu, + struct uint32_vector *decls, + struct uint8_vector *insns) { struct drgn_error *err; - uint64_t code; - uint32_t insn_index; - uint64_t tag; - uint8_t children; - uint8_t die_flags; - bool should_index; - bool first = true; - uint8_t insn; static_assert(ATTRIB_MAX_INSN == UINT8_MAX, "maximum DWARF attribute instruction is invalid"); + uint64_t code; if ((err = read_uleb128(ptr, end, &code))) return err; if (code == 0) return &drgn_stop; - if (code != abbrev->decls.size + 1) { + if (code != decls->size + 1) { return drgn_error_create(DRGN_ERROR_OTHER, "DWARF abbreviation table is not sequential"); } - insn_index = abbrev->insns.size; - if (!uint32_vector_append(&abbrev->decls, &insn_index)) + uint32_t insn_index = insns->size; + if (!uint32_vector_append(decls, &insn_index)) return &drgn_enomem; + uint64_t tag; if ((err = read_uleb128(ptr, end, &tag))) return err; + bool should_index; switch (tag) { /* Types. */ case DW_TAG_base_type: @@ -1390,16 +1383,18 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, should_index = false; break; } - die_flags = should_index ? tag : 0; + uint8_t die_flags = should_index ? 
tag : 0; + uint8_t children; if (!read_u8(ptr, end, &children)) return drgn_eof(); if (children) die_flags |= DIE_FLAG_CHILDREN; + bool first = true; + uint8_t insn; for (;;) { uint64_t name, form; - if ((err = read_uleb128(ptr, end, &name))) return err; if ((err = read_uleb128(ptr, end, &form))) @@ -1589,44 +1584,49 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, } if (!first) { - uint8_t last_insn; - - last_insn = abbrev->insns.data[abbrev->insns.size - 1]; + uint8_t last_insn = insns->data[insns->size - 1]; if (last_insn + insn <= INSN_MAX_SKIP) { - abbrev->insns.data[abbrev->insns.size - 1] += insn; + insns->data[insns->size - 1] += insn; continue; } else if (last_insn < INSN_MAX_SKIP) { insn = last_insn + insn - INSN_MAX_SKIP; - abbrev->insns.data[abbrev->insns.size - 1] = - INSN_MAX_SKIP; + insns->data[insns->size - 1] = INSN_MAX_SKIP; } } append_insn: first = false; - if (!uint8_vector_append(&abbrev->insns, &insn)) + if (!uint8_vector_append(insns, &insn)) return &drgn_enomem; } insn = 0; - if (!uint8_vector_append(&abbrev->insns, &insn) || - !uint8_vector_append(&abbrev->insns, &die_flags)) + if (!uint8_vector_append(insns, &insn) || + !uint8_vector_append(insns, &die_flags)) return &drgn_enomem; return NULL; } -static struct drgn_error *read_abbrev_table(const char *ptr, const char *end, - const struct compilation_unit *cu, - struct abbrev_table *abbrev) +static struct drgn_error *read_abbrev_table(struct compilation_unit *cu) { - struct drgn_error *err; - + Elf_Data *debug_abbrev = cu->userdata->debug_abbrev; + const char *ptr = section_ptr(debug_abbrev, cu->debug_abbrev_offset); + const char *end = section_end(debug_abbrev); + struct uint32_vector decls = VECTOR_INIT; + struct uint8_vector insns = VECTOR_INIT; for (;;) { - err = read_abbrev_decl(&ptr, end, cu, abbrev); - if (err && err->code == DRGN_ERROR_STOP) + struct drgn_error *err = read_abbrev_decl(&ptr, end, cu, &decls, + &insns); + if (err && err->code == 
DRGN_ERROR_STOP) { break; - else if (err) + } else if (err) { + uint8_vector_deinit(&insns); + uint32_vector_deinit(&decls); return err; + } } + cu->abbrev_decls = decls.data; + cu->num_abbrev_decls = decls.size; + cu->abbrev_insns = insns.data; return NULL; } @@ -1696,8 +1696,7 @@ DEFINE_VECTOR(siphash_vector, struct siphash) static struct drgn_error * read_file_name_table(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu, size_t stmt_list, - struct uint64_vector *file_name_table) + struct compilation_unit *cu, size_t stmt_list) { /* * We don't care about hash flooding attacks, so don't bother with the @@ -1719,7 +1718,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, size_t path_len; if (!read_string(&ptr, end, &path, &path_len)) { err = drgn_eof(); - goto out; + goto out_directories; } if (!path_len) break; @@ -1728,36 +1727,37 @@ read_file_name_table(struct drgn_dwarf_index *dindex, siphash_vector_append_entry(&directories); if (!hash) { err = &drgn_enomem; - goto out; + goto out_directories; } siphash_init(hash, siphash_key); hash_directory(hash, path, path_len); } + struct uint64_vector file_name_hashes = VECTOR_INIT; for (;;) { const char *path; size_t path_len; if (!read_string(&ptr, end, &path, &path_len)) { err = drgn_eof(); - goto out; + goto out_hashes; } if (!path_len) break; uint64_t directory_index; if ((err = read_uleb128(&ptr, end, &directory_index))) - goto out; + goto out_hashes; /* mtime, size */ if (!skip_leb128(&ptr, end) || !skip_leb128(&ptr, end)) { err = drgn_eof(); - goto out; + goto out_hashes; } if (directory_index > directories.size) { err = drgn_error_format(DRGN_ERROR_OTHER, "directory index %" PRIu64 " is invalid", directory_index); - goto out; + goto out_hashes; } struct siphash hash; @@ -1768,14 +1768,20 @@ read_file_name_table(struct drgn_dwarf_index *dindex, siphash_update(&hash, path, path_len); uint64_t file_name_hash = siphash_final(&hash); - if (!uint64_vector_append(file_name_table, &file_name_hash)) { 
+ if (!uint64_vector_append(&file_name_hashes, &file_name_hash)) { err = &drgn_enomem; - goto out; + goto out_hashes; } } + cu->file_name_hashes = file_name_hashes.data; + cu->num_file_names = file_name_hashes.size; err = NULL; -out: + goto out_directories; + +out_hashes: + uint64_vector_deinit(&file_name_hashes); +out_directories: siphash_vector_deinit(&directories); return err; } @@ -1873,7 +1879,6 @@ struct die { }; static struct drgn_error *read_die(struct compilation_unit *cu, - const struct abbrev_table *abbrev, const char **ptr, const char *end, const char *debug_str_buffer, const char *debug_str_end, struct die *die) @@ -1888,12 +1893,12 @@ static struct drgn_error *read_die(struct compilation_unit *cu, if (code == 0) return &drgn_stop; - if (code < 1 || code > abbrev->decls.size) { + if (code < 1 || code > cu->num_abbrev_decls) { return drgn_error_format(DRGN_ERROR_OTHER, "unknown abbreviation code %" PRIu64, code); } - insnp = &abbrev->insns.data[abbrev->decls.data[code - 1]]; + insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; while ((insn = *insnp++)) { size_t skip, tmp; @@ -2053,10 +2058,6 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, struct compilation_unit *cu) { struct drgn_error *err; - struct abbrev_table abbrev = ABBREV_TABLE_INIT; - struct uint64_vector file_name_table = VECTOR_INIT; - Elf_Data *debug_abbrev = cu->userdata->debug_abbrev; - const char *debug_abbrev_end = section_end(debug_abbrev); const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; const char *end = &cu->ptr[(cu->is_64_bit ? 
12 : 4) + cu->unit_length]; Elf_Data *debug_info = cu->userdata->debug_info; @@ -2067,9 +2068,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, unsigned int depth = 0; size_t enum_die_offset = 0; - if ((err = read_abbrev_table(section_ptr(debug_abbrev, - cu->debug_abbrev_offset), - debug_abbrev_end, cu, &abbrev))) + if ((err = read_abbrev_table(cu))) goto out; for (;;) { @@ -2079,8 +2078,8 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, size_t die_offset = ptr - debug_info_buffer; uint8_t tag; - err = read_die(cu, &abbrev, &ptr, end, debug_str_buffer, - debug_str_end, &die); + err = read_die(cu, &ptr, end, debug_str_buffer, debug_str_end, + &die); if (err && err->code == DRGN_ERROR_STOP) { depth--; if (depth == 1) @@ -2095,8 +2094,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, if (depth == 0) { if (die.stmt_list != SIZE_MAX && (err = read_file_name_table(dindex, cu, - die.stmt_list, - &file_name_table))) + die.stmt_list))) goto out; } else if ((tag = die.flags & DIE_FLAG_TAG_MASK) && !die.declaration) { @@ -2118,7 +2116,7 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, struct die decl = {}; const char *decl_ptr = die.specification; - if ((err = read_die(cu, &abbrev, &decl_ptr, end, + if ((err = read_die(cu, &decl_ptr, end, debug_str_buffer, debug_str_end, &decl))) goto out; @@ -2129,14 +2127,14 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, } if (die.name) { - if (die.decl_file > file_name_table.size) { + if (die.decl_file > cu->num_file_names) { err = drgn_error_format(DRGN_ERROR_OTHER, "invalid DW_AT_decl_file %zu", die.decl_file); goto out; } if (die.decl_file) - file_name_hash = file_name_table.data[die.decl_file - 1]; + file_name_hash = cu->file_name_hashes[die.decl_file - 1]; else file_name_hash = 0; if ((err = index_die(dindex, die.name, tag, @@ -2160,8 +2158,9 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, err = 
NULL; out: - uint64_vector_deinit(&file_name_table); - abbrev_table_deinit(&abbrev); + free(cu->file_name_hashes); + free(cu->abbrev_insns); + free(cu->abbrev_decls); return err; } From 26291647eb605bff59b3fc15825745a630fe6080 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 6 Jul 2020 11:59:35 -0700 Subject: [PATCH 43/56] libdrgn: dwarf_index: handle DW_AT_specification DIEs with two passes We currently handle DIEs with a DW_AT_specification attribute by parsing the corresponding declaration to get the name and inserting the DIE as usual. This has a couple of problems: 1. It only works if DW_AT_specification refers to the same compilation unit, which is true for DW_FORM_ref{1,2,4,8,_udata}, but not DW_FORM_ref_addr. As a result, drgn doesn't support the latter. 2. It assumes that the DIE with DW_AT_specification is in the correct "scope". Unfortunately, this is not true for g++: for a variable definition in a C++ namespace, it generates a DIE with DW_AT_declaration as a child of the DW_TAG_namespace DIE and a DIE which refers to the declaration with DW_AT_specification _outside_ of the DW_TAG_namespace as a child of the DW_TAG_compilation_unit DIE. Supporting both of these cases requires reworking how we handle DW_AT_specification. This commit takes an approach of parsing the DWARF data in two passes: the first pass reads the abbreviation and file name tables and builds a map of instances of DW_AT_specification; the second pass indexes DIEs as before, but ignores DIEs with DW_AT_specification and handles DIEs with DW_AT_declaration by looking them up in the map built by the first pass. This approach is a 10-20% regression in indexing time in the benchmarks I ran. Thankfully, it is not 100% slower for a couple of reasons. The first is that the two passes are simpler than the original combined pass.
The second is that a decent part of the indexing time is spent faulting in the mapped debugging information, which only needs to happen once (even if the file is cached, minor page faults add non-negligible overhead). This doesn't handle DW_AT_specification "chains" yet, but neither did the original code. If it is necessary, it shouldn't be too difficult to add. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 846 ++++++++++++++++++++++++++---------------- libdrgn/dwarf_index.h | 35 ++ tests/test_dwarf.py | 33 ++ 3 files changed, 591 insertions(+), 323 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 5fc7bd0be..a64a0d288 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -267,12 +267,10 @@ struct drgn_dwarf_index_die { size_t offset; }; -/* - * The key is the DIE name. The value is the first DIE with that name (as an - * index into drgn_dwarf_index_shard::dies). - */ DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash, string_eq) DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_specification_map, + hash_pair_int_type, hash_table_scalar_eq) static inline size_t hash_pair_to_shard(struct hash_pair hp) { @@ -341,17 +339,6 @@ static inline struct drgn_error *read_uleb128_into_size_t(const char **ptr, return NULL; } -static void free_shards(struct drgn_dwarf_index *dindex, size_t n) -{ - size_t i; - - for (i = 0; i < n; i++) { - drgn_dwarf_index_die_vector_deinit(&dindex->shards[i].dies); - drgn_dwarf_index_die_map_deinit(&dindex->shards[i].map); - omp_destroy_lock(&dindex->shards[i].lock); - } -} - static void drgn_dwarf_module_destroy(struct drgn_dwarf_module *module) { if (module) { @@ -482,22 +469,19 @@ static void drgn_dwarf_index_free_modules(struct drgn_dwarf_index *dindex, struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, const Dwfl_Callbacks *callbacks) { - size_t i; - char *max_errors; - dindex->dwfl = 
dwfl_begin(callbacks); if (!dindex->dwfl) return drgn_error_libdwfl(); - for (i = 0; i < ARRAY_SIZE(dindex->shards); i++) { + for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; - omp_init_lock(&shard->lock); drgn_dwarf_index_die_map_init(&shard->map); drgn_dwarf_index_die_vector_init(&shard->dies); } + drgn_dwarf_index_specification_map_init(&dindex->specifications); memset(&dindex->errors, 0, sizeof(dindex->errors)); dindex->num_errors = 0; - max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); + const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); if (max_errors) dindex->max_errors = atoi(max_errors); else @@ -518,7 +502,12 @@ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) assert(drgn_dwarf_module_table_size(&dindex->module_table) == 0); drgn_dwarf_module_vector_deinit(&dindex->no_build_id); drgn_dwarf_module_table_deinit(&dindex->module_table); - free_shards(dindex, ARRAY_SIZE(dindex->shards)); + drgn_dwarf_index_specification_map_deinit(&dindex->specifications); + for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { + drgn_dwarf_index_die_vector_deinit(&dindex->shards[i].dies); + drgn_dwarf_index_die_map_deinit(&dindex->shards[i].map); + omp_destroy_lock(&dindex->shards[i].lock); + } dwfl_end(dindex->dwfl); } @@ -1402,12 +1391,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, if (name == 0 && form == 0) break; - if (name == DW_AT_sibling && tag != DW_TAG_enumeration_type) { - /* - * If we are indexing enumerators, we must descend into - * DW_TAG_enumeration_type to find the DW_TAG_enumerator - * children instead of skipping to the sibling DIE. 
- */ + if (name == DW_AT_sibling) { switch (form) { case DW_FORM_ref1: insn = ATTRIB_SIBLING_REF1; @@ -1786,6 +1770,244 @@ read_file_name_table(struct drgn_dwarf_index *dindex, return err; } +static struct drgn_error * +index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, + Dwfl_Module *module, size_t offset) +{ + struct drgn_dwarf_index_specification entry = { + .declaration = declaration, + .module = module, + .offset = offset, + }; + struct hash_pair hp = + drgn_dwarf_index_specification_map_hash(&declaration); + int ret; + #pragma omp critical(drgn_index_specification) + ret = drgn_dwarf_index_specification_map_insert_hashed(&dindex->specifications, + &entry, hp, + NULL); + /* + * There may be duplicates if multiple DIEs reference one declaration, + * but we ignore them. + */ + return ret == -1 ? &drgn_enomem : NULL; +} + +/* + * First pass: read the abbreviation and file name tables and index DIEs with + * DW_AT_specification. + */ +static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, + struct compilation_unit *cu) +{ + struct drgn_error *err; + const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; + const char *end = &cu->ptr[(cu->is_64_bit ? 
12 : 4) + cu->unit_length]; + Elf_Data *debug_info = cu->userdata->debug_info; + const char *debug_info_buffer = section_ptr(debug_info, 0); + unsigned int depth = 0; + + if ((err = read_abbrev_table(cu))) + return err; + + for (;;) { + size_t die_offset = ptr - debug_info_buffer; + + uint64_t code; + if ((err = read_uleb128(&ptr, end, &code))) + return err; + if (code == 0) { + if (depth-- > 1) + continue; + else + break; + } else if (code > cu->num_abbrev_decls) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown abbreviation code %" PRIu64, + code); + } + + uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; + bool declaration = false; + uintptr_t specification = 0; + size_t stmt_list = SIZE_MAX; + const char *sibling = NULL; + uint8_t insn; + while ((insn = *insnp++)) { + size_t skip, tmp; + switch (insn) { + case ATTRIB_BLOCK1: + if (!read_u8_into_size_t(&ptr, end, &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK2: + if (!read_u16_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_EXPRLOC: + if ((err = read_uleb128_into_size_t(&ptr, end, + &skip))) + return err; + goto skip; + case ATTRIB_LEB128: + case ATTRIB_DECL_FILE_UDATA: + if (!skip_leb128(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_STRING: + case ATTRIB_NAME_STRING: + if (!skip_string(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_SIBLING_REF1: + if (!read_u8_into_size_t(&ptr, end, &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF2: + if (!read_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF8: + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case 
ATTRIB_SIBLING_REF_UDATA: + if ((err = read_uleb128_into_size_t(&ptr, end, + &tmp))) + return err; +sibling: + if (!read_in_bounds(cu->ptr, end, tmp)) + return drgn_eof(); + sibling = &cu->ptr[tmp]; + __builtin_prefetch(sibling); + break; + case ATTRIB_STMT_LIST_LINEPTR4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &stmt_list)) + return drgn_eof(); + break; + case ATTRIB_STMT_LIST_LINEPTR8: + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &stmt_list)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA1: + skip = 1; + goto skip; + case ATTRIB_DECL_FILE_DATA2: + skip = 2; + goto skip; + case ATTRIB_NAME_STRP4: + case ATTRIB_DECL_FILE_DATA4: + skip = 4; + goto skip; + case ATTRIB_NAME_STRP8: + case ATTRIB_DECL_FILE_DATA8: + skip = 8; + goto skip; + case ATTRIB_DECLARATION_FLAG: { + uint8_t flag; + if (!read_u8(&ptr, end, &flag)) + return drgn_eof(); + if (flag) + declaration = true; + break; + } + case ATTRIB_SPECIFICATION_REF1: + if (!read_u8_into_size_t(&ptr, end, &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF2: + if (!read_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF8: + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF_UDATA: + if ((err = read_uleb128_into_size_t(&ptr, end, + &tmp))) + return err; +specification: + specification = (uintptr_t)cu->ptr + tmp; + break; + default: + skip = insn; +skip: + if (!read_in_bounds(ptr, end, skip)) + return drgn_eof(); + ptr += skip; + break; + } + } + insn = *insnp; + + if (depth == 0) { + if (stmt_list != SIZE_MAX && + (err = read_file_name_table(dindex, cu, stmt_list))) + return err; + } else if (specification) { + if (insn & DIE_FLAG_DECLARATION) + declaration = true; + /* + * For 
now, we don't handle DIEs with + * DW_AT_specification which are themselves + * declarations. We may need to handle + * DW_AT_specification "chains" in the future. + */ + if (!declaration && + (err = index_specification(dindex, specification, + cu->userdata->module, + die_offset))) + return err; + } + + if (insn & DIE_FLAG_CHILDREN) { + if (sibling) + ptr = sibling; + else + depth++; + } else if (depth == 0) { + break; + } + } + return NULL; +} + +static bool find_definition(struct drgn_dwarf_index *dindex, uintptr_t die_addr, + Dwfl_Module **module_ret, size_t *offset_ret) +{ + struct drgn_dwarf_index_specification_map_iterator it = + drgn_dwarf_index_specification_map_search(&dindex->specifications, + &die_addr); + if (!it.entry) + return false; + *module_ret = it.entry->module; + *offset_ret = it.entry->offset; + return true; +} + static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint8_t tag, uint64_t file_name_hash, Dwfl_Module *module, size_t offset) @@ -1868,194 +2090,9 @@ static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, return err; } -struct die { - const char *sibling; - const char *name; - size_t stmt_list; - size_t decl_file; - const char *specification; - bool declaration; - uint8_t flags; -}; - -static struct drgn_error *read_die(struct compilation_unit *cu, - const char **ptr, const char *end, - const char *debug_str_buffer, - const char *debug_str_end, struct die *die) -{ - struct drgn_error *err; - uint64_t code; - uint8_t *insnp; - uint8_t insn; - - if ((err = read_uleb128(ptr, end, &code))) - return err; - if (code == 0) - return &drgn_stop; - - if (code < 1 || code > cu->num_abbrev_decls) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unknown abbreviation code %" PRIu64, - code); - } - insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; - - while ((insn = *insnp++)) { - size_t skip, tmp; - - switch (insn) { - case ATTRIB_BLOCK1: - if (!read_u8_into_size_t(ptr, end, &skip)) - return drgn_eof(); - goto 
skip; - case ATTRIB_BLOCK2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, &skip)) - return drgn_eof(); - goto skip; - case ATTRIB_BLOCK4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &skip)) - return drgn_eof(); - goto skip; - case ATTRIB_EXPRLOC: - if ((err = read_uleb128_into_size_t(ptr, end, &skip))) - return err; - goto skip; - case ATTRIB_LEB128: - if (!skip_leb128(ptr, end)) - return drgn_eof(); - break; - case ATTRIB_NAME_STRING: - die->name = *ptr; - /* fallthrough */ - case ATTRIB_STRING: - if (!skip_string(ptr, end)) - return drgn_eof(); - break; - case ATTRIB_SIBLING_REF1: - if (!read_u8_into_size_t(ptr, end, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF_UDATA: - if ((err = read_uleb128_into_size_t(ptr, end, &tmp))) - return err; -sibling: - if (!read_in_bounds(cu->ptr, end, tmp)) - return drgn_eof(); - die->sibling = &cu->ptr[tmp]; - __builtin_prefetch(die->sibling); - break; - case ATTRIB_NAME_STRP4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto strp; - case ATTRIB_NAME_STRP8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); -strp: - if (!read_in_bounds(debug_str_buffer, debug_str_end, - tmp)) - return drgn_eof(); - die->name = &debug_str_buffer[tmp]; - __builtin_prefetch(die->name); - break; - case ATTRIB_STMT_LIST_LINEPTR4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, - &die->stmt_list)) - return drgn_eof(); - break; - case ATTRIB_STMT_LIST_LINEPTR8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, - &die->stmt_list)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA1: - if 
(!read_u8_into_size_t(ptr, end, &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, - &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, - &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, - &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_UDATA: - if ((err = read_uleb128_into_size_t(ptr, end, - &die->decl_file))) - return err; - break; - case ATTRIB_DECLARATION_FLAG: { - uint8_t flag; - if (!read_u8(ptr, end, &flag)) - return drgn_eof(); - if (flag) - die->declaration = true; - break; - } - case ATTRIB_SPECIFICATION_REF1: - if (!read_u8_into_size_t(ptr, end, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF_UDATA: - if ((err = read_uleb128_into_size_t(ptr, end, &tmp))) - return err; -specification: - if (!read_in_bounds(cu->ptr, end, tmp)) - return drgn_eof(); - die->specification = &cu->ptr[tmp]; - __builtin_prefetch(die->specification); - break; - default: - skip = insn; -skip: - if (!read_in_bounds(*ptr, end, skip)) - return drgn_eof(); - *ptr += skip; - break; - } - } - - die->flags = *insnp; - if (die->flags & DIE_FLAG_DECLARATION) - die->declaration = true; - - return NULL; -} - -static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu) +/* Second pass: index the actual DIEs. 
*/ +static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, + struct compilation_unit *cu) { struct drgn_error *err; const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; @@ -2066,115 +2103,245 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, const char *debug_str_buffer = section_ptr(debug_str, 0); const char *debug_str_end = section_end(debug_str); unsigned int depth = 0; - size_t enum_die_offset = 0; - - if ((err = read_abbrev_table(cu))) - goto out; + uint8_t depth1_tag = 0; + size_t depth1_offset = 0; for (;;) { - struct die die = { - .stmt_list = SIZE_MAX, - }; size_t die_offset = ptr - debug_info_buffer; - uint8_t tag; - err = read_die(cu, &ptr, end, debug_str_buffer, debug_str_end, - &die); - if (err && err->code == DRGN_ERROR_STOP) { - depth--; - if (depth == 1) - enum_die_offset = 0; - else if (depth == 0) + uint64_t code; + if ((err = read_uleb128(&ptr, end, &code))) + return err; + if (code == 0) { + if (depth-- > 1) + continue; + else break; - continue; - } else if (err) { - goto out; + } else if (code > cu->num_abbrev_decls) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown abbreviation code %" PRIu64, + code); } - if (depth == 0) { - if (die.stmt_list != SIZE_MAX && - (err = read_file_name_table(dindex, cu, - die.stmt_list))) - goto out; - } else if ((tag = die.flags & DIE_FLAG_TAG_MASK) && - !die.declaration) { - uint64_t file_name_hash; + uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; + const char *name = NULL; + size_t decl_file = 0; + bool declaration = false; + bool specification = false; + const char *sibling = NULL; + uint8_t insn; + while ((insn = *insnp++)) { + size_t skip, tmp; + switch (insn) { + case ATTRIB_BLOCK1: + if (!read_u8_into_size_t(&ptr, end, &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK2: + if (!read_u16_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK4: + if (!read_u32_into_size_t(&ptr, end, 
cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_EXPRLOC: + if ((err = read_uleb128_into_size_t(&ptr, end, + &skip))) + return err; + goto skip; + case ATTRIB_SPECIFICATION_REF_UDATA: + specification = true; + /* fallthrough */ + case ATTRIB_LEB128: + if (!skip_leb128(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_NAME_STRING: + name = ptr; + /* fallthrough */ + case ATTRIB_STRING: + if (!skip_string(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_SIBLING_REF1: + if (!read_u8_into_size_t(&ptr, end, &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF2: + if (!read_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF8: + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF_UDATA: + if ((err = read_uleb128_into_size_t(&ptr, end, + &tmp))) + return err; +sibling: + if (!read_in_bounds(cu->ptr, end, tmp)) + return drgn_eof(); + sibling = &cu->ptr[tmp]; + __builtin_prefetch(sibling); + break; + case ATTRIB_NAME_STRP4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto strp; + case ATTRIB_NAME_STRP8: + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); +strp: + if (!read_in_bounds(debug_str_buffer, debug_str_end, + tmp)) + return drgn_eof(); + name = &debug_str_buffer[tmp]; + __builtin_prefetch(name); + break; + case ATTRIB_STMT_LIST_LINEPTR4: + skip = 4; + goto skip; + case ATTRIB_STMT_LIST_LINEPTR8: + skip = 8; + goto skip; + case ATTRIB_DECL_FILE_DATA1: + if (!read_u8_into_size_t(&ptr, end, &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA2: + if (!read_u16_into_size_t(&ptr, end, cu->bswap, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA4: + if 
(!read_u32_into_size_t(&ptr, end, cu->bswap, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA8: + if (!read_u64_into_size_t(&ptr, end, cu->bswap, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_UDATA: + if ((err = read_uleb128_into_size_t(&ptr, end, + &decl_file))) + return err; + break; + case ATTRIB_DECLARATION_FLAG: { + uint8_t flag; + if (!read_u8(&ptr, end, &flag)) + return drgn_eof(); + if (flag) + declaration = true; + break; + } + case ATTRIB_SPECIFICATION_REF1: + specification = true; + skip = 1; + goto skip; + case ATTRIB_SPECIFICATION_REF2: + specification = true; + skip = 2; + goto skip; + case ATTRIB_SPECIFICATION_REF4: + specification = true; + skip = 4; + goto skip; + case ATTRIB_SPECIFICATION_REF8: + specification = true; + skip = 8; + goto skip; + default: + skip = insn; +skip: + if (!read_in_bounds(ptr, end, skip)) + return drgn_eof(); + ptr += skip; + break; + } + } + insn = *insnp; - /* - * NB: the enumerator name points to the - * enumeration_type DIE instead of the enumerator DIE. - */ - if (depth == 1 && tag == DW_TAG_enumeration_type) - enum_die_offset = die_offset; - else if (depth == 2 && tag == DW_TAG_enumerator && - enum_die_offset) - die_offset = enum_die_offset; - else if (depth != 1) - goto next; - - if (die.specification && (!die.name || !die.decl_file)) { - struct die decl = {}; - const char *decl_ptr = die.specification; - - if ((err = read_die(cu, &decl_ptr, end, - debug_str_buffer, - debug_str_end, &decl))) - goto out; - if (!die.name && decl.name) - die.name = decl.name; - if (!die.decl_file && decl.decl_file) - die.decl_file = decl.decl_file; + uint8_t tag = insn & DIE_FLAG_TAG_MASK; + if (depth == 1) { + depth1_tag = tag; + depth1_offset = die_offset; + } + if (depth == (tag == DW_TAG_enumerator ? 
2 : 1) && name && + !specification) { + if (insn & DIE_FLAG_DECLARATION) + declaration = true; + Dwfl_Module *module = cu->userdata->module; + if (tag == DW_TAG_enumerator) { + if (depth1_tag != DW_TAG_enumeration_type) + goto next; + /* + * NB: the enumerator name points to the + * enumeration_type DIE. Also, enumerators can't + * be declared in C/C++, so we don't check for + * that. + */ + die_offset = depth1_offset; + } else if (declaration && + !find_definition(dindex, + (uintptr_t)debug_info_buffer + + die_offset, + &module, &die_offset)) { + goto next; } - if (die.name) { - if (die.decl_file > cu->num_file_names) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "invalid DW_AT_decl_file %zu", - die.decl_file); - goto out; - } - if (die.decl_file) - file_name_hash = cu->file_name_hashes[die.decl_file - 1]; - else - file_name_hash = 0; - if ((err = index_die(dindex, die.name, tag, - file_name_hash, - cu->userdata->module, - die_offset))) - goto out; + if (decl_file > cu->num_file_names) { + return drgn_error_format(DRGN_ERROR_OTHER, + "invalid DW_AT_decl_file %zu", + decl_file); } + uint64_t file_name_hash; + if (decl_file) + file_name_hash = cu->file_name_hashes[decl_file - 1]; + else + file_name_hash = 0; + if ((err = index_die(dindex, name, + insn & DIE_FLAG_TAG_MASK, + file_name_hash, module, + die_offset))) + return err; } next: - if (die.flags & DIE_FLAG_CHILDREN) { - if (die.sibling) - ptr = die.sibling; + if (insn & DIE_FLAG_CHILDREN) { + /* + * We must descend into the children of enumeration_type + * DIEs to index enumerator DIEs. 
+ */ + if (sibling && tag != DW_TAG_enumeration_type) + ptr = sibling; else depth++; } else if (depth == 0) { break; } } - - err = NULL; -out: - free(cu->file_name_hashes); - free(cu->abbrev_insns); - free(cu->abbrev_decls); - return err; + return NULL; } static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) { - size_t i; - - for (i = 0; i < ARRAY_SIZE(dindex->shards); i++) { - struct drgn_dwarf_index_shard *shard; - struct drgn_dwarf_index_die *die; - struct drgn_dwarf_index_die_map_iterator it; - size_t index; - - shard = &dindex->shards[i]; + for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { + struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; /* * Because we're deleting everything that was added since the @@ -2182,13 +2349,12 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) * entry that was added for this update. */ while (shard->dies.size) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[shard->dies.size - 1]; void **userdatap; - struct drgn_dwfl_module_userdata *userdata; - - die = &shard->dies.data[shard->dies.size - 1]; dwfl_module_info(die->module, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL); - userdata = *userdatap; + struct drgn_dwfl_module_userdata *userdata = *userdatap; if (userdata->state == DRGN_DWARF_MODULE_INDEXED) break; else @@ -2201,15 +2367,17 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) * entries must also be new, so there's no need to preserve * them. */ - for (index = 0; index < shard->dies.size; i++) { - die = &shard->dies.data[index]; + for (size_t index = 0; index < shard->dies.size; i++) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[index]; if (die->next != UINT32_MAX && die->next >= shard->dies.size) die->next = UINT32_MAX; } /* Finally, delete the new entries in the map. 
*/ - for (it = drgn_dwarf_index_die_map_first(&shard->map); + for (struct drgn_dwarf_index_die_map_iterator it = + drgn_dwarf_index_die_map_first(&shard->map); it.entry; ) { if (it.entry->value >= shard->dies.size) { it = drgn_dwarf_index_die_map_delete_iterator(&shard->map, @@ -2219,6 +2387,21 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) } } } + + for (struct drgn_dwarf_index_specification_map_iterator it = + drgn_dwarf_index_specification_map_first(&dindex->specifications); + it.entry; ) { + void **userdatap; + dwfl_module_info(it.entry->module, &userdatap, NULL, NULL, NULL, + NULL, NULL, NULL); + struct drgn_dwfl_module_userdata *userdata = *userdatap; + if (userdata->state == DRGN_DWARF_MODULE_INDEXED) { + it = drgn_dwarf_index_specification_map_next(it); + } else { + it = drgn_dwarf_index_specification_map_delete_iterator(&dindex->specifications, + it); + } + } } static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, @@ -2226,22 +2409,39 @@ static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, size_t num_cus) { struct drgn_error *err = NULL; - size_t i; - - #pragma omp parallel for schedule(dynamic) - for (i = 0; i < num_cus; i++) { - struct drgn_error *cu_err; - - if (err) - continue; + #pragma omp parallel + { + #pragma omp for schedule(dynamic) + for (size_t i = 0; i < num_cus; i++) { + if (!err) { + struct drgn_error *cu_err = + index_cu_first_pass(dindex, &cus[i]); + if (cu_err) { + #pragma omp critical(drgn_index_cus) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + } - cu_err = index_cu(dindex, &cus[i]); - if (cu_err) { - #pragma omp critical(drgn_index_cus) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; + #pragma omp for schedule(dynamic) + for (size_t i = 0; i < num_cus; i++) { + if (!err) { + struct drgn_error *cu_err = + index_cu_second_pass(dindex, &cus[i]); + if (cu_err) { + #pragma omp critical(drgn_index_cus) + if (err) + drgn_error_destroy(cu_err); + 
else + err = cu_err; + } + } + free(cus[i].file_name_hashes); + free(cus[i].abbrev_insns); + free(cus[i].abbrev_decls); } } return err; diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 28d4aa202..83f05a06f 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -69,6 +69,10 @@ DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) struct drgn_dwarf_index_shard { /** @privatesection */ omp_lock_t lock; + /* + * Map from name to list of DIEs with that name (as the index in + * drgn_dwarf_index_shard::dies of the first DIE with that name). + */ struct drgn_dwarf_index_die_map map; /* * We store all entries in a shard as a single array, which is more @@ -79,6 +83,28 @@ struct drgn_dwarf_index_shard { #define DRGN_DWARF_INDEX_SHARD_BITS 8 +/* A DIE with a DW_AT_specification attribute. */ +struct drgn_dwarf_index_specification { + /* + * Address of non-defining declaration DIE referenced by + * DW_AT_specification. + */ + uintptr_t declaration; + /* Module and offset of DIE. */ + Dwfl_Module *module; + size_t offset; +}; + +static inline uintptr_t +drgn_dwarf_index_specification_to_key(const struct drgn_dwarf_index_specification *entry) +{ + return entry->declaration; +} + +DEFINE_HASH_TABLE_TYPE(drgn_dwarf_index_specification_map, + struct drgn_dwarf_index_specification, + drgn_dwarf_index_specification_to_key) + /** State of a @ref drgn_dwarf_module or a @c Dwfl_Module. */ enum drgn_dwarf_module_state { /** Reported but not indexed. */ @@ -184,6 +210,15 @@ struct drgn_dwarf_index { * This is sharded to reduce lock contention. */ struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; + /** + * Map from address of DIE referenced by DW_AT_specification to DIE that + * references it. This is used to resolve DIEs with DW_AT_declaration to + * their definition. + * + * This is not sharded because there typically aren't enough of these in + * a program to cause contention. 
+ */ + struct drgn_dwarf_index_specification_map specifications; Dwfl *dwfl; /** * Formatted errors reported by @ref drgn_dwarf_index_report_error(). diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 1771ff703..04ae4ff57 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -3471,6 +3471,39 @@ def test_variable_const_block_too_small(self): ) self.assertRaisesRegex(Exception, "too small", prog.variable, "p") + def test_specification(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), + ), + ) + ) + ) + + self.assertEqual( + prog["x"], + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), + ) + def test_not_found(self): prog = dwarf_program(int_die) self.assertRaisesRegex(LookupError, "could not find", prog.object, "y") From c053c2b212fe8710c2ff83c57005ef0e39fbe2dc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 7 Jul 2020 14:09:00 -0700 Subject: [PATCH 44/56] libdrgn: dwarf_index: handle DW_AT_specification with DW_FORM_ref_addr Now that we can handle a DW_AT_specification that references another compilation unit, add support for DW_FORM_ref_addr. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index a64a0d288..3a3560afe 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -170,7 +170,7 @@ const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = { * set to zero if the tag is not of interest); see DIE_FLAG_*. 
*/ enum { - INSN_MAX_SKIP = 228, + INSN_MAX_SKIP = 226, ATTRIB_BLOCK1, ATTRIB_BLOCK2, ATTRIB_BLOCK4, @@ -198,7 +198,9 @@ enum { ATTRIB_SPECIFICATION_REF4, ATTRIB_SPECIFICATION_REF8, ATTRIB_SPECIFICATION_REF_UDATA, - ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_UDATA, + ATTRIB_SPECIFICATION_REF_ADDR4, + ATTRIB_SPECIFICATION_REF_ADDR8, + ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_ADDR8, }; enum { @@ -218,6 +220,7 @@ struct compilation_unit { const char *ptr; size_t unit_length; uint64_t debug_abbrev_offset; + uint8_t version; uint8_t address_size; bool is_64_bit; bool bswap; @@ -1181,6 +1184,7 @@ static struct drgn_error *read_compilation_unit_header(const char *ptr, "unknown DWARF CU version %" PRIu16, version); } + cu->version = version; if (cu->is_64_bit) { if (!read_u64(&ptr, end, cu->bswap, &cu->debug_abbrev_offset)) @@ -1504,8 +1508,27 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, case DW_FORM_ref_udata: insn = ATTRIB_SPECIFICATION_REF_UDATA; goto append_insn; + case DW_FORM_ref_addr: + if (cu->version >= 3) { + if (cu->is_64_bit) + insn = ATTRIB_SPECIFICATION_REF_ADDR8; + else + insn = ATTRIB_SPECIFICATION_REF_ADDR4; + } else { + if (cu->address_size == 8) + insn = ATTRIB_SPECIFICATION_REF_ADDR8; + else if (cu->address_size == 4) + insn = ATTRIB_SPECIFICATION_REF_ADDR4; + else + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported address size %" PRIu8, + cu->address_size); + } + goto append_insn; default: - break; + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown attribute form %" PRIu64 " for DW_AT_specification", + form); } } @@ -1952,6 +1975,18 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, specification: specification = (uintptr_t)cu->ptr + tmp; break; + case ATTRIB_SPECIFICATION_REF_ADDR4: + if (!read_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification_ref_addr; + case ATTRIB_SPECIFICATION_REF_ADDR8: + if (!read_u64_into_size_t(&ptr, end, 
cu->bswap, + &tmp)) + return drgn_eof(); +specification_ref_addr: + specification = (uintptr_t)debug_info_buffer + tmp; + break; default: skip = insn; skip: @@ -2258,10 +2293,12 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, skip = 2; goto skip; case ATTRIB_SPECIFICATION_REF4: + case ATTRIB_SPECIFICATION_REF_ADDR4: specification = true; skip = 4; goto skip; case ATTRIB_SPECIFICATION_REF8: + case ATTRIB_SPECIFICATION_REF_ADDR8: specification = true; skip = 8; goto skip; From 7a85b4188e3ce1d4afd2889f8c58f2dc758a717e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 6 Jul 2020 17:44:39 -0700 Subject: [PATCH 45/56] libdrgn: clean up read.h helpers and avoid undefined pointer behavior There are a couple of related ways that we can cause undefined behavior when parsing a malformed DWARF or depmod index file: 1. There are several places where we increment the cursor to skip past some data. It is undefined behavior if the result points out of bounds of the data, even if we don't attempt to dereference it. 2. read_in_bounds() checks that ptr <= end. This pointer comparison is only defined if ptr and end both point to elements of the same array object or one past the last element. If ptr has gone past end, then this comparison is likely undefined anyways. Fix it by adding a helper to skip past data with bounds checking. Then, all of the helpers can assume that ptr <= end and maintain that invariant. While we're here and auditing all of the call sites, let's clean up the API and rename it from read_foo() to the less generic mread_foo(). 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 +- libdrgn/dwarf_index.c | 271 ++++++++++++++++++++--------------------- libdrgn/linux_kernel.c | 57 ++++----- libdrgn/mread.h | 256 ++++++++++++++++++++++++++++++++++++++ libdrgn/program.c | 1 - libdrgn/read.h | 232 ----------------------------------- libdrgn/stack_trace.c | 1 - 7 files changed, 417 insertions(+), 403 deletions(-) create mode 100644 libdrgn/mread.h delete mode 100644 libdrgn/read.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 22a341e1e..43b5cdd06 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -40,6 +40,7 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ linux_kernel_helpers.c \ memory_reader.c \ memory_reader.h \ + mread.h \ object.c \ object.h \ object_index.c \ @@ -49,7 +50,6 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ platform.h \ program.c \ program.h \ - read.h \ serialize.c \ serialize.h \ siphash.h \ diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 3a3560afe..215dabcc2 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -18,7 +18,7 @@ #include "internal.h" #include "dwarf_index.h" -#include "read.h" +#include "mread.h" #include "siphash.h" #include "string_builder.h" @@ -242,12 +242,14 @@ struct compilation_unit { static inline const char *section_ptr(Elf_Data *data, size_t offset) { - return &((char *)data->d_buf)[offset]; + if (offset > data->d_size) + return NULL; + return (const char *)data->d_buf + offset; } static inline const char *section_end(Elf_Data *data) { - return section_ptr(data, data->d_size); + return (const char *)data->d_buf + data->d_size; } /* @@ -292,27 +294,22 @@ static inline struct drgn_error *drgn_eof(void) "debug information is truncated"); } -static inline bool skip_leb128(const char **ptr, const char *end) +static inline bool mread_skip_leb128(const char **ptr, const char *end) { - for (;;) { - if (*ptr >= end) - return false; + while (*ptr < end) { if (!(*(const uint8_t *)(*ptr)++ & 
0x80)) return true; } + return false; } -static inline struct drgn_error *read_uleb128(const char **ptr, const char *end, - uint64_t *value) +static inline struct drgn_error *mread_uleb128(const char **ptr, + const char *end, uint64_t *value) { int shift = 0; - uint8_t byte; - *value = 0; - for (;;) { - if (*ptr >= end) - return drgn_eof(); - byte = *(const uint8_t *)*ptr; + while (*ptr < end) { + uint8_t byte = *(const uint8_t *)*ptr; (*ptr)++; if (shift == 63 && byte > 1) { return drgn_error_create(DRGN_ERROR_OVERFLOW, @@ -321,19 +318,19 @@ static inline struct drgn_error *read_uleb128(const char **ptr, const char *end, *value |= (uint64_t)(byte & 0x7f) << shift; shift += 7; if (!(byte & 0x80)) - break; + return NULL; } - return NULL; + return drgn_eof(); } -static inline struct drgn_error *read_uleb128_into_size_t(const char **ptr, - const char *end, - size_t *value) +static inline struct drgn_error *mread_uleb128_into_size_t(const char **ptr, + const char *end, + size_t *value) { struct drgn_error *err; uint64_t tmp; - if ((err = read_uleb128(ptr, end, &tmp))) + if ((err = mread_uleb128(ptr, end, &tmp))) return err; if (tmp > SIZE_MAX) @@ -1164,20 +1161,19 @@ static struct drgn_error *read_compilation_unit_header(const char *ptr, struct compilation_unit *cu) { uint32_t tmp; - uint16_t version; - - if (!read_u32(&ptr, end, cu->bswap, &tmp)) + if (!mread_u32(&ptr, end, cu->bswap, &tmp)) return drgn_eof(); cu->is_64_bit = tmp == UINT32_C(0xffffffff); if (cu->is_64_bit) { - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &cu->unit_length)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &cu->unit_length)) return drgn_eof(); } else { cu->unit_length = tmp; } - if (!read_u16(&ptr, end, cu->bswap, &version)) + uint16_t version; + if (!mread_u16(&ptr, end, cu->bswap, &version)) return drgn_eof(); if (version != 2 && version != 3 && version != 4) { return drgn_error_format(DRGN_ERROR_OTHER, @@ -1187,15 +1183,15 @@ static struct drgn_error 
*read_compilation_unit_header(const char *ptr, cu->version = version; if (cu->is_64_bit) { - if (!read_u64(&ptr, end, cu->bswap, &cu->debug_abbrev_offset)) + if (!mread_u64(&ptr, end, cu->bswap, &cu->debug_abbrev_offset)) return drgn_eof(); } else { - if (!read_u32_into_u64(&ptr, end, cu->bswap, - &cu->debug_abbrev_offset)) + if (!mread_u32_into_u64(&ptr, end, cu->bswap, + &cu->debug_abbrev_offset)) return drgn_eof(); } - if (!read_u8(&ptr, end, &cu->address_size)) + if (!mread_u8(&ptr, end, &cu->address_size)) return drgn_eof(); return NULL; @@ -1234,7 +1230,9 @@ read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, cu->file_name_hashes = NULL; cu->num_file_names = 0; - ptr += (cu->is_64_bit ? 12 : 4) + cu->unit_length; + if (!mread_skip(&ptr, end, + (cu->is_64_bit ? 12 : 4) + cu->unit_length)) + return drgn_eof(); } return NULL; } @@ -1337,7 +1335,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, "maximum DWARF attribute instruction is invalid"); uint64_t code; - if ((err = read_uleb128(ptr, end, &code))) + if ((err = mread_uleb128(ptr, end, &code))) return err; if (code == 0) return &drgn_stop; @@ -1351,7 +1349,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, return &drgn_enomem; uint64_t tag; - if ((err = read_uleb128(ptr, end, &tag))) + if ((err = mread_uleb128(ptr, end, &tag))) return err; bool should_index; @@ -1379,7 +1377,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, uint8_t die_flags = should_index ? 
tag : 0; uint8_t children; - if (!read_u8(ptr, end, &children)) + if (!mread_u8(ptr, end, &children)) return drgn_eof(); if (children) die_flags |= DIE_FLAG_CHILDREN; @@ -1388,9 +1386,9 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, uint8_t insn; for (;;) { uint64_t name, form; - if ((err = read_uleb128(ptr, end, &name))) + if ((err = mread_uleb128(ptr, end, &name))) return err; - if ((err = read_uleb128(ptr, end, &form))) + if ((err = mread_uleb128(ptr, end, &form))) return err; if (name == 0 && form == 0) break; @@ -1617,6 +1615,8 @@ static struct drgn_error *read_abbrev_table(struct compilation_unit *cu) { Elf_Data *debug_abbrev = cu->userdata->debug_abbrev; const char *ptr = section_ptr(debug_abbrev, cu->debug_abbrev_offset); + if (!ptr) + return drgn_eof(); const char *end = section_end(debug_abbrev); struct uint32_vector decls = VECTOR_INIT; struct uint8_vector insns = VECTOR_INIT; @@ -1641,17 +1641,14 @@ static struct drgn_error *skip_lnp_header(struct compilation_unit *cu, const char **ptr, const char *end) { uint32_t tmp; - bool is_64_bit; - uint16_t version; - uint8_t opcode_base; - - if (!read_u32(ptr, end, cu->bswap, &tmp)) + if (!mread_u32(ptr, end, cu->bswap, &tmp)) + return drgn_eof(); + bool is_64_bit = tmp == UINT32_C(0xffffffff); + if (is_64_bit && !mread_skip(ptr, end, sizeof(uint64_t))) return drgn_eof(); - is_64_bit = tmp == UINT32_C(0xffffffff); - if (is_64_bit) - *ptr += sizeof(uint64_t); - if (!read_u16(ptr, end, cu->bswap, &version)) + uint16_t version; + if (!mread_u16(ptr, end, cu->bswap, &version)) return drgn_eof(); if (version != 2 && version != 3 && version != 4) { return drgn_error_format(DRGN_ERROR_OTHER, @@ -1660,19 +1657,20 @@ static struct drgn_error *skip_lnp_header(struct compilation_unit *cu, } /* + * Skip: * header_length * minimum_instruction_length * maximum_operations_per_instruction (DWARF 4 only) * default_is_stmt * line_base * line_range + * standard_opcode_lengths */ - *ptr += 
(is_64_bit ? 8 : 4) + 4 + (version >= 4); - - if (!read_u8(ptr, end, &opcode_base)) + uint8_t opcode_base; + if (!mread_skip(ptr, end, (is_64_bit ? 8 : 4) + 4 + (version >= 4)) || + !mread_u8(ptr, end, &opcode_base) || + !mread_skip(ptr, end, opcode_base - 1)) return drgn_eof(); - /* standard_opcode_lengths */ - *ptr += opcode_base - 1; return NULL; } @@ -1711,8 +1709,11 @@ read_file_name_table(struct drgn_dwarf_index *dindex, */ static const uint64_t siphash_key[2]; struct drgn_error *err; + Elf_Data *debug_line = cu->userdata->debug_line; const char *ptr = section_ptr(debug_line, stmt_list); + if (!ptr) + return drgn_eof(); const char *end = section_end(debug_line); err = skip_lnp_header(cu, &ptr, end); @@ -1723,7 +1724,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, for (;;) { const char *path; size_t path_len; - if (!read_string(&ptr, end, &path, &path_len)) { + if (!mread_string(&ptr, end, &path, &path_len)) { err = drgn_eof(); goto out_directories; } @@ -1744,7 +1745,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, for (;;) { const char *path; size_t path_len; - if (!read_string(&ptr, end, &path, &path_len)) { + if (!mread_string(&ptr, end, &path, &path_len)) { err = drgn_eof(); goto out_hashes; } @@ -1752,10 +1753,11 @@ read_file_name_table(struct drgn_dwarf_index *dindex, break; uint64_t directory_index; - if ((err = read_uleb128(&ptr, end, &directory_index))) + if ((err = mread_uleb128(&ptr, end, &directory_index))) goto out_hashes; /* mtime, size */ - if (!skip_leb128(&ptr, end) || !skip_leb128(&ptr, end)) { + if (!mread_skip_leb128(&ptr, end) || + !mread_skip_leb128(&ptr, end)) { err = drgn_eof(); goto out_hashes; } @@ -1837,7 +1839,7 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, size_t die_offset = ptr - debug_info_buffer; uint64_t code; - if ((err = read_uleb128(&ptr, end, &code))) + if ((err = mread_uleb128(&ptr, end, &code))) return err; if (code == 0) { if (depth-- > 1) @@ -1860,71 +1862,70 @@ 
static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, size_t skip, tmp; switch (insn) { case ATTRIB_BLOCK1: - if (!read_u8_into_size_t(&ptr, end, &skip)) + if (!mread_u8_into_size_t(&ptr, end, &skip)) return drgn_eof(); goto skip; case ATTRIB_BLOCK2: - if (!read_u16_into_size_t(&ptr, end, cu->bswap, - &skip)) + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &skip)) return drgn_eof(); goto skip; case ATTRIB_BLOCK4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &skip)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &skip)) return drgn_eof(); goto skip; case ATTRIB_EXPRLOC: - if ((err = read_uleb128_into_size_t(&ptr, end, - &skip))) + if ((err = mread_uleb128_into_size_t(&ptr, end, + &skip))) return err; goto skip; case ATTRIB_LEB128: case ATTRIB_DECL_FILE_UDATA: - if (!skip_leb128(&ptr, end)) + if (!mread_skip_leb128(&ptr, end)) return drgn_eof(); break; case ATTRIB_STRING: case ATTRIB_NAME_STRING: - if (!skip_string(&ptr, end)) + if (!mread_skip_string(&ptr, end)) return drgn_eof(); break; case ATTRIB_SIBLING_REF1: - if (!read_u8_into_size_t(&ptr, end, &tmp)) + if (!mread_u8_into_size_t(&ptr, end, &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF2: - if (!read_u16_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF8: - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF_UDATA: - if ((err = read_uleb128_into_size_t(&ptr, end, - &tmp))) + if ((err = mread_uleb128_into_size_t(&ptr, end, + &tmp))) return err; sibling: - if (!read_in_bounds(cu->ptr, end, tmp)) + if (!(sibling = mread_begin(cu->ptr, end, tmp))) return 
drgn_eof(); - sibling = &cu->ptr[tmp]; __builtin_prefetch(sibling); break; case ATTRIB_STMT_LIST_LINEPTR4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &stmt_list)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &stmt_list)) return drgn_eof(); break; case ATTRIB_STMT_LIST_LINEPTR8: - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &stmt_list)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &stmt_list)) return drgn_eof(); break; case ATTRIB_DECL_FILE_DATA1: @@ -1943,46 +1944,46 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, goto skip; case ATTRIB_DECLARATION_FLAG: { uint8_t flag; - if (!read_u8(&ptr, end, &flag)) + if (!mread_u8(&ptr, end, &flag)) return drgn_eof(); if (flag) declaration = true; break; } case ATTRIB_SPECIFICATION_REF1: - if (!read_u8_into_size_t(&ptr, end, &tmp)) + if (!mread_u8_into_size_t(&ptr, end, &tmp)) return drgn_eof(); goto specification; case ATTRIB_SPECIFICATION_REF2: - if (!read_u16_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto specification; case ATTRIB_SPECIFICATION_REF4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto specification; case ATTRIB_SPECIFICATION_REF8: - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto specification; case ATTRIB_SPECIFICATION_REF_UDATA: - if ((err = read_uleb128_into_size_t(&ptr, end, - &tmp))) + if ((err = mread_uleb128_into_size_t(&ptr, end, + &tmp))) return err; specification: specification = (uintptr_t)cu->ptr + tmp; break; case ATTRIB_SPECIFICATION_REF_ADDR4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto specification_ref_addr; case ATTRIB_SPECIFICATION_REF_ADDR8: - if 
(!read_u64_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); specification_ref_addr: specification = (uintptr_t)debug_info_buffer + tmp; @@ -1990,9 +1991,8 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, default: skip = insn; skip: - if (!read_in_bounds(ptr, end, skip)) + if (!mread_skip(&ptr, end, skip)) return drgn_eof(); - ptr += skip; break; } } @@ -2135,8 +2135,6 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, Elf_Data *debug_info = cu->userdata->debug_info; const char *debug_info_buffer = section_ptr(debug_info, 0); Elf_Data *debug_str = cu->userdata->debug_str; - const char *debug_str_buffer = section_ptr(debug_str, 0); - const char *debug_str_end = section_end(debug_str); unsigned int depth = 0; uint8_t depth1_tag = 0; size_t depth1_offset = 0; @@ -2145,7 +2143,7 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, size_t die_offset = ptr - debug_info_buffer; uint64_t code; - if ((err = read_uleb128(&ptr, end, &code))) + if ((err = mread_uleb128(&ptr, end, &code))) return err; if (code == 0) { if (depth-- > 1) @@ -2169,81 +2167,78 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, size_t skip, tmp; switch (insn) { case ATTRIB_BLOCK1: - if (!read_u8_into_size_t(&ptr, end, &skip)) + if (!mread_u8_into_size_t(&ptr, end, &skip)) return drgn_eof(); goto skip; case ATTRIB_BLOCK2: - if (!read_u16_into_size_t(&ptr, end, cu->bswap, - &skip)) + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &skip)) return drgn_eof(); goto skip; case ATTRIB_BLOCK4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &skip)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &skip)) return drgn_eof(); goto skip; case ATTRIB_EXPRLOC: - if ((err = read_uleb128_into_size_t(&ptr, end, - &skip))) + if ((err = mread_uleb128_into_size_t(&ptr, end, + &skip))) return err; goto skip; case 
ATTRIB_SPECIFICATION_REF_UDATA: specification = true; /* fallthrough */ case ATTRIB_LEB128: - if (!skip_leb128(&ptr, end)) + if (!mread_skip_leb128(&ptr, end)) return drgn_eof(); break; case ATTRIB_NAME_STRING: name = ptr; /* fallthrough */ case ATTRIB_STRING: - if (!skip_string(&ptr, end)) + if (!mread_skip_string(&ptr, end)) return drgn_eof(); break; case ATTRIB_SIBLING_REF1: - if (!read_u8_into_size_t(&ptr, end, &tmp)) + if (!mread_u8_into_size_t(&ptr, end, &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF2: - if (!read_u16_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF8: - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto sibling; case ATTRIB_SIBLING_REF_UDATA: - if ((err = read_uleb128_into_size_t(&ptr, end, - &tmp))) + if ((err = mread_uleb128_into_size_t(&ptr, end, + &tmp))) return err; sibling: - if (!read_in_bounds(cu->ptr, end, tmp)) + if (!(sibling = mread_begin(cu->ptr, end, tmp))) return drgn_eof(); - sibling = &cu->ptr[tmp]; __builtin_prefetch(sibling); break; case ATTRIB_NAME_STRP4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); goto strp; case ATTRIB_NAME_STRP8: - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &tmp)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) return drgn_eof(); strp: - if (!read_in_bounds(debug_str_buffer, debug_str_end, - tmp)) + if (!(name = section_ptr(debug_str, tmp))) return drgn_eof(); - name = &debug_str_buffer[tmp]; __builtin_prefetch(name); break; case ATTRIB_STMT_LIST_LINEPTR4: @@ -2253,32 +2248,33 @@ static struct drgn_error 
*index_cu_second_pass(struct drgn_dwarf_index *dindex, skip = 8; goto skip; case ATTRIB_DECL_FILE_DATA1: - if (!read_u8_into_size_t(&ptr, end, &decl_file)) + if (!mread_u8_into_size_t(&ptr, end, + &decl_file)) return drgn_eof(); break; case ATTRIB_DECL_FILE_DATA2: - if (!read_u16_into_size_t(&ptr, end, cu->bswap, - &decl_file)) + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &decl_file)) return drgn_eof(); break; case ATTRIB_DECL_FILE_DATA4: - if (!read_u32_into_size_t(&ptr, end, cu->bswap, - &decl_file)) + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &decl_file)) return drgn_eof(); break; case ATTRIB_DECL_FILE_DATA8: - if (!read_u64_into_size_t(&ptr, end, cu->bswap, - &decl_file)) + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &decl_file)) return drgn_eof(); break; case ATTRIB_DECL_FILE_UDATA: - if ((err = read_uleb128_into_size_t(&ptr, end, - &decl_file))) + if ((err = mread_uleb128_into_size_t(&ptr, end, + &decl_file))) return err; break; case ATTRIB_DECLARATION_FLAG: { uint8_t flag; - if (!read_u8(&ptr, end, &flag)) + if (!mread_u8(&ptr, end, &flag)) return drgn_eof(); if (flag) declaration = true; @@ -2305,9 +2301,8 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, default: skip = insn; skip: - if (!read_in_bounds(ptr, end, skip)) + if (!mread_skip(&ptr, end, skip)) return drgn_eof(); - ptr += skip; break; } } diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index ef6a4adcc..8f19df14f 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -16,8 +16,8 @@ #include "dwarf_index.h" #include "helpers.h" #include "linux_kernel.h" +#include "mread.h" #include "program.h" -#include "read.h" struct drgn_error *read_memory_via_pgtable(void *buf, uint64_t address, size_t count, uint64_t offset, @@ -753,12 +753,10 @@ struct kmod_index { static struct drgn_error *kmod_index_validate(struct kmod_index *index, const char *path) { - const char *ptr; + const char *ptr = index->ptr; uint32_t magic, 
version; - - ptr = index->ptr; - if (!read_be32(&ptr, index->end, &magic) || - !read_be32(&ptr, index->end, &version)) { + if (!mread_be32(&ptr, index->end, &magic) || + !mread_be32(&ptr, index->end, &version)) { return drgn_error_format(DRGN_ERROR_OTHER, "%s is too short", path); } @@ -824,20 +822,21 @@ static const char *kmod_index_find(struct kmod_index *index, const char *key) static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); + + /* kmod_index_validate() already checked that this is within bounds. */ const char *ptr = index->ptr + 8; uint32_t offset; - for (;;) { - if (!read_be32(&ptr, index->end, &offset)) + if (!mread_be32(&ptr, index->end, &offset) || + !(ptr = mread_begin(index->ptr, index->end, + offset & INDEX_NODE_MASK))) return NULL; - ptr = index->ptr + (offset & INDEX_NODE_MASK); if (offset & INDEX_NODE_PREFIX) { const char *prefix; size_t prefix_len; - - if (!read_string(&ptr, index->end, &prefix, - &prefix_len)) + if (!mread_string(&ptr, index->end, &prefix, + &prefix_len)) return NULL; if (strncmp(key, prefix, prefix_len) != 0) return NULL; @@ -846,20 +845,21 @@ static const char *kmod_index_find(struct kmod_index *index, const char *key) if (offset & INDEX_NODE_CHILDS) { uint8_t first, last; - - if (!read_u8(&ptr, index->end, &first) || - !read_u8(&ptr, index->end, &last)) + if (!mread_u8(&ptr, index->end, &first) || + !mread_u8(&ptr, index->end, &last)) return NULL; if (*key) { uint8_t cur = *key; - - if (cur < first || cur > last) + if (cur < first || cur > last || + !mread_skip(&ptr, index->end, + 4 * (cur - first))) return NULL; - ptr += 4 * (cur - first); key++; continue; } else { - ptr += 4 * (last - first + 1); + if (!mread_skip(&ptr, index->end, + 4 * (last - first + 1))) + return NULL; break; } } else if (*key) { @@ -904,27 +904,24 @@ static void depmod_index_deinit(struct depmod_index *depmod) 
static bool depmod_index_find(struct depmod_index *depmod, const char *name, const char **path_ret, size_t *len_ret) { - const char *ptr; - uint32_t value_count; - const char *deps; - size_t deps_len; - char *colon; - - ptr = kmod_index_find(&depmod->modules_dep, name); + const char *ptr = kmod_index_find(&depmod->modules_dep, name); if (!ptr) return false; - if (!read_be32(&ptr, depmod->modules_dep.end, &value_count) || + uint32_t value_count; + if (!mread_be32(&ptr, depmod->modules_dep.end, &value_count) || !value_count) return false; /* Skip over priority. */ - ptr += 4; - if (!read_string(&ptr, depmod->modules_dep.end, &deps, + const char *deps; + size_t deps_len; + if (!mread_skip(&ptr, depmod->modules_dep.end, 4) || + !mread_string(&ptr, depmod->modules_dep.end, &deps, &deps_len)) return false; - colon = strchr(deps, ':'); + const char *colon = strchr(deps, ':'); if (!colon) return false; diff --git a/libdrgn/mread.h b/libdrgn/mread.h new file mode 100644 index 000000000..dbb9f6a9d --- /dev/null +++ b/libdrgn/mread.h @@ -0,0 +1,256 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0+ + +/** + * @file + * + * Helpers for parsing values in memory. + * + * See @ref MemoryParsing. + */ + +#ifndef DRGN_MREAD_H +#define DRGN_MREAD_H + +#include +#include + +/** + * @ingroup Internals + * + * @defgroup MemoryParsing Memory parsing + * + * Helpers for reading values in memory. + * + * This provides helpers for reading values in memory (e.g., from an mmap'd + * file) with safe bounds checking. + * + * @{ + */ + +/** + * Return whether ptr + offset is within @p end. + * + * @param[in] ptr Pointer to check. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] offset Offset to check. + * @return @c true if the result would be in bounds, @c false if not. 
+ */ +static inline bool mread_in_bounds(const char *ptr, const char *end, + size_t offset) +{ + return end - ptr >= offset; +} + +/** + * Return start + offset, checking bounds. + * + * @param[in] start Pointer to first valid byte. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] offset Offset from @p start. + * @return start + offset if it is within @p end, @c NULL if not. + */ +static inline const char *mread_begin(const char *start, const char *end, + size_t offset) +{ + return mread_in_bounds(start, end, offset) ? start + offset : NULL; +} + +/** + * Advance @p ptr by @p offset, checking bounds. + * + * @param[in,out] ptr Pointer to check and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] offset Offset to advance by. + * @return @c true if the pointer was advanced, @c false if it was not advanced + * because the result would be out of bounds. + */ +static inline bool mread_skip(const char **ptr, const char *end, size_t offset) +{ + if (!mread_in_bounds(*ptr, end, offset)) + return false; + *ptr += offset; + return true; +} + +/** + * Read an unsigned 8-bit integer in memory and advance @p ptr. + * + * @param[in,out] ptr Pointer to read from and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[out] ret Returned value. + * @return @c true on success, @c false if the read was out of bounds. + */ +static inline bool mread_u8(const char **ptr, const char *end, uint8_t *ret) +{ + if (!mread_in_bounds(*ptr, end, sizeof(uint8_t))) + return false; + *ret = *(const uint8_t *)*ptr; + *ptr += sizeof(uint8_t); + return true; +} + +/** + * Read an unsigned 8-bit integer in memory into a @c size_t and advance @p ptr. 
+ * + * @sa mread_u8() + */ +static inline bool mread_u8_into_size_t(const char **ptr, const char *end, + size_t *ret) +{ + uint8_t tmp; + if (!mread_u8(ptr, end, &tmp)) + return false; + /* SIZE_MAX is required to be at least 65535, so this won't overflow. */ + *ret = tmp; + return true; +} + +#ifdef DOXYGEN +/** + * Read an unsigned N-bit integer in memory and advance @p ptr. + * + * This is defined for N of 16, 32, and 64. + * + * @param[in,out] ptr Pointer to read from and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] bswap Whether to swap the byte order of the read value. + * @param[out] ret Returned value. + * @return @c true on success, @c false if the read was out of bounds. + */ +bool mread_uN(const char **ptr, const char *end, bool bswap, uintN_t *ret); + +/** + * Read an unsigned N-bit little-endian integer in memory and advance @p ptr. + * + * @sa mread_uN() + */ +bool mread_leN(const char **ptr, const char *end, uintN_t *ret); + +/** + * Read an unsigned N-bit big-endian integer in memory and advance @p ptr. + * + * @sa mread_uN() + */ +bool mread_beN(const char **ptr, const char *end, uintN_t *ret); + +/** + * Read an unsigned N-bit integer in memory into a @c uint64_t and advance @p + * ptr. + * + * @sa mread_uN() + */ +bool mread_uN_into_u64(const char **ptr, const char *end, bool bswap, + uint64_t *ret); + +/** + * Read an unsigned N-bit integer in memory into a @c size_t and advance @p + * ptr. 
+ * + * @sa mread_uN() + * + * @return @c true on success, @c false if the read was out of bounds or the + * result is too large for a @c size_t. + */ +bool mread_uN_into_size_t(const char **ptr, const char *end, bool bswap, + size_t *ret); +#endif + +#define DEFINE_READ(size) \ +static inline bool mread_u##size(const char **ptr, const char *end, bool bswap, \ + uint##size##_t *ret) \ +{ \ + if (!mread_in_bounds(*ptr, end, sizeof(uint##size##_t))) \ + return false; \ + uint##size##_t tmp; \ + memcpy(&tmp, *ptr, sizeof(tmp)); \ + if (bswap) \ + tmp = bswap_##size(tmp); \ + *ret = tmp; \ + *ptr += sizeof(uint##size##_t); \ + return true; \ +} \ + \ +static inline bool mread_le##size(const char **ptr, const char *end, \ + uint##size##_t *ret) \ +{ \ + return mread_u##size(ptr, end, \ + __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__, ret); \ +} \ + \ +static inline bool mread_be##size(const char **ptr, const char *end, \ + uint##size##_t *ret) \ +{ \ + return mread_u##size(ptr, end, __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__, \ + ret); \ +} \ + \ +static inline bool mread_u##size##_into_u64(const char **ptr, const char *end, \ + bool bswap, uint64_t *ret) \ +{ \ + uint##size##_t tmp; \ + if (!mread_u##size(ptr, end, bswap, &tmp)) \ + return false; \ + *ret = tmp; \ + return true; \ +} \ + \ +static inline bool mread_u##size##_into_size_t(const char **ptr, \ + const char *end, bool bswap, \ + size_t *ret) \ +{ \ + uint##size##_t tmp; \ + if (!mread_u##size(ptr, end, bswap, &tmp)) \ + return false; \ + if (tmp > SIZE_MAX) \ + return false; \ + *ret = tmp; \ + return true; \ +} + +DEFINE_READ(16) +DEFINE_READ(32) +DEFINE_READ(64) + +/** + * Advance @p ptr to the byte after the next null byte. + * + * @param[in,out] ptr Pointer to advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @return @c true if the pointer was advanced, @c false if no null byte was + * found.
+ */ +static inline bool mread_skip_string(const char **ptr, const char *end) +{ + const char *nul = memchr(*ptr, 0, end - *ptr); + if (!nul) + return false; + *ptr = nul + 1; + return true; +} + +/** + * Read a null-terminated string in memory and advance @p ptr. + * + * @param[in,out] ptr Pointer to read from and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[out] str_ret Returned string. Equal to the initial value of + * *ptr. + * @param[out] len_ret Returned string length not including the null byte. + * @return @c true on success, @c false if no null byte was found. + */ +static inline bool mread_string(const char **ptr, const char *end, + const char **str_ret, size_t *len_ret) +{ + const char *nul = memchr(*ptr, 0, end - *ptr); + if (!nul) + return false; + *str_ret = *ptr; + *len_ret = nul - *ptr; + *ptr = nul + 1; + return true; +} + +/** @} */ + +#endif /* DRGN_MREAD_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index 776b2aca9..f92f2a359 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -22,7 +22,6 @@ #include "memory_reader.h" #include "object_index.h" #include "program.h" -#include "read.h" #include "string_builder.h" #include "symbol.h" #include "vector.h" diff --git a/libdrgn/read.h b/libdrgn/read.h deleted file mode 100644 index 7b9b16ff5..000000000 --- a/libdrgn/read.h +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -/** - * @file - * - * Helpers for parsing values in memory. - * - * See @ref MemoryParsing. - */ - -#ifndef DRGN_READ_H -#define DRGN_READ_H - -#include -#include - -/** - * @ingroup Internals - * - * @defgroup MemoryParsing Memory parsing - * - * Helpers for parsing values in memory. - * - * This provides helpers for parsing values in memory (e.g., from an mmap'd - * file) with safe bounds checking. - * - * These helpers take a cursor (@p ptr) which is read from and advanced. 
They - * are bounds-checked against an end pointer (@p end). If desired, they will - * swap the byte order of the read value. The @c readN helpers are defined for N - * of 16, 32, and 64. - * - * @{ - */ - -/** Return whether ptr + size is within @p end. */ -static inline bool read_in_bounds(const char *ptr, const char *end, size_t size) -{ - return ptr <= end && (size_t)(end - ptr) >= size; -} - -/** Parse an unsigned 8-bit integer in memory. */ -static inline bool read_u8(const char **ptr, const char *end, uint8_t *ret) -{ - if (!read_in_bounds(*ptr, end, sizeof(uint8_t))) - return false; - *ret = *(const uint8_t *)*ptr; - *ptr += sizeof(uint8_t); - return true; -} - -/** Parse an unsigned 8-bit integer in memory into a @c size_t. */ -static inline bool read_u8_into_size_t(const char **ptr, const char *end, - size_t *ret) -{ - uint8_t tmp; - - if (!read_u8(ptr, end, &tmp)) - return false; - if (tmp > SIZE_MAX) - return false; - *ret = tmp; - return true; -} - -#ifdef DOXYGEN -/** - * Parse an unsigned N-bit integer in memory. - * - * This does not perform any bounds checking, so it should only be used if - * bounds checking was already done. - * - * This is defined for N of 16, 32, and 64. - * - * @param[in,out] ptr Pointer to read from and advance. - * @param[in] bswap Whether to swap the byte order of the read value. - * @param[out] ret Returned value. - */ -void read_uN_nocheck(const char **ptr, bool bswap, uintN_t *ret); - -/** - * Parse an unsigned N-bit integer in memory, checking bounds. - * - * @sa read_uN_nocheck(). - * - * @param[in] end Pointer to one after the last valid address. - * @return Whether the read was in bounds. - */ -bool read_uN(const char **ptr, const char *end, bool bswap, uintN_t *ret); - -/** - * Parse an unsigned N-bit integer in memory into a @c uint64_t. - * - * @sa read_uN_nocheck(). 
- */ -void read_uN_into_u64_nocheck(const char **ptr, bool bswap, uint64_t *ret); - -/** - * Parse an unsigned N-bit integer in memory into a @c uint64_t, checking - * bounds. - * - * @sa read_uN(). - */ -bool read_uN_into_u64(const char **ptr, const char *end, bool bswap, - uint64_t *ret); - -/** - * Parse an unsigned N-bit integer in memory into a @c size_t, checking bounds. - * - * @sa read_uN(). - * - * @return Whether the read was in bounds and the value was less than or equal - * to @c SIZE_MAX. - */ -bool read_uN_into_u64(const char **ptr, const char *end, bool bswap, - uint64_t *ret); -#endif - -#define DEFINE_READ(size) \ -static inline void read_u##size##_nocheck(const char **ptr, bool bswap, \ - uint##size##_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - memcpy(&tmp, *ptr, sizeof(tmp)); \ - if (bswap) \ - tmp = bswap_##size(tmp); \ - *ret = tmp; \ - *ptr += sizeof(uint##size##_t); \ -} \ - \ -static inline bool read_u##size(const char **ptr, const char *end, \ - bool bswap, uint##size##_t *ret) \ -{ \ - if (!read_in_bounds(*ptr, end, sizeof(uint##size##_t))) \ - return false; \ - read_u##size##_nocheck(ptr, bswap, ret); \ - return true; \ -} \ - \ -static inline void read_u##size##_into_u64_nocheck(const char **ptr, \ - bool bswap, \ - uint64_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - read_u##size##_nocheck(ptr, bswap, &tmp); \ - *ret = tmp; \ -} \ - \ -static inline bool read_u##size##_into_u64(const char **ptr, \ - const char *end, bool bswap, \ - uint64_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - if (!read_u##size(ptr, end, bswap, &tmp)) \ - return false; \ - *ret = tmp; \ - return true; \ -} \ - \ -static inline bool read_u##size##_into_size_t(const char **ptr, \ - const char *end, \ - bool bswap, size_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - if (!read_u##size(ptr, end, bswap, &tmp)) \ - return false; \ - if (tmp > SIZE_MAX) \ - return false; \ - *ret = tmp; \ - return true; \ -} - -DEFINE_READ(16) -DEFINE_READ(32) -DEFINE_READ(64) - -static 
inline bool read_be32(const char **ptr, const char *end, uint32_t *ret) -{ - return read_u32(ptr, end, __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__, - ret); -} - -/** Advance @p ptr to the byte after the next null byte. */ -static inline bool skip_string(const char **ptr, const char *end) -{ - const char *nul; - - if (*ptr >= end) - return false; - - nul = memchr(*ptr, 0, end - *ptr); - if (!nul) - return false; - - *ptr = nul + 1; - return true; -} - -/** - * Parse a null terminated string in memory. - * - * @param[in,out] ptr Pointer to read from and advance. - * @param[in] end Pointer to one after the last valid address. - * @param[out] str_ret Returned string. Equal to the initial value of - * *ptr. - * @param[out] len_ret Returned string length not including the null byte. - */ -static inline bool read_string(const char **ptr, const char *end, - const char **str_ret, size_t *len_ret) -{ - const char *nul; - - if (*ptr >= end) - return false; - - nul = memchr(*ptr, 0, end - *ptr); - if (!nul) - return false; - - *str_ret = *ptr; - *len_ret = nul - *ptr; - *ptr = nul + 1; - return true; -} - -/** @} */ - -#endif /* DRGN_READ_H */ diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index a5266a13e..e8e6ba7dc 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -11,7 +11,6 @@ #include "internal.h" #include "helpers.h" #include "program.h" -#include "read.h" #include "string_builder.h" #include "symbol.h" From d512964c1e8e6169991e1731d6de509ec7553f43 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 28 Aug 2020 11:29:27 -0700 Subject: [PATCH 46/56] libdrgn: add drgn_error_copy() This is needed for a future change where we'll want to save an error and return it multiple times. 
Signed-off-by: Omar Sandoval --- libdrgn/drgn.h.in | 13 +++++++++++++ libdrgn/error.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 1742212b2..901aefc62 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -210,6 +210,19 @@ struct drgn_error *drgn_error_format_fault(uint64_t address, const char *format, ...) __attribute__((returns_nonnull,format(printf, 2, 3))); +/** + * Create a copy of a @ref drgn_error. + * + * The source's error message and path are copied if necessary, so the source + * error can be destroyed without affecting the new error and vice versa. + * + * @param[in] src Error to copy. + * @return A new error with the same fields. If there is a failure to allocate + * memory, @ref drgn_enomem is returned instead. + */ +struct drgn_error *drgn_error_copy(struct drgn_error *src) + __attribute__((returns_nonnull)); + /** * Write a @ref drgn_error to a @c stdio stream. * diff --git a/libdrgn/error.c b/libdrgn/error.c index bba0e5432..e230f92be 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -162,6 +162,39 @@ struct drgn_error *drgn_error_from_string_builder(enum drgn_error_code code, return drgn_error_create_nodup(code, message); } +LIBDRGN_PUBLIC struct drgn_error *drgn_error_copy(struct drgn_error *src) +{ + if (!src->needs_destroy) + return src; + struct drgn_error *dst = malloc(sizeof(*dst)); + if (!dst) + return &drgn_enomem; + dst->code = src->code; + dst->needs_destroy = true; + dst->errnum = src->errnum; + if (src->path) { + dst->path = strdup(src->path); + if (!dst->path) { + free(dst); + return &drgn_enomem; + } + } else { + dst->path = NULL; + } + dst->address = src->address; + if (src->message) { + dst->message = strdup(src->message); + if (!dst->message) { + free(dst->path); + free(dst); + return &drgn_enomem; + } + } else { + dst->message = NULL; + } + return dst; +} + bool string_builder_append_error(struct string_builder *sb, struct drgn_error
*err) { From 66ad5077c9fea5c8ce30b71693389b957b8dff38 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Sep 2020 13:05:45 -0700 Subject: [PATCH 47/56] libdrgn: dwarf_index: return indexed DIE entry from drgn_dwarf_index_iterator_next() For namespace support, we will want to access the struct drgn_dwarf_index_die for namespaces instead of the Dwarf_Die. Split drgn_dwarf_index_get_die() out of drgn_dwarf_index_iterator_next(). Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 51 +++++++++++------------------------ libdrgn/dwarf_index.h | 47 +++++++++++++++++++++++++++------ libdrgn/dwarf_info_cache.c | 54 ++++++++++++++++++++------------------ libdrgn/program.c | 15 +++++------ 4 files changed, 90 insertions(+), 77 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 215dabcc2..e8d520795 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -252,26 +252,6 @@ static inline const char *section_end(Elf_Data *data) return (const char *)data->d_buf + data->d_size; } -/* - * An indexed DIE. - * - * DIEs with the same name but different tags or files are considered distinct. - * We only compare the hash of the file name, not the string value, because a - * 64-bit collision is unlikely enough, especially when also considering the - * name and tag. - */ -struct drgn_dwarf_index_die { - /* - * The next DIE with the same name (as an index into - * drgn_dwarf_index_shard::dies), or UINT32_MAX if this is the last DIE. 
- */ - uint32_t next; - uint8_t tag; - uint64_t file_name_hash; - Dwfl_Module *module; - size_t offset; -}; - DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash, string_eq) DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_specification_map, @@ -2619,23 +2599,18 @@ drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, return false; } -struct drgn_error * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, - Dwarf_Die *die_ret, uint64_t *bias_ret) +struct drgn_dwarf_index_die * +drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) { struct drgn_dwarf_index *dindex = it->dindex; struct drgn_dwarf_index_die *die; - Dwarf *dwarf; - Dwarf_Addr bias; - if (it->any_name) { for (;;) { - struct drgn_dwarf_index_shard *shard; - if (it->shard >= ARRAY_SIZE(dindex->shards)) - return &drgn_stop; + return NULL; - shard = &dindex->shards[it->shard]; + struct drgn_dwarf_index_shard *shard = + &dindex->shards[it->shard]; die = &shard->dies.data[it->index]; if (++it->index >= shard->dies.size) { @@ -2651,12 +2626,11 @@ drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, } } else { for (;;) { - struct drgn_dwarf_index_shard *shard; - if (it->index == UINT32_MAX) - return &drgn_stop; + return NULL; - shard = &dindex->shards[it->shard]; + struct drgn_dwarf_index_shard *shard = + &dindex->shards[it->shard]; die = &shard->dies.data[it->index]; it->index = die->next; @@ -2665,8 +2639,15 @@ drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, break; } } + return die; +} - dwarf = dwfl_module_getdwarf(die->module, &bias); +struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, + Dwarf_Die *die_ret, + uint64_t *bias_ret) +{ + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(die->module, &bias); if (!dwarf) return drgn_error_libdwfl(); if (!dwarf_offdie(dwarf, die->offset, die_ret)) diff --git 
a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 83f05a06f..86db7cdf8 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -62,7 +62,32 @@ extern const Dwfl_Callbacks drgn_dwfl_callbacks; extern const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks; extern const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks; -struct drgn_dwarf_index_die; +/* + * An indexed DIE. + * + * DIEs with the same name but different tags or files are considered distinct. + * We only compare the hash of the file name, not the string value, because a + * 64-bit collision is unlikely enough, especially when also considering the + * name and tag. + */ +struct drgn_dwarf_index_die { + /* + * The next DIE with the same name (as an index into + * drgn_dwarf_index_shard::dies), or UINT32_MAX if this is the last DIE. + */ + uint32_t next; + uint8_t tag; + union { + /* If tag != DW_TAG_namespace. */ + uint64_t file_name_hash; + /* TODO: explain hash */ + /* If tag == DW_TAG_namespace. */ + struct drgn_dwarf_index_namespace *namespace; + }; + Dwfl_Module *module; + size_t offset; +}; + DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, uint32_t) DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) @@ -411,18 +436,24 @@ void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, * DW_TAG_enumerator DIEs. * * @param[in] it DWARF index iterator. + * @return Next DIE, or @c NULL if there are no more matching DIEs. + */ +struct drgn_dwarf_index_die * +drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it); + +/** + * Get a @c Dwarf_Die from a @ref drgn_dwarf_index_die. + * + * @param[in] die Indexed DIE. * @param[out] die_ret Returned DIE. * @param[out] bias_ret Returned difference between addresses in the loaded * module and addresses in the debugging information. This may be @c NULL if it * is not needed. - * @return @c NULL on success, non-@c NULL on error. 
In particular, when there - * are no more matching DIEs, @p die_ret is not modified and an error with code - * @ref DRGN_ERROR_STOP is returned; this @ref DRGN_ERROR_STOP error does not - * have to be passed to @ref drgn_error_destroy(). + * @return @c NULL on success, non-@c NULL on error. */ -struct drgn_error * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, - Dwarf_Die *die_ret, uint64_t *bias_ret); +struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, + Dwarf_Die *die_ret, + uint64_t *bias_ret); /** @} */ diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/dwarf_info_cache.c index d8d2d6f93..da729f981 100644 --- a/libdrgn/dwarf_info_cache.c +++ b/libdrgn/dwarf_info_cache.c @@ -346,29 +346,30 @@ drgn_dwarf_info_cache_find_complete(struct drgn_dwarf_info_cache *dicache, struct drgn_type **ret) { struct drgn_error *err; - struct drgn_dwarf_index_iterator it; - Dwarf_Die die; - struct drgn_qualified_type qualified_type; + struct drgn_dwarf_index_iterator it; drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, strlen(name), &tag, 1); /* * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs * with DW_AT_declaration, so this will always be a complete type. */ - err = drgn_dwarf_index_iterator_next(&it, &die, NULL); - if (err) - return err; + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_stop; /* * Look for another matching DIE. If there is one, then we can't be sure * which type this is, so leave it incomplete rather than guessing. 
*/ - err = drgn_dwarf_index_iterator_next(&it, &die, NULL); - if (!err) + if (drgn_dwarf_index_iterator_next(&it)) return &drgn_stop; - else if (err->code != DRGN_ERROR_STOP) - return err; + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die, NULL); + if (err) + return err; + struct drgn_qualified_type qualified_type; err = drgn_type_from_dwarf(dicache, &die, &qualified_type); if (err) return err; @@ -1237,10 +1238,8 @@ struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, { struct drgn_error *err; struct drgn_dwarf_info_cache *dicache = arg; - struct drgn_dwarf_index_iterator it; - Dwarf_Die die; - uint64_t tag; + uint64_t tag; switch (kind) { case DRGN_TYPE_INT: case DRGN_TYPE_BOOL: @@ -1266,9 +1265,15 @@ struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, UNREACHABLE(); } + struct drgn_dwarf_index_iterator it; drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, name_len, &tag, 1); - while (!(err = drgn_dwarf_index_iterator_next(&it, &die, NULL))) { + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die, NULL); + if (err) + return err; if (die_matches_filename(&die, filename)) { err = drgn_type_from_dwarf(dicache, &die, ret); if (err) @@ -1281,8 +1286,6 @@ struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, return NULL; } } - if (err && err->code != DRGN_ERROR_STOP) - return err; return &drgn_not_found; } @@ -1436,13 +1439,9 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, { struct drgn_error *err; struct drgn_dwarf_info_cache *dicache = arg; - uint64_t tags[3]; - size_t num_tags; - struct drgn_dwarf_index_iterator it; - Dwarf_Die die; - uint64_t bias; - num_tags = 0; + uint64_t tags[3]; + size_t num_tags = 0; if (flags & DRGN_FIND_OBJECT_CONSTANT) tags[num_tags++] = DW_TAG_enumerator; if (flags & DRGN_FIND_OBJECT_FUNCTION) @@ -1450,9 +1449,16 @@ 
drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, if (flags & DRGN_FIND_OBJECT_VARIABLE) tags[num_tags++] = DW_TAG_variable; + struct drgn_dwarf_index_iterator it; drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, strlen(name), tags, num_tags); - while (!(err = drgn_dwarf_index_iterator_next(&it, &die, &bias))) { + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + uint64_t bias; + err = drgn_dwarf_index_get_die(index_die, &die, &bias); + if (err) + return err; if (!die_matches_filename(&die, filename)) continue; switch (dwarf_tag(&die)) { @@ -1470,8 +1476,6 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, UNREACHABLE(); } } - if (err && err->code != DRGN_ERROR_STOP) - return err; return &drgn_not_found; } diff --git a/libdrgn/program.c b/libdrgn/program.c index f92f2a359..db3d3d337 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -631,29 +631,26 @@ static void drgn_program_set_language_from_main(struct drgn_program *prog, struct drgn_dwarf_index *dindex) { struct drgn_error *err; + struct drgn_dwarf_index_iterator it; static const uint64_t tags[] = { DW_TAG_subprogram }; - drgn_dwarf_index_iterator_init(&it, dindex, "main", strlen("main"), tags, ARRAY_SIZE(tags)); - for (;;) { + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { Dwarf_Die die; - const struct drgn_language *lang; - - err = drgn_dwarf_index_iterator_next(&it, &die, NULL); - if (err == &drgn_stop) { - break; - } else if (err) { + err = drgn_dwarf_index_get_die(index_die, &die, NULL); + if (err) { drgn_error_destroy(err); continue; } + const struct drgn_language *lang; err = drgn_language_from_die(&die, &lang); if (err) { drgn_error_destroy(err); continue; } - if (lang) { prog->lang = lang; break; From a51abfcd70e8941349c37f2bf5bb53c6dd8841aa Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Thu, 20 Aug 
2020 11:36:44 -0700 Subject: [PATCH 48/56] libdrgn: dwarf_index: keep CUs after indexing In order to index namespaces lazily, we need the CU structures. Rename struct compilation_unit to the less generic struct drgn_dwarf_index_cu and keep the CUs in a vector in the dindex. Signed-off-by: Jay Kamat --- libdrgn/dwarf_index.c | 87 ++++++++++++++++++++++++------------------- libdrgn/dwarf_index.h | 4 ++ 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index e8d520795..fe742ca30 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -215,7 +215,7 @@ DEFINE_VECTOR(uint8_vector, uint8_t) DEFINE_VECTOR(uint32_vector, uint32_t) DEFINE_VECTOR(uint64_vector, uint64_t) -struct compilation_unit { +struct drgn_dwarf_index_cu { struct drgn_dwfl_module_userdata *userdata; const char *ptr; size_t unit_length; @@ -240,6 +240,8 @@ struct compilation_unit { size_t num_file_names; }; +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) + static inline const char *section_ptr(Elf_Data *data, size_t offset) { if (offset > data->d_size) @@ -459,6 +461,7 @@ struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, drgn_dwarf_index_die_vector_init(&shard->dies); } drgn_dwarf_index_specification_map_init(&dindex->specifications); + drgn_dwarf_index_cu_vector_init(&dindex->cus); memset(&dindex->errors, 0, sizeof(dindex->errors)); dindex->num_errors = 0; const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); @@ -472,6 +475,13 @@ struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, return NULL; } +static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) +{ + free(cu->file_name_hashes); + free(cu->abbrev_insns); + free(cu->abbrev_decls); +} + void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) { if (!dindex) @@ -482,12 +492,16 @@ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) assert(drgn_dwarf_module_table_size(&dindex->module_table) == 0); 
drgn_dwarf_module_vector_deinit(&dindex->no_build_id); drgn_dwarf_module_table_deinit(&dindex->module_table); + for (size_t i = 0; i < dindex->cus.size; i++) + drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); + drgn_dwarf_index_cu_vector_deinit(&dindex->cus); drgn_dwarf_index_specification_map_deinit(&dindex->specifications); for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { drgn_dwarf_index_die_vector_deinit(&dindex->shards[i].dies); drgn_dwarf_index_die_map_deinit(&dindex->shards[i].map); omp_destroy_lock(&dindex->shards[i].lock); } + dwfl_end(dindex->dwfl); } @@ -1138,7 +1152,7 @@ get_debug_sections(struct drgn_dwfl_module_userdata *userdata, static struct drgn_error *read_compilation_unit_header(const char *ptr, const char *end, - struct compilation_unit *cu) + struct drgn_dwarf_index_cu *cu) { uint32_t tmp; if (!mread_u32(&ptr, end, cu->bswap, &tmp)) @@ -1177,11 +1191,10 @@ static struct drgn_error *read_compilation_unit_header(const char *ptr, return NULL; } -DEFINE_VECTOR(compilation_unit_vector, struct compilation_unit) static struct drgn_error * read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, - struct compilation_unit_vector *cus) + struct drgn_dwarf_index_cu_vector *cus) { struct drgn_error *err; @@ -1193,9 +1206,8 @@ read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, const char *ptr = section_ptr(userdata->debug_info, 0); const char *end = section_end(userdata->debug_info); while (ptr < end) { - struct compilation_unit *cu; - - cu = compilation_unit_vector_append_entry(cus); + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_append_entry(cus); if (!cu) return &drgn_enomem; cu->userdata = userdata; @@ -1217,9 +1229,9 @@ read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, return NULL; } -static struct drgn_error *read_module_cus(struct drgn_dwarf_module *module, - struct compilation_unit_vector *cus, - const char **name_ret) +static struct drgn_error * +read_module_cus(struct 
drgn_dwarf_module *module, + struct drgn_dwarf_index_cu_vector *cus, const char **name_ret) { const size_t orig_cus_size = cus->size; for (size_t i = 0; i < module->dwfl_modules.size; i++) { @@ -1249,15 +1261,14 @@ static struct drgn_error *read_module_cus(struct drgn_dwarf_module *module, static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, struct drgn_dwarf_module **unindexed, - size_t num_unindexed, - struct compilation_unit_vector *all_cus) + size_t num_unindexed) { struct drgn_error *err = NULL; #pragma omp parallel { int thread_num = omp_get_thread_num(); - struct compilation_unit_vector cus = VECTOR_INIT; + struct drgn_dwarf_index_cu_vector cus = VECTOR_INIT; #pragma omp for schedule(dynamic) for (size_t i = 0; i < num_unindexed; i++) { @@ -1269,7 +1280,7 @@ static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, module_err = read_module_cus(unindexed[i], thread_num == 0 ? - all_cus : &cus, + &dindex->cus : &cus, &name); if (module_err) { #pragma omp critical(drgn_read_cus) @@ -1288,24 +1299,24 @@ static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, if (cus.size) { #pragma omp critical(drgn_read_cus) if (!err) { - if (compilation_unit_vector_reserve(all_cus, - all_cus->size + cus.size)) { - memcpy(all_cus->data + all_cus->size, + if (drgn_dwarf_index_cu_vector_reserve(&dindex->cus, + dindex->cus.size + cus.size)) { + memcpy(dindex->cus.data + dindex->cus.size, cus.data, cus.size * sizeof(*cus.data)); - all_cus->size += cus.size; + dindex->cus.size += cus.size; } else { err = &drgn_enomem; } } } - compilation_unit_vector_deinit(&cus); + drgn_dwarf_index_cu_vector_deinit(&cus); } return err; } static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, - struct compilation_unit *cu, + struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, struct uint8_vector *insns) { @@ -1591,7 +1602,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, return NULL; } -static struct 
drgn_error *read_abbrev_table(struct compilation_unit *cu) +static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu) { Elf_Data *debug_abbrev = cu->userdata->debug_abbrev; const char *ptr = section_ptr(debug_abbrev, cu->debug_abbrev_offset); @@ -1617,7 +1628,7 @@ static struct drgn_error *read_abbrev_table(struct compilation_unit *cu) return NULL; } -static struct drgn_error *skip_lnp_header(struct compilation_unit *cu, +static struct drgn_error *skip_lnp_header(struct drgn_dwarf_index_cu *cu, const char **ptr, const char *end) { uint32_t tmp; @@ -1681,7 +1692,7 @@ DEFINE_VECTOR(siphash_vector, struct siphash) static struct drgn_error * read_file_name_table(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu, size_t stmt_list) + struct drgn_dwarf_index_cu *cu, size_t stmt_list) { /* * We don't care about hash flooding attacks, so don't bother with the @@ -1803,7 +1814,7 @@ index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, * DW_AT_specification. */ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu) + struct drgn_dwarf_index_cu *cu) { struct drgn_error *err; const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; @@ -2107,7 +2118,7 @@ static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, /* Second pass: index the actual DIEs. */ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu) + struct drgn_dwarf_index_cu *cu) { struct drgn_error *err; const char *ptr = &cu->ptr[cu->is_64_bit ? 
23 : 11]; @@ -2417,17 +2428,17 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) } static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, - struct compilation_unit *cus, - size_t num_cus) + const size_t old_cus_size) { struct drgn_error *err = NULL; #pragma omp parallel { #pragma omp for schedule(dynamic) - for (size_t i = 0; i < num_cus; i++) { + for (size_t i = old_cus_size; i < dindex->cus.size; i++) { if (!err) { struct drgn_error *cu_err = - index_cu_first_pass(dindex, &cus[i]); + index_cu_first_pass(dindex, + &dindex->cus.data[i]); if (cu_err) { #pragma omp critical(drgn_index_cus) if (err) @@ -2439,10 +2450,11 @@ static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, } #pragma omp for schedule(dynamic) - for (size_t i = 0; i < num_cus; i++) { + for (size_t i = old_cus_size; i < dindex->cus.size; i++) { if (!err) { struct drgn_error *cu_err = - index_cu_second_pass(dindex, &cus[i]); + index_cu_second_pass(dindex, + &dindex->cus.data[i]); if (cu_err) { #pragma omp critical(drgn_index_cus) if (err) @@ -2451,9 +2463,6 @@ static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, err = cu_err; } } - free(cus[i].file_name_hashes); - free(cus[i].abbrev_insns); - free(cus[i].abbrev_decls); } } return err; @@ -2469,7 +2478,7 @@ drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex, { struct drgn_error *err; struct drgn_dwarf_module_vector unindexed = VECTOR_INIT; - struct compilation_unit_vector cus = VECTOR_INIT; + const size_t old_cus_size = dindex->cus.size; dwfl_report_end(dindex->dwfl, NULL, NULL); if (report_from_dwfl && @@ -2481,25 +2490,27 @@ drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex, err = drgn_dwarf_index_get_unindexed(dindex, &unindexed); if (err) goto err; - err = read_cus(dindex, unindexed.data, unindexed.size, &cus); + err = read_cus(dindex, unindexed.data, unindexed.size); if (err) goto err; /* * After this point, if we hit an error, then we have to roll 
back the * index. */ - err = index_cus(dindex, cus.data, cus.size); + err = index_cus(dindex, old_cus_size); if (err) { rollback_dwarf_index(dindex); goto err; } out: - compilation_unit_vector_deinit(&cus); drgn_dwarf_module_vector_deinit(&unindexed); return err; err: + for (size_t i = old_cus_size; i < dindex->cus.size; i++) + drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); + dindex->cus.size = old_cus_size; drgn_dwarf_index_free_modules(dindex, false, false); drgn_dwarf_index_reset_errors(dindex); goto out; diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 86db7cdf8..f78328cac 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -218,6 +218,8 @@ DEFINE_HASH_TABLE_TYPE(drgn_dwarf_module_table, struct drgn_dwarf_module *, DEFINE_HASH_SET_TYPE(c_string_set, const char *) +DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) + /** * Fast index of DWARF debugging information. * @@ -244,6 +246,8 @@ struct drgn_dwarf_index { * a program to cause contention. */ struct drgn_dwarf_index_specification_map specifications; + /** Indexed compilation units. */ + struct drgn_dwarf_index_cu_vector cus; Dwfl *dwfl; /** * Formatted errors reported by @ref drgn_dwarf_index_report_error(). From d1beb0184ac1cc87b6682a928944d8844cec8a7f Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Mon, 11 May 2020 10:16:05 -0700 Subject: [PATCH 49/56] libdrgn: add support for objects in C++ namespaces DWARF represents namespaces with DW_TAG_namespace DIEs. Add these to the DWARF index, with each namespace being its own sub-index. We only index the namespace itself when it is first accessed, which should help with startup time and simplifies tracking. 
Signed-off-by: Jay Kamat --- libdrgn/dwarf_index.c | 235 +++++++++++++++++++++++++++---------- libdrgn/dwarf_index.h | 45 +++++-- libdrgn/dwarf_info_cache.c | 42 ++++++- libdrgn/program.c | 11 +- tests/test_dwarf.py | 156 ++++++++++++++++++++++++ 5 files changed, 403 insertions(+), 86 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index fe742ca30..501236c5a 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -242,6 +242,16 @@ struct drgn_dwarf_index_cu { DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) +/* DIE which needs to be indexed. */ +struct drgn_dwarf_index_pending_die { + /* Compilation unit containing DIE. */ + struct drgn_dwarf_index_cu *cu; + /* Offset of DIE in .debug_info. */ + size_t offset; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) + static inline const char *section_ptr(Elf_Data *data, size_t offset) { if (offset > data->d_size) @@ -448,18 +458,28 @@ static void drgn_dwarf_index_free_modules(struct drgn_dwarf_index *dindex, dwfl_report_end(dindex->dwfl, drgn_dwfl_module_removed, &arg); } +static void +drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index *dindex) +{ + for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { + struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + omp_init_lock(&shard->lock); + drgn_dwarf_index_die_map_init(&shard->map); + drgn_dwarf_index_die_vector_init(&shard->dies); + } + ns->dindex = dindex; + drgn_dwarf_index_pending_die_vector_init(&ns->pending_dies); + ns->saved_err = NULL; +} + struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, const Dwfl_Callbacks *callbacks) { dindex->dwfl = dwfl_begin(callbacks); if (!dindex->dwfl) return drgn_error_libdwfl(); - for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { - struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; - omp_init_lock(&shard->lock); - drgn_dwarf_index_die_map_init(&shard->map); - 
drgn_dwarf_index_die_vector_init(&shard->dies); - } + drgn_dwarf_index_namespace_init(&dindex->global, dindex); drgn_dwarf_index_specification_map_init(&dindex->specifications); drgn_dwarf_index_cu_vector_init(&dindex->cus); memset(&dindex->errors, 0, sizeof(dindex->errors)); @@ -482,6 +502,26 @@ static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) free(cu->abbrev_decls); } +static void +drgn_dwarf_index_namespace_deinit(struct drgn_dwarf_index_namespace *ns) +{ + drgn_error_destroy(ns->saved_err); + drgn_dwarf_index_pending_die_vector_deinit(&ns->pending_dies); + for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { + struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + for (size_t j = 0; j < shard->dies.size; j++) { + struct drgn_dwarf_index_die *die = &shard->dies.data[j]; + if (die->tag == DW_TAG_namespace) { + drgn_dwarf_index_namespace_deinit(die->namespace); + free(die->namespace); + } + } + drgn_dwarf_index_die_vector_deinit(&shard->dies); + drgn_dwarf_index_die_map_deinit(&shard->map); + omp_destroy_lock(&shard->lock); + } +} + void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) { if (!dindex) @@ -496,12 +536,7 @@ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); drgn_dwarf_index_cu_vector_deinit(&dindex->cus); drgn_dwarf_index_specification_map_deinit(&dindex->specifications); - for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { - drgn_dwarf_index_die_vector_deinit(&dindex->shards[i].dies); - drgn_dwarf_index_die_map_deinit(&dindex->shards[i].map); - omp_destroy_lock(&dindex->shards[i].lock); - } - + drgn_dwarf_index_namespace_deinit(&dindex->global); dwfl_end(dindex->dwfl); } @@ -1358,6 +1393,8 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, case DW_TAG_enumerator: /* Functions. */ case DW_TAG_subprogram: + /* Namespaces */ + case DW_TAG_namespace: /* If adding anything here, make sure it fits in DIE_FLAG_TAG_MASK. 
*/ should_index = true; break; @@ -1436,7 +1473,9 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, default: break; } - } else if (name == DW_AT_decl_file && should_index) { + } else if (name == DW_AT_decl_file && should_index && + /* Namespaces are merged, so we ignore their file. */ + tag != DW_TAG_namespace) { switch (form) { case DW_FORM_data1: insn = ATTRIB_DECL_FILE_DATA1; @@ -1811,7 +1850,7 @@ index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, /* * First pass: read the abbreviation and file name tables and index DIEs with - * DW_AT_specification. + * DW_AT_specification. This recurses into namespaces. */ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, struct drgn_dwarf_index_cu *cu) @@ -2010,7 +2049,8 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, } if (insn & DIE_FLAG_CHILDREN) { - if (sibling) + if (sibling && + (insn & DIE_FLAG_TAG_MASK) != DW_TAG_namespace) ptr = sibling; else depth++; @@ -2034,7 +2074,8 @@ static bool find_definition(struct drgn_dwarf_index *dindex, uintptr_t die_addr, return true; } -static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint8_t tag, +static bool append_die_entry(struct drgn_dwarf_index *dindex, + struct drgn_dwarf_index_shard *shard, uint8_t tag, uint64_t file_name_hash, Dwfl_Module *module, size_t offset) { @@ -2046,13 +2087,24 @@ static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint8_t tag, return false; die->next = UINT32_MAX; die->tag = tag; - die->file_name_hash = file_name_hash; + if (die->tag == DW_TAG_namespace) { + die->namespace = malloc(sizeof(*die->namespace)); + if (!die->namespace) { + shard->dies.size--; + return false; + } + drgn_dwarf_index_namespace_init(die->namespace, dindex); + } else { + die->file_name_hash = file_name_hash; + } die->module = module; die->offset = offset; + return true; } -static struct drgn_error *index_die(struct 
drgn_dwarf_index *dindex, +static struct drgn_error *index_die(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index_cu *cu, const char *name, uint8_t tag, uint64_t file_name_hash, Dwfl_Module *module, size_t offset) @@ -2071,33 +2123,33 @@ static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, struct drgn_dwarf_index_die *die; hp = drgn_dwarf_index_die_map_hash(&entry.key); - shard = &dindex->shards[hash_pair_to_shard(hp)]; + shard = &ns->shards[hash_pair_to_shard(hp)]; omp_set_lock(&shard->lock); it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, hp); if (!it.entry) { - if (!append_die_entry(shard, tag, file_name_hash, module, - offset)) { + if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, + module, offset)) { err = &drgn_enomem; - goto out; + goto err; } entry.value = shard->dies.size - 1; - if (drgn_dwarf_index_die_map_insert_searched(&shard->map, - &entry, hp, - NULL) == 1) - err = NULL; - else + if (!drgn_dwarf_index_die_map_insert_searched(&shard->map, + &entry, hp, + NULL)) { err = &drgn_enomem; + goto err; + } + die = &shard->dies.data[shard->dies.size - 1]; goto out; } die = &shard->dies.data[it.entry->value]; for (;;) { - if (die->tag == tag && - die->file_name_hash == file_name_hash) { - err = NULL; + const uint64_t die_file_name_hash = + die->tag == DW_TAG_namespace ? 
0 : die->file_name_hash; + if (die->tag == tag && die_file_name_hash == file_name_hash) goto out; - } if (die->next == UINT32_MAX) break; @@ -2105,23 +2157,36 @@ static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, } index = die - shard->dies.data; - if (!append_die_entry(shard, tag, file_name_hash, module, offset)) { + if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, module, + offset)) { err = &drgn_enomem; - goto out; + goto err; } + die = &shard->dies.data[shard->dies.size - 1]; shard->dies.data[index].next = shard->dies.size - 1; - err = NULL; out: + if (tag == DW_TAG_namespace) { + struct drgn_dwarf_index_pending_die *pending = + drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies); + if (!pending) { + err = &drgn_enomem; + goto err; + } + pending->cu = cu; + pending->offset = offset; + } + err = NULL; +err: omp_unset_lock(&shard->lock); return err; } /* Second pass: index the actual DIEs. */ -static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_index_cu *cu) +static struct drgn_error * +index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index_cu *cu, const char *ptr) { struct drgn_error *err; - const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; const char *end = &cu->ptr[(cu->is_64_bit ? 
12 : 4) + cu->unit_length]; Elf_Data *debug_info = cu->userdata->debug_info; const char *debug_info_buffer = section_ptr(debug_info, 0); @@ -2320,7 +2385,7 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, */ die_offset = depth1_offset; } else if (declaration && - !find_definition(dindex, + !find_definition(ns->dindex, (uintptr_t)debug_info_buffer + die_offset, &module, &die_offset)) { @@ -2337,10 +2402,8 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, file_name_hash = cu->file_name_hashes[decl_file - 1]; else file_name_hash = 0; - if ((err = index_die(dindex, name, - insn & DIE_FLAG_TAG_MASK, - file_name_hash, module, - die_offset))) + if ((err = index_die(ns, cu, name, tag, file_name_hash, + module, die_offset))) return err; } @@ -2348,9 +2411,12 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, if (insn & DIE_FLAG_CHILDREN) { /* * We must descend into the children of enumeration_type - * DIEs to index enumerator DIEs. + * DIEs to index enumerator DIEs. We don't want to skip + * over the children of the top-level DIE even if it has + * a sibling pointer. 
*/ - if (sibling && tag != DW_TAG_enumeration_type) + if (sibling && tag != DW_TAG_enumeration_type && + depth > 0) ptr = sibling; else depth++; @@ -2363,8 +2429,9 @@ static struct drgn_error *index_cu_second_pass(struct drgn_dwarf_index *dindex, static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) { - for (size_t i = 0; i < ARRAY_SIZE(dindex->shards); i++) { - struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; + for (size_t i = 0; i < ARRAY_SIZE(dindex->global.shards); i++) { + struct drgn_dwarf_index_shard *shard = + &dindex->global.shards[i]; /* * Because we're deleting everything that was added since the @@ -2436,9 +2503,9 @@ static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, #pragma omp for schedule(dynamic) for (size_t i = old_cus_size; i < dindex->cus.size; i++) { if (!err) { + struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; struct drgn_error *cu_err = - index_cu_first_pass(dindex, - &dindex->cus.data[i]); + index_cu_first_pass(dindex, cu); if (cu_err) { #pragma omp critical(drgn_index_cus) if (err) @@ -2452,9 +2519,11 @@ static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, #pragma omp for schedule(dynamic) for (size_t i = old_cus_size; i < dindex->cus.size; i++) { if (!err) { + struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; + const char *ptr = &cu->ptr[cu->is_64_bit ? 
23 : 11]; struct drgn_error *cu_err = - index_cu_second_pass(dindex, - &dindex->cus.data[i]); + index_cu_second_pass(&dindex->global, + cu, ptr); if (cu_err) { #pragma omp critical(drgn_index_cus) if (err) @@ -2560,12 +2629,49 @@ bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, return c_string_set_search(&dindex->names, &name).entry != NULL; } -void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index *dindex, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags) +static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) +{ + if (ns->saved_err) + return drgn_error_copy(ns->saved_err); + + struct drgn_error *err = NULL; + #pragma omp for schedule(dynamic) + for (size_t i = 0; i < ns->pending_dies.size; i++) { + if (!err) { + struct drgn_dwarf_index_pending_die *pending = + &ns->pending_dies.data[i]; + const char *ptr = + section_ptr(pending->cu->userdata->debug_info, + pending->offset); + struct drgn_error *cu_err = + index_cu_second_pass(ns, pending->cu, ptr); + if (cu_err) { + #pragma omp critical(drgn_index_namespace) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + } + if (err) { + ns->saved_err = err; + return drgn_error_copy(ns->saved_err); + } + ns->pending_dies.size = 0; + return err; +} + +struct drgn_error * +drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, + struct drgn_dwarf_index_namespace *ns, + const char *name, size_t name_len, + const uint64_t *tags, size_t num_tags) { - it->dindex = dindex; + struct drgn_error *err = index_namespace(ns); + if (err) + return err; + it->ns = ns; if (name) { struct string key = { .str = name, @@ -2577,22 +2683,23 @@ void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, hp = drgn_dwarf_index_die_map_hash(&key); it->shard = hash_pair_to_shard(hp); - shard = &dindex->shards[it->shard]; + shard = &ns->shards[it->shard]; map_it = 
drgn_dwarf_index_die_map_search_hashed(&shard->map, &key, hp); it->index = map_it.entry ? map_it.entry->value : UINT32_MAX; it->any_name = false; } else { it->index = 0; - for (it->shard = 0; it->shard < ARRAY_SIZE(dindex->shards); + for (it->shard = 0; it->shard < ARRAY_SIZE(ns->shards); it->shard++) { - if (dindex->shards[it->shard].dies.size) + if (ns->shards[it->shard].dies.size) break; } it->any_name = true; } it->tags = tags; it->num_tags = num_tags; + return NULL; } static inline bool @@ -2613,21 +2720,21 @@ drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, struct drgn_dwarf_index_die * drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) { - struct drgn_dwarf_index *dindex = it->dindex; + struct drgn_dwarf_index_namespace *ns = it->ns; struct drgn_dwarf_index_die *die; if (it->any_name) { for (;;) { - if (it->shard >= ARRAY_SIZE(dindex->shards)) + if (it->shard >= ARRAY_SIZE(ns->shards)) return NULL; struct drgn_dwarf_index_shard *shard = - &dindex->shards[it->shard]; + &ns->shards[it->shard]; die = &shard->dies.data[it->index]; if (++it->index >= shard->dies.size) { it->index = 0; - while (++it->shard < ARRAY_SIZE(dindex->shards)) { - if (dindex->shards[it->shard].dies.size) + while (++it->shard < ARRAY_SIZE(ns->shards)) { + if (ns->shards[it->shard].dies.size) break; } } @@ -2641,7 +2748,7 @@ drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) return NULL; struct drgn_dwarf_index_shard *shard = - &dindex->shards[it->shard]; + &ns->shards[it->shard]; die = &shard->dies.data[it->index]; it->index = die->next; diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index f78328cac..11f9ca847 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -78,9 +78,11 @@ struct drgn_dwarf_index_die { uint32_t next; uint8_t tag; union { - /* If tag != DW_TAG_namespace. */ + /* + * If tag != DW_TAG_namespace (namespaces are merged, so they + * don't need this). 
+ */ uint64_t file_name_hash; - /* TODO: explain hash */ /* If tag == DW_TAG_namespace. */ struct drgn_dwarf_index_namespace *namespace; }; @@ -220,6 +222,25 @@ DEFINE_HASH_SET_TYPE(c_string_set, const char *) DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, + struct drgn_dwarf_index_pending_die) + +/** Mapping from names/tags to DIEs/nested namespaces. */ +struct drgn_dwarf_index_namespace { + /** + * Index shards. + * + * This is sharded to reduce lock contention. + */ + struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; + /** Parent DWARF index. */ + struct drgn_dwarf_index *dindex; + /** DIEs we have not indexed yet. */ + struct drgn_dwarf_index_pending_die_vector pending_dies; + /** Saved error from a previous index. */ + struct drgn_error *saved_err; +}; + /** * Fast index of DWARF debugging information. * @@ -231,12 +252,8 @@ DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) * Searches in the index are done with a @ref drgn_dwarf_index_iterator. */ struct drgn_dwarf_index { - /** - * Index shards. - * - * This is sharded to reduce lock contention. - */ - struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; + /** Global namespace. */ + struct drgn_dwarf_index_namespace global; /** * Map from address of DIE referenced by DW_AT_specification to DIE that * references it. This is used to resolve DIEs with DW_AT_declaration to @@ -407,7 +424,7 @@ bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, */ struct drgn_dwarf_index_iterator { /** @privatesection */ - struct drgn_dwarf_index *dindex; + struct drgn_dwarf_index_namespace *ns; const uint64_t *tags; size_t num_tags; size_t shard; @@ -424,11 +441,13 @@ struct drgn_dwarf_index_iterator { * @param[in] name_len Length of @c name. * @param[in] tags List of DIE tags to search for. 
* @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index *dindex, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags); +struct drgn_error * +drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, + struct drgn_dwarf_index_namespace *ns, + const char *name, size_t name_len, + const uint64_t *tags, size_t num_tags); /** * Get the next matching DIE from a DWARF index iterator. diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/dwarf_info_cache.c index da729f981..6f4e6553f 100644 --- a/libdrgn/dwarf_info_cache.c +++ b/libdrgn/dwarf_info_cache.c @@ -348,8 +348,11 @@ drgn_dwarf_info_cache_find_complete(struct drgn_dwarf_info_cache *dicache, struct drgn_error *err; struct drgn_dwarf_index_iterator it; - drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, - strlen(name), &tag, 1); + err = drgn_dwarf_index_iterator_init(&it, &dicache->dindex.global, name, + strlen(name), &tag, 1); + if (err) + return err; + /* * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs * with DW_AT_declaration, so this will always be a complete type. 
@@ -1266,8 +1269,10 @@ struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, } struct drgn_dwarf_index_iterator it; - drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, name_len, - &tag, 1); + err = drgn_dwarf_index_iterator_init(&it, &dicache->dindex.global, name, + name_len, &tag, 1); + if (err) + return err; struct drgn_dwarf_index_die *index_die; while ((index_die = drgn_dwarf_index_iterator_next(&it))) { Dwarf_Die die; @@ -1440,6 +1445,29 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, struct drgn_error *err; struct drgn_dwarf_info_cache *dicache = arg; + struct drgn_dwarf_index_namespace *ns = &dicache->dindex.global; + if (name_len >= 2 && memcmp(name, "::", 2) == 0) { + /* Explicit global namespace. */ + name_len -= 2; + name += 2; + } + const char *colons; + while ((colons = memmem(name, name_len, "::", 2))) { + struct drgn_dwarf_index_iterator it; + uint64_t ns_tag = DW_TAG_namespace; + err = drgn_dwarf_index_iterator_init(&it, ns, name, + colons - name, &ns_tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_not_found; + ns = index_die->namespace; + name_len -= colons + 2 - name; + name = colons + 2; + } + uint64_t tags[3]; size_t num_tags = 0; if (flags & DRGN_FIND_OBJECT_CONSTANT) @@ -1450,8 +1478,10 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, tags[num_tags++] = DW_TAG_variable; struct drgn_dwarf_index_iterator it; - drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, - strlen(name), tags, num_tags); + err = drgn_dwarf_index_iterator_init(&it, ns, name, strlen(name), tags, + num_tags); + if (err) + return err; struct drgn_dwarf_index_die *index_die; while ((index_die = drgn_dwarf_index_iterator_next(&it))) { Dwarf_Die die; diff --git a/libdrgn/program.c b/libdrgn/program.c index db3d3d337..872f40446 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ 
-631,11 +631,16 @@ static void drgn_program_set_language_from_main(struct drgn_program *prog, struct drgn_dwarf_index *dindex) { struct drgn_error *err; - struct drgn_dwarf_index_iterator it; static const uint64_t tags[] = { DW_TAG_subprogram }; - drgn_dwarf_index_iterator_init(&it, dindex, "main", strlen("main"), - tags, ARRAY_SIZE(tags)); + + err = drgn_dwarf_index_iterator_init(&it, &dindex->global, "main", + strlen("main"), tags, + ARRAY_SIZE(tags)); + if (err) { + drgn_error_destroy(err); + return; + } struct drgn_dwarf_index_die *index_die; while ((index_die = drgn_dwarf_index_iterator_next(&it))) { Dwarf_Die die; diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 04ae4ff57..449818dbf 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -3504,11 +3504,167 @@ def test_specification(self): Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), ) + def test_namespace_reverse_specification(self): + """Test specification inside namespace while declaration is outside of it.""" + dies = ( + int_die, + DwarfDie( + DW_TAG.namespace, + [ + DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 2), + ], + [ + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 2), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), + ) + ], + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), + ), + ) + + prog = dwarf_program(dies) + self.assertEqual( + prog["x"], + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), + ) + def test_not_found(self): prog = dwarf_program(int_die) self.assertRaisesRegex(LookupError, "could not find", prog.object, "y") +class TestScopes(TestCase): + def test_global_namespace(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + 
DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "target"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 123), + ), + ), + ) + ) + self.assertEqual( + prog["::target"], Object(prog, prog.int_type("int", 4, True), 123) + ) + self.assertEqual(prog["::target"], prog["target"]) + + def test_namespaces_single(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"),), + ( + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "target"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 123), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog["moho::target"], Object(prog, prog.int_type("int", 4, True), 123) + ) + + def test_namespaces_gcc(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"),), + ( + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "target"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 123), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog["moho::target"], Object(prog, prog.int_type("int", 4, True), 123) + ) + + def test_namespaces_nested(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"),), + ( + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "eve"),), + ( + DwarfDie( + DW_TAG.namespace, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "kerbin" + ), + ), + ( + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "minmus" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, 0 + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 47 + ), + ), + ), + ), + ), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog["moho::eve::kerbin::minmus"], + Object(prog, prog.int_type("int", 4, 
True), 47), + ) + + class TestProgram(TestCase): def test_language(self): dies = ( From 3ac9ae357b4f0ef827fc041a9166f47c73ebb807 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Sep 2020 17:41:23 -0700 Subject: [PATCH 50/56] libdrgn: rename drgn_dwarf_info_cache to drgn_debug_info The current name is too verbose. Let's go with a shorter, more generic name. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 4 +- libdrgn/{dwarf_info_cache.c => debug_info.c} | 291 +++++++++---------- libdrgn/debug_info.h | 94 ++++++ libdrgn/dwarf_info_cache.h | 100 ------- libdrgn/program.c | 38 ++- libdrgn/program.h | 2 +- 6 files changed, 258 insertions(+), 271 deletions(-) rename libdrgn/{dwarf_info_cache.c => debug_info.c} (82%) create mode 100644 libdrgn/debug_info.h delete mode 100644 libdrgn/dwarf_info_cache.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 43b5cdd06..c286dddce 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -20,10 +20,10 @@ ARCH_INS = arch_x86_64.c.in libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ binary_search_tree.h \ cityhash.h \ + debug_info.c \ + debug_info.h \ dwarf_index.c \ dwarf_index.h \ - dwarf_info_cache.c \ - dwarf_info_cache.h \ error.c \ error.h \ hash_table.c \ diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/debug_info.c similarity index 82% rename from libdrgn/dwarf_info_cache.c rename to libdrgn/debug_info.c index 6f4e6553f..6b8690437 100644 --- a/libdrgn/dwarf_info_cache.c +++ b/libdrgn/debug_info.c @@ -7,8 +7,7 @@ #include #include "internal.h" -#include "dwarf_index.h" -#include "dwarf_info_cache.h" +#include "debug_info.h" #include "hash_table.h" #include "object.h" #include "object_index.h" @@ -16,7 +15,7 @@ #include "type.h" #include "vector.h" -DEFINE_HASH_TABLE_FUNCTIONS(dwarf_type_map, hash_pair_ptr_type, +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, hash_pair_ptr_type, hash_table_scalar_eq) struct drgn_type_from_dwarf_thunk { @@ -123,7 +122,7 @@ static int dwarf_flag(Dwarf_Die *die, 
unsigned int name, bool *ret) * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array * type. * - * @param[in] dicache Debugging information cache. + * @param[in] dbinfo Debugging information. * @param[in] die DIE to parse. * @param[in] can_be_incomplete_array Whether the type can be an incomplete * array type. If this is @c false and the type appears to be an incomplete @@ -135,24 +134,24 @@ static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) * @return @c NULL on success, non-@c NULL on error. */ static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, bool can_be_incomplete_array, +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + bool can_be_incomplete_array, bool *is_incomplete_array_ret, struct drgn_qualified_type *ret); /** * Parse a type from a DWARF debugging information entry. * - * @param[in] dicache Debugging information cache. + * @param[in] dbinfo Debugging information. * @param[in] die DIE to parse. * @param[out] ret Returned type. * @return @c NULL on success, non-@c NULL on error. 
*/ static inline struct drgn_error * -drgn_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, +drgn_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, struct drgn_qualified_type *ret) { - return drgn_type_from_dwarf_internal(dicache, die, true, NULL, ret); + return drgn_type_from_dwarf_internal(dbinfo, die, true, NULL, ret); } static struct drgn_error * @@ -161,7 +160,7 @@ drgn_type_from_dwarf_thunk_evaluate_fn(struct drgn_type_thunk *thunk, { struct drgn_type_from_dwarf_thunk *t = container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk); - return drgn_type_from_dwarf_internal(thunk->prog->_dicache, &t->die, + return drgn_type_from_dwarf_internal(thunk->prog->_dbinfo, &t->die, t->can_be_incomplete_array, NULL, ret); } @@ -172,9 +171,9 @@ static void drgn_type_from_dwarf_thunk_free_fn(struct drgn_type_thunk *thunk) } static struct drgn_error * -drgn_lazy_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *parent_die, bool can_be_incomplete_array, - const char *tag_name, struct drgn_lazy_type *ret) +drgn_lazy_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *parent_die, + bool can_be_incomplete_array, const char *tag_name, + struct drgn_lazy_type *ret) { Dwarf_Attribute attr_mem, *attr; if (!(attr = dwarf_attr_integrate(parent_die, DW_AT_type, &attr_mem))) { @@ -193,7 +192,7 @@ drgn_lazy_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, if (!thunk) return &drgn_enomem; - thunk->thunk.prog = dicache->prog; + thunk->thunk.prog = dbinfo->prog; thunk->thunk.evaluate_fn = drgn_type_from_dwarf_thunk_evaluate_fn; thunk->thunk.free_fn = drgn_type_from_dwarf_thunk_free_fn; thunk->die = type_die; @@ -206,7 +205,7 @@ drgn_lazy_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, * Parse a type from the @c DW_AT_type attribute of a DWARF debugging * information entry. * - * @param[in] dicache Debugging information cache. + * @param[in] dbinfo Debugging information. * @param[in] parent_die Parent DIE. 
* @param[in] parent_lang Language of the parent DIE if it is already known, @c * NULL if it should be determined from @p parent_die. @@ -221,7 +220,7 @@ drgn_lazy_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, * @return @c NULL on success, non-@c NULL on error. */ struct drgn_error * -drgn_type_from_dwarf_child(struct drgn_dwarf_info_cache *dicache, +drgn_type_from_dwarf_child(struct drgn_debug_info *dbinfo, Dwarf_Die *parent_die, const struct drgn_language *parent_lang, const char *tag_name, @@ -242,7 +241,7 @@ drgn_type_from_dwarf_child(struct drgn_dwarf_info_cache *dicache, if (err) return err; } - ret->type = drgn_void_type(dicache->prog, parent_lang); + ret->type = drgn_void_type(dbinfo->prog, parent_lang); ret->qualifiers = 0; return NULL; } else { @@ -257,13 +256,13 @@ drgn_type_from_dwarf_child(struct drgn_dwarf_info_cache *dicache, "%s has invalid DW_AT_type", tag_name); } - return drgn_type_from_dwarf_internal(dicache, &type_die, + return drgn_type_from_dwarf_internal(dbinfo, &type_die, can_be_incomplete_array, is_incomplete_array_ret, ret); } static struct drgn_error * -drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, +drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, const struct drgn_language *lang, struct drgn_type **ret) { @@ -288,18 +287,18 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, switch (encoding) { case DW_ATE_boolean: - return drgn_bool_type_create(dicache->prog, name, size, lang, + return drgn_bool_type_create(dbinfo->prog, name, size, lang, ret); case DW_ATE_float: - return drgn_float_type_create(dicache->prog, name, size, lang, + return drgn_float_type_create(dbinfo->prog, name, size, lang, ret); case DW_ATE_signed: case DW_ATE_signed_char: - return drgn_int_type_create(dicache->prog, name, size, true, + return drgn_int_type_create(dbinfo->prog, name, size, true, lang, ret); case DW_ATE_unsigned: case DW_ATE_unsigned_char: - return 
drgn_int_type_create(dicache->prog, name, size, false, + return drgn_int_type_create(dbinfo->prog, name, size, false, lang, ret); /* * GCC also supports complex integer types, but DWARF 4 doesn't have an @@ -314,7 +313,7 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, "DW_TAG_base_type has missing or invalid DW_AT_type"); } struct drgn_qualified_type real_type; - struct drgn_error *err = drgn_type_from_dwarf(dicache, &child, + struct drgn_error *err = drgn_type_from_dwarf(dbinfo, &child, &real_type); if (err) return err; @@ -323,7 +322,7 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_type of DW_ATE_complex_float is not a floating-point or integer type"); } - return drgn_complex_type_create(dicache->prog, name, size, + return drgn_complex_type_create(dbinfo->prog, name, size, real_type.type, lang, ret); } default: @@ -341,14 +340,13 @@ drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, * Otherwise, it returns an error. 
*/ static struct drgn_error * -drgn_dwarf_info_cache_find_complete(struct drgn_dwarf_info_cache *dicache, - uint64_t tag, const char *name, - struct drgn_type **ret) +drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, + const char *name, struct drgn_type **ret) { struct drgn_error *err; struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dicache->dindex.global, name, + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, strlen(name), &tag, 1); if (err) return err; @@ -373,7 +371,7 @@ drgn_dwarf_info_cache_find_complete(struct drgn_dwarf_info_cache *dicache, if (err) return err; struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf(dicache, &die, &qualified_type); + err = drgn_type_from_dwarf(dbinfo, &die, &qualified_type); if (err) return err; *ret = qualified_type.type; @@ -487,8 +485,8 @@ parse_member_offset(Dwarf_Die *die, struct drgn_lazy_type *member_type, } static struct drgn_error * -parse_member(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, - bool little_endian, bool can_be_incomplete_array, +parse_member(struct drgn_debug_info *dbinfo, Dwarf_Die *die, bool little_endian, + bool can_be_incomplete_array, struct drgn_compound_type_builder *builder) { Dwarf_Attribute attr_mem, *attr; @@ -516,7 +514,7 @@ parse_member(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } struct drgn_lazy_type member_type; - struct drgn_error *err = drgn_lazy_type_from_dwarf(dicache, die, + struct drgn_error *err = drgn_lazy_type_from_dwarf(dbinfo, die, can_be_incomplete_array, "DW_TAG_member", &member_type); @@ -541,7 +539,7 @@ parse_member(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } static struct drgn_error * -drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, +drgn_compound_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, const struct drgn_language *lang, enum drgn_type_kind kind, struct drgn_type **ret) { @@ -588,14 +586,13 @@ 
drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, dw_tag_str); } if (declaration && tag) { - err = drgn_dwarf_info_cache_find_complete(dicache, dw_tag, tag, - ret); + err = drgn_debug_info_find_complete(dbinfo, dw_tag, tag, ret); if (!err || err->code != DRGN_ERROR_STOP) return err; } if (declaration) { - return drgn_incomplete_compound_type_create(dicache->prog, kind, + return drgn_incomplete_compound_type_create(dbinfo->prog, kind, tag, lang, ret); } @@ -607,7 +604,7 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } struct drgn_compound_type_builder builder; - drgn_compound_type_builder_init(&builder, dicache->prog, kind); + drgn_compound_type_builder_init(&builder, dbinfo->prog, kind); bool little_endian; dwarf_die_is_little_endian(die, false, &little_endian); Dwarf_Die member = {}, child; @@ -615,7 +612,7 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, while (r == 0) { if (dwarf_tag(&child) == DW_TAG_member) { if (member.addr) { - err = parse_member(dicache, &member, + err = parse_member(dbinfo, &member, little_endian, false, &builder); if (err) @@ -635,7 +632,7 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, * structure with at least one other member. */ if (member.addr) { - err = parse_member(dicache, &member, little_endian, + err = parse_member(dbinfo, &member, little_endian, kind != DRGN_TYPE_UNION && builder.members.size > 0, &builder); @@ -702,7 +699,7 @@ parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, * so we have to fabricate the compatible type. 
*/ static struct drgn_error * -enum_compatible_type_fallback(struct drgn_dwarf_info_cache *dicache, +enum_compatible_type_fallback(struct drgn_debug_info *dbinfo, Dwarf_Die *die, bool is_signed, const struct drgn_language *lang, struct drgn_type **ret) @@ -712,12 +709,12 @@ enum_compatible_type_fallback(struct drgn_dwarf_info_cache *dicache, return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); } - return drgn_int_type_create(dicache->prog, "", size, is_signed, + return drgn_int_type_create(dbinfo->prog, "", size, is_signed, lang, ret); } static struct drgn_error * -drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, +drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, const struct drgn_language *lang, struct drgn_type **ret) { @@ -742,20 +739,20 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, "DW_TAG_enumeration_type has invalid DW_AT_declaration"); } if (declaration && tag) { - err = drgn_dwarf_info_cache_find_complete(dicache, - DW_TAG_enumeration_type, - tag, ret); + err = drgn_debug_info_find_complete(dbinfo, + DW_TAG_enumeration_type, + tag, ret); if (!err || err->code != DRGN_ERROR_STOP) return err; } if (declaration) { - return drgn_incomplete_enum_type_create(dicache->prog, tag, - lang, ret); + return drgn_incomplete_enum_type_create(dbinfo->prog, tag, lang, + ret); } struct drgn_enum_type_builder builder; - drgn_enum_type_builder_init(&builder, dicache->prog); + drgn_enum_type_builder_init(&builder, dbinfo->prog); bool is_signed = false; Dwarf_Die child; int r = dwarf_child(die, &child); @@ -780,13 +777,13 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, "DW_TAG_enumeration_type has invalid DW_AT_type"); goto err; } else if (r) { - err = enum_compatible_type_fallback(dicache, die, is_signed, + err = enum_compatible_type_fallback(dbinfo, die, is_signed, lang, &compatible_type); if 
(err) goto err; } else { struct drgn_qualified_type qualified_compatible_type; - err = drgn_type_from_dwarf(dicache, &child, + err = drgn_type_from_dwarf(dbinfo, &child, &qualified_compatible_type); if (err) goto err; @@ -809,8 +806,7 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } static struct drgn_error * -drgn_typedef_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, +drgn_typedef_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, const struct drgn_language *lang, bool can_be_incomplete_array, bool *is_incomplete_array_ret, @@ -823,7 +819,7 @@ drgn_typedef_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } struct drgn_qualified_type aliased_type; - struct drgn_error *err = drgn_type_from_dwarf_child(dicache, die, + struct drgn_error *err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_typedef", true, @@ -833,17 +829,17 @@ drgn_typedef_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, if (err) return err; - return drgn_typedef_type_create(dicache->prog, name, aliased_type, lang, + return drgn_typedef_type_create(dbinfo->prog, name, aliased_type, lang, ret); } static struct drgn_error * -drgn_pointer_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const struct drgn_language *lang, +drgn_pointer_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, struct drgn_type **ret) { struct drgn_qualified_type referenced_type; - struct drgn_error *err = drgn_type_from_dwarf_child(dicache, die, + struct drgn_error *err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_pointer_type", true, true, NULL, @@ -862,13 +858,13 @@ drgn_pointer_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, size = word; } else { uint8_t word_size; - err = drgn_program_word_size(dicache->prog, &word_size); + err = drgn_program_word_size(dbinfo->prog, &word_size); if (err) 
return err; size = word_size; } - return drgn_pointer_type_create(dicache->prog, referenced_type, size, + return drgn_pointer_type_create(dbinfo->prog, referenced_type, size, lang, ret); } @@ -925,8 +921,8 @@ static struct drgn_error *subrange_length(Dwarf_Die *die, } static struct drgn_error * -drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const struct drgn_language *lang, +drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, bool can_be_incomplete_array, bool *is_incomplete_array_ret, struct drgn_type **ret) @@ -960,7 +956,7 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } struct drgn_qualified_type element_type; - err = drgn_type_from_dwarf_child(dicache, die, + err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_array_type", false, false, NULL, &element_type); @@ -972,16 +968,14 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, do { dimension = array_dimension_vector_pop(&dimensions); if (dimension->is_complete) { - err = drgn_array_type_create(dicache->prog, - element_type, + err = drgn_array_type_create(dbinfo->prog, element_type, dimension->length, lang, &type); } else if (dimensions.size || !can_be_incomplete_array) { - err = drgn_array_type_create(dicache->prog, - element_type, 0, lang, - &type); + err = drgn_array_type_create(dbinfo->prog, element_type, + 0, lang, &type); } else { - err = drgn_incomplete_array_type_create(dicache->prog, + err = drgn_incomplete_array_type_create(dbinfo->prog, element_type, lang, &type); } @@ -1000,7 +994,7 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } static struct drgn_error * -parse_formal_parameter(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, +parse_formal_parameter(struct drgn_debug_info *dbinfo, Dwarf_Die *die, struct drgn_function_type_builder *builder) { Dwarf_Attribute attr_mem, *attr; @@ -1016,7 +1010,7 @@ 
parse_formal_parameter(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } struct drgn_lazy_type parameter_type; - struct drgn_error *err = drgn_lazy_type_from_dwarf(dicache, die, true, + struct drgn_error *err = drgn_lazy_type_from_dwarf(dbinfo, die, true, "DW_TAG_formal_parameter", ¶meter_type); if (err) @@ -1030,8 +1024,8 @@ parse_formal_parameter(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } static struct drgn_error * -drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const struct drgn_language *lang, +drgn_function_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, struct drgn_type **ret) { struct drgn_error *err; @@ -1040,7 +1034,7 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, dwarf_tag(die) == DW_TAG_subroutine_type ? "DW_TAG_subroutine_type" : "DW_TAG_subprogram"; struct drgn_function_type_builder builder; - drgn_function_type_builder_init(&builder, dicache->prog); + drgn_function_type_builder_init(&builder, dbinfo->prog); bool is_variadic = false; Dwarf_Die child; int r = dwarf_child(die, &child); @@ -1053,7 +1047,7 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, tag_name); goto err; } - err = parse_formal_parameter(dicache, &child, &builder); + err = parse_formal_parameter(dbinfo, &child, &builder); if (err) goto err; break; @@ -1078,7 +1072,7 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } struct drgn_qualified_type return_type; - err = drgn_type_from_dwarf_child(dicache, die, + err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), tag_name, true, true, NULL, &return_type); @@ -1097,27 +1091,28 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, bool can_be_incomplete_array, +drgn_type_from_dwarf_internal(struct 
drgn_debug_info *dbinfo, Dwarf_Die *die, + bool can_be_incomplete_array, bool *is_incomplete_array_ret, struct drgn_qualified_type *ret) { - if (dicache->depth >= 1000) { + if (dbinfo->depth >= 1000) { return drgn_error_create(DRGN_ERROR_RECURSION, "maximum DWARF type parsing depth exceeded"); } - struct dwarf_type_map_entry entry = { + struct drgn_dwarf_type_map_entry entry = { .key = die->addr, }; - struct hash_pair hp = dwarf_type_map_hash(&entry.key); - struct dwarf_type_map_iterator it = - dwarf_type_map_search_hashed(&dicache->map, &entry.key, hp); + struct hash_pair hp = drgn_dwarf_type_map_hash(&entry.key); + struct drgn_dwarf_type_map_iterator it = + drgn_dwarf_type_map_search_hashed(&dbinfo->types, &entry.key, + hp); if (it.entry) { if (!can_be_incomplete_array && it.entry->value.is_incomplete_array) { - it = dwarf_type_map_search_hashed(&dicache->cant_be_incomplete_array_map, - &entry.key, hp); + it = drgn_dwarf_type_map_search_hashed(&dbinfo->cant_be_incomplete_array_types, + &entry.key, hp); } if (it.entry) { ret->type = it.entry->value.type; @@ -1132,77 +1127,77 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, return err; ret->qualifiers = 0; - dicache->depth++; + dbinfo->depth++; entry.value.is_incomplete_array = false; switch (dwarf_tag(die)) { case DW_TAG_const_type: - err = drgn_type_from_dwarf_child(dicache, die, + err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_const_type", true, true, NULL, ret); ret->qualifiers |= DRGN_QUALIFIER_CONST; break; case DW_TAG_restrict_type: - err = drgn_type_from_dwarf_child(dicache, die, + err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_restrict_type", true, true, NULL, ret); ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; break; case DW_TAG_volatile_type: - err = drgn_type_from_dwarf_child(dicache, die, + err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_volatile_type", true, true, 
NULL, ret); ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; break; case DW_TAG_atomic_type: - err = drgn_type_from_dwarf_child(dicache, die, + err = drgn_type_from_dwarf_child(dbinfo, die, drgn_language_or_default(lang), "DW_TAG_atomic_type", true, true, NULL, ret); ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; break; case DW_TAG_base_type: - err = drgn_base_type_from_dwarf(dicache, die, lang, &ret->type); + err = drgn_base_type_from_dwarf(dbinfo, die, lang, &ret->type); break; case DW_TAG_structure_type: - err = drgn_compound_type_from_dwarf(dicache, die, lang, + err = drgn_compound_type_from_dwarf(dbinfo, die, lang, DRGN_TYPE_STRUCT, &ret->type); break; case DW_TAG_union_type: - err = drgn_compound_type_from_dwarf(dicache, die, lang, + err = drgn_compound_type_from_dwarf(dbinfo, die, lang, DRGN_TYPE_UNION, &ret->type); break; case DW_TAG_class_type: - err = drgn_compound_type_from_dwarf(dicache, die, lang, + err = drgn_compound_type_from_dwarf(dbinfo, die, lang, DRGN_TYPE_CLASS, &ret->type); break; case DW_TAG_enumeration_type: - err = drgn_enum_type_from_dwarf(dicache, die, lang, &ret->type); + err = drgn_enum_type_from_dwarf(dbinfo, die, lang, &ret->type); break; case DW_TAG_typedef: - err = drgn_typedef_type_from_dwarf(dicache, die, lang, + err = drgn_typedef_type_from_dwarf(dbinfo, die, lang, can_be_incomplete_array, &entry.value.is_incomplete_array, &ret->type); break; case DW_TAG_pointer_type: - err = drgn_pointer_type_from_dwarf(dicache, die, lang, + err = drgn_pointer_type_from_dwarf(dbinfo, die, lang, &ret->type); break; case DW_TAG_array_type: - err = drgn_array_type_from_dwarf(dicache, die, lang, + err = drgn_array_type_from_dwarf(dbinfo, die, lang, can_be_incomplete_array, &entry.value.is_incomplete_array, &ret->type); break; case DW_TAG_subroutine_type: case DW_TAG_subprogram: - err = drgn_function_type_from_dwarf(dicache, die, lang, + err = drgn_function_type_from_dwarf(dbinfo, die, lang, &ret->type); break; default: @@ -1211,18 +1206,18 @@ 
drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, dwarf_tag(die)); break; } - dicache->depth--; + dbinfo->depth--; if (err) return err; entry.value.type = ret->type; entry.value.qualifiers = ret->qualifiers; - struct dwarf_type_map *map; + struct drgn_dwarf_type_map *map; if (!can_be_incomplete_array && entry.value.is_incomplete_array) - map = &dicache->cant_be_incomplete_array_map; + map = &dbinfo->cant_be_incomplete_array_types; else - map = &dicache->map; - if (dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { + map = &dbinfo->types; + if (drgn_dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { /* * This will "leak" the type we created, but it'll still be * cleaned up when the program is freed. @@ -1234,13 +1229,13 @@ drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, return NULL; } -struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, - const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret) +struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, + const char *name, size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret) { struct drgn_error *err; - struct drgn_dwarf_info_cache *dicache = arg; + struct drgn_debug_info *dbinfo = arg; uint64_t tag; switch (kind) { @@ -1269,7 +1264,7 @@ struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, } struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dicache->dindex.global, name, + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, name_len, &tag, 1); if (err) return err; @@ -1280,7 +1275,7 @@ struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, if (err) return err; if (die_matches_filename(&die, filename)) { - err = drgn_type_from_dwarf(dicache, &die, ret); + err = drgn_type_from_dwarf(dbinfo, &die, ret); if (err) return err; /* @@ -1295,7 +1290,7 @@ struct drgn_error 
*drgn_dwarf_type_find(enum drgn_type_kind kind, } static struct drgn_error * -drgn_object_from_dwarf_enumerator(struct drgn_dwarf_info_cache *dicache, +drgn_object_from_dwarf_enumerator(struct drgn_debug_info *dbinfo, Dwarf_Die *die, const char *name, struct drgn_object *ret) { @@ -1304,7 +1299,7 @@ drgn_object_from_dwarf_enumerator(struct drgn_dwarf_info_cache *dicache, const struct drgn_type_enumerator *enumerators; size_t num_enumerators, i; - err = drgn_type_from_dwarf(dicache, die, &qualified_type); + err = drgn_type_from_dwarf(dbinfo, die, &qualified_type); if (err) return err; enumerators = drgn_type_enumerators(qualified_type.type); @@ -1326,12 +1321,12 @@ drgn_object_from_dwarf_enumerator(struct drgn_dwarf_info_cache *dicache, } static struct drgn_error * -drgn_object_from_dwarf_subprogram(struct drgn_dwarf_info_cache *dicache, +drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, Dwarf_Die *die, uint64_t bias, const char *name, struct drgn_object *ret) { struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf(dicache, die, + struct drgn_error *err = drgn_type_from_dwarf(dbinfo, die, &qualified_type); if (err) return err; @@ -1348,8 +1343,7 @@ drgn_object_from_dwarf_subprogram(struct drgn_dwarf_info_cache *dicache, } static struct drgn_error * -drgn_object_from_dwarf_constant(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, +drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, struct drgn_qualified_type qualified_type, Dwarf_Attribute *attr, struct drgn_object *ret) { @@ -1397,12 +1391,12 @@ drgn_object_from_dwarf_constant(struct drgn_dwarf_info_cache *dicache, } static struct drgn_error * -drgn_object_from_dwarf_variable(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, uint64_t bias, const char *name, +drgn_object_from_dwarf_variable(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + uint64_t bias, const char *name, struct drgn_object *ret) { struct 
drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf_child(dicache, die, NULL, + struct drgn_error *err = drgn_type_from_dwarf_child(dbinfo, die, NULL, "DW_TAG_variable", true, true, NULL, &qualified_type); @@ -1427,7 +1421,7 @@ drgn_object_from_dwarf_variable(struct drgn_dwarf_info_cache *dicache, byte_order); } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { - return drgn_object_from_dwarf_constant(dicache, die, + return drgn_object_from_dwarf_constant(dbinfo, die, qualified_type, attr, ret); } else { @@ -1438,14 +1432,15 @@ drgn_object_from_dwarf_variable(struct drgn_dwarf_info_cache *dicache, } struct drgn_error * -drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret) +drgn_debug_info_find_object(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret) { struct drgn_error *err; - struct drgn_dwarf_info_cache *dicache = arg; + struct drgn_debug_info *dbinfo = arg; - struct drgn_dwarf_index_namespace *ns = &dicache->dindex.global; + struct drgn_dwarf_index_namespace *ns = &dbinfo->dindex.global; if (name_len >= 2 && memcmp(name, "::", 2) == 0) { /* Explicit global namespace. 
*/ name_len -= 2; @@ -1493,14 +1488,14 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, continue; switch (dwarf_tag(&die)) { case DW_TAG_enumeration_type: - return drgn_object_from_dwarf_enumerator(dicache, &die, + return drgn_object_from_dwarf_enumerator(dbinfo, &die, name, ret); case DW_TAG_subprogram: - return drgn_object_from_dwarf_subprogram(dicache, &die, + return drgn_object_from_dwarf_subprogram(dbinfo, &die, bias, name, ret); case DW_TAG_variable: - return drgn_object_from_dwarf_variable(dicache, &die, + return drgn_object_from_dwarf_variable(dbinfo, &die, bias, name, ret); default: UNREACHABLE(); @@ -1510,35 +1505,35 @@ drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, } struct drgn_error * -drgn_dwarf_info_cache_create(struct drgn_program *prog, - const Dwfl_Callbacks *dwfl_callbacks, - struct drgn_dwarf_info_cache **ret) +drgn_debug_info_create(struct drgn_program *prog, + const Dwfl_Callbacks *dwfl_callbacks, + struct drgn_debug_info **ret) { struct drgn_error *err; - struct drgn_dwarf_info_cache *dicache; + struct drgn_debug_info *dbinfo; - dicache = malloc(sizeof(*dicache)); - if (!dicache) + dbinfo = malloc(sizeof(*dbinfo)); + if (!dbinfo) return &drgn_enomem; - err = drgn_dwarf_index_init(&dicache->dindex, dwfl_callbacks); + err = drgn_dwarf_index_init(&dbinfo->dindex, dwfl_callbacks); if (err) { - free(dicache); + free(dbinfo); return err; } - dwarf_type_map_init(&dicache->map); - dwarf_type_map_init(&dicache->cant_be_incomplete_array_map); - dicache->depth = 0; - dicache->prog = prog; - *ret = dicache; + drgn_dwarf_type_map_init(&dbinfo->types); + drgn_dwarf_type_map_init(&dbinfo->cant_be_incomplete_array_types); + dbinfo->depth = 0; + dbinfo->prog = prog; + *ret = dbinfo; return NULL; } -void drgn_dwarf_info_cache_destroy(struct drgn_dwarf_info_cache *dicache) +void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) { - if (!dicache) + if (!dbinfo) return; - 
dwarf_type_map_deinit(&dicache->cant_be_incomplete_array_map); - dwarf_type_map_deinit(&dicache->map); - drgn_dwarf_index_deinit(&dicache->dindex); - free(dicache); + drgn_dwarf_type_map_deinit(&dbinfo->cant_be_incomplete_array_types); + drgn_dwarf_type_map_deinit(&dbinfo->types); + drgn_dwarf_index_deinit(&dbinfo->dindex); + free(dbinfo); } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h new file mode 100644 index 000000000..fdf8dd24e --- /dev/null +++ b/libdrgn/debug_info.h @@ -0,0 +1,94 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0+ + +/** + * @file + * + * Debugging information handling. + * + * See @ref DebugInfo. + */ + +#ifndef DRGN_DEBUG_INFO_H +#define DRGN_DEBUG_INFO_H + +#include "drgn.h" +#include "dwarf_index.h" +#include "hash_table.h" + +/** + * @ingroup Internals + * + * @defgroup DebugInfo Debugging information cache + * + * Caching of debugging information. + * + * @ref drgn_debug_info caches debugging information (currently only DWARF). It + * translates the debugging information to types and objects. + * + * @{ + */ + +/** Cached type in a @ref drgn_debug_info. */ +struct drgn_dwarf_type { + struct drgn_type *type; + enum drgn_qualifiers qualifiers; + /** + * Whether this is an incomplete array type or a typedef of one. + * + * This is used to work around a GCC bug; see @ref + * drgn_type_from_dwarf_internal(). + */ + bool is_incomplete_array; +}; + +DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type); + +/** Cache of debugging information. */ +struct drgn_debug_info { + /** Index of DWARF debugging information. */ + struct drgn_dwarf_index dindex; + /** + * Cache of parsed types. + * + * The key is the address of the DIE (@c Dwarf_Die::addr). The value is + * a @ref drgn_dwarf_type. + */ + struct drgn_dwarf_type_map types; + /** + * Cache of parsed types which appear to be incomplete array types but + * can't be. + * + * See @ref drgn_type_from_dwarf_internal(). 
+ */ + struct drgn_dwarf_type_map cant_be_incomplete_array_types; + /** Current parsing recursion depth. */ + int depth; + /** Program owning this cache. */ + struct drgn_program *prog; +}; + +/** Create a @ref drgn_debug_info. */ +struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, + const Dwfl_Callbacks *dwfl_callbacks, + struct drgn_debug_info **ret); + +/** Destroy a @ref drgn_debug_info. */ +void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo); + +/** @ref drgn_type_find_fn() that uses debugging information. */ +struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, + const char *name, size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret); + +/** @ref drgn_object_find_fn() that uses debugging information. */ +struct drgn_error * +drgn_debug_info_find_object(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret); + +/** @} */ + +#endif /* DRGN_DEBUG_INFO_H */ diff --git a/libdrgn/dwarf_info_cache.h b/libdrgn/dwarf_info_cache.h deleted file mode 100644 index 7935782a8..000000000 --- a/libdrgn/dwarf_info_cache.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -/** - * @file - * - * Debugging information cache. - * - * See @ref DWARFInfoCache. - */ - -#ifndef DRGN_DWARF_INFO_CACHE_H -#define DRGN_DWARF_INFO_CACHE_H - -#include "drgn.h" -#include "hash_table.h" - -/** - * @ingroup Internals - * - * @defgroup DWARFInfoCache Debugging information cache - * - * Caching of DWARF debugging information. - * - * @ref drgn_dwarf_info_cache bridges the raw DWARF information indexed by @ref - * drgn_dwarf_index to higher-level type and object finders. - * - * @{ - */ - -/** Cached type in a @ref drgn_dwarf_info_cache. 
*/ -struct drgn_dwarf_type { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - /** - * Whether this is an incomplete array type or a typedef of one. - * - * This is used to work around a GCC bug; see @ref - * drgn_type_from_dwarf_internal(). - */ - bool is_incomplete_array; -}; - -DEFINE_HASH_MAP_TYPE(dwarf_type_map, const void *, struct drgn_dwarf_type); - -struct drgn_dwarf_index; - -/** - * Cache of types and objects from DWARF debugging information. - * - * This is the argument for @ref drgn_dwarf_type_find() and @ref - * drgn_dwarf_object_find(). - */ -struct drgn_dwarf_info_cache { - /** Index of DWARF debugging information. */ - struct drgn_dwarf_index dindex; - /** - * Cache of parsed types. - * - * The key is the address of the DIE (@c Dwarf_Die::addr). The value is - * a @ref drgn_dwarf_type. - */ - struct dwarf_type_map map; - /** - * Cache of parsed types which appear to be incomplete array types but - * can't be. - * - * See @ref drgn_type_from_dwarf_internal(). - */ - struct dwarf_type_map cant_be_incomplete_array_map; - /** Current parsing recursion depth. */ - int depth; - /** Program owning this cache. */ - struct drgn_program *prog; -}; - -/** Create a @ref drgn_dwarf_info_cache. */ -struct drgn_error * -drgn_dwarf_info_cache_create(struct drgn_program *prog, - const Dwfl_Callbacks *dwfl_callbacks, - struct drgn_dwarf_info_cache **ret); - -/** Destroy a @ref drgn_dwarf_info_cache. */ -void drgn_dwarf_info_cache_destroy(struct drgn_dwarf_info_cache *dicache); - -/** @ref drgn_type_find_fn() that uses DWARF debugging information. */ -struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, - const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret); - -/** @ref drgn_object_find_fn() that uses DWARF debugging information. 
*/ -struct drgn_error * -drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret); - -/** @} */ - -#endif /* DRGN_DWARF_INFO_CACHE_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index 872f40446..eb924b2f5 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -15,8 +15,7 @@ #include #include "internal.h" -#include "dwarf_index.h" -#include "dwarf_info_cache.h" +#include "debug_info.h" #include "language.h" #include "linux_kernel.h" #include "memory_reader.h" @@ -100,7 +99,7 @@ void drgn_program_deinit(struct drgn_program *prog) if (prog->core_fd != -1) close(prog->core_fd); - drgn_dwarf_info_cache_destroy(prog->_dicache); + drgn_debug_info_destroy(prog->_dbinfo); } LIBDRGN_PUBLIC struct drgn_error * @@ -529,10 +528,8 @@ static struct drgn_error *drgn_program_get_dindex(struct drgn_program *prog, { struct drgn_error *err; - if (!prog->_dicache) { + if (!prog->_dbinfo) { const Dwfl_Callbacks *dwfl_callbacks; - struct drgn_dwarf_info_cache *dicache; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) dwfl_callbacks = &drgn_dwfl_callbacks; else if (prog->flags & DRGN_PROGRAM_IS_LIVE) @@ -540,27 +537,28 @@ static struct drgn_error *drgn_program_get_dindex(struct drgn_program *prog, else dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; - err = drgn_dwarf_info_cache_create(prog, dwfl_callbacks, - &dicache); + struct drgn_debug_info *dbinfo; + err = drgn_debug_info_create(prog, dwfl_callbacks, &dbinfo); if (err) return err; err = drgn_program_add_object_finder(prog, - drgn_dwarf_object_find, - dicache); + drgn_debug_info_find_object, + dbinfo); if (err) { - drgn_dwarf_info_cache_destroy(dicache); + drgn_debug_info_destroy(dbinfo); return err; } - err = drgn_program_add_type_finder(prog, drgn_dwarf_type_find, - dicache); + err = drgn_program_add_type_finder(prog, + drgn_debug_info_find_type, + dbinfo); if (err) { 
drgn_object_index_remove_finder(&prog->oindex); - drgn_dwarf_info_cache_destroy(dicache); + drgn_debug_info_destroy(dbinfo); return err; } - prog->_dicache = dicache; + prog->_dbinfo = dbinfo; } - *ret = &prog->_dicache->dindex; + *ret = &prog->_dbinfo->dindex; return NULL; } @@ -1146,8 +1144,8 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, GElf_Sym elf_sym; if (!module) { - if (prog->_dicache) { - module = dwfl_addrmodule(prog->_dicache->dindex.dwfl, + if (prog->_dbinfo) { + module = dwfl_addrmodule(prog->_dbinfo->dindex.dwfl, address); if (!module) return false; @@ -1245,8 +1243,8 @@ drgn_program_find_symbol_by_name(struct drgn_program *prog, .ret = ret, }; - if (prog->_dicache && - dwfl_getmodules(prog->_dicache->dindex.dwfl, find_symbol_by_name_cb, + if (prog->_dbinfo && + dwfl_getmodules(prog->_dbinfo->dindex.dwfl, find_symbol_by_name_cb, &arg, 0)) return arg.err; return drgn_error_format(DRGN_ERROR_LOOKUP, diff --git a/libdrgn/program.h b/libdrgn/program.h index 01d6c1eb7..202a73586 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -118,7 +118,7 @@ struct drgn_program { * Debugging information. */ struct drgn_object_index oindex; - struct drgn_dwarf_info_cache *_dicache; + struct drgn_debug_info *_dbinfo; /* * Program information. From f83bb7c71bb8f01e90e2d29b57c0ce4f2022aad6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Sep 2020 17:42:53 -0700 Subject: [PATCH 51/56] libdrgn: move debugging information tracking into drgn_debug_info Debugging information tracking is currently in two places: drgn_program finds debugging information, and drgn_dwarf_index stores it. Both of these responsibilities make more sense as part of drgn_debug_info, so let's move them there. This prepares us to track extra debugging information that isn't pertinent to indexing. 
This also reworks a couple of details of loading debugging information: - drgn_dwarf_module and drgn_dwfl_module_userdata are consolidated into a single structure, drgn_debug_info_module. - The first pass of DWARF indexing now happens in parallel with reading compilation units (by using OpenMP tasks). Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 1035 ++++++++++++++++++++++++++++- libdrgn/debug_info.h | 190 +++++- libdrgn/dwarf_index.c | 1403 +++++----------------------------------- libdrgn/dwarf_index.h | 271 ++------ libdrgn/linux_kernel.c | 240 ++++--- libdrgn/linux_kernel.h | 7 +- libdrgn/program.c | 131 +--- libdrgn/program.h | 3 +- libdrgn/stack_trace.c | 15 +- 9 files changed, 1557 insertions(+), 1738 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 6b8690437..cc0265b5a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1,20 +1,1017 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include #include +#include #include -#include +#include +#include +#include +#include #include +#include #include "internal.h" #include "debug_info.h" #include "hash_table.h" +#include "language.h" +#include "linux_kernel.h" #include "object.h" -#include "object_index.h" #include "program.h" #include "type.h" #include "vector.h" +DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) + +static inline struct hash_pair +drgn_debug_info_module_hash(const struct drgn_debug_info_module_key *key) +{ + size_t hash = cityhash_size_t(key->build_id, key->build_id_len); + hash = hash_combine(hash, key->start); + hash = hash_combine(hash, key->end); + return hash_pair_from_avalanching_hash(hash); +} +static inline bool +drgn_debug_info_module_eq(const struct drgn_debug_info_module_key *a, + const struct drgn_debug_info_module_key *b) +{ + return (a->build_id_len == b->build_id_len && + memcmp(a->build_id, b->build_id, a->build_id_len) == 0 && + a->start == b->start && a->end == b->end); +} 
+DEFINE_HASH_TABLE_FUNCTIONS(drgn_debug_info_module_table, + drgn_debug_info_module_hash, + drgn_debug_info_module_eq) + +DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_hash, c_string_eq) + +/** + * @c Dwfl_Callbacks::find_elf() implementation. + * + * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c + * Elf handle, which we need for a couple of reasons: + * + * - We usually already have the @c Elf handle open in order to identify the + * file. + * - For kernel modules, we set the section addresses in the @c Elf handle + * ourselves instead of using @c Dwfl_Callbacks::section_address(). + * + * Additionally, there's a special case for vmlinux. It is usually an @c ET_EXEC + * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN + * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but + * @ref dwfl_report_elf() bypasses this hack. + * + * So, we're stuck using @c dwfl_report_module() and this dummy callback. + */ +static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap, + const char *name, Dwarf_Addr base, + char **file_name, Elf **elfp) +{ + struct drgn_debug_info_module *module = *userdatap; + /* + * libdwfl consumes the returned path, file descriptor, and ELF handle, + * so clear the fields. + */ + *file_name = module->path; + int fd = module->fd; + *elfp = module->elf; + module->path = NULL; + module->fd = -1; + module->elf = NULL; + return fd; +} + +/* + * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls + * back to dwfl_linux_proc_find_elf() otherwise. 
+ */ +static int drgn_dwfl_linux_proc_find_elf(Dwfl_Module *dwfl_module, + void **userdatap, const char *name, + Dwarf_Addr base, char **file_name, + Elf **elfp) +{ + struct drgn_debug_info_module *module = *userdatap; + if (module->elf) { + return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); + } + return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); +} + +/* + * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls + * back to dwfl_build_id_find_elf() otherwise. + */ +static int drgn_dwfl_build_id_find_elf(Dwfl_Module *dwfl_module, + void **userdatap, const char *name, + Dwarf_Addr base, char **file_name, + Elf **elfp) +{ + struct drgn_debug_info_module *module = *userdatap; + if (module->elf) { + return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); + } + return dwfl_build_id_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); +} + +/** + * @c Dwfl_Callbacks::section_address() implementation. + * + * We set the section header @c sh_addr in memory instead of using this, but + * libdwfl requires the callback pointer to be non-@c NULL. It will be called + * for any sections that still have a zero @c sh_addr, meaning they are not + * present in memory. 
+ */ +static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap, + const char *name, Dwarf_Addr base, + const char *secname, Elf32_Word shndx, + const GElf_Shdr *shdr, Dwarf_Addr *addr) +{ + *addr = -1; + return DWARF_CB_OK; +} + +static const Dwfl_Callbacks drgn_dwfl_callbacks = { + .find_elf = drgn_dwfl_find_elf, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = drgn_dwfl_section_address, +}; + +static const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks = { + .find_elf = drgn_dwfl_linux_proc_find_elf, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = drgn_dwfl_section_address, +}; + +static const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = { + .find_elf = drgn_dwfl_build_id_find_elf, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = drgn_dwfl_section_address, +}; + +static void +drgn_debug_info_module_destroy(struct drgn_debug_info_module *module) +{ + if (module) { + drgn_error_destroy(module->err); + elf_end(module->elf); + if (module->fd != -1) + close(module->fd); + free(module->path); + free(module->name); + free(module); + } +} + +static void +drgn_debug_info_module_finish_indexing(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module) +{ + module->state = DRGN_DEBUG_INFO_MODULE_INDEXED; + if (module->name) { + int ret = c_string_set_insert(&dbinfo->module_names, + (const char **)&module->name, + NULL); + /* drgn_debug_info_update_index() should've reserved enough. 
*/ + assert(ret != -1); + } +} + +struct drgn_dwfl_module_removed_arg { + struct drgn_debug_info *dbinfo; + bool finish_indexing; + bool free_all; +}; + +static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap, + const char *name, Dwarf_Addr base, + void *_arg) +{ + struct drgn_dwfl_module_removed_arg *arg = _arg; + /* + * userdatap is actually a void ** like for the other libdwfl callbacks, + * but dwfl_report_end() has the wrong signature for the removed + * callback. + */ + struct drgn_debug_info_module *module = *(void **)userdatap; + if (arg->finish_indexing && module && + module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) + drgn_debug_info_module_finish_indexing(arg->dbinfo, module); + if (arg->free_all || !module || + module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { + drgn_debug_info_module_destroy(module); + } else { + /* + * The module was already indexed. Report it again so libdwfl + * doesn't remove it. + */ + Dwarf_Addr end; + dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, + NULL, NULL); + dwfl_report_module(arg->dbinfo->dwfl, name, base, end); + } + return DWARF_CB_OK; +} + +static void drgn_debug_info_free_modules(struct drgn_debug_info *dbinfo, + bool finish_indexing, bool free_all) +{ + for (struct drgn_debug_info_module_table_iterator it = + drgn_debug_info_module_table_first(&dbinfo->modules); it.entry; ) { + struct drgn_debug_info_module *module = *it.entry; + struct drgn_debug_info_module **nextp = it.entry; + do { + struct drgn_debug_info_module *next = module->next; + if (finish_indexing && + module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) { + drgn_debug_info_module_finish_indexing(dbinfo, + module); + } + if (free_all || + module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { + if (module == *nextp) { + if (nextp == it.entry && !next) { + it = drgn_debug_info_module_table_delete_iterator(&dbinfo->modules, + it); + } else { + if (!next) + it = drgn_debug_info_module_table_next(it); + *nextp = next; + } + } + 
void **userdatap; + dwfl_module_info(module->dwfl_module, + &userdatap, NULL, NULL, NULL, + NULL, NULL, NULL); + *userdatap = NULL; + drgn_debug_info_module_destroy(module); + } else { + if (!next) + it = drgn_debug_info_module_table_next(it); + nextp = &module->next; + } + module = next; + } while (module); + } + + dwfl_report_begin(dbinfo->dwfl); + struct drgn_dwfl_module_removed_arg arg = { + .dbinfo = dbinfo, + .finish_indexing = finish_indexing, + .free_all = free_all, + }; + dwfl_report_end(dbinfo->dwfl, drgn_dwfl_module_removed, &arg); +} + +struct drgn_error * +drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, + const char *name, const char *message, + struct drgn_error *err) +{ + if (err && err->code == DRGN_ERROR_NO_MEMORY) { + /* Always fail hard if we're out of memory. */ + goto err; + } + if (load->num_errors == 0 && + !string_builder_append(&load->errors, + "could not get debugging information for:")) + goto err; + if (load->num_errors < load->max_errors) { + if (!string_builder_line_break(&load->errors)) + goto err; + if (name && !string_builder_append(&load->errors, name)) + goto err; + if (name && (message || err) && + !string_builder_append(&load->errors, " (")) + goto err; + if (message && !string_builder_append(&load->errors, message)) + goto err; + if (message && err && + !string_builder_append(&load->errors, ": ")) + goto err; + if (err && !string_builder_append_error(&load->errors, err)) + goto err; + if (name && (message || err) && + !string_builder_appendc(&load->errors, ')')) + goto err; + } + load->num_errors++; + drgn_error_destroy(err); + return NULL; + +err: + drgn_error_destroy(err); + return &drgn_enomem; +} + +static struct drgn_error * +drgn_debug_info_report_module(struct drgn_debug_info_load_state *load, + const void *build_id, size_t build_id_len, + uint64_t start, uint64_t end, const char *name, + Dwfl_Module *dwfl_module, const char *path, + int fd, Elf *elf, bool *new_ret) +{ + struct drgn_debug_info 
*dbinfo = load->dbinfo; + struct drgn_error *err; + char *path_key = NULL; + + if (new_ret) + *new_ret = false; + + struct hash_pair hp; + struct drgn_debug_info_module_table_iterator it; + if (build_id_len) { + struct drgn_debug_info_module_key key = { + .build_id = build_id, + .build_id_len = build_id_len, + .start = start, + .end = end, + }; + hp = drgn_debug_info_module_hash(&key); + it = drgn_debug_info_module_table_search_hashed(&dbinfo->modules, + &key, hp); + if (it.entry && + (*it.entry)->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { + /* We've already indexed this module. */ + err = NULL; + goto free; + } + } + + if (!dwfl_module) { + path_key = realpath(path, NULL); + if (!path_key) { + path_key = strdup(path); + if (!path_key) { + err = &drgn_enomem; + goto free; + } + } + + dwfl_module = dwfl_report_module(dbinfo->dwfl, path_key, start, + end); + if (!dwfl_module) { + err = drgn_error_libdwfl(); + goto free; + } + } + + void **userdatap; + dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL, + NULL); + if (*userdatap) { + /* We've already reported this file at this offset. */ + err = NULL; + goto free; + } + if (new_ret) + *new_ret = true; + + struct drgn_debug_info_module *module = malloc(sizeof(*module)); + if (!module) { + err = &drgn_enomem; + goto free; + } + module->state = DRGN_DEBUG_INFO_MODULE_NEW; + module->build_id = build_id; + module->build_id_len = build_id_len; + module->start = start; + module->end = end; + if (name) { + module->name = strdup(name); + if (!module->name) { + err = &drgn_enomem; + free(module); + goto free; + } + } else { + module->name = NULL; + } + module->dwfl_module = dwfl_module; + module->path = path_key; + module->fd = fd; + module->elf = elf; + module->err = NULL; + module->next = NULL; + + /* path_key, fd and elf are owned by the module now. 
*/
+
+	if (!drgn_debug_info_module_vector_append(&load->new_modules,
+						  &module)) {
+		drgn_debug_info_module_destroy(module);
+		return &drgn_enomem;
+	}
+	if (build_id_len) {
+		if (it.entry) {
+			/*
+			 * The first module with this build ID is in
+			 * new_modules, so insert it after in the list, not
+			 * before.
+			 */
+			module->next = (*it.entry)->next;
+			(*it.entry)->next = module;
+		} else if (drgn_debug_info_module_table_insert_searched(&dbinfo->modules,
+									 &module,
+									 hp,
+									 NULL) < 0) {
+			load->new_modules.size--;
+			drgn_debug_info_module_destroy(module);
+			return &drgn_enomem;
+		}
+	}
+	*userdatap = module;
+	return NULL;
+
+free:
+	elf_end(elf);
+	if (fd != -1)
+		close(fd);
+	free(path_key);
+	return err;
+}
+
+struct drgn_error *
+drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load,
+			   const char *path, int fd, Elf *elf, uint64_t start,
+			   uint64_t end, const char *name, bool *new_ret)
+{
+
+	struct drgn_error *err;
+	const void *build_id;
+	ssize_t build_id_len = dwelf_elf_gnu_build_id(elf, &build_id);
+	if (build_id_len < 0) {
+		err = drgn_debug_info_report_error(load, path, NULL,
+						   drgn_error_libdwfl());
+		close(fd);
+		elf_end(elf);
+		return err;
+	} else if (build_id_len == 0) {
+		build_id = NULL;
+	}
+	return drgn_debug_info_report_module(load, build_id, build_id_len,
+					     start, end, name, NULL, path, fd,
+					     elf, new_ret);
+}
+
+/*
+ * dwfl_getmodules() callback which reports a module that libdwfl discovered
+ * itself to the drgn_debug_info_load_state passed as arg.
+ *
+ * Modules which already have userdata attached are skipped, as are modules
+ * whose build ID cannot be read (after recording a non-fatal error for them).
+ *
+ * Returns DWARF_CB_OK to continue iterating, or DWARF_CB_ABORT if a fatal
+ * error occurred; the error itself is destroyed here because it cannot be
+ * returned through the callback.
+ */
+static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module,
+					      void **userdatap,
+					      const char *name, Dwarf_Addr base,
+					      void *arg)
+{
+	struct drgn_debug_info_load_state *load = arg;
+	struct drgn_error *err;
+
+	if (*userdatap) {
+		/*
+		 * This was either reported from drgn_debug_info_report_elf() or
+		 * already indexed.
+		 */
+		return DWARF_CB_OK;
+	}
+
+	const unsigned char *build_id;
+	GElf_Addr build_id_vaddr;
+	int build_id_len = dwfl_module_build_id(dwfl_module, &build_id,
+						&build_id_vaddr);
+	if (build_id_len < 0) {
+		err = drgn_debug_info_report_error(load, name, NULL,
+						   drgn_error_libdwfl());
+		if (err)
+			goto err;
+		/*
+		 * build_id was not set and the length is negative, so this
+		 * module must not be passed on to
+		 * drgn_debug_info_report_module() (which would hash the build
+		 * ID). Skip it like drgn_debug_info_report_elf() does.
+		 */
+		return DWARF_CB_OK;
+	} else if (build_id_len == 0) {
+		build_id = NULL;
+	}
+	Dwarf_Addr end;
+	dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL);
+	err = drgn_debug_info_report_module(load, build_id, build_id_len, base,
+					    end, NULL, dwfl_module, name, -1,
+					    NULL, NULL);
+	if (err)
+		goto err;
+	return DWARF_CB_OK;
+
+err:
+	drgn_error_destroy(err);
+	return DWARF_CB_ABORT;
+}
+
+static struct drgn_error *
+userspace_report_debug_info(struct drgn_debug_info_load_state *load)
+{
+	struct drgn_error *err;
+
+	for (size_t i = 0; i < load->num_paths; i++) {
+		int fd;
+		Elf *elf;
+		err = open_elf_file(load->paths[i], &fd, &elf);
+		if (err) {
+			err = drgn_debug_info_report_error(load, load->paths[i],
+							   NULL, err);
+			if (err)
+				return err;
+			continue;
+		}
+		/*
+		 * We haven't implemented a way to get the load address for
+		 * anything reported here, so for now we report it as unloaded.
+		 */
+		err = drgn_debug_info_report_elf(load, load->paths[i], fd, elf,
+						 0, 0, NULL, NULL);
+		if (err)
+			return err;
+	}
+
+	if (load->load_default) {
+		Dwfl *dwfl = load->dbinfo->dwfl;
+		struct drgn_program *prog = load->dbinfo->prog;
+		if (prog->flags & DRGN_PROGRAM_IS_LIVE) {
+			int ret = dwfl_linux_proc_report(dwfl, prog->pid);
+			if (ret == -1) {
+				return drgn_error_libdwfl();
+			} else if (ret) {
+				return drgn_error_create_os("dwfl_linux_proc_report",
+							    ret, NULL);
+			}
+		} else if (dwfl_core_file_report(dwfl, prog->core,
+						 NULL) == -1) {
+			return drgn_error_libdwfl();
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Apply a single x86-64 relocation to the section contents in data.
+ *
+ * The value st_value + r_addend is stored at byte offset r_offset of
+ * data->d_buf (truncated to 32 bits for R_X86_64_32). R_X86_64_NONE is a
+ * no-op. An offset that does not fit inside data->d_size or any other
+ * relocation type results in a DRGN_ERROR_OTHER error.
+ */
+static struct drgn_error *apply_relocation(Elf_Data *data, uint64_t r_offset,
+					   uint32_t r_type, int64_t r_addend,
+					   uint64_t st_value)
+{
+	switch (r_type) {
+	case R_X86_64_NONE:
+		break;
+	case R_X86_64_32: {
+		if (r_offset > SIZE_MAX - sizeof(uint32_t) ||
+		    r_offset + sizeof(uint32_t) > data->d_size) {
+			return drgn_error_create(DRGN_ERROR_OTHER,
+						 "invalid relocation offset");
+		}
+		/*
+		 * Only form the destination pointer once the offset is known
+		 * to be in bounds, and store with memcpy() because the target
+		 * is not guaranteed to be aligned for a uint32_t.
+		 */
+		uint32_t value = st_value + r_addend;
+		memcpy((char *)data->d_buf + r_offset, &value, sizeof(value));
+		break;
+	}
+	case R_X86_64_64: {
+		if (r_offset > SIZE_MAX - sizeof(uint64_t) ||
+		    r_offset + sizeof(uint64_t) > data->d_size) {
+			return drgn_error_create(DRGN_ERROR_OTHER,
+						 "invalid relocation offset");
+		}
+		uint64_t value = st_value + r_addend;
+		memcpy((char *)data->d_buf + r_offset, &value, sizeof(value));
+		break;
+	}
+	default:
+		return drgn_error_format(DRGN_ERROR_OTHER,
+					 "unimplemented relocation type %" PRIu32,
+					 r_type);
+	}
+	return NULL;
+}
+
+static struct drgn_error *relocate_section(Elf_Scn *scn, Elf_Scn *rela_scn,
+					   Elf_Scn *symtab_scn,
+					   uint64_t *sh_addrs, size_t shdrnum)
+{
+	struct drgn_error *err;
+	Elf_Data *data, *rela_data, *symtab_data;
+	const Elf64_Rela *relocs;
+	const Elf64_Sym *syms;
+	size_t num_relocs, num_syms;
+	size_t i;
+	GElf_Shdr *shdr, shdr_mem;
+
+	err = read_elf_section(scn, &data);
+	if (err)
+		return err;
+	err = read_elf_section(rela_scn, &rela_data);
+	if (err)
+		return err;
+	err = read_elf_section(symtab_scn,
&symtab_data); + if (err) + return err; + + relocs = (Elf64_Rela *)rela_data->d_buf; + num_relocs = rela_data->d_size / sizeof(Elf64_Rela); + syms = (Elf64_Sym *)symtab_data->d_buf; + num_syms = symtab_data->d_size / sizeof(Elf64_Sym); + + for (i = 0; i < num_relocs; i++) { + const Elf64_Rela *reloc = &relocs[i]; + uint32_t r_sym, r_type; + uint16_t st_shndx; + uint64_t sh_addr; + + r_sym = ELF64_R_SYM(reloc->r_info); + r_type = ELF64_R_TYPE(reloc->r_info); + + if (r_sym >= num_syms) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid relocation symbol"); + } + st_shndx = syms[r_sym].st_shndx; + if (st_shndx == 0) { + sh_addr = 0; + } else if (st_shndx < shdrnum) { + sh_addr = sh_addrs[st_shndx - 1]; + } else { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid symbol section index"); + } + err = apply_relocation(data, reloc->r_offset, r_type, + reloc->r_addend, + sh_addr + syms[r_sym].st_value); + if (err) + return err; + } + + /* + * Mark the relocation section as empty so that libdwfl doesn't try to + * apply it again. + */ + shdr = gelf_getshdr(rela_scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + shdr->sh_size = 0; + if (!gelf_update_shdr(rela_scn, shdr)) + return drgn_error_libelf(); + rela_data->d_size = 0; + return NULL; +} + +/* + * Before the debugging information in a relocatable ELF file (e.g., Linux + * kernel module) can be used, it must have ELF relocations applied. This is + * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a + * much faster implementation. It is only implemented for x86-64; for other + * architectures, we can fall back to libdwfl. 
+ */
+static struct drgn_error *apply_elf_relocations(Elf *elf)
+{
+	struct drgn_error *err;
+	GElf_Ehdr ehdr_mem, *ehdr;
+	size_t shdrnum, shstrndx;
+	uint64_t *sh_addrs;
+	Elf_Scn *scn;
+
+	ehdr = gelf_getehdr(elf, &ehdr_mem);
+	if (!ehdr)
+		return drgn_error_libelf();
+
+	if (ehdr->e_type != ET_REL ||
+	    ehdr->e_machine != EM_X86_64 ||
+	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
+	    ehdr->e_ident[EI_DATA] !=
+	    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ?
+	     ELFDATA2LSB : ELFDATA2MSB)) {
+		/* Unsupported; fall back to libdwfl. */
+		return NULL;
+	}
+
+	if (elf_getshdrnum(elf, &shdrnum))
+		return drgn_error_libelf();
+	if (shdrnum > 1) {
+		/*
+		 * sh_addrs[i] is the sh_addr of the section with index i + 1;
+		 * section index 0 is not stored.
+		 */
+		sh_addrs = calloc(shdrnum - 1, sizeof(*sh_addrs));
+		if (!sh_addrs)
+			return &drgn_enomem;
+
+		scn = NULL;
+		while ((scn = elf_nextscn(elf, scn))) {
+			size_t ndx;
+
+			ndx = elf_ndxscn(scn);
+			if (ndx > 0 && ndx < shdrnum) {
+				GElf_Shdr *shdr, shdr_mem;
+
+				shdr = gelf_getshdr(scn, &shdr_mem);
+				if (!shdr) {
+					err = drgn_error_libelf();
+					goto out;
+				}
+				sh_addrs[ndx - 1] = shdr->sh_addr;
+			}
+		}
+	} else {
+		sh_addrs = NULL;
+	}
+
+	if (elf_getshdrstrndx(elf, &shstrndx)) {
+		err = drgn_error_libelf();
+		goto out;
+	}
+
+	/* Apply every SHT_RELA section whose name starts with .rela.debug_. */
+	scn = NULL;
+	while ((scn = elf_nextscn(elf, scn))) {
+		GElf_Shdr *shdr, shdr_mem;
+		const char *scnname;
+
+		shdr = gelf_getshdr(scn, &shdr_mem);
+		if (!shdr) {
+			err = drgn_error_libelf();
+			goto out;
+		}
+
+		if (shdr->sh_type != SHT_RELA)
+			continue;
+
+		scnname = elf_strptr(elf, shstrndx, shdr->sh_name);
+		if (!scnname)
+			continue;
+
+		if (strstartswith(scnname, ".rela.debug_")) {
+			Elf_Scn *info_scn, *link_scn;
+
+			/* sh_info is the section the relocations apply to. */
+			info_scn = elf_getscn(elf, shdr->sh_info);
+			if (!info_scn) {
+				err = drgn_error_libelf();
+				goto out;
+			}
+
+			/* sh_link is passed on as the symbol table section. */
+			link_scn = elf_getscn(elf, shdr->sh_link);
+			if (!link_scn) {
+				err = drgn_error_libelf();
+				goto out;
+			}
+
+			err = relocate_section(info_scn, scn, link_scn,
+					       sh_addrs, shdrnum);
+			if (err)
+				goto out;
+		}
+	}
+	err = NULL;
+out:
+	free(sh_addrs);
+	/*
+	 * Propagate the failure recorded before jumping here; returning NULL
+	 * unconditionally would both hide the error and leak it.
+	 */
+	return err;
+}
+
+static struct drgn_error *
+drgn_get_debug_sections(struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + + if (module->elf) { + err = apply_elf_relocations(module->elf); + if (err) + return err; + } + + /* + * Note: not dwfl_module_getelf(), because then libdwfl applies + * ELF relocations to all sections, not just debug sections. + */ + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdwfl(); + Elf *elf = dwarf_getelf(dwarf); + if (!elf) + return drgn_error_libdw(); + + module->bswap = (elf_getident(elf, NULL)[EI_DATA] != + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? + ELFDATA2LSB : ELFDATA2MSB)); + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); + + module->debug_info = NULL; + module->debug_abbrev = NULL; + module->debug_str = NULL; + module->debug_line = NULL; + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + if (shdr->sh_type == SHT_NOBITS || (shdr->sh_flags & SHF_GROUP)) + continue; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + continue; + + Elf_Data **sectionp; + if (!module->debug_info && strcmp(scnname, ".debug_info") == 0) + sectionp = &module->debug_info; + else if (!module->debug_abbrev && strcmp(scnname, ".debug_abbrev") == 0) + sectionp = &module->debug_abbrev; + else if (!module->debug_str && strcmp(scnname, ".debug_str") == 0) + sectionp = &module->debug_str; + else if (!module->debug_line && strcmp(scnname, ".debug_line") == 0) + sectionp = &module->debug_line; + else + continue; + err = read_elf_section(scn, sectionp); + if (err) + return err; + } + + /* + * Truncate any extraneous bytes so that we can assume that a pointer + * within .debug_str is always null-terminated. 
+ */ + if (module->debug_str) { + const char *buf = module->debug_str->d_buf; + const char *nul = memrchr(buf, '\0', module->debug_str->d_size); + if (nul) + module->debug_str->d_size = nul - buf + 1; + else + module->debug_str->d_size = 0; + + } + return NULL; +} + +static struct drgn_error * +drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, + struct drgn_dwarf_index_update_state *dindex_state, + struct drgn_debug_info_module *head) +{ + struct drgn_error *err; + struct drgn_debug_info_module *module; + for (module = head; module; module = module->next) { + err = drgn_get_debug_sections(module); + if (err) { + module->err = err; + continue; + } + if (module->debug_info && module->debug_abbrev) { + module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; + drgn_dwarf_index_read_module(dindex_state, module); + return NULL; + } + } + /* + * We checked all of the files and didn't find debugging information. + * Report why for each one. + * + * (If we did find debugging information, we discard errors on the + * unused files.) 
+ */ + err = NULL; + #pragma omp critical(drgn_debug_info_read_module_error) + for (module = head; module; module = module->next) { + const char *name = + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, + NULL, NULL, NULL, NULL); + if (module->err) { + err = drgn_debug_info_report_error(load, name, NULL, + module->err); + module->err = NULL; + } else { + err = drgn_debug_info_report_error(load, name, + "no debugging information", + NULL); + } + if (err) + break; + } + return err; +} + +static struct drgn_error * +drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) +{ + if (!load->new_modules.size) + return NULL; + struct drgn_debug_info *dbinfo = load->dbinfo; + if (!c_string_set_reserve(&dbinfo->module_names, + c_string_set_size(&dbinfo->module_names) + + load->new_modules.size)) + return &drgn_enomem; + struct drgn_dwarf_index_update_state dindex_state; + drgn_dwarf_index_update_begin(&dindex_state, &dbinfo->dindex); + /* + * In OpenMP 5.0, this could be "#pragma omp parallel master taskloop" + * (added in GCC 9 and Clang 10). 
+ */ + #pragma omp parallel + #pragma omp master + #pragma omp taskloop + for (size_t i = 0; i < load->new_modules.size; i++) { + if (drgn_dwarf_index_update_cancelled(&dindex_state)) + continue; + struct drgn_error *module_err = + drgn_debug_info_read_module(load, &dindex_state, + load->new_modules.data[i]); + if (module_err) + drgn_dwarf_index_update_cancel(&dindex_state, module_err); + } + struct drgn_error *err = drgn_dwarf_index_update_end(&dindex_state); + if (err) + return err; + drgn_debug_info_free_modules(dbinfo, true, false); + return NULL; +} + +struct drgn_error * +drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load) +{ + struct drgn_debug_info *dbinfo = load->dbinfo; + dwfl_report_end(dbinfo->dwfl, NULL, NULL); + struct drgn_error *err = drgn_debug_info_update_index(load); + dwfl_report_begin_add(dbinfo->dwfl); + if (err) + return err; + load->new_modules.size = 0; + return NULL; +} + +static struct drgn_error * +drgn_debug_info_report_finalize_errors(struct drgn_debug_info_load_state *load) +{ + if (load->num_errors > load->max_errors && + (!string_builder_line_break(&load->errors) || + !string_builder_appendf(&load->errors, "... %u more", + load->num_errors - load->max_errors))) { + free(load->errors.str); + return &drgn_enomem; + } + if (load->num_errors) { + return drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, + &load->errors); + } else { + return NULL; + } +} + +struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, + const char **paths, size_t n, + bool load_default, bool load_main) +{ + struct drgn_program *prog = dbinfo->prog; + struct drgn_error *err; + + if (load_default) + load_main = true; + + const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); + struct drgn_debug_info_load_state load = { + .dbinfo = dbinfo, + .paths = paths, + .num_paths = n, + .load_default = load_default, + .load_main = load_main, + .new_modules = VECTOR_INIT, + .max_errors = max_errors ? 
atoi(max_errors) : 5, + }; + dwfl_report_begin_add(dbinfo->dwfl); + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + err = linux_kernel_report_debug_info(&load); + else + err = userspace_report_debug_info(&load); + dwfl_report_end(dbinfo->dwfl, NULL, NULL); + if (err) + goto err; + + /* + * userspace_report_debug_info() reports the main debugging information + * directly with libdwfl, so we need to report it to dbinfo. + */ + if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && load_main && + dwfl_getmodules(dbinfo->dwfl, drgn_debug_info_report_dwfl_module, + &load, 0)) { + err = &drgn_enomem; + goto err; + } + + err = drgn_debug_info_update_index(&load); + if (err) + goto err; + + /* + * If this fails, it's too late to roll back. This can only fail with + * enomem, so it's not a big deal. + */ + err = drgn_debug_info_report_finalize_errors(&load); +out: + drgn_debug_info_module_vector_deinit(&load.new_modules); + return err; + +err: + drgn_debug_info_free_modules(dbinfo, false, false); + free(load.errors.str); + goto out; +} + +bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, + const char *name) +{ + return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; +} + DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, hash_pair_ptr_type, hash_table_scalar_eq) @@ -1504,26 +2501,31 @@ drgn_debug_info_find_object(const char *name, size_t name_len, return &drgn_not_found; } -struct drgn_error * -drgn_debug_info_create(struct drgn_program *prog, - const Dwfl_Callbacks *dwfl_callbacks, - struct drgn_debug_info **ret) +struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, + struct drgn_debug_info **ret) { - struct drgn_error *err; - struct drgn_debug_info *dbinfo; - - dbinfo = malloc(sizeof(*dbinfo)); + struct drgn_debug_info *dbinfo = malloc(sizeof(*dbinfo)); if (!dbinfo) return &drgn_enomem; - err = drgn_dwarf_index_init(&dbinfo->dindex, dwfl_callbacks); - if (err) { + dbinfo->prog = prog; + const Dwfl_Callbacks *dwfl_callbacks; + 
if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + dwfl_callbacks = &drgn_dwfl_callbacks; + else if (prog->flags & DRGN_PROGRAM_IS_LIVE) + dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; + else + dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; + dbinfo->dwfl = dwfl_begin(dwfl_callbacks); + if (!dbinfo->dwfl) { free(dbinfo); - return err; + return drgn_error_libdwfl(); } + drgn_debug_info_module_table_init(&dbinfo->modules); + c_string_set_init(&dbinfo->module_names); + drgn_dwarf_index_init(&dbinfo->dindex); drgn_dwarf_type_map_init(&dbinfo->types); drgn_dwarf_type_map_init(&dbinfo->cant_be_incomplete_array_types); dbinfo->depth = 0; - dbinfo->prog = prog; *ret = dbinfo; return NULL; } @@ -1535,5 +2537,10 @@ void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) drgn_dwarf_type_map_deinit(&dbinfo->cant_be_incomplete_array_types); drgn_dwarf_type_map_deinit(&dbinfo->types); drgn_dwarf_index_deinit(&dbinfo->dindex); + c_string_set_deinit(&dbinfo->module_names); + drgn_debug_info_free_modules(dbinfo, false, true); + assert(drgn_debug_info_module_table_empty(&dbinfo->modules)); + drgn_debug_info_module_table_deinit(&dbinfo->modules); + dwfl_end(dbinfo->dwfl); free(dbinfo); } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index fdf8dd24e..bca4be64b 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -12,9 +12,17 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H +#include +#include +#include +#include +#include + #include "drgn.h" #include "dwarf_index.h" #include "hash_table.h" +#include "string_builder.h" +#include "vector.h" /** * @ingroup Internals @@ -29,6 +37,86 @@ * @{ */ +/** State of a @ref drgn_debug_info_module. */ +enum drgn_debug_info_module_state { + /** Reported but not indexed. */ + DRGN_DEBUG_INFO_MODULE_NEW, + /** Reported and will be indexed on success. */ + DRGN_DEBUG_INFO_MODULE_INDEXING, + /** Indexed. Must not be freed until @ref drgn_debug_info_destroy(). 
*/ + DRGN_DEBUG_INFO_MODULE_INDEXED, +} __attribute__((packed)); + +/** + * A module reported to a @ref drgn_debug_info. + * + * Conceptually, a module is an ELF file loaded at a specific address range (or + * not loaded). + * + * Files are identified by canonical path and, if present, build ID. Each (path, + * address range) is uniquely represented by a @ref drgn_debug_info_module. + */ +struct drgn_debug_info_module { + /** @c NULL if the module does not have a build ID. */ + const void *build_id; + /** Zero if the module does not have a build ID. */ + size_t build_id_len; + /** Load address range, or both 0 if not loaded. */ + uint64_t start, end; + /** Optional module name allocated with @c malloc(). */ + char *name; + + Dwfl_Module *dwfl_module; + Elf_Data *debug_info; + Elf_Data *debug_abbrev; + Elf_Data *debug_str; + Elf_Data *debug_line; + + /* + * path, elf, and fd are used when an ELF file was reported with + * drgn_debug_info_report_elf() so we can report the file to libdwfl + * later. They are not valid after loading. + */ + char *path; + Elf *elf; + int fd; + enum drgn_debug_info_module_state state; + bool bswap; + /** Error while loading. */ + struct drgn_error *err; + /** + * Next module with same build ID and address range. + * + * There may be multiple files with the same build ID (e.g., a stripped + * binary and its corresponding separate debug info file). While + * loading, all files with the same build ID and address range are + * linked in a list. Only one is indexed; the rest are destroyed. 
+ */ + struct drgn_debug_info_module *next; +}; + +struct drgn_debug_info_module_key { + const void *build_id; + size_t build_id_len; + uint64_t start, end; +}; + +static inline struct drgn_debug_info_module_key +drgn_debug_info_module_key(struct drgn_debug_info_module * const *entry) +{ + return (struct drgn_debug_info_module_key){ + .build_id = (*entry)->build_id, + .build_id_len = (*entry)->build_id_len, + .start = (*entry)->start, + .end = (*entry)->end, + }; +} +DEFINE_HASH_TABLE_TYPE(drgn_debug_info_module_table, + struct drgn_debug_info_module *, + drgn_debug_info_module_key) + +DEFINE_HASH_SET_TYPE(c_string_set, const char *) + /** Cached type in a @ref drgn_debug_info. */ struct drgn_dwarf_type { struct drgn_type *type; @@ -46,8 +134,23 @@ DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type); /** Cache of debugging information. */ struct drgn_debug_info { + /** Program owning this cache. */ + struct drgn_program *prog; + + /** DWARF frontend library handle. */ + Dwfl *dwfl; + /** Modules keyed by build ID and address range. */ + struct drgn_debug_info_module_table modules; + /** + * Names of indexed modules. + * + * The entries in this set are @ref drgn_dwarf_module::name, so they + * should not be freed. + */ + struct c_string_set module_names; /** Index of DWARF debugging information. */ struct drgn_dwarf_index dindex; + /** * Cache of parsed types. * @@ -64,18 +167,99 @@ struct drgn_debug_info { struct drgn_dwarf_type_map cant_be_incomplete_array_types; /** Current parsing recursion depth. */ int depth; - /** Program owning this cache. */ - struct drgn_program *prog; }; /** Create a @ref drgn_debug_info. */ struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, - const Dwfl_Callbacks *dwfl_callbacks, struct drgn_debug_info **ret); /** Destroy a @ref drgn_debug_info. 
*/
 void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo);
+DEFINE_VECTOR_TYPE(drgn_debug_info_module_vector,
+		   struct drgn_debug_info_module *)
+
+/** State tracked while loading debugging information. */
+struct drgn_debug_info_load_state {
+	/** Debugging information cache passed to @ref drgn_debug_info_load(). */
+	struct drgn_debug_info * const dbinfo;
+	/** Paths passed to @ref drgn_debug_info_load(). */
+	const char ** const paths;
+	/** Number of entries in @ref paths. */
+	const size_t num_paths;
+	/** @c load_default argument of @ref drgn_debug_info_load(). */
+	const bool load_default;
+	/**
+	 * @c load_main argument of @ref drgn_debug_info_load(); always @c true
+	 * when @ref load_default is @c true.
+	 */
+	const bool load_main;
+	/** Newly added modules to be indexed. */
+	struct drgn_debug_info_module_vector new_modules;
+	/** Formatted errors reported by @ref drgn_debug_info_report_error(). */
+	struct string_builder errors;
+	/** Number of errors reported by @ref drgn_debug_info_report_error(). */
+	unsigned int num_errors;
+	/** Maximum number of errors to report before truncating. */
+	unsigned int max_errors;
+};
+
+/**
+ * Report a non-fatal error while loading debugging information.
+ *
+ * The error will be included in a @ref DRGN_ERROR_MISSING_DEBUG_INFO error
+ * returned by @ref drgn_debug_info_load().
+ *
+ * @param[in] load Load state in which the error is recorded.
+ * @param[in] name An optional module name to prefix to the error message.
+ * @param[in] message An optional message with additional context to prefix
+ * to the error message.
+ * @param[in] err The error to report. This may be @c NULL if @p name and @p
+ * message provide sufficient information. This is destroyed on either success
+ * or failure.
+ * @return @c NULL on success, @ref drgn_enomem if the error could not be
+ * reported.
+ */
+struct drgn_error *
+drgn_debug_info_report_error(struct drgn_debug_info_load_state *load,
+			     const char *name, const char *message,
+			     struct drgn_error *err);
+
+/**
+ * Report a module to a @ref drgn_debug_info from an ELF file.
+ *
+ * This takes ownership of @p fd and @p elf on either success or failure. They
+ * should not be used (including closed or freed) after this returns.
+ *
+ * @param[in] path The path to the file.
+ * @param[in] fd A file descriptor referring to the file.
+ * @param[in] elf The Elf handle of the file.
+ * @param[in] start The (inclusive) start address of the loaded file, or 0 if + * the file is not loaded. + * @param[in] end The (exclusive) end address of the loaded file, or 0 if the + * file is not loaded. + * @param[in] name An optional name for the module. This is only used for @ref + * drgn_debug_info_is_indexed(). + * @param[out] new_ret Whether the module was newly created and reported. This + * is @c false if a module with the same build ID and address range was already + * loaded or a file with the same path and address range was already reported. + */ +struct drgn_error * +drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, + const char *path, int fd, Elf *elf, uint64_t start, + uint64_t end, const char *name, bool *new_ret); + +/** Index new debugging information and continue reporting. */ +struct drgn_error * +drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load); + +/** + * Load debugging information. + * + * @sa drgn_program_load_debug_info + */ +struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, + const char **paths, size_t n, + bool load_default, bool load_main); + +/** + * Return whether a @ref drgn_debug_info has indexed a module with the given + * name. + */ +bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, + const char *name); + /** @ref drgn_type_find_fn() that uses debugging information. 
*/ struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, const char *name, size_t name_len, diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 501236c5a..adf0a5a05 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -3,161 +3,16 @@ #include #include -#include -#include -#include -#include #include #include -#include #include #include -#include -#include -#include #include "internal.h" +#include "debug_info.h" #include "dwarf_index.h" #include "mread.h" #include "siphash.h" -#include "string_builder.h" - -DEFINE_VECTOR_FUNCTIONS(dwfl_module_vector) -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_module_vector) - -static inline struct hash_pair -drgn_dwarf_module_hash(const struct drgn_dwarf_module_key *key) -{ - size_t hash; - - hash = cityhash_size_t(key->build_id, key->build_id_len); - hash = hash_combine(hash, key->start); - hash = hash_combine(hash, key->end); - return hash_pair_from_avalanching_hash(hash); -} -static inline bool drgn_dwarf_module_eq(const struct drgn_dwarf_module_key *a, - const struct drgn_dwarf_module_key *b) -{ - return (a->build_id_len == b->build_id_len && - (a->build_id_len == 0 || - memcmp(a->build_id, b->build_id, a->build_id_len) == 0) && - a->start == b->start && a->end == b->end); -} -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_module_table, drgn_dwarf_module_hash, - drgn_dwarf_module_eq) - -DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_hash, c_string_eq) - -/** - * @c Dwfl_Callbacks::find_elf() implementation. - * - * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c - * Elf handle, which we need for a couple of reasons: - * - * - We usually already have the @c Elf handle open in order to identify the - * file. - * - For kernel modules, we set the section addresses in the @c Elf handle - * ourselves instead of using @c Dwfl_Callbacks::section_address(). - * - * Additionally, there's a special case for vmlinux. 
It is usually an @c ET_EXEC - * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN - * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but - * @ref dwfl_report_elf() bypasses this hack. - * - * So, we're stuck using @c dwfl_report_module() and this dummy callback. - */ -static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap, - const char *name, Dwarf_Addr base, - char **file_name, Elf **elfp) -{ - struct drgn_dwfl_module_userdata *userdata = *userdatap; - int fd; - - /* - * libdwfl consumes the returned path, file descriptor, and ELF handle, - * so clear the fields. - */ - *file_name = userdata->path; - fd = userdata->fd; - *elfp = userdata->elf; - userdata->path = NULL; - userdata->fd = -1; - userdata->elf = NULL; - return fd; -} - -/* - * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls - * back to dwfl_linux_proc_find_elf() otherwise. - */ -static int drgn_dwfl_linux_proc_find_elf(Dwfl_Module *dwfl_module, - void **userdatap, const char *name, - Dwarf_Addr base, char **file_name, - Elf **elfp) -{ - struct drgn_dwfl_module_userdata *userdata = *userdatap; - - if (userdata->elf) { - return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); - } - return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); -} - -/* - * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls - * back to dwfl_build_id_find_elf() otherwise. - */ -static int drgn_dwfl_build_id_find_elf(Dwfl_Module *dwfl_module, - void **userdatap, const char *name, - Dwarf_Addr base, char **file_name, - Elf **elfp) -{ - struct drgn_dwfl_module_userdata *userdata = *userdatap; - - if (userdata->elf) { - return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); - } - return dwfl_build_id_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); -} - -/** - * @c Dwfl_Callbacks::section_address() implementation. 
- * - * We set the section header @c sh_addr in memory instead of using this, but - * libdwfl requires the callback pointer to be non-@c NULL. It will be called - * for any sections that still have a zero @c sh_addr, meaning they are not - * present in memory. - */ -static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap, - const char *name, Dwarf_Addr base, - const char *secname, Elf32_Word shndx, - const GElf_Shdr *shdr, Dwarf_Addr *addr) -{ - *addr = -1; - return DWARF_CB_OK; -} - -const Dwfl_Callbacks drgn_dwfl_callbacks = { - .find_elf = drgn_dwfl_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; - -const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks = { - .find_elf = drgn_dwfl_linux_proc_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; - -const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = { - .find_elf = drgn_dwfl_build_id_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; /* * The DWARF abbreviation table gets translated into a series of instructions. 
@@ -216,10 +71,9 @@ DEFINE_VECTOR(uint32_vector, uint32_t) DEFINE_VECTOR(uint64_vector, uint64_t) struct drgn_dwarf_index_cu { - struct drgn_dwfl_module_userdata *userdata; + struct drgn_debug_info_module *module; const char *ptr; - size_t unit_length; - uint64_t debug_abbrev_offset; + const char *end; uint8_t version; uint8_t address_size; bool is_64_bit; @@ -331,133 +185,6 @@ static inline struct drgn_error *mread_uleb128_into_size_t(const char **ptr, return NULL; } -static void drgn_dwarf_module_destroy(struct drgn_dwarf_module *module) -{ - if (module) { - dwfl_module_vector_deinit(&module->dwfl_modules); - free(module->name); - free(module->build_id); - free(module); - } -} - -static void -drgn_dwfl_module_userdata_destroy(struct drgn_dwfl_module_userdata *userdata) -{ - if (userdata) { - elf_end(userdata->elf); - if (userdata->fd != -1) - close(userdata->fd); - free(userdata->path); - free(userdata); - } -} - -struct drgn_dwfl_module_removed_arg { - Dwfl *dwfl; - bool finish_indexing; - bool free_all; -}; - -static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap, - const char *name, Dwarf_Addr base, - void *_arg) -{ - struct drgn_dwfl_module_removed_arg *arg = _arg; - /* - * userdatap is actually a void ** like for the other libdwfl callbacks, - * but dwfl_report_end() has the wrong signature for the removed - * callback. - */ - struct drgn_dwfl_module_userdata *userdata = *(void **)userdatap; - - if (arg->finish_indexing && userdata && - userdata->state == DRGN_DWARF_MODULE_INDEXING) - userdata->state = DRGN_DWARF_MODULE_INDEXED; - if (arg->free_all || !userdata || - userdata->state != DRGN_DWARF_MODULE_INDEXED) { - drgn_dwfl_module_userdata_destroy(userdata); - } else { - Dwarf_Addr end; - - /* - * The module was already indexed. Report it again so libdwfl - * doesn't remove it. 
- */ - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, - NULL, NULL); - dwfl_report_module(arg->dwfl, name, base, end); - } - return DWARF_CB_OK; -} - -static void drgn_dwarf_module_finish_indexing(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_module *module) -{ - module->state = DRGN_DWARF_MODULE_INDEXED; - /* - * We don't need this anymore (but reinitialize it to empty so that - * drgn_dwarf_index_get_unindexed() skips this module). - */ - dwfl_module_vector_deinit(&module->dwfl_modules); - dwfl_module_vector_init(&module->dwfl_modules); - if (module->name) { - int ret; - - ret = c_string_set_insert(&dindex->names, - (const char **)&module->name, NULL); - /* drgn_dwarf_index_get_unindexed() should've reserved enough for us. */ - assert(ret != -1); - } -} - -static void drgn_dwarf_index_free_modules(struct drgn_dwarf_index *dindex, - bool finish_indexing, bool free_all) -{ - struct drgn_dwfl_module_removed_arg arg = { - .dwfl = dindex->dwfl, - .finish_indexing = finish_indexing, - .free_all = free_all, - }; - struct drgn_dwarf_module_table_iterator it; - size_t i; - - for (it = drgn_dwarf_module_table_first(&dindex->module_table); - it.entry; ) { - struct drgn_dwarf_module *module = *it.entry; - - if (finish_indexing && - module->state == DRGN_DWARF_MODULE_INDEXING) - drgn_dwarf_module_finish_indexing(dindex, module); - if (free_all || module->state != DRGN_DWARF_MODULE_INDEXED) { - it = drgn_dwarf_module_table_delete_iterator(&dindex->module_table, - it); - drgn_dwarf_module_destroy(module); - } else { - it = drgn_dwarf_module_table_next(it); - } - } - - for (i = dindex->no_build_id.size; i-- > 0; ) { - struct drgn_dwarf_module *module = dindex->no_build_id.data[i]; - - if (finish_indexing && - module->state == DRGN_DWARF_MODULE_INDEXING) - drgn_dwarf_module_finish_indexing(dindex, module); - if (free_all || module->state != DRGN_DWARF_MODULE_INDEXED) { - dindex->no_build_id.size--; - if (i != dindex->no_build_id.size) { - 
dindex->no_build_id.data[i] = - dindex->no_build_id.data[dindex->no_build_id.size]; - } - drgn_dwarf_module_destroy(module); - } - } - - dwfl_report_begin(dindex->dwfl); - dwfl_report_end(dindex->dwfl, drgn_dwfl_module_removed, &arg); -} - static void drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, struct drgn_dwarf_index *dindex) @@ -473,26 +200,11 @@ drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, ns->saved_err = NULL; } -struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, - const Dwfl_Callbacks *callbacks) +void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex) { - dindex->dwfl = dwfl_begin(callbacks); - if (!dindex->dwfl) - return drgn_error_libdwfl(); drgn_dwarf_index_namespace_init(&dindex->global, dindex); drgn_dwarf_index_specification_map_init(&dindex->specifications); drgn_dwarf_index_cu_vector_init(&dindex->cus); - memset(&dindex->errors, 0, sizeof(dindex->errors)); - dindex->num_errors = 0; - const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); - if (max_errors) - dindex->max_errors = atoi(max_errors); - else - dindex->max_errors = 5; - drgn_dwarf_module_table_init(&dindex->module_table); - drgn_dwarf_module_vector_init(&dindex->no_build_id); - c_string_set_init(&dindex->names); - return NULL; } static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) @@ -526,828 +238,29 @@ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) { if (!dindex) return; - c_string_set_deinit(&dindex->names); - drgn_dwarf_index_free_modules(dindex, false, true); - assert(dindex->no_build_id.size == 0); - assert(drgn_dwarf_module_table_size(&dindex->module_table) == 0); - drgn_dwarf_module_vector_deinit(&dindex->no_build_id); - drgn_dwarf_module_table_deinit(&dindex->module_table); for (size_t i = 0; i < dindex->cus.size; i++) drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); drgn_dwarf_index_cu_vector_deinit(&dindex->cus); 
drgn_dwarf_index_specification_map_deinit(&dindex->specifications); drgn_dwarf_index_namespace_deinit(&dindex->global); - dwfl_end(dindex->dwfl); -} - -void drgn_dwarf_index_report_begin(struct drgn_dwarf_index *dindex) -{ - dwfl_report_begin_add(dindex->dwfl); -} - -struct drgn_error * -drgn_dwarf_index_report_error(struct drgn_dwarf_index *dindex, const char *name, - const char *message, struct drgn_error *err) -{ - if (err && err->code == DRGN_ERROR_NO_MEMORY) { - /* Always fail hard if we're out of memory. */ - goto err; - } - if (dindex->num_errors == 0 && - !string_builder_append(&dindex->errors, - "could not get debugging information for:")) - goto err; - if (dindex->num_errors < dindex->max_errors) { - if (!string_builder_line_break(&dindex->errors)) - goto err; - if (name && !string_builder_append(&dindex->errors, name)) - goto err; - if (name && (message || err) && - !string_builder_append(&dindex->errors, " (")) - goto err; - if (message && !string_builder_append(&dindex->errors, message)) - goto err; - if (message && err && - !string_builder_append(&dindex->errors, ": ")) - goto err; - if (err && !string_builder_append_error(&dindex->errors, err)) - goto err; - if (name && (message || err) && - !string_builder_appendc(&dindex->errors, ')')) - goto err; - } - dindex->num_errors++; - drgn_error_destroy(err); - return NULL; - -err: - drgn_error_destroy(err); - return &drgn_enomem; -} - -static void drgn_dwarf_index_reset_errors(struct drgn_dwarf_index *dindex) -{ - dindex->errors.len = 0; - dindex->num_errors = 0; -} - -static struct drgn_error * -drgn_dwarf_index_finalize_errors(struct drgn_dwarf_index *dindex) -{ - struct drgn_error *err; - - if (dindex->num_errors > dindex->max_errors && - (!string_builder_line_break(&dindex->errors) || - !string_builder_appendf(&dindex->errors, "... 
%u more", - dindex->num_errors - dindex->max_errors))) { - drgn_dwarf_index_reset_errors(dindex); - return &drgn_enomem; - } - if (dindex->num_errors) { - err = drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, - &dindex->errors); - memset(&dindex->errors, 0, sizeof(dindex->errors)); - dindex->num_errors = 0; - return err; - } else { - return NULL; - } -} - -static struct drgn_error * -drgn_dwarf_index_insert_module(struct drgn_dwarf_index *dindex, - const void *build_id, size_t build_id_len, - uint64_t start, uint64_t end, const char *name, - struct drgn_dwarf_module **ret) -{ - struct hash_pair hp; - struct drgn_dwarf_module_table_iterator it; - struct drgn_dwarf_module *module; - - if (build_id_len) { - struct drgn_dwarf_module_key key = { - .build_id = build_id, - .build_id_len = build_id_len, - .start = start, - .end = end, - }; - - hp = drgn_dwarf_module_table_hash(&key); - it = drgn_dwarf_module_table_search_hashed(&dindex->module_table, - &key, hp); - if (it.entry) { - module = *it.entry; - goto out; - } - } - - module = malloc(sizeof(*module)); - if (!module) - return &drgn_enomem; - module->start = start; - module->end = end; - if (name) { - module->name = strdup(name); - if (!module->name) - goto err_module; - } else { - module->name = NULL; - } - module->build_id_len = build_id_len; - if (build_id_len) { - module->build_id = malloc(build_id_len); - if (!module->build_id) - goto err_name; - memcpy(module->build_id, build_id, build_id_len); - if (drgn_dwarf_module_table_insert_searched(&dindex->module_table, - &module, hp, - &it) == -1) { - free(module->build_id); -err_name: - free(module->name); -err_module: - free(module); - return &drgn_enomem; - } - } else { - module->build_id = NULL; - if (!drgn_dwarf_module_vector_append(&dindex->no_build_id, - &module)) - goto err_name; - } - module->state = DRGN_DWARF_MODULE_NEW; - dwfl_module_vector_init(&module->dwfl_modules); -out: - *ret = module; - return NULL; } -struct drgn_error 
*drgn_dwarf_index_report_elf(struct drgn_dwarf_index *dindex, - const char *path, int fd, - Elf *elf, uint64_t start, - uint64_t end, const char *name, - bool *new_ret) +void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, + struct drgn_dwarf_index *dindex) { - struct drgn_error *err; - const void *build_id; - ssize_t build_id_len; - struct drgn_dwarf_module *module; - char *path_key = NULL; - Dwfl_Module *dwfl_module; - void **userdatap; - struct drgn_dwfl_module_userdata *userdata; - - if (new_ret) - *new_ret = false; - - build_id_len = dwelf_elf_gnu_build_id(elf, &build_id); - if (build_id_len == -1) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, - drgn_error_libdwfl()); - goto free; - } - - err = drgn_dwarf_index_insert_module(dindex, build_id, build_id_len, - start, end, name, &module); - if (err) - goto free; - if (module->state == DRGN_DWARF_MODULE_INDEXED) { - /* We've already indexed this module. */ - err = NULL; - goto free; - } - - path_key = realpath(path, NULL); - if (!path_key) { - path_key = strdup(path); - if (!path_key) { - err = &drgn_enomem; - goto free; - } - } - dwfl_module = dwfl_report_module(dindex->dwfl, path_key, start, end); - if (!dwfl_module) { - err = drgn_error_libdwfl(); - goto free; - } - - dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL, - NULL); - if (*userdatap) { - /* We've already reported this file at this offset. */ - err = NULL; - goto free; - } - - userdata = malloc(sizeof(*userdata)); - if (!userdata) { - err = &drgn_enomem; - goto free; - } - userdata->path = path_key; - userdata->fd = fd; - userdata->elf = elf; - userdata->state = DRGN_DWARF_MODULE_NEW; - userdata->module = dwfl_module; - *userdatap = userdata; - if (new_ret) - *new_ret = true; - - if (!dwfl_module_vector_append(&module->dwfl_modules, &dwfl_module)) { - /* - * NB: not goto free now that we're referencing the file from a - * Dwfl_Module. 
- */ - return &drgn_enomem; - } - return NULL; - -free: - elf_end(elf); - close(fd); - free(path_key); - return err; + state->dindex = dindex; + state->old_cus_size = dindex->cus.size; + state->err = NULL; } -static int drgn_dwarf_index_report_dwfl_module(Dwfl_Module *dwfl_module, - void **userdatap, - const char *name, - Dwarf_Addr base, void *arg) +void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, + struct drgn_error *err) { - struct drgn_error *err; - struct drgn_dwarf_index *dindex = arg; - struct drgn_dwfl_module_userdata *userdata = *userdatap; - const unsigned char *build_id; - int build_id_len; - GElf_Addr build_id_vaddr; - Dwarf_Addr end; - struct drgn_dwarf_module *module; - - if (userdata) { - /* - * This was either reported from - * drgn_dwarf_index_report_module() or already indexed. - */ - return DWARF_CB_OK; - } - - build_id_len = dwfl_module_build_id(dwfl_module, &build_id, - &build_id_vaddr); - if (build_id_len == -1) { - err = drgn_dwarf_index_report_error(dindex, name, NULL, - drgn_error_libdwfl()); - if (err) { - drgn_error_destroy(err); - return DWARF_CB_ABORT; - } - return DWARF_CB_OK; - } - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL); - - err = drgn_dwarf_index_insert_module(dindex, build_id, build_id_len, - base, end, NULL, &module); - if (err) { + #pragma omp critical(drgn_dwarf_index_update_cancel) + if (state->err) drgn_error_destroy(err); - return DWARF_CB_ABORT; - } - - userdata = malloc(sizeof(*userdata)); - if (!userdata) - return DWARF_CB_ABORT; - *userdatap = userdata; - userdata->path = NULL; - userdata->fd = -1; - userdata->elf = NULL; - userdata->module = dwfl_module; - if (module->state == DRGN_DWARF_MODULE_INDEXED) { - /* - * We've already indexed this module. Don't index it again, but - * keep the Dwfl_Module. 
- */ - userdata->state = DRGN_DWARF_MODULE_INDEXING; - } else { - userdata->state = DRGN_DWARF_MODULE_NEW; - if (!dwfl_module_vector_append(&module->dwfl_modules, - &dwfl_module)) - return DWARF_CB_ABORT; - } - return DWARF_CB_OK; -} - -static struct drgn_error * -append_unindexed_module(struct drgn_dwarf_module *module, - struct drgn_dwarf_module_vector *unindexed, - size_t *num_names) -{ - if (!module->dwfl_modules.size) { - /* This was either already indexed or had no new files. */ - return NULL; - } - if (!drgn_dwarf_module_vector_append(unindexed, &module)) - return &drgn_enomem; - *num_names += 1; - return NULL; -} - -static struct drgn_error * -drgn_dwarf_index_get_unindexed(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_module_vector *unindexed) -{ - struct drgn_error *err; - size_t num_names = 0; - struct drgn_dwarf_module_table_iterator it; - size_t i; - - /* - * Walk the module table and no build ID lists, but skip modules with no - * Dwfl_Module (which may be because they were already indexed or - * because the files were already reported). - */ - for (it = drgn_dwarf_module_table_first(&dindex->module_table); - it.entry; it = drgn_dwarf_module_table_next(it)) { - err = append_unindexed_module(*it.entry, unindexed, &num_names); - if (err) - return err; - } - for (i = dindex->no_build_id.size; i-- > 0; ) { - struct drgn_dwarf_module *module = dindex->no_build_id.data[i]; - - if (module->state == DRGN_DWARF_MODULE_INDEXED) { - /* - * If this module is indexed, then every module before - * it must be indexed, so we can stop looking. 
- */ - break; - } - err = append_unindexed_module(module, unindexed, &num_names); - if (err) - return err; - } - if (num_names && - !c_string_set_reserve(&dindex->names, - c_string_set_size(&dindex->names) + num_names)) - return &drgn_enomem; - return NULL; -} - -static struct drgn_error *apply_relocation(Elf_Data *data, uint64_t r_offset, - uint32_t r_type, int64_t r_addend, - uint64_t st_value) -{ - char *p; - - p = (char *)data->d_buf + r_offset; - switch (r_type) { - case R_X86_64_NONE: - break; - case R_X86_64_32: - if (r_offset > SIZE_MAX - sizeof(uint32_t) || - r_offset + sizeof(uint32_t) > data->d_size) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid relocation offset"); - } - *(uint32_t *)p = st_value + r_addend; - break; - case R_X86_64_64: - if (r_offset > SIZE_MAX - sizeof(uint64_t) || - r_offset + sizeof(uint64_t) > data->d_size) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid relocation offset"); - } - *(uint64_t *)p = st_value + r_addend; - break; - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "unimplemented relocation type %" PRIu32, - r_type); - } - return NULL; -} - -static struct drgn_error *relocate_section(Elf_Scn *scn, Elf_Scn *rela_scn, - Elf_Scn *symtab_scn, - uint64_t *sh_addrs, size_t shdrnum) -{ - struct drgn_error *err; - Elf_Data *data, *rela_data, *symtab_data; - const Elf64_Rela *relocs; - const Elf64_Sym *syms; - size_t num_relocs, num_syms; - size_t i; - GElf_Shdr *shdr, shdr_mem; - - err = read_elf_section(scn, &data); - if (err) - return err; - err = read_elf_section(rela_scn, &rela_data); - if (err) - return err; - err = read_elf_section(symtab_scn, &symtab_data); - if (err) - return err; - - relocs = (Elf64_Rela *)rela_data->d_buf; - num_relocs = rela_data->d_size / sizeof(Elf64_Rela); - syms = (Elf64_Sym *)symtab_data->d_buf; - num_syms = symtab_data->d_size / sizeof(Elf64_Sym); - - for (i = 0; i < num_relocs; i++) { - const Elf64_Rela *reloc = &relocs[i]; - uint32_t r_sym, r_type; - uint16_t 
st_shndx; - uint64_t sh_addr; - - r_sym = ELF64_R_SYM(reloc->r_info); - r_type = ELF64_R_TYPE(reloc->r_info); - - if (r_sym >= num_syms) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid relocation symbol"); - } - st_shndx = syms[r_sym].st_shndx; - if (st_shndx == 0) { - sh_addr = 0; - } else if (st_shndx < shdrnum) { - sh_addr = sh_addrs[st_shndx - 1]; - } else { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid symbol section index"); - } - err = apply_relocation(data, reloc->r_offset, r_type, - reloc->r_addend, - sh_addr + syms[r_sym].st_value); - if (err) - return err; - } - - /* - * Mark the relocation section as empty so that libdwfl doesn't try to - * apply it again. - */ - shdr = gelf_getshdr(rela_scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - shdr->sh_size = 0; - if (!gelf_update_shdr(rela_scn, shdr)) - return drgn_error_libelf(); - rela_data->d_size = 0; - return NULL; -} - -/* - * Before the debugging information in a relocatable ELF file (e.g., Linux - * kernel module) can be used, it must have ELF relocations applied. This is - * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a - * much faster implementation. It is only implemented for x86-64; for other - * architectures, we can fall back to libdwfl. - */ -static struct drgn_error *apply_elf_relocations(Elf *elf) -{ - struct drgn_error *err; - GElf_Ehdr ehdr_mem, *ehdr; - size_t shdrnum, shstrndx; - uint64_t *sh_addrs; - Elf_Scn *scn; - - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - - if (ehdr->e_type != ET_REL || - ehdr->e_machine != EM_X86_64 || - ehdr->e_ident[EI_CLASS] != ELFCLASS64 || - ehdr->e_ident[EI_DATA] != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? - ELFDATA2LSB : ELFDATA2MSB)) { - /* Unsupported; fall back to libdwfl. 
*/ - return NULL; - } - - if (elf_getshdrnum(elf, &shdrnum)) - return drgn_error_libelf(); - if (shdrnum > 1) { - sh_addrs = calloc(shdrnum - 1, sizeof(*sh_addrs)); - if (!sh_addrs) - return &drgn_enomem; - - scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - size_t ndx; - - ndx = elf_ndxscn(scn); - if (ndx > 0 && ndx < shdrnum) { - GElf_Shdr *shdr, shdr_mem; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out; - } - sh_addrs[ndx - 1] = shdr->sh_addr; - } - } - } else { - sh_addrs = NULL; - } - - if (elf_getshdrstrndx(elf, &shstrndx)) { - err = drgn_error_libelf(); - goto out; - } - - scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - const char *scnname; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out; - } - - if (shdr->sh_type != SHT_RELA) - continue; - - scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - continue; - - if (strstartswith(scnname, ".rela.debug_")) { - Elf_Scn *info_scn, *link_scn; - - info_scn = elf_getscn(elf, shdr->sh_info); - if (!info_scn) { - err = drgn_error_libelf(); - goto out; - } - - link_scn = elf_getscn(elf, shdr->sh_link); - if (!link_scn) { - err = drgn_error_libelf(); - goto out; - } - - err = relocate_section(info_scn, scn, link_scn, - sh_addrs, shdrnum); - if (err) - goto out; - } - } -out: - free(sh_addrs); - return NULL; -} - -static struct drgn_error * -get_debug_sections(struct drgn_dwfl_module_userdata *userdata, - bool *bswap_ret) -{ - struct drgn_error *err; - - if (userdata->elf) { - err = apply_elf_relocations(userdata->elf); - if (err) - return err; - } - - /* - * Note: not dwfl_module_getelf(), because then libdwfl applies - * ELF relocations to all sections, not just debug sections. 
- */ - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(userdata->module, &bias); - if (!dwarf) - return drgn_error_libdwfl(); - - Elf *elf = dwarf_getelf(dwarf); - if (!elf) - return drgn_error_libdw(); - - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - userdata->debug_info = NULL; - userdata->debug_abbrev = NULL; - userdata->debug_str = NULL; - userdata->debug_line = NULL; - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - - if (shdr->sh_type == SHT_NOBITS || (shdr->sh_flags & SHF_GROUP)) - continue; - - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - continue; - - Elf_Data **sectionp; - if (!userdata->debug_info && strcmp(scnname, ".debug_info") == 0) - sectionp = &userdata->debug_info; - else if (!userdata->debug_abbrev && strcmp(scnname, ".debug_abbrev") == 0) - sectionp = &userdata->debug_abbrev; - else if (!userdata->debug_str && strcmp(scnname, ".debug_str") == 0) - sectionp = &userdata->debug_str; - else if (!userdata->debug_line && strcmp(scnname, ".debug_line") == 0) - sectionp = &userdata->debug_line; - else - continue; - err = read_elf_section(scn, sectionp); - if (err) - return err; - } - - if (!userdata->debug_info) { - return drgn_error_create(DRGN_ERROR_OTHER, - "no .debug_info section"); - } else if (!userdata->debug_abbrev) { - return drgn_error_create(DRGN_ERROR_OTHER, - "no .debug_abbrev section"); - } else if (!userdata->debug_str) { - return drgn_error_create(DRGN_ERROR_OTHER, - "no .debug_str section"); - } - - if (userdata->debug_str->d_size == 0 || - ((char *)userdata->debug_str->d_buf)[userdata->debug_str->d_size - 1]) { - return drgn_error_create(DRGN_ERROR_OTHER, - ".debug_str is not null terminated"); - } - - *bswap_ret = (elf_getident(elf, NULL)[EI_DATA] != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? 
- ELFDATA2LSB : ELFDATA2MSB)); - return NULL; -} - -static struct drgn_error *read_compilation_unit_header(const char *ptr, - const char *end, - struct drgn_dwarf_index_cu *cu) -{ - uint32_t tmp; - if (!mread_u32(&ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - cu->is_64_bit = tmp == UINT32_C(0xffffffff); - if (cu->is_64_bit) { - if (!mread_u64_into_size_t(&ptr, end, cu->bswap, - &cu->unit_length)) - return drgn_eof(); - } else { - cu->unit_length = tmp; - } - - uint16_t version; - if (!mread_u16(&ptr, end, cu->bswap, &version)) - return drgn_eof(); - if (version != 2 && version != 3 && version != 4) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unknown DWARF CU version %" PRIu16, - version); - } - cu->version = version; - - if (cu->is_64_bit) { - if (!mread_u64(&ptr, end, cu->bswap, &cu->debug_abbrev_offset)) - return drgn_eof(); - } else { - if (!mread_u32_into_u64(&ptr, end, cu->bswap, - &cu->debug_abbrev_offset)) - return drgn_eof(); - } - - if (!mread_u8(&ptr, end, &cu->address_size)) - return drgn_eof(); - - return NULL; -} - - -static struct drgn_error * -read_dwfl_module_cus(struct drgn_dwfl_module_userdata *userdata, - struct drgn_dwarf_index_cu_vector *cus) -{ - struct drgn_error *err; - - bool bswap; - err = get_debug_sections(userdata, &bswap); - if (err) - return err; - - const char *ptr = section_ptr(userdata->debug_info, 0); - const char *end = section_end(userdata->debug_info); - while (ptr < end) { - struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_append_entry(cus); - if (!cu) - return &drgn_enomem; - cu->userdata = userdata; - cu->ptr = ptr; - cu->bswap = bswap; - err = read_compilation_unit_header(ptr, end, cu); - if (err) - return err; - cu->abbrev_decls = NULL; - cu->num_abbrev_decls = 0; - cu->abbrev_insns = NULL; - cu->file_name_hashes = NULL; - cu->num_file_names = 0; - - if (!mread_skip(&ptr, end, - (cu->is_64_bit ? 
12 : 4) + cu->unit_length)) - return drgn_eof(); - } - return NULL; -} - -static struct drgn_error * -read_module_cus(struct drgn_dwarf_module *module, - struct drgn_dwarf_index_cu_vector *cus, const char **name_ret) -{ - const size_t orig_cus_size = cus->size; - for (size_t i = 0; i < module->dwfl_modules.size; i++) { - void **userdatap; - *name_ret = dwfl_module_info(module->dwfl_modules.data[i], - &userdatap, NULL, NULL, NULL, NULL, - NULL, NULL); - struct drgn_dwfl_module_userdata *userdata = *userdatap; - struct drgn_error *err = read_dwfl_module_cus(userdata, cus); - if (err) { - /* - * Ignore the error unless we have no more Dwfl_Modules - * to try. - */ - if (i == module->dwfl_modules.size - 1) - return err; - drgn_error_destroy(err); - cus->size = orig_cus_size; - continue; - } - userdata->state = DRGN_DWARF_MODULE_INDEXING; - module->state = DRGN_DWARF_MODULE_INDEXING; - return NULL; - } - UNREACHABLE(); -} - -static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_module **unindexed, - size_t num_unindexed) -{ - struct drgn_error *err = NULL; - - #pragma omp parallel - { - int thread_num = omp_get_thread_num(); - struct drgn_dwarf_index_cu_vector cus = VECTOR_INIT; - - #pragma omp for schedule(dynamic) - for (size_t i = 0; i < num_unindexed; i++) { - struct drgn_error *module_err; - const char *name; - - if (err) - continue; - - module_err = read_module_cus(unindexed[i], - thread_num == 0 ? 
- &dindex->cus : &cus, - &name); - if (module_err) { - #pragma omp critical(drgn_read_cus) - if (err) { - drgn_error_destroy(module_err); - } else { - err = drgn_dwarf_index_report_error(dindex, - name, - NULL, - module_err); - } - continue; - } - } - - if (cus.size) { - #pragma omp critical(drgn_read_cus) - if (!err) { - if (drgn_dwarf_index_cu_vector_reserve(&dindex->cus, - dindex->cus.size + cus.size)) { - memcpy(dindex->cus.data + dindex->cus.size, - cus.data, - cus.size * sizeof(*cus.data)); - dindex->cus.size += cus.size; - } else { - err = &drgn_enomem; - } - } - } - drgn_dwarf_index_cu_vector_deinit(&cus); - } - return err; + else + state->err = err; } static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, @@ -1444,6 +357,10 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, } else if (name == DW_AT_name && should_index) { switch (form) { case DW_FORM_strp: + if (!cu->module->debug_str) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_strp without .debug_str section"); + } if (cu->is_64_bit) insn = ATTRIB_NAME_STRP8; else @@ -1455,8 +372,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, default: break; } - } else if (name == DW_AT_stmt_list && - cu->userdata->debug_line) { + } else if (name == DW_AT_stmt_list && cu->module->debug_line) { switch (form) { case DW_FORM_data4: insn = ATTRIB_STMT_LIST_LINEPTR4; @@ -1641,10 +557,11 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, return NULL; } -static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu) +static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, + size_t debug_abbrev_offset) { - Elf_Data *debug_abbrev = cu->userdata->debug_abbrev; - const char *ptr = section_ptr(debug_abbrev, cu->debug_abbrev_offset); + Elf_Data *debug_abbrev = cu->module->debug_abbrev; + const char *ptr = section_ptr(debug_abbrev, debug_abbrev_offset); if (!ptr) return 
drgn_eof(); const char *end = section_end(debug_abbrev); @@ -1667,6 +584,37 @@ static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu) return NULL; } +static struct drgn_error *read_cu(struct drgn_dwarf_index_cu *cu) +{ + + const char *ptr = &cu->ptr[cu->is_64_bit ? 12 : 4]; + uint16_t version; + if (!mread_u16(&ptr, cu->end, cu->bswap, &version)) + return drgn_eof(); + if (version < 2 || version > 4) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown DWARF CU version %" PRIu16, + version); + } + cu->version = version; + + size_t debug_abbrev_offset; + if (cu->is_64_bit) { + if (!mread_u64_into_size_t(&ptr, cu->end, cu->bswap, + &debug_abbrev_offset)) + return drgn_eof(); + } else { + if (!mread_u32_into_size_t(&ptr, cu->end, cu->bswap, + &debug_abbrev_offset)) + return drgn_eof(); + } + + if (!mread_u8(&ptr, cu->end, &cu->address_size)) + return drgn_eof(); + + return read_abbrev_table(cu, debug_abbrev_offset); +} + static struct drgn_error *skip_lnp_header(struct drgn_dwarf_index_cu *cu, const char **ptr, const char *end) { @@ -1740,7 +688,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, static const uint64_t siphash_key[2]; struct drgn_error *err; - Elf_Data *debug_line = cu->userdata->debug_line; + Elf_Data *debug_line = cu->module->debug_line; const char *ptr = section_ptr(debug_line, stmt_list); if (!ptr) return drgn_eof(); @@ -1849,22 +797,18 @@ index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, } /* - * First pass: read the abbreviation and file name tables and index DIEs with + * First pass: read the file name tables and index DIEs with * DW_AT_specification. This recurses into namespaces. */ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, struct drgn_dwarf_index_cu *cu) { struct drgn_error *err; - const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; - const char *end = &cu->ptr[(cu->is_64_bit ? 
12 : 4) + cu->unit_length]; - Elf_Data *debug_info = cu->userdata->debug_info; + Elf_Data *debug_info = cu->module->debug_info; const char *debug_info_buffer = section_ptr(debug_info, 0); + const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; + const char *end = cu->end; unsigned int depth = 0; - - if ((err = read_abbrev_table(cu))) - return err; - for (;;) { size_t die_offset = ptr - debug_info_buffer; @@ -2043,7 +987,7 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, */ if (!declaration && (err = index_specification(dindex, specification, - cu->userdata->module, + cu->module->dwfl_module, die_offset))) return err; } @@ -2061,6 +1005,63 @@ static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, return NULL; } +void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module) +{ + const bool bswap = module->bswap; + const char *ptr = section_ptr(module->debug_info, 0); + const char *end = section_end(module->debug_info); + while (ptr < end) { + const char *cu_ptr = ptr; + uint32_t tmp; + if (!mread_u32(&ptr, end, bswap, &tmp)) + goto err; + bool is_64_bit = tmp == UINT32_C(0xffffffff); + size_t unit_length; + if (is_64_bit) { + if (!mread_u64_into_size_t(&ptr, end, bswap, + &unit_length)) + goto err; + } else { + unit_length = tmp; + } + if (!mread_skip(&ptr, end, unit_length)) + goto err; + + #pragma omp task + { + struct drgn_dwarf_index_cu cu = { + .module = module, + .ptr = cu_ptr, + .end = ptr, + .is_64_bit = is_64_bit, + .bswap = module->bswap, + }; + struct drgn_error *cu_err = read_cu(&cu); + if (cu_err) + goto cu_err; + + cu_err = index_cu_first_pass(state->dindex, &cu); + if (cu_err) + goto cu_err; + + #pragma omp critical(drgn_dwarf_index_cus) + if (!drgn_dwarf_index_cu_vector_append(&state->dindex->cus, + &cu)) + cu_err = &drgn_enomem; + if (cu_err) { +cu_err: + drgn_dwarf_index_cu_deinit(&cu); + drgn_dwarf_index_update_cancel(state, cu_err); + 
} + } + } + return; + +err: + drgn_dwarf_index_update_cancel(state, drgn_eof()); +} + static bool find_definition(struct drgn_dwarf_index *dindex, uintptr_t die_addr, Dwfl_Module **module_ret, size_t *offset_ret) { @@ -2187,14 +1188,13 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, struct drgn_dwarf_index_cu *cu, const char *ptr) { struct drgn_error *err; - const char *end = &cu->ptr[(cu->is_64_bit ? 12 : 4) + cu->unit_length]; - Elf_Data *debug_info = cu->userdata->debug_info; + Elf_Data *debug_info = cu->module->debug_info; const char *debug_info_buffer = section_ptr(debug_info, 0); - Elf_Data *debug_str = cu->userdata->debug_str; + Elf_Data *debug_str = cu->module->debug_str; + const char *end = cu->end; unsigned int depth = 0; uint8_t depth1_tag = 0; size_t depth1_offset = 0; - for (;;) { size_t die_offset = ptr - debug_info_buffer; @@ -2373,7 +1373,7 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, !specification) { if (insn & DIE_FLAG_DECLARATION) declaration = true; - Dwfl_Module *module = cu->userdata->module; + Dwfl_Module *module = cu->module->dwfl_module; if (tag == DW_TAG_enumerator) { if (depth1_tag != DW_TAG_enumeration_type) goto next; @@ -2427,7 +1427,7 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, return NULL; } -static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) +static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) { for (size_t i = 0; i < ARRAY_SIZE(dindex->global.shards); i++) { struct drgn_dwarf_index_shard *shard = @@ -2444,8 +1444,8 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) void **userdatap; dwfl_module_info(die->module, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL); - struct drgn_dwfl_module_userdata *userdata = *userdatap; - if (userdata->state == DRGN_DWARF_MODULE_INDEXED) + struct drgn_debug_info_module *module = *userdatap; + if (module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) break; else shard->dies.size--; @@ -2484,8 +1484,8 @@ 
static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) void **userdatap; dwfl_module_info(it.entry->module, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL); - struct drgn_dwfl_module_userdata *userdata = *userdatap; - if (userdata->state == DRGN_DWARF_MODULE_INDEXED) { + struct drgn_debug_info_module *module = *userdatap; + if (module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { it = drgn_dwarf_index_specification_map_next(it); } else { it = drgn_dwarf_index_specification_map_delete_iterator(&dindex->specifications, @@ -2494,139 +1494,36 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) } } -static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, - const size_t old_cus_size) -{ - struct drgn_error *err = NULL; - #pragma omp parallel - { - #pragma omp for schedule(dynamic) - for (size_t i = old_cus_size; i < dindex->cus.size; i++) { - if (!err) { - struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; - struct drgn_error *cu_err = - index_cu_first_pass(dindex, cu); - if (cu_err) { - #pragma omp critical(drgn_index_cus) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - } - - #pragma omp for schedule(dynamic) - for (size_t i = old_cus_size; i < dindex->cus.size; i++) { - if (!err) { - struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; - const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; - struct drgn_error *cu_err = - index_cu_second_pass(&dindex->global, - cu, ptr); - if (cu_err) { - #pragma omp critical(drgn_index_cus) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - } - } - return err; -} - -/* - * Like drgn_dwarf_index_report_end(), but doesn't finalize reported errors or - * free unindexed modules on success. 
- */ -static struct drgn_error * -drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex, - bool report_from_dwfl) +struct drgn_error * +drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state) { - struct drgn_error *err; - struct drgn_dwarf_module_vector unindexed = VECTOR_INIT; - const size_t old_cus_size = dindex->cus.size; + struct drgn_dwarf_index *dindex = state->dindex; - dwfl_report_end(dindex->dwfl, NULL, NULL); - if (report_from_dwfl && - dwfl_getmodules(dindex->dwfl, drgn_dwarf_index_report_dwfl_module, - dindex, 0)) { - err = &drgn_enomem; + if (state->err) goto err; - } - err = drgn_dwarf_index_get_unindexed(dindex, &unindexed); - if (err) - goto err; - err = read_cus(dindex, unindexed.data, unindexed.size); - if (err) - goto err; - /* - * After this point, if we hit an error, then we have to roll back the - * index. - */ - err = index_cus(dindex, old_cus_size); - if (err) { - rollback_dwarf_index(dindex); + + #pragma omp parallel for schedule(dynamic) + for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) { + if (drgn_dwarf_index_update_cancelled(state)) + continue; + struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; + const char *ptr = &cu->ptr[cu->is_64_bit ? 
23 : 11]; + struct drgn_error *cu_err = + index_cu_second_pass(&dindex->global, cu, ptr); + if (cu_err) + drgn_dwarf_index_update_cancel(state, cu_err); + } + if (state->err) { + drgn_dwarf_index_rollback(state->dindex); goto err; } - -out: - drgn_dwarf_module_vector_deinit(&unindexed); - return err; + return NULL; err: - for (size_t i = old_cus_size; i < dindex->cus.size; i++) + for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - dindex->cus.size = old_cus_size; - drgn_dwarf_index_free_modules(dindex, false, false); - drgn_dwarf_index_reset_errors(dindex); - goto out; -} - -struct drgn_error *drgn_dwarf_index_report_end(struct drgn_dwarf_index *dindex, - bool report_from_dwfl) -{ - struct drgn_error *err; - - err = drgn_dwarf_index_report_end_internal(dindex, report_from_dwfl); - if (err) - return err; - err = drgn_dwarf_index_finalize_errors(dindex); - if (err && err->code != DRGN_ERROR_MISSING_DEBUG_INFO) { - rollback_dwarf_index(dindex); - drgn_dwarf_index_free_modules(dindex, false, false); - return err; - } - drgn_dwarf_index_free_modules(dindex, true, false); - return err; -} - -struct drgn_error *drgn_dwarf_index_flush(struct drgn_dwarf_index *dindex, - bool report_from_dwfl) -{ - struct drgn_error *err; - - err = drgn_dwarf_index_report_end_internal(dindex, report_from_dwfl); - if (err) - return err; - drgn_dwarf_index_free_modules(dindex, true, false); - drgn_dwarf_index_report_begin(dindex); - return NULL; -} - -void drgn_dwarf_index_report_abort(struct drgn_dwarf_index *dindex) -{ - dwfl_report_end(dindex->dwfl, NULL, NULL); - drgn_dwarf_index_free_modules(dindex, false, false); - drgn_dwarf_index_reset_errors(dindex); -} - -bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, - const char *name) -{ - return c_string_set_search(&dindex->names, &name).entry != NULL; + dindex->cus.size = state->old_cus_size; + return state->err; } static struct drgn_error *index_namespace(struct 
drgn_dwarf_index_namespace *ns) @@ -2641,7 +1538,7 @@ static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) struct drgn_dwarf_index_pending_die *pending = &ns->pending_dies.data[i]; const char *ptr = - section_ptr(pending->cu->userdata->debug_info, + section_ptr(pending->cu->module->debug_info, pending->offset); struct drgn_error *cu_err = index_cu_second_pass(ns, pending->cu, ptr); diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 11f9ca847..d5edec592 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -12,18 +12,15 @@ #ifndef DRGN_DWARF_INDEX_H #define DRGN_DWARF_INDEX_H +#include #include -#include +#include #include #include #ifdef _OPENMP #include #else -static inline int omp_get_thread_num(void) -{ - return 0; -} typedef struct {} omp_lock_t; #define omp_init_lock(lock) do {} while (0) #define omp_destroy_lock(lock) do {} while (0) @@ -31,11 +28,12 @@ typedef struct {} omp_lock_t; #define omp_unset_lock(lock) do {} while (0) #endif -#include "drgn.h" #include "hash_table.h" -#include "string_builder.h" #include "vector.h" +struct drgn_debug_info_module; +struct drgn_error; + /** * @ingroup Internals * @@ -58,10 +56,6 @@ typedef struct {} omp_lock_t; * @{ */ -extern const Dwfl_Callbacks drgn_dwfl_callbacks; -extern const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks; -extern const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks; - /* * An indexed DIE. * @@ -132,94 +126,6 @@ DEFINE_HASH_TABLE_TYPE(drgn_dwarf_index_specification_map, struct drgn_dwarf_index_specification, drgn_dwarf_index_specification_to_key) -/** State of a @ref drgn_dwarf_module or a @c Dwfl_Module. */ -enum drgn_dwarf_module_state { - /** Reported but not indexed. */ - DRGN_DWARF_MODULE_NEW, - /** Reported and will be indexed on success. */ - DRGN_DWARF_MODULE_INDEXING, - /** Indexed. Must not be freed until @ref drgn_dwarf_index_deinit(). 
*/ - DRGN_DWARF_MODULE_INDEXED, -}; - -DEFINE_VECTOR_TYPE(dwfl_module_vector, Dwfl_Module *) - -/** - * A module reported to a @ref drgn_dwarf_index. - * - * Conceptually, a module is an ELF file loaded at a specific address range (or - * not loaded). - * - * Each (file, address range) referenced by a @ref drgn_dwarf_index is uniquely - * represented by one @c Dwfl_Module. Files are identified by canonical path. - * - * Each (binary, address range) is uniquely represented by a @ref - * drgn_dwarf_module. Binaries are identified by build ID; note that a single - * binary may be represented by multiple files (e.g., a stripped binary and its - * corresponding separate debug info file). If a file does not have a build ID, - * it is considered a different binary from other files with different canonical - * paths. - */ -struct drgn_dwarf_module { - /** Allocated with @c malloc() if @c build_id_len is non-zero. */ - void *build_id; - /** Zero if the module does not have a build ID. */ - size_t build_id_len; - /** Load address range, or both 0 if not loaded. */ - uint64_t start, end; - /** Optional module name allocated with @c malloc(). */ - char *name; - enum drgn_dwarf_module_state state; - /** - * Candidate Dwfl_Modules which were reported for this module. - * - * One of these will be indexed. Once the module is indexed, this is - * always empty. - */ - struct dwfl_module_vector dwfl_modules; -}; - -/** - * State tracked for each @c Dwfl_Module. - * - * @c path, @c elf, and @c fd are used when an ELF file was reported to a @ref - * drgn_dwarf_index so that we can report the ELF file to libdwfl later. 
- */ -struct drgn_dwfl_module_userdata { - char *path; - Elf *elf; - int fd; - enum drgn_dwarf_module_state state; - Dwfl_Module *module; - Elf_Data *debug_info; - Elf_Data *debug_abbrev; - Elf_Data *debug_str; - Elf_Data *debug_line; -}; - -DEFINE_VECTOR_TYPE(drgn_dwarf_module_vector, struct drgn_dwarf_module *) - -struct drgn_dwarf_module_key { - const void *build_id; - size_t build_id_len; - uint64_t start, end; -}; - -static inline struct drgn_dwarf_module_key -drgn_dwarf_module_key(struct drgn_dwarf_module * const *entry) -{ - return (struct drgn_dwarf_module_key){ - .build_id = (*entry)->build_id, - .build_id_len = (*entry)->build_id_len, - .start = (*entry)->start, - .end = (*entry)->end, - }; -} -DEFINE_HASH_TABLE_TYPE(drgn_dwarf_module_table, struct drgn_dwarf_module *, - drgn_dwarf_module_key) - -DEFINE_HASH_SET_TYPE(c_string_set, const char *) - DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, @@ -265,44 +171,10 @@ struct drgn_dwarf_index { struct drgn_dwarf_index_specification_map specifications; /** Indexed compilation units. */ struct drgn_dwarf_index_cu_vector cus; - Dwfl *dwfl; - /** - * Formatted errors reported by @ref drgn_dwarf_index_report_error(). - */ - struct string_builder errors; - /** - * Number of errors reported by @ref drgn_dwarf_index_report_error(). - */ - unsigned int num_errors; - /** Maximum number of errors to report before truncating. */ - unsigned int max_errors; - /** - * Modules keyed by build ID and address range. - * - * Every reported module is either here or in @ref no_build_id. While - * reporting modules, these include indexed and unindexed modules. - */ - struct drgn_dwarf_module_table module_table; - /** Modules that don't have a build ID. */ - struct drgn_dwarf_module_vector no_build_id; - /** - * Names of indexed modules. - * - * The entries in this set are @ref drgn_dwarf_module::name, so they - * should not be freed. 
- */ - struct c_string_set names; }; -/** - * Initialize a @ref drgn_dwarf_index. - * - * @param[in] callbacks One of @ref drgn_dwfl_callbacks, @ref - * drgn_linux_proc_dwfl_callbacks, or @ref - * drgn_userspace_core_dump_dwfl_callbacks. - */ -struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, - const Dwfl_Callbacks *callbacks); +/** Initialize a @ref drgn_dwarf_index. */ +void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex); /** * Deinitialize a @ref drgn_dwarf_index. @@ -312,109 +184,78 @@ struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, */ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex); -/** - * Start reporting modules to a @ref drgn_dwarf_index. - * - * This must be paired with a call to either @ref drgn_dwarf_index_report_end() - * or @ref drgn_dwarf_index_report_abort(). - */ -void drgn_dwarf_index_report_begin(struct drgn_dwarf_index *dindex); +/** State tracked while updating a @ref drgn_dwarf_index. */ +struct drgn_dwarf_index_update_state { + struct drgn_dwarf_index *dindex; + size_t old_cus_size; + struct drgn_error *err; +}; /** - * Report a non-fatal error to a @ref drgn_dwarf_index. + * Prepare to update a @ref drgn_dwarf_index. * - * These errors are reported by @ref drgn_dwarf_index_report_end() in the @ref - * DRGN_ERROR_MISSING_DEBUG_INFO error. - * - * @param[name] name An optional module name to prefix to the error message. - * @param[message] message An optional message with additional context to prefix - * to the error message. - * @param[err] err The error to report. This may be @c NULL if @p name and @p - * message provide sufficient information. - * @return @c NULL on success, @ref drgn_enomem if the error could not be - * reported. + * @param[out] state Initialized update state. Must be passed to @ref + * drgn_dwarf_index_update_end(). 
*/ -struct drgn_error * -drgn_dwarf_index_report_error(struct drgn_dwarf_index *dindex, const char *name, - const char *message, struct drgn_error *err); +void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, + struct drgn_dwarf_index *dindex); /** - * Report a module to a @ref drgn_dwarf_index from an ELF file. + * Finish updating a @ref drgn_dwarf_index. * - * This takes ownership of @p fd and @p elf on either success or failure. They - * should not be used (including closed or freed) after this returns. + * This should be called once all of the tasks created by @ref + * drgn_dwarf_index_read_module() have completed (even if the update was + * cancelled). * - * If this fails, @ref drgn_dwarf_index_report_abort() must be called. + * If the update was not cancelled, this finishes indexing all modules reported + * by @ref drgn_dwarf_index_read_module(). If it was cancelled or there is an + * error while indexing, this rolls back the index and removes the newly + * reported modules. * - * @param[in] path The path to the file. - * @param[in] fd A file descriptor referring to the file. - * @param[in] elf The Elf handle of the file. - * @param[in] start The (inclusive) start address of the loaded file, or 0 if - * the file is not loaded. - * @param[in] end The (exclusive) end address of the loaded file, or 0 if the - * file is not loaded. - * @param[in] name An optional name for the module. This is only used for @ref - * drgn_dwarf_index_is_indexed(). - * @param[out] new_ret Whether the module was newly created and reported. This - * is @c false if a module with the same build ID and address range was already - * indexed or a file with the same path and address range was already reported. + * @return @c NULL on success, non-@c NULL if the update was cancelled or there + * was another error. 
*/ -struct drgn_error *drgn_dwarf_index_report_elf(struct drgn_dwarf_index *dindex, - const char *path, int fd, - Elf *elf, uint64_t start, - uint64_t end, const char *name, - bool *new_ret); +struct drgn_error * +drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state); /** - * Stop reporting modules to a @ref drgn_dwarf_index and index new DWARF - * information. - * - * This parses and indexes the debugging information for all modules that have - * not yet been indexed. + * Cancel an update of a @ref drgn_dwarf_index. * - * If debug information was not available for one or more modules, a @ref - * DRGN_ERROR_MISSING_DEBUG_INFO error is returned, those modules are freed, and - * all other modules are added to the index. + * This should be called if there is a fatal error and the update must be + * aborted. * - * On any other error, no new debugging information is indexed and all unindexed - * modules are freed. - * - * @param[in] report_from_dwfl Whether any Dwfl_Modules were reported - * to @ref drgn_dwarf_index::dwfl directly via libdwfl. In that case, we need to - * report those to the DWARF index, as well. + * @param[in] err Error to report. This will be returned from @ref + * drgn_dwarf_index_update_end(). If an error has already been reported, this + * error is destroyed. */ -struct drgn_error *drgn_dwarf_index_report_end(struct drgn_dwarf_index *dindex, - bool report_from_dwfl); +void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, + struct drgn_error *err); /** - * Index new DWARF information and continue reporting. + * Return whether an update of a @ref drgn_dwarf_index has been cancelled by + * @ref drgn_dwarf_index_update_cancel(). * - * This is similar to @ref drgn_dwarf_index_report_end() except that it does not - * finish reporting or return a @ref DRGN_ERROR_MISSING_DEBUG_INFO error. @ref - * After this is called, more modules may be reported. 
@ref - * drgn_dwarf_index_report_end() or @ref drgn_dwarf_index_report_abort() must - * still be called. + * Because updating is parallelized, this allows tasks other than the one that + * encountered the error to "fail fast". */ -struct drgn_error *drgn_dwarf_index_flush(struct drgn_dwarf_index *dindex, - bool report_from_dwfl); +static inline bool +drgn_dwarf_index_update_cancelled(struct drgn_dwarf_index_update_state *state) +{ + /* + * No need for omp critical/omp atomic since this is a best-effort + * optimization. + */ + return state->err != NULL; +} /** - * Stop reporting modules to a @ref drgn_dwarf_index and free all unindexed - * modules. - * - * This also clears all errors reported by @ref drgn_dwarf_index_report_error(). + * Read a module for updating a @ref drgn_dwarf_index. * - * This should be called instead of @ref drgn_dwarf_index_report_end() if a - * fatal error is encountered while reporting modules. - */ -void drgn_dwarf_index_report_abort(struct drgn_dwarf_index *dindex); - -/** - * Return whether a @ref drgn_dwarf_index has indexed a module with the given - * name. + * This creates OpenMP tasks to begin indexing the module. It may cancel the + * update. */ -bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, - const char *name); +void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module); /** * Iterator over DWARF debugging information. 
diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 8f19df14f..0c0a445a1 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -13,7 +13,7 @@ #include #include "internal.h" -#include "dwarf_index.h" +#include "debug_info.h" #include "helpers.h" #include "linux_kernel.h" #include "mread.h" @@ -1155,42 +1155,37 @@ DEFINE_HASH_TABLE(kernel_module_table, struct kernel_module_file *, kernel_module_table_key, c_string_hash, c_string_eq) static struct drgn_error * -report_loaded_kernel_module(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_loaded_kernel_module(struct drgn_debug_info_load_state *load, struct kernel_module_iterator *kmod_it, struct kernel_module_table *kmod_table) { struct drgn_error *err; - const char *name = kmod_it->name; - struct hash_pair hp; - struct kernel_module_table_iterator it; - struct kernel_module_file *kmod; - hp = kernel_module_table_hash(&name); - it = kernel_module_table_search_hashed(kmod_table, &name, hp); + const char *name = kmod_it->name; + struct hash_pair hp = kernel_module_table_hash(&name); + struct kernel_module_table_iterator it = + kernel_module_table_search_hashed(kmod_table, &name, hp); if (!it.entry) return &drgn_not_found; - kmod = *it.entry; + struct kernel_module_file *kmod = *it.entry; kernel_module_table_delete_iterator_hashed(kmod_table, it, hp); do { uint64_t start, end; - err = cache_kernel_module_sections(kmod_it, kmod->elf, &start, &end); if (err) { - err = drgn_dwarf_index_report_error(dindex, - kmod->path, - "could not get section addresses", - err); + err = drgn_debug_info_report_error(load, kmod->path, + "could not get section addresses", + err); if (err) return err; continue; } - err = drgn_dwarf_index_report_elf(dindex, kmod->path, kmod->fd, - kmod->elf, start, end, - kmod->name, NULL); + err = drgn_debug_info_report_elf(load, kmod->path, kmod->fd, + kmod->elf, start, end, + kmod->name, NULL); kmod->elf = NULL; kmod->fd = -1; if (err) @@ -1201,8 +1196,7 @@ 
report_loaded_kernel_module(struct drgn_program *prog, } static struct drgn_error * -report_default_kernel_module(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_default_kernel_module(struct drgn_debug_info_load_state *load, struct kernel_module_iterator *kmod_it, struct depmod_index *depmod) { @@ -1213,71 +1207,71 @@ report_default_kernel_module(struct drgn_program *prog, NULL, }; struct drgn_error *err; + const char *depmod_path; size_t depmod_path_len; - size_t extension_len; - char *path; - int fd; - Elf *elf; - uint64_t start, end; - if (!depmod_index_find(depmod, kmod_it->name, &depmod_path, &depmod_path_len)) { - return drgn_dwarf_index_report_error(dindex, kmod_it->name, - "could not find module in depmod", - NULL); + return drgn_debug_info_report_error(load, kmod_it->name, + "could not find module in depmod", + NULL); } + size_t extension_len; if (depmod_path_len >= 3 && (memcmp(depmod_path + depmod_path_len - 3, ".gz", 3) == 0 || memcmp(depmod_path + depmod_path_len - 3, ".xz", 3) == 0)) extension_len = 3; else extension_len = 0; + char *path; + int fd; + Elf *elf; err = find_elf_file(&path, &fd, &elf, module_paths, - prog->vmcoreinfo.osrelease, + load->dbinfo->prog->vmcoreinfo.osrelease, depmod_path_len - extension_len, depmod_path, extension_len, depmod_path + depmod_path_len - extension_len); if (err) - return drgn_dwarf_index_report_error(dindex, NULL, NULL, err); + return drgn_debug_info_report_error(load, NULL, NULL, err); if (!elf) { - return drgn_dwarf_index_report_error(dindex, kmod_it->name, - "could not find .ko", - NULL); + return drgn_debug_info_report_error(load, kmod_it->name, + "could not find .ko", + NULL); } + uint64_t start, end; err = cache_kernel_module_sections(kmod_it, elf, &start, &end); if (err) { elf_end(elf); close(fd); free(path); - return drgn_dwarf_index_report_error(dindex, path, - "could not get section addresses", - err); + return drgn_debug_info_report_error(load, path, + "could not get section 
addresses", + err); } - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, start, end, - kmod_it->name, NULL); + err = drgn_debug_info_report_elf(load, path, fd, elf, start, end, + kmod_it->name, NULL); free(path); return err; } static struct drgn_error * -report_loaded_kernel_modules(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_loaded_kernel_modules(struct drgn_debug_info_load_state *load, struct kernel_module_table *kmod_table, struct depmod_index *depmod) { + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; - struct kernel_module_iterator kmod_it; + struct kernel_module_iterator kmod_it; err = kernel_module_iterator_init(&kmod_it, prog); if (err) { kernel_module_iterator_error: - return drgn_dwarf_index_report_error(dindex, "kernel modules", - "could not find loaded kernel modules", - err); + return drgn_debug_info_report_error(load, "kernel modules", + "could not find loaded kernel modules", + err); } for (;;) { err = kernel_module_iterator_next(&kmod_it); @@ -1291,8 +1285,8 @@ report_loaded_kernel_modules(struct drgn_program *prog, /* Look for an explicitly-reported file first. */ if (kmod_table) { - err = report_loaded_kernel_module(prog, dindex, - &kmod_it, kmod_table); + err = report_loaded_kernel_module(load, &kmod_it, + kmod_table); if (!err) continue; else if (err != &drgn_not_found) @@ -1305,24 +1299,24 @@ report_loaded_kernel_modules(struct drgn_program *prog, * already indexed that module. 
*/ if (depmod && - !drgn_dwarf_index_is_indexed(dindex, kmod_it.name)) { + !drgn_debug_info_is_indexed(load->dbinfo, kmod_it.name)) { if (!depmod->modules_dep.ptr) { err = depmod_index_init(depmod, prog->vmcoreinfo.osrelease); if (err) { depmod->modules_dep.ptr = NULL; - err = drgn_dwarf_index_report_error(dindex, - "kernel modules", - "could not read depmod", - err); + err = drgn_debug_info_report_error(load, + "kernel modules", + "could not read depmod", + err); if (err) break; depmod = NULL; continue; } } - err = report_default_kernel_module(prog, dindex, - &kmod_it, depmod); + err = report_default_kernel_module(load, &kmod_it, + depmod); if (err) break; } @@ -1332,15 +1326,14 @@ report_loaded_kernel_modules(struct drgn_program *prog, } static struct drgn_error * -report_kernel_modules(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_kernel_modules(struct drgn_debug_info_load_state *load, struct kernel_module_file *kmods, size_t num_kmods, - bool report_default, bool need_module_definition, - bool vmlinux_is_pending) + bool need_module_definition, bool vmlinux_is_pending) { + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; - if (!num_kmods && !report_default) + if (!num_kmods && !load->load_default) return NULL; /* @@ -1352,7 +1345,7 @@ report_kernel_modules(struct drgn_program *prog, */ if (vmlinux_is_pending && (!(prog->flags & DRGN_PROGRAM_IS_LIVE) || need_module_definition)) { - err = drgn_dwarf_index_flush(dindex, false); + err = drgn_debug_info_report_flush(load); if (err) return err; } @@ -1368,10 +1361,10 @@ report_kernel_modules(struct drgn_program *prog, "name", &name_member); } if (err) { - return drgn_dwarf_index_report_error(dindex, - "kernel modules", - "could not get kernel module names", - err); + return drgn_debug_info_report_error(load, + "kernel modules", + "could not get kernel module names", + err); } module_name_offset = name_member.bit_offset / 8; } @@ -1387,18 +1380,18 @@ 
report_kernel_modules(struct drgn_program *prog, module_name_offset, &kmod->name); if (err) { - err = drgn_dwarf_index_report_error(dindex, - kmod->path, - NULL, err); + err = drgn_debug_info_report_error(load, + kmod->path, + NULL, err); if (err) goto out; continue; } if (!kmod->name) { - err = drgn_dwarf_index_report_error(dindex, - kmod->path, - "could not find kernel module name", - NULL); + err = drgn_debug_info_report_error(load, + kmod->path, + "could not find kernel module name", + NULL); if (err) goto out; continue; @@ -1422,9 +1415,9 @@ report_kernel_modules(struct drgn_program *prog, } } - err = report_loaded_kernel_modules(prog, dindex, + err = report_loaded_kernel_modules(load, num_kmods ? &kmod_table : NULL, - report_default ? &depmod : NULL); + load->load_default ? &depmod : NULL); if (err) goto out; @@ -1433,10 +1426,9 @@ report_kernel_modules(struct drgn_program *prog, struct kernel_module_file *kmod = *it.entry; it = kernel_module_table_delete_iterator(&kmod_table, it); do { - err = drgn_dwarf_index_report_elf(dindex, kmod->path, - kmod->fd, kmod->elf, - 0, 0, kmod->name, - NULL); + err = drgn_debug_info_report_elf(load, kmod->path, + kmod->fd, kmod->elf, 0, + 0, kmod->name, NULL); kmod->elf = NULL; kmod->fd = -1; if (err) @@ -1452,9 +1444,9 @@ report_kernel_modules(struct drgn_program *prog, return err; } -static struct drgn_error *report_vmlinux(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - bool *vmlinux_is_pending) +static struct drgn_error * +report_vmlinux(struct drgn_debug_info_load_state *load, + bool *vmlinux_is_pending) { static const char * const vmlinux_paths[] = { /* @@ -1468,54 +1460,49 @@ static struct drgn_error *report_vmlinux(struct drgn_program *prog, "/lib/modules/%s/vmlinux", NULL, }; + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; + char *path; int fd; Elf *elf; - uint64_t start, end; - err = find_elf_file(&path, &fd, &elf, vmlinux_paths, prog->vmcoreinfo.osrelease); if (err) - 
return drgn_dwarf_index_report_error(dindex, NULL, NULL, err); + return drgn_debug_info_report_error(load, NULL, NULL, err); if (!elf) { err = drgn_error_format(DRGN_ERROR_OTHER, "could not find vmlinux for %s", prog->vmcoreinfo.osrelease); - return drgn_dwarf_index_report_error(dindex, "kernel", NULL, - err); + return drgn_debug_info_report_error(load, "kernel", NULL, err); } + uint64_t start, end; err = elf_address_range(elf, prog->vmcoreinfo.kaslr_offset, &start, &end); if (err) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, err); + err = drgn_debug_info_report_error(load, path, NULL, err); elf_end(elf); close(fd); free(path); return err; } - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, start, end, - "kernel", vmlinux_is_pending); + err = drgn_debug_info_report_elf(load, path, fd, elf, start, end, + "kernel", vmlinux_is_pending); free(path); return err; } struct drgn_error * -linux_kernel_report_debug_info(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - const char **paths, size_t n, - bool report_default, bool report_main) +linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load) { + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; - struct kernel_module_file *kmods; - size_t i, num_kmods = 0; - bool need_module_definition = false; - bool vmlinux_is_pending = false; - if (n) { - kmods = malloc_array(n, sizeof(*kmods)); + struct kernel_module_file *kmods; + if (load->num_paths) { + kmods = malloc_array(load->num_paths, sizeof(*kmods)); if (!kmods) return &drgn_enomem; } else { @@ -1527,27 +1514,29 @@ linux_kernel_report_debug_info(struct drgn_program *prog, * modules. So, this sets aside kernel modules and reports everything * else. 
*/ - for (i = 0; i < n; i++) { - const char *path = paths[i]; + size_t num_kmods = 0; + bool need_module_definition = false; + bool vmlinux_is_pending = false; + for (size_t i = 0; i < load->num_paths; i++) { + const char *path = load->paths[i]; int fd; Elf *elf; - Elf_Scn *this_module_scn, *modinfo_scn; - bool is_vmlinux; - err = open_elf_file(path, &fd, &elf); if (err) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, - err); + err = drgn_debug_info_report_error(load, path, NULL, + err); if (err) goto out; continue; } + Elf_Scn *this_module_scn, *modinfo_scn; + bool is_vmlinux; err = identify_kernel_elf(elf, &this_module_scn, &modinfo_scn, &is_vmlinux); if (err) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, - err); + err = drgn_debug_info_report_error(load, path, NULL, + err); elf_end(elf); close(fd); if (err) @@ -1556,16 +1545,14 @@ linux_kernel_report_debug_info(struct drgn_program *prog, } if (this_module_scn || modinfo_scn) { struct kernel_module_file *kmod = &kmods[num_kmods++]; - kmod->path = path; kmod->fd = fd; kmod->elf = elf; err = get_kernel_module_name_from_modinfo(modinfo_scn, &kmod->name); if (err) { - err = drgn_dwarf_index_report_error(dindex, - path, NULL, - err); + err = drgn_debug_info_report_error(load, path, + NULL, err); if (err) goto out; continue; @@ -1576,49 +1563,46 @@ linux_kernel_report_debug_info(struct drgn_program *prog, } } else if (is_vmlinux) { uint64_t start, end; - bool is_new; - err = elf_address_range(elf, prog->vmcoreinfo.kaslr_offset, &start, &end); if (err) { elf_end(elf); close(fd); - err = drgn_dwarf_index_report_error(dindex, - path, NULL, - err); + err = drgn_debug_info_report_error(load, path, + NULL, err); if (err) goto out; continue; } - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, - start, end, "kernel", - &is_new); + bool is_new; + err = drgn_debug_info_report_elf(load, path, fd, elf, + start, end, "kernel", + &is_new); if (err) goto out; if (is_new) vmlinux_is_pending = true; } 
else { - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, - 0, 0, NULL, NULL); + err = drgn_debug_info_report_elf(load, path, fd, elf, 0, + 0, NULL, NULL); if (err) goto out; } } - if (report_main && !vmlinux_is_pending && - !drgn_dwarf_index_is_indexed(dindex, "kernel")) { - err = report_vmlinux(prog, dindex, &vmlinux_is_pending); + if (load->load_main && !vmlinux_is_pending && + !drgn_debug_info_is_indexed(load->dbinfo, "kernel")) { + err = report_vmlinux(load, &vmlinux_is_pending); if (err) goto out; } - err = report_kernel_modules(prog, dindex, kmods, num_kmods, - report_default, need_module_definition, - vmlinux_is_pending); + err = report_kernel_modules(load, kmods, num_kmods, + need_module_definition, vmlinux_is_pending); out: - for (i = 0; i < num_kmods; i++) { + for (size_t i = 0; i < num_kmods; i++) { elf_end(kmods[i].elf); if (kmods[i].fd != -1) close(kmods[i].fd); diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index c467bf27c..657962a69 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -8,7 +8,7 @@ #include "drgn.h" -struct drgn_dwarf_index; +struct drgn_debug_info_load_state; struct drgn_memory_reader; struct vmcoreinfo; @@ -33,10 +33,7 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, void *arg, struct drgn_object *ret); struct drgn_error * -linux_kernel_report_debug_info(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - const char **paths, size_t n, - bool report_default, bool report_main); +linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load); #define KDUMP_SIGNATURE "KDUMP " #define KDUMP_SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) diff --git a/libdrgn/program.c b/libdrgn/program.c index eb924b2f5..a33ce9bb8 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -523,22 +523,14 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) return err; } -static struct drgn_error *drgn_program_get_dindex(struct drgn_program *prog, - struct 
drgn_dwarf_index **ret) +struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, + struct drgn_debug_info **ret) { struct drgn_error *err; if (!prog->_dbinfo) { - const Dwfl_Callbacks *dwfl_callbacks; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - dwfl_callbacks = &drgn_dwfl_callbacks; - else if (prog->flags & DRGN_PROGRAM_IS_LIVE) - dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; - else - dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; - struct drgn_debug_info *dbinfo; - err = drgn_debug_info_create(prog, dwfl_callbacks, &dbinfo); + err = drgn_debug_info_create(prog, &dbinfo); if (err) return err; err = drgn_program_add_object_finder(prog, @@ -558,82 +550,18 @@ static struct drgn_error *drgn_program_get_dindex(struct drgn_program *prog, } prog->_dbinfo = dbinfo; } - *ret = &prog->_dbinfo->dindex; - return NULL; -} - -struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret) -{ - struct drgn_error *err; - struct drgn_dwarf_index *dindex; - - err = drgn_program_get_dindex(prog, &dindex); - if (err) - return err; - *ret = dindex->dwfl; - return NULL; -} - -static struct drgn_error * -userspace_report_debug_info(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - const char **paths, size_t n, - bool report_default) -{ - struct drgn_error *err; - size_t i; - - for (i = 0; i < n; i++) { - int fd; - Elf *elf; - - err = open_elf_file(paths[i], &fd, &elf); - if (err) { - err = drgn_dwarf_index_report_error(dindex, paths[i], - NULL, err); - if (err) - return err; - continue; - } - /* - * We haven't implemented a way to get the load address for - * anything reported here, so for now we report it as unloaded. 
- */ - err = drgn_dwarf_index_report_elf(dindex, paths[i], fd, elf, 0, - 0, NULL, NULL); - if (err) - return err; - } - - if (report_default) { - if (prog->flags & DRGN_PROGRAM_IS_LIVE) { - int ret; - - ret = dwfl_linux_proc_report(dindex->dwfl, prog->pid); - if (ret == -1) { - return drgn_error_libdwfl(); - } else if (ret) { - return drgn_error_create_os("dwfl_linux_proc_report", - ret, NULL); - } - } else if (dwfl_core_file_report(dindex->dwfl, prog->core, - NULL) == -1) { - return drgn_error_libdwfl(); - } - } + *ret = prog->_dbinfo; return NULL; } /* Set the default language from the language of "main". */ -static void drgn_program_set_language_from_main(struct drgn_program *prog, - struct drgn_dwarf_index *dindex) +static void drgn_program_set_language_from_main(struct drgn_debug_info *dbinfo) { struct drgn_error *err; struct drgn_dwarf_index_iterator it; static const uint64_t tags[] = { DW_TAG_subprogram }; - - err = drgn_dwarf_index_iterator_init(&it, &dindex->global, "main", - strlen("main"), tags, + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, + "main", strlen("main"), tags, ARRAY_SIZE(tags)); if (err) { drgn_error_destroy(err); @@ -655,7 +583,7 @@ static void drgn_program_set_language_from_main(struct drgn_program *prog, continue; } if (lang) { - prog->lang = lang; + dbinfo->prog->lang = lang; break; } } @@ -686,40 +614,22 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, size_t n, bool load_default, bool load_main) { struct drgn_error *err; - struct drgn_dwarf_index *dindex; - bool report_from_dwfl; if (!n && !load_default && !load_main) return NULL; - if (load_default) - load_main = true; - - err = drgn_program_get_dindex(prog, &dindex); + struct drgn_debug_info *dbinfo; + err = drgn_program_get_dbinfo(prog, &dbinfo); if (err) return err; - drgn_dwarf_index_report_begin(dindex); - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - err = linux_kernel_report_debug_info(prog, dindex, paths, n, - 
load_default, load_main); - } else { - err = userspace_report_debug_info(prog, dindex, paths, n, - load_default); - } - if (err) { - drgn_dwarf_index_report_abort(dindex); - return err; - } - report_from_dwfl = (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && - load_main); - err = drgn_dwarf_index_report_end(dindex, report_from_dwfl); + err = drgn_debug_info_load(dbinfo, paths, n, load_default, load_main); if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { if (!prog->lang && !(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) - drgn_program_set_language_from_main(prog, dindex); + drgn_program_set_language_from_main(dbinfo); if (!prog->has_platform) { - dwfl_getdwarf(dindex->dwfl, + dwfl_getdwarf(dbinfo->dwfl, drgn_set_platform_from_dwarf, prog, 0); } } @@ -1139,14 +1049,9 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, Dwfl_Module *module, struct drgn_symbol *ret) { - const char *name; - GElf_Off offset; - GElf_Sym elf_sym; - if (!module) { if (prog->_dbinfo) { - module = dwfl_addrmodule(prog->_dbinfo->dindex.dwfl, - address); + module = dwfl_addrmodule(prog->_dbinfo->dwfl, address); if (!module) return false; } else { @@ -1154,8 +1059,10 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, } } - name = dwfl_module_addrinfo(module, address, &offset, &elf_sym, NULL, - NULL, NULL); + GElf_Off offset; + GElf_Sym elf_sym; + const char *name = dwfl_module_addrinfo(module, address, &offset, + &elf_sym, NULL, NULL, NULL); if (!name) return false; ret->name = name; @@ -1244,7 +1151,7 @@ drgn_program_find_symbol_by_name(struct drgn_program *prog, }; if (prog->_dbinfo && - dwfl_getmodules(prog->_dbinfo->dindex.dwfl, find_symbol_by_name_cb, + dwfl_getmodules(prog->_dbinfo->dwfl, find_symbol_by_name_cb, &arg, 0)) return arg.err; return drgn_error_format(DRGN_ERROR_LOOKUP, diff --git a/libdrgn/program.h b/libdrgn/program.h index 202a73586..cf2d87729 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -250,7 
+250,8 @@ drgn_program_word_size(struct drgn_program *prog, uint8_t *ret) return NULL; } -struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret); +struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, + struct drgn_debug_info **ret); /** * Find the @c NT_PRSTATUS note for the given CPU. diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index e8e6ba7dc..e9f84bab4 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -9,6 +9,7 @@ #include #include "internal.h" +#include "debug_info.h" #include "helpers.h" #include "program.h" #include "string_builder.h" @@ -453,9 +454,6 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, struct drgn_stack_trace **ret) { struct drgn_error *err; - Dwfl *dwfl; - Dwfl_Thread *thread; - struct drgn_stack_trace *trace; if (!prog->has_platform) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, @@ -467,11 +465,12 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, "stack unwinding is not yet supported for live processes"); } - err = drgn_program_get_dwfl(prog, &dwfl); + struct drgn_debug_info *dbinfo; + err = drgn_program_get_dbinfo(prog, &dbinfo); if (err) return err; if (!prog->attached_dwfl_state) { - if (!dwfl_attach_state(dwfl, NULL, 0, + if (!dwfl_attach_state(dbinfo->dwfl, NULL, 0, &drgn_linux_kernel_thread_callbacks, prog)) return drgn_error_libdwfl(); @@ -480,7 +479,8 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, prog->stack_trace_tid = tid; prog->stack_trace_obj = obj; - thread = dwfl_attach_thread(dwfl, STACK_TRACE_OBJ_TID); + Dwfl_Thread *thread = dwfl_attach_thread(dbinfo->dwfl, + STACK_TRACE_OBJ_TID); prog->stack_trace_obj = NULL; prog->stack_trace_tid = 0; if (prog->stack_trace_err) @@ -490,7 +490,8 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, goto err; } - trace = malloc(sizeof(*trace) + sizeof(trace->frames[0])); + struct drgn_stack_trace *trace = 
malloc(sizeof(*trace) + + sizeof(trace->frames[0])); if (!trace) { err = &drgn_enomem; goto err; From e69d0c00640a2898c5737dd5eb0a23d3c9848a89 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Sep 2020 17:47:17 -0700 Subject: [PATCH 52/56] libdrgn: dwarf_index: fix use after free of pending CU If we create a pending CU for a namespace, then add more CUs to the index, the CU might get reallocated, resulting in a use after free. Fix it by storing the index of the CU instead of the pointer. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index adf0a5a05..9d8c73603 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -98,8 +98,8 @@ DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) /* DIE which needs to be indexed. */ struct drgn_dwarf_index_pending_die { - /* Compilation unit containing DIE. */ - struct drgn_dwarf_index_cu *cu; + /* Index of compilation unit containing DIE. */ + size_t cu; /* Offset of DIE in .debug_info. 
*/ size_t offset; }; @@ -1173,7 +1173,7 @@ static struct drgn_error *index_die(struct drgn_dwarf_index_namespace *ns, err = &drgn_enomem; goto err; } - pending->cu = cu; + pending->cu = cu - ns->dindex->cus.data; pending->offset = offset; } err = NULL; @@ -1537,11 +1537,12 @@ static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) if (!err) { struct drgn_dwarf_index_pending_die *pending = &ns->pending_dies.data[i]; - const char *ptr = - section_ptr(pending->cu->module->debug_info, - pending->offset); + struct drgn_dwarf_index_cu *cu = + &ns->dindex->cus.data[pending->cu]; + const char *ptr = section_ptr(cu->module->debug_info, + pending->offset); struct drgn_error *cu_err = - index_cu_second_pass(ns, pending->cu, ptr); + index_cu_second_pass(ns, cu, ptr); if (cu_err) { #pragma omp critical(drgn_index_namespace) if (err) From 89b5da2abb52568d257cc36fe38b12c5e651f049 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Sep 2020 17:53:08 -0700 Subject: [PATCH 53/56] libdrgn: dwarf_index: free namespaces when rolling back Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 9d8c73603..7a65c1f57 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -1447,8 +1447,11 @@ static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) struct drgn_debug_info_module *module = *userdatap; if (module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) break; - else - shard->dies.size--; + if (die->tag == DW_TAG_namespace) { + drgn_dwarf_index_namespace_deinit(die->namespace); + free(die->namespace); + } + shard->dies.size--; } /* From fdbe3363867825c3f359392d2b71eb46c893cedd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 22 Sep 2020 15:45:10 -0700 Subject: [PATCH 54/56] libdrgn: use -isystem for elfutils headers The elfutils header files should be treated as if they were in the standard location, so use 
-isystem instead of -I. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index c286dddce..0e7e5590f 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -7,7 +7,7 @@ SUBDIRS = elfutils include .DELETE_ON_ERROR: -AM_CPPFLAGS = -Iinclude -D_GNU_SOURCE +AM_CPPFLAGS = -isystem include -D_GNU_SOURCE include_HEADERS = drgn.h From 286c09844eda99e0611aa3b1d55753c28b6c96e4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 23 Sep 2020 16:02:02 -0700 Subject: [PATCH 55/56] Clean up #includes with include-what-you-use I recently hit a couple of CI failures caused by relying on transitive includes that weren't always present. include-what-you-use is a Clang-based tool that helps with this. It's a bit finicky and noisy, so this adds scripts/iwyu.py to make running it more convenient (but not reliable enough to automate it in Travis). This cleans up all reasonable include-what-you-use warnings and reorganizes a few header files. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 3 +- libdrgn/arch_x86_64.c.in | 7 +- libdrgn/binary_search_tree.h | 3 + libdrgn/debug_info.c | 143 ++++++++++++++++++++- libdrgn/debug_info.h | 13 +- libdrgn/drgn.h.in | 10 +- libdrgn/dwarf_index.c | 5 +- libdrgn/error.c | 7 +- libdrgn/error.h | 2 - libdrgn/hash_table.h | 2 +- libdrgn/helpers.h | 6 + libdrgn/internal.c | 146 --------------------- libdrgn/kdump.c | 5 +- libdrgn/language.c | 4 +- libdrgn/language.h | 2 +- libdrgn/language_c.c | 8 +- libdrgn/lexer.c | 2 +- libdrgn/lexer.h | 5 +- libdrgn/linux_kernel.c | 14 ++- libdrgn/linux_kernel.h | 2 - libdrgn/linux_kernel_helpers.c | 7 +- libdrgn/memory_reader.c | 5 +- libdrgn/memory_reader.h | 5 +- libdrgn/mread.h | 2 + libdrgn/object.c | 6 +- libdrgn/object.h | 2 - libdrgn/object_index.c | 3 +- libdrgn/path.c | 5 +- libdrgn/{internal.h => path.h} | 45 ++----- libdrgn/platform.c | 5 +- libdrgn/platform.h | 1 + libdrgn/program.c | 15 ++- libdrgn/program.h | 10 +- libdrgn/python/drgnpy.h | 4 +- libdrgn/python/helpers.c | 1 + libdrgn/python/module.c | 5 +- libdrgn/python/object.c | 5 +- libdrgn/python/program.c | 3 + libdrgn/python/symbol.c | 2 + libdrgn/python/test.c | 3 +- libdrgn/python/type.c | 4 + libdrgn/python/util.c | 2 + libdrgn/serialize.c | 4 +- libdrgn/splay_tree.c | 2 +- libdrgn/stack_trace.c | 14 ++- libdrgn/string_builder.c | 2 +- libdrgn/string_builder.h | 1 + libdrgn/symbol.c | 4 +- libdrgn/type.c | 5 +- libdrgn/type.h | 5 +- libdrgn/util.h | 4 + libdrgn/vector.h | 4 +- scripts/iwyu.py | 224 +++++++++++++++++++++++++++++++++ 53 files changed, 543 insertions(+), 255 deletions(-) delete mode 100644 libdrgn/internal.c rename libdrgn/{internal.h => path.h} (77%) create mode 100755 scripts/iwyu.py diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 0e7e5590f..29651e1a7 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -28,8 +28,6 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ error.h \ hash_table.c \ hash_table.h \ - 
internal.c \ - internal.h \ language.c \ language.h \ language_c.c \ @@ -46,6 +44,7 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ object_index.c \ object_index.h \ path.c \ + path.h \ platform.c \ platform.h \ program.c \ diff --git a/libdrgn/arch_x86_64.c.in b/libdrgn/arch_x86_64.c.in index 1bec2e2c1..a81950a8c 100644 --- a/libdrgn/arch_x86_64.c.in +++ b/libdrgn/arch_x86_64.c.in @@ -3,11 +3,16 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include +#include +#include -#include "internal.h" +#include "drgn.h" +#include "error.h" #include "linux_kernel.h" #include "platform.h" #include "program.h" +#include "util.h" %} x86-64 diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index 0fa378435..3cca5c7dd 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -12,6 +12,9 @@ #ifndef DRGN_BINARY_SEARCH_TREE_H #define DRGN_BINARY_SEARCH_TREE_H +#include +#include + #include "util.h" /** diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index cc0265b5a..fde1a6d9c 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -6,20 +6,27 @@ #include #include #include +#include +#include #include #include +#include +#include #include #include #include -#include "internal.h" +#include "cityhash.h" #include "debug_info.h" +#include "error.h" #include "hash_table.h" #include "language.h" #include "linux_kernel.h" #include "object.h" +#include "path.h" #include "program.h" #include "type.h" +#include "util.h" #include "vector.h" DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) @@ -2544,3 +2551,137 @@ void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) dwfl_end(dbinfo->dwfl); free(dbinfo); } + +struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) +{ + struct drgn_error *err; + + *fd_ret = open(path, O_RDONLY); + if (*fd_ret == -1) + return drgn_error_create_os("open", errno, path); + *elf_ret = dwelf_elf_begin(*fd_ret); + if (!*elf_ret) { + err = drgn_error_libelf(); + goto 
err_fd; + } + if (elf_kind(*elf_ret) != ELF_K_ELF) { + err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); + goto err_elf; + } + return NULL; + +err_elf: + elf_end(*elf_ret); +err_fd: + close(*fd_ret); + return err; +} + +struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, + const char * const *path_formats, ...) +{ + struct drgn_error *err; + size_t i; + + for (i = 0; path_formats[i]; i++) { + va_list ap; + int ret; + char *path; + int fd; + Elf *elf; + + va_start(ap, path_formats); + ret = vasprintf(&path, path_formats[i], ap); + va_end(ap); + if (ret == -1) + return &drgn_enomem; + fd = open(path, O_RDONLY); + if (fd == -1) { + free(path); + continue; + } + elf = dwelf_elf_begin(fd); + if (!elf) { + close(fd); + free(path); + continue; + } + if (elf_kind(elf) != ELF_K_ELF) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s: not an ELF file", path); + elf_end(elf); + close(fd); + free(path); + return err; + } + *path_ret = path; + *fd_ret = fd; + *elf_ret = elf; + return NULL; + } + *path_ret = NULL; + *fd_ret = -1; + *elf_ret = NULL; + return NULL; +} + +struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) +{ + GElf_Shdr shdr_mem, *shdr; + Elf_Data *data; + + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + if ((shdr->sh_flags & SHF_COMPRESSED) && elf_compress(scn, 0, 0) < 0) + return drgn_error_libelf(); + data = elf_getdata(scn, NULL); + if (!data) + return drgn_error_libelf(); + *ret = data; + return NULL; +} + +struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, + uint64_t *start_ret, uint64_t *end_ret) +{ + uint64_t start = UINT64_MAX, end = 0; + size_t phnum, i; + + /* + * Get the minimum and maximum addresses from the PT_LOAD segments. We + * ignore memory ranges that start beyond UINT64_MAX, and we truncate + * ranges that end beyond UINT64_MAX. 
+ */ + if (elf_getphdrnum(elf, &phnum) != 0) + return drgn_error_libelf(); + for (i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr; + uint64_t segment_start, segment_end; + + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type != PT_LOAD || !phdr->p_vaddr) + continue; + if (__builtin_add_overflow(phdr->p_vaddr, bias, + &segment_start)) + continue; + if (__builtin_add_overflow(segment_start, phdr->p_memsz, + &segment_end)) + segment_end = UINT64_MAX; + if (segment_start < segment_end) { + if (segment_start < start) + start = segment_start; + if (segment_end > end) + end = segment_end; + } + } + if (start >= end) { + return drgn_error_create(DRGN_ERROR_OTHER, + "ELF file has no loadable segments"); + } + *start_ret = start; + *end_ret = end; + return NULL; +} diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index bca4be64b..ab0545c4f 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -14,9 +14,6 @@ #include #include -#include -#include -#include #include "drgn.h" #include "dwarf_index.h" @@ -273,6 +270,16 @@ drgn_debug_info_find_object(const char *name, size_t name_len, enum drgn_find_object_flags flags, void *arg, struct drgn_object *ret); +struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret); + +struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, + const char * const *path_formats, ...); + +struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); + +struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, + uint64_t *start_ret, uint64_t *end_ret); + /** @} */ #endif /* DRGN_DEBUG_INFO_H */ diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 901aefc62..cfa867f4f 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -10,11 +10,14 @@ #ifndef DRGN_H #define DRGN_H -#include -#include +// IWYU pragma: begin_exports #include #include #include +// IWYU pragma: end_exports + +#include +#include #include #include @@ -248,9 
+251,6 @@ void drgn_error_destroy(struct drgn_error *err); /** @} */ -struct drgn_type; -struct drgn_type_thunk; - /** * @ingroup Types * diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 7a65c1f57..5c7d05b37 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -8,11 +8,14 @@ #include #include -#include "internal.h" #include "debug_info.h" +#include "drgn.h" #include "dwarf_index.h" +#include "error.h" #include "mread.h" +#include "path.h" #include "siphash.h" +#include "util.h" /* * The DWARF abbreviation table gets translated into a series of instructions. diff --git a/libdrgn/error.c b/libdrgn/error.c index e230f92be..3a266492b 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -1,8 +1,9 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include +#include #include +#include #include #include #include @@ -10,8 +11,10 @@ #include #include -#include "internal.h" +#include "drgn.h" +#include "error.h" #include "string_builder.h" +#include "util.h" LIBDRGN_PUBLIC struct drgn_error drgn_enomem = { .code = DRGN_ERROR_NO_MEMORY, diff --git a/libdrgn/error.h b/libdrgn/error.h index fe44b6665..4ef8adb13 100644 --- a/libdrgn/error.h +++ b/libdrgn/error.h @@ -12,8 +12,6 @@ #ifndef DRGN_ERROR_H #define DRGN_ERROR_H -#include - #include "drgn.h" /** diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 732232796..97961c1ea 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -13,7 +13,7 @@ #define DRGN_HASH_TABLE_H #ifdef __SSE2__ -#include +#include // IWYU pragma: keep #endif #ifdef __SSE4_2__ #include diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 0f844a74f..2251e12a7 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -13,6 +13,12 @@ #ifndef DRGN_HELPERS_H #define DRGN_HELPERS_H +#include +#include + +struct drgn_object; +struct drgn_program; + struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, void *buf, 
size_t count); diff --git a/libdrgn/internal.c b/libdrgn/internal.c deleted file mode 100644 index 4c2e42aac..000000000 --- a/libdrgn/internal.c +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "internal.h" - -struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) -{ - struct drgn_error *err; - - *fd_ret = open(path, O_RDONLY); - if (*fd_ret == -1) - return drgn_error_create_os("open", errno, path); - *elf_ret = dwelf_elf_begin(*fd_ret); - if (!*elf_ret) { - err = drgn_error_libelf(); - goto err_fd; - } - if (elf_kind(*elf_ret) != ELF_K_ELF) { - err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); - goto err_elf; - } - return NULL; - -err_elf: - elf_end(*elf_ret); -err_fd: - close(*fd_ret); - return err; -} - -struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, - const char * const *path_formats, ...) 
-{ - struct drgn_error *err; - size_t i; - - for (i = 0; path_formats[i]; i++) { - va_list ap; - int ret; - char *path; - int fd; - Elf *elf; - - va_start(ap, path_formats); - ret = vasprintf(&path, path_formats[i], ap); - va_end(ap); - if (ret == -1) - return &drgn_enomem; - fd = open(path, O_RDONLY); - if (fd == -1) { - free(path); - continue; - } - elf = dwelf_elf_begin(fd); - if (!elf) { - close(fd); - free(path); - continue; - } - if (elf_kind(elf) != ELF_K_ELF) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s: not an ELF file", path); - elf_end(elf); - close(fd); - free(path); - return err; - } - *path_ret = path; - *fd_ret = fd; - *elf_ret = elf; - return NULL; - } - *path_ret = NULL; - *fd_ret = -1; - *elf_ret = NULL; - return NULL; -} - -struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) -{ - GElf_Shdr shdr_mem, *shdr; - Elf_Data *data; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - if ((shdr->sh_flags & SHF_COMPRESSED) && elf_compress(scn, 0, 0) < 0) - return drgn_error_libelf(); - data = elf_getdata(scn, NULL); - if (!data) - return drgn_error_libelf(); - *ret = data; - return NULL; -} - -struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, - uint64_t *start_ret, uint64_t *end_ret) -{ - uint64_t start = UINT64_MAX, end = 0; - size_t phnum, i; - - /* - * Get the minimum and maximum addresses from the PT_LOAD segments. We - * ignore memory ranges that start beyond UINT64_MAX, and we truncate - * ranges that end beyond UINT64_MAX. 
- */ - if (elf_getphdrnum(elf, &phnum) != 0) - return drgn_error_libelf(); - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr_mem, *phdr; - uint64_t segment_start, segment_end; - - phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type != PT_LOAD || !phdr->p_vaddr) - continue; - if (__builtin_add_overflow(phdr->p_vaddr, bias, - &segment_start)) - continue; - if (__builtin_add_overflow(segment_start, phdr->p_memsz, - &segment_end)) - segment_end = UINT64_MAX; - if (segment_start < segment_end) { - if (segment_start < start) - start = segment_start; - if (segment_end > end) - end = segment_end; - } - } - if (start >= end) { - return drgn_error_create(DRGN_ERROR_OTHER, - "ELF file has no loadable segments"); - } - *start_ret = start; - *end_ret = end; - return NULL; -} diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index b352f5e9b..2155bc34a 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -1,13 +1,12 @@ // Copyright 2019 - Serapheim Dimitropoulos // SPDX-License-Identifier: GPL-3.0+ -#include +#include #include -#include #include #include "linux_kernel.h" -#include "program.h" +#include "program.h" // IWYU pragma: associated static struct drgn_error *drgn_platform_from_kdump(kdump_ctx_t *ctx, struct drgn_platform *ret) diff --git a/libdrgn/language.c b/libdrgn/language.c index 09e7f6cc4..bfbd86461 100644 --- a/libdrgn/language.c +++ b/libdrgn/language.c @@ -1,7 +1,9 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ -#include "internal.h" +#include + +#include "error.h" #include "language.h" const struct drgn_language drgn_languages[] = { diff --git a/libdrgn/language.h b/libdrgn/language.h index 89aef6972..81280d42b 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -12,7 +12,7 @@ #ifndef DRGN_LANGUAGE_H #define DRGN_LANGUAGE_H -#include +#include #include "drgn.h" diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index a13f5d3cd..48f09eb77 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1,16 +1,18 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include #include #include #include #include #include #include +#include -#include "internal.h" +#include "error.h" #include "hash_table.h" -#include "language.h" +#include "language.h" // IWYU pragma: associated #include "lexer.h" #include "memory_reader.h" #include "object.h" @@ -18,6 +20,8 @@ #include "string_builder.h" #include "symbol.h" #include "type.h" +#include "util.h" +#include "vector.h" static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, diff --git a/libdrgn/lexer.c b/libdrgn/lexer.c index 426f72098..1463b1f3e 100644 --- a/libdrgn/lexer.c +++ b/libdrgn/lexer.c @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include "internal.h" +#include "drgn.h" #include "lexer.h" DEFINE_VECTOR_FUNCTIONS(drgn_token_vector) diff --git a/libdrgn/lexer.h b/libdrgn/lexer.h index 62511e8a8..37910d6f8 100644 --- a/libdrgn/lexer.h +++ b/libdrgn/lexer.h @@ -30,7 +30,6 @@ * @{ */ -struct drgn_error; struct drgn_lexer; struct drgn_token; @@ -126,6 +125,10 @@ struct drgn_error *drgn_lexer_push(struct drgn_lexer *lexer, struct drgn_error *drgn_lexer_peek(struct drgn_lexer *lexer, struct drgn_token *token); +/* Exported only for testing. 
*/ +struct drgn_error *drgn_lexer_c(struct drgn_lexer *lexer, + struct drgn_token *token); + /** @} */ #endif /* DRGN_LEXER_H */ diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 0c0a445a1..3220259e0 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -2,6 +2,10 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include +#include +#include +#include #include #include #include @@ -10,14 +14,20 @@ #include #include #include -#include -#include "internal.h" #include "debug_info.h" +#include "drgn.h" +#include "error.h" +#include "hash_table.h" #include "helpers.h" +#include "language.h" #include "linux_kernel.h" +#include "memory_reader.h" #include "mread.h" +#include "platform.h" #include "program.h" +#include "type.h" +#include "util.h" struct drgn_error *read_memory_via_pgtable(void *buf, uint64_t address, size_t count, uint64_t offset, diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 657962a69..330b363e0 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -4,8 +4,6 @@ #ifndef DRGN_LINUX_KERNEL_H #define DRGN_LINUX_KERNEL_H -#include - #include "drgn.h" struct drgn_debug_info_load_state; diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index a04454aa6..5666af9bf 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -1,11 +1,14 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include +#include #include -#include -#include "internal.h" +#include "drgn.h" +#include "platform.h" #include "program.h" +#include "util.h" struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 48682c3ce..d02eacfaa 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -1,12 +1,13 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ -#include +#include +#include #include #include -#include "internal.h" #include "memory_reader.h" +#include "util.h" DEFINE_BINARY_SEARCH_TREE_FUNCTIONS(drgn_memory_segment_tree, binary_search_tree_scalar_cmp, splay) diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index f6d94a332..5eb095319 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -12,11 +12,8 @@ #ifndef DRGN_MEMORY_READER_H #define DRGN_MEMORY_READER_H -#include -#include -#include - #include "binary_search_tree.h" +#include "drgn.h" /** * @ingroup Internals diff --git a/libdrgn/mread.h b/libdrgn/mread.h index dbb9f6a9d..bf1764cff 100644 --- a/libdrgn/mread.h +++ b/libdrgn/mread.h @@ -13,6 +13,8 @@ #define DRGN_MREAD_H #include +#include +#include #include /** diff --git a/libdrgn/object.c b/libdrgn/object.c index 498cd8eb6..f3c7871c5 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -1,18 +1,20 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include #include -#include #include #include -#include "internal.h" +#include "drgn.h" +#include "error.h" #include "language.h" #include "memory_reader.h" #include "object.h" #include "program.h" #include "serialize.h" #include "type.h" +#include "util.h" LIBDRGN_PUBLIC void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog) diff --git a/libdrgn/object.h b/libdrgn/object.h index 6e6258c1b..670cf979a 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -12,8 +12,6 @@ #ifndef DRGN_OBJECT_H #define DRGN_OBJECT_H -#include - #include "drgn.h" #include "type.h" diff --git a/libdrgn/object_index.c b/libdrgn/object_index.c index b2fc6296a..ca513775c 100644 --- a/libdrgn/object_index.c +++ b/libdrgn/object_index.c @@ -2,10 +2,9 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include -#include "internal.h" #include "object_index.h" -#include "type.h" void drgn_object_index_init(struct drgn_object_index *oindex) { diff --git 
a/libdrgn/path.c b/libdrgn/path.c index 6aa5ed690..a5b7a7278 100644 --- a/libdrgn/path.c +++ b/libdrgn/path.c @@ -2,9 +2,12 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include +#include #include -#include "internal.h" +#include "path.h" +#include "util.h" bool path_iterator_next(struct path_iterator *it, const char **component, size_t *component_len) diff --git a/libdrgn/internal.h b/libdrgn/path.h similarity index 77% rename from libdrgn/internal.h rename to libdrgn/path.h index 0c4d37d1b..87bcce1e0 100644 --- a/libdrgn/internal.h +++ b/libdrgn/path.h @@ -4,46 +4,28 @@ /** * @file * - * Miscellanous internal drgn helpers. + * Paths. + * + * See @ref Paths. */ -#ifndef DRGN_INTERNAL_H -#define DRGN_INTERNAL_H +#ifndef DRGN_PATH_H +#define DRGN_PATH_H #include -#include -#include -#include +#include -#include "drgn.h" -#include "error.h" -#include "util.h" +#include /** * - * @defgroup Internals Internals + * @defgroup Paths Paths * - * Internal implementation. + * Utilities for working with paths. * * @{ */ -#ifndef LIBDRGN_PUBLIC -#define LIBDRGN_PUBLIC __attribute__((visibility("default"))) -#endif - -struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret); - -struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, - const char * const *path_formats, ...); - -struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); - -struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, - uint64_t *start_ret, uint64_t *end_ret); - -bool die_matches_filename(Dwarf_Die *die, const char *filename); - /** Path iterator input component. 
*/ struct path_iterator_component { /** @@ -131,11 +113,8 @@ bool path_iterator_next(struct path_iterator *it, const char **component, bool path_ends_with(struct path_iterator *haystack, struct path_iterator *needle); -/** @} */ +bool die_matches_filename(Dwarf_Die *die, const char *filename); -struct drgn_lexer; -struct drgn_token; -struct drgn_error *drgn_lexer_c(struct drgn_lexer *lexer, - struct drgn_token *token); +/** @} */ -#endif /* DRGN_INTERNAL_H */ +#endif /* DRGN_PATH_H */ diff --git a/libdrgn/platform.c b/libdrgn/platform.c index a2ba6df37..bee66ed5a 100644 --- a/libdrgn/platform.c +++ b/libdrgn/platform.c @@ -1,10 +1,11 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include +#include +#include -#include "internal.h" #include "platform.h" +#include "util.h" const struct drgn_architecture_info arch_info_unknown = { .name = "unknown", diff --git a/libdrgn/platform.h b/libdrgn/platform.h index 8510cc45a..f3a76c384 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -5,6 +5,7 @@ #define DRGN_PLATFORM_H #include +#include #include "drgn.h" diff --git a/libdrgn/program.c b/libdrgn/program.c index a33ce9bb8..a0fbd1ef7 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1,29 +1,32 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ +#include #include +#include +#include +#include +#include #include #include #include -#include #include #include #include +#include #include -#include -#include -#include -#include "internal.h" #include "debug_info.h" +#include "dwarf_index.h" +#include "error.h" #include "language.h" #include "linux_kernel.h" #include "memory_reader.h" #include "object_index.h" #include "program.h" -#include "string_builder.h" #include "symbol.h" #include "vector.h" +#include "util.h" DEFINE_VECTOR_FUNCTIONS(drgn_prstatus_vector) DEFINE_HASH_TABLE_FUNCTIONS(drgn_prstatus_map, hash_pair_int_type, diff --git a/libdrgn/program.h b/libdrgn/program.h index cf2d87729..81707d642 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -13,18 +13,24 @@ #define DRGN_PROGRAM_H #include +#include +#include #ifdef WITH_LIBKDUMPFILE #include #endif +#include "drgn.h" #include "hash_table.h" +#include "language.h" #include "memory_reader.h" #include "object_index.h" -#include "language.h" #include "platform.h" #include "type.h" #include "vector.h" +struct drgn_debug_info; +struct drgn_symbol; + /** * @ingroup Internals * @@ -59,8 +65,6 @@ DEFINE_VECTOR_TYPE(drgn_typep_vector, struct drgn_type *) DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct string) DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct string) -struct drgn_dwarf_info_cache; - struct drgn_program { /** @privatesection */ diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index a707dac57..ee31e3afa 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -6,12 +6,14 @@ #define PY_SSIZE_T_CLEAN -#include +// IWYU pragma: begin_exports #include #include "structmember.h" #include "docstrings.h" #include "../drgn.h" +// IWYU pragma: end_exports + #include "../hash_table.h" #include "../program.h" diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 8bfd69bfa..3e38bb90c 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -3,6 +3,7 
@@ #include "drgnpy.h" #include "../helpers.h" +#include "../program.h" PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, PyObject *kwds) diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 48b741e63..d79b939ff 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -1,12 +1,13 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include "drgnpy.h" -#include "../internal.h" #ifdef WITH_KDUMPFILE #include #endif +#include "drgnpy.h" +#include "../path.h" + PyObject *MissingDebugInfoError; PyObject *OutOfBoundsError; diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index 73301d764..73a81a69d 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -1,13 +1,16 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include +#include +#include #include "drgnpy.h" #include "../error.h" #include "../object.h" +#include "../program.h" #include "../serialize.h" #include "../type.h" +#include "../util.h" static int DrgnObject_literal(struct drgn_object *res, PyObject *literal) { diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 96b04b0b1..bd9948354 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -2,7 +2,10 @@ // SPDX-License-Identifier: GPL-3.0+ #include "drgnpy.h" +#include "../hash_table.h" +#include "../program.h" #include "../vector.h" +#include "../util.h" DEFINE_HASH_TABLE_FUNCTIONS(pyobjectp_set, hash_pair_ptr_type, hash_table_scalar_eq) diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index 8fc2bfa26..2205b74c5 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -1,6 +1,8 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ +#include + #include "drgnpy.h" PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog) diff --git a/libdrgn/python/test.c b/libdrgn/python/test.c index 2077a31ed..264d8b020 100644 --- a/libdrgn/python/test.c +++ b/libdrgn/python/test.c @@ -10,9 +10,8 @@ */ #include "drgnpy.h" - -#include "../internal.h" #include "../lexer.h" +#include "../path.h" #include "../serialize.h" DRGNPY_PUBLIC void drgn_test_lexer_init(struct drgn_lexer *lexer, diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 93e03cc7e..946f108ca 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -1,8 +1,12 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include + #include "drgnpy.h" +#include "../program.h" #include "../type.h" +#include "../util.h" static const char *drgn_type_kind_str(struct drgn_type *type) { diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index aa419c4bb..a84602c41 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -1,6 +1,8 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include + #include "drgnpy.h" int append_string(PyObject *parts, const char *s) diff --git a/libdrgn/serialize.c b/libdrgn/serialize.c index 91cfa0f35..7aed8a842 100644 --- a/libdrgn/serialize.c +++ b/libdrgn/serialize.c @@ -2,11 +2,11 @@ // SPDX-License-Identifier: GPL-3.0+ #include -#include +#include #include -#include "internal.h" #include "serialize.h" +#include "util.h" void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, uint8_t bit_size, bool little_endian) diff --git a/libdrgn/splay_tree.c b/libdrgn/splay_tree.c index 7b216fc73..8241b8d9f 100644 --- a/libdrgn/splay_tree.c +++ b/libdrgn/splay_tree.c @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ -#include "binary_search_tree.h" +#include "binary_search_tree.h" // IWYU pragma: associated /* * Binary search tree splay operation based on the original paper [1]. Rotations diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index e9f84bab4..539f9dda7 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -1,19 +1,25 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include -#include +#include +#include #include -#include #include #include +#include +#include -#include "internal.h" #include "debug_info.h" +#include "drgn.h" +#include "error.h" +#include "hash_table.h" #include "helpers.h" +#include "platform.h" #include "program.h" #include "string_builder.h" #include "symbol.h" +#include "type.h" +#include "util.h" struct drgn_stack_trace { struct drgn_program *prog; diff --git a/libdrgn/string_builder.c b/libdrgn/string_builder.c index 1fbf86b3c..21f9499b6 100644 --- a/libdrgn/string_builder.c +++ b/libdrgn/string_builder.c @@ -4,8 +4,8 @@ #include #include -#include "internal.h" #include "string_builder.h" +#include "util.h" bool string_builder_finalize(struct string_builder *sb, char **ret) { diff --git a/libdrgn/string_builder.h b/libdrgn/string_builder.h index a517c0686..ac35dcf99 100644 --- a/libdrgn/string_builder.h +++ b/libdrgn/string_builder.h @@ -13,6 +13,7 @@ #define DRGN_STRING_BUILDER_H #include +#include #include #include diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 369452c21..14278e33e 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -1,10 +1,12 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ +#include +#include #include -#include "internal.h" #include "symbol.h" +#include "util.h" LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) { diff --git a/libdrgn/type.c b/libdrgn/type.c index 764f50786..6917beae1 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1,13 +1,16 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include #include -#include "internal.h" +#include "cityhash.h" +#include "error.h" #include "hash_table.h" #include "language.h" #include "program.h" #include "type.h" +#include "util.h" const char * const drgn_type_kind_spelling[] = { [DRGN_TYPE_VOID] = "void", diff --git a/libdrgn/type.h b/libdrgn/type.h index 50b22563e..f5042310c 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -12,11 +12,14 @@ #ifndef DRGN_TYPE_H #define DRGN_TYPE_H +#include + #include "drgn.h" #include "hash_table.h" -#include "language.h" #include "vector.h" +struct drgn_language; + /** * @ingroup Internals * diff --git a/libdrgn/util.h b/libdrgn/util.h index 085999bdd..902b7b5f6 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -19,6 +19,10 @@ #include #include +#ifndef LIBDRGN_PUBLIC +#define LIBDRGN_PUBLIC __attribute__((visibility("default"))) +#endif + #ifdef NDEBUG #define UNREACHABLE() __builtin_unreachable() #else diff --git a/libdrgn/vector.h b/libdrgn/vector.h index f8771715c..d2ae7344a 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -13,8 +13,8 @@ #define DRGN_VECTOR_H #include -#include -#include +#include // IWYU pragma: keep +#include // IWYU pragma: keep /** * @ingroup Internals diff --git a/scripts/iwyu.py b/scripts/iwyu.py new file mode 100755 index 000000000..262204eca --- /dev/null +++ b/scripts/iwyu.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0+ + +import argparse +import json +import os +import os.path +import re +import subprocess +import sys +import sysconfig +import tempfile + +import yaml + +BUILD_BASE = "build/compile_commands" +CDB = BUILD_BASE + "/compile_commands.json" + +IWYU_REGEXES = [ + ("add", r"(.*) should add these lines:"), + ("remove", r"(.*) should remove these lines:"), + ("include_list", r"The full include-list for (.*):"), + ("none", r"---"), + ("none", r"\(.* has correct #includes/fwd-decls\)"), +] + + +# Python.h is the canonical header for the Python C API. The actual definitions +# come from internal header files, so we need an IWYU mapping file. Ideally we +# could do this with include mappings. Unfortunately, Python.h uses ""-style +# includes for those headers, one of which is "object.h". This conflicts with +# libdrgn's "object.h", and IWYU doesn't seem to have a way to distinguish +# between those in the mapping file. So, we generate symbol mappings with the +# find-all-symbols Clang tool. +def gen_python_mapping_file(mapping_path): + # These headers are guaranteed to be included by Python.h. See + # https://docs.python.org/3/c-api/intro.html#include-files. 
+ IMPLIED_HEADERS = ( + "", + "", + "", + "", + "", + "", + ) + + include = sysconfig.get_path("include") + platinclude = sysconfig.get_path("platinclude") + + with open( + mapping_path + ".tmp", "w" + ) as imp, tempfile.TemporaryDirectory() as tmpdir: + imp.write("[\n") + for header in IMPLIED_HEADERS: + imp.write( + f' {{"include": ["{header}", "public", "", "public"]}},\n' + ) + + build_dir = os.path.join(tmpdir, "build") + os.mkdir(build_dir) + source = os.path.join(build_dir, "python.c") + with open(source, "w") as f: + f.write("#include ") + + commands = [ + { + "arguments": [ + "clang", + "-I", + include, + "-I", + platinclude, + "-c", + "python.c", + ], + "directory": build_dir, + "file": "python.c", + } + ] + with open(os.path.join(build_dir, "compile_commands.json"), "w") as f: + json.dump(commands, f) + + symbols_dir = os.path.join(tmpdir, "find_all_symbols") + os.mkdir(symbols_dir) + subprocess.check_call( + [ + "find-all-symbols", + "-p=" + build_dir, + "--output-dir=" + symbols_dir, + source, + ] + ) + + find_all_symbols_db = os.path.join(tmpdir, "find_all_symbols_db.yaml") + subprocess.check_call( + [ + "find-all-symbols", + "-p=" + build_dir, + "--merge-dir=" + symbols_dir, + find_all_symbols_db, + ] + ) + + with open(find_all_symbols_db, "r") as f: + for document in yaml.safe_load_all(f): + name = document["Name"] + path = document["FilePath"] + if path.startswith(include + "/"): + header = path[len(include) + 1 :] + elif path.startswith(platinclude + "/"): + header = path[len(platinclude) + 1 :] + else: + continue + if header == "pyconfig.h": + # Probably best not to use these. + continue + imp.write( + f' {{"symbol": ["{name}", "private", "", "public"]}}, # From {header}\n' + ) + # "cpython/object.h" defines struct _typeobject { ... } PyTypeObject. + # For some reason, include-what-you-use wants struct _typeobject, but + # find-all-symbols only reports PyTypeObject. Add it manually. 
+ imp.write( + f' {{"symbol": ["_typeobject", "private", "", "public"]}}, # From cpython/object.h\n' + ) + + imp.write("]\n") + + os.rename(mapping_path + ".tmp", mapping_path) + + +def main(): + parser = argparse.ArgumentParser(description="run include-what-you-use on drgn") + parser.add_argument( + "source", nargs="*", help="run on given file instead of all source files" + ) + args = parser.parse_args() + + if args.source: + sources = {os.path.realpath(source) for source in args.source} + + subprocess.check_call( + [ + "bear", + "--cdb", + CDB, + "-a", + sys.executable, + "setup.py", + "build", + "-b", + BUILD_BASE, + "build_ext", + ] + ) + + python_mapping_file = os.path.join( + BUILD_BASE, + f"python.{sysconfig.get_platform()}.{sysconfig.get_python_version()}.imp", + ) + if not os.path.exists(python_mapping_file): + gen_python_mapping_file(python_mapping_file) + + with open(CDB, "r") as f: + commands = json.load(f) + + for command in commands: + if "elfutils" in os.path.relpath(command["directory"]): + continue + + if ( + args.source + and os.path.realpath(os.path.join(command["directory"], command["file"])) + not in sources + ): + continue + + with subprocess.Popen( + ["include-what-you-use"] + + command["arguments"][1:] + + [ + "-Xiwyu", + "--mapping_file=" + os.path.abspath(python_mapping_file), + "-w", # We don't want warnings from Clang. 
+ ], + cwd=command["directory"], + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as proc: + state = "none" + header = None + lines = [] + for line in proc.stdout: + line = line.rstrip("\n") + match = None + for new_state, regex in IWYU_REGEXES: + match = re.fullmatch(regex, line) + if match: + break + if match: + state = new_state + if state != "none": + path = os.path.relpath( + os.path.join(command["directory"], match.group(1)) + ) + if state in ("add", "remove"): + header = f"{path} should {state} these lines:" + else: + header = None + lines.clear() + elif state != "include_list" and line: + if header is not None: + print("\n" + header) + header = None + print(line) + print( + "Please ignore suggestions to declare opaque types if the appropriate header has already been included." + ) + + +if __name__ == "__main__": + main() From d829401e5f5f5b697c7923a41c1b04a0b92cedf5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 23 Sep 2020 16:42:15 -0700 Subject: [PATCH 56/56] vmtest: also disable onoatimehack on QEMU 5.0.1 The fix was backported to QEMU's 5.0 stable branch and released in 5.0.1. Signed-off-by: Omar Sandoval --- vmtest/vm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vmtest/vm.py b/vmtest/vm.py index 6243abc42..05d5dd806 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -153,9 +153,10 @@ def run_in_vm(command: str, *, vmlinuz: str, build_dir: str) -> int: # multidevs was added in QEMU 4.2.0. multidevs = ",multidevs=remap" if qemu_version >= (4, 2) else "" - # QEMU's 9pfs O_NOATIME handling was fixed in 5.1.0. + # QEMU's 9pfs O_NOATIME handling was fixed in 5.1.0. The fix was backported + # to 5.0.1. env = os.environ.copy() - if qemu_version < (5, 1): + if qemu_version < (5, 0, 1): onoatimehack_so = _build_onoatimehack(build_dir) env["LD_PRELOAD"] = f"{onoatimehack_so}:{env.get('LD_PRELOAD', '')}"