diff --git a/.travis.yml b/.travis.yml index 31f9a4dfd..016e2c992 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ -dist: bionic +os: linux +dist: focal language: python python: @@ -6,15 +7,28 @@ python: - '3.7' - '3.6' install: + # If the host is running a kernel without Linux kernel commit b4d185175bc1 + # ("KVM: VMX: give unrestricted guest full control of CR3") (in v4.17), then + # stores to CR3 in the nested guest can spuriously fail and cause it to + # crash. We can work around this by disabling unrestricted guest support. + - | + if grep -q '^flags\b.*\bvmx\b' /proc/cpuinfo; then + echo "options kvm_intel unrestricted_guest=N" | sudo tee /etc/modprobe.d/kvm-cr3-workaround.conf > /dev/null + sudo modprobe -r kvm_intel + sudo modprobe kvm_intel + fi # Upstream defaults to world-read-writeable /dev/kvm. Debian/Ubuntu override # this; see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=892945. We want # the upstream default. - echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /lib/udev/rules.d/99-fix-kvm.rules > /dev/null - sudo udevadm control --reload-rules - # On systemd >= 238 we can use udevadm trigger -w and remove udevadm settle. - - sudo udevadm trigger /dev/kvm - - sudo udevadm settle -script: python setup.py test -K + - sudo udevadm trigger -w /dev/kvm + - pip install black isort mypy +script: + - black --check --diff . + - isort --check --diff . + - mypy --strict --no-warn-return-any drgn _drgn.pyi + - python setup.py test -K addons: apt: diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 06c948848..ba9eb43ed 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -71,11 +71,15 @@ drgn assumes some `implementation-defined behavior Python ^^^^^^ -Python code in drgn is formatted with `black `_. -Code should be compatible with Python 3.6 and newer. +Python code in drgn should be compatible with Python 3.6 and newer. 
-Type hints should be provided for all public interfaces other than helpers -(including the C extension) and most private interfaces. +Python code should be formatted with `black <https://github.com/psf/black>`_ +and `isort <https://github.com/PyCQA/isort>`_:: + + $ isort . && black . + +Type hints should be provided for all interfaces (including helpers and the C +extension). Submitting PRs -------------- diff --git a/_drgn.pyi b/_drgn.pyi index 0dccb1251..467f67ca4 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -9,6 +9,7 @@ Don't use this module directly. Instead, use the drgn package. import enum import os +import sys from typing import ( Any, Callable, @@ -21,6 +22,32 @@ from typing import ( overload, ) +if sys.version_info < (3, 8): + from typing_extensions import Protocol +else: + from typing import Protocol + +# This is effectively typing.SupportsIndex without @typing.runtime_checkable +# (both of which are only available since Python 3.8), with a more +# self-explanatory name. +class IntegerLike(Protocol): + """ + An :class:`int` or integer-like object. + + Parameters annotated with this type expect an integer which may be given as + a Python :class:`int` or an :class:`Object` with integer type. + """ + + def __index__(self) -> int: ... + +Path = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] +""" +Filesystem path. + +Parameters annotated with this type accept a filesystem path as :class:`str`, +:class:`bytes`, or :class:`os.PathLike`. +""" + class Program: """ A ``Program`` represents a crashed or running program. It can be used to @@ -29,15 +56,18 @@ class Program: The main functionality of a ``Program`` is looking up objects (i.e., variables, constants, or functions). This is usually done with the :meth:`[] <.__getitem__>` operator. - - This class can be constructed directly, but it is usually more convenient - to use one of the :ref:`api-program-constructors`. - - :param platform: The platform of the program, or ``None`` if it should be - determined automatically when a core dump or symbol file is added. 
""" - def __init__(self, platform: Optional[Platform] = None) -> None: ... + def __init__(self, platform: Optional[Platform] = None) -> None: + """ + This class can be constructed directly, but it is usually more + convenient to use one of the :ref:`api-program-constructors`. + + :param platform: The platform of the program, or ``None`` if it should + be determined automatically when a core dump or symbol file is + added. + """ + ... flags: ProgramFlags """Flags which apply to this program.""" @@ -135,16 +165,14 @@ class Program: def object( self, name: str, - flags: Optional[FindObjectFlags] = None, + flags: FindObjectFlags = FindObjectFlags.ANY, filename: Optional[str] = None, ) -> Object: """ Get the object (variable, constant, or function) with the given name. :param name: The object name. - :param flags: Flags indicating what kind of object to look for. If this - is ``None`` or not given, it defaults to - :attr:`FindObjectFlags.ANY`. + :param flags: Flags indicating what kind of object to look for. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. :raises LookupError: if no objects with the given name are found in @@ -152,7 +180,7 @@ class Program: """ ... # address_or_name is positional-only. - def symbol(self, address_or_name: Union[int, str]) -> Symbol: + def symbol(self, address_or_name: Union[IntegerLike, str]) -> Symbol: """ Get the symbol containing the given address, or the global symbol with the given name. @@ -162,7 +190,12 @@ class Program: the given name """ ... - def stack_trace(self, thread: Union[Object, int]) -> StackTrace: + def stack_trace( + self, + # Object is already IntegerLike, but this explicitly documents that it + # can take non-integer Objects. + thread: Union[Object, IntegerLike], + ) -> StackTrace: """ Get the stack trace for the given thread in the program. @@ -201,22 +234,9 @@ class Program: the given file """ ... 
- def pointer_type( - self, - type: Union[str, Type], - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, - ) -> Type: - """ - Create a pointer type which points to the given type. - - :param type: The referenced type. - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - def read(self, address: int, size: int, physical: bool = False) -> bytes: + def read( + self, address: IntegerLike, size: IntegerLike, physical: bool = False + ) -> bytes: """ Read *size* bytes of memory starting at *address* in the program. The address may be virtual (the default) or physical if the program @@ -236,11 +256,19 @@ class Program: :raises ValueError: if *size* is negative """ ... - def read_u8(self, address: int, physical: bool = False) -> int: ... - def read_u16(self, address: int, physical: bool = False) -> int: ... - def read_u32(self, address: int, physical: bool = False) -> int: ... - def read_u64(self, address: int, physical: bool = False) -> int: ... - def read_word(self, address: int, physical: bool = False) -> int: + def read_u8(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_u16(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_u32(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_u64(self, address: IntegerLike, physical: bool = False) -> int: + "" + ... + def read_word(self, address: IntegerLike, physical: bool = False) -> int: """ Read an unsigned integer from the program's memory in the program's byte order. @@ -262,8 +290,8 @@ class Program: ... def add_memory_segment( self, - address: int, - size: int, + address: IntegerLike, + size: IntegerLike, read_fn: Callable[[int, int, int, bool], bytes], physical: bool = False, ) -> None: @@ -316,7 +344,7 @@ class Program: return an :class:`Object`. """ ... 
- def set_core_dump(self, path: Union[str, bytes, os.PathLike]) -> None: + def set_core_dump(self, path: Path) -> None: """ Set the program to a core dump. @@ -349,7 +377,7 @@ class Program: ... def load_debug_info( self, - paths: Optional[Iterable[Union[str, bytes, os.PathLike]]] = None, + paths: Optional[Iterable[Path]] = None, default: bool = False, main: bool = False, ) -> None: @@ -388,7 +416,7 @@ class Program: This is equivalent to ``load_debug_info(None, True)``. """ ... - cache: dict + cache: Dict[Any, Any] """ Dictionary for caching program metadata. @@ -410,6 +438,282 @@ class Program: else: return prog['bar'] """ + def void_type( + self, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new void type. It has kind :attr:`TypeKind.VOID`. + + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def int_type( + self, + name: str, + size: IntegerLike, + is_signed: bool, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new integer type. It has kind :attr:`TypeKind.INT`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param is_signed: :attr:`Type.is_signed` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def bool_type( + self, + name: str, + size: IntegerLike, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new boolean type. It has kind :attr:`TypeKind.BOOL`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def float_type( + self, + name: str, + size: IntegerLike, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new floating-point type. 
It has kind :attr:`TypeKind.FLOAT`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def complex_type( + self, + name: str, + size: IntegerLike, + type: Type, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new complex type. It has kind :attr:`TypeKind.COMPLEX`. + + :param name: :attr:`Type.name` + :param size: :attr:`Type.size` + :param type: The corresponding real type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + @overload + def struct_type( + self, + tag: Optional[str], + size: IntegerLike, + members: Sequence[TypeMember], + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new structure type. It has kind :attr:`TypeKind.STRUCT`. + + :param tag: :attr:`Type.tag` + :param size: :attr:`Type.size` + :param members: :attr:`Type.members` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + @overload + def struct_type( + self, + tag: Optional[str], + size: None = None, + members: None = None, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete structure type.""" + ... + @overload + def union_type( + self, + tag: Optional[str], + size: IntegerLike, + members: Sequence[TypeMember], + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new union type. It has kind :attr:`TypeKind.UNION`. Otherwise, + this is the same as :meth:`struct_type()`. + """ + ... 
+ @overload + def union_type( + self, + tag: Optional[str], + size: None = None, + members: None = None, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete union type.""" + ... + @overload + def class_type( + self, + tag: Optional[str], + size: IntegerLike, + members: Sequence[TypeMember], + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new class type. It has kind :attr:`TypeKind.CLASS`. Otherwise, + this is the same as :meth:`struct_type()`. + """ + ... + @overload + def class_type( + self, + tag: Optional[str], + size: None = None, + members: None = None, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete class type.""" + ... + @overload + def enum_type( + self, + tag: Optional[str], + type: Type, + enumerators: Sequence[TypeEnumerator], + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new enumerated type. It has kind :attr:`TypeKind.ENUM`. + + :param tag: :attr:`Type.tag` + :param type: The compatible integer type (:attr:`Type.type`) + :param enumerators: :attr:`Type.enumerators` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + @overload + def enum_type( + self, + tag: Optional[str], + type: None = None, + enumerators: None = None, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """Create a new incomplete enumerated type.""" + ... + def typedef_type( + self, + name: str, + type: Type, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new typedef type. It has kind :attr:`TypeKind.TYPEDEF`. 
+ + :param name: :attr:`Type.name` + :param type: The aliased type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def pointer_type( + self, + type: Type, + size: Optional[int] = None, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new pointer type. It has kind :attr:`TypeKind.POINTER`. + + You can usually use :meth:`Program.pointer_type()` instead. + + :param type: The referenced type (:attr:`Type.type`) + :param size: :attr:`Type.size`, or ``None`` to use the program's + default pointer size. + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def array_type( + self, + type: Type, + length: Optional[int] = None, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new array type. It has kind :attr:`TypeKind.ARRAY`. + + :param type: The element type (:attr:`Type.type`) + :param length: :attr:`Type.length` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... + def function_type( + self, + type: Type, + parameters: Sequence[TypeParameter], + is_variadic: bool = False, + *, + qualifiers: Qualifiers = Qualifiers.NONE, + language: Optional[Language] = None, + ) -> Type: + """ + Create a new function type. It has kind :attr:`TypeKind.FUNCTION`. + + :param type: The return type (:attr:`Type.type`) + :param parameters: :attr:`Type.parameters` + :param is_variadic: :attr:`Type.is_variadic` + :param qualifiers: :attr:`Type.qualifiers` + :param language: :attr:`Type.language` + """ + ... class ProgramFlags(enum.Flag): """ @@ -433,9 +737,13 @@ class FindObjectFlags(enum.Flag): """ CONSTANT = ... + "" FUNCTION = ... + "" VARIABLE = ... + "" ANY = ... 
+ "" def filename_matches(haystack: Optional[str], needle: Optional[str]) -> bool: """ @@ -454,7 +762,7 @@ def filename_matches(haystack: Optional[str], needle: Optional[str]) -> bool: """ ... -def program_from_core_dump(path: Union[str, bytes, os.PathLike]) -> Program: +def program_from_core_dump(path: Path) -> Program: """ Create a :class:`Program` from a core dump file. The type of program (e.g., userspace or kernel) is determined automatically. @@ -484,15 +792,17 @@ class Platform: """ A ``Platform`` represents the environment (i.e., architecture and ABI) that a program runs on. - - :param arch: :attr:`Platform.arch` - :param flags: :attr:`Platform.flags`; if ``None``, default flags for the - architecture are used. """ def __init__( self, arch: Architecture, flags: Optional[PlatformFlags] = None - ) -> None: ... + ) -> None: + """ + :param arch: :attr:`Platform.arch` + :param flags: :attr:`Platform.flags`; if ``None``, default flags for + the architecture are used. + """ + ... arch: Architecture """Instruction set architecture of this platform.""" @@ -632,25 +942,6 @@ class Object: conflicting with structure, union, or class members. The attributes and methods always take precedence; use :meth:`member_()` if there is a conflict. - - Objects are usually obtained directly from a :class:`Program`, but they can - be constructed manually, as well (for example, if you got a variable - address from a log file). - - :param prog: The program to create this object in. - :param type: The type of the object. If omitted, this is deduced from - *value* according to the language's rules for literals. - :param value: The value of this object. See :meth:`value_()`. - :param address: The address of this object in the program. Either this or - *value* must be given, but not both. - :param byteorder: Byte order of the object. This should be ``'little'`` or - ``'big'``. The default is ``None``, which indicates the program byte - order. This must be ``None`` for primitive values. 
- :param bit_offset: Offset in bits from the object's address to the - beginning of the object. The default is ``None``, which means no - offset. This must be ``None`` for primitive values. - :param bit_field_size: Size in bits of this object if it is a bit field. - The default is ``None``, which means the object is not a bit field. """ def __init__( @@ -659,11 +950,33 @@ class Object: type: Union[str, Type, None] = None, value: Any = None, *, - address: Optional[int] = None, + address: Optional[IntegerLike] = None, byteorder: Optional[str] = None, - bit_offset: Optional[int] = None, - bit_field_size: Optional[int] = None, - ) -> None: ... + bit_offset: Optional[IntegerLike] = None, + bit_field_size: Optional[IntegerLike] = None, + ) -> None: + """ + Objects are usually obtained directly from a :class:`Program`, but they + can be constructed manually, as well (for example, if you got a + variable address from a log file). + + :param prog: The program to create this object in. + :param type: The type of the object. If omitted, this is deduced from + *value* according to the language's rules for literals. + :param value: The value of this object. See :meth:`value_()`. + :param address: The address of this object in the program. Either this + or *value* must be given, but not both. + :param byteorder: Byte order of the object. This should be ``'little'`` + or ``'big'``. The default is ``None``, which indicates the program + byte order. This must be ``None`` for primitive values. + :param bit_offset: Offset in bits from the object's address to the + beginning of the object. The default is ``None``, which means no + offset. This must be ``None`` for primitive values. + :param bit_field_size: Size in bits of this object if it is a bit + field. The default is ``None``, which means the object is not a bit + field. + """ + ... prog_: Program """Program that this object is from.""" @@ -703,7 +1016,7 @@ class Object: :param name: Attribute name. """ ... 
- def __getitem__(self, idx: Union[int, Object]) -> Object: + def __getitem__(self, idx: IntegerLike) -> Object: """ Implement ``self[idx]``. Get the array element at the given index. @@ -817,7 +1130,7 @@ class Object: def format_( self, *, - columns: Optional[int] = None, + columns: Optional[IntegerLike] = None, dereference: Optional[bool] = None, symbolize: Optional[bool] = None, string: Optional[bool] = None, @@ -1040,7 +1353,7 @@ class StackTrace: default. """ - def __getitem__(self, idx: int) -> StackFrame: ... + def __getitem__(self, idx: IntegerLike) -> StackFrame: ... class StackFrame: """ @@ -1066,7 +1379,7 @@ class StackFrame: instruction instead of the return address. """ ... - def register(self, reg: Union[str, int, Register]) -> int: + def register(self, reg: Union[str, IntegerLike, Register]) -> int: """ Get the value of the given register at this stack frame. The register can be specified by name (e.g., ``'rax'``), number (see @@ -1115,6 +1428,9 @@ class Type: memory-intensive. """ + prog: Program + """Program that this type is from.""" + kind: TypeKind """Kind of this type.""" @@ -1207,7 +1523,7 @@ class Type: is always ``True``. """ ... - def qualified(self, qualifiers: Optional[Qualifiers]) -> Type: + def qualified(self, qualifiers: Qualifiers) -> Type: """ Get a copy of this type with different qualifiers. @@ -1223,15 +1539,6 @@ class Type: class TypeMember: """ A ``TypeMember`` represents a member of a structure, union, or class type. - - :param type: Type of the member. This may be a :class:`Type` or a callable - that takes no arguments and returns a :class:`Type`. - :param name: Name of the member. This may be ``None`` if the member is - unnamed. - :param bit_offset: Offset of the member from the beginning of the type - in bits. - :param bit_field_size: Size in bits of this member if it is a bit field, - zero otherwise. 
""" def __init__( @@ -1240,12 +1547,23 @@ class TypeMember: name: Optional[str] = None, bit_offset: int = 0, bit_field_size: int = 0, - ) -> None: ... + ) -> None: + """ + :param type: :attr:`TypeMember.type`; may also be a callable that + takes no arguments and returns a :class:`Type`. + :param name: :attr:`TypeMember.name` + :param bit_offset: :attr:`TypeMember.bit_offset` + :param bit_field_size: :attr:`TypeMember.bit_field_size` + """ + ... type: Type + """Member type.""" name: Optional[str] + """Member name, or ``None`` if the member is unnamed.""" bit_offset: int + """Offset of the member from the beginning of the type in bits.""" offset: int """ @@ -1254,6 +1572,7 @@ class TypeMember: """ bit_field_size: int + """Size in bits of this member if it is a bit field, zero otherwise.""" class TypeEnumerator: """ @@ -1266,15 +1585,19 @@ class TypeEnumerator: >>> name, value = prog.type('enum pid_type').enumerators[0] >>> value 0 - - :param name: Enumerator name. - :param value: Enumerator value. """ - def __init__(self, name: str, value: int) -> None: ... + def __init__(self, name: str, value: int) -> None: + """ + :param name: :attr:`TypeEnumerator.name` + :param value: :attr:`TypeEnumerator.value` + """ + ... name: str + "Enumerator name." value: int + "Enumerator value." def __len__(self) -> int: ... def __getitem__(self, idx: int) -> Any: ... def __iter__(self) -> Iterator[Any]: ... @@ -1282,19 +1605,22 @@ class TypeEnumerator: class TypeParameter: """ A ``TypeParameter`` represents a parameter of a function type. - - :param type: Type of the parameter. This may be a :class:`Type` or a callable - that takes no arguments and returns a :class:`Type`. - :param name: Name of the parameter. This may be ``None`` if the parameter is - unnamed. """ def __init__( self, type: Union[Type, Callable[[], Type]], name: Optional[str] = None - ) -> None: ... 
+ ) -> None: + """ + :param type: :attr:`TypeParameter.type`; may also be a callable that + takes no arguments and returns a :class:`Type`. + :param name: :attr:`TypeParameter.name` + """ + ... type: Type + """Parameter type.""" name: Optional[str] + """Parameter name, or ``None`` if the parameter is unnamed.""" class TypeKind(enum.Enum): """A ``TypeKind`` represents a kind of type.""" @@ -1342,27 +1668,48 @@ class PrimitiveType(enum.Enum): """A ``PrimitiveType`` represents a primitive type known to drgn.""" C_VOID = ... + "" C_CHAR = ... + "" C_SIGNED_CHAR = ... + "" C_UNSIGNED_CHAR = ... + "" C_SHORT = ... + "" C_UNSIGNED_SHORT = ... + "" C_INT = ... + "" C_UNSIGNED_INT = ... + "" C_LONG = ... + "" C_UNSIGNED_LONG = ... + "" C_LONG_LONG = ... + "" C_UNSIGNED_LONG_LONG = ... + "" C_BOOL = ... + "" C_FLOAT = ... + "" C_DOUBLE = ... + "" C_LONG_DOUBLE = ... + "" C_SIZE_T = ... + "" C_PTRDIFF_T = ... + "" class Qualifiers(enum.Flag): """``Qualifiers`` are modifiers on types.""" + NONE = ... + """No qualifiers.""" + CONST = ... """Constant type.""" @@ -1375,273 +1722,114 @@ class Qualifiers(enum.Flag): ATOMIC = ... """Atomic type.""" -def void_type( - qualifiers: Optional[Qualifiers] = None, *, language: Optional[Language] = None -) -> Type: - """ - Create a new void type. It has kind :attr:`TypeKind.VOID`. - - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def int_type( - name: str, - size: int, - is_signed: bool, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: +# type_or_obj is positional-only. +def sizeof(type_or_obj: Union[Type, Object]) -> int: """ - Create a new integer type. It has kind :attr:`TypeKind.INT`. + Get the size of a :class:`Type` or :class:`Object` in bytes. 
- :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param is_signed: :attr:`Type.is_signed` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` + :param type_or_obj: Entity to get the size of. + :raises TypeError: if the type does not have a size (e.g., because it is + incomplete or void) """ ... -def bool_type( - name: str, - size: int, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new boolean type. It has kind :attr:`TypeKind.BOOL`. - - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` +class FaultError(Exception): """ - ... - -def float_type( - name: str, - size: int, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: + This error is raised when a bad memory access is attempted (i.e., when + accessing a memory address which is not valid in a program). """ - Create a new floating-point type. It has kind :attr:`TypeKind.FLOAT`. - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... - -def complex_type( - name: str, - size: int, - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new complex type. It has kind :attr:`TypeKind.COMPLEX`. + def __init__(self, address: int) -> None: + """ + :param address: :attr:`FaultError.address` + """ + ... + address: int + """Address that couldn't be accessed.""" - :param name: :attr:`Type.name` - :param size: :attr:`Type.size` - :param type: The corresponding real type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` +class MissingDebugInfoError(Exception): """ - ... 
- -def struct_type( - tag: Optional[str], - size: Optional[int] = None, - members: Optional[Sequence[TypeMember]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: + This error is raised when one or more files in a program do not have debug + information. """ - Create a new structure type. It has kind :attr:`TypeKind.STRUCT`. - :param tag: :attr:`Type.tag` - :param size: :attr:`Type.size`; ``None`` if this is an incomplete type. - :param members: :attr:`Type.members` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ ... -def union_type( - tag: Optional[str], - size: Optional[int] = None, - members: Optional[Sequence[TypeMember]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: +class OutOfBoundsError(Exception): """ - Create a new union type. It has kind :attr:`TypeKind.UNION`. Otherwise, - this is the same as :func:`struct_type()`. + This error is raised when attempting to access beyond the bounds of a value + object. """ - ... -def class_type( - tag: Optional[str], - size: Optional[int] = None, - members: Optional[Sequence[TypeMember]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new class type. It has kind :attr:`TypeKind.CLASS`. Otherwise, - this is the same as :func:`struct_type()`. - """ ... -def enum_type( - tag: Optional[str], - type: Optional[Type] = None, - enumerators: Optional[Sequence[TypeEnumerator]] = None, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: - """ - Create a new enumerated type. It has kind :attr:`TypeKind.ENUM`. - - :param tag: :attr:`Type.tag` - :param type: The compatible integer type (:attr:`Type.type`) - :param enumerators: :attr:`Type.enumerators` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` - """ - ... 
+_with_libkdumpfile: bool -def typedef_type( - name: str, - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: +def _linux_helper_read_vm( + prog: Program, pgtable: Object, address: IntegerLike, size: IntegerLike +) -> bytes: ... +def _linux_helper_radix_tree_lookup(root: Object, index: IntegerLike) -> Object: """ - Create a new typedef type. It has kind :attr:`TypeKind.TYPEDEF`. + Look up the entry at a given index in a radix tree. - :param name: :attr:`Type.name` - :param type: The aliased type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` + :param root: ``struct radix_tree_root *`` + :param index: Entry index. + :return: ``void *`` found entry, or ``NULL`` if not found. """ ... -def pointer_type( - size: int, - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: +def _linux_helper_idr_find(idr: Object, id: IntegerLike) -> Object: """ - Create a new pointer type. It has kind :attr:`TypeKind.POINTER`, + Look up the entry with the given ID in an IDR. - You can usually use :meth:`Program:pointer_type()` instead. - - :param size: :attr:`Type.size` - :param type: The referenced type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` + :param idr: ``struct idr *`` + :param id: Entry ID. + :return: ``void *`` found entry, or ``NULL`` if not found. """ ... -def array_type( - length: Optional[int], - type: Type, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: +def _linux_helper_find_pid( + prog_or_ns: Union[Program, Object], pid: IntegerLike +) -> Object: """ - Create a new array type. It has kind :attr:`TypeKind.ARRAY`. + Return the ``struct pid *`` for the given PID number. 
- :param length: :attr:`Type.length` - :param type: The element type (:attr:`Type.type`) - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` + :param prog_or_ns: ``struct pid_namespace *`` object, or :class:`Program` + to use initial PID namespace. + :return: ``struct pid *`` """ ... -def function_type( - type: Type, - parameters: Sequence[TypeParameter], - is_variadic: bool = False, - qualifiers: Optional[Qualifiers] = None, - *, - language: Optional[Language] = None, -) -> Type: +def _linux_helper_pid_task(pid: Object, pid_type: IntegerLike) -> Object: """ - Create a new function type. It has kind :attr:`TypeKind.FUNCTION`. + Return the ``struct task_struct *`` containing the given ``struct pid *`` + of the given type. - :param type: The return type (:attr:`Type.type`) - :param parameters: :attr:`Type.parameters` - :param is_variadic: :attr:`Type.is_variadic` - :param qualifiers: :attr:`Type.qualifiers` - :param lang: :attr:`Type.language` + :param pid: ``struct pid *`` + :param pid_type: ``enum pid_type`` + :return: ``struct task_struct *`` """ ... -# type_or_obj is positional-only. -def sizeof(type_or_obj: Union[Type, Object]) -> int: +def _linux_helper_find_task( + prog_or_ns: Union[Program, Object], pid: IntegerLike +) -> Object: """ - Get the size of a :class:`Type` or :class:`Object` in bytes. + Return the task with the given PID. - :param type_or_obj: Entity to get the size of. - :raises TypeError: if the type does not have a size (e.g., because it is - incomplete or void) + :param prog_or_ns: ``struct pid_namespace *`` object, or :class:`Program` + to use initial PID namespace. + :return: ``struct task_struct *`` """ ... -class FaultError(Exception): - """ - This error is raised when a bad memory access is attempted (i.e., when - accessing a memory address which is not valid in a program). - - :param address: Address that couldn't be accessed. 
+def _linux_helper_kaslr_offset(prog: Program) -> int: """ - - def __init__(self, address: int) -> None: ... - address: int - -class MissingDebugInfoError(Exception): + Get the kernel address space layout randomization offset (zero if it is + disabled). """ - This error is raised when one or more files in a program do not have debug - information. - """ - ... -class OutOfBoundsError(Exception): - """ - This error is raised when attempting to access beyond the bounds of a value - object. - """ - +def _linux_helper_pgtable_l5_enabled(prog: Program) -> bool: + """Return whether 5-level paging is enabled.""" ... - -_with_libkdumpfile: bool - -def _linux_helper_read_vm(prog, pgtable, address, size): ... -def _linux_helper_radix_tree_lookup(root, index): ... -def _linux_helper_idr_find(idr, id): ... -def _linux_helper_find_pid(ns, pid): ... -def _linux_helper_pid_task(pid, pid_type): ... -def _linux_helper_find_task(ns, pid): ... -def _linux_helper_task_state_to_char(task): ... -def _linux_helper_kaslr_offset(prog): ... -def _linux_helper_pgtable_l5_enabled(prog): ... diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 0b57d0d07..020c47369 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -7,7 +7,7 @@ Programs -------- .. drgndoc:: Program - :include: __getitem__ + :exclude: (void|int|bool|float|complex|struct|union|class|enum|typedef|pointer|array|function)_type .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags @@ -57,7 +57,6 @@ Objects ------- .. drgndoc:: Object - :include: __getattribute__|__getitem__|__len__ .. drgndoc:: NULL .. drgndoc:: cast .. drgndoc:: reinterpret @@ -97,25 +96,27 @@ Type Constructors Custom drgn types can be created with the following factory functions. These can be used just like types obtained from :meth:`Program.type()`. -.. drgndoc:: void_type -.. drgndoc:: int_type -.. drgndoc:: bool_type -.. drgndoc:: float_type -.. drgndoc:: complex_type -.. drgndoc:: struct_type -.. drgndoc:: union_type -.. 
drgndoc:: class_type -.. drgndoc:: enum_type -.. drgndoc:: typedef_type -.. drgndoc:: pointer_type -.. drgndoc:: array_type -.. drgndoc:: function_type +.. drgndoc:: Program.void_type +.. drgndoc:: Program.int_type +.. drgndoc:: Program.bool_type +.. drgndoc:: Program.float_type +.. drgndoc:: Program.complex_type +.. drgndoc:: Program.struct_type +.. drgndoc:: Program.union_type +.. drgndoc:: Program.class_type +.. drgndoc:: Program.enum_type +.. drgndoc:: Program.typedef_type +.. drgndoc:: Program.pointer_type +.. drgndoc:: Program.array_type +.. drgndoc:: Program.function_type Miscellaneous ------------- .. drgndoc:: sizeof .. drgndoc:: execscript +.. drgndoc:: IntegerLike +.. drgndoc:: Path Exceptions ---------- diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index a5a6d7270..be92ddc24 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -5,14 +5,13 @@ import argparse import functools import sys -from typing import cast +from typing import Union, cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode from drgndoc.parse import Class, DocumentedNode, Node, parse_paths from drgndoc.util import dot_join - escapes = [] for c in range(256): if c == 0: @@ -42,7 +41,7 @@ escapes.append(e) -def escape_string(s): +def escape_string(s: str) -> str: return "".join([escapes[c] for c in s.encode("utf-8")]) @@ -91,21 +90,18 @@ def escape_string(s): def aux(resolved: ResolvedNode[Node], name: str) -> None: node = resolved.node - if hasattr(node, "docstring"): + if node.has_docstring(): var_name = name.replace(".", "_") + "_DOC" if args.header: output_file.write("extern ") output_file.write(f"const char {var_name}[]") if not args.header: output_file.write(" =") - signature, lines = formatter.format( - cast(ResolvedNode[DocumentedNode], resolved), rst=False + lines = formatter.format( + cast(ResolvedNode[DocumentedNode], resolved), + name.rpartition(".")[2], + rst=False, ) - 
if signature: - lines[0:0] = [ - name.rpartition(".")[2] + signature, - "", - ] if lines: for i, line in enumerate(lines): output_file.write(f'\n\t"{escape_string(line)}') diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 5d6779f75..b347f4f4f 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -40,35 +40,33 @@ file for the C extension itself (drgndoc.docstrings). """ +import os.path +import re +from typing import Any, Dict, List, cast + import docutils.nodes import docutils.parsers.rst.directives import docutils.statemachine -import os.path -import re import sphinx.addnodes import sphinx.application import sphinx.environment import sphinx.util.docutils import sphinx.util.logging import sphinx.util.nodes -from typing import List, cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode from drgndoc.parse import ( Class, DocumentedNode, - Function, Import, ImportFrom, Module, Node, - Variable, parse_paths, ) from drgndoc.util import dot_join - logger = sphinx.util.logging.getLogger(__name__) @@ -101,11 +99,10 @@ class DrgnDocDirective(sphinx.util.docutils.SphinxDirective): required_arguments = 1 optional_arguments = 0 option_spec = { - "include": docutils.parsers.rst.directives.unchanged, "exclude": docutils.parsers.rst.directives.unchanged, } - def run(self) -> List[docutils.nodes.Node]: + def run(self) -> Any: parts = [] py_module = self.env.ref_context.get("py:module") if py_module: @@ -119,98 +116,66 @@ def run(self) -> List[docutils.nodes.Node]: if not isinstance(resolved, ResolvedNode): logger.warning("name %r not found", resolved) return [] + if not resolved.node.has_docstring(): + logger.warning("name %r is not documented", resolved.qualified_name()) + return [] docnode = docutils.nodes.section() - self._run(name, "", resolved, docnode) + self._run(name, "", self.arguments[0], resolved, docnode) return docnode.children - def _include_attr(self, attr: ResolvedNode[Node], attr_name: 
str) -> bool: - """ - Return whether the given recursive attribute should be documented. - - We recursively include nodes that are: - 1. Not imports. - 2. Match the "include" pattern OR don't start with an underscore. - AND - 3. Do not match the "exclude" pattern. - - The "include" and "exclude" patterns are applied to the name relative - to the object being documented by the directive. - """ - if isinstance(attr.node, (Import, ImportFrom)): - return False - - if not attr_name: - return True - - dot = attr_name.rfind(".") - if dot + 1 < len(attr_name) and attr_name[dot + 1] == "_": - include_pattern = self.options.get("include") - if include_pattern is None or not re.fullmatch(include_pattern, attr_name): - return False - exclude_pattern = self.options.get("exclude") - return exclude_pattern is None or not re.fullmatch(exclude_pattern, attr_name) - def _run( self, top_name: str, attr_name: str, + name: str, resolved: ResolvedNode[Node], docnode: docutils.nodes.Node, ) -> None: - if not self._include_attr(resolved, attr_name): + exclude_pattern = self.options.get("exclude") + if exclude_pattern is not None and re.fullmatch(exclude_pattern, attr_name): return + + if isinstance(resolved.node, (Import, ImportFrom)): + # Only include imports that are explicitly aliased (i.e., import + # ... as ... or from ... import ... as ...). + # TODO: we should also include imports listed in __all__. 
+ if not resolved.node.aliased: + return + imported = self.env.drgndoc_namespace.resolve_name_in_scope( + resolved.modules, resolved.classes, resolved.name + ) + if not isinstance(imported, ResolvedNode): + return + resolved = imported + resolved = cast(ResolvedNode[DocumentedNode], resolved) - node = resolved.node - if isinstance(node, Module): - directive = "py:module" + if isinstance(resolved.node, Module): return self._run_module( top_name, attr_name, cast(ResolvedNode[Module], resolved), docnode ) - sourcename = "" - if resolved.module and resolved.module.node.path: - sourcename = resolved.module.node.path - if sourcename: - self.env.note_dependency(sourcename) - - if isinstance(node, Class): - directive = "py:class" - elif isinstance(node, Function): - directive = "py:method" if resolved.class_ else "py:function" - elif isinstance(node, Variable): - directive = "py:attribute" if resolved.class_ else "py:data" - else: - assert False, type(node).__name__ - - argument = (attr_name or top_name).rpartition(".")[2] - extra_argument, lines = self.env.drgndoc_formatter.format( + lines = self.env.drgndoc_formatter.format( resolved, + name, self.env.ref_context.get("py:module", ""), ".".join(self.env.ref_context.get("py:classes", ())), ) + if not lines: + # Not documented. Ignore it. + return - contents = docutils.statemachine.StringList() - contents.append( - f".. 
{directive}:: {argument}{extra_argument}", sourcename, - ) - if isinstance(node, Function): - if node.async_: - contents.append(" :async:", sourcename) - if resolved.class_: - if node.have_decorator("classmethod"): - contents.append(" :classmethod:", sourcename) - if node.have_decorator("staticmethod"): - contents.append(" :staticmethod:", sourcename) + sourcename = "" + if resolved.modules and resolved.modules[-1].node.path: + sourcename = resolved.modules[-1].node.path + if sourcename: + self.env.note_dependency(sourcename) + contents = docutils.statemachine.StringList(lines, sourcename) contents.append("", sourcename) - if lines: - for line in lines: - contents.append(" " + line, sourcename) - contents.append("", sourcename) self.state.nested_parse(contents, 0, docnode) - if isinstance(node, Class): + if isinstance(resolved.node, Class): for desc in reversed(docnode.children): if isinstance(desc, sphinx.addnodes.desc): break @@ -228,9 +193,14 @@ def _run( py_classes.append(resolved.name) self.env.ref_context["py:class"] = resolved.name for member in resolved.attrs(): - self._run( - top_name, dot_join(attr_name, member.name), member, desc_content - ) + if member.name != "__init__": + self._run( + top_name, + dot_join(attr_name, member.name), + member.name, + member, + desc_content, + ) py_classes.pop() self.env.ref_context["py:class"] = py_classes[-1] if py_classes else None @@ -242,14 +212,16 @@ def _run_module( docnode: docutils.nodes.Node, ) -> None: node = resolved.node + if node.docstring is None: + # Not documented. Ignore it. 
+ return + sourcename = node.path or "" if sourcename: self.env.note_dependency(sourcename) - - contents = docutils.statemachine.StringList() - if node.docstring: - for line in node.docstring.splitlines(): - contents.append(line, sourcename) + contents = docutils.statemachine.StringList( + node.docstring.splitlines(), sourcename + ) sphinx.util.nodes.nested_parse_with_titles(self.state, contents, docnode) @@ -268,14 +240,16 @@ def _run_module( have_old_py_module = False self.env.ref_context["py:module"] = dot_join(top_name, attr_name) for attr in resolved.attrs(): - self._run(top_name, dot_join(attr_name, attr.name), attr, section) + self._run( + top_name, dot_join(attr_name, attr.name), attr.name, attr, section + ) if have_old_py_module: self.env.ref_context["py:module"] = old_py_module else: del self.env.ref_context["py:module"] -def setup(app: sphinx.application.Sphinx) -> dict: +def setup(app: sphinx.application.Sphinx) -> Dict[str, Any]: app.connect("builder-inited", drgndoc_init) # List of modules or packages. 
app.add_config_value("drgndoc_paths", [], "env") diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index efa162887..fec1be9cb 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -6,24 +6,35 @@ from typing import Any, List, Optional, Pattern, Sequence, Tuple, cast from drgndoc.namespace import BoundNode, Namespace, ResolvedNode -from drgndoc.parse import Class, DocumentedNode, Function, Module, Variable +from drgndoc.parse import ( + Class, + DocumentedNode, + Function, + FunctionSignature, + Module, + Variable, +) from drgndoc.visitor import NodeVisitor +def _is_name_constant(node: ast.Constant) -> bool: + return node.value is None or node.value is True or node.value is False + + class _FormatVisitor(NodeVisitor): def __init__( self, namespace: Namespace, substitutions: Sequence[Tuple[Pattern[str], Any]], - module: Optional[BoundNode[Module]], - class_: Optional[BoundNode[Class]], + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], context_module: Optional[str], context_class: Optional[str], ) -> None: self._namespace = namespace self._substitutions = substitutions - self._module = module - self._class = class_ + self._modules = modules + self._classes = classes self._context_module = context_module self._context_class = context_class self._parts: List[str] = [] @@ -53,11 +64,16 @@ def visit_Constant( if node.value is ...: self._parts.append("...") else: + obj = self._rst and _is_name_constant(node) quote = self._rst and not isinstance(node.value, (int, float)) - if quote: + if obj: + self._parts.append(":py:obj:`") + elif quote: self._parts.append("``") self._parts.append(repr(node.value)) - if quote: + if obj: + self._parts.append("`") + elif quote: self._parts.append("``") def _append_resolved_name(self, name: str) -> None: @@ -65,7 +81,7 @@ def _append_resolved_name(self, name: str) -> None: self._parts.append(":py:obj:`") resolved = self._namespace.resolve_name_in_scope( - self._module, 
self._class, name + self._modules, self._classes, name ) if isinstance(resolved, ResolvedNode): target = resolved.qualified_name() @@ -113,6 +129,10 @@ def visit_Attribute( name_stack.append(value.id) name_stack.reverse() self._append_resolved_name(".".join(name_stack)) + elif isinstance(value, ast.Constant) and _is_name_constant(value): + name_stack.append(repr(value.value)) + name_stack.reverse() + self._append_resolved_name(".".join(name_stack)) elif isinstance(value, ast.Constant) and not isinstance( value.value, (type(...), int, float) ): @@ -170,9 +190,7 @@ def visit_Tuple( for i, elt in enumerate(node.elts): if i > 0: self._parts.append(", ") - self._visit( - elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None, - ) + self._visit(elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None) if len(node.elts) == 1: self._parts.append(",") if parens: @@ -188,9 +206,7 @@ def visit_List( for i, elt in enumerate(node.elts): if i > 0: self._parts.append(", ") - self._visit( - elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None, - ) + self._visit(elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None) if self._rst: self._parts.append("\\") self._parts.append("]") @@ -205,73 +221,33 @@ def __init__( self._namespace = namespace self._substitutions = substitutions - def _add_class_info( - self, - resolved: ResolvedNode[Class], - context_module: Optional[str], - context_class: Optional[str], - rst: bool, - lines: List[str], - ) -> str: - node = resolved.node - if node.bases: - visitor = _FormatVisitor( - self._namespace, - self._substitutions, - resolved.module, - resolved.class_, - context_module, - context_class, - ) - bases = [visitor.visit(base, rst) for base in node.bases] - lines[0:0] = ["Bases: " + ", ".join(bases), ""] - - extra_argument = "" - try: - init = resolved.attr("__init__") - except KeyError: - pass - else: - if isinstance(init.node, Function): - init_context_class = resolved.name - if context_class: - 
init_context_class = context_class + "." + init_context_class - extra_argument = self._add_function_info( - cast(ResolvedNode[Function], init), - context_module, - init_context_class, - rst, - False, - lines, - ) - return extra_argument - - def _add_function_info( + def _format_function_signature( self, - resolved: ResolvedNode[Function], + node: FunctionSignature, + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], context_module: Optional[str], context_class: Optional[str], rst: bool, want_rtype: bool, - lines: List[str], - ) -> str: + ) -> Tuple[str, List[str]]: visitor = _FormatVisitor( self._namespace, self._substitutions, - resolved.module, - resolved.class_, + modules, + classes, context_module, context_class, ) - node = resolved.node + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() if rst: - if node.docstring is None: - want_rtype = False - + lines = [] params_need_type = set() params_have_type = set() - for line in lines: + for line in docstring_lines: + lines.append(" " + line) match = re.match(r":(param|type)\s+([a-zA-Z0-9_]+):", line) if match: if match.group(1) == "param": @@ -281,15 +257,17 @@ def _add_function_info( elif line.startswith(":rtype:"): want_rtype = False params_need_type -= params_have_type - lines.append("") + else: + lines = docstring_lines signature = ["("] need_comma = False + need_blank_line = bool(lines) def visit_arg( arg: ast.arg, default: Optional[ast.expr] = None, prefix: str = "" ) -> None: - nonlocal need_comma + nonlocal need_comma, need_blank_line if need_comma: signature.append(", ") if prefix: @@ -308,7 +286,10 @@ def visit_arg( need_comma = True if rst and arg.annotation and arg.arg in params_need_type: - lines.append(f":type {arg.arg}: {visitor.visit(arg.annotation)}") + if need_blank_line: + lines.append("") + need_blank_line = False + lines.append(f" :type {arg.arg}: {visitor.visit(arg.annotation)}") posonlyargs = getattr(node.args, "posonlyargs", []) 
num_posargs = len(posonlyargs) + len(node.args.args) @@ -320,7 +301,7 @@ def visit_arg( ] else: default = None - if i == 0 and resolved.class_ and not node.have_decorator("staticmethod"): + if i == 0 and classes and not node.has_decorator("staticmethod"): # Skip self for methods and cls for class methods. continue visit_arg(arg, default) @@ -346,81 +327,233 @@ def visit_arg( if want_rtype and node.returns: if rst: - lines.append(":rtype: " + visitor.visit(node.returns)) + if need_blank_line: + lines.append("") + need_blank_line = False + lines.append(" :rtype: " + visitor.visit(node.returns)) else: signature.append(" -> ") signature.append(visitor.visit(node.returns, False)) - return "".join(signature) + return "".join(signature), lines + + def _format_class( + self, + resolved: ResolvedNode[Class], + name: str, + context_module: Optional[str] = None, + context_class: Optional[str] = None, + rst: bool = True, + ) -> List[str]: + node = resolved.node + + init_signatures = None + try: + init = resolved.attr("__init__") + except KeyError: + pass + else: + if isinstance(init.node, Function): + init_signatures = [ + signature + for signature in init.node.signatures + if signature.docstring is not None + ] + init_context_class = resolved.name + if context_class: + init_context_class = context_class + "." + init_context_class + + lines = [] + for i, signature_node in enumerate(init_signatures or (None,)): + if i > 0: + lines.append("") + + signature_lines: Optional[List[str]] + if signature_node: + signature, signature_lines = self._format_function_signature( + signature_node, + init.modules, + init.classes, + context_module, + init_context_class, + rst, + False, + ) + else: + signature = "" + signature_lines = None + + if rst: + lines.append(f".. 
py:class:: {name}{signature}") + if i > 0: + lines.append(" :noindex:") + elif signature: + lines.append(f"{name}{signature}") + + if i == 0: + if node.bases: + visitor = _FormatVisitor( + self._namespace, + self._substitutions, + resolved.modules, + resolved.classes, + context_module, + context_class, + ) + bases = [visitor.visit(base, rst) for base in node.bases] + if lines: + lines.append("") + lines.append((" " if rst else "") + "Bases: " + ", ".join(bases)) + + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() + if docstring_lines: + if lines: + lines.append("") + if rst: + for line in docstring_lines: + lines.append(" " + line) + else: + lines.extend(docstring_lines) + + if signature_lines: + lines.append("") + lines.extend(signature_lines) + return lines + + def _format_function( + self, + resolved: ResolvedNode[Function], + name: str, + context_module: Optional[str] = None, + context_class: Optional[str] = None, + rst: bool = True, + ) -> List[str]: + node = resolved.node + + lines = [] + for i, signature_node in enumerate( + signature + for signature in node.signatures + if signature.docstring is not None + ): + if i > 0: + lines.append("") + signature, signature_lines = self._format_function_signature( + signature_node, + resolved.modules, + resolved.classes, + context_module, + context_class, + rst, + True, + ) - def _add_variable_info( + if rst: + directive = "py:method" if resolved.classes else "py:function" + lines.append(f".. 
{directive}:: {name}{signature}") + if i > 0: + lines.append(" :noindex:") + if node.async_: + lines.append(" :async:") + if signature_node.has_decorator("classmethod") or name in ( + "__init_subclass__", + "__class_getitem__", + ): + lines.append(" :classmethod:") + if signature_node.has_decorator("staticmethod"): + lines.append(" :staticmethod:") + else: + lines.append(f"{name}{signature}") + if signature_lines: + lines.append("") + lines.extend(signature_lines) + return lines + + def _format_variable( self, resolved: ResolvedNode[Variable], + name: str, context_module: Optional[str], context_class: Optional[str], rst: bool, - lines: List[str], - ) -> None: - annotation = resolved.node.annotation - if not annotation: - return - for line in lines: - if line.startswith(":vartype:"): - return + ) -> List[str]: + node = resolved.node + assert node.docstring is not None + docstring_lines = node.docstring.splitlines() + + have_vartype = any(line.startswith(":vartype:") for line in docstring_lines) visitor = _FormatVisitor( self._namespace, self._substitutions, - resolved.module, - resolved.class_, + resolved.modules, + resolved.classes, context_module, context_class, ) if rst: - lines.append("") - lines.append(":vartype: " + visitor.visit(annotation)) + directive = "py:attribute" if resolved.classes else "py:data" + lines = [f".. 
{directive}:: {name}"] + if docstring_lines: + lines.append("") + for line in docstring_lines: + lines.append(" " + line) + if node.annotation and not have_vartype: + lines.append("") + lines.append(" :vartype: " + visitor.visit(node.annotation)) + return lines else: - lines[0:0] = [visitor.visit(annotation, False), ""] + if node.annotation and not have_vartype: + if docstring_lines: + docstring_lines.insert(0, "") + docstring_lines.insert(0, visitor.visit(node.annotation, False)) + return docstring_lines def format( self, resolved: ResolvedNode[DocumentedNode], + name: Optional[str] = None, context_module: Optional[str] = None, context_class: Optional[str] = None, rst: bool = True, - ) -> Tuple[str, List[str]]: - if context_module is None and resolved.module: - context_module = resolved.module.name - if context_class is None and resolved.class_: - context_class = resolved.class_.name - + ) -> List[str]: node = resolved.node - lines = node.docstring.splitlines() if node.docstring else [] + if not node.has_docstring(): + return [] + + if name is None: + name = resolved.name + if context_module is None and resolved.modules: + context_module = ".".join([module.name for module in resolved.modules]) + if context_class is None and resolved.classes: + context_class = ".".join([class_.name for class_ in resolved.classes]) - signature = "" if isinstance(node, Class): - signature = self._add_class_info( + return self._format_class( cast(ResolvedNode[Class], resolved), + name, context_module, context_class, rst, - lines, ) elif isinstance(node, Function): - signature = self._add_function_info( + return self._format_function( cast(ResolvedNode[Function], resolved), + name, context_module, context_class, rst, - True, - lines, ) elif isinstance(node, Variable): - self._add_variable_info( + return self._format_variable( cast(ResolvedNode[Variable], resolved), + name, context_module, context_class, rst, - lines, ) - return signature, lines + else: + assert isinstance(node, 
Module) + assert node.docstring is not None + return node.docstring.splitlines() diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index cf2602853..5bf92365d 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -2,15 +2,7 @@ # SPDX-License-Identifier: GPL-3.0+ import itertools -from typing import ( - Generic, - Iterator, - List, - Mapping, - Optional, - TypeVar, - Union, -) +from typing import Generic, Iterator, List, Mapping, Optional, Sequence, TypeVar, Union from drgndoc.parse import ( Class, @@ -24,7 +16,6 @@ ) from drgndoc.util import dot_join - NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True) @@ -37,49 +28,46 @@ def __init__(self, name: str, node: NodeT_co) -> None: class ResolvedNode(Generic[NodeT_co]): def __init__( self, - module: Optional[BoundNode[Module]], - class_: Optional[BoundNode[Class]], + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], name: str, node: NodeT_co, ) -> None: - self.module = module - self.class_ = class_ + self.modules = modules + self.classes = classes self.name = name self.node = node def qualified_name(self) -> str: - return dot_join( - self.module.name if self.module else None, - self.class_.name if self.class_ else None, - self.name, + return ".".join( + itertools.chain( + (module.name for module in self.modules), + (class_.name for class_ in self.classes), + (self.name,), + ) ) def attrs(self) -> Iterator["ResolvedNode[Node]"]: if isinstance(self.node, Module): - module_name = dot_join(self.module.name if self.module else None, self.name) + modules = list(self.modules) + modules.append(BoundNode(self.name, self.node)) for attr, node in self.node.attrs.items(): - yield ResolvedNode(BoundNode(module_name, self.node), None, attr, node) + yield ResolvedNode(modules, self.classes, attr, node) elif isinstance(self.node, Class): - class_name = dot_join(self.class_.name if self.class_ else None, self.name) + classes = list(self.classes) + 
classes.append(BoundNode(self.name, self.node)) for attr, node in self.node.attrs.items(): - yield ResolvedNode( - self.module, BoundNode(class_name, self.node), attr, node - ) + yield ResolvedNode(self.modules, classes, attr, node) def attr(self, attr: str) -> "ResolvedNode[Node]": if isinstance(self.node, Module): - module_name = dot_join(self.module.name if self.module else None, self.name) - return ResolvedNode( - BoundNode(module_name, self.node), None, attr, self.node.attrs[attr] - ) + modules = list(self.modules) + modules.append(BoundNode(self.name, self.node)) + return ResolvedNode(modules, self.classes, attr, self.node.attrs[attr]) elif isinstance(self.node, Class): - class_name = dot_join(self.class_.name if self.class_ else None, self.name) - return ResolvedNode( - self.module, - BoundNode(class_name, self.node), - attr, - self.node.attrs[attr], - ) + classes = list(self.classes) + classes.append(BoundNode(self.name, self.node)) + return ResolvedNode(self.modules, classes, attr, self.node.attrs[attr]) else: raise KeyError(attr) @@ -91,30 +79,20 @@ class Namespace: def __init__(self, modules: Mapping[str, Module]) -> None: self.modules = modules + # NB: this modifies the passed lists. 
def _resolve_name( self, - module_name: Optional[str], - module: Optional[Module], - class_name: Optional[str], - class_: Optional[Class], + modules: List[BoundNode[Module]], + classes: List[BoundNode[Class]], name_components: List[str], ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]: - assert (module_name is None) == (module is None) - assert (class_name is None) == (class_ is None) - module_name_parts = [] - if module_name is not None: - module_name_parts.append(module_name) - class_name_parts = [] - if class_name is not None: - class_name_parts.append(class_name) - name_components.reverse() while name_components: attrs: Mapping[str, Node] - if class_: - attrs = class_.attrs - elif module: - attrs = module.attrs + if classes: + attrs = classes[-1].node.attrs + elif modules: + attrs = modules[-1].node.attrs else: attrs = self.modules name = name_components.pop() @@ -124,65 +102,56 @@ def _resolve_name( break if isinstance(node, (Import, ImportFrom)): - module_name_parts.clear() - class_name_parts.clear() - module = None - class_ = None + classes.clear() if isinstance(node, Import): - import_name = node.module + modules.clear() elif isinstance(node, ImportFrom): - if node.module is None or node.level != 0: - raise NotImplementedError("TODO: relative imports") - import_name = node.module + if node.level >= len(modules): + # Relative import beyond top-level package. Bail. + break + # Absolute import is level 0, which clears the whole list. 
+ del modules[-node.level :] name_components.append(node.name) - name_components.extend(reversed(import_name.split("."))) + if node.module is not None: + name_components.extend(reversed(node.module.split("."))) elif name_components: if isinstance(node, Module): - assert not class_ - module = node - module_name_parts.append(name) + assert not classes + modules.append(BoundNode(name, node)) elif isinstance(node, Class): - class_ = node - class_name_parts.append(name) + classes.append(BoundNode(name, node)) else: break else: assert isinstance(node, (Module, Class, Function, Variable)) - return ResolvedNode( - BoundNode(".".join(module_name_parts), module) if module else None, - BoundNode(".".join(class_name_parts), class_) if class_ else None, - name, - node, - ) + return ResolvedNode(modules, classes, name, node) return ".".join( itertools.chain( - module_name_parts, class_name_parts, (name,), reversed(name_components) + (module.name for module in modules), + (class_.name for class_ in classes), + (name,), + reversed(name_components), ) ) def resolve_global_name( self, name: str ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]: - return self._resolve_name(None, None, None, None, name.split(".")) + return self._resolve_name([], [], name.split(".")) def resolve_name_in_scope( self, - module: Optional[BoundNode[Module]], - class_: Optional[BoundNode[Class]], + modules: Sequence[BoundNode[Module]], + classes: Sequence[BoundNode[Class]], name: str, ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]: name_components = name.split(".") attr = name_components[0] - if class_ and attr in class_.node.attrs: - pass - elif module and attr in module.node.attrs: - class_ = None + if classes and attr in classes[-1].node.attrs: + classes = list(classes) + elif modules and attr in modules[-1].node.attrs: + classes = [] else: return name - return self._resolve_name( - module.name if module else None, - module.node if module else None, - class_.name if class_ else None, - 
class_.node if class_ else None, - name_components, - ) + modules = list(modules) + return self._resolve_name(modules, classes, name_components) diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index 4b272cfbc..0dcb766d3 100644 --- a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -24,28 +24,40 @@ class _PreTransformer(ast.NodeTransformer): # Replace string forward references with the parsed expression. - def _visit_annotation(self, node): + @overload + def _visit_annotation(self, node: ast.expr) -> ast.expr: + ... + + @overload + def _visit_annotation(self, node: None) -> None: + ... + + def _visit_annotation(self, node: Optional[ast.expr]) -> Optional[ast.expr]: if isinstance(node, ast.Constant) and isinstance(node.value, str): - node = self.visit(ast.parse(node.value, "", "eval")) + node = self.visit( + cast(ast.Expression, ast.parse(node.value, "", "eval")).body + ) return node - def visit_arg(self, node): - node = self.generic_visit(node) + def visit_arg(self, node: ast.arg) -> ast.arg: + node = cast(ast.arg, self.generic_visit(node)) node.annotation = self._visit_annotation(node.annotation) return node - def visit_FunctionDef(self, node): - node = self.generic_visit(node) + def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef: + node = cast(ast.FunctionDef, self.generic_visit(node)) node.returns = self._visit_annotation(node.returns) return node - def visit_AsyncFunctionDef(self, node): - node = self.generic_visit(node) + def visit_AsyncFunctionDef( + self, node: ast.AsyncFunctionDef + ) -> ast.AsyncFunctionDef: + node = cast(ast.AsyncFunctionDef, self.generic_visit(node)) node.returns = self._visit_annotation(node.returns) return node - def visit_AnnAssign(self, node): - node = self.generic_visit(node) + def visit_AnnAssign(self, node: ast.AnnAssign) -> ast.AnnAssign: + node = cast(ast.AnnAssign, self.generic_visit(node)) node.annotation = self._visit_annotation(node.annotation) return node @@ -79,6 +91,9 
@@ def __init__( self.docstring = docstring self.attrs = attrs + def has_docstring(self) -> bool: + return self.docstring is not None + class Class: def __init__( @@ -91,29 +106,39 @@ def __init__( self.docstring = docstring self.attrs = attrs + def has_docstring(self) -> bool: + return self.docstring is not None -class Function: + +class FunctionSignature: def __init__( self, args: ast.arguments, - decorator_list: Sequence[ast.expr], returns: Optional[ast.expr], - async_: bool, + decorator_list: Sequence[ast.expr], docstring: Optional[str], ) -> None: self.args = args - self.decorator_list = decorator_list self.returns = returns - self.async_ = async_ + self.decorator_list = decorator_list self.docstring = docstring - def have_decorator(self, name: str) -> bool: + def has_decorator(self, name: str) -> bool: return any( isinstance(decorator, ast.Name) and decorator.id == name for decorator in self.decorator_list ) +class Function: + def __init__(self, async_: bool, signatures: Sequence[FunctionSignature]) -> None: + self.async_ = async_ + self.signatures = signatures + + def has_docstring(self) -> bool: + return any(signature.docstring is not None for signature in self.signatures) + + class Variable: def __init__( self, annotation: Optional[ast.expr], docstring: Optional[str] @@ -121,17 +146,30 @@ def __init__( self.annotation = annotation self.docstring = docstring + def has_docstring(self) -> bool: + return self.docstring is not None + class Import: - def __init__(self, module: str) -> None: + def __init__(self, module: str, aliased: bool) -> None: self.module = module + self.aliased = aliased + + def has_docstring(self) -> bool: + return False class ImportFrom: - def __init__(self, name: str, module: Optional[str], level: int) -> None: + def __init__( + self, name: str, module: Optional[str], level: int, aliased: bool + ) -> None: self.name = name self.module = module self.level = level + self.aliased = aliased + + def has_docstring(self) -> bool: + return False 
Node = Union[Module, Class, Function, Variable, Import, ImportFrom] @@ -176,27 +214,34 @@ def visit_ClassDef( self._attrs = attrs self._attrs[node.name] = class_node - def visit_FunctionDef( + def _visit_function( self, - node: ast.FunctionDef, + node: Union[ast.FunctionDef, ast.AsyncFunctionDef], parent: Optional[ast.AST], sibling: Optional[ast.AST], ) -> None: - self._attrs[node.name] = Function( - node.args, node.decorator_list, node.returns, False, ast.get_docstring(node) + signature = FunctionSignature( + node.args, node.returns, node.decorator_list, ast.get_docstring(node) ) + async_ = isinstance(node, ast.AsyncFunctionDef) + func = self._attrs.get(node.name) + # If we have a previous overload definition, we can add to it. + # Otherwise, we replace it. + if ( + func + and isinstance(func, Function) + and func.async_ == async_ + and func.signatures[-1].has_decorator("overload") + ): + signatures = list(func.signatures) + signatures.append(signature) + else: + signatures = [signature] + self._attrs[node.name] = Function(async_, signatures) # NB: we intentionally don't visit the function body. - def visit_AsyncFunctionDef( - self, - node: ast.AsyncFunctionDef, - parent: Optional[ast.AST], - sibling: Optional[ast.AST], - ) -> None: - self._attrs[node.name] = Function( - node.args, node.decorator_list, node.returns, True, ast.get_docstring(node) - ) - # NB: we intentionally don't visit the function body. 
+ visit_FunctionDef = _visit_function + visit_AsyncFunctionDef = _visit_function def _add_assign( self, @@ -258,7 +303,7 @@ def visit_Import( else: name = alias.asname module_name = alias.name - self._attrs[name] = Import(module_name) + self._attrs[name] = Import(module_name, alias.asname is not None) def visit_ImportFrom( self, @@ -268,7 +313,9 @@ def visit_ImportFrom( ) -> None: for alias in node.names: name = alias.name if alias.asname is None else alias.asname - self._attrs[name] = ImportFrom(alias.name, node.module, node.level) + self._attrs[name] = ImportFrom( + alias.name, node.module, node.level, alias.asname is not None + ) def parse_source( diff --git a/docs/exts/setuptools_config.py b/docs/exts/setuptools_config.py index 0d45cc13c..f9acf072b 100644 --- a/docs/exts/setuptools_config.py +++ b/docs/exts/setuptools_config.py @@ -23,9 +23,8 @@ from __future__ import unicode_literals import os -import sys import subprocess - +import sys if "check_output" not in dir(subprocess): import subprocess32 as subprocess diff --git a/drgn/__init__.py b/drgn/__init__.py index 69f87611d..2898a5075 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -46,14 +46,16 @@ import types from _drgn import ( + NULL, Architecture, FaultError, FindObjectFlags, + IntegerLike, Language, MissingDebugInfoError, - NULL, Object, OutOfBoundsError, + Path, Platform, PlatformFlags, PrimitiveType, @@ -69,41 +71,29 @@ TypeKind, TypeMember, TypeParameter, - _with_libkdumpfile, - array_type, - bool_type, + _with_libkdumpfile as _with_libkdumpfile, cast, - class_type, - complex_type, container_of, - enum_type, filename_matches, - float_type, - function_type, host_platform, - int_type, - pointer_type, program_from_core_dump, program_from_kernel, program_from_pid, reinterpret, sizeof, - struct_type, - typedef_type, - union_type, - void_type, ) - __all__ = ( "Architecture", "FaultError", "FindObjectFlags", + "IntegerLike", "Language", "MissingDebugInfoError", "NULL", "Object", "OutOfBoundsError", 
+ "Path", "Platform", "PlatformFlags", "PrimitiveType", @@ -119,37 +109,25 @@ "TypeKind", "TypeMember", "TypeParameter", - "array_type", - "bool_type", "cast", - "class_type", - "complex_type", "container_of", - "enum_type", "execscript", "filename_matches", - "float_type", - "function_type", "host_platform", - "int_type", - "pointer_type", "program_from_core_dump", "program_from_kernel", "program_from_pid", "reinterpret", "sizeof", - "struct_type", - "typedef_type", - "union_type", - "void_type", ) -try: +if sys.version_info >= (3, 8): _open_code = io.open_code -except AttributeError: +else: + from typing import BinaryIO - def _open_code(path): + def _open_code(path: str) -> BinaryIO: return open(path, "rb") @@ -176,40 +154,35 @@ def execscript(path: str, *args: str) -> None: added back to the calling context. This is most useful for executing scripts from interactive mode. For - example, you could have a script named ``tasks.py``: + example, you could have a script named ``exe.py``: .. code-block:: python3 + \"\"\"Get all tasks executing a given file.\"\"\" + import sys - \"\"\" - Get all tasks in a given state. - \"\"\" + from drgn.helpers.linux.fs import d_path + from drgn.helpers.linux.pid import find_task - # From include/linux/sched.h. 
- def task_state_index(task): - task_state = task.state.value_() - if task_state == 0x402: # TASK_IDLE - return 8 + def task_exe_path(task): + if task.mm: + return d_path(task.mm.exe_file.f_path).decode() else: - state = (task_state | task.exit_state.value_()) & 0x7f - return state.bit_length() - - def task_state_to_char(task): - return 'RSDTtXZPI'[task_state_index(task)] + return None tasks = [ task for task in for_each_task(prog) - if task_state_to_char(task) == sys.argv[1] + if task_exe_path(task) == sys.argv[1] ] Then, you could execute it and use the defined variables and functions: - >>> execscript('tasks.py', 'R') - >>> tasks[0].comm - (char [16])"python3" - >>> task_state_to_char(find_task(prog, 1)) - 'S' + >>> execscript('exe.py', '/usr/bin/bash') + >>> tasks[0].pid + (pid_t)358442 + >>> task_exe_path(find_task(prog, 357954)) + '/usr/bin/vim' :param path: File path of the script. :param args: Zero or more additional arguments to pass to the script. This diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index cbdd6ee7a..701bb383d 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -33,7 +33,6 @@ import importlib import pkgutil - __all__ = [] for _module_info in pkgutil.iter_modules( __path__, # type: ignore[name-defined] # python/mypy#1422 diff --git a/drgn/helpers/linux/block.py b/drgn/helpers/linux/block.py index 061c8af1c..78ee2434b 100644 --- a/drgn/helpers/linux/block.py +++ b/drgn/helpers/linux/block.py @@ -10,7 +10,9 @@ (``struct hd_struct``). """ -from drgn import container_of +from typing import Iterator + +from drgn import Object, Program, container_of from drgn.helpers import escape_ascii_string from drgn.helpers.linux.device import MAJOR, MINOR, MKDEV from drgn.helpers.linux.list import list_for_each_entry @@ -27,34 +29,33 @@ ) -def disk_devt(disk): +def disk_devt(disk: Object) -> Object: """ - .. c:function:: dev_t disk_devt(struct gendisk *disk) - Get a disk's device number. 
+ + :param disk: ``struct gendisk *`` + :return: ``dev_t`` """ - return MKDEV(disk.major, disk.first_minor) + return Object(disk.prog_, "dev_t", MKDEV(disk.major, disk.first_minor)) -def disk_name(disk): +def disk_name(disk: Object) -> bytes: """ - .. c:function:: char *disk_name(struct gendisk *disk) - Get the name of a disk (e.g., ``sda``). - :rtype: bytes + :param disk: ``struct gendisk *`` """ return disk.disk_name.string_() -def _for_each_block_device(prog): +def _for_each_block_device(prog: Program) -> Iterator[Object]: try: class_in_private = prog.cache["knode_class_in_device_private"] except KeyError: # We need a proper has_member(), but this is fine for now. class_in_private = any( member.name == "knode_class" - for member in prog.type("struct device_private").members + for member in prog.type("struct device_private").members # type: ignore[union-attr] ) prog.cache["knode_class_in_device_private"] = class_in_private devices = prog["block_class"].p.klist_devices.k_list.address_of_() @@ -67,7 +68,7 @@ def _for_each_block_device(prog): yield from list_for_each_entry("struct device", devices, "knode_class.n_node") -def for_each_disk(prog): +def for_each_disk(prog: Program) -> Iterator[Object]: """ Iterate over all disks in the system. @@ -79,7 +80,7 @@ def for_each_disk(prog): yield container_of(device, "struct gendisk", "part0.__dev") -def print_disks(prog): +def print_disks(prog: Program) -> None: """Print all of the disks in the system.""" for disk in for_each_disk(prog): major = disk.major.value_() @@ -88,27 +89,26 @@ def print_disks(prog): print(f"{major}:{minor} {name} ({disk.type_.type_name()})0x{disk.value_():x}") -def part_devt(part): +def part_devt(part: Object) -> Object: """ - .. c:function:: dev_t part_devt(struct hd_struct *part) - Get a partition's device number. + + :param part: ``struct hd_struct *`` + :return: ``dev_t`` """ return part.__dev.devt -def part_name(part): +def part_name(part: Object) -> bytes: """ - .. 
c:function:: char *part_name(struct hd_struct *part) - Get the name of a partition (e.g., ``sda1``). - :rtype: bytes + :param part: ``struct hd_struct *`` """ return part.__dev.kobj.name.string_() -def for_each_partition(prog): +def for_each_partition(prog: Program) -> Iterator[Object]: """ Iterate over all partitions in the system. @@ -118,7 +118,7 @@ def for_each_partition(prog): yield container_of(device, "struct hd_struct", "__dev") -def print_partitions(prog): +def print_partitions(prog: Program) -> None: """Print all of the partitions in the system.""" for part in for_each_partition(prog): devt = part_devt(part).value_() diff --git a/drgn/helpers/linux/boot.py b/drgn/helpers/linux/boot.py index 7d064960d..427298627 100644 --- a/drgn/helpers/linux/boot.py +++ b/drgn/helpers/linux/boot.py @@ -9,29 +9,12 @@ Linux kernel boot configuration. """ -from _drgn import _linux_helper_kaslr_offset, _linux_helper_pgtable_l5_enabled - +from _drgn import ( + _linux_helper_kaslr_offset as kaslr_offset, + _linux_helper_pgtable_l5_enabled as pgtable_l5_enabled, +) __all__ = ( "kaslr_offset", "pgtable_l5_enabled", ) - - -def kaslr_offset(prog): - """ - .. c:function:: unsigned long kaslr_offset(void) - - Get the kernel address space layout randomization offset (zero if it is - disabled). - """ - return _linux_helper_kaslr_offset(prog) - - -def pgtable_l5_enabled(prog): - """ - .. c:function:: bool pgtable_l5_enabled(void) - - Return whether 5-level paging is enabled. 
- """ - return _linux_helper_pgtable_l5_enabled(prog) diff --git a/drgn/helpers/linux/bpf.py b/drgn/helpers/linux/bpf.py index 291c28cba..af7d083d6 100644 --- a/drgn/helpers/linux/bpf.py +++ b/drgn/helpers/linux/bpf.py @@ -12,12 +12,12 @@ import itertools +from typing import Iterator -from drgn import cast +from drgn import IntegerLike, Object, Program, cast from drgn.helpers.linux.idr import idr_for_each from drgn.helpers.linux.list import list_for_each_entry - __all__ = ( "bpf_map_for_each", "bpf_prog_for_each", @@ -26,11 +26,9 @@ ) -def bpf_map_for_each(prog): +def bpf_map_for_each(prog: Program) -> Iterator[Object]: """ - .. c:function:: bpf_map_for_each(prog) - - Iterate over all bpf maps. + Iterate over all BPF maps. :return: Iterator of ``struct bpf_map *`` objects. """ @@ -38,11 +36,9 @@ def bpf_map_for_each(prog): yield cast("struct bpf_map *", entry) -def bpf_prog_for_each(prog): +def bpf_prog_for_each(prog: Program) -> Iterator[Object]: """ - .. c:function:: bpf_prog_for_each(prog) - - Iterate over all bpf programs. + Iterate over all BPF programs. :return: Iterator of ``struct bpf_prog *`` objects. """ @@ -50,13 +46,15 @@ def bpf_prog_for_each(prog): yield cast("struct bpf_prog *", entry) -def cgroup_bpf_prog_for_each(cgrp, bpf_attach_type): +def cgroup_bpf_prog_for_each( + cgrp: Object, bpf_attach_type: IntegerLike +) -> Iterator[Object]: """ - .. c:function:: cgroup_bpf_prog_for_each(struct cgroup *cgrp, int bpf_attach_type) - - Iterate over all cgroup bpf programs of the given attach type attached to + Iterate over all cgroup BPF programs of the given attach type attached to the given cgroup. + :param cgrp: ``struct cgroup *`` + :param bpf_attach_type: ``enum bpf_attach_type`` :return: Iterator of ``struct bpf_prog *`` objects. 
""" progs_head = cgrp.bpf.progs[bpf_attach_type] @@ -66,13 +64,15 @@ def cgroup_bpf_prog_for_each(cgrp, bpf_attach_type): yield pl.prog -def cgroup_bpf_prog_for_each_effective(cgrp, bpf_attach_type): +def cgroup_bpf_prog_for_each_effective( + cgrp: Object, bpf_attach_type: IntegerLike +) -> Iterator[Object]: """ - .. c:function:: cgroup_bpf_prog_for_each(struct cgroup *cgrp, int bpf_attach_type) - - Iterate over all effective cgroup bpf programs of the given attach type for + Iterate over all effective cgroup BPF programs of the given attach type for the given cgroup. + :param cgrp: ``struct cgroup *`` + :param bpf_attach_type: ``enum bpf_attach_type`` :return: Iterator of ``struct bpf_prog *`` objects. """ prog_array_items = cgrp.bpf.effective[bpf_attach_type].items diff --git a/drgn/helpers/linux/cgroup.py b/drgn/helpers/linux/cgroup.py index c492136e0..b0e112a05 100644 --- a/drgn/helpers/linux/cgroup.py +++ b/drgn/helpers/linux/cgroup.py @@ -10,8 +10,9 @@ supported. """ +from typing import Callable, Iterator -from drgn import NULL, cast, container_of +from drgn import NULL, Object, cast, container_of from drgn.helpers.linux.kernfs import kernfs_name, kernfs_path from drgn.helpers.linux.list import list_for_each_entry @@ -27,22 +28,24 @@ ) -def sock_cgroup_ptr(skcd): +def sock_cgroup_ptr(skcd: Object) -> Object: """ - .. c:function:: struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) - Get the cgroup for a socket from the given ``struct sock_cgroup_data *`` (usually from ``struct sock::sk_cgrp_data``). + + :param skcd: ``struct sock_cgroup_data *`` + :return: ``struct cgroup *`` """ return cast("struct cgroup *", skcd.val) -def cgroup_parent(cgrp): +def cgroup_parent(cgrp: Object) -> Object: """ - .. c:function:: struct cgroup *cgroup_parent(struct cgroup *cgrp) - Return the parent cgroup of the given cgroup if it exists, ``NULL`` otherwise. 
+ + :param cgrp: ``struct cgroup *`` + :return: ``struct cgroup *`` """ parent_css = cgrp.self.parent if parent_css: @@ -50,34 +53,32 @@ def cgroup_parent(cgrp): return NULL(cgrp.prog_, "struct cgroup *") -def cgroup_name(cgrp): +def cgroup_name(cgrp: Object) -> bytes: """ - .. c:function:: char *cgroup_name(struct cgroup *cgrp) - Get the name of the given cgroup. - :rtype: bytes + :param cgrp: ``struct cgroup *`` """ return kernfs_name(cgrp.kn) -def cgroup_path(cgrp): +def cgroup_path(cgrp: Object) -> bytes: """ - .. c:function:: char *cgroup_path(struct cgroup *cgrp) - Get the full path of the given cgroup. - :rtype: bytes + :param cgrp: ``struct cgroup *`` """ return kernfs_path(cgrp.kn) -def css_next_child(pos, parent): +def css_next_child(pos: Object, parent: Object) -> Object: """ - .. c:function:: struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent) - Get the next child (or ``NULL`` if there is none) of the given parent starting from the given position (``NULL`` to initiate traversal). + + :param pos: ``struct cgroup_subsys_state *`` + :param parent: ``struct cgroup_subsys_state *`` + :return: ``struct cgroup_subsys_state *`` """ if not pos: next_ = container_of( @@ -98,13 +99,15 @@ def css_next_child(pos, parent): return NULL(next_.prog_, "struct cgroup_subsys_state *") -def css_next_descendant_pre(pos, root): +def css_next_descendant_pre(pos: Object, root: Object) -> Object: """ - .. c:function:: struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *root) - Get the next pre-order descendant (or ``NULL`` if there is none) of the given css root starting from the given position (``NULL`` to initiate traversal). + + :param pos: ``struct cgroup_subsys_state *`` + :param root: ``struct cgroup_subsys_state *`` + :return: ``struct cgroup_subsys_state *`` """ # If first iteration, visit root. 
if not pos: @@ -126,7 +129,9 @@ def css_next_descendant_pre(pos, root): return NULL(root.prog_, "struct cgroup_subsys_state *") -def _css_for_each_impl(next_fn, css): +def _css_for_each_impl( + next_fn: Callable[[Object, Object], Object], css: Object +) -> Iterator[Object]: pos = NULL(css.prog_, "struct cgroup_subsys_state *") while True: pos = next_fn(pos, css) @@ -136,23 +141,21 @@ def _css_for_each_impl(next_fn, css): yield pos -def css_for_each_child(css): +def css_for_each_child(css: Object) -> Iterator[Object]: """ - .. c:function:: css_for_each_child(struct cgroup_subsys_state *css) - Iterate through children of the given css. + :param css: ``struct cgroup_subsys_state *`` :return: Iterator of ``struct cgroup_subsys_state *`` objects. """ return _css_for_each_impl(css_next_child, css) -def css_for_each_descendant_pre(css): +def css_for_each_descendant_pre(css: Object) -> Iterator[Object]: """ - .. c:function:: css_for_each_descendant_pre(struct cgroup_subsys_state *css) - Iterate through the given css's descendants in pre-order. + :param css: ``struct cgroup_subsys_state *`` :return: Iterator of ``struct cgroup_subsys_state *`` objects. """ return _css_for_each_impl(css_next_descendant_pre, css) diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index 4d9a55508..2547452b1 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -9,6 +9,10 @@ masks from :linux:`include/linux/cpumask.h`. """ +from typing import Iterator + +from drgn import Object, Program, sizeof + __all__ = ( "for_each_cpu", "for_each_online_cpu", @@ -17,45 +21,31 @@ ) -def for_each_cpu(mask): +def for_each_cpu(mask: Object) -> Iterator[int]: """ - .. c:function:: for_each_cpu(struct cpumask mask) - Iterate over all of the CPUs in the given mask. 
- :rtype: Iterator[int] + :param mask: ``struct cpumask`` """ bits = mask.bits - word_bits = 8 * bits.type_.type.size - for i in range(bits.type_.length): + word_bits = 8 * sizeof(bits.type_.type) + for i in range(bits.type_.length): # type: ignore word = bits[i].value_() for j in range(word_bits): if word & (1 << j): yield (word_bits * i) + j -def for_each_possible_cpu(prog): - """ - Iterate over all possible CPUs. - - :rtype: Iterator[int] - """ +def for_each_possible_cpu(prog: Program) -> Iterator[int]: + """Iterate over all possible CPUs.""" return for_each_cpu(prog["__cpu_possible_mask"]) -def for_each_online_cpu(prog): - """ - Iterate over all online CPUs. - - :rtype: Iterator[int] - """ +def for_each_online_cpu(prog: Program) -> Iterator[int]: + """Iterate over all online CPUs.""" return for_each_cpu(prog["__cpu_online_mask"]) -def for_each_present_cpu(prog): - """ - Iterate over all present CPUs. - - :rtype: Iterator[int] - """ +def for_each_present_cpu(prog: Program) -> Iterator[int]: + """Iterate over all present CPUs.""" return for_each_cpu(prog["__cpu_present_mask"]) diff --git a/drgn/helpers/linux/device.py b/drgn/helpers/linux/device.py index f339ec2d6..0efc10e4d 100644 --- a/drgn/helpers/linux/device.py +++ b/drgn/helpers/linux/device.py @@ -9,7 +9,9 @@ Linux devices, including the kernel encoding of ``dev_t``. """ -from drgn import Object, cast +import operator + +from drgn import IntegerLike __all__ = ( "MAJOR", @@ -23,37 +25,29 @@ _MINORMASK = (1 << _MINORBITS) - 1 -def MAJOR(dev): +def MAJOR(dev: IntegerLike) -> int: """ - .. c:function:: unsigned int MAJOR(dev_t dev) - Return the major ID of a kernel ``dev_t``. + + :param dev: ``dev_t`` object or :class:`int`. """ - major = dev >> _MINORBITS - if isinstance(major, Object): - return cast("unsigned int", major) - return major + return operator.index(dev) >> _MINORBITS -def MINOR(dev): +def MINOR(dev: IntegerLike) -> int: """ - ..
c:function:: unsigned int MINOR(dev_t dev) - Return the minor ID of a kernel ``dev_t``. + + :param dev: ``dev_t`` object or :class:`int`. """ - minor = dev & _MINORMASK - if isinstance(minor, Object): - return cast("unsigned int", minor) - return minor + return operator.index(dev) & _MINORMASK -def MKDEV(major, minor): +def MKDEV(major: IntegerLike, minor: IntegerLike) -> int: """ - .. c:function:: dev_t MKDEV(unsigned int major, unsigned int minor) - Return a kernel ``dev_t`` from the major and minor IDs. + + :param major: Device major ID. + :param minor: Device minor ID. """ - dev = (major << _MINORBITS) | minor - if isinstance(dev, Object): - return cast("dev_t", dev) - return dev + return (operator.index(major) << _MINORBITS) | operator.index(minor) diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 5d30a7205..27a922118 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -10,8 +10,9 @@ """ import os +from typing import Iterator, Optional, Tuple, Union, overload -from drgn import Object, Program, container_of +from drgn import IntegerLike, Object, Path, Program, container_of, sizeof from drgn.helpers import escape_ascii_string from drgn.helpers.linux.list import ( hlist_empty, @@ -37,7 +38,7 @@ ) -def _follow_mount(mnt, dentry): +def _follow_mount(mnt: Object, dentry: Object) -> Tuple[Object, Object]: # DCACHE_MOUNTED is a macro, so we can't easily get the value. But, it # hasn't changed since v2.6.38, so let's hardcode it for now.
DCACHE_MOUNTED = 0x10000 @@ -54,7 +55,9 @@ def _follow_mount(mnt, dentry): return mnt, dentry -def _follow_dotdot(mnt, dentry, root_mnt, root_dentry): +def _follow_dotdot( + mnt: Object, dentry: Object, root_mnt: Object, root_dentry: Object +) -> Tuple[Object, Object]: while dentry != root_dentry or mnt != root_mnt: d_parent = dentry.d_parent.read_() if dentry != d_parent: @@ -68,31 +71,33 @@ def _follow_dotdot(mnt, dentry, root_mnt, root_dentry): return _follow_mount(mnt, dentry) -def path_lookup(prog_or_root, path, allow_negative=False): +def path_lookup( + prog_or_root: Union[Program, Object], path: Path, allow_negative: bool = False +) -> Object: """ - .. c:function:: struct path path_lookup(struct path *root, const char *path, bool allow_negative) - - Look up the given path name relative to the given root directory. If given - a :class:`Program` instead of a ``struct path``, the initial root - filesystem is used. - - :param bool allow_negative: Whether to allow returning a negative dentry - (i.e., a dentry for a non-existent path). + Look up the given path name. + + :param prog_or_root: ``struct path *`` object to use as root directory, or + :class:`Program` to use the initial root filesystem. + :param path: Path to look up. + :param allow_negative: Whether to allow returning a negative dentry (i.e., + a dentry for a non-existent path). + :return: ``struct path`` :raises Exception: if the dentry is negative and ``allow_negative`` is ``False``, or if the path is not present in the dcache. The latter does not necessarily mean that the path does not exist; it may be uncached. On a live system, you can make the kernel cache the path by accessing it (e.g., with :func:`open()` or :func:`os.stat()`): - >>> path_lookup(prog, '/usr/include/stdlib.h') - ...
- Exception: could not find '/usr/include/stdlib.h' in dcache - >>> open('/usr/include/stdlib.h').close() - >>> path_lookup(prog, '/usr/include/stdlib.h') - (struct path){ - .mnt = (struct vfsmount *)0xffff8b70413cdca0, - .dentry = (struct dentry *)0xffff8b702ac2c480, - } + >>> path_lookup(prog, '/usr/include/stdlib.h') + ... + Exception: could not find '/usr/include/stdlib.h' in dcache + >>> open('/usr/include/stdlib.h').close() + >>> path_lookup(prog, '/usr/include/stdlib.h') + (struct path){ + .mnt = (struct vfsmount *)0xffff8b70413cdca0, + .dentry = (struct dentry *)0xffff8b702ac2c480, + } """ if isinstance(prog_or_root, Program): prog_or_root = prog_or_root["init_task"].fs.root @@ -121,20 +126,35 @@ def path_lookup(prog_or_root, path, allow_negative=False): return Object( mnt.prog_, "struct path", - value={"mnt": mnt.mnt.address_of_(), "dentry": dentry,}, + value={"mnt": mnt.mnt.address_of_(), "dentry": dentry}, ) -def d_path(path_or_vfsmnt, dentry=None): +@overload +def d_path(path: Object) -> bytes: + """ + Return the full path of a dentry given a ``struct path``. + + :param path: ``struct path`` or ``struct path *`` + """ + ... + + +@overload +def d_path(vfsmnt: Object, dentry: Object) -> bytes: """ - .. c:function:: char *d_path(struct path *path) - .. c:function:: char *d_path(struct vfsmount *vfsmnt, struct dentry *dentry) + Return the full path of a dentry given a mount and dentry. - Return the full path of a dentry given a ``struct path *`` or a mount and a - dentry. + :param vfsmnt: ``struct vfsmount *`` + :param dentry: ``struct dentry *`` """ - type_name = str(path_or_vfsmnt.type_.type_name()) - if type_name == "struct path" or type_name == "struct path *": + ... + + +def d_path( # type: ignore # Need positional-only arguments. 
+ path_or_vfsmnt: Object, dentry: Optional[Object] = None +) -> bytes: + if dentry is None: vfsmnt = path_or_vfsmnt.mnt dentry = path_or_vfsmnt.dentry.read_() else: @@ -144,7 +164,7 @@ def d_path(path_or_vfsmnt, dentry=None): d_op = dentry.d_op.read_() if d_op and d_op.d_dname: - return None + return b"[" + dentry.d_inode.i_sb.s_type.name.string_() + b"]" components = [] while True: @@ -167,11 +187,11 @@ def d_path(path_or_vfsmnt, dentry=None): return b"/" -def dentry_path(dentry): +def dentry_path(dentry: Object) -> bytes: """ - .. c:function:: char *dentry_path(struct dentry *dentry) - Return the path of a dentry from the root of its filesystem. + + :param dentry: ``struct dentry *`` """ components = [] while True: @@ -183,11 +203,12 @@ def dentry_path(dentry): return b"/".join(reversed(components)) -def inode_path(inode): +def inode_path(inode: Object) -> Optional[bytes]: """ - .. c:function:: char *inode_path(struct inode *inode) - Return any path of an inode from the root of its filesystem. + + :param inode: ``struct inode *`` + :return: Path, or ``None`` if the inode has no aliases. """ if hlist_empty(inode.i_dentry): return None @@ -196,14 +217,12 @@ def inode_path(inode): ) -def inode_paths(inode): +def inode_paths(inode: Object) -> Iterator[bytes]: """ - .. c:function:: inode_paths(struct inode *inode) - Return an iterator over all of the paths of an inode from the root of its filesystem. - :rtype: Iterator[bytes] + :param inode: ``struct inode *`` """ return ( dentry_path(dentry) @@ -213,55 +232,54 @@ def inode_paths(inode): ) -def mount_src(mnt): +def mount_src(mnt: Object) -> bytes: """ - .. c:function:: char *mount_src(struct mount *mnt) - Get the source device name for a mount. - :rtype: bytes + :param mnt: ``struct mount *`` """ return mnt.mnt_devname.string_() -def mount_dst(mnt): +def mount_dst(mnt: Object) -> bytes: """ - .. c:function:: char *mount_dst(struct mount *mnt) - Get the path of a mount point. 
- :rtype: bytes + :param mnt: ``struct mount *`` """ return d_path(mnt.mnt.address_of_(), mnt.mnt.mnt_root) -def mount_fstype(mnt): +def mount_fstype(mnt: Object) -> bytes: """ - .. c:function:: char *mount_fstype(struct mount *mnt) - Get the filesystem type of a mount. - :rtype: bytes + :param mnt: ``struct mount *`` """ sb = mnt.mnt.mnt_sb.read_() fstype = sb.s_type.name.string_() - subtype = sb.s_subtype.read_() - if subtype: - subtype = subtype.string_() + subtype_obj = sb.s_subtype.read_() + if subtype_obj: + subtype = subtype_obj.string_() if subtype: fstype += b"." + subtype return fstype -def for_each_mount(prog_or_ns, src=None, dst=None, fstype=None): +def for_each_mount( + prog_or_ns: Union[Program, Object], + src: Optional[Path] = None, + dst: Optional[Path] = None, + fstype: Optional[Union[str, bytes]] = None, +) -> Iterator[Object]: """ - .. c:function:: for_each_mount(struct mnt_namespace *ns, char *src, char *dst, char *fstype) - - Iterate over all of the mounts in a given namespace. If given a - :class:`Program` instead, the initial mount namespace is used. returned - mounts can be filtered by source, destination, or filesystem type, all of - which are encoded using :func:`os.fsencode()`. + Iterate over all of the mounts in a given namespace. + :param prog_or_ns: ``struct mnt_namespace *`` to iterate over, or + :class:`Program` to iterate over initial mount namespace. + :param src: Only include mounts with this source device name. + :param dst: Only include mounts with this destination path. + :param fstype: Only include mounts with this filesystem type. :return: Iterator of ``struct mount *`` objects. 
""" if isinstance(prog_or_ns, Program): @@ -283,10 +301,13 @@ def for_each_mount(prog_or_ns, src=None, dst=None, fstype=None): yield mnt -def print_mounts(prog_or_ns, src=None, dst=None, fstype=None): +def print_mounts( + prog_or_ns: Union[Program, Object], + src: Optional[Path] = None, + dst: Optional[Path] = None, + fstype: Optional[Union[str, bytes]] = None, +) -> None: """ - .. c:function:: print_mounts(struct mnt_namespace *ns, char *src, char *dst, char *fstype) - Print the mount table of a given namespace. The arguments are the same as :func:`for_each_mount()`. The output format is similar to ``/proc/mounts`` but prints the value of each ``struct mount *``. @@ -300,26 +321,26 @@ def print_mounts(prog_or_ns, src=None, dst=None, fstype=None): ) -def fget(task, fd): +def fget(task: Object, fd: IntegerLike) -> Object: """ - .. c:function:: struct file *fget(struct task_struct *task, int fd) - Return the kernel file descriptor of the fd of a given task. + + :param task: ``struct task_struct *`` + :param fd: File descriptor. + :return: ``struct file *`` """ return task.files.fdt.fd[fd] -def for_each_file(task): +def for_each_file(task: Object) -> Iterator[Tuple[int, Object]]: """ - .. c:function:: for_each_file(struct task_struct *task) - Iterate over all of the files open in a given task. + :param task: ``struct task_struct *`` :return: Iterator of (fd, ``struct file *``) tuples. - :rtype: Iterator[tuple[int, Object]] """ fdt = task.files.fdt.read_() - bits_per_long = 8 * fdt.open_fds.type_.type.size + bits_per_long = 8 * sizeof(fdt.open_fds.type_.type) for i in range((fdt.max_fds.value_() + bits_per_long - 1) // bits_per_long): word = fdt.open_fds[i].value_() for j in range(bits_per_long): @@ -329,15 +350,13 @@ def for_each_file(task): yield fd, file -def print_files(task): +def print_files(task: Object) -> None: """ - .. c:function:: print_files(struct task_struct *task) - Print the open files of a given task. 
+ + :param task: ``struct task_struct *`` """ for fd, file in for_each_file(task): path = d_path(file.f_path) - if path is None: - path = file.f_inode.i_sb.s_type.name.string_() - path = escape_ascii_string(path, escape_backslash=True) - print(f"{fd} {path} ({file.type_.type_name()})0x{file.value_():x}") + escaped_path = escape_ascii_string(path, escape_backslash=True) + print(f"{fd} {escaped_path} ({file.type_.type_name()})0x{file.value_():x}") diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index 76bd521c5..d5fa25d6b 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -11,9 +11,11 @@ IDRs were not based on radix trees. """ -from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup -from _drgn import _linux_helper_idr_find +from typing import Iterator, Tuple +from _drgn import _linux_helper_idr_find as idr_find +from drgn import Object +from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup __all__ = ( "idr_find", @@ -21,24 +23,12 @@ ) -def idr_find(idr, id): - """ - .. c:function:: void *idr_find(struct idr *idr, unsigned long id) - - Look up the entry with the given id in an IDR. If it is not found, this - returns a ``NULL`` object. - """ - return _linux_helper_idr_find(idr, id) - - -def idr_for_each(idr): +def idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: """ - .. c:function:: idr_for_each(struct idr *idr) - Iterate over all of the entries in an IDR. + :param idr: ``struct idr *`` :return: Iterator of (index, ``void *``) tuples. 
- :rtype: Iterator[tuple[int, Object]] """ try: base = idr.idr_base.value_() diff --git a/drgn/helpers/linux/kconfig.py b/drgn/helpers/linux/kconfig.py index 5baf58b27..e26b3a45e 100644 --- a/drgn/helpers/linux/kconfig.py +++ b/drgn/helpers/linux/kconfig.py @@ -13,10 +13,12 @@ import types from typing import Mapping +from drgn import Program + __all__ = ("get_kconfig",) -def get_kconfig(prog) -> Mapping[str, str]: +def get_kconfig(prog: Program) -> Mapping[str, str]: """ Get the kernel build configuration as a mapping from the option name to the value. @@ -49,7 +51,7 @@ def get_kconfig(prog) -> Mapping[str, str]: ) # The data is delimited by the magic strings "IKCFG_ST" and "IKCFG_ED" # plus a NUL byte. - start = kernel_config_data.address_ + 8 + start = kernel_config_data.address_ + 8 # type: ignore[operator] size = len(kernel_config_data) - 17 data = prog.read(start, size) diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index 33c68d4a1..103ef7ca5 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -9,38 +9,36 @@ kernfs pseudo filesystem interface in :linux:`include/linux/kernfs.h`. """ +from drgn import Object + __all__ = ( "kernfs_name", "kernfs_path", ) -def kernfs_name(kn): +def kernfs_name(kn: Object) -> bytes: """ - .. c:function:: char *kernfs_name(struct kernfs_node *kn) - Get the name of the given kernfs node. - :rtype: bytes + :param kn: ``struct kernfs_node *`` """ if not kn: return b"(null)" return kn.name.string_() if kn.parent else b"/" -def _kernfs_root(kn): +def _kernfs_root(kn: Object) -> Object: if kn.parent: kn = kn.parent return kn.dir.root -def kernfs_path(kn): +def kernfs_path(kn: Object) -> bytes: """ - .. c:function:: char *kernfs_path(struct kernfs_node *kn) - Get full path of the given kernfs node. 
- :rtype: bytes + :param kn: ``struct kernfs_node *`` """ if not kn: return b"(null)" diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index d29e06f66..dc3aace4f 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -10,8 +10,9 @@ hlist_head``) in :linux:`include/linux/list.h`. """ -from drgn import NULL, container_of +from typing import Iterator, Union +from drgn import NULL, Object, Type, container_of __all__ = ( "hlist_empty", @@ -31,91 +32,107 @@ ) -def list_empty(head): +def list_empty(head: Object) -> bool: """ - .. c:function:: bool list_empty(struct list_head *head) - Return whether a list is empty. + + :param head: ``struct list_head *`` """ head = head.read_() return head.next == head -def list_is_singular(head): +def list_is_singular(head: Object) -> bool: """ - .. c:function:: bool list_is_singular(struct list_head *head) - Return whether a list has only one element. + + :param head: ``struct list_head *`` """ head = head.read_() next = head.next return next != head and next == head.prev -def list_first_entry(head, type, member): +def list_first_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ - .. c:function:: type *list_first_entry(struct list_head *head, type, member) - Return the first entry in a list. The list is assumed to be non-empty. See also :func:`list_first_entry_or_null()`. + + :param head: ``struct list_head *`` + :param type: Entry type. + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(head.next, type, member) -def list_first_entry_or_null(head, type, member): +def list_first_entry_or_null( + head: Object, type: Union[str, Type], member: str +) -> Object: """ - .. c:function:: type *list_first_entry_or_null(struct list_head *head, type, member) - Return the first entry in a list or ``NULL`` if the list is empty. See also :func:`list_first_entry()`. + + :param head: ``struct list_head *`` + :param type: Entry type. 
+ :param member: Name of list node member in entry type. + :return: ``type *`` """ head = head.read_() pos = head.next.read_() if pos == head: + if isinstance(type, str): + type = head.prog_.type(type) return NULL(head.prog_, head.prog_.pointer_type(type)) else: return container_of(pos, type, member) -def list_last_entry(head, type, member): +def list_last_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ - .. c:function:: type *list_last_entry(struct list_head *head, type, member) - Return the last entry in a list. The list is assumed to be non-empty. + + :param head: ``struct list_head *`` + :param type: Entry type. + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(head.prev, type, member) -def list_next_entry(pos, member): +def list_next_entry(pos: Object, member: str) -> Object: """ - .. c:function:: type *list_next_entry(type *pos, member) - Return the next entry in a list. + + :param pos: ``type*`` + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(getattr(pos, member).next, pos.type_.type, member) -def list_prev_entry(pos, member): +def list_prev_entry(pos: Object, member: str) -> Object: """ - .. c:function:: type *list_prev_entry(type *pos, member) - Return the previous entry in a list. + + :param pos: ``type*`` + :param member: Name of list node member in entry type. + :return: ``type *`` """ return container_of(getattr(pos, member).prev, pos.type_.type, member) -def list_for_each(head): +def list_for_each(head: Object) -> Iterator[Object]: """ - .. c:function:: list_for_each(struct list_head *head) - Iterate over all of the nodes in a list. + :param head: ``struct list_head *`` :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() @@ -125,12 +142,11 @@ def list_for_each(head): pos = pos.next.read_() -def list_for_each_reverse(head): +def list_for_each_reverse(head: Object) -> Iterator[Object]: """ - .. 
c:function:: list_for_each_reverse(struct list_head *head) - Iterate over all of the nodes in a list in reverse order. + :param head: ``struct list_head *`` :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() @@ -140,47 +156,48 @@ def list_for_each_reverse(head): pos = pos.prev.read_() -def list_for_each_entry(type, head, member): +def list_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: """ - .. c:function:: list_for_each_entry(type, struct list_head *head, member) - - Iterate over all of the entries in a list, given the type of the entry and - the ``struct list_head`` member in that type. + Iterate over all of the entries in a list. + :param type: Entry type. + :param head: ``struct list_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ for pos in list_for_each(head): yield container_of(pos, type, member) -def list_for_each_entry_reverse(type, head, member): +def list_for_each_entry_reverse( + type: str, head: Object, member: str +) -> Iterator[Object]: """ - .. c:function:: list_for_each_entry_reverse(type, struct list_head *head, member) - - Iterate over all of the entries in a list in reverse order, given the type - of the entry and the ``struct list_head`` member in that type. + Iterate over all of the entries in a list in reverse order. + :param type: Entry type. + :param head: ``struct list_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ for pos in list_for_each_reverse(head): yield container_of(pos, type, member) -def hlist_empty(head): +def hlist_empty(head: Object) -> bool: """ - .. c:function:: bool hlist_empty(struct hlist_head *head) - Return whether a hash list is empty. + + :param head: ``struct hlist_head *`` """ return not head.first -def hlist_for_each(head): +def hlist_for_each(head: Object) -> Iterator[Object]: """ - .. 
c:function:: hlist_for_each(struct hlist_head *head) - Iterate over all of the nodes in a hash list. + :param head: ``struct hlist_head *`` :return: Iterator of ``struct hlist_node *`` objects. """ pos = head.first.read_() @@ -189,13 +206,13 @@ def hlist_for_each(head): pos = pos.next.read_() -def hlist_for_each_entry(type, head, member): +def hlist_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: """ - .. c:function:: hlist_for_each_entry(type, struct hlist_head *head, member) - - Iterate over all of the entries in a has list, given the type of the entry - and the ``struct hlist_node`` member in that type. + Iterate over all of the entries in a hash list. + :param type: Entry type. + :param head: ``struct hlist_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ for pos in hlist_for_each(head): diff --git a/drgn/helpers/linux/list_nulls.py b/drgn/helpers/linux/list_nulls.py index 51a490762..a5161e3b8 100644 --- a/drgn/helpers/linux/list_nulls.py +++ b/drgn/helpers/linux/list_nulls.py @@ -11,57 +11,47 @@ list is not a ``NULL`` pointer, but a "nulls" marker. """ -from drgn import container_of +from typing import Iterator +from drgn import Object, container_of __all__ = ( "hlist_nulls_empty", - "hlist_nulls_entry", "hlist_nulls_for_each_entry", "is_a_nulls", ) -def is_a_nulls(pos): +def is_a_nulls(pos: Object) -> bool: """ - .. c:function:: bool is_a_nulls(struct hlist_nulls_node *pos) - Return whether a a pointer is a nulls marker. + + :param pos: ``struct hlist_nulls_node *`` """ return bool(pos.value_() & 1) -def hlist_nulls_empty(head): +def hlist_nulls_empty(head: Object) -> bool: """ - .. c:function:: bool hlist_nulls_empty(struct hlist_nulls_head *head) - Return whether a nulls hash list is empty. - """ - return is_a_nulls(head.first) - -def hlist_nulls_entry(pos, type, member): + :param head: ``struct hlist_nulls_head *`` """ - .. 
c:function:: type *hlist_nulls_entry(struct hlist_nulls_node *pos, type, member) - - Return an entry in a nulls hash list. - - The nulls hash list is assumed to be non-empty. - """ - return container_of(pos, type, member) + return is_a_nulls(head.first) -def hlist_nulls_for_each_entry(type, head, member): +def hlist_nulls_for_each_entry( + type: str, head: Object, member: str +) -> Iterator[Object]: """ - .. c:function:: hlist_nulls_for_each_entry(type, struct hlist_nulls_head *head, member) - - Iterate over all the entries in a nulls hash list specified by ``struct - hlist_nulls_head`` head, given the type of the entry and the ``struct - hlist_nulls_node`` member in that type. + Iterate over all the entries in a nulls hash list. + :param type: Entry type. + :param head: ``struct hlist_nulls_head *`` + :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ pos = head.first while not is_a_nulls(pos): - yield hlist_nulls_entry(pos, type, member) + yield container_of(pos, type, member) pos = pos.next diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 684b2444f..50f9c124e 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -10,11 +10,11 @@ implemented. """ -from typing import List +import operator +from typing import Any, Iterator, List, Optional, Union, overload from _drgn import _linux_helper_read_vm -from drgn import Object, cast - +from drgn import IntegerLike, Object, Program, cast __all__ = ( "access_process_vm", @@ -31,110 +31,193 @@ ) -def for_each_page(prog): +def for_each_page(prog: Program) -> Iterator[Object]: """ Iterate over all pages in the system. :return: Iterator of ``struct page *`` objects. """ vmemmap = prog["vmemmap"] - for i in range(prog["max_pfn"]): + for i in range(prog["max_pfn"].value_()): yield vmemmap + i -def page_to_pfn(page): +def page_to_pfn(page: Object) -> Object: """ - .. 
c:function:: unsigned long page_to_pfn(struct page *page) - Get the page frame number (PFN) of a page. + + :param page: ``struct page *`` + :return: ``unsigned long`` """ return cast("unsigned long", page - page.prog_["vmemmap"]) -def pfn_to_page(prog_or_pfn, pfn=None): +@overload +def pfn_to_page(pfn: Object) -> Object: + """ + Get the page with a page frame number (PFN) given as an :class:`.Object`. + + :param pfn: ``unsigned long`` + :return: ``struct page *`` + """ + ... + + +@overload +def pfn_to_page(prog: Program, pfn: IntegerLike) -> Object: """ - .. c:function:: struct page *pfn_to_page(unsigned long pfn) + Get the page with a page frame number (PFN) given as a :class:`.Program` + and an integer. - Get the page with the given page frame number (PFN). This can take the PFN - as an :class:`Object`, or a :class:`Program` and the PFN as an ``int``. + :param pfn: Page frame number. + :return: ``struct page *`` """ + ... + + +def pfn_to_page( # type: ignore # Need positional-only arguments. + prog_or_pfn: Union[Program, Object], pfn: Optional[IntegerLike] = None +) -> Object: if pfn is None: + assert isinstance(prog_or_pfn, Object) prog = prog_or_pfn.prog_ pfn = prog_or_pfn else: + assert isinstance(prog_or_pfn, Program) prog = prog_or_pfn return prog["vmemmap"] + pfn -def virt_to_pfn(prog_or_addr, addr=None): +@overload +def virt_to_pfn(addr: Object) -> Object: """ - .. c:function:: unsigned long virt_to_pfn(void *addr) + Get the page frame number (PFN) of a directly mapped virtual address given + as an :class:`.Object`. - Get the page frame number (PFN) of a directly mapped virtual address. This - can take the address as an :class:`Object`, or a :class:`Program` and the - address as an ``int``. + :param addr: ``void *`` + :return: ``unsigned long`` """ + ... + + +@overload +def virt_to_pfn(prog: Program, addr: IntegerLike) -> Object: + """ + Get the page frame number (PFN) of a directly mapped virtual address given + as a :class:`.Program` and an integer. 
+ + :param addr: Virtual address. + :return: ``unsigned long`` + """ + ... + + +def virt_to_pfn( # type: ignore # Need positional-only arguments. + prog_or_addr: Union[Program, Object], addr: Optional[IntegerLike] = None +) -> Object: if addr is None: + assert isinstance(prog_or_addr, Object) prog = prog_or_addr.prog_ - addr = prog_or_addr.value_() + addr = prog_or_addr else: + assert isinstance(prog_or_addr, Program) prog = prog_or_addr - return Object(prog, "unsigned long", value=(addr - prog["PAGE_OFFSET"]) >> 12) + return cast("unsigned long", (operator.index(addr) - prog["PAGE_OFFSET"]) >> 12) + + +@overload +def pfn_to_virt(pfn: Object) -> Object: + """ + Get the directly mapped virtual address of a page frame number (PFN) given + as an :class:`.Object`. + + :param pfn: ``unsigned long`` + :return: ``void *`` + """ + ... -def pfn_to_virt(prog_or_pfn, pfn=None): +@overload +def pfn_to_virt(prog: Program, pfn: IntegerLike) -> Object: """ - .. c:function:: void *pfn_to_virt(unsigned long pfn) + Get the directly mapped virtual address of a page frame number (PFN) given + as a :class:`.Program` and an integer. - Get the directly mapped virtual address of the given page frame number - (PFN). This can take the PFN as an :class:`Object`, or a :class:`Program` - and the PFN as an ``int``. + :param pfn: Page frame number. + :return: ``void *`` """ + + +def pfn_to_virt( # type: ignore # Need positional-only arguments. + prog_or_pfn: Union[Program, Object], pfn: Optional[IntegerLike] = None +) -> Object: if pfn is None: + assert isinstance(prog_or_pfn, Object) prog = prog_or_pfn.prog_ - pfn = prog_or_pfn.value_() + pfn = prog_or_pfn else: + assert isinstance(prog_or_pfn, Program) prog = prog_or_pfn - return Object(prog, "void *", value=(pfn << 12) + prog["PAGE_OFFSET"]) + return cast("void *", (operator.index(pfn) << 12) + prog["PAGE_OFFSET"]) -def page_to_virt(page): +def page_to_virt(page: Object) -> Object: """ - .. 
c:function:: void *page_to_virt(struct page *page) - Get the directly mapped virtual address of a page. + + :param page: ``struct page *`` + :return: ``void *`` """ return pfn_to_virt(page_to_pfn(page)) -def virt_to_page(prog_or_addr, addr=None): +@overload +def virt_to_page(addr: Object) -> Object: """ - .. c:function:: struct page *virt_to_page(void *addr) + Get the page containing a directly mapped virtual address given as an + :class:`.Object`. - Get the page containing a directly mapped virtual address. This can take - the address as an :class:`Object`, or a :class:`Program` and the address as - an ``int``. + :param addr: ``void *`` + :return: ``struct page *`` """ - return pfn_to_page(virt_to_pfn(prog_or_addr, addr)) + ... -def access_process_vm(task, address, size) -> bytes: +@overload +def virt_to_page(prog: Program, addr: IntegerLike) -> Object: + """ + Get the page containing a directly mapped virtual address given as a + :class:`.Program` and an integer. + + :param addr: Virtual address. + :return: ``struct page *`` """ - .. c:function:: char *access_process_vm(struct task_struct *task, void *address, size_t size) + ... + +def virt_to_page( # type: ignore # Need positional-only arguments. + prog_or_addr: Union[Program, Object], addr: Optional[IntegerLike] = None +) -> Object: + return pfn_to_page(virt_to_pfn(prog_or_addr, addr)) # type: ignore[arg-type] + + +def access_process_vm(task: Object, address: IntegerLike, size: IntegerLike) -> bytes: + """ Read memory from a task's virtual address space. >>> task = find_task(prog, 1490152) >>> access_process_vm(task, 0x7f8a62b56da0, 12) b'hello, world' + + :param task: ``struct task_struct *`` + :param address: Starting address. + :param size: Number of bytes to read. """ return _linux_helper_read_vm(task.prog_, task.mm.pgd, address, size) -def access_remote_vm(mm, address, size) -> bytes: +def access_remote_vm(mm: Object, address: IntegerLike, size: IntegerLike) -> bytes: """ - .. 
c:function:: char *access_remote_vm(struct mm_struct *mm, void *address, size_t size) - Read memory from a virtual address space. This is similar to :func:`access_process_vm()`, but it takes a ``struct mm_struct *`` instead of a ``struct task_struct *``. @@ -142,11 +225,15 @@ def access_remote_vm(mm, address, size) -> bytes: >>> task = find_task(prog, 1490152) >>> access_remote_vm(task.mm, 0x7f8a62b56da0, 12) b'hello, world' + + :param mm: ``struct mm_struct *`` + :param address: Starting address. + :param size: Number of bytes to read. """ return _linux_helper_read_vm(mm.prog_, mm.pgd, address, size) -def cmdline(task) -> List[bytes]: +def cmdline(task: Object) -> List[bytes]: """ Get the list of command line arguments of a task. @@ -157,6 +244,8 @@ def cmdline(task) -> List[bytes]: $ tr '\\0' ' ' < /proc/1495216/cmdline vim drgn/helpers/linux/mm.py + + :param task: ``struct task_struct *`` """ mm = task.mm.read_() arg_start = mm.arg_start.value_() @@ -164,7 +253,7 @@ def cmdline(task) -> List[bytes]: return access_remote_vm(mm, arg_start, arg_end - arg_start).split(b"\0")[:-1] -def environ(task) -> List[bytes]: +def environ(task: Object) -> List[bytes]: """ Get the list of environment variables of a task. @@ -177,6 +266,8 @@ def environ(task) -> List[bytes]: HOME=/root PATH=/usr/local/sbin:/usr/local/bin:/usr/bin LOGNAME=root + + :param task: ``struct task_struct *`` """ mm = task.mm.read_() env_start = mm.env_start.value_() diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 4a8e406e4..7f1e15a79 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -9,36 +9,37 @@ Linux kernel networking subsystem. """ +from typing import Iterator + +from drgn import Object from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry from drgn.helpers.linux.tcp import sk_tcpstate - __all__ = ( "sk_fullsock", "sk_nulls_for_each", ) -def sk_fullsock(sk): +def sk_fullsock(sk: Object) -> bool: """ - .. 
c:function:: bool sk_fullsock(struct sock *sk) - Check whether a socket is a full socket, i.e., not a time-wait or request socket. + + :param sk: ``struct sock *`` """ prog = sk.prog_ state = sk.__sk_common.skc_state.value_() return state != prog["TCP_SYN_RECV"] and state != prog["TCP_TIME_WAIT"] -def sk_nulls_for_each(head): +def sk_nulls_for_each(head: Object) -> Iterator[Object]: """ - .. c:function:: sk_nulls_for_each(struct hlist_nulls_head *head) - Iterate over all the entries in a nulls hash list of sockets specified by ``struct hlist_nulls_head`` head. - :return: Iterator of ``struct sock`` objects. + :param head: ``struct hlist_nulls_head *`` + :return: Iterator of ``struct sock *`` objects. """ for sk in hlist_nulls_for_each_entry( "struct sock", head, "__sk_common.skc_nulls_node" diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index 1b97d7b3e..402ce801a 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -10,31 +10,32 @@ from :linux:`include/linux/percpu_counter.h`. """ -from drgn import Object +from drgn import IntegerLike, Object from drgn.helpers.linux.cpumask import for_each_online_cpu - __all__ = ( "per_cpu_ptr", "percpu_counter_sum", ) -def per_cpu_ptr(ptr, cpu): +def per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: """ - .. c:function:: type *per_cpu_ptr(type __percpu *ptr, int cpu) - Return the per-CPU pointer for a given CPU. + + :param ptr: ``type __percpu *`` + :param cpu: CPU number. + :return: ``type *`` """ offset = ptr.prog_["__per_cpu_offset"][cpu].value_() return Object(ptr.prog_, ptr.type_, value=ptr.value_() + offset) -def percpu_counter_sum(fbc): +def percpu_counter_sum(fbc: Object) -> int: """ - .. c:function:: s64 percpu_counter_sum(struct percpu_counter *fbc) - Return the sum of a per-CPU counter. 
+ + :param fbc: ``struct percpu_counter *`` """ ret = fbc.count.value_() ptr = fbc.counters diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index 1ff744f19..fe816c492 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -9,14 +9,16 @@ IDs and processes. """ -from drgn import NULL, Program, cast, container_of -from drgn.helpers.linux.idr import idr_find, idr_for_each -from drgn.helpers.linux.list import hlist_for_each_entry +from typing import Iterator, Union + from _drgn import ( - _linux_helper_find_pid, - _linux_helper_find_task, - _linux_helper_pid_task, + _linux_helper_find_pid as find_pid, + _linux_helper_find_task as find_task, + _linux_helper_pid_task as pid_task, ) +from drgn import NULL, Object, Program, cast, container_of +from drgn.helpers.linux.idr import idr_find, idr_for_each +from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( "find_pid", @@ -27,24 +29,12 @@ ) -def find_pid(prog_or_ns, nr): +def for_each_pid(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: """ - .. c:function:: struct pid *find_pid(struct pid_namespace *ns, int nr) - - Return the ``struct pid *`` for the given PID number in the given - namespace. If given a :class:`Program` instead, the initial PID namespace - is used. - """ - return _linux_helper_find_pid(prog_or_ns, nr) - - -def for_each_pid(prog_or_ns): - """ - .. c:function:: for_each_pid(struct pid_namespace *ns) - - Iterate over all of the PIDs in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. + Iterate over all PIDs in a namespace. + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. :return: Iterator of ``struct pid *`` objects. 
""" if isinstance(prog_or_ns, Program): @@ -66,33 +56,12 @@ def for_each_pid(prog_or_ns): yield container_of(upid, "struct pid", f"numbers[{int(ns.level)}]") -def pid_task(pid, pid_type): +def for_each_task(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: """ - .. c:function:: struct task_struct *pid_task(struct pid *pid, enum pid_type pid_type) - - Return the ``struct task_struct *`` containing the given ``struct pid *`` - of the given type. - """ - return _linux_helper_pid_task(pid, pid_type) - - -def find_task(prog_or_ns, pid): - """ - .. c:function:: struct task_struct *find_task(struct pid_namespace *ns, int pid) - - Return the task with the given PID in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. - """ - return _linux_helper_find_task(prog_or_ns, pid) - - -def for_each_task(prog_or_ns): - """ - .. c:function:: for_each_task(struct pid_namespace *ns) - - Iterate over all of the tasks visible in the given namespace. If given a - :class:`Program` instead, the initial PID namespace is used. + Iterate over all of the tasks visible in a namespace. + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. :return: Iterator of ``struct task_struct *`` objects. """ if isinstance(prog_or_ns, Program): diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index 2796c6bbc..b0aa67e97 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -9,9 +9,10 @@ radix trees from :linux:`include/linux/radix-tree.h`. 
""" -from drgn import Object, cast -from _drgn import _linux_helper_radix_tree_lookup +from typing import Iterator, Tuple +from _drgn import _linux_helper_radix_tree_lookup as radix_tree_lookup +from drgn import Object, cast __all__ = ( "radix_tree_for_each", @@ -21,15 +22,15 @@ _RADIX_TREE_ENTRY_MASK = 3 -def _is_internal_node(node, internal_node): +def _is_internal_node(node: Object, internal_node: int) -> bool: return (node.value_() & _RADIX_TREE_ENTRY_MASK) == internal_node -def _entry_to_node(node, internal_node): +def _entry_to_node(node: Object, internal_node: int) -> Object: return Object(node.prog_, node.type_, value=node.value_() & ~internal_node) -def _radix_tree_root_node(root): +def _radix_tree_root_node(root: Object) -> Tuple[Object, int]: try: node = root.xa_head except AttributeError: @@ -38,28 +39,16 @@ def _radix_tree_root_node(root): return cast("struct xa_node *", node).read_(), 2 -def radix_tree_lookup(root, index): - """ - .. c:function:: void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) - - Look up the entry at a given index in a radix tree. If it is not found, - this returns a ``NULL`` object. - """ - return _linux_helper_radix_tree_lookup(root, index) - - -def radix_tree_for_each(root): +def radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: """ - .. c:function:: radix_tree_for_each(struct radix_tree_root *root) - Iterate over all of the entries in a radix tree. + :param root: ``struct radix_tree_root *`` :return: Iterator of (index, ``void *``) tuples. 
- :rtype: Iterator[tuple[int, Object]] """ node, RADIX_TREE_INTERNAL_NODE = _radix_tree_root_node(root) - def aux(node, index): + def aux(node: Object, index: int) -> Iterator[Tuple[int, Object]]: if _is_internal_node(node, RADIX_TREE_INTERNAL_NODE): parent = _entry_to_node(node, RADIX_TREE_INTERNAL_NODE) for i, slot in enumerate(parent.slots): diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index ee33e468b..2bbd9a980 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -9,8 +9,9 @@ red-black trees from :linux:`include/linux/rbtree.h`. """ -from drgn import Object, NULL, container_of +from typing import Callable, Iterator, TypeVar +from drgn import NULL, Object, container_of __all__ = ( "RB_EMPTY_NODE", @@ -25,31 +26,33 @@ ) -def RB_EMPTY_NODE(node): +def RB_EMPTY_NODE(node: Object) -> bool: """ - .. c:function:: bool RB_EMPTY_NODE(struct rb_node *node) - Return whether a red-black tree node is empty, i.e., not inserted in a tree. + + :param node: ``struct rb_node *`` """ return node.__rb_parent_color.value_() == node.value_() -def rb_parent(node): +def rb_parent(node: Object) -> Object: """ - .. c:function:: struct rb_node *rb_parent(struct rb_node *node) - Return the parent node of a red-black tree node. + + :param node: ``struct rb_node *`` + :return: ``struct rb_node *`` """ return Object(node.prog_, node.type_, value=node.__rb_parent_color.value_() & ~3) -def rb_first(root): +def rb_first(root: Object) -> Object: """ - .. c:function:: struct rb_node *rb_first(struct rb_root *root) + Return the first node (in sort order) in a red-black tree, or ``NULL`` if + the tree is empty. - Return the first node (in sort order) in a red-black tree, or a ``NULL`` - object if the tree is empty. + :param root: ``struct rb_root *`` + :return: ``struct rb_node *`` """ node = root.rb_node.read_() if not node: @@ -61,12 +64,13 @@ def rb_first(root): node = next -def rb_last(root): +def rb_last(root: Object) -> Object: """ - .. 
c:function:: struct rb_node *rb_last(struct rb_root *root) + Return the last node (in sort order) in a red-black tree, or ``NULL`` if + the tree is empty. - Return the last node (in sort order) in a red-black tree, or a ``NULL`` - object if the tree is empty. + :param root: ``struct rb_root *`` + :return: ``struct rb_node *`` """ node = root.rb_node.read_() if not node: @@ -78,12 +82,13 @@ def rb_last(root): node = next -def rb_next(node): +def rb_next(node: Object) -> Object: """ - .. c:function:: struct rb_node *rb_next(struct rb_node *node) + Return the next node (in sort order) after a red-black node, or ``NULL`` if + the node is the last node in the tree or is empty. - Return the next node (in sort order) after a red-black node, or a ``NULL`` - object if the node is the last node in the tree or is empty. + :param node: ``struct rb_node *`` + :return: ``struct rb_node *`` """ node = node.read_() @@ -106,12 +111,13 @@ def rb_next(node): return parent -def rb_prev(node): +def rb_prev(node: Object) -> Object: """ - .. c:function:: struct rb_node *rb_prev(struct rb_node *node) + Return the previous node (in sort order) before a red-black node, or + ``NULL`` if the node is the first node in the tree or is empty. - Return the previous node (in sort order) before a red-black node, or a - ``NULL`` object if the node is the first node in the tree or is empty. + :param node: ``struct rb_node *`` + :return: ``struct rb_node *`` """ node = node.read_() @@ -134,16 +140,15 @@ def rb_prev(node): return parent -def rbtree_inorder_for_each(root): +def rbtree_inorder_for_each(root: Object) -> Iterator[Object]: """ - .. c:function:: rbtree_inorder_for_each(struct rb_root *root) - Iterate over all of the nodes in a red-black tree, in sort order. + :param root: ``struct rb_root *`` :return: Iterator of ``struct rb_node *`` objects. 
""" - def aux(node): + def aux(node: Object) -> Iterator[Object]: if node: yield from aux(node.rb_left.read_()) yield node @@ -152,32 +157,45 @@ def aux(node): yield from aux(root.rb_node.read_()) -def rbtree_inorder_for_each_entry(type, root, member): +def rbtree_inorder_for_each_entry( + type: str, root: Object, member: str +) -> Iterator[Object]: """ - .. c:function:: rbtree_inorder_for_each_entry(type, struct rb_root *root, member) - - Iterate over all of the entries in a red-black tree, given the type of the - entry and the ``struct rb_node`` member in that type. The entries are - returned in sort order. + Iterate over all of the entries in a red-black tree in sorted order. + :param type: Entry type. + :param root: ``struct rb_root *`` + :param member: Name of red-black node member in entry type. :return: Iterator of ``type *`` objects. """ for node in rbtree_inorder_for_each(root): yield container_of(node, type, member) -def rb_find(type, root, member, key, cmp): - """ - .. c:function:: type *rb_find(type, struct rb_root *root, member, key_type key, int (*cmp)(key_type, type *)) +KeyType = TypeVar("KeyType") - Find an entry in a red-black tree, given a key and a comparator function - which takes the key and an entry. The comparator should return < 0 if the - key is less than the entry, > 0 if it is greater than the entry, or 0 if it - matches the entry. This returns a ``NULL`` object if no entry matches the - key. + +def rb_find( + type: str, + root: Object, + member: str, + key: KeyType, + cmp: Callable[[KeyType, Object], int], +) -> Object: + """ + Find an entry in a red-black tree given a key and a comparator function. Note that this function does not have an analogue in the Linux kernel source code, as tree searches are all open-coded. + + :param type: Entry type. + :param root: ``struct rb_root *`` + :param member: Name of red-black node member in entry type. + :param key: Key to find. 
+ :param cmp: Callback taking key and entry that returns < 0 if the key is + less than the entry, > 0 if the key is greater than the entry, and 0 if + the key matches the entry. + :return: ``type *`` found entry, or ``NULL`` if not found. """ node = root.rb_node.read_() while node: @@ -189,4 +207,4 @@ def rb_find(type, root, member, key, cmp): node = node.rb_right.read_() else: return entry - return node + return NULL(root.prog_, type) diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index 85fdd74e9..3d5276867 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -9,21 +9,50 @@ Linux CPU scheduler. """ -from _drgn import _linux_helper_task_state_to_char - +from drgn import Object __all__ = ("task_state_to_char",) +_TASK_NOLOAD = 0x400 -def task_state_to_char(task): - """ - .. c:function char task_state_to_char(struct task_struct *task) +def task_state_to_char(task: Object) -> str: + """ Get the state of the task as a character (e.g., ``'R'`` for running). See `ps(1) `_ for a description of the process state codes. - :rtype: str + :param task: ``struct task_struct *`` """ - return _linux_helper_task_state_to_char(task) + prog = task.prog_ + task_state_chars: str + TASK_REPORT: int + try: + task_state_chars, TASK_REPORT = prog.cache["task_state_to_char"] + except KeyError: + task_state_array = prog["task_state_array"] + # Walk through task_state_array backwards looking for the largest state + # that we know is in TASK_REPORT, then populate the task state mapping. 
+ chars = None + for i in range(len(task_state_array) - 1, -1, -1): + c: int = task_state_array[i][0].value_() + if chars is None and c in b"RSDTtXZP": + chars = bytearray(i + 1) + TASK_REPORT = (1 << i) - 1 + if chars is not None: + chars[i] = c + if chars is None: + raise Exception("could not parse task_state_array") + task_state_chars = chars.decode("ascii") + prog.cache["task_state_to_char"] = task_state_chars, TASK_REPORT + task_state = task.state.value_() + exit_state = task.exit_state.value_() + state = (task_state | exit_state) & TASK_REPORT + char = task_state_chars[state.bit_length()] + # States beyond TASK_REPORT are special. As of Linux v5.8, TASK_IDLE is the + # only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. + if char == "D" and (task_state & ~state) == _TASK_NOLOAD: + return "I" + else: + return char diff --git a/drgn/helpers/linux/tcp.py b/drgn/helpers/linux/tcp.py index 63c32ced2..72c5387ff 100644 --- a/drgn/helpers/linux/tcp.py +++ b/drgn/helpers/linux/tcp.py @@ -9,15 +9,16 @@ protocol in the Linux kernel. """ -from drgn import cast +from drgn import Object, cast __all__ = ("sk_tcpstate",) -def sk_tcpstate(sk): +def sk_tcpstate(sk: Object) -> Object: """ - .. c:function:: enum TcpState sk_tcpstate(struct sock *sk) - Return the TCP protocol state of a socket. + + :param sk: ``struct sock *`` + :return: TCP state enum value. 
""" return cast(sk.prog_["TCP_ESTABLISHED"].type_, sk.__sk_common.skc_state) diff --git a/drgn/helpers/linux/user.py b/drgn/helpers/linux/user.py index d6edfc57e..d64d09974 100644 --- a/drgn/helpers/linux/user.py +++ b/drgn/helpers/linux/user.py @@ -10,8 +10,9 @@ """ import operator +from typing import Iterator, Union -from drgn import NULL, Object +from drgn import NULL, IntegerLike, Object, Program from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( @@ -20,18 +21,18 @@ ) -def _kuid_val(uid): +def _kuid_val(uid: Union[Object, IntegerLike]) -> int: if isinstance(uid, Object) and uid.type_.type_name() == "kuid_t": uid = uid.val return operator.index(uid) -def find_user(prog, uid): +def find_user(prog: Program, uid: Union[Object, IntegerLike]) -> Object: """ - .. c:function:: struct user_struct *find_user(kuid_t uid) + Return the user structure with the given UID. - Return the user structure with the given UID, which may be a ``kuid_t`` or - an integer. + :param uid: ``kuid_t`` object or integer. + :return: ``struct user_struct *`` """ try: uidhashentry = prog.cache["uidhashentry"] @@ -41,7 +42,7 @@ def find_user(prog, uid): uidhash_bits = uidhash_sz.bit_length() - 1 uidhash_mask = uidhash_sz - 1 - def uidhashentry(uid): + def uidhashentry(uid: int) -> Object: hash = ((uid >> uidhash_bits) + uid) & uidhash_mask return uidhash_table + hash @@ -56,7 +57,7 @@ def uidhashentry(uid): return NULL(prog, "struct user_struct *") -def for_each_user(prog): +def for_each_user(prog: Program) -> Iterator[Object]: """ Iterate over all users in the system. 
diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index de6dd93c3..05dc3e6aa 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -9,12 +9,13 @@ import importlib import os import os.path -import pkg_resources import runpy import shutil import sys from typing import Any, Dict +import pkg_resources + import drgn @@ -155,7 +156,7 @@ def main() -> None: if not isinstance(e, FileNotFoundError) and not args.quiet: print("could not read history:", str(e), file=sys.stderr) - def write_history_file(): + def write_history_file() -> None: try: readline.write_history_file(histfile) except OSError as e: diff --git a/drgn/internal/rlcompleter.py b/drgn/internal/rlcompleter.py index 0684276c2..f2321e856 100644 --- a/drgn/internal/rlcompleter.py +++ b/drgn/internal/rlcompleter.py @@ -4,12 +4,11 @@ """Improved readline completer""" import builtins -import re import keyword +import re import readline from typing import Any, Dict, List, Optional - _EXPR_RE = re.compile( r""" ( diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index 3d85bb1d1..ae17bf84c 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -3,11 +3,10 @@ """List the paths of all descendants of a cgroup v2""" +from contextlib import contextmanager import os import sys -from contextlib import contextmanager - from drgn import cast from drgn.helpers import enum_type_to_class from drgn.helpers.linux import ( @@ -18,7 +17,6 @@ find_task, ) - BpfAttachType = enum_type_to_class( prog.type("enum bpf_attach_type"), "BpfAttachType", diff --git a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 081c6854b..5e9d75cf4 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -3,12 +3,12 @@ """List the paths of all inodes cached in a given filesystem""" -from drgn.helpers.linux.fs import for_each_mount, inode_path -from drgn.helpers.linux.list import list_for_each_entry import os import sys import time +from drgn.helpers.linux.fs import 
for_each_mount, inode_path +from drgn.helpers.linux.list import list_for_each_entry if len(sys.argv) == 1: path = "/" diff --git a/examples/linux/lsmod.py b/examples/linux/lsmod.py index b7fef922a..b6e8ddf0c 100755 --- a/examples/linux/lsmod.py +++ b/examples/linux/lsmod.py @@ -5,7 +5,6 @@ from drgn.helpers.linux.list import list_for_each_entry - print("Module Size Used by") for mod in list_for_each_entry("struct module", prog["modules"].address_of_(), "list"): name = mod.name.string_().decode() diff --git a/examples/linux/ps.py b/examples/linux/ps.py index 45adfc7c8..5346db05c 100755 --- a/examples/linux/ps.py +++ b/examples/linux/ps.py @@ -5,7 +5,6 @@ from drgn.helpers.linux.pid import for_each_task - print("PID COMM") for task in for_each_task(prog): pid = task.pid.value_() diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index 438146908..68d8e84c0 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -19,7 +19,6 @@ sock_cgroup_ptr, ) - TcpState = enum_type_to_class( prog["TCP_ESTABLISHED"].type_, "TcpState", diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 426175991..29651e1a7 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -7,7 +7,7 @@ SUBDIRS = elfutils include .DELETE_ON_ERROR: -AM_CPPFLAGS = -Iinclude -D_GNU_SOURCE +AM_CPPFLAGS = -isystem include -D_GNU_SOURCE include_HEADERS = drgn.h @@ -20,16 +20,14 @@ ARCH_INS = arch_x86_64.c.in libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ binary_search_tree.h \ cityhash.h \ + debug_info.c \ + debug_info.h \ dwarf_index.c \ dwarf_index.h \ - dwarf_info_cache.c \ - dwarf_info_cache.h \ error.c \ error.h \ hash_table.c \ hash_table.h \ - internal.c \ - internal.h \ language.c \ language.h \ language_c.c \ @@ -40,16 +38,17 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ linux_kernel_helpers.c \ memory_reader.c \ memory_reader.h \ + mread.h \ object.c \ object.h \ object_index.c \ object_index.h \ path.c \ + path.h \ platform.c \ platform.h \ program.c 
\ program.h \ - read.h \ serialize.c \ serialize.h \ siphash.h \ @@ -61,8 +60,6 @@ libdrgnimpl_la_SOURCES = $(ARCH_INS:.c.in=.c) \ symbol.h \ type.c \ type.h \ - type_index.c \ - type_index.h \ util.h \ vector.c \ vector.h diff --git a/libdrgn/arch_x86_64.c.in b/libdrgn/arch_x86_64.c.in index c6e4399ca..a81950a8c 100644 --- a/libdrgn/arch_x86_64.c.in +++ b/libdrgn/arch_x86_64.c.in @@ -3,11 +3,16 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include +#include +#include -#include "internal.h" +#include "drgn.h" +#include "error.h" #include "linux_kernel.h" #include "platform.h" #include "program.h" +#include "util.h" %} x86-64 @@ -172,10 +177,13 @@ prstatus_set_initial_registers_x86_64(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "NT_PRSTATUS is truncated"); } + bool bswap; + struct drgn_error *err = drgn_program_bswap(prog, &bswap); + if (err) + return err; return set_initial_registers_from_struct_x86_64(thread, (char *)prstatus + 112, - size - 112, - drgn_program_bswap(prog)); + size - 112, bswap); } static inline struct drgn_error *read_register(struct drgn_object *reg_obj, @@ -205,7 +213,7 @@ set_initial_registers_inactive_task_frame(Dwfl_Thread *thread, Dwarf_Word dwarf_regs[5]; uint64_t sp; - drgn_object_init(®_obj, frame_obj->prog); + drgn_object_init(®_obj, drgn_object_program(frame_obj)); err = read_register(®_obj, frame_obj, "bx", &dwarf_regs[0]); if (err) @@ -260,7 +268,7 @@ linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread, const struct drgn_object *task_obj) { struct drgn_error *err; - struct drgn_program *prog = task_obj->prog; + struct drgn_program *prog = drgn_object_program(task_obj); struct drgn_object sp_obj; drgn_object_init(&sp_obj, prog); @@ -462,9 +470,11 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it, static const uint64_t ADDRESS_MASK = UINT64_C(0xffffffffff000); struct drgn_program *prog = it->prog; struct pgtable_iterator_x86_64 *arch = (void *)it->arch; - struct 
drgn_error *err; - bool bswap = drgn_program_bswap(prog); int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level; + bool bswap; + struct drgn_error *err = drgn_program_bswap(prog, &bswap); + if (err) + return err; /* Find the lowest level with cached entries. */ for (level = 0; level < levels; level++) { diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index 0fa378435..3cca5c7dd 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -12,6 +12,9 @@ #ifndef DRGN_BINARY_SEARCH_TREE_H #define DRGN_BINARY_SEARCH_TREE_H +#include +#include + #include "util.h" /** diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 354ae78f5..5a589c483 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -6,8 +6,14 @@ import sys -def gen_constant_class(drgn_h, output_file, class_name, enum_class, regex): - matches = re.findall(r"^\s*(" + regex + r")\s*[=,]", drgn_h, flags=re.MULTILINE) +def gen_constant_class(drgn_h, output_file, class_name, enum_class, constants, regex): + constants = list(constants) + constants.extend( + ("_".join(groups[1:]), groups[0]) + for groups in re.findall( + r"^\s*(" + regex + r")\s*[=,]", drgn_h, flags=re.MULTILINE + ) + ) output_file.write( f""" static int add_{class_name}(PyObject *m, PyObject *enum_module) @@ -15,15 +21,15 @@ def gen_constant_class(drgn_h, output_file, class_name, enum_class, regex): PyObject *tmp, *item; int ret = -1; - tmp = PyList_New({len(matches)}); + tmp = PyList_New({len(constants)}); if (!tmp) goto out; """ ) - for i, groups in enumerate(matches): + for i, (name, value) in enumerate(constants): output_file.write( f"""\ - item = Py_BuildValue("sk", "{'_'.join(groups[1:])}", {groups[0]}); + item = Py_BuildValue("sk", "{name}", {value}); if (!item) goto out; PyList_SET_ITEM(tmp, {i}, item); @@ -72,23 +78,30 @@ def gen_constants(input_file, output_file): """ ) gen_constant_class( - drgn_h, output_file, 
"Architecture", "Enum", r"DRGN_ARCH_([a-zA-Z0-9_]+)" + drgn_h, output_file, "Architecture", "Enum", (), r"DRGN_ARCH_([a-zA-Z0-9_]+)" ) gen_constant_class( drgn_h, output_file, "FindObjectFlags", "Flag", + (), r"DRGN_FIND_OBJECT_([a-zA-Z0-9_]+)", ) gen_constant_class( - drgn_h, output_file, "PrimitiveType", "Enum", r"DRGN_(C)_TYPE_([a-zA-Z0-9_]+)" + drgn_h, + output_file, + "PrimitiveType", + "Enum", + (), + r"DRGN_(C)_TYPE_([a-zA-Z0-9_]+)", ) gen_constant_class( drgn_h, output_file, "PlatformFlags", "Flag", + (), r"DRGN_PLATFORM_([a-zA-Z0-9_]+)(? +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cityhash.h" +#include "debug_info.h" +#include "error.h" +#include "hash_table.h" +#include "language.h" +#include "linux_kernel.h" +#include "object.h" +#include "path.h" +#include "program.h" +#include "type.h" +#include "util.h" +#include "vector.h" + +DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) + +static inline struct hash_pair +drgn_debug_info_module_hash(const struct drgn_debug_info_module_key *key) +{ + size_t hash = cityhash_size_t(key->build_id, key->build_id_len); + hash = hash_combine(hash, key->start); + hash = hash_combine(hash, key->end); + return hash_pair_from_avalanching_hash(hash); +} +static inline bool +drgn_debug_info_module_eq(const struct drgn_debug_info_module_key *a, + const struct drgn_debug_info_module_key *b) +{ + return (a->build_id_len == b->build_id_len && + memcmp(a->build_id, b->build_id, a->build_id_len) == 0 && + a->start == b->start && a->end == b->end); +} +DEFINE_HASH_TABLE_FUNCTIONS(drgn_debug_info_module_table, + drgn_debug_info_module_hash, + drgn_debug_info_module_eq) + +DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_hash, c_string_eq) + +/** + * @c Dwfl_Callbacks::find_elf() implementation. 
+ * + * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c + * Elf handle, which we need for a couple of reasons: + * + * - We usually already have the @c Elf handle open in order to identify the + * file. + * - For kernel modules, we set the section addresses in the @c Elf handle + * ourselves instead of using @c Dwfl_Callbacks::section_address(). + * + * Additionally, there's a special case for vmlinux. It is usually an @c ET_EXEC + * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN + * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but + * @ref dwfl_report_elf() bypasses this hack. + * + * So, we're stuck using @c dwfl_report_module() and this dummy callback. + */ +static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap, + const char *name, Dwarf_Addr base, + char **file_name, Elf **elfp) +{ + struct drgn_debug_info_module *module = *userdatap; + /* + * libdwfl consumes the returned path, file descriptor, and ELF handle, + * so clear the fields. + */ + *file_name = module->path; + int fd = module->fd; + *elfp = module->elf; + module->path = NULL; + module->fd = -1; + module->elf = NULL; + return fd; +} + +/* + * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls + * back to dwfl_linux_proc_find_elf() otherwise. + */ +static int drgn_dwfl_linux_proc_find_elf(Dwfl_Module *dwfl_module, + void **userdatap, const char *name, + Dwarf_Addr base, char **file_name, + Elf **elfp) +{ + struct drgn_debug_info_module *module = *userdatap; + if (module->elf) { + return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); + } + return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); +} + +/* + * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls + * back to dwfl_build_id_find_elf() otherwise. 
+ */ +static int drgn_dwfl_build_id_find_elf(Dwfl_Module *dwfl_module, + void **userdatap, const char *name, + Dwarf_Addr base, char **file_name, + Elf **elfp) +{ + struct drgn_debug_info_module *module = *userdatap; + if (module->elf) { + return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); + } + return dwfl_build_id_find_elf(dwfl_module, userdatap, name, base, + file_name, elfp); +} + +/** + * @c Dwfl_Callbacks::section_address() implementation. + * + * We set the section header @c sh_addr in memory instead of using this, but + * libdwfl requires the callback pointer to be non-@c NULL. It will be called + * for any sections that still have a zero @c sh_addr, meaning they are not + * present in memory. + */ +static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap, + const char *name, Dwarf_Addr base, + const char *secname, Elf32_Word shndx, + const GElf_Shdr *shdr, Dwarf_Addr *addr) +{ + *addr = -1; + return DWARF_CB_OK; +} + +static const Dwfl_Callbacks drgn_dwfl_callbacks = { + .find_elf = drgn_dwfl_find_elf, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = drgn_dwfl_section_address, +}; + +static const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks = { + .find_elf = drgn_dwfl_linux_proc_find_elf, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = drgn_dwfl_section_address, +}; + +static const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = { + .find_elf = drgn_dwfl_build_id_find_elf, + .find_debuginfo = dwfl_standard_find_debuginfo, + .section_address = drgn_dwfl_section_address, +}; + +static void +drgn_debug_info_module_destroy(struct drgn_debug_info_module *module) +{ + if (module) { + drgn_error_destroy(module->err); + elf_end(module->elf); + if (module->fd != -1) + close(module->fd); + free(module->path); + free(module->name); + free(module); + } +} + +static void +drgn_debug_info_module_finish_indexing(struct drgn_debug_info *dbinfo, + struct 
drgn_debug_info_module *module) +{ + module->state = DRGN_DEBUG_INFO_MODULE_INDEXED; + if (module->name) { + int ret = c_string_set_insert(&dbinfo->module_names, + (const char **)&module->name, + NULL); + /* drgn_debug_info_update_index() should've reserved enough. */ + assert(ret != -1); + } +} + +struct drgn_dwfl_module_removed_arg { + struct drgn_debug_info *dbinfo; + bool finish_indexing; + bool free_all; +}; + +static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap, + const char *name, Dwarf_Addr base, + void *_arg) +{ + struct drgn_dwfl_module_removed_arg *arg = _arg; + /* + * userdatap is actually a void ** like for the other libdwfl callbacks, + * but dwfl_report_end() has the wrong signature for the removed + * callback. + */ + struct drgn_debug_info_module *module = *(void **)userdatap; + if (arg->finish_indexing && module && + module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) + drgn_debug_info_module_finish_indexing(arg->dbinfo, module); + if (arg->free_all || !module || + module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { + drgn_debug_info_module_destroy(module); + } else { + /* + * The module was already indexed. Report it again so libdwfl + * doesn't remove it. 
+ */ + Dwarf_Addr end; + dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, + NULL, NULL); + dwfl_report_module(arg->dbinfo->dwfl, name, base, end); + } + return DWARF_CB_OK; +} + +static void drgn_debug_info_free_modules(struct drgn_debug_info *dbinfo, + bool finish_indexing, bool free_all) +{ + for (struct drgn_debug_info_module_table_iterator it = + drgn_debug_info_module_table_first(&dbinfo->modules); it.entry; ) { + struct drgn_debug_info_module *module = *it.entry; + struct drgn_debug_info_module **nextp = it.entry; + do { + struct drgn_debug_info_module *next = module->next; + if (finish_indexing && + module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) { + drgn_debug_info_module_finish_indexing(dbinfo, + module); + } + if (free_all || + module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { + if (module == *nextp) { + if (nextp == it.entry && !next) { + it = drgn_debug_info_module_table_delete_iterator(&dbinfo->modules, + it); + } else { + if (!next) + it = drgn_debug_info_module_table_next(it); + *nextp = next; + } + } + void **userdatap; + dwfl_module_info(module->dwfl_module, + &userdatap, NULL, NULL, NULL, + NULL, NULL, NULL); + *userdatap = NULL; + drgn_debug_info_module_destroy(module); + } else { + if (!next) + it = drgn_debug_info_module_table_next(it); + nextp = &module->next; + } + module = next; + } while (module); + } + + dwfl_report_begin(dbinfo->dwfl); + struct drgn_dwfl_module_removed_arg arg = { + .dbinfo = dbinfo, + .finish_indexing = finish_indexing, + .free_all = free_all, + }; + dwfl_report_end(dbinfo->dwfl, drgn_dwfl_module_removed, &arg); +} + +struct drgn_error * +drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, + const char *name, const char *message, + struct drgn_error *err) +{ + if (err && err->code == DRGN_ERROR_NO_MEMORY) { + /* Always fail hard if we're out of memory. 
*/ + goto err; + } + if (load->num_errors == 0 && + !string_builder_append(&load->errors, + "could not get debugging information for:")) + goto err; + if (load->num_errors < load->max_errors) { + if (!string_builder_line_break(&load->errors)) + goto err; + if (name && !string_builder_append(&load->errors, name)) + goto err; + if (name && (message || err) && + !string_builder_append(&load->errors, " (")) + goto err; + if (message && !string_builder_append(&load->errors, message)) + goto err; + if (message && err && + !string_builder_append(&load->errors, ": ")) + goto err; + if (err && !string_builder_append_error(&load->errors, err)) + goto err; + if (name && (message || err) && + !string_builder_appendc(&load->errors, ')')) + goto err; + } + load->num_errors++; + drgn_error_destroy(err); + return NULL; + +err: + drgn_error_destroy(err); + return &drgn_enomem; +} + +static struct drgn_error * +drgn_debug_info_report_module(struct drgn_debug_info_load_state *load, + const void *build_id, size_t build_id_len, + uint64_t start, uint64_t end, const char *name, + Dwfl_Module *dwfl_module, const char *path, + int fd, Elf *elf, bool *new_ret) +{ + struct drgn_debug_info *dbinfo = load->dbinfo; + struct drgn_error *err; + char *path_key = NULL; + + if (new_ret) + *new_ret = false; + + struct hash_pair hp; + struct drgn_debug_info_module_table_iterator it; + if (build_id_len) { + struct drgn_debug_info_module_key key = { + .build_id = build_id, + .build_id_len = build_id_len, + .start = start, + .end = end, + }; + hp = drgn_debug_info_module_hash(&key); + it = drgn_debug_info_module_table_search_hashed(&dbinfo->modules, + &key, hp); + if (it.entry && + (*it.entry)->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { + /* We've already indexed this module. 
*/ + err = NULL; + goto free; + } + } + + if (!dwfl_module) { + path_key = realpath(path, NULL); + if (!path_key) { + path_key = strdup(path); + if (!path_key) { + err = &drgn_enomem; + goto free; + } + } + + dwfl_module = dwfl_report_module(dbinfo->dwfl, path_key, start, + end); + if (!dwfl_module) { + err = drgn_error_libdwfl(); + goto free; + } + } + + void **userdatap; + dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL, + NULL); + if (*userdatap) { + /* We've already reported this file at this offset. */ + err = NULL; + goto free; + } + if (new_ret) + *new_ret = true; + + struct drgn_debug_info_module *module = malloc(sizeof(*module)); + if (!module) { + err = &drgn_enomem; + goto free; + } + module->state = DRGN_DEBUG_INFO_MODULE_NEW; + module->build_id = build_id; + module->build_id_len = build_id_len; + module->start = start; + module->end = end; + if (name) { + module->name = strdup(name); + if (!module->name) { + err = &drgn_enomem; + free(module); + goto free; + } + } else { + module->name = NULL; + } + module->dwfl_module = dwfl_module; + module->path = path_key; + module->fd = fd; + module->elf = elf; + module->err = NULL; + module->next = NULL; + + /* path_key, fd and elf are owned by the module now. */ + + if (!drgn_debug_info_module_vector_append(&load->new_modules, + &module)) { + drgn_debug_info_module_destroy(module); + return &drgn_enomem; + } + if (build_id_len) { + if (it.entry) { + /* + * The first module with this build ID is in + * new_modules, so insert it after in the list, not + * before. 
+ */ + module->next = (*it.entry)->next; + (*it.entry)->next = module; + } else if (drgn_debug_info_module_table_insert_searched(&dbinfo->modules, + &module, + hp, + NULL) < 0) { + load->new_modules.size--; + drgn_debug_info_module_destroy(module); + return &drgn_enomem; + } + } + *userdatap = module; + return NULL; + +free: + elf_end(elf); + if (fd != -1) + close(fd); + free(path_key); + return err; +} + +struct drgn_error * +drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, + const char *path, int fd, Elf *elf, uint64_t start, + uint64_t end, const char *name, bool *new_ret) +{ + + struct drgn_error *err; + const void *build_id; + ssize_t build_id_len = dwelf_elf_gnu_build_id(elf, &build_id); + if (build_id_len < 0) { + err = drgn_debug_info_report_error(load, path, NULL, + drgn_error_libdwfl()); + close(fd); + elf_end(elf); + return err; + } else if (build_id_len == 0) { + build_id = NULL; + } + return drgn_debug_info_report_module(load, build_id, build_id_len, + start, end, name, NULL, path, fd, + elf, new_ret); +} + +static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module, + void **userdatap, + const char *name, Dwarf_Addr base, + void *arg) +{ + struct drgn_debug_info_load_state *load = arg; + struct drgn_error *err; + + if (*userdatap) { + /* + * This was either reported from drgn_debug_info_report_elf() or + * already indexed. 
+ */ + return DWARF_CB_OK; + } + + const unsigned char *build_id; + GElf_Addr build_id_vaddr; + int build_id_len = dwfl_module_build_id(dwfl_module, &build_id, + &build_id_vaddr); + if (build_id_len < 0) { + err = drgn_debug_info_report_error(load, name, NULL, + drgn_error_libdwfl()); + if (err) + goto err; + } else if (build_id_len == 0) { + build_id = NULL; + } + Dwarf_Addr end; + dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL); + err = drgn_debug_info_report_module(load, build_id, build_id_len, base, + end, NULL, dwfl_module, name, -1, + NULL, NULL); + if (err) + goto err; + return DWARF_CB_OK; + +err: + drgn_error_destroy(err); + return DWARF_CB_ABORT; +} + +static struct drgn_error * +userspace_report_debug_info(struct drgn_debug_info_load_state *load) +{ + struct drgn_error *err; + + for (size_t i = 0; i < load->num_paths; i++) { + int fd; + Elf *elf; + err = open_elf_file(load->paths[i], &fd, &elf); + if (err) { + err = drgn_debug_info_report_error(load, load->paths[i], + NULL, err); + if (err) + return err; + continue; + } + /* + * We haven't implemented a way to get the load address for + * anything reported here, so for now we report it as unloaded. 
+ */ + err = drgn_debug_info_report_elf(load, load->paths[i], fd, elf, + 0, 0, NULL, NULL); + if (err) + return err; + } + + if (load->load_default) { + Dwfl *dwfl = load->dbinfo->dwfl; + struct drgn_program *prog = load->dbinfo->prog; + if (prog->flags & DRGN_PROGRAM_IS_LIVE) { + int ret = dwfl_linux_proc_report(dwfl, prog->pid); + if (ret == -1) { + return drgn_error_libdwfl(); + } else if (ret) { + return drgn_error_create_os("dwfl_linux_proc_report", + ret, NULL); + } + } else if (dwfl_core_file_report(dwfl, prog->core, + NULL) == -1) { + return drgn_error_libdwfl(); + } + } + return NULL; +} + +static struct drgn_error *apply_relocation(Elf_Data *data, uint64_t r_offset, + uint32_t r_type, int64_t r_addend, + uint64_t st_value) +{ + char *p; + + p = (char *)data->d_buf + r_offset; + switch (r_type) { + case R_X86_64_NONE: + break; + case R_X86_64_32: + if (r_offset > SIZE_MAX - sizeof(uint32_t) || + r_offset + sizeof(uint32_t) > data->d_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid relocation offset"); + } + *(uint32_t *)p = st_value + r_addend; + break; + case R_X86_64_64: + if (r_offset > SIZE_MAX - sizeof(uint64_t) || + r_offset + sizeof(uint64_t) > data->d_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid relocation offset"); + } + *(uint64_t *)p = st_value + r_addend; + break; + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "unimplemented relocation type %" PRIu32, + r_type); + } + return NULL; +} + +static struct drgn_error *relocate_section(Elf_Scn *scn, Elf_Scn *rela_scn, + Elf_Scn *symtab_scn, + uint64_t *sh_addrs, size_t shdrnum) +{ + struct drgn_error *err; + Elf_Data *data, *rela_data, *symtab_data; + const Elf64_Rela *relocs; + const Elf64_Sym *syms; + size_t num_relocs, num_syms; + size_t i; + GElf_Shdr *shdr, shdr_mem; + + err = read_elf_section(scn, &data); + if (err) + return err; + err = read_elf_section(rela_scn, &rela_data); + if (err) + return err; + err = read_elf_section(symtab_scn, 
&symtab_data); + if (err) + return err; + + relocs = (Elf64_Rela *)rela_data->d_buf; + num_relocs = rela_data->d_size / sizeof(Elf64_Rela); + syms = (Elf64_Sym *)symtab_data->d_buf; + num_syms = symtab_data->d_size / sizeof(Elf64_Sym); + + for (i = 0; i < num_relocs; i++) { + const Elf64_Rela *reloc = &relocs[i]; + uint32_t r_sym, r_type; + uint16_t st_shndx; + uint64_t sh_addr; + + r_sym = ELF64_R_SYM(reloc->r_info); + r_type = ELF64_R_TYPE(reloc->r_info); + + if (r_sym >= num_syms) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid relocation symbol"); + } + st_shndx = syms[r_sym].st_shndx; + if (st_shndx == 0) { + sh_addr = 0; + } else if (st_shndx < shdrnum) { + sh_addr = sh_addrs[st_shndx - 1]; + } else { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid symbol section index"); + } + err = apply_relocation(data, reloc->r_offset, r_type, + reloc->r_addend, + sh_addr + syms[r_sym].st_value); + if (err) + return err; + } + + /* + * Mark the relocation section as empty so that libdwfl doesn't try to + * apply it again. + */ + shdr = gelf_getshdr(rela_scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + shdr->sh_size = 0; + if (!gelf_update_shdr(rela_scn, shdr)) + return drgn_error_libelf(); + rela_data->d_size = 0; + return NULL; +} + +/* + * Before the debugging information in a relocatable ELF file (e.g., Linux + * kernel module) can be used, it must have ELF relocations applied. This is + * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a + * much faster implementation. It is only implemented for x86-64; for other + * architectures, we can fall back to libdwfl. 
+ */ +static struct drgn_error *apply_elf_relocations(Elf *elf) +{ + struct drgn_error *err; + GElf_Ehdr ehdr_mem, *ehdr; + size_t shdrnum, shstrndx; + uint64_t *sh_addrs; + Elf_Scn *scn; + + ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) + return drgn_error_libelf(); + + if (ehdr->e_type != ET_REL || + ehdr->e_machine != EM_X86_64 || + ehdr->e_ident[EI_CLASS] != ELFCLASS64 || + ehdr->e_ident[EI_DATA] != + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? + ELFDATA2LSB : ELFDATA2MSB)) { + /* Unsupported; fall back to libdwfl. */ + return NULL; + } + + if (elf_getshdrnum(elf, &shdrnum)) + return drgn_error_libelf(); + if (shdrnum > 1) { + sh_addrs = calloc(shdrnum - 1, sizeof(*sh_addrs)); + if (!sh_addrs) + return &drgn_enomem; + + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + size_t ndx; + + ndx = elf_ndxscn(scn); + if (ndx > 0 && ndx < shdrnum) { + GElf_Shdr *shdr, shdr_mem; + + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) { + err = drgn_error_libelf(); + goto out; + } + sh_addrs[ndx - 1] = shdr->sh_addr; + } + } + } else { + sh_addrs = NULL; + } + + if (elf_getshdrstrndx(elf, &shstrndx)) { + err = drgn_error_libelf(); + goto out; + } + + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr *shdr, shdr_mem; + const char *scnname; + + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) { + err = drgn_error_libelf(); + goto out; + } + + if (shdr->sh_type != SHT_RELA) + continue; + + scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + continue; + + if (strstartswith(scnname, ".rela.debug_")) { + Elf_Scn *info_scn, *link_scn; + + info_scn = elf_getscn(elf, shdr->sh_info); + if (!info_scn) { + err = drgn_error_libelf(); + goto out; + } + + link_scn = elf_getscn(elf, shdr->sh_link); + if (!link_scn) { + err = drgn_error_libelf(); + goto out; + } + + err = relocate_section(info_scn, scn, link_scn, + sh_addrs, shdrnum); + if (err) + goto out; + } + } +out: + free(sh_addrs); + return NULL; +} + +static struct drgn_error * 
+drgn_get_debug_sections(struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + + if (module->elf) { + err = apply_elf_relocations(module->elf); + if (err) + return err; + } + + /* + * Note: not dwfl_module_getelf(), because then libdwfl applies + * ELF relocations to all sections, not just debug sections. + */ + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdwfl(); + Elf *elf = dwarf_getelf(dwarf); + if (!elf) + return drgn_error_libdw(); + + module->bswap = (elf_getident(elf, NULL)[EI_DATA] != + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? + ELFDATA2LSB : ELFDATA2MSB)); + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); + + module->debug_info = NULL; + module->debug_abbrev = NULL; + module->debug_str = NULL; + module->debug_line = NULL; + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + if (shdr->sh_type == SHT_NOBITS || (shdr->sh_flags & SHF_GROUP)) + continue; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + continue; + + Elf_Data **sectionp; + if (!module->debug_info && strcmp(scnname, ".debug_info") == 0) + sectionp = &module->debug_info; + else if (!module->debug_abbrev && strcmp(scnname, ".debug_abbrev") == 0) + sectionp = &module->debug_abbrev; + else if (!module->debug_str && strcmp(scnname, ".debug_str") == 0) + sectionp = &module->debug_str; + else if (!module->debug_line && strcmp(scnname, ".debug_line") == 0) + sectionp = &module->debug_line; + else + continue; + err = read_elf_section(scn, sectionp); + if (err) + return err; + } + + /* + * Truncate any extraneous bytes so that we can assume that a pointer + * within .debug_str is always null-terminated. 
+ */ + if (module->debug_str) { + const char *buf = module->debug_str->d_buf; + const char *nul = memrchr(buf, '\0', module->debug_str->d_size); + if (nul) + module->debug_str->d_size = nul - buf + 1; + else + module->debug_str->d_size = 0; + + } + return NULL; +} + +static struct drgn_error * +drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, + struct drgn_dwarf_index_update_state *dindex_state, + struct drgn_debug_info_module *head) +{ + struct drgn_error *err; + struct drgn_debug_info_module *module; + for (module = head; module; module = module->next) { + err = drgn_get_debug_sections(module); + if (err) { + module->err = err; + continue; + } + if (module->debug_info && module->debug_abbrev) { + module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; + drgn_dwarf_index_read_module(dindex_state, module); + return NULL; + } + } + /* + * We checked all of the files and didn't find debugging information. + * Report why for each one. + * + * (If we did find debugging information, we discard errors on the + * unused files.) 
+ */ + err = NULL; + #pragma omp critical(drgn_debug_info_read_module_error) + for (module = head; module; module = module->next) { + const char *name = + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, + NULL, NULL, NULL, NULL); + if (module->err) { + err = drgn_debug_info_report_error(load, name, NULL, + module->err); + module->err = NULL; + } else { + err = drgn_debug_info_report_error(load, name, + "no debugging information", + NULL); + } + if (err) + break; + } + return err; +} + +static struct drgn_error * +drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) +{ + if (!load->new_modules.size) + return NULL; + struct drgn_debug_info *dbinfo = load->dbinfo; + if (!c_string_set_reserve(&dbinfo->module_names, + c_string_set_size(&dbinfo->module_names) + + load->new_modules.size)) + return &drgn_enomem; + struct drgn_dwarf_index_update_state dindex_state; + drgn_dwarf_index_update_begin(&dindex_state, &dbinfo->dindex); + /* + * In OpenMP 5.0, this could be "#pragma omp parallel master taskloop" + * (added in GCC 9 and Clang 10). 
+ */ + #pragma omp parallel + #pragma omp master + #pragma omp taskloop + for (size_t i = 0; i < load->new_modules.size; i++) { + if (drgn_dwarf_index_update_cancelled(&dindex_state)) + continue; + struct drgn_error *module_err = + drgn_debug_info_read_module(load, &dindex_state, + load->new_modules.data[i]); + if (module_err) + drgn_dwarf_index_update_cancel(&dindex_state, module_err); + } + struct drgn_error *err = drgn_dwarf_index_update_end(&dindex_state); + if (err) + return err; + drgn_debug_info_free_modules(dbinfo, true, false); + return NULL; +} + +struct drgn_error * +drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load) +{ + struct drgn_debug_info *dbinfo = load->dbinfo; + dwfl_report_end(dbinfo->dwfl, NULL, NULL); + struct drgn_error *err = drgn_debug_info_update_index(load); + dwfl_report_begin_add(dbinfo->dwfl); + if (err) + return err; + load->new_modules.size = 0; + return NULL; +} + +static struct drgn_error * +drgn_debug_info_report_finalize_errors(struct drgn_debug_info_load_state *load) +{ + if (load->num_errors > load->max_errors && + (!string_builder_line_break(&load->errors) || + !string_builder_appendf(&load->errors, "... %u more", + load->num_errors - load->max_errors))) { + free(load->errors.str); + return &drgn_enomem; + } + if (load->num_errors) { + return drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, + &load->errors); + } else { + return NULL; + } +} + +struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, + const char **paths, size_t n, + bool load_default, bool load_main) +{ + struct drgn_program *prog = dbinfo->prog; + struct drgn_error *err; + + if (load_default) + load_main = true; + + const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); + struct drgn_debug_info_load_state load = { + .dbinfo = dbinfo, + .paths = paths, + .num_paths = n, + .load_default = load_default, + .load_main = load_main, + .new_modules = VECTOR_INIT, + .max_errors = max_errors ? 
atoi(max_errors) : 5, + }; + dwfl_report_begin_add(dbinfo->dwfl); + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + err = linux_kernel_report_debug_info(&load); + else + err = userspace_report_debug_info(&load); + dwfl_report_end(dbinfo->dwfl, NULL, NULL); + if (err) + goto err; + + /* + * userspace_report_debug_info() reports the main debugging information + * directly with libdwfl, so we need to report it to dbinfo. + */ + if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && load_main && + dwfl_getmodules(dbinfo->dwfl, drgn_debug_info_report_dwfl_module, + &load, 0)) { + err = &drgn_enomem; + goto err; + } + + err = drgn_debug_info_update_index(&load); + if (err) + goto err; + + /* + * If this fails, it's too late to roll back. This can only fail with + * enomem, so it's not a big deal. + */ + err = drgn_debug_info_report_finalize_errors(&load); +out: + drgn_debug_info_module_vector_deinit(&load.new_modules); + return err; + +err: + drgn_debug_info_free_modules(dbinfo, false, false); + free(load.errors.str); + goto out; +} + +bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, + const char *name) +{ + return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; +} + +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, hash_pair_ptr_type, + hash_table_scalar_eq) + +struct drgn_type_from_dwarf_thunk { + struct drgn_type_thunk thunk; + Dwarf_Die die; + bool can_be_incomplete_array; +}; + +/** + * Return whether a DWARF DIE is little-endian. + * + * @param[in] check_attr Whether to check the DW_AT_endianity attribute. If @c + * false, only the ELF header is checked and this function cannot fail. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error *dwarf_die_is_little_endian(Dwarf_Die *die, + bool check_attr, bool *ret) +{ + Dwarf_Attribute endianity_attr_mem, *endianity_attr; + Dwarf_Word endianity; + if (check_attr && + (endianity_attr = dwarf_attr_integrate(die, DW_AT_endianity, + &endianity_attr_mem))) { + if (dwarf_formudata(endianity_attr, &endianity)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_endianity"); + } + } else { + endianity = DW_END_default; + } + switch (endianity) { + case DW_END_default: { + Elf *elf = dwarf_getelf(dwarf_cu_getdwarf(die->cu)); + *ret = elf_getident(elf, NULL)[EI_DATA] == ELFDATA2LSB; + return NULL; + } + case DW_END_little: + *ret = true; + return NULL; + case DW_END_big: + *ret = false; + return NULL; + default: + return drgn_error_create(DRGN_ERROR_OTHER, + "unknown DW_AT_endianity"); + } +} + +/** Like dwarf_die_is_little_endian(), but returns a @ref drgn_byte_order. */ +static struct drgn_error *dwarf_die_byte_order(Dwarf_Die *die, + bool check_attr, + enum drgn_byte_order *ret) +{ + bool little_endian; + struct drgn_error *err = dwarf_die_is_little_endian(die, check_attr, + &little_endian); + /* + * dwarf_die_is_little_endian() can't fail if check_attr is false, so + * the !check_attr test suppresses maybe-uninitialized warnings. + */ + if (!err || !check_attr) + *ret = little_endian ? DRGN_LITTLE_ENDIAN : DRGN_BIG_ENDIAN; + return err; +} + +static int dwarf_type(Dwarf_Die *die, Dwarf_Die *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) + return 1; + + return dwarf_formref_die(attr, ret) ? 0 : -1; +} + +static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr_integrate(die, name, &attr_mem))) { + *ret = false; + return 0; + } + return dwarf_formflag(attr, ret); +} + +/** + * Parse a type from a DWARF debugging information entry. 
+ * + * This is the same as @ref drgn_type_from_dwarf() except that it can be used to + * work around a bug in GCC < 9.0 that zero length array types are encoded the + * same as incomplete array types. There are a few places where GCC allows + * zero-length arrays but not incomplete arrays: + * + * - As the type of a member of a structure with only one member. + * - As the type of a structure member other than the last member. + * - As the type of a union member. + * - As the element type of an array. + * + * In these cases, we know that what appears to be an incomplete array type must + * actually have a length of zero. In other cases, a subrange DIE without + * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array + * type. + * + * @param[in] dbinfo Debugging information. + * @param[in] die DIE to parse. + * @param[in] can_be_incomplete_array Whether the type can be an incomplete + * array type. If this is @c false and the type appears to be an incomplete + * array type, its length is set to zero instead. + * @param[out] is_incomplete_array_ret Whether the encoded type is an incomplete + * array type or a typedef of an incomplete array type (regardless of @p + * can_be_incomplete_array). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. + */ +static struct drgn_error * +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret); + +/** + * Parse a type from a DWARF debugging information entry. + * + * @param[in] dbinfo Debugging information. + * @param[in] die DIE to parse. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static inline struct drgn_error * +drgn_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + struct drgn_qualified_type *ret) +{ + return drgn_type_from_dwarf_internal(dbinfo, die, true, NULL, ret); +} + +static struct drgn_error * +drgn_type_from_dwarf_thunk_evaluate_fn(struct drgn_type_thunk *thunk, + struct drgn_qualified_type *ret) +{ + struct drgn_type_from_dwarf_thunk *t = + container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk); + return drgn_type_from_dwarf_internal(thunk->prog->_dbinfo, &t->die, + t->can_be_incomplete_array, NULL, + ret); +} + +static void drgn_type_from_dwarf_thunk_free_fn(struct drgn_type_thunk *thunk) +{ + free(container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk)); +} + +static struct drgn_error * +drgn_lazy_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *parent_die, + bool can_be_incomplete_array, const char *tag_name, + struct drgn_lazy_type *ret) +{ + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(parent_die, DW_AT_type, &attr_mem))) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s is missing DW_AT_type", + tag_name); + } + + Dwarf_Die type_die; + if (!dwarf_formref_die(attr, &type_die)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_type", tag_name); + } + + struct drgn_type_from_dwarf_thunk *thunk = malloc(sizeof(*thunk)); + if (!thunk) + return &drgn_enomem; + + thunk->thunk.prog = dbinfo->prog; + thunk->thunk.evaluate_fn = drgn_type_from_dwarf_thunk_evaluate_fn; + thunk->thunk.free_fn = drgn_type_from_dwarf_thunk_free_fn; + thunk->die = type_die; + thunk->can_be_incomplete_array = can_be_incomplete_array; + drgn_lazy_type_init_thunk(ret, &thunk->thunk); + return NULL; +} + +/** + * Parse a type from the @c DW_AT_type attribute of a DWARF debugging + * information entry. + * + * @param[in] dbinfo Debugging information. + * @param[in] parent_die Parent DIE. 
+ * @param[in] parent_lang Language of the parent DIE if it is already known, @c + * NULL if it should be determined from @p parent_die. + * @param[in] tag_name Spelling of the DWARF tag of @p parent_die. Used for + * error messages. + * @param[in] can_be_void Whether the @c DW_AT_type attribute may be missing, + * which is interpreted as a void type. If this is false and the @c DW_AT_type + * attribute is missing, an error is returned. + * @param[in] can_be_incomplete_array See @ref drgn_type_from_dwarf_internal(). + * @param[in] is_incomplete_array_ret See @ref drgn_type_from_dwarf_internal(). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error * +drgn_type_from_dwarf_child(struct drgn_debug_info *dbinfo, + Dwarf_Die *parent_die, + const struct drgn_language *parent_lang, + const char *tag_name, + bool can_be_void, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + Dwarf_Die type_die; + + if (!(attr = dwarf_attr_integrate(parent_die, DW_AT_type, &attr_mem))) { + if (can_be_void) { + if (!parent_lang) { + err = drgn_language_from_die(parent_die, + &parent_lang); + if (err) + return err; + } + ret->type = drgn_void_type(dbinfo->prog, parent_lang); + ret->qualifiers = 0; + return NULL; + } else { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s is missing DW_AT_type", + tag_name); + } + } + + if (!dwarf_formref_die(attr, &type_die)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_type", tag_name); + } + + return drgn_type_from_dwarf_internal(dbinfo, &type_die, + can_be_incomplete_array, + is_incomplete_array_ret, ret); +} + +static struct drgn_error * +drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + const char *name = dwarf_diename(die); + if (!name) { + return 
drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_name"); + } + + Dwarf_Attribute attr; + Dwarf_Word encoding; + if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || + dwarf_formudata(&attr, &encoding)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_encoding"); + } + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); + } + + switch (encoding) { + case DW_ATE_boolean: + return drgn_bool_type_create(dbinfo->prog, name, size, lang, + ret); + case DW_ATE_float: + return drgn_float_type_create(dbinfo->prog, name, size, lang, + ret); + case DW_ATE_signed: + case DW_ATE_signed_char: + return drgn_int_type_create(dbinfo->prog, name, size, true, + lang, ret); + case DW_ATE_unsigned: + case DW_ATE_unsigned_char: + return drgn_int_type_create(dbinfo->prog, name, size, false, + lang, ret); + /* + * GCC also supports complex integer types, but DWARF 4 doesn't have an + * encoding for that. GCC as of 8.2 emits DW_ATE_lo_user, but that's + * ambiguous because it also emits that in other cases. For now, we + * don't support it. 
+ */ + case DW_ATE_complex_float: { + Dwarf_Die child; + if (dwarf_type(die, &child)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_type"); + } + struct drgn_qualified_type real_type; + struct drgn_error *err = drgn_type_from_dwarf(dbinfo, &child, + &real_type); + if (err) + return err; + if (drgn_type_kind(real_type.type) != DRGN_TYPE_FLOAT && + drgn_type_kind(real_type.type) != DRGN_TYPE_INT) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_type of DW_ATE_complex_float is not a floating-point or integer type"); + } + return drgn_complex_type_create(dbinfo->prog, name, size, + real_type.type, lang, ret); + } + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_base_type has unknown DWARF encoding 0x%llx", + (unsigned long long)encoding); + } +} + +/* + * DW_TAG_structure_type, DW_TAG_union_type, DW_TAG_class_type, and + * DW_TAG_enumeration_type can be incomplete (i.e., have a DW_AT_declaration of + * true). This tries to find the complete type. If it succeeds, it returns NULL. + * If it can't find a complete type, it returns a DRGN_ERROR_STOP error. + * Otherwise, it returns an error. + */ +static struct drgn_error * +drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, + const char *name, struct drgn_type **ret) +{ + struct drgn_error *err; + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, + strlen(name), &tag, 1); + if (err) + return err; + + /* + * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs + * with DW_AT_declaration, so this will always be a complete type. + */ + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_stop; + /* + * Look for another matching DIE. If there is one, then we can't be sure + * which type this is, so leave it incomplete rather than guessing. 
+ */ + if (drgn_dwarf_index_iterator_next(&it)) + return &drgn_stop; + + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die, NULL); + if (err) + return err; + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf(dbinfo, &die, &qualified_type); + if (err) + return err; + *ret = qualified_type.type; + return NULL; +} + +static struct drgn_error * +parse_member_offset(Dwarf_Die *die, struct drgn_lazy_type *member_type, + uint64_t bit_field_size, bool little_endian, uint64_t *ret) +{ + struct drgn_error *err; + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + /* + * The simplest case is when we have DW_AT_data_bit_offset, which is + * already the offset in bits from the beginning of the containing + * object to the beginning of the member (which may be a bit field). + */ + attr = dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr_mem); + if (attr) { + Dwarf_Word bit_offset; + + if (dwarf_formudata(attr, &bit_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_bit_offset"); + } + *ret = bit_offset; + return NULL; + } + + /* + * Otherwise, we might have DW_AT_data_member_location, which is the + * offset in bytes from the beginning of the containing object. + */ + attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); + if (attr) { + Dwarf_Word byte_offset; + + if (dwarf_formudata(attr, &byte_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_member_location"); + } + *ret = 8 * byte_offset; + } else { + *ret = 0; + } + + /* + * In addition to DW_AT_data_member_location, a bit field might have + * DW_AT_bit_offset, which is the offset in bits of the most significant + * bit of the bit field from the most significant bit of the containing + * object. 
+ */ + attr = dwarf_attr_integrate(die, DW_AT_bit_offset, &attr_mem); + if (attr) { + Dwarf_Word bit_offset; + + if (dwarf_formudata(attr, &bit_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_bit_offset"); + } + + /* + * If the architecture is little-endian, then we must compute + * the location of the most significant bit from the size of the + * member, then subtract the bit offset and bit size to get the + * location of the beginning of the bit field. + * + * If the architecture is big-endian, then the most significant + * bit of the bit field is the beginning. + */ + if (little_endian) { + uint64_t byte_size; + + attr = dwarf_attr_integrate(die, DW_AT_byte_size, + &attr_mem); + /* + * If the member has an explicit byte size, we can use + * that. Otherwise, we have to get it from the member + * type. + */ + if (attr) { + Dwarf_Word word; + + if (dwarf_formudata(attr, &word)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_byte_size"); + } + byte_size = word; + } else { + struct drgn_qualified_type containing_type; + + err = drgn_lazy_type_evaluate(member_type, + &containing_type); + if (err) + return err; + if (!drgn_type_has_size(containing_type.type)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member bit field type does not have size"); + } + byte_size = drgn_type_size(containing_type.type); + } + *ret += 8 * byte_size - bit_offset - bit_field_size; + } else { + *ret += bit_offset; + } + } + + return NULL; +} + +static struct drgn_error * +parse_member(struct drgn_debug_info *dbinfo, Dwarf_Die *die, bool little_endian, + bool can_be_incomplete_array, + struct drgn_compound_type_builder *builder) +{ + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_name"); + } + } else { + 
name = NULL; + } + + uint64_t bit_field_size; + if ((attr = dwarf_attr_integrate(die, DW_AT_bit_size, &attr_mem))) { + Dwarf_Word bit_size; + if (dwarf_formudata(attr, &bit_size)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_bit_size"); + } + bit_field_size = bit_size; + } else { + bit_field_size = 0; + } + + struct drgn_lazy_type member_type; + struct drgn_error *err = drgn_lazy_type_from_dwarf(dbinfo, die, + can_be_incomplete_array, + "DW_TAG_member", + &member_type); + if (err) + return err; + + uint64_t bit_offset; + err = parse_member_offset(die, &member_type, bit_field_size, + little_endian, &bit_offset); + if (err) + goto err; + + err = drgn_compound_type_builder_add_member(builder, member_type, name, + bit_offset, bit_field_size); + if (err) + goto err; + return NULL; + +err: + drgn_lazy_type_deinit(&member_type); + return err; +} + +static struct drgn_error * +drgn_compound_type_from_dwarf(struct drgn_debug_info *dbinfo, + Dwarf_Die *die, const struct drgn_language *lang, + enum drgn_type_kind kind, struct drgn_type **ret) +{ + struct drgn_error *err; + + const char *dw_tag_str; + uint64_t dw_tag; + switch (kind) { + case DRGN_TYPE_STRUCT: + dw_tag_str = "DW_TAG_structure_type"; + dw_tag = DW_TAG_structure_type; + break; + case DRGN_TYPE_UNION: + dw_tag_str = "DW_TAG_union_type"; + dw_tag = DW_TAG_union_type; + break; + case DRGN_TYPE_CLASS: + dw_tag_str = "DW_TAG_class_type"; + dw_tag = DW_TAG_class_type; + break; + default: + UNREACHABLE(); + } + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; + if (attr) { + tag = dwarf_formstring(attr); + if (!tag) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_name", + dw_tag_str); + } + } else { + tag = NULL; + } + + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_declaration", + 
dw_tag_str); + } + if (declaration && tag) { + err = drgn_debug_info_find_complete(dbinfo, dw_tag, tag, ret); + if (!err || err->code != DRGN_ERROR_STOP) + return err; + } + + if (declaration) { + return drgn_incomplete_compound_type_create(dbinfo->prog, kind, + tag, lang, ret); + } + + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has missing or invalid DW_AT_byte_size", + dw_tag_str); + } + + struct drgn_compound_type_builder builder; + drgn_compound_type_builder_init(&builder, dbinfo->prog, kind); + bool little_endian; + dwarf_die_is_little_endian(die, false, &little_endian); + Dwarf_Die member = {}, child; + int r = dwarf_child(die, &child); + while (r == 0) { + if (dwarf_tag(&child) == DW_TAG_member) { + if (member.addr) { + err = parse_member(dbinfo, &member, + little_endian, false, + &builder); + if (err) + goto err; + } + member = child; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + /* + * Flexible array members are only allowed as the last member of a + * structure with at least one other member. 
+ */ + if (member.addr) { + err = parse_member(dbinfo, &member, little_endian, + kind != DRGN_TYPE_UNION && + builder.members.size > 0, + &builder); + if (err) + goto err; + } + + err = drgn_compound_type_create(&builder, tag, size, lang, ret); + if (err) + goto err; + return NULL; + +err: + drgn_compound_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, + bool *is_signed) +{ + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has missing or invalid DW_AT_name"); + } + + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator is missing DW_AT_const_value"); + } + struct drgn_error *err; + if (attr->form == DW_FORM_sdata || + attr->form == DW_FORM_implicit_const) { + Dwarf_Sword svalue; + if (dwarf_formsdata(attr, &svalue)) + goto invalid; + err = drgn_enum_type_builder_add_signed(builder, name, + svalue); + /* + * GCC before 7.1 didn't include DW_AT_encoding for + * DW_TAG_enumeration_type DIEs, so we have to guess the sign + * for enum_compatible_type_fallback(). + */ + if (!err && svalue < 0) + *is_signed = true; + } else { + Dwarf_Word uvalue; + if (dwarf_formudata(attr, &uvalue)) + goto invalid; + err = drgn_enum_type_builder_add_unsigned(builder, name, + uvalue); + } + return err; + +invalid: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has invalid DW_AT_const_value"); +} + +/* + * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, + * so we have to fabricate the compatible type. 
+ */ +static struct drgn_error * +enum_compatible_type_fallback(struct drgn_debug_info *dbinfo, + Dwarf_Die *die, bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); + } + return drgn_int_type_create(dbinfo->prog, "", size, is_signed, + lang, ret); +} + +static struct drgn_error * +drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; + if (attr) { + tag = dwarf_formstring(attr); + if (!tag) + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_name"); + } else { + tag = NULL; + } + + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_declaration"); + } + if (declaration && tag) { + err = drgn_debug_info_find_complete(dbinfo, + DW_TAG_enumeration_type, + tag, ret); + if (!err || err->code != DRGN_ERROR_STOP) + return err; + } + + if (declaration) { + return drgn_incomplete_enum_type_create(dbinfo->prog, tag, lang, + ret); + } + + struct drgn_enum_type_builder builder; + drgn_enum_type_builder_init(&builder, dbinfo->prog); + bool is_signed = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + if (dwarf_tag(&child) == DW_TAG_enumerator) { + err = parse_enumerator(&child, &builder, &is_signed); + if (err) + goto err; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + + struct drgn_type *compatible_type; + r = dwarf_type(die, &child); + if (r == -1) 
{ + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_type"); + goto err; + } else if (r) { + err = enum_compatible_type_fallback(dbinfo, die, is_signed, + lang, &compatible_type); + if (err) + goto err; + } else { + struct drgn_qualified_type qualified_compatible_type; + err = drgn_type_from_dwarf(dbinfo, &child, + &qualified_compatible_type); + if (err) + goto err; + compatible_type = qualified_compatible_type.type; + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); + goto err; + } + } + + err = drgn_enum_type_create(&builder, tag, compatible_type, lang, ret); + if (err) + goto err; + return NULL; + +err: + drgn_enum_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +drgn_typedef_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_type **ret) +{ + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_typedef has missing or invalid DW_AT_name"); + } + + struct drgn_qualified_type aliased_type; + struct drgn_error *err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_typedef", + true, + can_be_incomplete_array, + is_incomplete_array_ret, + &aliased_type); + if (err) + return err; + + return drgn_typedef_type_create(dbinfo->prog, name, aliased_type, lang, + ret); +} + +static struct drgn_error * +drgn_pointer_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_qualified_type referenced_type; + struct drgn_error *err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_pointer_type", + true, true, NULL, + &referenced_type); + if (err) + return err; + 
+ Dwarf_Attribute attr_mem, *attr; + uint64_t size; + if ((attr = dwarf_attr_integrate(die, DW_AT_byte_size, &attr_mem))) { + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_pointer_type has invalid DW_AT_byte_size"); + } + size = word; + } else { + uint8_t word_size; + err = drgn_program_word_size(dbinfo->prog, &word_size); + if (err) + return err; + size = word_size; + } + + return drgn_pointer_type_create(dbinfo->prog, referenced_type, size, + lang, ret); +} + +struct array_dimension { + uint64_t length; + bool is_complete; +}; + +DEFINE_VECTOR(array_dimension_vector, struct array_dimension) + +static struct drgn_error *subrange_length(Dwarf_Die *die, + struct array_dimension *dimension) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + Dwarf_Word word; + + if (!(attr = dwarf_attr_integrate(die, DW_AT_upper_bound, &attr_mem)) && + !(attr = dwarf_attr_integrate(die, DW_AT_count, &attr_mem))) { + dimension->is_complete = false; + return NULL; + } + + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_subrange_type has invalid %s", + attr->code == DW_AT_upper_bound ? + "DW_AT_upper_bound" : + "DW_AT_count"); + } + + dimension->is_complete = true; + /* + * GCC emits a DW_FORM_sdata DW_AT_upper_bound of -1 for empty array + * variables without an explicit size (e.g., `int arr[] = {};`). 
+ */ + if (attr->code == DW_AT_upper_bound && attr->form == DW_FORM_sdata && + word == (Dwarf_Word)-1) { + dimension->length = 0; + } else if (attr->code == DW_AT_upper_bound) { + if (word >= UINT64_MAX) { + return drgn_error_create(DRGN_ERROR_OVERFLOW, + "DW_AT_upper_bound is too large"); + } + dimension->length = (uint64_t)word + 1; + } else { + if (word > UINT64_MAX) { + return drgn_error_create(DRGN_ERROR_OVERFLOW, + "DW_AT_count is too large"); + } + dimension->length = word; + } + return NULL; +} + +static struct drgn_error * +drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_type **ret) +{ + struct drgn_error *err; + struct array_dimension_vector dimensions = VECTOR_INIT; + struct array_dimension *dimension; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + if (dwarf_tag(&child) == DW_TAG_subrange_type) { + dimension = array_dimension_vector_append_entry(&dimensions); + if (!dimension) + goto out; + err = subrange_length(&child, dimension); + if (err) + goto out; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto out; + } + if (!dimensions.size) { + dimension = array_dimension_vector_append_entry(&dimensions); + if (!dimension) + goto out; + dimension->is_complete = false; + } + + struct drgn_qualified_type element_type; + err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_array_type", false, false, + NULL, &element_type); + if (err) + goto out; + + *is_incomplete_array_ret = !dimensions.data[0].is_complete; + struct drgn_type *type; + do { + dimension = array_dimension_vector_pop(&dimensions); + if (dimension->is_complete) { + err = drgn_array_type_create(dbinfo->prog, element_type, + dimension->length, lang, + &type); + } else if (dimensions.size || 
!can_be_incomplete_array) { + err = drgn_array_type_create(dbinfo->prog, element_type, + 0, lang, &type); + } else { + err = drgn_incomplete_array_type_create(dbinfo->prog, + element_type, + lang, &type); + } + if (err) + goto out; + + element_type.type = type; + element_type.qualifiers = 0; + } while (dimensions.size); + + *ret = type; + err = NULL; +out: + array_dimension_vector_deinit(&dimensions); + return err; +} + +static struct drgn_error * +parse_formal_parameter(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + struct drgn_function_type_builder *builder) +{ + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_formal_parameter has invalid DW_AT_name"); + } + } else { + name = NULL; + } + + struct drgn_lazy_type parameter_type; + struct drgn_error *err = drgn_lazy_type_from_dwarf(dbinfo, die, true, + "DW_TAG_formal_parameter", + ¶meter_type); + if (err) + return err; + + err = drgn_function_type_builder_add_parameter(builder, parameter_type, + name); + if (err) + drgn_lazy_type_deinit(¶meter_type); + return err; +} + +static struct drgn_error * +drgn_function_type_from_dwarf(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + + const char *tag_name = + dwarf_tag(die) == DW_TAG_subroutine_type ? 
+ "DW_TAG_subroutine_type" : "DW_TAG_subprogram"; + struct drgn_function_type_builder builder; + drgn_function_type_builder_init(&builder, dbinfo->prog); + bool is_variadic = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + switch (dwarf_tag(&child)) { + case DW_TAG_formal_parameter: + if (is_variadic) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", + tag_name); + goto err; + } + err = parse_formal_parameter(dbinfo, &child, &builder); + if (err) + goto err; + break; + case DW_TAG_unspecified_parameters: + if (is_variadic) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s has multiple DW_TAG_unspecified_parameters children", + tag_name); + goto err; + } + is_variadic = true; + break; + default: + break; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + + struct drgn_qualified_type return_type; + err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + tag_name, true, true, NULL, + &return_type); + if (err) + goto err; + + err = drgn_function_type_create(&builder, return_type, is_variadic, + lang, ret); + if (err) + goto err; + return NULL; + +err: + drgn_function_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret) +{ + if (dbinfo->depth >= 1000) { + return drgn_error_create(DRGN_ERROR_RECURSION, + "maximum DWARF type parsing depth exceeded"); + } + + struct drgn_dwarf_type_map_entry entry = { + .key = die->addr, + }; + struct hash_pair hp = drgn_dwarf_type_map_hash(&entry.key); + struct drgn_dwarf_type_map_iterator it = + drgn_dwarf_type_map_search_hashed(&dbinfo->types, &entry.key, + hp); + if (it.entry) { + 
if (!can_be_incomplete_array && + it.entry->value.is_incomplete_array) { + it = drgn_dwarf_type_map_search_hashed(&dbinfo->cant_be_incomplete_array_types, + &entry.key, hp); + } + if (it.entry) { + ret->type = it.entry->value.type; + ret->qualifiers = it.entry->value.qualifiers; + return NULL; + } + } + + const struct drgn_language *lang; + struct drgn_error *err = drgn_language_from_die(die, &lang); + if (err) + return err; + + ret->qualifiers = 0; + dbinfo->depth++; + entry.value.is_incomplete_array = false; + switch (dwarf_tag(die)) { + case DW_TAG_const_type: + err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_const_type", true, + true, NULL, ret); + ret->qualifiers |= DRGN_QUALIFIER_CONST; + break; + case DW_TAG_restrict_type: + err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_restrict_type", true, + true, NULL, ret); + ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; + break; + case DW_TAG_volatile_type: + err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_volatile_type", true, + true, NULL, ret); + ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; + break; + case DW_TAG_atomic_type: + err = drgn_type_from_dwarf_child(dbinfo, die, + drgn_language_or_default(lang), + "DW_TAG_atomic_type", true, + true, NULL, ret); + ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; + break; + case DW_TAG_base_type: + err = drgn_base_type_from_dwarf(dbinfo, die, lang, &ret->type); + break; + case DW_TAG_structure_type: + err = drgn_compound_type_from_dwarf(dbinfo, die, lang, + DRGN_TYPE_STRUCT, + &ret->type); + break; + case DW_TAG_union_type: + err = drgn_compound_type_from_dwarf(dbinfo, die, lang, + DRGN_TYPE_UNION, + &ret->type); + break; + case DW_TAG_class_type: + err = drgn_compound_type_from_dwarf(dbinfo, die, lang, + DRGN_TYPE_CLASS, + &ret->type); + break; + case DW_TAG_enumeration_type: + err = drgn_enum_type_from_dwarf(dbinfo, die, lang, &ret->type); + break; + case 
DW_TAG_typedef: + err = drgn_typedef_type_from_dwarf(dbinfo, die, lang, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + &ret->type); + break; + case DW_TAG_pointer_type: + err = drgn_pointer_type_from_dwarf(dbinfo, die, lang, + &ret->type); + break; + case DW_TAG_array_type: + err = drgn_array_type_from_dwarf(dbinfo, die, lang, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + &ret->type); + break; + case DW_TAG_subroutine_type: + case DW_TAG_subprogram: + err = drgn_function_type_from_dwarf(dbinfo, die, lang, + &ret->type); + break; + default: + err = drgn_error_format(DRGN_ERROR_OTHER, + "unknown DWARF type tag 0x%x", + dwarf_tag(die)); + break; + } + dbinfo->depth--; + if (err) + return err; + + entry.value.type = ret->type; + entry.value.qualifiers = ret->qualifiers; + struct drgn_dwarf_type_map *map; + if (!can_be_incomplete_array && entry.value.is_incomplete_array) + map = &dbinfo->cant_be_incomplete_array_types; + else + map = &dbinfo->types; + if (drgn_dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { + /* + * This will "leak" the type we created, but it'll still be + * cleaned up when the program is freed. 
+ */ + return &drgn_enomem; + } + if (is_incomplete_array_ret) + *is_incomplete_array_ret = entry.value.is_incomplete_array; + return NULL; +} + +struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, + const char *name, size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + struct drgn_debug_info *dbinfo = arg; + + uint64_t tag; + switch (kind) { + case DRGN_TYPE_INT: + case DRGN_TYPE_BOOL: + case DRGN_TYPE_FLOAT: + tag = DW_TAG_base_type; + break; + case DRGN_TYPE_STRUCT: + tag = DW_TAG_structure_type; + break; + case DRGN_TYPE_UNION: + tag = DW_TAG_union_type; + break; + case DRGN_TYPE_CLASS: + tag = DW_TAG_class_type; + break; + case DRGN_TYPE_ENUM: + tag = DW_TAG_enumeration_type; + break; + case DRGN_TYPE_TYPEDEF: + tag = DW_TAG_typedef; + break; + default: + UNREACHABLE(); + } + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, + name_len, &tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die, NULL); + if (err) + return err; + if (die_matches_filename(&die, filename)) { + err = drgn_type_from_dwarf(dbinfo, &die, ret); + if (err) + return err; + /* + * For DW_TAG_base_type, we need to check that the type + * we found was the right kind. 
+ */ + if (drgn_type_kind(ret->type) == kind) + return NULL; + } + } + return &drgn_not_found; +} + +static struct drgn_error * +drgn_object_from_dwarf_enumerator(struct drgn_debug_info *dbinfo, + Dwarf_Die *die, const char *name, + struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_qualified_type qualified_type; + const struct drgn_type_enumerator *enumerators; + size_t num_enumerators, i; + + err = drgn_type_from_dwarf(dbinfo, die, &qualified_type); + if (err) + return err; + enumerators = drgn_type_enumerators(qualified_type.type); + num_enumerators = drgn_type_num_enumerators(qualified_type.type); + for (i = 0; i < num_enumerators; i++) { + if (strcmp(enumerators[i].name, name) != 0) + continue; + + if (drgn_enum_type_is_signed(qualified_type.type)) { + return drgn_object_set_signed(ret, qualified_type, + enumerators[i].svalue, 0); + } else { + return drgn_object_set_unsigned(ret, qualified_type, + enumerators[i].uvalue, + 0); + } + } + UNREACHABLE(); +} + +static struct drgn_error * +drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, + Dwarf_Die *die, uint64_t bias, + const char *name, struct drgn_object *ret) +{ + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_type_from_dwarf(dbinfo, die, + &qualified_type); + if (err) + return err; + Dwarf_Addr low_pc; + if (dwarf_lowpc(die, &low_pc) == -1) { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find address of '%s'", + name); + } + enum drgn_byte_order byte_order; + dwarf_die_byte_order(die, false, &byte_order); + return drgn_object_set_reference(ret, qualified_type, low_pc + bias, 0, + 0, byte_order); +} + +static struct drgn_error * +drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + Dwarf_Attribute *attr, struct drgn_object *ret) +{ + struct drgn_object_type type; + enum drgn_object_kind kind; + uint64_t bit_size; + struct drgn_error *err = 
drgn_object_set_common(qualified_type, 0, + &type, &kind, + &bit_size); + if (err) + return err; + Dwarf_Block block; + if (dwarf_formblock(attr, &block) == 0) { + bool little_endian; + err = dwarf_die_is_little_endian(die, true, &little_endian); + if (err) + return err; + if (block.length < drgn_value_size(bit_size, 0)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_const_value block is too small"); + } + return drgn_object_set_buffer_internal(ret, &type, kind, + bit_size, block.data, 0, + little_endian); + } else if (kind == DRGN_OBJECT_SIGNED) { + Dwarf_Sword svalue; + if (dwarf_formsdata(attr, &svalue)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_const_value"); + } + return drgn_object_set_signed_internal(ret, &type, bit_size, + svalue); + } else if (kind == DRGN_OBJECT_UNSIGNED) { + Dwarf_Word uvalue; + if (dwarf_formudata(attr, &uvalue)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_const_value"); + } + return drgn_object_set_unsigned_internal(ret, &type, bit_size, + uvalue); + } else { + return drgn_error_create(DRGN_ERROR_OTHER, + "unknown DW_AT_const_value form"); + } +} + +static struct drgn_error * +drgn_object_from_dwarf_variable(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + uint64_t bias, const char *name, + struct drgn_object *ret) +{ + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_type_from_dwarf_child(dbinfo, die, NULL, + "DW_TAG_variable", + true, true, NULL, + &qualified_type); + if (err) + return err; + Dwarf_Attribute attr_mem, *attr; + if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { + Dwarf_Op *loc; + size_t nloc; + if (dwarf_getlocation(attr, &loc, &nloc)) + return drgn_error_libdw(); + if (nloc != 1 || loc[0].atom != DW_OP_addr) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_location has unimplemented operation"); + } + enum drgn_byte_order byte_order; + err = dwarf_die_byte_order(die, true, &byte_order); + if (err) + return err; + 
return drgn_object_set_reference(ret, qualified_type, + loc[0].number + bias, 0, 0, + byte_order); + } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, + &attr_mem))) { + return drgn_object_from_dwarf_constant(dbinfo, die, + qualified_type, attr, + ret); + } else { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find address or value of '%s'", + name); + } +} + +struct drgn_error * +drgn_debug_info_find_object(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_debug_info *dbinfo = arg; + + struct drgn_dwarf_index_namespace *ns = &dbinfo->dindex.global; + if (name_len >= 2 && memcmp(name, "::", 2) == 0) { + /* Explicit global namespace. */ + name_len -= 2; + name += 2; + } + const char *colons; + while ((colons = memmem(name, name_len, "::", 2))) { + struct drgn_dwarf_index_iterator it; + uint64_t ns_tag = DW_TAG_namespace; + err = drgn_dwarf_index_iterator_init(&it, ns, name, + colons - name, &ns_tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_not_found; + ns = index_die->namespace; + name_len -= colons + 2 - name; + name = colons + 2; + } + + uint64_t tags[3]; + size_t num_tags = 0; + if (flags & DRGN_FIND_OBJECT_CONSTANT) + tags[num_tags++] = DW_TAG_enumerator; + if (flags & DRGN_FIND_OBJECT_FUNCTION) + tags[num_tags++] = DW_TAG_subprogram; + if (flags & DRGN_FIND_OBJECT_VARIABLE) + tags[num_tags++] = DW_TAG_variable; + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, ns, name, strlen(name), tags, + num_tags); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + uint64_t bias; + err = drgn_dwarf_index_get_die(index_die, &die, &bias); + if (err) + return err; + if 
(!die_matches_filename(&die, filename)) + continue; + switch (dwarf_tag(&die)) { + case DW_TAG_enumeration_type: + return drgn_object_from_dwarf_enumerator(dbinfo, &die, + name, ret); + case DW_TAG_subprogram: + return drgn_object_from_dwarf_subprogram(dbinfo, &die, + bias, name, + ret); + case DW_TAG_variable: + return drgn_object_from_dwarf_variable(dbinfo, &die, + bias, name, ret); + default: + UNREACHABLE(); + } + } + return &drgn_not_found; +} + +struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, + struct drgn_debug_info **ret) +{ + struct drgn_debug_info *dbinfo = malloc(sizeof(*dbinfo)); + if (!dbinfo) + return &drgn_enomem; + dbinfo->prog = prog; + const Dwfl_Callbacks *dwfl_callbacks; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + dwfl_callbacks = &drgn_dwfl_callbacks; + else if (prog->flags & DRGN_PROGRAM_IS_LIVE) + dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; + else + dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; + dbinfo->dwfl = dwfl_begin(dwfl_callbacks); + if (!dbinfo->dwfl) { + free(dbinfo); + return drgn_error_libdwfl(); + } + drgn_debug_info_module_table_init(&dbinfo->modules); + c_string_set_init(&dbinfo->module_names); + drgn_dwarf_index_init(&dbinfo->dindex); + drgn_dwarf_type_map_init(&dbinfo->types); + drgn_dwarf_type_map_init(&dbinfo->cant_be_incomplete_array_types); + dbinfo->depth = 0; + *ret = dbinfo; + return NULL; +} + +void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) +{ + if (!dbinfo) + return; + drgn_dwarf_type_map_deinit(&dbinfo->cant_be_incomplete_array_types); + drgn_dwarf_type_map_deinit(&dbinfo->types); + drgn_dwarf_index_deinit(&dbinfo->dindex); + c_string_set_deinit(&dbinfo->module_names); + drgn_debug_info_free_modules(dbinfo, false, true); + assert(drgn_debug_info_module_table_empty(&dbinfo->modules)); + drgn_debug_info_module_table_deinit(&dbinfo->modules); + dwfl_end(dbinfo->dwfl); + free(dbinfo); +} + +struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf 
**elf_ret) +{ + struct drgn_error *err; + + *fd_ret = open(path, O_RDONLY); + if (*fd_ret == -1) + return drgn_error_create_os("open", errno, path); + *elf_ret = dwelf_elf_begin(*fd_ret); + if (!*elf_ret) { + err = drgn_error_libelf(); + goto err_fd; + } + if (elf_kind(*elf_ret) != ELF_K_ELF) { + err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); + goto err_elf; + } + return NULL; + +err_elf: + elf_end(*elf_ret); +err_fd: + close(*fd_ret); + return err; +} + +struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, + const char * const *path_formats, ...) +{ + struct drgn_error *err; + size_t i; + + for (i = 0; path_formats[i]; i++) { + va_list ap; + int ret; + char *path; + int fd; + Elf *elf; + + va_start(ap, path_formats); + ret = vasprintf(&path, path_formats[i], ap); + va_end(ap); + if (ret == -1) + return &drgn_enomem; + fd = open(path, O_RDONLY); + if (fd == -1) { + free(path); + continue; + } + elf = dwelf_elf_begin(fd); + if (!elf) { + close(fd); + free(path); + continue; + } + if (elf_kind(elf) != ELF_K_ELF) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s: not an ELF file", path); + elf_end(elf); + close(fd); + free(path); + return err; + } + *path_ret = path; + *fd_ret = fd; + *elf_ret = elf; + return NULL; + } + *path_ret = NULL; + *fd_ret = -1; + *elf_ret = NULL; + return NULL; +} + +struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) +{ + GElf_Shdr shdr_mem, *shdr; + Elf_Data *data; + + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + if ((shdr->sh_flags & SHF_COMPRESSED) && elf_compress(scn, 0, 0) < 0) + return drgn_error_libelf(); + data = elf_getdata(scn, NULL); + if (!data) + return drgn_error_libelf(); + *ret = data; + return NULL; +} + +struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, + uint64_t *start_ret, uint64_t *end_ret) +{ + uint64_t start = UINT64_MAX, end = 0; + size_t phnum, i; + + /* + * Get the minimum and maximum addresses from the PT_LOAD 
segments. We + * ignore memory ranges that start beyond UINT64_MAX, and we truncate + * ranges that end beyond UINT64_MAX. + */ + if (elf_getphdrnum(elf, &phnum) != 0) + return drgn_error_libelf(); + for (i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr; + uint64_t segment_start, segment_end; + + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type != PT_LOAD || !phdr->p_vaddr) + continue; + if (__builtin_add_overflow(phdr->p_vaddr, bias, + &segment_start)) + continue; + if (__builtin_add_overflow(segment_start, phdr->p_memsz, + &segment_end)) + segment_end = UINT64_MAX; + if (segment_start < segment_end) { + if (segment_start < start) + start = segment_start; + if (segment_end > end) + end = segment_end; + } + } + if (start >= end) { + return drgn_error_create(DRGN_ERROR_OTHER, + "ELF file has no loadable segments"); + } + *start_ret = start; + *end_ret = end; + return NULL; +} diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h new file mode 100644 index 000000000..ab0545c4f --- /dev/null +++ b/libdrgn/debug_info.h @@ -0,0 +1,285 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0+ + +/** + * @file + * + * Debugging information handling. + * + * See @ref DebugInfo. + */ + +#ifndef DRGN_DEBUG_INFO_H +#define DRGN_DEBUG_INFO_H + +#include +#include + +#include "drgn.h" +#include "dwarf_index.h" +#include "hash_table.h" +#include "string_builder.h" +#include "vector.h" + +/** + * @ingroup Internals + * + * @defgroup DebugInfo Debugging information cache + * + * Caching of debugging information. + * + * @ref drgn_debug_info caches debugging information (currently only DWARF). It + * translates the debugging information to types and objects. + * + * @{ + */ + +/** State of a @ref drgn_debug_info_module. */ +enum drgn_debug_info_module_state { + /** Reported but not indexed. */ + DRGN_DEBUG_INFO_MODULE_NEW, + /** Reported and will be indexed on success. 
*/ + DRGN_DEBUG_INFO_MODULE_INDEXING, + /** Indexed. Must not be freed until @ref drgn_debug_info_destroy(). */ + DRGN_DEBUG_INFO_MODULE_INDEXED, +} __attribute__((packed)); + +/** + * A module reported to a @ref drgn_debug_info. + * + * Conceptually, a module is an ELF file loaded at a specific address range (or + * not loaded). + * + * Files are identified by canonical path and, if present, build ID. Each (path, + * address range) is uniquely represented by a @ref drgn_debug_info_module. + */ +struct drgn_debug_info_module { + /** @c NULL if the module does not have a build ID. */ + const void *build_id; + /** Zero if the module does not have a build ID. */ + size_t build_id_len; + /** Load address range, or both 0 if not loaded. */ + uint64_t start, end; + /** Optional module name allocated with @c malloc(). */ + char *name; + + Dwfl_Module *dwfl_module; + Elf_Data *debug_info; + Elf_Data *debug_abbrev; + Elf_Data *debug_str; + Elf_Data *debug_line; + + /* + * path, elf, and fd are used when an ELF file was reported with + * drgn_debug_info_report_elf() so we can report the file to libdwfl + * later. They are not valid after loading. + */ + char *path; + Elf *elf; + int fd; + enum drgn_debug_info_module_state state; + bool bswap; + /** Error while loading. */ + struct drgn_error *err; + /** + * Next module with same build ID and address range. + * + * There may be multiple files with the same build ID (e.g., a stripped + * binary and its corresponding separate debug info file). While + * loading, all files with the same build ID and address range are + * linked in a list. Only one is indexed; the rest are destroyed. 
+ */ + struct drgn_debug_info_module *next; +}; + +struct drgn_debug_info_module_key { + const void *build_id; + size_t build_id_len; + uint64_t start, end; +}; + +static inline struct drgn_debug_info_module_key +drgn_debug_info_module_key(struct drgn_debug_info_module * const *entry) +{ + return (struct drgn_debug_info_module_key){ + .build_id = (*entry)->build_id, + .build_id_len = (*entry)->build_id_len, + .start = (*entry)->start, + .end = (*entry)->end, + }; +} +DEFINE_HASH_TABLE_TYPE(drgn_debug_info_module_table, + struct drgn_debug_info_module *, + drgn_debug_info_module_key) + +DEFINE_HASH_SET_TYPE(c_string_set, const char *) + +/** Cached type in a @ref drgn_debug_info. */ +struct drgn_dwarf_type { + struct drgn_type *type; + enum drgn_qualifiers qualifiers; + /** + * Whether this is an incomplete array type or a typedef of one. + * + * This is used to work around a GCC bug; see @ref + * drgn_type_from_dwarf_internal(). + */ + bool is_incomplete_array; +}; + +DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type); + +/** Cache of debugging information. */ +struct drgn_debug_info { + /** Program owning this cache. */ + struct drgn_program *prog; + + /** DWARF frontend library handle. */ + Dwfl *dwfl; + /** Modules keyed by build ID and address range. */ + struct drgn_debug_info_module_table modules; + /** + * Names of indexed modules. + * + * The entries in this set are @ref drgn_debug_info_module::name, so + * they should not be freed. + */ + struct c_string_set module_names; + /** Index of DWARF debugging information. */ + struct drgn_dwarf_index dindex; + + /** + * Cache of parsed types. + * + * The key is the address of the DIE (@c Dwarf_Die::addr). The value is + * a @ref drgn_dwarf_type. + */ + struct drgn_dwarf_type_map types; + /** + * Cache of parsed types which appear to be incomplete array types but + * can't be. + * + * See @ref drgn_type_from_dwarf_internal(). 
+ */ + struct drgn_dwarf_type_map cant_be_incomplete_array_types; + /** Current parsing recursion depth. */ + int depth; +}; + +/** Create a @ref drgn_debug_info. */ +struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, + struct drgn_debug_info **ret); + +/** Destroy a @ref drgn_debug_info. */ +void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo); + +DEFINE_VECTOR_TYPE(drgn_debug_info_module_vector, + struct drgn_debug_info_module *) + +/** State tracked while loading debugging information. */ +struct drgn_debug_info_load_state { + struct drgn_debug_info * const dbinfo; + const char ** const paths; + const size_t num_paths; + const bool load_default; + const bool load_main; + /** Newly added modules to be indexed. */ + struct drgn_debug_info_module_vector new_modules; + /** Formatted errors reported by @ref drgn_debug_info_report_error(). */ + struct string_builder errors; + /** Number of errors reported by @ref drgn_debug_info_report_error(). */ + unsigned int num_errors; + /** Maximum number of errors to report before truncating. */ + unsigned int max_errors; +}; + +/** + * Report a non-fatal error while loading debugging information. + * + * The error will be included in a @ref DRGN_ERROR_MISSING_DEBUG_INFO error + * returned by @ref drgn_debug_info_load(). + * + * @param[in] name An optional module name to prefix to the error message. + * @param[in] message An optional message with additional context to prefix + * to the error message. + * @param[in] err The error to report. This may be @c NULL if @p name and @p + * message provide sufficient information. This is destroyed on either success + * or failure. + * @return @c NULL on success, @ref drgn_enomem if the error could not be + * reported. + */ +struct drgn_error * +drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, + const char *name, const char *message, + struct drgn_error *err); + +/** + * Report a module to a @ref drgn_debug_info from an ELF file. 
+ * + * This takes ownership of @p fd and @p elf on either success or failure. They + * should not be used (including closed or freed) after this returns. + * + * @param[in] path The path to the file. + * @param[in] fd A file descriptor referring to the file. + * @param[in] elf The Elf handle of the file. + * @param[in] start The (inclusive) start address of the loaded file, or 0 if + * the file is not loaded. + * @param[in] end The (exclusive) end address of the loaded file, or 0 if the + * file is not loaded. + * @param[in] name An optional name for the module. This is only used for @ref + * drgn_debug_info_is_indexed(). + * @param[out] new_ret Whether the module was newly created and reported. This + * is @c false if a module with the same build ID and address range was already + * loaded or a file with the same path and address range was already reported. + */ +struct drgn_error * +drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, + const char *path, int fd, Elf *elf, uint64_t start, + uint64_t end, const char *name, bool *new_ret); + +/** Index new debugging information and continue reporting. */ +struct drgn_error * +drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load); + +/** + * Load debugging information. + * + * @sa drgn_program_load_debug_info + */ +struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, + const char **paths, size_t n, + bool load_default, bool load_main); + +/** + * Return whether a @ref drgn_debug_info has indexed a module with the given + * name. + */ +bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, + const char *name); + +/** @ref drgn_type_find_fn() that uses debugging information. */ +struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, + const char *name, size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret); + +/** @ref drgn_object_find_fn() that uses debugging information. 
+ */ +struct drgn_error * +drgn_debug_info_find_object(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret); + +struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret); + +struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, + const char * const *path_formats, ...); + +struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); + +struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, + uint64_t *start_ret, uint64_t *end_ret); + +/** @} */ + +#endif /* DRGN_DEBUG_INFO_H */ diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 9c3804e95..cfa867f4f 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -10,11 +10,14 @@ #ifndef DRGN_H #define DRGN_H -#include -#include +// IWYU pragma: begin_exports #include #include #include +// IWYU pragma: end_exports + +#include +#include #include #include @@ -210,6 +213,19 @@ struct drgn_error *drgn_error_format_fault(uint64_t address, const char *format, ...) __attribute__((returns_nonnull,format(printf, 2, 3))); +/** + * Create a copy of a @ref drgn_error. + * + * The source's error message and path are copied if necessary, so the source + * error can be destroyed without affecting the new error and vice versa. + * + * @param[in] src Error to copy. + * @return A new error with the same fields. If there is a failure to allocate + * memory, @ref drgn_enomem is returned instead. + */ +struct drgn_error *drgn_error_copy(struct drgn_error *src) + __attribute__((returns_nonnull)); + /** * Write a @ref drgn_error to a @c stdio stream. * @@ -235,9 +251,6 @@ void drgn_error_destroy(struct drgn_error *err); /** @} */ -struct drgn_type; -struct drgn_type_thunk; - /** * @ingroup Types * @@ -425,6 +438,7 @@ struct drgn_type { enum drgn_primitive_type primitive; /* These are the qualifiers for the wrapped type, not this type. 
*/ enum drgn_qualifiers qualifiers; + struct drgn_program *program; const struct drgn_language *language; /* * This mess of unions is used to make this as compact as possible. Use @@ -493,6 +507,12 @@ static inline bool drgn_type_is_complete(struct drgn_type *type) return type->_private.is_complete; } +static inline struct drgn_program * +drgn_type_program(struct drgn_type *type) +{ + return type->_private.program; +} + /** Get the language of a type. */ static inline const struct drgn_language * drgn_type_language(struct drgn_type *type) @@ -1628,8 +1648,6 @@ static inline bool drgn_value_is_inline(uint64_t bit_size, uint64_t bit_offset) * provided functions. */ struct drgn_object { - /** Program that this object belongs to. */ - struct drgn_program *prog; /** Type of this object. */ struct drgn_type *type; /** @@ -1727,6 +1745,12 @@ void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog); */ void drgn_object_deinit(struct drgn_object *obj); +/** Get the program that a @ref drgn_object is from. */ +static inline struct drgn_program * +drgn_object_program(const struct drgn_object *obj) +{ + return drgn_type_program(obj->type); +} /** Get the language of a @ref drgn_object from its type. 
*/ static inline const struct drgn_language * diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index b78d84e3b..5c7d05b37 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -3,176 +3,19 @@ #include #include -#include -#include -#include -#include #include #include -#include #include #include -#include -#include -#include -#include "internal.h" +#include "debug_info.h" +#include "drgn.h" #include "dwarf_index.h" -#include "read.h" +#include "error.h" +#include "mread.h" +#include "path.h" #include "siphash.h" -#include "string_builder.h" - -DEFINE_VECTOR_FUNCTIONS(dwfl_module_vector) -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_module_vector) - -static inline struct hash_pair -drgn_dwarf_module_hash(const struct drgn_dwarf_module_key *key) -{ - size_t hash; - - hash = cityhash_size_t(key->build_id, key->build_id_len); - hash = hash_combine(hash, key->start); - hash = hash_combine(hash, key->end); - return hash_pair_from_avalanching_hash(hash); -} -static inline bool drgn_dwarf_module_eq(const struct drgn_dwarf_module_key *a, - const struct drgn_dwarf_module_key *b) -{ - return (a->build_id_len == b->build_id_len && - (a->build_id_len == 0 || - memcmp(a->build_id, b->build_id, a->build_id_len) == 0) && - a->start == b->start && a->end == b->end); -} -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_module_table, drgn_dwarf_module_hash, - drgn_dwarf_module_eq) - -DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_hash, c_string_eq) - -/** - * @c Dwfl_Callbacks::find_elf() implementation. - * - * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c - * Elf handle, which we need for a couple of reasons: - * - * - We usually already have the @c Elf handle open in order to identify the - * file. - * - For kernel modules, we set the section addresses in the @c Elf handle - * ourselves instead of using @c Dwfl_Callbacks::section_address(). - * - * Additionally, there's a special case for vmlinux. 
It is usually an @c ET_EXEC - * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN - * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but - * @ref dwfl_report_elf() bypasses this hack. - * - * So, we're stuck using @c dwfl_report_module() and this dummy callback. - */ -static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap, - const char *name, Dwarf_Addr base, - char **file_name, Elf **elfp) -{ - struct drgn_dwfl_module_userdata *userdata = *userdatap; - int fd; - - /* - * libdwfl consumes the returned path, file descriptor, and ELF handle, - * so clear the fields. - */ - *file_name = userdata->path; - fd = userdata->fd; - *elfp = userdata->elf; - userdata->path = NULL; - userdata->fd = -1; - userdata->elf = NULL; - return fd; -} - -/* - * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls - * back to dwfl_linux_proc_find_elf() otherwise. - */ -static int drgn_dwfl_linux_proc_find_elf(Dwfl_Module *dwfl_module, - void **userdatap, const char *name, - Dwarf_Addr base, char **file_name, - Elf **elfp) -{ - struct drgn_dwfl_module_userdata *userdata = *userdatap; - - if (userdata->elf) { - return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); - } - return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); -} - -/* - * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls - * back to dwfl_build_id_find_elf() otherwise. - */ -static int drgn_dwfl_build_id_find_elf(Dwfl_Module *dwfl_module, - void **userdatap, const char *name, - Dwarf_Addr base, char **file_name, - Elf **elfp) -{ - struct drgn_dwfl_module_userdata *userdata = *userdatap; - - if (userdata->elf) { - return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); - } - return dwfl_build_id_find_elf(dwfl_module, userdatap, name, base, - file_name, elfp); -} - -/** - * @c Dwfl_Callbacks::section_address() implementation. 
- * - * We set the section header @c sh_addr in memory instead of using this, but - * libdwfl requires the callback pointer to be non-@c NULL. It will be called - * for any sections that still have a zero @c sh_addr, meaning they are not - * present in memory. - */ -static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap, - const char *name, Dwarf_Addr base, - const char *secname, Elf32_Word shndx, - const GElf_Shdr *shdr, Dwarf_Addr *addr) -{ - *addr = -1; - return DWARF_CB_OK; -} - -const Dwfl_Callbacks drgn_dwfl_callbacks = { - .find_elf = drgn_dwfl_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; - -const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks = { - .find_elf = drgn_dwfl_linux_proc_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; - -const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = { - .find_elf = drgn_dwfl_build_id_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; - -enum { - SECTION_DEBUG_INFO, - SECTION_DEBUG_ABBREV, - SECTION_DEBUG_STR, - SECTION_DEBUG_LINE, - DRGN_DWARF_INDEX_NUM_SECTIONS, -}; - -static const char * const section_name[DRGN_DWARF_INDEX_NUM_SECTIONS] = { - [SECTION_DEBUG_INFO] = ".debug_info", - [SECTION_DEBUG_ABBREV] = ".debug_abbrev", - [SECTION_DEBUG_STR] = ".debug_str", - [SECTION_DEBUG_LINE] = ".debug_line", -}; +#include "util.h" /* * The DWARF abbreviation table gets translated into a series of instructions. @@ -180,12 +23,12 @@ static const char * const section_name[DRGN_DWARF_INDEX_NUM_SECTIONS] = { * over. The next few instructions mean that the corresponding attribute can be * skipped over. The remaining instructions indicate that the corresponding * attribute should be parsed. 
Finally, every sequence of instructions - * corresponding to a DIE is terminated by a zero byte followed by a bitmask of - * TAG_FLAG_* bits combined with the DWARF tag (which may be set to zero if the - * tag is not of interest). + * corresponding to a DIE is terminated by a zero byte followed by the DIE + * flags, which are a bitmask of flags combined with the DWARF tag (which may be + * set to zero if the tag is not of interest); see DIE_FLAG_*. */ enum { - INSN_MAX_SKIP = 229, + INSN_MAX_SKIP = 226, ATTRIB_BLOCK1, ATTRIB_BLOCK2, ATTRIB_BLOCK4, @@ -207,1182 +50,251 @@ enum { ATTRIB_DECL_FILE_DATA4, ATTRIB_DECL_FILE_DATA8, ATTRIB_DECL_FILE_UDATA, + ATTRIB_DECLARATION_FLAG, ATTRIB_SPECIFICATION_REF1, ATTRIB_SPECIFICATION_REF2, ATTRIB_SPECIFICATION_REF4, ATTRIB_SPECIFICATION_REF8, ATTRIB_SPECIFICATION_REF_UDATA, - ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_UDATA, + ATTRIB_SPECIFICATION_REF_ADDR4, + ATTRIB_SPECIFICATION_REF_ADDR8, + ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_ADDR8, }; enum { - /* Maximum number of bits used by the tags we care about. */ - TAG_BITS = 6, - TAG_MASK = (1 << TAG_BITS) - 1, + /* Mask of tags that we care about. */ + DIE_FLAG_TAG_MASK = 0x3f, /* The remaining bits can be used for other purposes. */ - TAG_FLAG_DECLARATION = 0x40, - TAG_FLAG_CHILDREN = 0x80, + DIE_FLAG_DECLARATION = 0x40, + DIE_FLAG_CHILDREN = 0x80, }; DEFINE_VECTOR(uint8_vector, uint8_t) DEFINE_VECTOR(uint32_vector, uint32_t) DEFINE_VECTOR(uint64_vector, uint64_t) -struct abbrev_table { - /* - * This is indexed on the DWARF abbreviation code minus one. It maps the - * abbreviation code to an index in insns where the instruction stream - * for that code begins. - * - * Technically, abbreviation codes don't have to be sequential. In - * practice, GCC seems to always generate sequential codes starting at - * one, so we can get away with a flat array. 
- */ - struct uint32_vector decls; - struct uint8_vector insns; -}; - -#define ABBREV_TABLE_INIT { VECTOR_INIT, VECTOR_INIT } - -static void abbrev_table_deinit(struct abbrev_table *abbrev) -{ - uint8_vector_deinit(&abbrev->insns); - uint32_vector_deinit(&abbrev->decls); -} - -struct compilation_unit { - Dwfl_Module *module; - Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS]; +struct drgn_dwarf_index_cu { + struct drgn_debug_info_module *module; const char *ptr; - uint64_t unit_length; - uint64_t debug_abbrev_offset; + const char *end; + uint8_t version; uint8_t address_size; bool is_64_bit; bool bswap; -}; - -static inline const char *section_ptr(Elf_Data *data, size_t offset) -{ - return &((char *)data->d_buf)[offset]; -} - -static inline const char *section_end(Elf_Data *data) -{ - return section_ptr(data, data->d_size); -} - -/* - * An indexed DIE. - * - * DIEs with the same name but different tags or files are considered distinct. - * We only compare the hash of the file name, not the string value, because a - * 64-bit collision is unlikely enough, especially when also considering the - * name and tag. - */ -struct drgn_dwarf_index_die { - uint64_t tag; - uint64_t file_name_hash; - /* - * The next DIE with the same name (as an index into - * drgn_dwarf_index_shard::dies), or SIZE_MAX if this is the last DIE. - */ - size_t next; - Dwfl_Module *module; - uint64_t offset; -}; - -/* - * The key is the DIE name. The value is the first DIE with that name (as an - * index into drgn_dwarf_index_shard::dies). - */ -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash, string_eq) -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) - -static inline size_t hash_pair_to_shard(struct hash_pair hp) -{ - /* - * The 8 most significant bits of the hash are used as the F14 tag, so - * we don't want to use those for sharding. 
- */ - return ((hp.first >> - (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & - (((size_t)1 << DRGN_DWARF_INDEX_SHARD_BITS) - 1)); -} - -static inline struct drgn_error *drgn_eof(void) -{ - return drgn_error_create(DRGN_ERROR_OTHER, - "debug information is truncated"); -} - -static inline bool skip_leb128(const char **ptr, const char *end) -{ - for (;;) { - if (*ptr >= end) - return false; - if (!(*(const uint8_t *)(*ptr)++ & 0x80)) - return true; - } -} - -static inline struct drgn_error *read_uleb128(const char **ptr, const char *end, - uint64_t *value) -{ - int shift = 0; - uint8_t byte; - - *value = 0; - for (;;) { - if (*ptr >= end) - return drgn_eof(); - byte = *(const uint8_t *)*ptr; - (*ptr)++; - if (shift == 63 && byte > 1) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "ULEB128 overflowed unsigned 64-bit integer"); - } - *value |= (uint64_t)(byte & 0x7f) << shift; - shift += 7; - if (!(byte & 0x80)) - break; - } - return NULL; -} - -static inline struct drgn_error *read_uleb128_into_size_t(const char **ptr, - const char *end, - size_t *value) -{ - struct drgn_error *err; - uint64_t tmp; - - if ((err = read_uleb128(ptr, end, &tmp))) - return err; - - if (tmp > SIZE_MAX) - return drgn_eof(); - *value = tmp; - return NULL; -} - -static void free_shards(struct drgn_dwarf_index *dindex, size_t n) -{ - size_t i; - - for (i = 0; i < n; i++) { - drgn_dwarf_index_die_vector_deinit(&dindex->shards[i].dies); - drgn_dwarf_index_die_map_deinit(&dindex->shards[i].map); - omp_destroy_lock(&dindex->shards[i].lock); - } -} - -static void drgn_dwarf_module_destroy(struct drgn_dwarf_module *module) -{ - if (module) { - dwfl_module_vector_deinit(&module->dwfl_modules); - free(module->name); - free(module->build_id); - free(module); - } -} - -static void -drgn_dwfl_module_userdata_destroy(struct drgn_dwfl_module_userdata *userdata) -{ - if (userdata) { - elf_end(userdata->elf); - if (userdata->fd != -1) - close(userdata->fd); - free(userdata->path); - 
free(userdata); - } -} - -struct drgn_dwfl_module_removed_arg { - Dwfl *dwfl; - bool finish_indexing; - bool free_all; -}; - -static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap, - const char *name, Dwarf_Addr base, - void *_arg) -{ - struct drgn_dwfl_module_removed_arg *arg = _arg; - /* - * userdatap is actually a void ** like for the other libdwfl callbacks, - * but dwfl_report_end() has the wrong signature for the removed - * callback. - */ - struct drgn_dwfl_module_userdata *userdata = *(void **)userdatap; - - if (arg->finish_indexing && userdata && - userdata->state == DRGN_DWARF_MODULE_INDEXING) - userdata->state = DRGN_DWARF_MODULE_INDEXED; - if (arg->free_all || !userdata || - userdata->state != DRGN_DWARF_MODULE_INDEXED) { - drgn_dwfl_module_userdata_destroy(userdata); - } else { - Dwarf_Addr end; - - /* - * The module was already indexed. Report it again so libdwfl - * doesn't remove it. - */ - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, - NULL, NULL); - dwfl_report_module(arg->dwfl, name, base, end); - } - return DWARF_CB_OK; -} - -static void drgn_dwarf_module_finish_indexing(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_module *module) -{ - module->state = DRGN_DWARF_MODULE_INDEXED; - /* - * We don't need this anymore (but reinitialize it to empty so that - * drgn_dwarf_index_get_unindexed() skips this module). - */ - dwfl_module_vector_deinit(&module->dwfl_modules); - dwfl_module_vector_init(&module->dwfl_modules); - if (module->name) { - int ret; - - ret = c_string_set_insert(&dindex->names, - (const char **)&module->name, NULL); - /* drgn_dwarf_index_get_unindexed() should've reserved enough for us. 
*/ - assert(ret != -1); - } -} - -static void drgn_dwarf_index_free_modules(struct drgn_dwarf_index *dindex, - bool finish_indexing, bool free_all) -{ - struct drgn_dwfl_module_removed_arg arg = { - .dwfl = dindex->dwfl, - .finish_indexing = finish_indexing, - .free_all = free_all, - }; - struct drgn_dwarf_module_table_iterator it; - size_t i; - - for (it = drgn_dwarf_module_table_first(&dindex->module_table); - it.entry; ) { - struct drgn_dwarf_module *module = *it.entry; - - if (finish_indexing && - module->state == DRGN_DWARF_MODULE_INDEXING) - drgn_dwarf_module_finish_indexing(dindex, module); - if (free_all || module->state != DRGN_DWARF_MODULE_INDEXED) { - it = drgn_dwarf_module_table_delete_iterator(&dindex->module_table, - it); - drgn_dwarf_module_destroy(module); - } else { - it = drgn_dwarf_module_table_next(it); - } - } - - for (i = dindex->no_build_id.size; i-- > 0; ) { - struct drgn_dwarf_module *module = dindex->no_build_id.data[i]; - - if (finish_indexing && - module->state == DRGN_DWARF_MODULE_INDEXING) - drgn_dwarf_module_finish_indexing(dindex, module); - if (free_all || module->state != DRGN_DWARF_MODULE_INDEXED) { - dindex->no_build_id.size--; - if (i != dindex->no_build_id.size) { - dindex->no_build_id.data[i] = - dindex->no_build_id.data[dindex->no_build_id.size]; - } - drgn_dwarf_module_destroy(module); - } - } - - dwfl_report_begin(dindex->dwfl); - dwfl_report_end(dindex->dwfl, drgn_dwfl_module_removed, &arg); -} - -struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, - const Dwfl_Callbacks *callbacks) -{ - size_t i; - char *max_errors; - - dindex->dwfl = dwfl_begin(callbacks); - if (!dindex->dwfl) - return drgn_error_libdwfl(); - for (i = 0; i < ARRAY_SIZE(dindex->shards); i++) { - struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; - - omp_init_lock(&shard->lock); - drgn_dwarf_index_die_map_init(&shard->map); - drgn_dwarf_index_die_vector_init(&shard->dies); - } - memset(&dindex->errors, 0, 
sizeof(dindex->errors)); - dindex->num_errors = 0; - max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); - if (max_errors) - dindex->max_errors = atoi(max_errors); - else - dindex->max_errors = 5; - drgn_dwarf_module_table_init(&dindex->module_table); - drgn_dwarf_module_vector_init(&dindex->no_build_id); - c_string_set_init(&dindex->names); - return NULL; -} - -void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) -{ - if (!dindex) - return; - c_string_set_deinit(&dindex->names); - drgn_dwarf_index_free_modules(dindex, false, true); - assert(dindex->no_build_id.size == 0); - assert(drgn_dwarf_module_table_size(&dindex->module_table) == 0); - drgn_dwarf_module_vector_deinit(&dindex->no_build_id); - drgn_dwarf_module_table_deinit(&dindex->module_table); - free_shards(dindex, ARRAY_SIZE(dindex->shards)); - dwfl_end(dindex->dwfl); -} - -void drgn_dwarf_index_report_begin(struct drgn_dwarf_index *dindex) -{ - dwfl_report_begin_add(dindex->dwfl); -} - -struct drgn_error * -drgn_dwarf_index_report_error(struct drgn_dwarf_index *dindex, const char *name, - const char *message, struct drgn_error *err) -{ - if (err && err->code == DRGN_ERROR_NO_MEMORY) { - /* Always fail hard if we're out of memory. 
*/ - goto err; - } - if (dindex->num_errors == 0 && - !string_builder_append(&dindex->errors, - "could not get debugging information for:")) - goto err; - if (dindex->num_errors < dindex->max_errors) { - if (!string_builder_line_break(&dindex->errors)) - goto err; - if (name && !string_builder_append(&dindex->errors, name)) - goto err; - if (name && (message || err) && - !string_builder_append(&dindex->errors, " (")) - goto err; - if (message && !string_builder_append(&dindex->errors, message)) - goto err; - if (message && err && - !string_builder_append(&dindex->errors, ": ")) - goto err; - if (err && !string_builder_append_error(&dindex->errors, err)) - goto err; - if (name && (message || err) && - !string_builder_appendc(&dindex->errors, ')')) - goto err; - } - dindex->num_errors++; - drgn_error_destroy(err); - return NULL; - -err: - drgn_error_destroy(err); - return &drgn_enomem; -} - -static void drgn_dwarf_index_reset_errors(struct drgn_dwarf_index *dindex) -{ - dindex->errors.len = 0; - dindex->num_errors = 0; -} - -static struct drgn_error * -drgn_dwarf_index_finalize_errors(struct drgn_dwarf_index *dindex) -{ - struct drgn_error *err; - - if (dindex->num_errors > dindex->max_errors && - (!string_builder_line_break(&dindex->errors) || - !string_builder_appendf(&dindex->errors, "... 
%u more", - dindex->num_errors - dindex->max_errors))) { - drgn_dwarf_index_reset_errors(dindex); - return &drgn_enomem; - } - if (dindex->num_errors) { - err = drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, - &dindex->errors); - memset(&dindex->errors, 0, sizeof(dindex->errors)); - dindex->num_errors = 0; - return err; - } else { - return NULL; - } -} - -static struct drgn_error * -drgn_dwarf_index_insert_module(struct drgn_dwarf_index *dindex, - const void *build_id, size_t build_id_len, - uint64_t start, uint64_t end, const char *name, - struct drgn_dwarf_module **ret) -{ - struct hash_pair hp; - struct drgn_dwarf_module_table_iterator it; - struct drgn_dwarf_module *module; - - if (build_id_len) { - struct drgn_dwarf_module_key key = { - .build_id = build_id, - .build_id_len = build_id_len, - .start = start, - .end = end, - }; - - hp = drgn_dwarf_module_table_hash(&key); - it = drgn_dwarf_module_table_search_hashed(&dindex->module_table, - &key, hp); - if (it.entry) { - module = *it.entry; - goto out; - } - } - - module = malloc(sizeof(*module)); - if (!module) - return &drgn_enomem; - module->start = start; - module->end = end; - if (name) { - module->name = strdup(name); - if (!module->name) - goto err_module; - } else { - module->name = NULL; - } - module->build_id_len = build_id_len; - if (build_id_len) { - module->build_id = malloc(build_id_len); - if (!module->build_id) - goto err_name; - memcpy(module->build_id, build_id, build_id_len); - if (drgn_dwarf_module_table_insert_searched(&dindex->module_table, - &module, hp, - &it) == -1) { - free(module->build_id); -err_name: - free(module->name); -err_module: - free(module); - return &drgn_enomem; - } - } else { - module->build_id = NULL; - if (!drgn_dwarf_module_vector_append(&dindex->no_build_id, - &module)) - goto err_name; - } - module->state = DRGN_DWARF_MODULE_NEW; - dwfl_module_vector_init(&module->dwfl_modules); -out: - *ret = module; - return NULL; -} - -struct drgn_error 
*drgn_dwarf_index_report_elf(struct drgn_dwarf_index *dindex, - const char *path, int fd, - Elf *elf, uint64_t start, - uint64_t end, const char *name, - bool *new_ret) -{ - struct drgn_error *err; - const void *build_id; - ssize_t build_id_len; - struct drgn_dwarf_module *module; - char *path_key = NULL; - Dwfl_Module *dwfl_module; - void **userdatap; - struct drgn_dwfl_module_userdata *userdata; - - if (new_ret) - *new_ret = false; - - build_id_len = dwelf_elf_gnu_build_id(elf, &build_id); - if (build_id_len == -1) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, - drgn_error_libdwfl()); - goto free; - } - - err = drgn_dwarf_index_insert_module(dindex, build_id, build_id_len, - start, end, name, &module); - if (err) - goto free; - if (module->state == DRGN_DWARF_MODULE_INDEXED) { - /* We've already indexed this module. */ - err = NULL; - goto free; - } - - path_key = realpath(path, NULL); - if (!path_key) { - path_key = strdup(path); - if (!path_key) { - err = &drgn_enomem; - goto free; - } - } - dwfl_module = dwfl_report_module(dindex->dwfl, path_key, start, end); - if (!dwfl_module) { - err = drgn_error_libdwfl(); - goto free; - } - - dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL, - NULL); - if (*userdatap) { - /* We've already reported this file at this offset. */ - err = NULL; - goto free; - } - - userdata = malloc(sizeof(*userdata)); - if (!userdata) { - err = &drgn_enomem; - goto free; - } - userdata->path = path_key; - userdata->fd = fd; - userdata->elf = elf; - userdata->state = DRGN_DWARF_MODULE_NEW; - *userdatap = userdata; - if (new_ret) - *new_ret = true; - - if (!dwfl_module_vector_append(&module->dwfl_modules, &dwfl_module)) { - /* - * NB: not goto free now that we're referencing the file from a - * Dwfl_Module. 
- */ - return &drgn_enomem; - } - return NULL; - -free: - elf_end(elf); - close(fd); - free(path_key); - return err; -} - -static int drgn_dwarf_index_report_dwfl_module(Dwfl_Module *dwfl_module, - void **userdatap, - const char *name, - Dwarf_Addr base, void *arg) -{ - struct drgn_error *err; - struct drgn_dwarf_index *dindex = arg; - struct drgn_dwfl_module_userdata *userdata = *userdatap; - const unsigned char *build_id; - int build_id_len; - GElf_Addr build_id_vaddr; - Dwarf_Addr end; - struct drgn_dwarf_module *module; - - if (userdata) { - /* - * This was either reported from - * drgn_dwarf_index_report_module() or already indexed. - */ - return DWARF_CB_OK; - } - - build_id_len = dwfl_module_build_id(dwfl_module, &build_id, - &build_id_vaddr); - if (build_id_len == -1) { - err = drgn_dwarf_index_report_error(dindex, name, NULL, - drgn_error_libdwfl()); - if (err) { - drgn_error_destroy(err); - return DWARF_CB_ABORT; - } - return DWARF_CB_OK; - } - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL); - - err = drgn_dwarf_index_insert_module(dindex, build_id, build_id_len, - base, end, NULL, &module); - if (err) { - drgn_error_destroy(err); - return DWARF_CB_ABORT; - } - - userdata = malloc(sizeof(*userdata)); - if (!userdata) - return DWARF_CB_ABORT; - *userdatap = userdata; - userdata->path = NULL; - userdata->fd = -1; - userdata->elf = NULL; - if (module->state == DRGN_DWARF_MODULE_INDEXED) { - /* - * We've already indexed this module. Don't index it again, but - * keep the Dwfl_Module. 
- */ - userdata->state = DRGN_DWARF_MODULE_INDEXING; - } else { - userdata->state = DRGN_DWARF_MODULE_NEW; - if (!dwfl_module_vector_append(&module->dwfl_modules, - &dwfl_module)) - return DWARF_CB_ABORT; - } - return DWARF_CB_OK; -} - -static struct drgn_error * -append_unindexed_module(struct drgn_dwarf_module *module, - struct drgn_dwarf_module_vector *unindexed, - size_t *num_names) -{ - if (!module->dwfl_modules.size) { - /* This was either already indexed or had no new files. */ - return NULL; - } - if (!drgn_dwarf_module_vector_append(unindexed, &module)) - return &drgn_enomem; - *num_names += 1; - return NULL; -} - -static struct drgn_error * -drgn_dwarf_index_get_unindexed(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_module_vector *unindexed) -{ - struct drgn_error *err; - size_t num_names = 0; - struct drgn_dwarf_module_table_iterator it; - size_t i; - - /* - * Walk the module table and no build ID lists, but skip modules with no - * Dwfl_Module (which may be because they were already indexed or - * because the files were already reported). - */ - for (it = drgn_dwarf_module_table_first(&dindex->module_table); - it.entry; it = drgn_dwarf_module_table_next(it)) { - err = append_unindexed_module(*it.entry, unindexed, &num_names); - if (err) - return err; - } - for (i = dindex->no_build_id.size; i-- > 0; ) { - struct drgn_dwarf_module *module = dindex->no_build_id.data[i]; - - if (module->state == DRGN_DWARF_MODULE_INDEXED) { - /* - * If this module is indexed, then every module before - * it must be indexed, so we can stop looking. 
- */ - break; - } - err = append_unindexed_module(module, unindexed, &num_names); - if (err) - return err; - } - if (num_names && - !c_string_set_reserve(&dindex->names, - c_string_set_size(&dindex->names) + num_names)) - return &drgn_enomem; - return NULL; -} - -static struct drgn_error *apply_relocation(Elf_Data *data, uint64_t r_offset, - uint32_t r_type, int64_t r_addend, - uint64_t st_value) -{ - char *p; - - p = (char *)data->d_buf + r_offset; - switch (r_type) { - case R_X86_64_NONE: - break; - case R_X86_64_32: - if (r_offset > SIZE_MAX - sizeof(uint32_t) || - r_offset + sizeof(uint32_t) > data->d_size) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid relocation offset"); - } - *(uint32_t *)p = st_value + r_addend; - break; - case R_X86_64_64: - if (r_offset > SIZE_MAX - sizeof(uint64_t) || - r_offset + sizeof(uint64_t) > data->d_size) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid relocation offset"); - } - *(uint64_t *)p = st_value + r_addend; - break; - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "unimplemented relocation type %" PRIu32, - r_type); - } - return NULL; -} - -static struct drgn_error *relocate_section(Elf_Scn *scn, Elf_Scn *rela_scn, - Elf_Scn *symtab_scn, - uint64_t *sh_addrs, size_t shdrnum) -{ - struct drgn_error *err; - Elf_Data *data, *rela_data, *symtab_data; - const Elf64_Rela *relocs; - const Elf64_Sym *syms; - size_t num_relocs, num_syms; - size_t i; - GElf_Shdr *shdr, shdr_mem; - - err = read_elf_section(scn, &data); - if (err) - return err; - err = read_elf_section(rela_scn, &rela_data); - if (err) - return err; - err = read_elf_section(symtab_scn, &symtab_data); - if (err) - return err; - - relocs = (Elf64_Rela *)rela_data->d_buf; - num_relocs = rela_data->d_size / sizeof(Elf64_Rela); - syms = (Elf64_Sym *)symtab_data->d_buf; - num_syms = symtab_data->d_size / sizeof(Elf64_Sym); - - for (i = 0; i < num_relocs; i++) { - const Elf64_Rela *reloc = &relocs[i]; - uint32_t r_sym, r_type; - uint16_t 
st_shndx; - uint64_t sh_addr; - - r_sym = ELF64_R_SYM(reloc->r_info); - r_type = ELF64_R_TYPE(reloc->r_info); - - if (r_sym >= num_syms) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid relocation symbol"); - } - st_shndx = syms[r_sym].st_shndx; - if (st_shndx == 0) { - sh_addr = 0; - } else if (st_shndx < shdrnum) { - sh_addr = sh_addrs[st_shndx - 1]; - } else { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid symbol section index"); - } - err = apply_relocation(data, reloc->r_offset, r_type, - reloc->r_addend, - sh_addr + syms[r_sym].st_value); - if (err) - return err; - } - /* - * Mark the relocation section as empty so that libdwfl doesn't try to - * apply it again. - */ - shdr = gelf_getshdr(rela_scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - shdr->sh_size = 0; - if (!gelf_update_shdr(rela_scn, shdr)) - return drgn_error_libelf(); - rela_data->d_size = 0; - return NULL; -} - -/* - * Before the debugging information in a relocatable ELF file (e.g., Linux - * kernel module) can be used, it must have ELF relocations applied. This is - * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a - * much faster implementation. It is only implemented for x86-64; for other - * architectures, we can fall back to libdwfl. - */ -static struct drgn_error *apply_elf_relocations(Elf *elf) -{ - struct drgn_error *err; - GElf_Ehdr ehdr_mem, *ehdr; - size_t shdrnum, shstrndx; - uint64_t *sh_addrs; - Elf_Scn *scn; - - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - - if (ehdr->e_type != ET_REL || - ehdr->e_machine != EM_X86_64 || - ehdr->e_ident[EI_CLASS] != ELFCLASS64 || - ehdr->e_ident[EI_DATA] != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? - ELFDATA2LSB : ELFDATA2MSB)) { - /* Unsupported; fall back to libdwfl. 
*/ - return NULL; - } - - if (elf_getshdrnum(elf, &shdrnum)) - return drgn_error_libelf(); - if (shdrnum > 1) { - sh_addrs = calloc(shdrnum - 1, sizeof(*sh_addrs)); - if (!sh_addrs) - return &drgn_enomem; - - scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - size_t ndx; - - ndx = elf_ndxscn(scn); - if (ndx > 0 && ndx < shdrnum) { - GElf_Shdr *shdr, shdr_mem; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out; - } - sh_addrs[ndx - 1] = shdr->sh_addr; - } - } - } else { - sh_addrs = NULL; - } - - if (elf_getshdrstrndx(elf, &shstrndx)) { - err = drgn_error_libelf(); - goto out; - } - - scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - const char *scnname; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out; - } - - if (shdr->sh_type != SHT_RELA) - continue; - - scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - continue; - - if (strstartswith(scnname, ".rela.debug_")) { - Elf_Scn *info_scn, *link_scn; - - info_scn = elf_getscn(elf, shdr->sh_info); - if (!info_scn) { - err = drgn_error_libelf(); - goto out; - } - - link_scn = elf_getscn(elf, shdr->sh_link); - if (!link_scn) { - err = drgn_error_libelf(); - goto out; - } - - err = relocate_section(info_scn, scn, link_scn, - sh_addrs, shdrnum); - if (err) - goto out; - } - } -out: - free(sh_addrs); - return NULL; -} - -static struct drgn_error *get_debug_sections(Elf *elf, Elf_Data **sections) -{ - struct drgn_error *err; - size_t shstrndx; - Elf_Scn *scn = NULL; - size_t i; - Elf_Data *debug_str; - - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - const char *scnname; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - - if (shdr->sh_type == SHT_NOBITS || (shdr->sh_flags & SHF_GROUP)) - continue; - - scnname = elf_strptr(elf, shstrndx, 
shdr->sh_name); - if (!scnname) - continue; - - for (i = 0; i < DRGN_DWARF_INDEX_NUM_SECTIONS; i++) { - if (sections[i]) - continue; - - if (strcmp(scnname, section_name[i]) != 0) - continue; - - err = read_elf_section(scn, &sections[i]); - if (err) - return err; - } - } - - for (i = 0; i < DRGN_DWARF_INDEX_NUM_SECTIONS; i++) { - if (i != SECTION_DEBUG_LINE && !sections[i]) { - return drgn_error_format(DRGN_ERROR_OTHER, - "no %s section", - section_name[i]); - } - } - - debug_str = sections[SECTION_DEBUG_STR]; - if (debug_str->d_size == 0 || - ((char *)debug_str->d_buf)[debug_str->d_size - 1] != '\0') { - return drgn_error_create(DRGN_ERROR_OTHER, - ".debug_str is not null terminated"); - } - return NULL; -} - -static struct drgn_error *read_compilation_unit_header(const char *ptr, - const char *end, - struct compilation_unit *cu) -{ - uint32_t tmp; - uint16_t version; - - if (!read_u32(&ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - cu->is_64_bit = tmp == UINT32_C(0xffffffff); - if (cu->is_64_bit) { - if (!read_u64(&ptr, end, cu->bswap, &cu->unit_length)) - return drgn_eof(); - } else { - cu->unit_length = tmp; - } + * This is indexed on the DWARF abbreviation code minus one. It maps the + * abbreviation code to an index in abbrev_insns where the instruction + * stream for that code begins. + * + * Technically, abbreviation codes don't have to be sequential. In + * practice, GCC and Clang seem to always generate sequential codes + * starting at one, so we can get away with a flat array. 
+ */ + uint32_t *abbrev_decls; + size_t num_abbrev_decls; + uint8_t *abbrev_insns; + uint64_t *file_name_hashes; + size_t num_file_names; +}; - if (!read_u16(&ptr, end, cu->bswap, &version)) - return drgn_eof(); - if (version != 2 && version != 3 && version != 4) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unknown DWARF CU version %" PRIu16, - version); - } +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) - if (cu->is_64_bit) { - if (!read_u64(&ptr, end, cu->bswap, &cu->debug_abbrev_offset)) - return drgn_eof(); - } else { - if (!read_u32_into_u64(&ptr, end, cu->bswap, - &cu->debug_abbrev_offset)) - return drgn_eof(); - } +/* DIE which needs to be indexed. */ +struct drgn_dwarf_index_pending_die { + /* Index of compilation unit containing DIE. */ + size_t cu; + /* Offset of DIE in .debug_info. */ + size_t offset; +}; - if (!read_u8(&ptr, end, &cu->address_size)) - return drgn_eof(); +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) - return NULL; +static inline const char *section_ptr(Elf_Data *data, size_t offset) +{ + if (offset > data->d_size) + return NULL; + return (const char *)data->d_buf + offset; } -DEFINE_VECTOR(compilation_unit_vector, struct compilation_unit) - -static struct drgn_error * -read_dwfl_module_cus(Dwfl_Module *dwfl_module, - struct drgn_dwfl_module_userdata *userdata, - struct compilation_unit_vector *cus) +static inline const char *section_end(Elf_Data *data) { - struct drgn_error *err; - Dwarf *dwarf; - Dwarf_Addr bias; - Elf *elf; - Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS] = {}; - bool bswap; - const char *ptr, *end; + return (const char *)data->d_buf + data->d_size; +} - if (userdata->elf) { - err = apply_elf_relocations(userdata->elf); - if (err) - return err; - } +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash, string_eq) +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_specification_map, + hash_pair_int_type, hash_table_scalar_eq) 
+static inline size_t hash_pair_to_shard(struct hash_pair hp) +{ /* - * Note: not dwfl_module_getelf(), because then libdwfl applies - * ELF relocations to all sections, not just debug sections. + * The 8 most significant bits of the hash are used as the F14 tag, so + * we don't want to use those for sharding. */ - dwarf = dwfl_module_getdwarf(dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdwfl(); - - elf = dwarf_getelf(dwarf); - if (!elf) - return drgn_error_libdw(); - - err = get_debug_sections(elf, sections); - if (err) - return err; - - bswap = (elf_getident(elf, NULL)[EI_DATA] != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? - ELFDATA2LSB : ELFDATA2MSB)); - - ptr = section_ptr(sections[SECTION_DEBUG_INFO], 0); - end = section_end(sections[SECTION_DEBUG_INFO]); - while (ptr < end) { - struct compilation_unit *cu; + return ((hp.first >> + (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & + (((size_t)1 << DRGN_DWARF_INDEX_SHARD_BITS) - 1)); +} - cu = compilation_unit_vector_append_entry(cus); - if (!cu) - return &drgn_enomem; - cu->module = dwfl_module; - memcpy(cu->sections, sections, sizeof(cu->sections)); - cu->ptr = ptr; - cu->bswap = bswap; - err = read_compilation_unit_header(ptr, end, cu); - if (err) - return err; +static inline struct drgn_error *drgn_eof(void) +{ + return drgn_error_create(DRGN_ERROR_OTHER, + "debug information is truncated"); +} - ptr += (cu->is_64_bit ? 
12 : 4) + cu->unit_length; +static inline bool mread_skip_leb128(const char **ptr, const char *end) +{ + while (*ptr < end) { + if (!(*(const uint8_t *)(*ptr)++ & 0x80)) + return true; } - return NULL; + return false; } -static struct drgn_error *read_module_cus(struct drgn_dwarf_module *module, - struct compilation_unit_vector *cus, - const char **name_ret) +static inline struct drgn_error *mread_uleb128(const char **ptr, + const char *end, uint64_t *value) { - struct drgn_error *err; - const size_t orig_cus_size = cus->size; - size_t i; - - for (i = 0; i < module->dwfl_modules.size; i++) { - Dwfl_Module *dwfl_module; - void **userdatap; - struct drgn_dwfl_module_userdata *userdata; - - dwfl_module = module->dwfl_modules.data[i]; - *name_ret = dwfl_module_info(dwfl_module, &userdatap, NULL, - NULL, NULL, NULL, NULL, NULL); - userdata = *userdatap; - err = read_dwfl_module_cus(dwfl_module, userdata, cus); - if (err) { - /* - * Ignore the error unless we have no more Dwfl_Modules - * to try. 
- */ - if (i == module->dwfl_modules.size - 1) - return err; - drgn_error_destroy(err); - cus->size = orig_cus_size; - continue; + int shift = 0; + *value = 0; + while (*ptr < end) { + uint8_t byte = *(const uint8_t *)*ptr; + (*ptr)++; + if (shift == 63 && byte > 1) { + return drgn_error_create(DRGN_ERROR_OVERFLOW, + "ULEB128 overflowed unsigned 64-bit integer"); } - userdata->state = DRGN_DWARF_MODULE_INDEXING; - module->state = DRGN_DWARF_MODULE_INDEXING; - return NULL; + *value |= (uint64_t)(byte & 0x7f) << shift; + shift += 7; + if (!(byte & 0x80)) + return NULL; } - UNREACHABLE(); + return drgn_eof(); } -static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_module **unindexed, - size_t num_unindexed, - struct compilation_unit_vector *all_cus) +static inline struct drgn_error *mread_uleb128_into_size_t(const char **ptr, + const char *end, + size_t *value) { - struct drgn_error *err = NULL; + struct drgn_error *err; + uint64_t tmp; - #pragma omp parallel - { - struct compilation_unit_vector cus = VECTOR_INIT; + if ((err = mread_uleb128(ptr, end, &tmp))) + return err; - #pragma omp for schedule(dynamic) - for (size_t i = 0; i < num_unindexed; i++) { - struct drgn_error *module_err; - const char *name; + if (tmp > SIZE_MAX) + return drgn_eof(); + *value = tmp; + return NULL; +} - if (err) - continue; +static void +drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index *dindex) +{ + for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { + struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + omp_init_lock(&shard->lock); + drgn_dwarf_index_die_map_init(&shard->map); + drgn_dwarf_index_die_vector_init(&shard->dies); + } + ns->dindex = dindex; + drgn_dwarf_index_pending_die_vector_init(&ns->pending_dies); + ns->saved_err = NULL; +} - module_err = read_module_cus(unindexed[i], &cus, &name); - if (module_err) { - #pragma omp critical(drgn_read_cus) - if (err) { - 
drgn_error_destroy(module_err); - } else { - err = drgn_dwarf_index_report_error(dindex, - name, - NULL, - module_err); - } - continue; - } - } +void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex) +{ + drgn_dwarf_index_namespace_init(&dindex->global, dindex); + drgn_dwarf_index_specification_map_init(&dindex->specifications); + drgn_dwarf_index_cu_vector_init(&dindex->cus); +} - if (cus.size) { - #pragma omp critical(drgn_read_cus) - if (!err) { - if (compilation_unit_vector_reserve(all_cus, - all_cus->size + cus.size)) { - memcpy(all_cus->data + all_cus->size, - cus.data, - cus.size * sizeof(*cus.data)); - all_cus->size += cus.size; - } else { - err = &drgn_enomem; - } +static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) +{ + free(cu->file_name_hashes); + free(cu->abbrev_insns); + free(cu->abbrev_decls); +} + +static void +drgn_dwarf_index_namespace_deinit(struct drgn_dwarf_index_namespace *ns) +{ + drgn_error_destroy(ns->saved_err); + drgn_dwarf_index_pending_die_vector_deinit(&ns->pending_dies); + for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { + struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + for (size_t j = 0; j < shard->dies.size; j++) { + struct drgn_dwarf_index_die *die = &shard->dies.data[j]; + if (die->tag == DW_TAG_namespace) { + drgn_dwarf_index_namespace_deinit(die->namespace); + free(die->namespace); } } - compilation_unit_vector_deinit(&cus); + drgn_dwarf_index_die_vector_deinit(&shard->dies); + drgn_dwarf_index_die_map_deinit(&shard->map); + omp_destroy_lock(&shard->lock); } - return err; +} + +void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) +{ + if (!dindex) + return; + for (size_t i = 0; i < dindex->cus.size; i++) + drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); + drgn_dwarf_index_cu_vector_deinit(&dindex->cus); + drgn_dwarf_index_specification_map_deinit(&dindex->specifications); + drgn_dwarf_index_namespace_deinit(&dindex->global); +} + +void drgn_dwarf_index_update_begin(struct 
drgn_dwarf_index_update_state *state, + struct drgn_dwarf_index *dindex) +{ + state->dindex = dindex; + state->old_cus_size = dindex->cus.size; + state->err = NULL; +} + +void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, + struct drgn_error *err) +{ + #pragma omp critical(drgn_dwarf_index_update_cancel) + if (state->err) + drgn_error_destroy(err); + else + state->err = err; } static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, - const struct compilation_unit *cu, - struct abbrev_table *abbrev) + struct drgn_dwarf_index_cu *cu, + struct uint32_vector *decls, + struct uint8_vector *insns) { struct drgn_error *err; - uint64_t code; - uint32_t insn_index; - uint64_t tag; - uint8_t children; - uint8_t die_flags; - bool should_index; - bool first = true; - uint8_t insn; static_assert(ATTRIB_MAX_INSN == UINT8_MAX, "maximum DWARF attribute instruction is invalid"); - if ((err = read_uleb128(ptr, end, &code))) + uint64_t code; + if ((err = mread_uleb128(ptr, end, &code))) return err; if (code == 0) return &drgn_stop; - if (code != abbrev->decls.size + 1) { + if (code != decls->size + 1) { return drgn_error_create(DRGN_ERROR_OTHER, "DWARF abbreviation table is not sequential"); } - insn_index = abbrev->insns.size; - if (!uint32_vector_append(&abbrev->decls, &insn_index)) + uint32_t insn_index = insns->size; + if (!uint32_vector_append(decls, &insn_index)) return &drgn_enomem; - if ((err = read_uleb128(ptr, end, &tag))) + uint64_t tag; + if ((err = mread_uleb128(ptr, end, &tag))) return err; + bool should_index; switch (tag) { /* Types. */ case DW_TAG_base_type: @@ -1397,40 +309,35 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, case DW_TAG_enumerator: /* Functions. */ case DW_TAG_subprogram: + /* Namespaces */ + case DW_TAG_namespace: + /* If adding anything here, make sure it fits in DIE_FLAG_TAG_MASK. 
*/ should_index = true; break; default: should_index = false; break; } + uint8_t die_flags = should_index ? tag : 0; - if (should_index || tag == DW_TAG_compile_unit || - tag == DW_TAG_partial_unit) - die_flags = tag; - else - die_flags = 0; - - if (!read_u8(ptr, end, &children)) + uint8_t children; + if (!mread_u8(ptr, end, &children)) return drgn_eof(); if (children) - die_flags |= TAG_FLAG_CHILDREN; + die_flags |= DIE_FLAG_CHILDREN; + bool first = true; + uint8_t insn; for (;;) { uint64_t name, form; - - if ((err = read_uleb128(ptr, end, &name))) + if ((err = mread_uleb128(ptr, end, &name))) return err; - if ((err = read_uleb128(ptr, end, &form))) + if ((err = mread_uleb128(ptr, end, &form))) return err; if (name == 0 && form == 0) break; - if (name == DW_AT_sibling && tag != DW_TAG_enumeration_type) { - /* - * If we are indexing enumerators, we must descend into - * DW_TAG_enumeration_type to find the DW_TAG_enumerator - * children instead of skipping to the sibling DIE. - */ + if (name == DW_AT_sibling) { switch (form) { case DW_FORM_ref1: insn = ATTRIB_SIBLING_REF1; @@ -1453,6 +360,10 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, } else if (name == DW_AT_name && should_index) { switch (form) { case DW_FORM_strp: + if (!cu->module->debug_str) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_strp without .debug_str section"); + } if (cu->is_64_bit) insn = ATTRIB_NAME_STRP8; else @@ -1464,10 +375,7 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, default: break; } - } else if (name == DW_AT_stmt_list && - (tag == DW_TAG_compile_unit || - tag == DW_TAG_partial_unit) && - cu->sections[SECTION_DEBUG_LINE]) { + } else if (name == DW_AT_stmt_list && cu->module->debug_line) { switch (form) { case DW_FORM_data4: insn = ATTRIB_STMT_LIST_LINEPTR4; @@ -1484,7 +392,9 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, default: break; } - } else if (name == 
DW_AT_decl_file && should_index) { + } else if (name == DW_AT_decl_file && should_index && + /* Namespaces are merged, so we ignore their file. */ + tag != DW_TAG_namespace) { switch (form) { case DW_FORM_data1: insn = ATTRIB_DECL_FILE_DATA1; @@ -1510,13 +420,24 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, default: break; } - } else if (name == DW_AT_declaration) { - /* - * In theory, this could be DW_FORM_flag with a value of - * zero, but in practice, GCC always uses - * DW_FORM_flag_present. - */ - die_flags |= TAG_FLAG_DECLARATION; + } else if (name == DW_AT_declaration && should_index) { + switch (form) { + case DW_FORM_flag: + insn = ATTRIB_DECLARATION_FLAG; + goto append_insn; + case DW_FORM_flag_present: + /* + * This could be an instruction, but as long as + * we have a free DIE flag bit, we might as well + * use it. + */ + die_flags |= DIE_FLAG_DECLARATION; + break; + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown attribute form %" PRIu64 " for DW_AT_declaration", + form); + } } else if (name == DW_AT_specification && should_index) { switch (form) { case DW_FORM_ref1: @@ -1534,8 +455,27 @@ static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end, case DW_FORM_ref_udata: insn = ATTRIB_SPECIFICATION_REF_UDATA; goto append_insn; + case DW_FORM_ref_addr: + if (cu->version >= 3) { + if (cu->is_64_bit) + insn = ATTRIB_SPECIFICATION_REF_ADDR8; + else + insn = ATTRIB_SPECIFICATION_REF_ADDR4; + } else { + if (cu->address_size == 8) + insn = ATTRIB_SPECIFICATION_REF_ADDR8; + else if (cu->address_size == 4) + insn = ATTRIB_SPECIFICATION_REF_ADDR4; + else + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported address size %" PRIu8, + cu->address_size); + } + goto append_insn; default: - break; + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown attribute form %" PRIu64 " for DW_AT_specification", + form); } } @@ -1598,62 +538,98 @@ static struct drgn_error *read_abbrev_decl(const char 
**ptr, const char *end, } if (!first) { - uint8_t last_insn; - - last_insn = abbrev->insns.data[abbrev->insns.size - 1]; + uint8_t last_insn = insns->data[insns->size - 1]; if (last_insn + insn <= INSN_MAX_SKIP) { - abbrev->insns.data[abbrev->insns.size - 1] += insn; + insns->data[insns->size - 1] += insn; continue; } else if (last_insn < INSN_MAX_SKIP) { insn = last_insn + insn - INSN_MAX_SKIP; - abbrev->insns.data[abbrev->insns.size - 1] = - INSN_MAX_SKIP; + insns->data[insns->size - 1] = INSN_MAX_SKIP; } } append_insn: first = false; - if (!uint8_vector_append(&abbrev->insns, &insn)) + if (!uint8_vector_append(insns, &insn)) return &drgn_enomem; } insn = 0; - if (!uint8_vector_append(&abbrev->insns, &insn) || - !uint8_vector_append(&abbrev->insns, &die_flags)) + if (!uint8_vector_append(insns, &insn) || + !uint8_vector_append(insns, &die_flags)) return &drgn_enomem; return NULL; } -static struct drgn_error *read_abbrev_table(const char *ptr, const char *end, - const struct compilation_unit *cu, - struct abbrev_table *abbrev) +static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, + size_t debug_abbrev_offset) { - struct drgn_error *err; - + Elf_Data *debug_abbrev = cu->module->debug_abbrev; + const char *ptr = section_ptr(debug_abbrev, debug_abbrev_offset); + if (!ptr) + return drgn_eof(); + const char *end = section_end(debug_abbrev); + struct uint32_vector decls = VECTOR_INIT; + struct uint8_vector insns = VECTOR_INIT; for (;;) { - err = read_abbrev_decl(&ptr, end, cu, abbrev); - if (err && err->code == DRGN_ERROR_STOP) + struct drgn_error *err = read_abbrev_decl(&ptr, end, cu, &decls, + &insns); + if (err && err->code == DRGN_ERROR_STOP) { break; - else if (err) + } else if (err) { + uint8_vector_deinit(&insns); + uint32_vector_deinit(&decls); return err; + } } + cu->abbrev_decls = decls.data; + cu->num_abbrev_decls = decls.size; + cu->abbrev_insns = insns.data; return NULL; } -static struct drgn_error *skip_lnp_header(struct 
compilation_unit *cu, - const char **ptr, const char *end) +static struct drgn_error *read_cu(struct drgn_dwarf_index_cu *cu) { - uint32_t tmp; - bool is_64_bit; + + const char *ptr = &cu->ptr[cu->is_64_bit ? 12 : 4]; uint16_t version; - uint8_t opcode_base; + if (!mread_u16(&ptr, cu->end, cu->bswap, &version)) + return drgn_eof(); + if (version < 2 || version > 4) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown DWARF CU version %" PRIu16, + version); + } + cu->version = version; + + size_t debug_abbrev_offset; + if (cu->is_64_bit) { + if (!mread_u64_into_size_t(&ptr, cu->end, cu->bswap, + &debug_abbrev_offset)) + return drgn_eof(); + } else { + if (!mread_u32_into_size_t(&ptr, cu->end, cu->bswap, + &debug_abbrev_offset)) + return drgn_eof(); + } + + if (!mread_u8(&ptr, cu->end, &cu->address_size)) + return drgn_eof(); + + return read_abbrev_table(cu, debug_abbrev_offset); +} - if (!read_u32(ptr, end, cu->bswap, &tmp)) +static struct drgn_error *skip_lnp_header(struct drgn_dwarf_index_cu *cu, + const char **ptr, const char *end) +{ + uint32_t tmp; + if (!mread_u32(ptr, end, cu->bswap, &tmp)) + return drgn_eof(); + bool is_64_bit = tmp == UINT32_C(0xffffffff); + if (is_64_bit && !mread_skip(ptr, end, sizeof(uint64_t))) return drgn_eof(); - is_64_bit = tmp == UINT32_C(0xffffffff); - if (is_64_bit) - *ptr += sizeof(uint64_t); - if (!read_u16(ptr, end, cu->bswap, &version)) + uint16_t version; + if (!mread_u16(ptr, end, cu->bswap, &version)) return drgn_eof(); if (version != 2 && version != 3 && version != 4) { return drgn_error_format(DRGN_ERROR_OTHER, @@ -1662,19 +638,20 @@ static struct drgn_error *skip_lnp_header(struct compilation_unit *cu, } /* + * Skip: * header_length * minimum_instruction_length * maximum_operations_per_instruction (DWARF 4 only) * default_is_stmt * line_base * line_range + * standard_opcode_lengths */ - *ptr += (is_64_bit ? 
8 : 4) + 4 + (version >= 4); - - if (!read_u8(ptr, end, &opcode_base)) + uint8_t opcode_base; + if (!mread_skip(ptr, end, (is_64_bit ? 8 : 4) + 4 + (version >= 4)) || + !mread_u8(ptr, end, &opcode_base) || + !mread_skip(ptr, end, opcode_base - 1)) return drgn_eof(); - /* standard_opcode_lengths */ - *ptr += opcode_base - 1; return NULL; } @@ -1705,8 +682,7 @@ DEFINE_VECTOR(siphash_vector, struct siphash) static struct drgn_error * read_file_name_table(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu, size_t stmt_list, - struct uint64_vector *file_name_table) + struct drgn_dwarf_index_cu *cu, size_t stmt_list) { /* * We don't care about hash flooding attacks, so don't bother with the @@ -1714,8 +690,11 @@ read_file_name_table(struct drgn_dwarf_index *dindex, */ static const uint64_t siphash_key[2]; struct drgn_error *err; - Elf_Data *debug_line = cu->sections[SECTION_DEBUG_LINE]; + + Elf_Data *debug_line = cu->module->debug_line; const char *ptr = section_ptr(debug_line, stmt_list); + if (!ptr) + return drgn_eof(); const char *end = section_end(debug_line); err = skip_lnp_header(cu, &ptr, end); @@ -1726,8 +705,10 @@ read_file_name_table(struct drgn_dwarf_index *dindex, for (;;) { const char *path; size_t path_len; - if (!read_string(&ptr, end, &path, &path_len)) - return drgn_eof(); + if (!mread_string(&ptr, end, &path, &path_len)) { + err = drgn_eof(); + goto out_directories; + } if (!path_len) break; @@ -1735,79 +716,402 @@ read_file_name_table(struct drgn_dwarf_index *dindex, siphash_vector_append_entry(&directories); if (!hash) { err = &drgn_enomem; - goto out; + goto out_directories; } siphash_init(hash, siphash_key); hash_directory(hash, path, path_len); } - for (;;) { - const char *path; - size_t path_len; - if (!read_string(&ptr, end, &path, &path_len)) { - err = drgn_eof(); - goto out; + struct uint64_vector file_name_hashes = VECTOR_INIT; + for (;;) { + const char *path; + size_t path_len; + if (!mread_string(&ptr, end, &path, &path_len)) { + 
err = drgn_eof(); + goto out_hashes; + } + if (!path_len) + break; + + uint64_t directory_index; + if ((err = mread_uleb128(&ptr, end, &directory_index))) + goto out_hashes; + /* mtime, size */ + if (!mread_skip_leb128(&ptr, end) || + !mread_skip_leb128(&ptr, end)) { + err = drgn_eof(); + goto out_hashes; + } + + if (directory_index > directories.size) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "directory index %" PRIu64 " is invalid", + directory_index); + goto out_hashes; + } + + struct siphash hash; + if (directory_index) + hash = directories.data[directory_index - 1]; + else + siphash_init(&hash, siphash_key); + siphash_update(&hash, path, path_len); + + uint64_t file_name_hash = siphash_final(&hash); + if (!uint64_vector_append(&file_name_hashes, &file_name_hash)) { + err = &drgn_enomem; + goto out_hashes; + } + } + + cu->file_name_hashes = file_name_hashes.data; + cu->num_file_names = file_name_hashes.size; + err = NULL; + goto out_directories; + +out_hashes: + uint64_vector_deinit(&file_name_hashes); +out_directories: + siphash_vector_deinit(&directories); + return err; +} + +static struct drgn_error * +index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, + Dwfl_Module *module, size_t offset) +{ + struct drgn_dwarf_index_specification entry = { + .declaration = declaration, + .module = module, + .offset = offset, + }; + struct hash_pair hp = + drgn_dwarf_index_specification_map_hash(&declaration); + int ret; + #pragma omp critical(drgn_index_specification) + ret = drgn_dwarf_index_specification_map_insert_hashed(&dindex->specifications, + &entry, hp, + NULL); + /* + * There may be duplicates if multiple DIEs reference one declaration, + * but we ignore them. + */ + return ret == -1 ? &drgn_enomem : NULL; +} + +/* + * First pass: read the file name tables and index DIEs with + * DW_AT_specification. This recurses into namespaces. 
+ */ +static struct drgn_error *index_cu_first_pass(struct drgn_dwarf_index *dindex, + struct drgn_dwarf_index_cu *cu) +{ + struct drgn_error *err; + Elf_Data *debug_info = cu->module->debug_info; + const char *debug_info_buffer = section_ptr(debug_info, 0); + const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; + const char *end = cu->end; + unsigned int depth = 0; + for (;;) { + size_t die_offset = ptr - debug_info_buffer; + + uint64_t code; + if ((err = mread_uleb128(&ptr, end, &code))) + return err; + if (code == 0) { + if (depth-- > 1) + continue; + else + break; + } else if (code > cu->num_abbrev_decls) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown abbreviation code %" PRIu64, + code); + } + + uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; + bool declaration = false; + uintptr_t specification = 0; + size_t stmt_list = SIZE_MAX; + const char *sibling = NULL; + uint8_t insn; + while ((insn = *insnp++)) { + size_t skip, tmp; + switch (insn) { + case ATTRIB_BLOCK1: + if (!mread_u8_into_size_t(&ptr, end, &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK2: + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_EXPRLOC: + if ((err = mread_uleb128_into_size_t(&ptr, end, + &skip))) + return err; + goto skip; + case ATTRIB_LEB128: + case ATTRIB_DECL_FILE_UDATA: + if (!mread_skip_leb128(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_STRING: + case ATTRIB_NAME_STRING: + if (!mread_skip_string(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_SIBLING_REF1: + if (!mread_u8_into_size_t(&ptr, end, &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF2: + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + 
return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF8: + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF_UDATA: + if ((err = mread_uleb128_into_size_t(&ptr, end, + &tmp))) + return err; +sibling: + if (!(sibling = mread_begin(cu->ptr, end, tmp))) + return drgn_eof(); + __builtin_prefetch(sibling); + break; + case ATTRIB_STMT_LIST_LINEPTR4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &stmt_list)) + return drgn_eof(); + break; + case ATTRIB_STMT_LIST_LINEPTR8: + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &stmt_list)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA1: + skip = 1; + goto skip; + case ATTRIB_DECL_FILE_DATA2: + skip = 2; + goto skip; + case ATTRIB_NAME_STRP4: + case ATTRIB_DECL_FILE_DATA4: + skip = 4; + goto skip; + case ATTRIB_NAME_STRP8: + case ATTRIB_DECL_FILE_DATA8: + skip = 8; + goto skip; + case ATTRIB_DECLARATION_FLAG: { + uint8_t flag; + if (!mread_u8(&ptr, end, &flag)) + return drgn_eof(); + if (flag) + declaration = true; + break; + } + case ATTRIB_SPECIFICATION_REF1: + if (!mread_u8_into_size_t(&ptr, end, &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF2: + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF8: + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification; + case ATTRIB_SPECIFICATION_REF_UDATA: + if ((err = mread_uleb128_into_size_t(&ptr, end, + &tmp))) + return err; +specification: + specification = (uintptr_t)cu->ptr + tmp; + break; + case ATTRIB_SPECIFICATION_REF_ADDR4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto specification_ref_addr; + case ATTRIB_SPECIFICATION_REF_ADDR8: + if 
(!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); +specification_ref_addr: + specification = (uintptr_t)debug_info_buffer + tmp; + break; + default: + skip = insn; +skip: + if (!mread_skip(&ptr, end, skip)) + return drgn_eof(); + break; + } } - if (!path_len) - break; + insn = *insnp; - uint64_t directory_index; - if ((err = read_uleb128(&ptr, end, &directory_index))) - goto out; - /* mtime, size */ - if (!skip_leb128(&ptr, end) || !skip_leb128(&ptr, end)) { - err = drgn_eof(); - goto out; + if (depth == 0) { + if (stmt_list != SIZE_MAX && + (err = read_file_name_table(dindex, cu, stmt_list))) + return err; + } else if (specification) { + if (insn & DIE_FLAG_DECLARATION) + declaration = true; + /* + * For now, we don't handle DIEs with + * DW_AT_specification which are themselves + * declarations. We may need to handle + * DW_AT_specification "chains" in the future. + */ + if (!declaration && + (err = index_specification(dindex, specification, + cu->module->dwfl_module, + die_offset))) + return err; } - if (directory_index > directories.size) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "directory index %" PRIu64 " is invalid", - directory_index); - goto out; + if (insn & DIE_FLAG_CHILDREN) { + if (sibling && + (insn & DIE_FLAG_TAG_MASK) != DW_TAG_namespace) + ptr = sibling; + else + depth++; + } else if (depth == 0) { + break; } + } + return NULL; +} - struct siphash hash; - if (directory_index) - hash = directories.data[directory_index - 1]; - else - siphash_init(&hash, siphash_key); - siphash_update(&hash, path, path_len); +void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module) +{ + const bool bswap = module->bswap; + const char *ptr = section_ptr(module->debug_info, 0); + const char *end = section_end(module->debug_info); + while (ptr < end) { + const char *cu_ptr = ptr; + uint32_t tmp; + if (!mread_u32(&ptr, end, bswap, &tmp)) + goto err; + bool is_64_bit = tmp == 
UINT32_C(0xffffffff); + size_t unit_length; + if (is_64_bit) { + if (!mread_u64_into_size_t(&ptr, end, bswap, + &unit_length)) + goto err; + } else { + unit_length = tmp; + } + if (!mread_skip(&ptr, end, unit_length)) + goto err; - uint64_t file_name_hash = siphash_final(&hash); - if (!uint64_vector_append(file_name_table, &file_name_hash)) { - err = &drgn_enomem; - goto out; + #pragma omp task + { + struct drgn_dwarf_index_cu cu = { + .module = module, + .ptr = cu_ptr, + .end = ptr, + .is_64_bit = is_64_bit, + .bswap = module->bswap, + }; + struct drgn_error *cu_err = read_cu(&cu); + if (cu_err) + goto cu_err; + + cu_err = index_cu_first_pass(state->dindex, &cu); + if (cu_err) + goto cu_err; + + #pragma omp critical(drgn_dwarf_index_cus) + if (!drgn_dwarf_index_cu_vector_append(&state->dindex->cus, + &cu)) + cu_err = &drgn_enomem; + if (cu_err) { +cu_err: + drgn_dwarf_index_cu_deinit(&cu); + drgn_dwarf_index_update_cancel(state, cu_err); + } } } + return; - err = NULL; -out: - siphash_vector_deinit(&directories); - return err; +err: + drgn_dwarf_index_update_cancel(state, drgn_eof()); } -static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint64_t tag, - uint64_t file_name_hash, Dwfl_Module *module, - uint64_t offset) +static bool find_definition(struct drgn_dwarf_index *dindex, uintptr_t die_addr, + Dwfl_Module **module_ret, size_t *offset_ret) { - struct drgn_dwarf_index_die *die; + struct drgn_dwarf_index_specification_map_iterator it = + drgn_dwarf_index_specification_map_search(&dindex->specifications, + &die_addr); + if (!it.entry) + return false; + *module_ret = it.entry->module; + *offset_ret = it.entry->offset; + return true; +} - die = drgn_dwarf_index_die_vector_append_entry(&shard->dies); +static bool append_die_entry(struct drgn_dwarf_index *dindex, + struct drgn_dwarf_index_shard *shard, uint8_t tag, + uint64_t file_name_hash, Dwfl_Module *module, + size_t offset) +{ + if (shard->dies.size == UINT32_MAX) + return false; + struct 
drgn_dwarf_index_die *die = + drgn_dwarf_index_die_vector_append_entry(&shard->dies); if (!die) return false; + die->next = UINT32_MAX; die->tag = tag; - die->file_name_hash = file_name_hash; + if (die->tag == DW_TAG_namespace) { + die->namespace = malloc(sizeof(*die->namespace)); + if (!die->namespace) { + shard->dies.size--; + return false; + } + drgn_dwarf_index_namespace_init(die->namespace, dindex); + } else { + die->file_name_hash = file_name_hash; + } die->module = module; die->offset = offset; - die->next = SIZE_MAX; + return true; } -static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, - const char *name, uint64_t tag, +static struct drgn_error *index_die(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index_cu *cu, + const char *name, uint8_t tag, uint64_t file_name_hash, - Dwfl_Module *module, uint64_t offset) + Dwfl_Module *module, size_t offset) { struct drgn_error *err; struct drgn_dwarf_index_die_map_entry entry = { @@ -1823,353 +1127,314 @@ static struct drgn_error *index_die(struct drgn_dwarf_index *dindex, struct drgn_dwarf_index_die *die; hp = drgn_dwarf_index_die_map_hash(&entry.key); - shard = &dindex->shards[hash_pair_to_shard(hp)]; + shard = &ns->shards[hash_pair_to_shard(hp)]; omp_set_lock(&shard->lock); it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, hp); if (!it.entry) { - if (!append_die_entry(shard, tag, file_name_hash, module, - offset)) { + if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, + module, offset)) { err = &drgn_enomem; - goto out; + goto err; } entry.value = shard->dies.size - 1; - if (drgn_dwarf_index_die_map_insert_searched(&shard->map, - &entry, hp, - NULL) == 1) - err = NULL; - else + if (!drgn_dwarf_index_die_map_insert_searched(&shard->map, + &entry, hp, + NULL)) { err = &drgn_enomem; + goto err; + } + die = &shard->dies.data[shard->dies.size - 1]; goto out; } die = &shard->dies.data[it.entry->value]; for (;;) { - if (die->tag == tag && - die->file_name_hash 
== file_name_hash) { - err = NULL; + const uint64_t die_file_name_hash = + die->tag == DW_TAG_namespace ? 0 : die->file_name_hash; + if (die->tag == tag && die_file_name_hash == file_name_hash) goto out; - } - if (die->next == SIZE_MAX) + if (die->next == UINT32_MAX) break; die = &shard->dies.data[die->next]; } index = die - shard->dies.data; - if (!append_die_entry(shard, tag, file_name_hash, module, offset)) { + if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, module, + offset)) { err = &drgn_enomem; - goto out; + goto err; } + die = &shard->dies.data[shard->dies.size - 1]; shard->dies.data[index].next = shard->dies.size - 1; - err = NULL; out: - omp_unset_lock(&shard->lock); - return err; -} - -struct die { - const char *sibling; - const char *name; - size_t stmt_list; - size_t decl_file; - const char *specification; - uint8_t flags; -}; - -static struct drgn_error *read_die(struct compilation_unit *cu, - const struct abbrev_table *abbrev, - const char **ptr, const char *end, - const char *debug_str_buffer, - const char *debug_str_end, struct die *die) -{ - struct drgn_error *err; - uint64_t code; - uint8_t *insnp; - uint8_t insn; - - if ((err = read_uleb128(ptr, end, &code))) - return err; - if (code == 0) - return &drgn_stop; - - if (code < 1 || code > abbrev->decls.size) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unknown abbreviation code %" PRIu64, - code); - } - insnp = &abbrev->insns.data[abbrev->decls.data[code - 1]]; - - while ((insn = *insnp++)) { - size_t skip, tmp; - - switch (insn) { - case ATTRIB_BLOCK1: - if (!read_u8_into_size_t(ptr, end, &skip)) - return drgn_eof(); - goto skip; - case ATTRIB_BLOCK2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, &skip)) - return drgn_eof(); - goto skip; - case ATTRIB_BLOCK4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &skip)) - return drgn_eof(); - goto skip; - case ATTRIB_EXPRLOC: - if ((err = read_uleb128_into_size_t(ptr, end, &skip))) - return err; - goto skip; - case ATTRIB_LEB128: 
- if (!skip_leb128(ptr, end)) - return drgn_eof(); - break; - case ATTRIB_NAME_STRING: - die->name = *ptr; - /* fallthrough */ - case ATTRIB_STRING: - if (!skip_string(ptr, end)) - return drgn_eof(); - break; - case ATTRIB_SIBLING_REF1: - if (!read_u8_into_size_t(ptr, end, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto sibling; - case ATTRIB_SIBLING_REF_UDATA: - if ((err = read_uleb128_into_size_t(ptr, end, &tmp))) - return err; -sibling: - if (!read_in_bounds(cu->ptr, end, tmp)) - return drgn_eof(); - die->sibling = &cu->ptr[tmp]; - __builtin_prefetch(die->sibling); - break; - case ATTRIB_NAME_STRP4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto strp; - case ATTRIB_NAME_STRP8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); -strp: - if (!read_in_bounds(debug_str_buffer, debug_str_end, - tmp)) - return drgn_eof(); - die->name = &debug_str_buffer[tmp]; - __builtin_prefetch(die->name); - break; - case ATTRIB_STMT_LIST_LINEPTR4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, - &die->stmt_list)) - return drgn_eof(); - break; - case ATTRIB_STMT_LIST_LINEPTR8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, - &die->stmt_list)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA1: - if (!read_u8_into_size_t(ptr, end, &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, - &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, - &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_DATA8: - 
if (!read_u64_into_size_t(ptr, end, cu->bswap, - &die->decl_file)) - return drgn_eof(); - break; - case ATTRIB_DECL_FILE_UDATA: - if ((err = read_uleb128_into_size_t(ptr, end, - &die->decl_file))) - return err; - break; - case ATTRIB_SPECIFICATION_REF1: - if (!read_u8_into_size_t(ptr, end, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF2: - if (!read_u16_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF4: - if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF8: - if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp)) - return drgn_eof(); - goto specification; - case ATTRIB_SPECIFICATION_REF_UDATA: - if ((err = read_uleb128_into_size_t(ptr, end, &tmp))) - return err; -specification: - if (!read_in_bounds(cu->ptr, end, tmp)) - return drgn_eof(); - die->specification = &cu->ptr[tmp]; - __builtin_prefetch(die->specification); - break; - default: - skip = insn; -skip: - if (!read_in_bounds(*ptr, end, skip)) - return drgn_eof(); - *ptr += skip; - break; + if (tag == DW_TAG_namespace) { + struct drgn_dwarf_index_pending_die *pending = + drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies); + if (!pending) { + err = &drgn_enomem; + goto err; } + pending->cu = cu - ns->dindex->cus.data; + pending->offset = offset; } - - die->flags = *insnp; - - return NULL; + err = NULL; +err: + omp_unset_lock(&shard->lock); + return err; } -static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, - struct compilation_unit *cu) +/* Second pass: index the actual DIEs. 
*/ +static struct drgn_error * +index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index_cu *cu, const char *ptr) { struct drgn_error *err; - struct abbrev_table abbrev = ABBREV_TABLE_INIT; - struct uint64_vector file_name_table = VECTOR_INIT; - Elf_Data *debug_abbrev = cu->sections[SECTION_DEBUG_ABBREV]; - const char *debug_abbrev_end = section_end(debug_abbrev); - const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; - const char *end = &cu->ptr[(cu->is_64_bit ? 12 : 4) + cu->unit_length]; - Elf_Data *debug_info = cu->sections[SECTION_DEBUG_INFO]; + Elf_Data *debug_info = cu->module->debug_info; const char *debug_info_buffer = section_ptr(debug_info, 0); - Elf_Data *debug_str = cu->sections[SECTION_DEBUG_STR]; - const char *debug_str_buffer = section_ptr(debug_str, 0); - const char *debug_str_end = section_end(debug_str); + Elf_Data *debug_str = cu->module->debug_str; + const char *end = cu->end; unsigned int depth = 0; - uint64_t enum_die_offset = 0; - - if ((err = read_abbrev_table(section_ptr(debug_abbrev, - cu->debug_abbrev_offset), - debug_abbrev_end, cu, &abbrev))) - goto out; - + uint8_t depth1_tag = 0; + size_t depth1_offset = 0; for (;;) { - struct die die = { - .stmt_list = SIZE_MAX, - }; - uint64_t die_offset = ptr - debug_info_buffer; - uint64_t tag; + size_t die_offset = ptr - debug_info_buffer; - err = read_die(cu, &abbrev, &ptr, end, debug_str_buffer, - debug_str_end, &die); - if (err && err->code == DRGN_ERROR_STOP) { - depth--; - if (depth == 1) - enum_die_offset = 0; - else if (depth == 0) + uint64_t code; + if ((err = mread_uleb128(&ptr, end, &code))) + return err; + if (code == 0) { + if (depth-- > 1) + continue; + else break; - continue; - } else if (err) { - goto out; + } else if (code > cu->num_abbrev_decls) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown abbreviation code %" PRIu64, + code); } - tag = die.flags & TAG_MASK; - if (tag == DW_TAG_compile_unit || tag == DW_TAG_partial_unit) { - if (depth == 
0 && die.stmt_list != SIZE_MAX && - (err = read_file_name_table(dindex, cu, - die.stmt_list, - &file_name_table))) - goto out; - } else if (tag && !(die.flags & TAG_FLAG_DECLARATION)) { - uint64_t file_name_hash; + uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; + const char *name = NULL; + size_t decl_file = 0; + bool declaration = false; + bool specification = false; + const char *sibling = NULL; + uint8_t insn; + while ((insn = *insnp++)) { + size_t skip, tmp; + switch (insn) { + case ATTRIB_BLOCK1: + if (!mread_u8_into_size_t(&ptr, end, &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK2: + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_BLOCK4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &skip)) + return drgn_eof(); + goto skip; + case ATTRIB_EXPRLOC: + if ((err = mread_uleb128_into_size_t(&ptr, end, + &skip))) + return err; + goto skip; + case ATTRIB_SPECIFICATION_REF_UDATA: + specification = true; + /* fallthrough */ + case ATTRIB_LEB128: + if (!mread_skip_leb128(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_NAME_STRING: + name = ptr; + /* fallthrough */ + case ATTRIB_STRING: + if (!mread_skip_string(&ptr, end)) + return drgn_eof(); + break; + case ATTRIB_SIBLING_REF1: + if (!mread_u8_into_size_t(&ptr, end, &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF2: + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF8: + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto sibling; + case ATTRIB_SIBLING_REF_UDATA: + if ((err = mread_uleb128_into_size_t(&ptr, end, + &tmp))) + return err; +sibling: + if (!(sibling = mread_begin(cu->ptr, end, tmp))) + return drgn_eof(); + __builtin_prefetch(sibling); + break; + case 
ATTRIB_NAME_STRP4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); + goto strp; + case ATTRIB_NAME_STRP8: + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &tmp)) + return drgn_eof(); +strp: + if (!(name = section_ptr(debug_str, tmp))) + return drgn_eof(); + __builtin_prefetch(name); + break; + case ATTRIB_STMT_LIST_LINEPTR4: + skip = 4; + goto skip; + case ATTRIB_STMT_LIST_LINEPTR8: + skip = 8; + goto skip; + case ATTRIB_DECL_FILE_DATA1: + if (!mread_u8_into_size_t(&ptr, end, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA2: + if (!mread_u16_into_size_t(&ptr, end, cu->bswap, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA4: + if (!mread_u32_into_size_t(&ptr, end, cu->bswap, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_DATA8: + if (!mread_u64_into_size_t(&ptr, end, cu->bswap, + &decl_file)) + return drgn_eof(); + break; + case ATTRIB_DECL_FILE_UDATA: + if ((err = mread_uleb128_into_size_t(&ptr, end, + &decl_file))) + return err; + break; + case ATTRIB_DECLARATION_FLAG: { + uint8_t flag; + if (!mread_u8(&ptr, end, &flag)) + return drgn_eof(); + if (flag) + declaration = true; + break; + } + case ATTRIB_SPECIFICATION_REF1: + specification = true; + skip = 1; + goto skip; + case ATTRIB_SPECIFICATION_REF2: + specification = true; + skip = 2; + goto skip; + case ATTRIB_SPECIFICATION_REF4: + case ATTRIB_SPECIFICATION_REF_ADDR4: + specification = true; + skip = 4; + goto skip; + case ATTRIB_SPECIFICATION_REF8: + case ATTRIB_SPECIFICATION_REF_ADDR8: + specification = true; + skip = 8; + goto skip; + default: + skip = insn; +skip: + if (!mread_skip(&ptr, end, skip)) + return drgn_eof(); + break; + } + } + insn = *insnp; - /* - * NB: the enumerator name points to the - * enumeration_type DIE instead of the enumerator DIE. 
- */ - if (depth == 1 && tag == DW_TAG_enumeration_type) - enum_die_offset = die_offset; - else if (depth == 2 && tag == DW_TAG_enumerator && - enum_die_offset) - die_offset = enum_die_offset; - else if (depth != 1) - goto next; - - if (die.specification && (!die.name || !die.decl_file)) { - struct die decl = {}; - const char *decl_ptr = die.specification; - - if ((err = read_die(cu, &abbrev, &decl_ptr, end, - debug_str_buffer, - debug_str_end, &decl))) - goto out; - if (!die.name && decl.name) - die.name = decl.name; - if (!die.decl_file && decl.decl_file) - die.decl_file = decl.decl_file; + uint8_t tag = insn & DIE_FLAG_TAG_MASK; + if (depth == 1) { + depth1_tag = tag; + depth1_offset = die_offset; + } + if (depth == (tag == DW_TAG_enumerator ? 2 : 1) && name && + !specification) { + if (insn & DIE_FLAG_DECLARATION) + declaration = true; + Dwfl_Module *module = cu->module->dwfl_module; + if (tag == DW_TAG_enumerator) { + if (depth1_tag != DW_TAG_enumeration_type) + goto next; + /* + * NB: the enumerator name points to the + * enumeration_type DIE. Also, enumerators can't + * be declared in C/C++, so we don't check for + * that. 
+ */ + die_offset = depth1_offset; + } else if (declaration && + !find_definition(ns->dindex, + (uintptr_t)debug_info_buffer + + die_offset, + &module, &die_offset)) { + goto next; } - if (die.name) { - if (die.decl_file > file_name_table.size) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "invalid DW_AT_decl_file %zu", - die.decl_file); - goto out; - } - if (die.decl_file) - file_name_hash = file_name_table.data[die.decl_file - 1]; - else - file_name_hash = 0; - if ((err = index_die(dindex, die.name, tag, - file_name_hash, cu->module, - die_offset))) - goto out; + if (decl_file > cu->num_file_names) { + return drgn_error_format(DRGN_ERROR_OTHER, + "invalid DW_AT_decl_file %zu", + decl_file); } + uint64_t file_name_hash; + if (decl_file) + file_name_hash = cu->file_name_hashes[decl_file - 1]; + else + file_name_hash = 0; + if ((err = index_die(ns, cu, name, tag, file_name_hash, + module, die_offset))) + return err; } next: - if (die.flags & TAG_FLAG_CHILDREN) { - if (die.sibling) - ptr = die.sibling; + if (insn & DIE_FLAG_CHILDREN) { + /* + * We must descend into the children of enumeration_type + * DIEs to index enumerator DIEs. We don't want to skip + * over the children of the top-level DIE even if it has + * a sibling pointer. 
+ */ + if (sibling && tag != DW_TAG_enumeration_type && + depth > 0) + ptr = sibling; else depth++; } else if (depth == 0) { break; } } - - err = NULL; -out: - uint64_vector_deinit(&file_name_table); - abbrev_table_deinit(&abbrev); - return err; + return NULL; } -static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) +static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) { - size_t i; - - for (i = 0; i < ARRAY_SIZE(dindex->shards); i++) { - struct drgn_dwarf_index_shard *shard; - struct drgn_dwarf_index_die *die; - struct drgn_dwarf_index_die_map_iterator it; - size_t index; - - shard = &dindex->shards[i]; + for (size_t i = 0; i < ARRAY_SIZE(dindex->global.shards); i++) { + struct drgn_dwarf_index_shard *shard = + &dindex->global.shards[i]; /* * Because we're deleting everything that was added since the @@ -2177,17 +1442,19 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) * entry that was added for this update. */ while (shard->dies.size) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[shard->dies.size - 1]; void **userdatap; - struct drgn_dwfl_module_userdata *userdata; - - die = &shard->dies.data[shard->dies.size - 1]; dwfl_module_info(die->module, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL); - userdata = *userdatap; - if (userdata->state == DRGN_DWARF_MODULE_INDEXED) + struct drgn_debug_info_module *module = *userdatap; + if (module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) break; - else - shard->dies.size--; + if (die->tag == DW_TAG_namespace) { + drgn_dwarf_index_namespace_deinit(die->namespace); + free(die->namespace); + } + shard->dies.size--; } /* @@ -2196,15 +1463,17 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) * entries must also be new, so there's no need to preserve * them. 
*/ - for (index = 0; index < shard->dies.size; i++) { - die = &shard->dies.data[index]; - if (die->next != SIZE_MAX && + for (size_t index = 0; index < shard->dies.size; i++) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[index]; + if (die->next != UINT32_MAX && die->next >= shard->dies.size) - die->next = SIZE_MAX; + die->next = UINT32_MAX; } /* Finally, delete the new entries in the map. */ - for (it = drgn_dwarf_index_die_map_first(&shard->map); + for (struct drgn_dwarf_index_die_map_iterator it = + drgn_dwarf_index_die_map_first(&shard->map); it.entry; ) { if (it.entry->value >= shard->dies.size) { it = drgn_dwarf_index_die_map_delete_iterator(&shard->map, @@ -2214,130 +1483,99 @@ static void rollback_dwarf_index(struct drgn_dwarf_index *dindex) } } } -} - -static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex, - struct compilation_unit *cus, - size_t num_cus) -{ - struct drgn_error *err = NULL; - size_t i; - - #pragma omp parallel for schedule(dynamic) - for (i = 0; i < num_cus; i++) { - struct drgn_error *cu_err; - if (err) - continue; - - cu_err = index_cu(dindex, &cus[i]); - if (cu_err) { - #pragma omp critical(drgn_index_cus) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; + for (struct drgn_dwarf_index_specification_map_iterator it = + drgn_dwarf_index_specification_map_first(&dindex->specifications); + it.entry; ) { + void **userdatap; + dwfl_module_info(it.entry->module, &userdatap, NULL, NULL, NULL, + NULL, NULL, NULL); + struct drgn_debug_info_module *module = *userdatap; + if (module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { + it = drgn_dwarf_index_specification_map_next(it); + } else { + it = drgn_dwarf_index_specification_map_delete_iterator(&dindex->specifications, + it); } } - return err; } -/* - * Like drgn_dwarf_index_report_end(), but doesn't finalize reported errors or - * free unindexed modules on success. 
- */ -static struct drgn_error * -drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex, - bool report_from_dwfl) +struct drgn_error * +drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state) { - struct drgn_error *err; - struct drgn_dwarf_module_vector unindexed = VECTOR_INIT; - struct compilation_unit_vector cus = VECTOR_INIT; + struct drgn_dwarf_index *dindex = state->dindex; - dwfl_report_end(dindex->dwfl, NULL, NULL); - if (report_from_dwfl && - dwfl_getmodules(dindex->dwfl, drgn_dwarf_index_report_dwfl_module, - dindex, 0)) { - err = &drgn_enomem; - goto err; - } - err = drgn_dwarf_index_get_unindexed(dindex, &unindexed); - if (err) + if (state->err) goto err; - err = read_cus(dindex, unindexed.data, unindexed.size, &cus); - if (err) - goto err; - /* - * After this point, if we hit an error, then we have to roll back the - * index. - */ - err = index_cus(dindex, cus.data, cus.size); - if (err) { - rollback_dwarf_index(dindex); + + #pragma omp parallel for schedule(dynamic) + for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) { + if (drgn_dwarf_index_update_cancelled(state)) + continue; + struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; + const char *ptr = &cu->ptr[cu->is_64_bit ? 
23 : 11]; + struct drgn_error *cu_err = + index_cu_second_pass(&dindex->global, cu, ptr); + if (cu_err) + drgn_dwarf_index_update_cancel(state, cu_err); + } + if (state->err) { + drgn_dwarf_index_rollback(state->dindex); goto err; } - -out: - compilation_unit_vector_deinit(&cus); - drgn_dwarf_module_vector_deinit(&unindexed); - return err; + return NULL; err: - drgn_dwarf_index_free_modules(dindex, false, false); - drgn_dwarf_index_reset_errors(dindex); - goto out; + for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) + drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); + dindex->cus.size = state->old_cus_size; + return state->err; } -struct drgn_error *drgn_dwarf_index_report_end(struct drgn_dwarf_index *dindex, - bool report_from_dwfl) +static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) { - struct drgn_error *err; + if (ns->saved_err) + return drgn_error_copy(ns->saved_err); - err = drgn_dwarf_index_report_end_internal(dindex, report_from_dwfl); - if (err) - return err; - err = drgn_dwarf_index_finalize_errors(dindex); - if (err && err->code != DRGN_ERROR_MISSING_DEBUG_INFO) { - rollback_dwarf_index(dindex); - drgn_dwarf_index_free_modules(dindex, false, false); - return err; + struct drgn_error *err = NULL; + #pragma omp for schedule(dynamic) + for (size_t i = 0; i < ns->pending_dies.size; i++) { + if (!err) { + struct drgn_dwarf_index_pending_die *pending = + &ns->pending_dies.data[i]; + struct drgn_dwarf_index_cu *cu = + &ns->dindex->cus.data[pending->cu]; + const char *ptr = section_ptr(cu->module->debug_info, + pending->offset); + struct drgn_error *cu_err = + index_cu_second_pass(ns, cu, ptr); + if (cu_err) { + #pragma omp critical(drgn_index_namespace) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } } - drgn_dwarf_index_free_modules(dindex, true, false); + if (err) { + ns->saved_err = err; + return drgn_error_copy(ns->saved_err); + } + ns->pending_dies.size = 0; return err; } -struct 
drgn_error *drgn_dwarf_index_flush(struct drgn_dwarf_index *dindex, - bool report_from_dwfl) +struct drgn_error * +drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, + struct drgn_dwarf_index_namespace *ns, + const char *name, size_t name_len, + const uint64_t *tags, size_t num_tags) { - struct drgn_error *err; - - err = drgn_dwarf_index_report_end_internal(dindex, report_from_dwfl); + struct drgn_error *err = index_namespace(ns); if (err) return err; - drgn_dwarf_index_free_modules(dindex, true, false); - drgn_dwarf_index_report_begin(dindex); - return NULL; -} - -void drgn_dwarf_index_report_abort(struct drgn_dwarf_index *dindex) -{ - dwfl_report_end(dindex->dwfl, NULL, NULL); - drgn_dwarf_index_free_modules(dindex, false, false); - drgn_dwarf_index_reset_errors(dindex); -} - -bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, - const char *name) -{ - return c_string_set_search(&dindex->names, &name).entry != NULL; -} - -void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index *dindex, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags) -{ - it->dindex = dindex; + it->ns = ns; if (name) { struct string key = { .str = name, @@ -2349,22 +1587,23 @@ void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, hp = drgn_dwarf_index_die_map_hash(&key); it->shard = hash_pair_to_shard(hp); - shard = &dindex->shards[it->shard]; + shard = &ns->shards[it->shard]; map_it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &key, hp); - it->index = map_it.entry ? map_it.entry->value : SIZE_MAX; + it->index = map_it.entry ? 
map_it.entry->value : UINT32_MAX; it->any_name = false; } else { it->index = 0; - for (it->shard = 0; it->shard < ARRAY_SIZE(dindex->shards); + for (it->shard = 0; it->shard < ARRAY_SIZE(ns->shards); it->shard++) { - if (dindex->shards[it->shard].dies.size) + if (ns->shards[it->shard].dies.size) break; } it->any_name = true; } it->tags = tags; it->num_tags = num_tags; + return NULL; } static inline bool @@ -2382,29 +1621,24 @@ drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, return false; } -struct drgn_error * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, - Dwarf_Die *die_ret, uint64_t *bias_ret) +struct drgn_dwarf_index_die * +drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) { - struct drgn_dwarf_index *dindex = it->dindex; + struct drgn_dwarf_index_namespace *ns = it->ns; struct drgn_dwarf_index_die *die; - Dwarf *dwarf; - Dwarf_Addr bias; - if (it->any_name) { for (;;) { - struct drgn_dwarf_index_shard *shard; - - if (it->shard >= ARRAY_SIZE(dindex->shards)) - return &drgn_stop; + if (it->shard >= ARRAY_SIZE(ns->shards)) + return NULL; - shard = &dindex->shards[it->shard]; + struct drgn_dwarf_index_shard *shard = + &ns->shards[it->shard]; die = &shard->dies.data[it->index]; if (++it->index >= shard->dies.size) { it->index = 0; - while (++it->shard < ARRAY_SIZE(dindex->shards)) { - if (dindex->shards[it->shard].dies.size) + while (++it->shard < ARRAY_SIZE(ns->shards)) { + if (ns->shards[it->shard].dies.size) break; } } @@ -2414,12 +1648,11 @@ drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, } } else { for (;;) { - struct drgn_dwarf_index_shard *shard; + if (it->index == UINT32_MAX) + return NULL; - if (it->index == SIZE_MAX) - return &drgn_stop; - - shard = &dindex->shards[it->shard]; + struct drgn_dwarf_index_shard *shard = + &ns->shards[it->shard]; die = &shard->dies.data[it->index]; it->index = die->next; @@ -2428,8 +1661,15 @@ drgn_dwarf_index_iterator_next(struct 
drgn_dwarf_index_iterator *it, break; } } + return die; +} - dwarf = dwfl_module_getdwarf(die->module, &bias); +struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, + Dwarf_Die *die_ret, + uint64_t *bias_ret) +{ + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(die->module, &bias); if (!dwarf) return drgn_error_libdwfl(); if (!dwarf_offdie(dwarf, die->offset, die_ret)) diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index da7a4f9f2..d5edec592 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -12,8 +12,9 @@ #ifndef DRGN_DWARF_INDEX_H #define DRGN_DWARF_INDEX_H +#include #include -#include +#include #include #include @@ -27,11 +28,12 @@ typedef struct {} omp_lock_t; #define omp_unset_lock(lock) do {} while (0) #endif -#include "drgn.h" #include "hash_table.h" -#include "string_builder.h" #include "vector.h" +struct drgn_debug_info_module; +struct drgn_error; + /** * @ingroup Internals * @@ -54,17 +56,44 @@ typedef struct {} omp_lock_t; * @{ */ -extern const Dwfl_Callbacks drgn_dwfl_callbacks; -extern const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks; -extern const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks; +/* + * An indexed DIE. + * + * DIEs with the same name but different tags or files are considered distinct. + * We only compare the hash of the file name, not the string value, because a + * 64-bit collision is unlikely enough, especially when also considering the + * name and tag. + */ +struct drgn_dwarf_index_die { + /* + * The next DIE with the same name (as an index into + * drgn_dwarf_index_shard::dies), or UINT32_MAX if this is the last DIE. + */ + uint32_t next; + uint8_t tag; + union { + /* + * If tag != DW_TAG_namespace (namespaces are merged, so they + * don't need this). + */ + uint64_t file_name_hash; + /* If tag == DW_TAG_namespace. 
*/ + struct drgn_dwarf_index_namespace *namespace; + }; + Dwfl_Module *module; + size_t offset; +}; -struct drgn_dwarf_index_die; -DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, size_t) +DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, uint32_t) DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) struct drgn_dwarf_index_shard { /** @privatesection */ omp_lock_t lock; + /* + * Map from name to list of DIEs with that name (as the index in + * drgn_dwarf_index_shard::dies of the first DIE with that name). + */ struct drgn_dwarf_index_die_map map; /* * We store all entries in a shard as a single array, which is more @@ -75,88 +104,48 @@ struct drgn_dwarf_index_shard { #define DRGN_DWARF_INDEX_SHARD_BITS 8 -/** State of a @ref drgn_dwarf_module or a @c Dwfl_Module. */ -enum drgn_dwarf_module_state { - /** Reported but not indexed. */ - DRGN_DWARF_MODULE_NEW, - /** Reported and will be indexed on success. */ - DRGN_DWARF_MODULE_INDEXING, - /** Indexed. Must not be freed until @ref drgn_dwarf_index_deinit(). */ - DRGN_DWARF_MODULE_INDEXED, -}; - -DEFINE_VECTOR_TYPE(dwfl_module_vector, Dwfl_Module *) - -/** - * A module reported to a @ref drgn_dwarf_index. - * - * Conceptually, a module is an ELF file loaded at a specific address range (or - * not loaded). - * - * Each (file, address range) referenced by a @ref drgn_dwarf_index is uniquely - * represented by one @c Dwfl_Module. Files are identified by canonical path. - * - * Each (binary, address range) is uniquely represented by a @ref - * drgn_dwarf_module. Binaries are identified by build ID; note that a single - * binary may be represented by multiple files (e.g., a stripped binary and its - * corresponding separate debug info file). If a file does not have a build ID, - * it is considered a different binary from other files with different canonical - * paths. - */ -struct drgn_dwarf_module { - /** Allocated with @c malloc() if @c build_id_len is non-zero. 
*/ - void *build_id; - /** Zero if the module does not have a build ID. */ - size_t build_id_len; - /** Load address range, or both 0 if not loaded. */ - uint64_t start, end; - /** Optional module name allocated with @c malloc(). */ - char *name; - enum drgn_dwarf_module_state state; - /** - * Candidate Dwfl_Modules which were reported for this module. - * - * One of these will be indexed. Once the module is indexed, this is - * always empty. +/* A DIE with a DW_AT_specification attribute. */ +struct drgn_dwarf_index_specification { + /* + * Address of non-defining declaration DIE referenced by + * DW_AT_specification. */ - struct dwfl_module_vector dwfl_modules; + uintptr_t declaration; + /* Module and offset of DIE. */ + Dwfl_Module *module; + size_t offset; }; -/** - * State tracked for each @c Dwfl_Module. - * - * @c path, @c elf, and @c fd are used when an ELF file was reported to a @ref - * drgn_dwarf_index so that we can report the ELF file to libdwfl later. - */ -struct drgn_dwfl_module_userdata { - char *path; - Elf *elf; - int fd; - enum drgn_dwarf_module_state state; -}; +static inline uintptr_t +drgn_dwarf_index_specification_to_key(const struct drgn_dwarf_index_specification *entry) +{ + return entry->declaration; +} -DEFINE_VECTOR_TYPE(drgn_dwarf_module_vector, struct drgn_dwarf_module *) +DEFINE_HASH_TABLE_TYPE(drgn_dwarf_index_specification_map, + struct drgn_dwarf_index_specification, + drgn_dwarf_index_specification_to_key) -struct drgn_dwarf_module_key { - const void *build_id; - size_t build_id_len; - uint64_t start, end; -}; +DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) -static inline struct drgn_dwarf_module_key -drgn_dwarf_module_key(struct drgn_dwarf_module * const *entry) -{ - return (struct drgn_dwarf_module_key){ - .build_id = (*entry)->build_id, - .build_id_len = (*entry)->build_id_len, - .start = (*entry)->start, - .end = (*entry)->end, - }; -} -DEFINE_HASH_TABLE_TYPE(drgn_dwarf_module_table, struct 
drgn_dwarf_module *, - drgn_dwarf_module_key) +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, + struct drgn_dwarf_index_pending_die) -DEFINE_HASH_SET_TYPE(c_string_set, const char *) +/** Mapping from names/tags to DIEs/nested namespaces. */ +struct drgn_dwarf_index_namespace { + /** + * Index shards. + * + * This is sharded to reduce lock contention. + */ + struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; + /** Parent DWARF index. */ + struct drgn_dwarf_index *dindex; + /** DIEs we have not indexed yet. */ + struct drgn_dwarf_index_pending_die_vector pending_dies; + /** Saved error from a previous index. */ + struct drgn_error *saved_err; +}; /** * Fast index of DWARF debugging information. @@ -169,50 +158,23 @@ DEFINE_HASH_SET_TYPE(c_string_set, const char *) * Searches in the index are done with a @ref drgn_dwarf_index_iterator. */ struct drgn_dwarf_index { + /** Global namespace. */ + struct drgn_dwarf_index_namespace global; /** - * Index shards. + * Map from address of DIE referenced by DW_AT_specification to DIE that + * references it. This is used to resolve DIEs with DW_AT_declaration to + * their definition. * - * This is sharded to reduce lock contention. + * This is not sharded because there typically aren't enough of these in + * a program to cause contention. */ - struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; - Dwfl *dwfl; - /** - * Formatted errors reported by @ref drgn_dwarf_index_report_error(). - */ - struct string_builder errors; - /** - * Number of errors reported by @ref drgn_dwarf_index_report_error(). - */ - unsigned int num_errors; - /** Maximum number of errors to report before truncating. */ - unsigned int max_errors; - /** - * Modules keyed by build ID and address range. - * - * Every reported module is either here or in @ref no_build_id. While - * reporting modules, these include indexed and unindexed modules. 
- */ - struct drgn_dwarf_module_table module_table; - /** Modules that don't have a build ID. */ - struct drgn_dwarf_module_vector no_build_id; - /** - * Names of indexed modules. - * - * The entries in this set are @ref drgn_dwarf_module::name, so they - * should not be freed. - */ - struct c_string_set names; + struct drgn_dwarf_index_specification_map specifications; + /** Indexed compilation units. */ + struct drgn_dwarf_index_cu_vector cus; }; -/** - * Initialize a @ref drgn_dwarf_index. - * - * @param[in] callbacks One of @ref drgn_dwfl_callbacks, @ref - * drgn_linux_proc_dwfl_callbacks, or @ref - * drgn_userspace_core_dump_dwfl_callbacks. - */ -struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, - const Dwfl_Callbacks *callbacks); +/** Initialize a @ref drgn_dwarf_index. */ +void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex); /** * Deinitialize a @ref drgn_dwarf_index. @@ -222,109 +184,78 @@ struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex, */ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex); -/** - * Start reporting modules to a @ref drgn_dwarf_index. - * - * This must be paired with a call to either @ref drgn_dwarf_index_report_end() - * or @ref drgn_dwarf_index_report_abort(). - */ -void drgn_dwarf_index_report_begin(struct drgn_dwarf_index *dindex); +/** State tracked while updating a @ref drgn_dwarf_index. */ +struct drgn_dwarf_index_update_state { + struct drgn_dwarf_index *dindex; + size_t old_cus_size; + struct drgn_error *err; +}; /** - * Report a non-fatal error to a @ref drgn_dwarf_index. - * - * These errors are reported by @ref drgn_dwarf_index_report_end() in the @ref - * DRGN_ERROR_MISSING_DEBUG_INFO error. + * Prepare to update a @ref drgn_dwarf_index. * - * @param[name] name An optional module name to prefix to the error message. - * @param[message] message An optional message with additional context to prefix - * to the error message. 
- * @param[err] err The error to report. This may be @c NULL if @p name and @p - * message provide sufficient information. - * @return @c NULL on success, @ref drgn_enomem if the error could not be - * reported. + * @param[out] state Initialized update state. Must be passed to @ref + * drgn_dwarf_index_update_end(). */ -struct drgn_error * -drgn_dwarf_index_report_error(struct drgn_dwarf_index *dindex, const char *name, - const char *message, struct drgn_error *err); +void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, + struct drgn_dwarf_index *dindex); /** - * Report a module to a @ref drgn_dwarf_index from an ELF file. + * Finish updating a @ref drgn_dwarf_index. * - * This takes ownership of @p fd and @p elf on either success or failure. They - * should not be used (including closed or freed) after this returns. + * This should be called once all of the tasks created by @ref + * drgn_dwarf_index_read_module() have completed (even if the update was + * cancelled). * - * If this fails, @ref drgn_dwarf_index_report_abort() must be called. + * If the update was not cancelled, this finishes indexing all modules reported + * by @ref drgn_dwarf_index_read_module(). If it was cancelled or there is an + * error while indexing, this rolls back the index and removes the newly + * reported modules. * - * @param[in] path The path to the file. - * @param[in] fd A file descriptor referring to the file. - * @param[in] elf The Elf handle of the file. - * @param[in] start The (inclusive) start address of the loaded file, or 0 if - * the file is not loaded. - * @param[in] end The (exclusive) end address of the loaded file, or 0 if the - * file is not loaded. - * @param[in] name An optional name for the module. This is only used for @ref - * drgn_dwarf_index_is_indexed(). - * @param[out] new_ret Whether the module was newly created and reported. 
This - * is @c false if a module with the same build ID and address range was already - * indexed or a file with the same path and address range was already reported. + * @return @c NULL on success, non-@c NULL if the update was cancelled or there + * was another error. */ -struct drgn_error *drgn_dwarf_index_report_elf(struct drgn_dwarf_index *dindex, - const char *path, int fd, - Elf *elf, uint64_t start, - uint64_t end, const char *name, - bool *new_ret); +struct drgn_error * +drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state); /** - * Stop reporting modules to a @ref drgn_dwarf_index and index new DWARF - * information. - * - * This parses and indexes the debugging information for all modules that have - * not yet been indexed. + * Cancel an update of a @ref drgn_dwarf_index. * - * If debug information was not available for one or more modules, a @ref - * DRGN_ERROR_MISSING_DEBUG_INFO error is returned, those modules are freed, and - * all other modules are added to the index. + * This should be called if there is a fatal error and the update must be + * aborted. * - * On any other error, no new debugging information is indexed and all unindexed - * modules are freed. - * - * @param[in] report_from_dwfl Whether any Dwfl_Modules were reported - * to @ref drgn_dwarf_index::dwfl directly via libdwfl. In that case, we need to - * report those to the DWARF index, as well. + * @param[in] err Error to report. This will be returned from @ref + * drgn_dwarf_index_update_end(). If an error has already been reported, this + * error is destroyed. */ -struct drgn_error *drgn_dwarf_index_report_end(struct drgn_dwarf_index *dindex, - bool report_from_dwfl); +void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, + struct drgn_error *err); /** - * Index new DWARF information and continue reporting. + * Return whether an update of a @ref drgn_dwarf_index has been cancelled by + * @ref drgn_dwarf_index_update_cancel(). 
* - * This is similar to @ref drgn_dwarf_index_report_end() except that it does not - * finish reporting or return a @ref DRGN_ERROR_MISSING_DEBUG_INFO error. @ref - * After this is called, more modules may be reported. @ref - * drgn_dwarf_index_report_end() or @ref drgn_dwarf_index_report_abort() must - * still be called. + * Because updating is parallelized, this allows tasks other than the one that + * encountered the error to "fail fast". */ -struct drgn_error *drgn_dwarf_index_flush(struct drgn_dwarf_index *dindex, - bool report_from_dwfl); +static inline bool +drgn_dwarf_index_update_cancelled(struct drgn_dwarf_index_update_state *state) +{ + /* + * No need for omp critical/omp atomic since this is a best-effort + * optimization. + */ + return state->err != NULL; +} /** - * Stop reporting modules to a @ref drgn_dwarf_index and free all unindexed - * modules. + * Read a module for updating a @ref drgn_dwarf_index. * - * This also clears all errors reported by @ref drgn_dwarf_index_report_error(). - * - * This should be called instead of @ref drgn_dwarf_index_report_end() if a - * fatal error is encountered while reporting modules. + * This creates OpenMP tasks to begin indexing the module. It may cancel the + * update. */ -void drgn_dwarf_index_report_abort(struct drgn_dwarf_index *dindex); - -/** - * Return whether a @ref drgn_dwarf_index has indexed a module with the given - * name. - */ -bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, - const char *name); +void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module); /** * Iterator over DWARF debugging information. 
@@ -334,11 +265,11 @@ bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex, */ struct drgn_dwarf_index_iterator { /** @privatesection */ - struct drgn_dwarf_index *dindex; + struct drgn_dwarf_index_namespace *ns; const uint64_t *tags; size_t num_tags; size_t shard; - size_t index; + uint32_t index; bool any_name; }; @@ -351,11 +282,13 @@ struct drgn_dwarf_index_iterator { * @param[in] name_len Length of @c name. * @param[in] tags List of DIE tags to search for. * @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index *dindex, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags); +struct drgn_error * +drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, + struct drgn_dwarf_index_namespace *ns, + const char *name, size_t name_len, + const uint64_t *tags, size_t num_tags); /** * Get the next matching DIE from a DWARF index iterator. @@ -367,18 +300,24 @@ void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, * DW_TAG_enumerator DIEs. * * @param[in] it DWARF index iterator. + * @return Next DIE, or @c NULL if there are no more matching DIEs. + */ +struct drgn_dwarf_index_die * +drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it); + +/** + * Get a @c Dwarf_Die from a @ref drgn_dwarf_index_die. + * + * @param[in] die Indexed DIE. * @param[out] die_ret Returned DIE. * @param[out] bias_ret Returned difference between addresses in the loaded * module and addresses in the debugging information. This may be @c NULL if it * is not needed. - * @return @c NULL on success, non-@c NULL on error. 
In particular, when there - * are no more matching DIEs, @p die_ret is not modified and an error with code - * @ref DRGN_ERROR_STOP is returned; this @ref DRGN_ERROR_STOP error does not - * have to be passed to @ref drgn_error_destroy(). + * @return @c NULL on success, non-@c NULL on error. */ -struct drgn_error * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it, - Dwarf_Die *die_ret, uint64_t *bias_ret); +struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, + Dwarf_Die *die_ret, + uint64_t *bias_ret); /** @} */ diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/dwarf_info_cache.c deleted file mode 100644 index 536d99a2e..000000000 --- a/libdrgn/dwarf_info_cache.c +++ /dev/null @@ -1,1710 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -#include -#include -#include -#include - -#include "internal.h" -#include "dwarf_index.h" -#include "dwarf_info_cache.h" -#include "hash_table.h" -#include "object.h" -#include "object_index.h" -#include "type_index.h" -#include "vector.h" - -DEFINE_HASH_TABLE_FUNCTIONS(dwarf_type_map, hash_pair_ptr_type, - hash_table_scalar_eq) -DEFINE_VECTOR(drgn_type_member_vector, struct drgn_type_member) -DEFINE_VECTOR(drgn_type_enumerator_vector, struct drgn_type_enumerator) -DEFINE_VECTOR(drgn_type_parameter_vector, struct drgn_type_parameter) - -struct drgn_type_from_dwarf_thunk { - struct drgn_type_thunk thunk; - struct drgn_dwarf_info_cache *dicache; - Dwarf_Die die; - bool can_be_incomplete_array; -}; - -static void drgn_dwarf_type_free(struct drgn_dwarf_type *dwarf_type) -{ - if (dwarf_type->should_free) { - struct drgn_type *type = dwarf_type->type; - - if (drgn_type_has_members(type)) { - struct drgn_type_member *members; - size_t num_members, i; - - members = drgn_type_members(type); - num_members = drgn_type_num_members(type); - for (i = 0; i < num_members; i++) - drgn_type_member_deinit(&members[i]); - free(members); - } - if 
(drgn_type_has_parameters(type)) { - struct drgn_type_parameter *parameters; - size_t num_parameters, i; - - parameters = drgn_type_parameters(type); - num_parameters = drgn_type_num_parameters(type); - for (i = 0; i < num_parameters; i++) - drgn_type_parameter_deinit(¶meters[i]); - free(parameters); - } - if (drgn_type_has_enumerators(type)) - free(drgn_type_enumerators(type)); - free(type); - } -} - -/** - * Return whether a DWARF DIE is little-endian. - * - * @param[in] check_attr Whether to check the DW_AT_endianity attribute. If @c - * false, only the ELF header is checked and this function cannot fail. - * @return @c NULL on success, non-@c NULL on error. - */ -static struct drgn_error *dwarf_die_is_little_endian(Dwarf_Die *die, - bool check_attr, bool *ret) -{ - Dwarf_Attribute endianity_attr_mem, *endianity_attr; - Dwarf_Word endianity; - if (check_attr && - (endianity_attr = dwarf_attr_integrate(die, DW_AT_endianity, - &endianity_attr_mem))) { - if (dwarf_formudata(endianity_attr, &endianity)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_endianity"); - } - } else { - endianity = DW_END_default; - } - switch (endianity) { - case DW_END_default: { - Elf *elf = dwarf_getelf(dwarf_cu_getdwarf(die->cu)); - *ret = elf_getident(elf, NULL)[EI_DATA] == ELFDATA2LSB; - return NULL; - } - case DW_END_little: - *ret = true; - return NULL; - case DW_END_big: - *ret = false; - return NULL; - default: - return drgn_error_create(DRGN_ERROR_OTHER, - "unknown DW_AT_endianity"); - } -} - -/** Like dwarf_die_is_little_endian(), but returns a @ref drgn_byte_order. */ -static struct drgn_error *dwarf_die_byte_order(Dwarf_Die *die, - bool check_attr, - enum drgn_byte_order *ret) -{ - bool little_endian; - struct drgn_error *err = dwarf_die_is_little_endian(die, check_attr, - &little_endian); - /* - * dwarf_die_is_little_endian() can't fail if check_attr is false, so - * the !check_attr test suppresses maybe-uninitialized warnings. 
- */ - if (!err || !check_attr) - *ret = little_endian ? DRGN_LITTLE_ENDIAN : DRGN_BIG_ENDIAN; - return err; -} - -static int dwarf_type(Dwarf_Die *die, Dwarf_Die *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) - return 1; - - return dwarf_formref_die(attr, ret) ? 0 : -1; -} - -static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr_integrate(die, name, &attr_mem))) { - *ret = false; - return 0; - } - return dwarf_formflag(attr, ret); -} - -/** - * Parse a type from a DWARF debugging information entry. - * - * This is the same as @ref drgn_type_from_dwarf() except that it can be used to - * work around a bug in GCC < 9.0 that zero length array types are encoded the - * same as incomplete array types. There are a few places where GCC allows - * zero-length arrays but not incomplete arrays: - * - * - As the type of a member of a structure with only one member. - * - As the type of a structure member other than the last member. - * - As the type of a union member. - * - As the element type of an array. - * - * In these cases, we know that what appears to be an incomplete array type must - * actually have a length of zero. In other cases, a subrange DIE without - * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array - * type. - * - * @param[in] dicache Debugging information cache. - * @param[in] die DIE to parse. - * @param[in] can_be_incomplete_array Whether the type can be an incomplete - * array type. If this is @c false and the type appears to be an incomplete - * array type, its length is set to zero instead. - * @param[out] is_incomplete_array_ret Whether the encoded type is an incomplete - * array type or a typedef of an incomplete array type (regardless of @p - * can_be_incomplete_array). - * @param[out] ret Returned type. 
- * @return @c NULL on success, non-@c NULL on error. - */ -static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret); - -/** - * Parse a type from a DWARF debugging information entry. - * - * @param[in] dicache Debugging information cache. - * @param[in] die DIE to parse. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -static inline struct drgn_error * -drgn_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, - struct drgn_qualified_type *ret) -{ - return drgn_type_from_dwarf_internal(dicache, die, true, NULL, ret); -} - -static struct drgn_error * -drgn_type_from_dwarf_thunk_evaluate_fn(struct drgn_type_thunk *thunk, - struct drgn_qualified_type *ret) -{ - struct drgn_type_from_dwarf_thunk *t; - - t = container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk); - return drgn_type_from_dwarf_internal(t->dicache, &t->die, - t->can_be_incomplete_array, NULL, - ret); -} - -static void drgn_type_from_dwarf_thunk_free_fn(struct drgn_type_thunk *thunk) -{ - free(container_of(thunk, struct drgn_type_from_dwarf_thunk, thunk)); -} - -static struct drgn_error * -drgn_lazy_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *parent_die, bool can_be_incomplete_array, - const char *tag_name, struct drgn_lazy_type *ret) -{ - struct drgn_type_from_dwarf_thunk *thunk; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - Dwarf_Die type_die; - - if (!(attr = dwarf_attr_integrate(parent_die, DW_AT_type, &attr_mem))) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s is missing DW_AT_type", - tag_name); - } - - if (!dwarf_formref_die(attr, &type_die)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_type", tag_name); - } - - thunk = malloc(sizeof(*thunk)); - if (!thunk) - return &drgn_enomem; - - thunk->thunk.evaluate_fn = 
drgn_type_from_dwarf_thunk_evaluate_fn; - thunk->thunk.free_fn = drgn_type_from_dwarf_thunk_free_fn; - thunk->dicache = dicache; - thunk->die = type_die; - thunk->can_be_incomplete_array = can_be_incomplete_array; - drgn_lazy_type_init_thunk(ret, &thunk->thunk); - return NULL; -} - -/** - * Parse a type from the @c DW_AT_type attribute of a DWARF debugging - * information entry. - * - * @param[in] dicache Debugging information cache. - * @param[in] parent_die Parent DIE. - * @param[in] parent_lang Language of the parent DIE if it is already known, @c - * NULL if it should be determined from @p parent_die. - * @param[in] tag_name Spelling of the DWARF tag of @p parent_die. Used for - * error messages. - * @param[in] can_be_void Whether the @c DW_AT_type attribute may be missing, - * which is interpreted as a void type. If this is false and the @c DW_AT_type - * attribute is missing, an error is returned. - * @param[in] can_be_incomplete_array See @ref drgn_type_from_dwarf_internal(). - * @param[in] is_incomplete_array_ret See @ref drgn_type_from_dwarf_internal(). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -struct drgn_error * -drgn_type_from_dwarf_child(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *parent_die, - const struct drgn_language *parent_lang, - const char *tag_name, - bool can_be_void, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - Dwarf_Die type_die; - - if (!(attr = dwarf_attr_integrate(parent_die, DW_AT_type, &attr_mem))) { - if (can_be_void) { - if (!parent_lang) { - err = drgn_language_from_die(parent_die, - &parent_lang); - if (err) - return err; - } - ret->type = drgn_void_type(parent_lang); - ret->qualifiers = 0; - return NULL; - } else { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s is missing DW_AT_type", - tag_name); - } - } - - if (!dwarf_formref_die(attr, &type_die)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_type", tag_name); - } - - return drgn_type_from_dwarf_internal(dicache, &type_die, - can_be_incomplete_array, - is_incomplete_array_ret, ret); -} - -static struct drgn_error * -drgn_base_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct drgn_type *type; - Dwarf_Attribute attr; - Dwarf_Word encoding; - const char *name; - int size; - - name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_name"); - } - - if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || - dwarf_formudata(&attr, &encoding)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_encoding"); - } - size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - switch (encoding) { - case 
DW_ATE_boolean: - drgn_bool_type_init(type, name, size, lang); - break; - case DW_ATE_float: - drgn_float_type_init(type, name, size, lang); - break; - case DW_ATE_signed: - case DW_ATE_signed_char: - drgn_int_type_init(type, name, size, true, lang); - break; - case DW_ATE_unsigned: - case DW_ATE_unsigned_char: - drgn_int_type_init(type, name, size, false, lang); - break; - /* - * GCC also supports complex integer types, but DWARF 4 doesn't have an - * encoding for that. GCC as of 8.2 emits DW_ATE_lo_user, but that's - * ambiguous because it also emits that in other cases. For now, we - * don't support it. - */ - case DW_ATE_complex_float: { - struct drgn_qualified_type real_type; - Dwarf_Die child; - - if (dwarf_type(die, &child)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_type"); - } - err = drgn_type_from_dwarf(dicache, &child, &real_type); - if (err) - return err; - if (drgn_type_kind(real_type.type) != DRGN_TYPE_FLOAT && - drgn_type_kind(real_type.type) != DRGN_TYPE_INT) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_type of DW_ATE_complex_float is not a floating-point or integer type"); - } - drgn_complex_type_init(type, name, size, real_type.type, lang); - break; - } - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_base_type has unknown DWARF encoding 0x%llx", - (unsigned long long)encoding); - } - *ret = type; - return NULL; -} - -/* - * DW_TAG_structure_type, DW_TAG_union_type, DW_TAG_class_type, and - * DW_TAG_enumeration_type can be incomplete (i.e., have a DW_AT_declaration of - * true). This tries to find the complete type. If it succeeds, it returns NULL. - * If it can't find a complete type, it returns a DRGN_ERROR_STOP error. - * Otherwise, it returns an error. 
- */ -static struct drgn_error * -drgn_dwarf_info_cache_find_complete(struct drgn_dwarf_info_cache *dicache, - uint64_t tag, const char *name, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct drgn_dwarf_index_iterator it; - Dwarf_Die die; - struct drgn_qualified_type qualified_type; - - drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, - strlen(name), &tag, 1); - /* - * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs - * with DW_AT_declaration, so this will always be a complete type. - */ - err = drgn_dwarf_index_iterator_next(&it, &die, NULL); - if (err) - return err; - /* - * Look for another matching DIE. If there is one, then we can't be sure - * which type this is, so leave it incomplete rather than guessing. - */ - err = drgn_dwarf_index_iterator_next(&it, &die, NULL); - if (!err) - return &drgn_stop; - else if (err->code != DRGN_ERROR_STOP) - return err; - - err = drgn_type_from_dwarf(dicache, &die, &qualified_type); - if (err) - return err; - *ret = qualified_type.type; - return NULL; -} - -static struct drgn_error * -parse_member_offset(Dwarf_Die *die, struct drgn_lazy_type *member_type, - uint64_t bit_field_size, bool little_endian, uint64_t *ret) -{ - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - /* - * The simplest case is when we have DW_AT_data_bit_offset, which is - * already the offset in bits from the beginning of the containing - * object to the beginning of the member (which may be a bit field). - */ - attr = dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr_mem); - if (attr) { - Dwarf_Word bit_offset; - - if (dwarf_formudata(attr, &bit_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_bit_offset"); - } - *ret = bit_offset; - return NULL; - } - - /* - * Otherwise, we might have DW_AT_data_member_location, which is the - * offset in bytes from the beginning of the containing object. 
- */ - attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); - if (attr) { - Dwarf_Word byte_offset; - - if (dwarf_formudata(attr, &byte_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_member_location"); - } - *ret = 8 * byte_offset; - } else { - *ret = 0; - } - - /* - * In addition to DW_AT_data_member_location, a bit field might have - * DW_AT_bit_offset, which is the offset in bits of the most significant - * bit of the bit field from the most significant bit of the containing - * object. - */ - attr = dwarf_attr_integrate(die, DW_AT_bit_offset, &attr_mem); - if (attr) { - Dwarf_Word bit_offset; - - if (dwarf_formudata(attr, &bit_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_bit_offset"); - } - - /* - * If the architecture is little-endian, then we must compute - * the location of the most significant bit from the size of the - * member, then subtract the bit offset and bit size to get the - * location of the beginning of the bit field. - * - * If the architecture is big-endian, then the most significant - * bit of the bit field is the beginning. - */ - if (little_endian) { - uint64_t byte_size; - - attr = dwarf_attr_integrate(die, DW_AT_byte_size, - &attr_mem); - /* - * If the member has an explicit byte size, we can use - * that. Otherwise, we have to get it from the member - * type. 
- */ - if (attr) { - Dwarf_Word word; - - if (dwarf_formudata(attr, &word)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_byte_size"); - } - byte_size = word; - } else { - struct drgn_qualified_type containing_type; - - err = drgn_lazy_type_evaluate(member_type, - &containing_type); - if (err) - return err; - if (!drgn_type_has_size(containing_type.type)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member bit field type does not have size"); - } - byte_size = drgn_type_size(containing_type.type); - } - *ret += 8 * byte_size - bit_offset - bit_field_size; - } else { - *ret += bit_offset; - } - } - - return NULL; -} - -static struct drgn_error *parse_member(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, - struct drgn_type_member *member, - bool little_endian) -{ - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - struct drgn_lazy_type member_type; - const char *name; - uint64_t bit_offset; - uint64_t bit_field_size; - - attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem); - if (attr) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_name"); - } - } else { - name = NULL; - } - - attr = dwarf_attr_integrate(die, DW_AT_bit_size, &attr_mem); - if (attr) { - Dwarf_Word bit_size; - - if (dwarf_formudata(attr, &bit_size)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_bit_size"); - } - bit_field_size = bit_size; - } else { - bit_field_size = 0; - } - - err = drgn_lazy_type_from_dwarf(dicache, die, false, "DW_TAG_member", - &member_type); - if (err) - return err; - - err = parse_member_offset(die, &member_type, bit_field_size, - little_endian, &bit_offset); - if (err) { - drgn_lazy_type_deinit(&member_type); - return err; - } - - drgn_type_member_init(member, member_type, name, bit_offset, - bit_field_size); - return NULL; -} - -static struct drgn_error * 
-drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, - const struct drgn_language *lang, - enum drgn_type_kind kind, - struct drgn_type **ret, bool *should_free) -{ - struct drgn_error *err; - - const char *dw_tag_str; - uint64_t dw_tag; - switch (kind) { - case DRGN_TYPE_STRUCT: - dw_tag_str = "DW_TAG_structure_type"; - dw_tag = DW_TAG_structure_type; - break; - case DRGN_TYPE_UNION: - dw_tag_str = "DW_TAG_union_type"; - dw_tag = DW_TAG_union_type; - break; - case DRGN_TYPE_CLASS: - dw_tag_str = "DW_TAG_class_type"; - dw_tag = DW_TAG_class_type; - break; - default: - UNREACHABLE(); - } - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, - &attr_mem); - const char *tag; - if (attr) { - tag = dwarf_formstring(attr); - if (!tag) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_name", - dw_tag_str); - } - } else { - tag = NULL; - } - - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_declaration", - dw_tag_str); - } - if (declaration && tag) { - err = drgn_dwarf_info_cache_find_complete(dicache, - dw_tag, tag, ret); - if (!err) { - *should_free = false; - return NULL; - } else if (err->code != DRGN_ERROR_STOP) { - return err; - } - } - - *should_free = true; - struct drgn_type *type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - - if (declaration) { - switch (kind) { - case DRGN_TYPE_STRUCT: - drgn_struct_type_init_incomplete(type, tag, lang); - break; - case DRGN_TYPE_UNION: - drgn_union_type_init_incomplete(type, tag, lang); - break; - case DRGN_TYPE_CLASS: - drgn_class_type_init_incomplete(type, tag, lang); - break; - default: - UNREACHABLE(); - } - *ret = type; - return NULL; - } - - struct drgn_type_member_vector members = VECTOR_INIT; - int size = dwarf_bytesize(die); - if (size == -1) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has missing or 
invalid DW_AT_byte_size", - dw_tag_str); - goto err; - } - - bool little_endian; - dwarf_die_is_little_endian(die, false, &little_endian); - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - if (dwarf_tag(&child) == DW_TAG_member) { - struct drgn_type_member *member = - drgn_type_member_vector_append_entry(&members); - if (!member) { - err = &drgn_enomem; - goto err; - } - err = parse_member(dicache, &child, member, - little_endian); - if (err) { - members.size--; - goto err; - } - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - drgn_type_member_vector_shrink_to_fit(&members); - - if (kind == DRGN_TYPE_UNION) { - drgn_union_type_init(type, tag, size, members.data, - members.size, lang); - } else { - if (kind == DRGN_TYPE_STRUCT) { - drgn_struct_type_init(type, tag, size, members.data, - members.size, lang); - } else { - drgn_class_type_init(type, tag, size, members.data, - members.size, lang); - } - /* - * Flexible array members are only allowed as the last member of - * a structure with more than one named member. We defaulted - * can_be_incomplete_array to false in parse_member(), so fix it - * up. - */ - if (members.size > 1) { - struct drgn_type_member *member = - &drgn_type_members(type)[members.size - 1]; - /* - * The type may have already been evaluated if it's a - * bit field. Arrays can't be bit fields, so it's okay - * if we missed it. 
- */ - if (!drgn_lazy_type_is_evaluated(&member->type)) { - struct drgn_type_from_dwarf_thunk *thunk = - container_of(member->type.thunk, struct - drgn_type_from_dwarf_thunk, - thunk); - thunk->can_be_incomplete_array = true; - } - } - } - *ret = type; - return NULL; - -err: - for (size_t i = 0; i < members.size; i++) - drgn_type_member_deinit(&members.data[i]); - drgn_type_member_vector_deinit(&members); - free(type); - return err; -} - -static struct drgn_error * -parse_enumerator(Dwarf_Die *die, struct drgn_type_enumerator *enumerator, - bool *is_signed) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - const char *name; - int r; - - name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has missing or invalid DW_AT_name"); - } - - attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem); - if (!attr) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator is missing DW_AT_const_value"); - } - - if (attr->form == DW_FORM_sdata || - attr->form == DW_FORM_implicit_const) { - Dwarf_Sword svalue; - - r = dwarf_formsdata(attr, &svalue); - if (r == 0) { - drgn_type_enumerator_init_signed(enumerator, name, - svalue); - if (svalue < 0) - *is_signed = true; - } - } else { - Dwarf_Word uvalue; - - r = dwarf_formudata(attr, &uvalue); - if (r == 0) { - drgn_type_enumerator_init_unsigned(enumerator, name, - uvalue); - } - } - if (r) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has invalid DW_AT_const_value"); - } - return NULL; -} - -static struct drgn_type fallback_enum_compatible_types[2][4]; - -__attribute__((constructor(200))) -static void fallback_enum_compatible_types_init(void) -{ - unsigned int is_signed, shift; - - for (is_signed = 0; is_signed < 2; is_signed++) { - for (shift = 0; - shift < ARRAY_SIZE(fallback_enum_compatible_types[0]); - shift++) { - struct drgn_type *type; - - type = &fallback_enum_compatible_types[is_signed][shift]; - drgn_int_type_init(type, 
"", 1 << shift, - is_signed, NULL); - } - } -} - -/* - * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, - * so we have to fabricate the compatible type. - * - * GCC before 7.1 didn't include DW_AT_encoding for DW_TAG_enumeration_type - * DIEs, either, so we also have to guess at the sign. - */ -static struct drgn_error * -enum_compatible_type_fallback(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, bool is_signed, - struct drgn_type **ret) -{ - int size; - - size = dwarf_bytesize(die); - switch (size) { - case 1: - *ret = &fallback_enum_compatible_types[is_signed][0]; - return NULL; - case 2: - *ret = &fallback_enum_compatible_types[is_signed][1]; - return NULL; - case 4: - *ret = &fallback_enum_compatible_types[is_signed][2]; - return NULL; - case 8: - *ret = &fallback_enum_compatible_types[is_signed][3]; - return NULL; - case -1: - *ret = NULL; - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); - default: - *ret = NULL; - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has unsupported DW_AT_byte_size %d", - size); - } -} - -static struct drgn_error * -drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, - const struct drgn_language *lang, - struct drgn_type **ret, bool *should_free) -{ - struct drgn_error *err; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, - &attr_mem); - const char *tag; - if (attr) { - tag = dwarf_formstring(attr); - if (!tag) - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_name"); - } else { - tag = NULL; - } - - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_declaration"); - } - if (declaration && tag) { - err = drgn_dwarf_info_cache_find_complete(dicache, - DW_TAG_enumeration_type, - tag, 
ret); - if (!err) { - *should_free = false; - return NULL; - } else if (err->code != DRGN_ERROR_STOP) { - return err; - } - } - - *should_free = true; - struct drgn_type *type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - - if (declaration) { - drgn_enum_type_init_incomplete(type, tag, lang); - *ret = type; - return NULL; - } - - struct drgn_type_enumerator_vector enumerators = - VECTOR_INIT; - bool is_signed = false; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - int tag; - - tag = dwarf_tag(&child); - if (tag == DW_TAG_enumerator) { - struct drgn_type_enumerator *enumerator; - - enumerator = drgn_type_enumerator_vector_append_entry(&enumerators); - if (!enumerator) { - err = &drgn_enomem; - goto err; - } - err = parse_enumerator(&child, enumerator, &is_signed); - if (err) - goto err; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - drgn_type_enumerator_vector_shrink_to_fit(&enumerators); - - struct drgn_type *compatible_type; - r = dwarf_type(die, &child); - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_type"); - goto err; - } else if (r) { - err = enum_compatible_type_fallback(dicache, die, is_signed, - &compatible_type); - if (err) - goto err; - } else { - struct drgn_qualified_type qualified_compatible_type; - err = drgn_type_from_dwarf(dicache, &child, - &qualified_compatible_type); - if (err) - goto err; - compatible_type = qualified_compatible_type.type; - if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); - goto err; - } - } - - drgn_enum_type_init(type, tag, compatible_type, enumerators.data, - enumerators.size, lang); - *ret = type; - return NULL; - -err: - drgn_type_enumerator_vector_deinit(&enumerators); - free(type); - 
return err; -} - -static struct drgn_error * -drgn_typedef_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, - const struct drgn_language *lang, - bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct drgn_type *type; - struct drgn_qualified_type aliased_type; - const char *name; - - name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_typedef has missing or invalid DW_AT_name"); - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_typedef", true, - can_be_incomplete_array, - is_incomplete_array_ret, - &aliased_type); - if (err) { - free(type); - return err; - } - - drgn_typedef_type_init(type, name, aliased_type, lang); - *ret = type; - return NULL; -} - -static struct drgn_error * -drgn_pointer_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct drgn_qualified_type referenced_type; - - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_pointer_type", true, true, - NULL, &referenced_type); - if (err) - return err; - - return drgn_type_index_pointer_type(dicache->tindex, referenced_type, - lang, ret); -} - -struct array_dimension { - uint64_t length; - bool is_complete; -}; - -DEFINE_VECTOR(array_dimension_vector, struct array_dimension) - -static struct drgn_error *subrange_length(Dwarf_Die *die, - struct array_dimension *dimension) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - Dwarf_Word word; - - if (!(attr = dwarf_attr_integrate(die, DW_AT_upper_bound, &attr_mem)) && - !(attr = dwarf_attr_integrate(die, DW_AT_count, &attr_mem))) { - dimension->is_complete = false; - return NULL; - } - - if (dwarf_formudata(attr, &word)) { - return 
drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_subrange_type has invalid %s", - attr->code == DW_AT_upper_bound ? - "DW_AT_upper_bound" : - "DW_AT_count"); - } - - dimension->is_complete = true; - /* - * GCC emits a DW_FORM_sdata DW_AT_upper_bound of -1 for empty array - * variables without an explicit size (e.g., `int arr[] = {};`). - */ - if (attr->code == DW_AT_upper_bound && attr->form == DW_FORM_sdata && - word == (Dwarf_Word)-1) { - dimension->length = 0; - } else if (attr->code == DW_AT_upper_bound) { - if (word >= UINT64_MAX) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "DW_AT_upper_bound is too large"); - } - dimension->length = (uint64_t)word + 1; - } else { - if (word > UINT64_MAX) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "DW_AT_count is too large"); - } - dimension->length = word; - } - return NULL; -} - -static struct drgn_error * -drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const struct drgn_language *lang, - bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct array_dimension_vector dimensions = VECTOR_INIT; - struct array_dimension *dimension; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - if (dwarf_tag(&child) == DW_TAG_subrange_type) { - dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) - goto out; - err = subrange_length(&child, dimension); - if (err) - goto out; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto out; - } - if (!dimensions.size) { - dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) - goto out; - dimension->is_complete = false; - } - - struct drgn_qualified_type element_type; - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_array_type", false, false, - NULL, 
&element_type); - if (err) - goto out; - - *is_incomplete_array_ret = !dimensions.data[0].is_complete; - struct drgn_type *type; - do { - dimension = array_dimension_vector_pop(&dimensions); - if (dimension->is_complete) { - err = drgn_type_index_array_type(dicache->tindex, - dimension->length, - element_type, lang, - &type); - } else if (dimensions.size || !can_be_incomplete_array) { - err = drgn_type_index_array_type(dicache->tindex, 0, - element_type, lang, - &type); - } else { - err = drgn_type_index_incomplete_array_type(dicache->tindex, - element_type, - lang, - &type); - } - if (err) - goto out; - - element_type.type = type; - element_type.qualifiers = 0; - } while (dimensions.size); - - *ret = type; - err = NULL; -out: - array_dimension_vector_deinit(&dimensions); - return err; -} - -static struct drgn_error * -parse_formal_parameter(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, - struct drgn_type_parameter *parameter) -{ - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - const char *name; - struct drgn_lazy_type parameter_type; - - attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem); - if (attr) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_formal_parameter has invalid DW_AT_name"); - } - } else { - name = NULL; - } - - err = drgn_lazy_type_from_dwarf(dicache, die, true, - "DW_TAG_formal_parameter", - ¶meter_type); - if (err) - return err; - - drgn_type_parameter_init(parameter, parameter_type, name); - return NULL; -} - -static struct drgn_error * -drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - - struct drgn_type *type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - - const char *tag_name = - dwarf_tag(die) == DW_TAG_subroutine_type ? 
- "DW_TAG_subroutine_type" : "DW_TAG_subprogram"; - struct drgn_type_parameter_vector parameters = VECTOR_INIT; - bool is_variadic = false; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - int tag = dwarf_tag(&child); - if (tag == DW_TAG_formal_parameter) { - if (is_variadic) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", - tag_name); - goto err; - } - - struct drgn_type_parameter *parameter = - drgn_type_parameter_vector_append_entry(¶meters); - if (!parameter) { - err = &drgn_enomem; - goto err; - } - err = parse_formal_parameter(dicache, &child, parameter); - if (err) { - parameters.size--; - goto err; - } - } else if (tag == DW_TAG_unspecified_parameters) { - if (is_variadic) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has multiple DW_TAG_unspecified_parameters children", - tag_name); - goto err; - } - is_variadic = true; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - drgn_type_parameter_vector_shrink_to_fit(¶meters); - - struct drgn_qualified_type return_type; - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - tag_name, true, true, NULL, - &return_type); - if (err) - goto err; - - drgn_function_type_init(type, return_type, parameters.data, - parameters.size, is_variadic, lang); - *ret = type; - return NULL; - -err: - for (size_t i = 0; i < parameters.size; i++) - drgn_type_parameter_deinit(¶meters.data[i]); - drgn_type_parameter_vector_deinit(¶meters); - free(type); - return err; -} - -static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - const struct drgn_language *lang; - struct hash_pair hp; - struct 
dwarf_type_map_entry entry = { - .key = die->addr, - }; - struct dwarf_type_map *map; - struct dwarf_type_map_iterator it; - - if (dicache->depth >= 1000) { - return drgn_error_create(DRGN_ERROR_RECURSION, - "maximum DWARF type parsing depth exceeded"); - } - - hp = dwarf_type_map_hash(&entry.key); - it = dwarf_type_map_search_hashed(&dicache->map, &entry.key, hp); - if (it.entry) { - if (!can_be_incomplete_array && - it.entry->value.is_incomplete_array) { - map = &dicache->cant_be_incomplete_array_map; - it = dwarf_type_map_search_hashed(map, &entry.key, hp); - } - if (it.entry) { - ret->type = it.entry->value.type; - ret->qualifiers = it.entry->value.qualifiers; - return NULL; - } - } - - err = drgn_language_from_die(die, &lang); - if (err) - return err; - - ret->qualifiers = 0; - dicache->depth++; - entry.value.is_incomplete_array = false; - switch (dwarf_tag(die)) { - case DW_TAG_const_type: - /* - * Qualified types share the struct drgn_type with the - * unqualified type. - */ - entry.value.should_free = false; - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_const_type", true, - true, NULL, ret); - ret->qualifiers |= DRGN_QUALIFIER_CONST; - break; - case DW_TAG_restrict_type: - entry.value.should_free = false; - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_restrict_type", true, - true, NULL, ret); - ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; - break; - case DW_TAG_volatile_type: - entry.value.should_free = false; - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_volatile_type", true, - true, NULL, ret); - ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; - break; - case DW_TAG_atomic_type: - entry.value.should_free = false; - err = drgn_type_from_dwarf_child(dicache, die, - drgn_language_or_default(lang), - "DW_TAG_atomic_type", true, - true, NULL, ret); - ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; - break; - case DW_TAG_base_type: - 
entry.value.should_free = true; - err = drgn_base_type_from_dwarf(dicache, die, lang, &ret->type); - break; - case DW_TAG_structure_type: - err = drgn_compound_type_from_dwarf(dicache, die, lang, - DRGN_TYPE_STRUCT, - &ret->type, - &entry.value.should_free); - break; - case DW_TAG_union_type: - err = drgn_compound_type_from_dwarf(dicache, die, lang, - DRGN_TYPE_UNION, &ret->type, - &entry.value.should_free); - break; - case DW_TAG_class_type: - err = drgn_compound_type_from_dwarf(dicache, die, lang, - DRGN_TYPE_CLASS, &ret->type, - &entry.value.should_free); - break; - case DW_TAG_enumeration_type: - err = drgn_enum_type_from_dwarf(dicache, die, lang, &ret->type, - &entry.value.should_free); - break; - case DW_TAG_typedef: - entry.value.should_free = true; - err = drgn_typedef_type_from_dwarf(dicache, die, lang, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - &ret->type); - break; - case DW_TAG_pointer_type: - /* Pointer types are owned by the type index. */ - entry.value.should_free = false; - err = drgn_pointer_type_from_dwarf(dicache, die, lang, &ret->type); - break; - case DW_TAG_array_type: - /* Array types are owned by the type index. 
*/ - entry.value.should_free = false; - err = drgn_array_type_from_dwarf(dicache, die, lang, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - &ret->type); - break; - case DW_TAG_subroutine_type: - case DW_TAG_subprogram: - entry.value.should_free = true; - err = drgn_function_type_from_dwarf(dicache, die, lang, - &ret->type); - break; - default: - err = drgn_error_format(DRGN_ERROR_OTHER, - "unknown DWARF type tag 0x%x", - dwarf_tag(die)); - break; - } - dicache->depth--; - if (err) - return err; - - entry.value.type = ret->type; - entry.value.qualifiers = ret->qualifiers; - if (!can_be_incomplete_array && entry.value.is_incomplete_array) - map = &dicache->cant_be_incomplete_array_map; - else - map = &dicache->map; - if (dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { - drgn_dwarf_type_free(&entry.value); - return &drgn_enomem; - } - if (is_incomplete_array_ret) - *is_incomplete_array_ret = entry.value.is_incomplete_array; - return NULL; -} - -struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, - const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - struct drgn_dwarf_info_cache *dicache = arg; - struct drgn_dwarf_index_iterator it; - Dwarf_Die die; - uint64_t tag; - - switch (kind) { - case DRGN_TYPE_INT: - case DRGN_TYPE_BOOL: - case DRGN_TYPE_FLOAT: - tag = DW_TAG_base_type; - break; - case DRGN_TYPE_STRUCT: - tag = DW_TAG_structure_type; - break; - case DRGN_TYPE_UNION: - tag = DW_TAG_union_type; - break; - case DRGN_TYPE_CLASS: - tag = DW_TAG_class_type; - break; - case DRGN_TYPE_ENUM: - tag = DW_TAG_enumeration_type; - break; - case DRGN_TYPE_TYPEDEF: - tag = DW_TAG_typedef; - break; - default: - UNREACHABLE(); - } - - drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, name_len, - &tag, 1); - while (!(err = drgn_dwarf_index_iterator_next(&it, &die, NULL))) { - if (die_matches_filename(&die, filename)) { - err = 
drgn_type_from_dwarf(dicache, &die, ret); - if (err) - return err; - /* - * For DW_TAG_base_type, we need to check that the type - * we found was the right kind. - */ - if (drgn_type_kind(ret->type) == kind) - return NULL; - } - } - if (err && err->code != DRGN_ERROR_STOP) - return err; - return &drgn_not_found; -} - -static struct drgn_error * -drgn_object_from_dwarf_enumerator(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, const char *name, - struct drgn_object *ret) -{ - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - const struct drgn_type_enumerator *enumerators; - size_t num_enumerators, i; - - err = drgn_type_from_dwarf(dicache, die, &qualified_type); - if (err) - return err; - enumerators = drgn_type_enumerators(qualified_type.type); - num_enumerators = drgn_type_num_enumerators(qualified_type.type); - for (i = 0; i < num_enumerators; i++) { - if (strcmp(enumerators[i].name, name) != 0) - continue; - - if (drgn_enum_type_is_signed(qualified_type.type)) { - return drgn_object_set_signed(ret, qualified_type, - enumerators[i].svalue, 0); - } else { - return drgn_object_set_unsigned(ret, qualified_type, - enumerators[i].uvalue, - 0); - } - } - UNREACHABLE(); -} - -static struct drgn_error * -drgn_object_from_dwarf_subprogram(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, uint64_t bias, - const char *name, struct drgn_object *ret) -{ - struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf(dicache, die, - &qualified_type); - if (err) - return err; - Dwarf_Addr low_pc; - if (dwarf_lowpc(die, &low_pc) == -1) { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find address of '%s'", - name); - } - enum drgn_byte_order byte_order; - dwarf_die_byte_order(die, false, &byte_order); - return drgn_object_set_reference(ret, qualified_type, low_pc + bias, 0, - 0, byte_order); -} - -static struct drgn_error * -drgn_object_from_dwarf_constant(struct drgn_dwarf_info_cache *dicache, - 
Dwarf_Die *die, - struct drgn_qualified_type qualified_type, - Dwarf_Attribute *attr, struct drgn_object *ret) -{ - struct drgn_object_type type; - enum drgn_object_kind kind; - uint64_t bit_size; - struct drgn_error *err = drgn_object_set_common(qualified_type, 0, - &type, &kind, - &bit_size); - if (err) - return err; - Dwarf_Block block; - if (dwarf_formblock(attr, &block) == 0) { - bool little_endian; - err = dwarf_die_is_little_endian(die, true, &little_endian); - if (err) - return err; - if (block.length < drgn_value_size(bit_size, 0)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_const_value block is too small"); - } - return drgn_object_set_buffer_internal(ret, &type, kind, - bit_size, block.data, 0, - little_endian); - } else if (kind == DRGN_OBJECT_SIGNED) { - Dwarf_Sword svalue; - if (dwarf_formsdata(attr, &svalue)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_const_value"); - } - return drgn_object_set_signed_internal(ret, &type, bit_size, - svalue); - } else if (kind == DRGN_OBJECT_UNSIGNED) { - Dwarf_Word uvalue; - if (dwarf_formudata(attr, &uvalue)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_const_value"); - } - return drgn_object_set_unsigned_internal(ret, &type, bit_size, - uvalue); - } else { - return drgn_error_create(DRGN_ERROR_OTHER, - "unknown DW_AT_const_value form"); - } -} - -static struct drgn_error * -drgn_object_from_dwarf_variable(struct drgn_dwarf_info_cache *dicache, - Dwarf_Die *die, uint64_t bias, const char *name, - struct drgn_object *ret) -{ - struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf_child(dicache, die, NULL, - "DW_TAG_variable", - true, true, NULL, - &qualified_type); - if (err) - return err; - Dwarf_Attribute attr_mem, *attr; - if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { - Dwarf_Op *loc; - size_t nloc; - if (dwarf_getlocation(attr, &loc, &nloc)) - return drgn_error_libdw(); - if (nloc != 1 || loc[0].atom 
!= DW_OP_addr) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_location has unimplemented operation"); - } - enum drgn_byte_order byte_order; - err = dwarf_die_byte_order(die, true, &byte_order); - if (err) - return err; - return drgn_object_set_reference(ret, qualified_type, - loc[0].number + bias, 0, 0, - byte_order); - } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, - &attr_mem))) { - return drgn_object_from_dwarf_constant(dicache, die, - qualified_type, attr, - ret); - } else { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find address or value of '%s'", - name); - } -} - -struct drgn_error * -drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret) -{ - struct drgn_error *err; - struct drgn_dwarf_info_cache *dicache = arg; - uint64_t tags[3]; - size_t num_tags; - struct drgn_dwarf_index_iterator it; - Dwarf_Die die; - uint64_t bias; - - num_tags = 0; - if (flags & DRGN_FIND_OBJECT_CONSTANT) - tags[num_tags++] = DW_TAG_enumerator; - if (flags & DRGN_FIND_OBJECT_FUNCTION) - tags[num_tags++] = DW_TAG_subprogram; - if (flags & DRGN_FIND_OBJECT_VARIABLE) - tags[num_tags++] = DW_TAG_variable; - - drgn_dwarf_index_iterator_init(&it, &dicache->dindex, name, - strlen(name), tags, num_tags); - while (!(err = drgn_dwarf_index_iterator_next(&it, &die, &bias))) { - if (!die_matches_filename(&die, filename)) - continue; - switch (dwarf_tag(&die)) { - case DW_TAG_enumeration_type: - return drgn_object_from_dwarf_enumerator(dicache, &die, - name, ret); - case DW_TAG_subprogram: - return drgn_object_from_dwarf_subprogram(dicache, &die, - bias, name, - ret); - case DW_TAG_variable: - return drgn_object_from_dwarf_variable(dicache, &die, - bias, name, ret); - default: - UNREACHABLE(); - } - } - if (err && err->code != DRGN_ERROR_STOP) - return err; - return &drgn_not_found; -} - -struct drgn_error * -drgn_dwarf_info_cache_create(struct 
drgn_type_index *tindex, - const Dwfl_Callbacks *dwfl_callbacks, - struct drgn_dwarf_info_cache **ret) -{ - struct drgn_error *err; - struct drgn_dwarf_info_cache *dicache; - - dicache = malloc(sizeof(*dicache)); - if (!dicache) - return &drgn_enomem; - err = drgn_dwarf_index_init(&dicache->dindex, dwfl_callbacks); - if (err) { - free(dicache); - return err; - } - dwarf_type_map_init(&dicache->map); - dwarf_type_map_init(&dicache->cant_be_incomplete_array_map); - dicache->depth = 0; - dicache->tindex = tindex; - *ret = dicache; - return NULL; -} - -void drgn_dwarf_info_cache_destroy(struct drgn_dwarf_info_cache *dicache) -{ - struct dwarf_type_map_iterator it; - - if (!dicache) - return; - - for (it = dwarf_type_map_first(&dicache->map); it.entry; - it = dwarf_type_map_next(it)) - drgn_dwarf_type_free(&it.entry->value); - /* Arrays don't need to be freed, but typedefs do. */ - for (it = dwarf_type_map_first(&dicache->cant_be_incomplete_array_map); - it.entry; it = dwarf_type_map_next(it)) - drgn_dwarf_type_free(&it.entry->value); - dwarf_type_map_deinit(&dicache->cant_be_incomplete_array_map); - dwarf_type_map_deinit(&dicache->map); - drgn_dwarf_index_deinit(&dicache->dindex); - free(dicache); -} diff --git a/libdrgn/dwarf_info_cache.h b/libdrgn/dwarf_info_cache.h deleted file mode 100644 index 2d8a871c3..000000000 --- a/libdrgn/dwarf_info_cache.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -/** - * @file - * - * Debugging information cache. - * - * See @ref DWARFInfoCache. - */ - -#ifndef DRGN_DWARF_INFO_CACHE_H -#define DRGN_DWARF_INFO_CACHE_H - -#include "drgn.h" -#include "hash_table.h" - -/** - * @ingroup Internals - * - * @defgroup DWARFInfoCache Debugging information cache - * - * Caching of DWARF debugging information. 
- * - * @ref drgn_dwarf_info_cache bridges the raw DWARF information indexed by @ref - * drgn_dwarf_index to the higher-level @ref drgn_type_index and @ref - * drgn_object_index. - * - * @{ - */ - -/** Cached type in a @ref drgn_dwarf_info_cache. */ -struct drgn_dwarf_type { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - /** - * Whether this is an incomplete array type or a typedef of one. - * - * This is used to work around a GCC bug; see @ref - * drgn_type_from_dwarf_internal(). - */ - bool is_incomplete_array; - /** Whether we need to free @c type. */ - bool should_free; -}; - -DEFINE_HASH_MAP_TYPE(dwarf_type_map, const void *, struct drgn_dwarf_type); - -struct drgn_dwarf_index; - -/** - * Cache of types and objects from DWARF debugging information. - * - * This is the argument for @ref drgn_dwarf_type_find() and @ref - * drgn_dwarf_object_find(). - */ -struct drgn_dwarf_info_cache { - /** Index of DWARF debugging information. */ - struct drgn_dwarf_index dindex; - /** - * Cache of parsed types. - * - * The key is the address of the DIE (@c Dwarf_Die::addr). The value is - * a @ref drgn_dwarf_type. - */ - struct dwarf_type_map map; - /** - * Cache of parsed types which appear to be incomplete array types but - * can't be. - * - * See @ref drgn_type_from_dwarf_internal(). - */ - struct dwarf_type_map cant_be_incomplete_array_map; - /** Current parsing recursion depth. */ - int depth; - /** Type index. */ - struct drgn_type_index *tindex; -}; - -/** Create a @ref drgn_dwarf_info_cache. */ -struct drgn_error * -drgn_dwarf_info_cache_create(struct drgn_type_index *tindex, - const Dwfl_Callbacks *dwfl_callbacks, - struct drgn_dwarf_info_cache **ret); - -/** Destroy a @ref drgn_dwarf_info_cache. */ -void drgn_dwarf_info_cache_destroy(struct drgn_dwarf_info_cache *dicache); - -/** @ref drgn_type_find_fn() that uses DWARF debugging information. 
*/ -struct drgn_error *drgn_dwarf_type_find(enum drgn_type_kind kind, - const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret); - -/** @ref drgn_object_find_fn() that uses DWARF debugging information. */ -struct drgn_error * -drgn_dwarf_object_find(const char *name, size_t name_len, const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret); - -/** @} */ - -#endif /* DRGN_DWARF_INFO_CACHE_H */ diff --git a/libdrgn/error.c b/libdrgn/error.c index bba0e5432..3a266492b 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -1,8 +1,9 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include +#include #include +#include #include #include #include @@ -10,8 +11,10 @@ #include #include -#include "internal.h" +#include "drgn.h" +#include "error.h" #include "string_builder.h" +#include "util.h" LIBDRGN_PUBLIC struct drgn_error drgn_enomem = { .code = DRGN_ERROR_NO_MEMORY, @@ -162,6 +165,39 @@ struct drgn_error *drgn_error_from_string_builder(enum drgn_error_code code, return drgn_error_create_nodup(code, message); } +LIBDRGN_PUBLIC struct drgn_error *drgn_error_copy(struct drgn_error *src) +{ + if (!src->needs_destroy) + return src; + struct drgn_error *dst = malloc(sizeof(*dst)); + if (!dst) + return &drgn_enomem; + dst->code = src->code; + dst->needs_destroy = true; + dst->errnum = src->errnum; + if (src->path) { + dst->path = strdup(src->path); + if (!dst->path) { + free(dst); + return &drgn_enomem; + } + } else { + dst->path = NULL; + } + dst->address = src->address; + if (src->message) { + dst->message = strdup(src->message); + if (!dst->message) { + free(dst->path); + free(dst); + return &drgn_enomem; + } + } else { + dst->message = NULL; + } + return dst; +} + bool string_builder_append_error(struct string_builder *sb, struct drgn_error *err) { diff --git a/libdrgn/error.h b/libdrgn/error.h index fe44b6665..4ef8adb13 100644 --- 
a/libdrgn/error.h +++ b/libdrgn/error.h @@ -12,8 +12,6 @@ #ifndef DRGN_ERROR_H #define DRGN_ERROR_H -#include - #include "drgn.h" /** diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 732232796..97961c1ea 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -13,7 +13,7 @@ #define DRGN_HASH_TABLE_H #ifdef __SSE2__ -#include +#include // IWYU pragma: keep #endif #ifdef __SSE4_2__ #include diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index e287d19e4..2251e12a7 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -13,6 +13,12 @@ #ifndef DRGN_HELPERS_H #define DRGN_HELPERS_H +#include +#include + +struct drgn_object; +struct drgn_program; + struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, void *buf, size_t count); @@ -37,7 +43,4 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, const struct drgn_object *ns, uint64_t pid); -struct drgn_error * -linux_helper_task_state_to_char(const struct drgn_object *task, char *ret); - #endif /* DRGN_HELPERS_H */ diff --git a/libdrgn/internal.c b/libdrgn/internal.c deleted file mode 100644 index 4c2e42aac..000000000 --- a/libdrgn/internal.c +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
-// SPDX-License-Identifier: GPL-3.0+ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "internal.h" - -struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) -{ - struct drgn_error *err; - - *fd_ret = open(path, O_RDONLY); - if (*fd_ret == -1) - return drgn_error_create_os("open", errno, path); - *elf_ret = dwelf_elf_begin(*fd_ret); - if (!*elf_ret) { - err = drgn_error_libelf(); - goto err_fd; - } - if (elf_kind(*elf_ret) != ELF_K_ELF) { - err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); - goto err_elf; - } - return NULL; - -err_elf: - elf_end(*elf_ret); -err_fd: - close(*fd_ret); - return err; -} - -struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, - const char * const *path_formats, ...) -{ - struct drgn_error *err; - size_t i; - - for (i = 0; path_formats[i]; i++) { - va_list ap; - int ret; - char *path; - int fd; - Elf *elf; - - va_start(ap, path_formats); - ret = vasprintf(&path, path_formats[i], ap); - va_end(ap); - if (ret == -1) - return &drgn_enomem; - fd = open(path, O_RDONLY); - if (fd == -1) { - free(path); - continue; - } - elf = dwelf_elf_begin(fd); - if (!elf) { - close(fd); - free(path); - continue; - } - if (elf_kind(elf) != ELF_K_ELF) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s: not an ELF file", path); - elf_end(elf); - close(fd); - free(path); - return err; - } - *path_ret = path; - *fd_ret = fd; - *elf_ret = elf; - return NULL; - } - *path_ret = NULL; - *fd_ret = -1; - *elf_ret = NULL; - return NULL; -} - -struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) -{ - GElf_Shdr shdr_mem, *shdr; - Elf_Data *data; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - if ((shdr->sh_flags & SHF_COMPRESSED) && elf_compress(scn, 0, 0) < 0) - return drgn_error_libelf(); - data = elf_getdata(scn, NULL); - if (!data) - return drgn_error_libelf(); - *ret = data; - return NULL; -} - -struct drgn_error 
*elf_address_range(Elf *elf, uint64_t bias, - uint64_t *start_ret, uint64_t *end_ret) -{ - uint64_t start = UINT64_MAX, end = 0; - size_t phnum, i; - - /* - * Get the minimum and maximum addresses from the PT_LOAD segments. We - * ignore memory ranges that start beyond UINT64_MAX, and we truncate - * ranges that end beyond UINT64_MAX. - */ - if (elf_getphdrnum(elf, &phnum) != 0) - return drgn_error_libelf(); - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr_mem, *phdr; - uint64_t segment_start, segment_end; - - phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type != PT_LOAD || !phdr->p_vaddr) - continue; - if (__builtin_add_overflow(phdr->p_vaddr, bias, - &segment_start)) - continue; - if (__builtin_add_overflow(segment_start, phdr->p_memsz, - &segment_end)) - segment_end = UINT64_MAX; - if (segment_start < segment_end) { - if (segment_start < start) - start = segment_start; - if (segment_end > end) - end = segment_end; - } - } - if (start >= end) { - return drgn_error_create(DRGN_ERROR_OTHER, - "ELF file has no loadable segments"); - } - *start_ret = start; - *end_ret = end; - return NULL; -} diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index b352f5e9b..2155bc34a 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -1,13 +1,12 @@ // Copyright 2019 - Serapheim Dimitropoulos // SPDX-License-Identifier: GPL-3.0+ -#include +#include #include -#include #include #include "linux_kernel.h" -#include "program.h" +#include "program.h" // IWYU pragma: associated static struct drgn_error *drgn_platform_from_kdump(kdump_ctx_t *ctx, struct drgn_platform *ret) diff --git a/libdrgn/language.c b/libdrgn/language.c index e6219cd2d..bfbd86461 100644 --- a/libdrgn/language.c +++ b/libdrgn/language.c @@ -1,19 +1,14 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ -#include "internal.h" +#include + +#include "error.h" #include "language.h" const struct drgn_language drgn_languages[] = { [DRGN_LANGUAGE_C] = { .name = "C", - .void_type = { - { - .kind = DRGN_TYPE_VOID, - .primitive = DRGN_C_TYPE_VOID, - .language = &drgn_language_c, - }, - }, .format_type_name = c_format_type_name, .format_type = c_format_type, .format_object = c_format_object, @@ -41,13 +36,6 @@ const struct drgn_language drgn_languages[] = { }, [DRGN_LANGUAGE_CPP] = { .name = "C++", - .void_type = { - { - .kind = DRGN_TYPE_VOID, - .primitive = DRGN_C_TYPE_VOID, - .language = &drgn_language_cpp, - }, - }, .format_type_name = c_format_type_name, .format_type = c_format_type, .format_object = c_format_object, diff --git a/libdrgn/language.h b/libdrgn/language.h index 9d129dcc1..81280d42b 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -12,8 +12,9 @@ #ifndef DRGN_LANGUAGE_H #define DRGN_LANGUAGE_H +#include + #include "drgn.h" -#include /** * @ingroup Internals @@ -28,15 +29,13 @@ * @{ */ -struct drgn_type_index; - typedef struct drgn_error *drgn_format_type_fn(struct drgn_qualified_type, char **); typedef struct drgn_error *drgn_format_object_fn(const struct drgn_object *, size_t, enum drgn_format_object_flags, char **); -typedef struct drgn_error *drgn_find_type_fn(struct drgn_type_index *tindex, +typedef struct drgn_error *drgn_find_type_fn(struct drgn_program *prog, const char *name, const char *filename, struct drgn_qualified_type *ret); @@ -69,8 +68,6 @@ typedef struct drgn_error *drgn_cmp_op(const struct drgn_object *lhs, struct drgn_language { /** Name of this programming language. */ const char *name; - /** Void type for this language. See @ref drgn_void_type(). */ - struct drgn_type void_type; /** Implement @ref drgn_format_type_name(). */ drgn_format_type_fn *format_type_name; /** Implement @ref drgn_format_type(). */ @@ -78,10 +75,10 @@ struct drgn_language { /** Implement @ref drgn_format_object(). 
*/ drgn_format_object_fn *format_object; /** - * Implement @ref drgn_type_index_find(). + * Implement @ref drgn_program_find_type(). * * This should parse @p name and call @ref - * drgn_type_index_find_parsed(). + * drgn_program_find_type_impl(). */ drgn_find_type_fn *find_type; /** @@ -164,8 +161,8 @@ enum { extern const struct drgn_language drgn_languages[DRGN_NUM_LANGUAGES]; -#define drgn_language_cpp drgn_languages[DRGN_LANGUAGE_CPP] #define drgn_language_c drgn_languages[DRGN_LANGUAGE_C] +#define drgn_language_cpp drgn_languages[DRGN_LANGUAGE_CPP] /** * Return flags that should be passed through when formatting an object diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index df4f6800a..48f09eb77 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1,16 +1,18 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include #include #include #include #include #include #include +#include -#include "internal.h" +#include "error.h" #include "hash_table.h" -#include "language.h" +#include "language.h" // IWYU pragma: associated #include "lexer.h" #include "memory_reader.h" #include "object.h" @@ -18,7 +20,8 @@ #include "string_builder.h" #include "symbol.h" #include "type.h" -#include "type_index.h" +#include "util.h" +#include "vector.h" static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, @@ -1160,7 +1163,7 @@ c_format_compound_object(const struct drgn_object *obj, new->member < new->end) { struct drgn_object member; - drgn_object_init(&member, obj->prog); + drgn_object_init(&member, drgn_object_program(obj)); do { struct drgn_qualified_type member_type; bool zero; @@ -1188,7 +1191,7 @@ c_format_compound_object(const struct drgn_object *obj, return err; } - err = c_format_initializer(obj->prog, &iter.iter, indent, + err = c_format_initializer(drgn_object_program(obj), &iter.iter, indent, one_line_columns, multi_line_columns, flags & DRGN_FORMAT_OBJECT_MEMBERS_SAME_LINE, 
sb); @@ -1292,7 +1295,7 @@ c_format_pointer_object(const struct drgn_object *obj, return err; have_symbol = ((flags & DRGN_FORMAT_OBJECT_SYMBOLIZE) && - drgn_program_find_symbol_by_address_internal(obj->prog, + drgn_program_find_symbol_by_address_internal(drgn_object_program(obj), uvalue, NULL, &sym)); @@ -1317,12 +1320,12 @@ c_format_pointer_object(const struct drgn_object *obj, return &drgn_enomem; if (c_string) { - err = c_format_string(&obj->prog->reader, uvalue, UINT64_MAX, - sb); + err = c_format_string(&drgn_object_program(obj)->reader, uvalue, + UINT64_MAX, sb); } else { struct drgn_object dereferenced; - drgn_object_init(&dereferenced, obj->prog); + drgn_object_init(&dereferenced, drgn_object_program(obj)); err = drgn_object_dereference(&dereferenced, obj); if (err) { drgn_object_deinit(&dereferenced); @@ -1452,7 +1455,7 @@ c_format_array_object(const struct drgn_object *obj, if ((flags & DRGN_FORMAT_OBJECT_STRING) && iter.length && is_character_type(iter.element_type.type)) { if (obj->is_reference) { - return c_format_string(&obj->prog->reader, + return c_format_string(&drgn_object_program(obj)->reader, obj->reference.address, iter.length, sb); } else { @@ -1492,7 +1495,7 @@ c_format_array_object(const struct drgn_object *obj, iter.length) { struct drgn_object element; - drgn_object_init(&element, obj->prog); + drgn_object_init(&element, drgn_object_program(obj)); do { bool zero; @@ -1516,8 +1519,9 @@ c_format_array_object(const struct drgn_object *obj, if (err) return err; } - return c_format_initializer(obj->prog, &iter.iter, indent, - one_line_columns, multi_line_columns, + return c_format_initializer(drgn_object_program(obj), &iter.iter, + indent, one_line_columns, + multi_line_columns, flags & DRGN_FORMAT_OBJECT_ELEMENTS_SAME_LINE, sb); } @@ -2111,7 +2115,7 @@ enum drgn_primitive_type c_parse_specifier_list(const char *s) } static struct drgn_error * -c_parse_specifier_qualifier_list(struct drgn_type_index *tindex, 
+c_parse_specifier_qualifier_list(struct drgn_program *prog, struct drgn_lexer *lexer, const char *filename, struct drgn_qualified_type *ret) { @@ -2206,17 +2210,17 @@ c_parse_specifier_qualifier_list(struct drgn_type_index *tindex, kind = DRGN_TYPE_ENUM; } else if (identifier) { if (strstartswith(identifier, "size_t")) { - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_SIZE_T, - &ret->type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_SIZE_T, + &ret->type); if (err) return err; ret->qualifiers = 0; goto out; } else if (strstartswith(identifier, "ptrdiff_t")) { - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_PTRDIFF_T, - &ret->type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_PTRDIFF_T, + &ret->type); if (err) return err; ret->qualifiers = 0; @@ -2229,15 +2233,15 @@ c_parse_specifier_qualifier_list(struct drgn_type_index *tindex, "expected type specifier"); } - err = drgn_type_index_find_parsed(tindex, kind, identifier, + err = drgn_program_find_type_impl(prog, kind, identifier, identifier_len, filename, ret); if (err) return err; } else { - err = drgn_type_index_find_primitive(tindex, - specifier_kind[specifier], - &ret->type); + err = drgn_program_find_primitive_type(prog, + specifier_kind[specifier], + &ret->type); if (err) return err; ret->qualifiers = 0; @@ -2259,7 +2263,7 @@ struct c_declarator { /* These functions don't free the declarator list on error. 
*/ static struct drgn_error * -c_parse_abstract_declarator(struct drgn_type_index *tindex, +c_parse_abstract_declarator(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner); @@ -2289,7 +2293,7 @@ c_parse_optional_type_qualifier_list(struct drgn_lexer *lexer, } static struct drgn_error * -c_parse_pointer(struct drgn_type_index *tindex, struct drgn_lexer *lexer, +c_parse_pointer(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner) { struct drgn_error *err; @@ -2329,7 +2333,7 @@ c_parse_pointer(struct drgn_type_index *tindex, struct drgn_lexer *lexer, } static struct drgn_error * -c_parse_direct_abstract_declarator(struct drgn_type_index *tindex, +c_parse_direct_abstract_declarator(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner) @@ -2351,7 +2355,7 @@ c_parse_direct_abstract_declarator(struct drgn_type_index *tindex, if (token2.kind == C_TOKEN_ASTERISK || token2.kind == C_TOKEN_LPAREN || token2.kind == C_TOKEN_LBRACKET) { - err = c_parse_abstract_declarator(tindex, lexer, outer, + err = c_parse_abstract_declarator(prog, lexer, outer, inner); if (err) return err; @@ -2431,7 +2435,7 @@ c_parse_direct_abstract_declarator(struct drgn_type_index *tindex, } static struct drgn_error * -c_parse_abstract_declarator(struct drgn_type_index *tindex, +c_parse_abstract_declarator(struct drgn_program *prog, struct drgn_lexer *lexer, struct c_declarator **outer, struct c_declarator **inner) @@ -2443,7 +2447,7 @@ c_parse_abstract_declarator(struct drgn_type_index *tindex, if (err) return err; if (token.kind == C_TOKEN_ASTERISK) { - err = c_parse_pointer(tindex, lexer, outer, inner); + err = c_parse_pointer(prog, lexer, outer, inner); if (err) return err; @@ -2452,21 +2456,21 @@ c_parse_abstract_declarator(struct drgn_type_index *tindex, token.kind == C_TOKEN_LBRACKET) { struct c_declarator *tmp; - err = 
c_parse_direct_abstract_declarator(tindex, lexer, + err = c_parse_direct_abstract_declarator(prog, lexer, outer, &tmp); if (err) return err; } return NULL; } else { - return c_parse_direct_abstract_declarator(tindex, lexer, outer, + return c_parse_direct_abstract_declarator(prog, lexer, outer, inner); } } /* This always frees the declarator list regardless of success or failure. */ static struct drgn_error * -c_type_from_declarator(struct drgn_type_index *tindex, +c_type_from_declarator(struct drgn_program *prog, struct c_declarator *declarator, struct drgn_qualified_type *ret) { @@ -2475,21 +2479,28 @@ c_type_from_declarator(struct drgn_type_index *tindex, if (!declarator) return NULL; - err = c_type_from_declarator(tindex, declarator->next, ret); + err = c_type_from_declarator(prog, declarator->next, ret); if (err) { free(declarator); return err; } if (declarator->kind == C_TOKEN_ASTERISK) { - err = drgn_type_index_pointer_type(tindex, *ret, NULL, - &ret->type); + uint8_t word_size; + err = drgn_program_word_size(prog, &word_size); + if (!err) { + err = drgn_pointer_type_create(prog, *ret, word_size, + drgn_type_language(ret->type), + &ret->type); + } } else if (declarator->is_complete) { - err = drgn_type_index_array_type(tindex, declarator->length, - *ret, NULL, &ret->type); + err = drgn_array_type_create(prog, *ret, declarator->length, + drgn_type_language(ret->type), + &ret->type); } else { - err = drgn_type_index_incomplete_array_type(tindex, *ret, NULL, - &ret->type); + err = drgn_incomplete_array_type_create(prog, *ret, + drgn_type_language(ret->type), + &ret->type); } if (!err) @@ -2498,7 +2509,7 @@ c_type_from_declarator(struct drgn_type_index *tindex, return err; } -struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, +struct drgn_error *c_find_type(struct drgn_program *prog, const char *name, const char *filename, struct drgn_qualified_type *ret) { @@ -2508,7 +2519,7 @@ struct drgn_error *c_find_type(struct drgn_type_index 
*tindex, const char *name, drgn_lexer_init(&lexer, drgn_lexer_c, name); - err = c_parse_specifier_qualifier_list(tindex, &lexer, filename, ret); + err = c_parse_specifier_qualifier_list(prog, &lexer, filename, ret); if (err) goto out; @@ -2522,8 +2533,7 @@ struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, if (err) return err; - err = c_parse_abstract_declarator(tindex, &lexer, &outer, - &inner); + err = c_parse_abstract_declarator(prog, &lexer, &outer, &inner); if (err) { while (outer) { struct c_declarator *next; @@ -2535,7 +2545,7 @@ struct drgn_error *c_find_type(struct drgn_type_index *tindex, const char *name, goto out; } - err = c_type_from_declarator(tindex, outer, ret); + err = c_type_from_declarator(prog, outer, ret); if (err) goto out; @@ -2580,11 +2590,10 @@ struct drgn_error *c_bit_offset(struct drgn_program *prog, struct drgn_member_value *member; struct drgn_qualified_type member_type; - err = drgn_type_index_find_member(&prog->tindex, - type, - token.value, - token.len, - &member); + err = drgn_program_find_member(prog, type, + token.value, + token.len, + &member); if (err) goto out; if (__builtin_add_overflow(bit_offset, @@ -2703,9 +2712,9 @@ struct drgn_error *c_integer_literal(struct drgn_object *res, uint64_t uvalue) bits = fls(uvalue); qualified_type.qualifiers = 0; for (i = 0; i < ARRAY_SIZE(types); i++) { - err = drgn_type_index_find_primitive(&res->prog->tindex, - types[i], - &qualified_type.type); + err = drgn_program_find_primitive_type(drgn_object_program(res), + types[i], + &qualified_type.type); if (err) return err; @@ -2729,9 +2738,9 @@ struct drgn_error *c_bool_literal(struct drgn_object *res, bool bvalue) struct drgn_error *err; struct drgn_qualified_type qualified_type; - err = drgn_type_index_find_primitive(&res->prog->tindex, - DRGN_C_TYPE_INT, - &qualified_type.type); + err = drgn_program_find_primitive_type(drgn_object_program(res), + DRGN_C_TYPE_INT, + &qualified_type.type); if (err) return err; 
qualified_type.qualifiers = 0; @@ -2743,9 +2752,9 @@ struct drgn_error *c_float_literal(struct drgn_object *res, double fvalue) struct drgn_error *err; struct drgn_qualified_type qualified_type; - err = drgn_type_index_find_primitive(&res->prog->tindex, - DRGN_C_TYPE_DOUBLE, - &qualified_type.type); + err = drgn_program_find_primitive_type(drgn_object_program(res), + DRGN_C_TYPE_DOUBLE, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; @@ -2800,7 +2809,7 @@ static bool c_can_represent_all_values(struct drgn_type *type1, return false; } -static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, +static struct drgn_error *c_integer_promotions(struct drgn_program *prog, struct drgn_object_type *type) { struct drgn_error *err; @@ -2850,8 +2859,8 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, */ if (primitive >= ARRAY_SIZE(c_integer_conversion_rank) || type->bit_field_size) { - err = drgn_type_index_find_primitive(tindex, DRGN_C_TYPE_INT, - &int_type); + err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT, + &int_type); if (err) return err; if (c_can_represent_all_values(int_type, 0, @@ -2862,9 +2871,9 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, return NULL; } - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_INT, - &int_type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_INT, + &int_type); if (err) return err; if (c_can_represent_all_values(int_type, 0, @@ -2886,16 +2895,16 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, * If int can represent all values of the original type, then the result * is int. Otherwise, the result is unsigned int. 
*/ - err = drgn_type_index_find_primitive(tindex, DRGN_C_TYPE_INT, - &int_type); + err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT, + &int_type); if (err) return err; if (c_can_represent_all_values(int_type, 0, type->underlying_type, 0)) { type->type = int_type; } else { - err = drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_INT, - &type->type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_INT, + &type->type); if (err) return err; } @@ -2904,7 +2913,7 @@ static struct drgn_error *c_integer_promotions(struct drgn_type_index *tindex, } static struct drgn_error * -c_corresponding_unsigned_type(struct drgn_type_index *tindex, +c_corresponding_unsigned_type(struct drgn_program *prog, enum drgn_primitive_type type, struct drgn_type **ret) { @@ -2914,23 +2923,23 @@ c_corresponding_unsigned_type(struct drgn_type_index *tindex, * handle them here. */ case DRGN_C_TYPE_INT: - return drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_INT, - ret); + return drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_INT, + ret); case DRGN_C_TYPE_LONG: - return drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - ret); + return drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + ret); case DRGN_C_TYPE_LONG_LONG: - return drgn_type_index_find_primitive(tindex, - DRGN_C_TYPE_UNSIGNED_LONG_LONG, - ret); + return drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG_LONG, + ret); default: UNREACHABLE(); } } -static struct drgn_error *c_common_real_type(struct drgn_type_index *tindex, +static struct drgn_error *c_common_real_type(struct drgn_program *prog, struct drgn_object_type *type1, struct drgn_object_type *type2, struct drgn_object_type *ret) @@ -2980,10 +2989,10 @@ static struct drgn_error *c_common_real_type(struct drgn_type_index *tindex, * Otherwise, the integer promotions are performed before applying the * following rules. 
*/ - err = c_integer_promotions(tindex, type1); + err = c_integer_promotions(prog, type1); if (err) return err; - err = c_integer_promotions(tindex, type2); + err = c_integer_promotions(prog, type2); if (err) return err; @@ -3103,7 +3112,7 @@ static struct drgn_error *c_common_real_type(struct drgn_type_index *tindex, * rank, then it must have greater size and thus be able to represent * all values of the unsigned integer type. */ - err = c_corresponding_unsigned_type(tindex, + err = c_corresponding_unsigned_type(prog, is_signed1 ? primitive1 : primitive2, &ret->type); if (err) @@ -3129,25 +3138,36 @@ static struct drgn_error *c_operand_type(const struct drgn_object *obj, *type_ret = drgn_object_type(obj); switch (drgn_type_kind(type_ret->underlying_type)) { - case DRGN_TYPE_ARRAY: - err = drgn_type_index_pointer_type(&obj->prog->tindex, - drgn_type_type(type_ret->underlying_type), - drgn_type_language(type_ret->underlying_type), - &type_ret->type); + case DRGN_TYPE_ARRAY: { + uint8_t word_size; + err = drgn_program_word_size(drgn_object_program(obj), + &word_size); + if (err) + return err; + err = drgn_pointer_type_create(drgn_object_program(obj), + drgn_type_type(type_ret->underlying_type), + word_size, + drgn_type_language(type_ret->underlying_type), + &type_ret->type); if (err) return err; type_ret->underlying_type = type_ret->type; break; + } case DRGN_TYPE_FUNCTION: { struct drgn_qualified_type function_type = { .type = type_ret->underlying_type, .qualifiers = type_ret->qualifiers, }; - - err = drgn_type_index_pointer_type(&obj->prog->tindex, - function_type, - drgn_type_language(type_ret->underlying_type), - &type_ret->type); + uint8_t word_size; + err = drgn_program_word_size(drgn_object_program(obj), + &word_size); + if (err) + return err; + err = drgn_pointer_type_create(drgn_object_program(obj), + function_type, word_size, + drgn_type_language(type_ret->underlying_type), + &type_ret->type); if (err) return err; type_ret->underlying_type = type_ret->type; 
@@ -3257,7 +3277,7 @@ struct drgn_error *c_op_cmp(const struct drgn_object *lhs, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, &rhs_type, &type); if (err) return err; @@ -3299,7 +3319,7 @@ struct drgn_error *c_op_add(struct drgn_object *res, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, &rhs_type, &type); if (err) return err; @@ -3330,9 +3350,9 @@ struct drgn_error *c_op_sub(struct drgn_object *res, if (lhs_pointer && rhs_pointer) { struct drgn_object_type type = {}; - err = drgn_type_index_find_primitive(&lhs->prog->tindex, - DRGN_C_TYPE_PTRDIFF_T, - &type.type); + err = drgn_program_find_primitive_type(drgn_object_program(lhs), + DRGN_C_TYPE_PTRDIFF_T, + &type.type); if (err) return err; type.underlying_type = drgn_underlying_type(type.type); @@ -3351,7 +3371,7 @@ struct drgn_error *c_op_sub(struct drgn_object *res, if (!drgn_type_is_arithmetic(lhs_type.underlying_type) || !drgn_type_is_arithmetic(rhs_type.underlying_type)) goto type_error; - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, &rhs_type, &type); if (err) return err; @@ -3382,8 +3402,8 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ return drgn_error_binary_op("binary "#op, &lhs_type, \ &rhs_type); \ \ - err = c_common_real_type(&lhs->prog->tindex, &lhs_type, &rhs_type, \ - &type); \ + err = c_common_real_type(drgn_object_program(lhs), &lhs_type, \ + &rhs_type, &type); \ if (err) \ return err; \ \ @@ -3416,10 +3436,10 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ return drgn_error_binary_op("binary 
" #op, &lhs_type, \ &rhs_type); \ \ - err = c_integer_promotions(&lhs->prog->tindex, &lhs_type); \ + err = c_integer_promotions(drgn_object_program(lhs), &lhs_type); \ if (err) \ return err; \ - err = c_integer_promotions(&lhs->prog->tindex, &rhs_type); \ + err = c_integer_promotions(drgn_object_program(lhs), &rhs_type); \ if (err) \ return err; \ \ @@ -3442,7 +3462,7 @@ struct drgn_error *c_op_##op_name(struct drgn_object *res, \ if (!drgn_type_is_##check(type.underlying_type)) \ return drgn_error_unary_op("unary " #op, &type); \ \ - err = c_integer_promotions(&obj->prog->tindex, &type); \ + err = c_integer_promotions(drgn_object_program(obj), &type); \ if (err) \ return err; \ \ diff --git a/libdrgn/lexer.c b/libdrgn/lexer.c index 426f72098..1463b1f3e 100644 --- a/libdrgn/lexer.c +++ b/libdrgn/lexer.c @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include "internal.h" +#include "drgn.h" #include "lexer.h" DEFINE_VECTOR_FUNCTIONS(drgn_token_vector) diff --git a/libdrgn/lexer.h b/libdrgn/lexer.h index 62511e8a8..37910d6f8 100644 --- a/libdrgn/lexer.h +++ b/libdrgn/lexer.h @@ -30,7 +30,6 @@ * @{ */ -struct drgn_error; struct drgn_lexer; struct drgn_token; @@ -126,6 +125,10 @@ struct drgn_error *drgn_lexer_push(struct drgn_lexer *lexer, struct drgn_error *drgn_lexer_peek(struct drgn_lexer *lexer, struct drgn_token *token); +/* Exported only for testing. 
*/ +struct drgn_error *drgn_lexer_c(struct drgn_lexer *lexer, + struct drgn_token *token); + /** @} */ #endif /* DRGN_LEXER_H */ diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 964f541a3..3220259e0 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -2,6 +2,10 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include +#include +#include +#include #include #include #include @@ -10,14 +14,20 @@ #include #include #include -#include -#include "internal.h" -#include "dwarf_index.h" +#include "debug_info.h" +#include "drgn.h" +#include "error.h" +#include "hash_table.h" #include "helpers.h" +#include "language.h" #include "linux_kernel.h" +#include "memory_reader.h" +#include "mread.h" +#include "platform.h" #include "program.h" -#include "read.h" +#include "type.h" +#include "util.h" struct drgn_error *read_memory_via_pgtable(void *buf, uint64_t address, size_t count, uint64_t offset, @@ -286,18 +296,18 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, } } - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, prog->page_offset, 0); } else if (name_len == strlen("PAGE_SHIFT") && memcmp(name, "PAGE_SHIFT", name_len) == 0) { - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_INT, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_INT, + &qualified_type.type); if (err) return err; return drgn_object_set_signed(ret, qualified_type, @@ -305,9 +315,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, 0); } else if (name_len == strlen("PAGE_SIZE") && memcmp(name, "PAGE_SIZE", name_len) == 0) { - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err 
= drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, @@ -315,9 +325,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, 0); } else if (name_len == strlen("PAGE_MASK") && memcmp(name, "PAGE_MASK", name_len) == 0) { - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, @@ -330,9 +340,9 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, err = linux_kernel_get_thread_size(prog, &thread_size); if (err) return err; - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_UNSIGNED_LONG, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_UNSIGNED_LONG, + &qualified_type.type); if (err) return err; return drgn_object_set_unsigned(ret, qualified_type, @@ -341,16 +351,16 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, memcmp(name, "UTS_RELEASE", name_len) == 0) { size_t len; - err = drgn_type_index_find_primitive(&prog->tindex, - DRGN_C_TYPE_CHAR, - &qualified_type.type); + err = drgn_program_find_primitive_type(prog, + DRGN_C_TYPE_CHAR, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = DRGN_QUALIFIER_CONST; len = strlen(prog->vmcoreinfo.osrelease); - err = drgn_type_index_array_type(&prog->tindex, len + 1, - qualified_type, NULL, - &qualified_type.type); + err = drgn_array_type_create(prog, qualified_type, + len + 1, &drgn_language_c, + &qualified_type.type); if (err) return err; qualified_type.qualifiers = 0; @@ -753,12 +763,10 @@ struct kmod_index { static struct drgn_error *kmod_index_validate(struct kmod_index *index, const char *path) { - const char *ptr; + const char 
*ptr = index->ptr; uint32_t magic, version; - - ptr = index->ptr; - if (!read_be32(&ptr, index->end, &magic) || - !read_be32(&ptr, index->end, &version)) { + if (!mread_be32(&ptr, index->end, &magic) || + !mread_be32(&ptr, index->end, &version)) { return drgn_error_format(DRGN_ERROR_OTHER, "%s is too short", path); } @@ -824,20 +832,21 @@ static const char *kmod_index_find(struct kmod_index *index, const char *key) static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); + + /* kmod_index_validate() already checked that this is within bounds. */ const char *ptr = index->ptr + 8; uint32_t offset; - for (;;) { - if (!read_be32(&ptr, index->end, &offset)) + if (!mread_be32(&ptr, index->end, &offset) || + !(ptr = mread_begin(index->ptr, index->end, + offset & INDEX_NODE_MASK))) return NULL; - ptr = index->ptr + (offset & INDEX_NODE_MASK); if (offset & INDEX_NODE_PREFIX) { const char *prefix; size_t prefix_len; - - if (!read_string(&ptr, index->end, &prefix, - &prefix_len)) + if (!mread_string(&ptr, index->end, &prefix, + &prefix_len)) return NULL; if (strncmp(key, prefix, prefix_len) != 0) return NULL; @@ -846,20 +855,21 @@ static const char *kmod_index_find(struct kmod_index *index, const char *key) if (offset & INDEX_NODE_CHILDS) { uint8_t first, last; - - if (!read_u8(&ptr, index->end, &first) || - !read_u8(&ptr, index->end, &last)) + if (!mread_u8(&ptr, index->end, &first) || + !mread_u8(&ptr, index->end, &last)) return NULL; if (*key) { uint8_t cur = *key; - - if (cur < first || cur > last) + if (cur < first || cur > last || + !mread_skip(&ptr, index->end, + 4 * (cur - first))) return NULL; - ptr += 4 * (cur - first); key++; continue; } else { - ptr += 4 * (last - first + 1); + if (!mread_skip(&ptr, index->end, + 4 * (last - first + 1))) + return NULL; break; } } else if (*key) { @@ -904,27 +914,24 @@ static void 
depmod_index_deinit(struct depmod_index *depmod) static bool depmod_index_find(struct depmod_index *depmod, const char *name, const char **path_ret, size_t *len_ret) { - const char *ptr; - uint32_t value_count; - const char *deps; - size_t deps_len; - char *colon; - - ptr = kmod_index_find(&depmod->modules_dep, name); + const char *ptr = kmod_index_find(&depmod->modules_dep, name); if (!ptr) return false; - if (!read_be32(&ptr, depmod->modules_dep.end, &value_count) || + uint32_t value_count; + if (!mread_be32(&ptr, depmod->modules_dep.end, &value_count) || !value_count) return false; /* Skip over priority. */ - ptr += 4; - if (!read_string(&ptr, depmod->modules_dep.end, &deps, + const char *deps; + size_t deps_len; + if (!mread_skip(&ptr, depmod->modules_dep.end, 4) || + !mread_string(&ptr, depmod->modules_dep.end, &deps, &deps_len)) return false; - colon = strchr(deps, ':'); + const char *colon = strchr(deps, ':'); if (!colon) return false; @@ -1158,42 +1165,37 @@ DEFINE_HASH_TABLE(kernel_module_table, struct kernel_module_file *, kernel_module_table_key, c_string_hash, c_string_eq) static struct drgn_error * -report_loaded_kernel_module(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_loaded_kernel_module(struct drgn_debug_info_load_state *load, struct kernel_module_iterator *kmod_it, struct kernel_module_table *kmod_table) { struct drgn_error *err; - const char *name = kmod_it->name; - struct hash_pair hp; - struct kernel_module_table_iterator it; - struct kernel_module_file *kmod; - hp = kernel_module_table_hash(&name); - it = kernel_module_table_search_hashed(kmod_table, &name, hp); + const char *name = kmod_it->name; + struct hash_pair hp = kernel_module_table_hash(&name); + struct kernel_module_table_iterator it = + kernel_module_table_search_hashed(kmod_table, &name, hp); if (!it.entry) return &drgn_not_found; - kmod = *it.entry; + struct kernel_module_file *kmod = *it.entry; kernel_module_table_delete_iterator_hashed(kmod_table, it, 
hp); do { uint64_t start, end; - err = cache_kernel_module_sections(kmod_it, kmod->elf, &start, &end); if (err) { - err = drgn_dwarf_index_report_error(dindex, - kmod->path, - "could not get section addresses", - err); + err = drgn_debug_info_report_error(load, kmod->path, + "could not get section addresses", + err); if (err) return err; continue; } - err = drgn_dwarf_index_report_elf(dindex, kmod->path, kmod->fd, - kmod->elf, start, end, - kmod->name, NULL); + err = drgn_debug_info_report_elf(load, kmod->path, kmod->fd, + kmod->elf, start, end, + kmod->name, NULL); kmod->elf = NULL; kmod->fd = -1; if (err) @@ -1204,8 +1206,7 @@ report_loaded_kernel_module(struct drgn_program *prog, } static struct drgn_error * -report_default_kernel_module(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_default_kernel_module(struct drgn_debug_info_load_state *load, struct kernel_module_iterator *kmod_it, struct depmod_index *depmod) { @@ -1216,71 +1217,71 @@ report_default_kernel_module(struct drgn_program *prog, NULL, }; struct drgn_error *err; + const char *depmod_path; size_t depmod_path_len; - size_t extension_len; - char *path; - int fd; - Elf *elf; - uint64_t start, end; - if (!depmod_index_find(depmod, kmod_it->name, &depmod_path, &depmod_path_len)) { - return drgn_dwarf_index_report_error(dindex, kmod_it->name, - "could not find module in depmod", - NULL); + return drgn_debug_info_report_error(load, kmod_it->name, + "could not find module in depmod", + NULL); } + size_t extension_len; if (depmod_path_len >= 3 && (memcmp(depmod_path + depmod_path_len - 3, ".gz", 3) == 0 || memcmp(depmod_path + depmod_path_len - 3, ".xz", 3) == 0)) extension_len = 3; else extension_len = 0; + char *path; + int fd; + Elf *elf; err = find_elf_file(&path, &fd, &elf, module_paths, - prog->vmcoreinfo.osrelease, + load->dbinfo->prog->vmcoreinfo.osrelease, depmod_path_len - extension_len, depmod_path, extension_len, depmod_path + depmod_path_len - extension_len); if (err) - 
return drgn_dwarf_index_report_error(dindex, NULL, NULL, err); + return drgn_debug_info_report_error(load, NULL, NULL, err); if (!elf) { - return drgn_dwarf_index_report_error(dindex, kmod_it->name, - "could not find .ko", - NULL); + return drgn_debug_info_report_error(load, kmod_it->name, + "could not find .ko", + NULL); } + uint64_t start, end; err = cache_kernel_module_sections(kmod_it, elf, &start, &end); if (err) { elf_end(elf); close(fd); free(path); - return drgn_dwarf_index_report_error(dindex, path, - "could not get section addresses", - err); + return drgn_debug_info_report_error(load, path, + "could not get section addresses", + err); } - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, start, end, - kmod_it->name, NULL); + err = drgn_debug_info_report_elf(load, path, fd, elf, start, end, + kmod_it->name, NULL); free(path); return err; } static struct drgn_error * -report_loaded_kernel_modules(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_loaded_kernel_modules(struct drgn_debug_info_load_state *load, struct kernel_module_table *kmod_table, struct depmod_index *depmod) { + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; - struct kernel_module_iterator kmod_it; + struct kernel_module_iterator kmod_it; err = kernel_module_iterator_init(&kmod_it, prog); if (err) { kernel_module_iterator_error: - return drgn_dwarf_index_report_error(dindex, "kernel modules", - "could not find loaded kernel modules", - err); + return drgn_debug_info_report_error(load, "kernel modules", + "could not find loaded kernel modules", + err); } for (;;) { err = kernel_module_iterator_next(&kmod_it); @@ -1294,8 +1295,8 @@ report_loaded_kernel_modules(struct drgn_program *prog, /* Look for an explicitly-reported file first. 
*/ if (kmod_table) { - err = report_loaded_kernel_module(prog, dindex, - &kmod_it, kmod_table); + err = report_loaded_kernel_module(load, &kmod_it, + kmod_table); if (!err) continue; else if (err != &drgn_not_found) @@ -1308,24 +1309,24 @@ report_loaded_kernel_modules(struct drgn_program *prog, * already indexed that module. */ if (depmod && - !drgn_dwarf_index_is_indexed(dindex, kmod_it.name)) { + !drgn_debug_info_is_indexed(load->dbinfo, kmod_it.name)) { if (!depmod->modules_dep.ptr) { err = depmod_index_init(depmod, prog->vmcoreinfo.osrelease); if (err) { depmod->modules_dep.ptr = NULL; - err = drgn_dwarf_index_report_error(dindex, - "kernel modules", - "could not read depmod", - err); + err = drgn_debug_info_report_error(load, + "kernel modules", + "could not read depmod", + err); if (err) break; depmod = NULL; continue; } } - err = report_default_kernel_module(prog, dindex, - &kmod_it, depmod); + err = report_default_kernel_module(load, &kmod_it, + depmod); if (err) break; } @@ -1335,15 +1336,14 @@ report_loaded_kernel_modules(struct drgn_program *prog, } static struct drgn_error * -report_kernel_modules(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, +report_kernel_modules(struct drgn_debug_info_load_state *load, struct kernel_module_file *kmods, size_t num_kmods, - bool report_default, bool need_module_definition, - bool vmlinux_is_pending) + bool need_module_definition, bool vmlinux_is_pending) { + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; - if (!num_kmods && !report_default) + if (!num_kmods && !load->load_default) return NULL; /* @@ -1355,7 +1355,7 @@ report_kernel_modules(struct drgn_program *prog, */ if (vmlinux_is_pending && (!(prog->flags & DRGN_PROGRAM_IS_LIVE) || need_module_definition)) { - err = drgn_dwarf_index_flush(dindex, false); + err = drgn_debug_info_report_flush(load); if (err) return err; } @@ -1371,10 +1371,10 @@ report_kernel_modules(struct drgn_program *prog, "name", &name_member); } if (err) 
{ - return drgn_dwarf_index_report_error(dindex, - "kernel modules", - "could not get kernel module names", - err); + return drgn_debug_info_report_error(load, + "kernel modules", + "could not get kernel module names", + err); } module_name_offset = name_member.bit_offset / 8; } @@ -1390,18 +1390,18 @@ report_kernel_modules(struct drgn_program *prog, module_name_offset, &kmod->name); if (err) { - err = drgn_dwarf_index_report_error(dindex, - kmod->path, - NULL, err); + err = drgn_debug_info_report_error(load, + kmod->path, + NULL, err); if (err) goto out; continue; } if (!kmod->name) { - err = drgn_dwarf_index_report_error(dindex, - kmod->path, - "could not find kernel module name", - NULL); + err = drgn_debug_info_report_error(load, + kmod->path, + "could not find kernel module name", + NULL); if (err) goto out; continue; @@ -1425,9 +1425,9 @@ report_kernel_modules(struct drgn_program *prog, } } - err = report_loaded_kernel_modules(prog, dindex, + err = report_loaded_kernel_modules(load, num_kmods ? &kmod_table : NULL, - report_default ? &depmod : NULL); + load->load_default ? 
&depmod : NULL); if (err) goto out; @@ -1436,10 +1436,9 @@ report_kernel_modules(struct drgn_program *prog, struct kernel_module_file *kmod = *it.entry; it = kernel_module_table_delete_iterator(&kmod_table, it); do { - err = drgn_dwarf_index_report_elf(dindex, kmod->path, - kmod->fd, kmod->elf, - 0, 0, kmod->name, - NULL); + err = drgn_debug_info_report_elf(load, kmod->path, + kmod->fd, kmod->elf, 0, + 0, kmod->name, NULL); kmod->elf = NULL; kmod->fd = -1; if (err) @@ -1455,9 +1454,9 @@ report_kernel_modules(struct drgn_program *prog, return err; } -static struct drgn_error *report_vmlinux(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - bool *vmlinux_is_pending) +static struct drgn_error * +report_vmlinux(struct drgn_debug_info_load_state *load, + bool *vmlinux_is_pending) { static const char * const vmlinux_paths[] = { /* @@ -1471,54 +1470,49 @@ static struct drgn_error *report_vmlinux(struct drgn_program *prog, "/lib/modules/%s/vmlinux", NULL, }; + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; + char *path; int fd; Elf *elf; - uint64_t start, end; - err = find_elf_file(&path, &fd, &elf, vmlinux_paths, prog->vmcoreinfo.osrelease); if (err) - return drgn_dwarf_index_report_error(dindex, NULL, NULL, err); + return drgn_debug_info_report_error(load, NULL, NULL, err); if (!elf) { err = drgn_error_format(DRGN_ERROR_OTHER, "could not find vmlinux for %s", prog->vmcoreinfo.osrelease); - return drgn_dwarf_index_report_error(dindex, "kernel", NULL, - err); + return drgn_debug_info_report_error(load, "kernel", NULL, err); } + uint64_t start, end; err = elf_address_range(elf, prog->vmcoreinfo.kaslr_offset, &start, &end); if (err) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, err); + err = drgn_debug_info_report_error(load, path, NULL, err); elf_end(elf); close(fd); free(path); return err; } - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, start, end, - "kernel", vmlinux_is_pending); + err = 
drgn_debug_info_report_elf(load, path, fd, elf, start, end, + "kernel", vmlinux_is_pending); free(path); return err; } struct drgn_error * -linux_kernel_report_debug_info(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - const char **paths, size_t n, - bool report_default, bool report_main) +linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load) { + struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; - struct kernel_module_file *kmods; - size_t i, num_kmods = 0; - bool need_module_definition = false; - bool vmlinux_is_pending = false; - if (n) { - kmods = malloc_array(n, sizeof(*kmods)); + struct kernel_module_file *kmods; + if (load->num_paths) { + kmods = malloc_array(load->num_paths, sizeof(*kmods)); if (!kmods) return &drgn_enomem; } else { @@ -1530,27 +1524,29 @@ linux_kernel_report_debug_info(struct drgn_program *prog, * modules. So, this sets aside kernel modules and reports everything * else. */ - for (i = 0; i < n; i++) { - const char *path = paths[i]; + size_t num_kmods = 0; + bool need_module_definition = false; + bool vmlinux_is_pending = false; + for (size_t i = 0; i < load->num_paths; i++) { + const char *path = load->paths[i]; int fd; Elf *elf; - Elf_Scn *this_module_scn, *modinfo_scn; - bool is_vmlinux; - err = open_elf_file(path, &fd, &elf); if (err) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, - err); + err = drgn_debug_info_report_error(load, path, NULL, + err); if (err) goto out; continue; } + Elf_Scn *this_module_scn, *modinfo_scn; + bool is_vmlinux; err = identify_kernel_elf(elf, &this_module_scn, &modinfo_scn, &is_vmlinux); if (err) { - err = drgn_dwarf_index_report_error(dindex, path, NULL, - err); + err = drgn_debug_info_report_error(load, path, NULL, + err); elf_end(elf); close(fd); if (err) @@ -1559,16 +1555,14 @@ linux_kernel_report_debug_info(struct drgn_program *prog, } if (this_module_scn || modinfo_scn) { struct kernel_module_file *kmod = &kmods[num_kmods++]; - 
kmod->path = path; kmod->fd = fd; kmod->elf = elf; err = get_kernel_module_name_from_modinfo(modinfo_scn, &kmod->name); if (err) { - err = drgn_dwarf_index_report_error(dindex, - path, NULL, - err); + err = drgn_debug_info_report_error(load, path, + NULL, err); if (err) goto out; continue; @@ -1579,49 +1573,46 @@ linux_kernel_report_debug_info(struct drgn_program *prog, } } else if (is_vmlinux) { uint64_t start, end; - bool is_new; - err = elf_address_range(elf, prog->vmcoreinfo.kaslr_offset, &start, &end); if (err) { elf_end(elf); close(fd); - err = drgn_dwarf_index_report_error(dindex, - path, NULL, - err); + err = drgn_debug_info_report_error(load, path, + NULL, err); if (err) goto out; continue; } - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, - start, end, "kernel", - &is_new); + bool is_new; + err = drgn_debug_info_report_elf(load, path, fd, elf, + start, end, "kernel", + &is_new); if (err) goto out; if (is_new) vmlinux_is_pending = true; } else { - err = drgn_dwarf_index_report_elf(dindex, path, fd, elf, - 0, 0, NULL, NULL); + err = drgn_debug_info_report_elf(load, path, fd, elf, 0, + 0, NULL, NULL); if (err) goto out; } } - if (report_main && !vmlinux_is_pending && - !drgn_dwarf_index_is_indexed(dindex, "kernel")) { - err = report_vmlinux(prog, dindex, &vmlinux_is_pending); + if (load->load_main && !vmlinux_is_pending && + !drgn_debug_info_is_indexed(load->dbinfo, "kernel")) { + err = report_vmlinux(load, &vmlinux_is_pending); if (err) goto out; } - err = report_kernel_modules(prog, dindex, kmods, num_kmods, - report_default, need_module_definition, - vmlinux_is_pending); + err = report_kernel_modules(load, kmods, num_kmods, + need_module_definition, vmlinux_is_pending); out: - for (i = 0; i < num_kmods; i++) { + for (size_t i = 0; i < num_kmods; i++) { elf_end(kmods[i].elf); if (kmods[i].fd != -1) close(kmods[i].fd); diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index c467bf27c..330b363e0 100644 --- a/libdrgn/linux_kernel.h +++ 
b/libdrgn/linux_kernel.h @@ -4,11 +4,9 @@ #ifndef DRGN_LINUX_KERNEL_H #define DRGN_LINUX_KERNEL_H -#include - #include "drgn.h" -struct drgn_dwarf_index; +struct drgn_debug_info_load_state; struct drgn_memory_reader; struct vmcoreinfo; @@ -33,10 +31,7 @@ struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, void *arg, struct drgn_object *ret); struct drgn_error * -linux_kernel_report_debug_info(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - const char **paths, size_t n, - bool report_default, bool report_main); +linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load); #define KDUMP_SIGNATURE "KDUMP " #define KDUMP_SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index dedaed776..5666af9bf 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -1,11 +1,14 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include +#include #include -#include -#include "internal.h" +#include "drgn.h" +#include "platform.h" #include "program.h" +#include "util.h" struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, @@ -108,14 +111,15 @@ linux_helper_radix_tree_lookup(struct drgn_object *res, struct drgn_member_info member; struct drgn_qualified_type node_type; - drgn_object_init(&node, res->prog); - drgn_object_init(&tmp, res->prog); + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); /* node = root->xa_head */ err = drgn_object_member_dereference(&node, root, "xa_head"); if (!err) { - err = drgn_program_find_type(res->prog, "struct xa_node *", - NULL, &node_type); + err = drgn_program_find_type(drgn_object_program(res), + "struct xa_node *", NULL, + &node_type); if (err) goto out; RADIX_TREE_INTERNAL_NODE = 2; @@ -125,14 +129,14 @@ linux_helper_radix_tree_lookup(struct drgn_object *res, 
err = drgn_object_member_dereference(&node, root, "rnode"); if (err) goto out; - err = drgn_program_find_type(res->prog, "void *", NULL, - &node_type); + err = drgn_program_find_type(drgn_object_program(res), "void *", + NULL, &node_type); if (err) goto out; err = drgn_object_cast(&node, node_type, &node); if (err) goto out; - err = drgn_program_find_type(res->prog, + err = drgn_program_find_type(drgn_object_program(res), "struct radix_tree_node *", NULL, &node_type); if (err) @@ -142,7 +146,7 @@ linux_helper_radix_tree_lookup(struct drgn_object *res, goto out; } - err = drgn_program_member_info(res->prog, + err = drgn_program_member_info(drgn_object_program(res), drgn_type_type(node_type.type).type, "slots", &member); if (err) @@ -204,7 +208,7 @@ struct drgn_error *linux_helper_idr_find(struct drgn_object *res, struct drgn_error *err; struct drgn_object tmp; - drgn_object_init(&tmp, res->prog); + drgn_object_init(&tmp, drgn_object_program(res)); /* id -= idr->idr_base */ err = drgn_object_member_dereference(&tmp, idr, "idr_base"); @@ -255,29 +259,29 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, union drgn_value ns_level, pidhash_shift; uint64_t i; - err = drgn_program_find_type(res->prog, "struct pid *", NULL, - &pidp_type); + err = drgn_program_find_type(drgn_object_program(res), "struct pid *", + NULL, &pidp_type); if (err) return err; - err = drgn_program_find_type(res->prog, "struct upid", NULL, - &upid_type); + err = drgn_program_find_type(drgn_object_program(res), "struct upid", + NULL, &upid_type); if (err) return err; - err = drgn_program_member_info(res->prog, upid_type.type, "pid_chain", - &pid_chain_member); + err = drgn_program_member_info(drgn_object_program(res), upid_type.type, + "pid_chain", &pid_chain_member); if (err) return err; - err = drgn_program_member_info(res->prog, upid_type.type, "nr", - &nr_member); + err = drgn_program_member_info(drgn_object_program(res), upid_type.type, + "nr", &nr_member); if (err) 
return err; - err = drgn_program_member_info(res->prog, upid_type.type, "ns", - &ns_member); + err = drgn_program_member_info(drgn_object_program(res), upid_type.type, + "ns", &ns_member); if (err) return err; - drgn_object_init(&node, res->prog); - drgn_object_init(&tmp, res->prog); + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); err = drgn_object_read(&tmp, ns); if (err) @@ -293,7 +297,8 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, goto out; /* i = 1 << pidhash_shift */ - err = drgn_program_find_object(res->prog, "pidhash_shift", NULL, + err = drgn_program_find_object(drgn_object_program(res), + "pidhash_shift", NULL, DRGN_FIND_OBJECT_ANY, &tmp); if (err) goto out; @@ -383,7 +388,7 @@ struct drgn_error *linux_helper_find_pid(struct drgn_object *res, struct drgn_error *err; struct drgn_object tmp; - drgn_object_init(&tmp, res->prog); + drgn_object_init(&tmp, drgn_object_program(res)); /* (struct pid *)idr_find(&ns->idr, pid) */ err = drgn_object_member_dereference(&tmp, ns, "idr"); @@ -396,14 +401,16 @@ struct drgn_error *linux_helper_find_pid(struct drgn_object *res, err = linux_helper_idr_find(&tmp, &tmp, pid); if (err) goto out; - err = drgn_program_find_type(res->prog, "struct pid *", NULL, + err = drgn_program_find_type(drgn_object_program(res), + "struct pid *", NULL, &qualified_type); if (err) goto out; err = drgn_object_cast(res, qualified_type, &tmp); } else if (err->code == DRGN_ERROR_LOOKUP) { drgn_error_destroy(err); - err = drgn_program_find_object(res->prog, "pid_hash", NULL, + err = drgn_program_find_object(drgn_object_program(res), + "pid_hash", NULL, DRGN_FIND_OBJECT_ANY, &tmp); if (err) goto out; @@ -425,9 +432,10 @@ struct drgn_error *linux_helper_pid_task(struct drgn_object *res, struct drgn_object first; char member[64]; - drgn_object_init(&first, res->prog); + drgn_object_init(&first, drgn_object_program(res)); - err = drgn_program_find_type(res->prog, 
"struct task_struct *", NULL, + err = drgn_program_find_type(drgn_object_program(res), + "struct task_struct *", NULL, &task_structp_type); if (err) goto out; @@ -482,14 +490,14 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, struct drgn_object pid_type_obj; union drgn_value pid_type; - drgn_object_init(&pid_obj, res->prog); - drgn_object_init(&pid_type_obj, res->prog); + drgn_object_init(&pid_obj, drgn_object_program(res)); + drgn_object_init(&pid_type_obj, drgn_object_program(res)); err = linux_helper_find_pid(&pid_obj, ns, pid); if (err) goto out; - err = drgn_program_find_object(res->prog, "PIDTYPE_PID", NULL, - DRGN_FIND_OBJECT_CONSTANT, + err = drgn_program_find_object(drgn_object_program(res), "PIDTYPE_PID", + NULL, DRGN_FIND_OBJECT_CONSTANT, &pid_type_obj); if (err) goto out; @@ -502,122 +510,3 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, drgn_object_deinit(&pid_obj); return err; } - -static struct drgn_error *cache_task_state_chars(struct drgn_object *tmp) -{ - struct drgn_error *err; - struct drgn_program *prog = tmp->prog; - struct drgn_object task_state_array; - uint64_t length; - size_t i; - char *task_state_chars = NULL; - int64_t task_report = 0; - - drgn_object_init(&task_state_array, prog); - - err = drgn_program_find_object(prog, "task_state_array", NULL, - DRGN_FIND_OBJECT_ANY, &task_state_array); - if (err) - goto out; - - if (drgn_type_kind(task_state_array.type) != DRGN_TYPE_ARRAY) { - err = drgn_error_create(DRGN_ERROR_TYPE, - "task_state_array is not an array"); - goto out; - } - length = drgn_type_length(task_state_array.type); - if (length == 0 || length >= 64) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "task_state_array length is invalid"); - goto out; - } - - /* - * Walk through task_state_array backwards looking for the largest state - * that we know is in TASK_REPORT. 
- */ - for (i = length; i--; ) { - union drgn_value value; - char c; - - err = drgn_object_subscript(tmp, &task_state_array, i); - if (err) - goto out; - err = drgn_object_dereference(tmp, tmp); - if (err) - goto out; - err = drgn_object_read_integer(tmp, &value); - if (err) - goto out; - c = value.uvalue; - if (!task_state_chars && strchr("RSDTtXZP", c)) { - task_state_chars = malloc(i + 1); - if (!task_state_chars) { - err = &drgn_enomem; - goto out; - } - task_report = (UINT64_C(1) << i) - 1; - } - if (task_state_chars) - task_state_chars[i] = c; - } - if (!task_state_chars) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "could not parse task_state_array"); - goto out; - } - - prog->task_state_chars = task_state_chars; - prog->task_report = task_report; - task_state_chars = NULL; - err = NULL; -out: - free(task_state_chars); - drgn_object_deinit(&task_state_array); - return err; -} - -struct drgn_error * -linux_helper_task_state_to_char(const struct drgn_object *task, char *ret) -{ - static const uint64_t TASK_NOLOAD = 0x400; - struct drgn_error *err; - struct drgn_program *prog = task->prog; - struct drgn_object tmp; - union drgn_value task_state, exit_state; - uint64_t state; - - drgn_object_init(&tmp, prog); - - if (!prog->task_state_chars) { - err = cache_task_state_chars(&tmp); - if (err) - goto out; - } - - err = drgn_object_member_dereference(&tmp, task, "state"); - if (err) - goto out; - err = drgn_object_read_integer(&tmp, &task_state); - if (err) - goto out; - err = drgn_object_member_dereference(&tmp, task, "exit_state"); - if (err) - goto out; - err = drgn_object_read_integer(&tmp, &exit_state); - if (err) - goto out; - - state = (task_state.uvalue | exit_state.uvalue) & prog->task_report; - *ret = prog->task_state_chars[fls(state)]; - /* - * States beyond TASK_REPORT are special. As of Linux v5.3, TASK_IDLE is - * the only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. 
- */ - if (*ret == 'D' && (task_state.uvalue & ~state) == TASK_NOLOAD) - *ret = 'I'; - err = NULL; -out: - drgn_object_deinit(&tmp); - return err; -} diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 48682c3ce..d02eacfaa 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -1,12 +1,13 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include +#include +#include #include #include -#include "internal.h" #include "memory_reader.h" +#include "util.h" DEFINE_BINARY_SEARCH_TREE_FUNCTIONS(drgn_memory_segment_tree, binary_search_tree_scalar_cmp, splay) diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index f6d94a332..5eb095319 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -12,11 +12,8 @@ #ifndef DRGN_MEMORY_READER_H #define DRGN_MEMORY_READER_H -#include -#include -#include - #include "binary_search_tree.h" +#include "drgn.h" /** * @ingroup Internals diff --git a/libdrgn/mread.h b/libdrgn/mread.h new file mode 100644 index 000000000..bf1764cff --- /dev/null +++ b/libdrgn/mread.h @@ -0,0 +1,258 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0+ + +/** + * @file + * + * Helpers for parsing values in memory. + * + * See @ref MemoryParsing. + */ + +#ifndef DRGN_MREAD_H +#define DRGN_MREAD_H + +#include +#include +#include +#include + +/** + * @ingroup Internals + * + * @defgroup MemoryParsing Memory parsing + * + * Helpers for reading values in memory. + * + * This provides helpers for reading values in memory (e.g., from an mmap'd + * file) with safe bounds checking. + * + * @{ + */ + +/** + * Return whether ptr + offset is within @p end. + * + * @param[in] ptr Pointer to check. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] offset Offset to check. + * @return @c true if the result would be in bounds, @c false if not. 
+ */ +static inline bool mread_in_bounds(const char *ptr, const char *end, + size_t offset) +{ + return end - ptr >= offset; +} + +/** + * Return start + offset, checking bounds. + * + * @param[in] start Pointer to first valid byte. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] offset Offset from @p start. + * @return start + offset if it is within @p end, @c NULL if not. + */ +static inline const char *mread_begin(const char *start, const char *end, + size_t offset) +{ + return mread_in_bounds(start, end, offset) ? start + offset : NULL; +} + +/** + * Advance @p ptr by @p offset, checking bounds. + * + * @param[in,out] ptr Pointer to check and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] offset Offset to advance by. + * @return @c true if the pointer was advanced, @c false if it was not advanced + * because the result would be out of bounds. + */ +static inline bool mread_skip(const char **ptr, const char *end, size_t offset) +{ + if (!mread_in_bounds(*ptr, end, offset)) + return false; + *ptr += offset; + return true; +} + +/** + * Read an unsigned 8-bit integer in memory and advance @p ptr. + * + * @param[in,out] ptr Pointer to read from and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[out] ret Returned value. + * @return @c true on success, @c false if the read was out of bounds. + */ +static inline bool mread_u8(const char **ptr, const char *end, uint8_t *ret) +{ + if (!mread_in_bounds(*ptr, end, sizeof(uint8_t))) + return false; + *ret = *(const uint8_t *)*ptr; + *ptr += sizeof(uint8_t); + return true; +} + +/** + * Read an unsigned 8-bit integer in memory into a @c size_t and advance @p ptr. 
+ * + * @sa mread_u8() + */ +static inline bool mread_u8_into_size_t(const char **ptr, const char *end, + size_t *ret) +{ + uint8_t tmp; + if (!mread_u8(ptr, end, &tmp)) + return false; + /* SIZE_MAX is required to be at least 65535, so this won't overflow. */ + *ret = tmp; + return true; +} + +#ifdef DOXYGEN +/** + * Read an unsigned N-bit integer in memory and advance @p ptr. + * + * This is defined for N of 16, 32, and 64. + * + * @param[in,out] ptr Pointer to read from and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[in] bswap Whether to swap the byte order of the read value. + * @param[out] ret Returned value. + * @return @c true on success, @c false if the read was out of bounds. + */ +bool mread_uN(const char **ptr, const char *end, bool bswap, uintN_t *ret); + +/** + * Read an unsigned N-bit little-endian integer in memory and advance @p ptr. + * + * @sa mread_uN() + */ +bool mread_leN(const char **ptr, const char *end, uintN_t *ret); + +/** + * Read an unsigned N-bit big-endian integer in memory and advance @p ptr. + * + * @sa mread_uN() + */ +bool mread_beN(const char **ptr, const char *end, uintN_t *ret); + +/** + * Read an unsigned N-bit integer in memory into a @c uint64_t and advance @p + * ptr. + * + * @sa mread_uN() + */ +bool mread_uN_into_u64(const char **ptr, const char *end, bool bswap, + uint64_t *ret); + +/** + * Read an unsigned N-bit integer in memory into a @c size_t and advance @p + * ptr. 
+ * + * @sa mread_uN() + * + * @return @c true on success, @c false if the read was out of bounds or the + * result is too large for a @c size_t + */ +bool mread_uN_into_size_t(const char **ptr, const char *end, bool bswap, + uint64_t *ret); +#endif + +#define DEFINE_READ(size) \ +static inline bool mread_u##size(const char **ptr, const char *end, bool bswap, \ + uint##size##_t *ret) \ +{ \ + if (!mread_in_bounds(*ptr, end, sizeof(uint##size##_t))) \ + return false; \ + uint##size##_t tmp; \ + memcpy(&tmp, *ptr, sizeof(tmp)); \ + if (bswap) \ + tmp = bswap_##size(tmp); \ + *ret = tmp; \ + *ptr += sizeof(uint##size##_t); \ + return true; \ +} \ + \ +static inline bool mread_le##size(const char **ptr, const char *end, \ + uint##size##_t *ret) \ +{ \ + return mread_u##size(ptr, end, \ + __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__, ret); \ +} \ + \ +static inline bool mread_be##size(const char **ptr, const char *end, \ + uint##size##_t *ret) \ +{ \ + return mread_u##size(ptr, end, __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__, \ + ret); \ +} \ + \ +static inline bool mread_u##size##_into_u64(const char **ptr, const char *end, \ + bool bswap, uint64_t *ret) \ +{ \ + uint##size##_t tmp; \ + if (!mread_u##size(ptr, end, bswap, &tmp)) \ + return false; \ + *ret = tmp; \ + return true; \ +} \ + \ +static inline bool mread_u##size##_into_size_t(const char **ptr, \ + const char *end, bool bswap, \ + size_t *ret) \ +{ \ + uint##size##_t tmp; \ + if (!mread_u##size(ptr, end, bswap, &tmp)) \ + return false; \ + if (tmp > SIZE_MAX) \ + return false; \ + *ret = tmp; \ + return true; \ +} + +DEFINE_READ(16) +DEFINE_READ(32) +DEFINE_READ(64) + +/** + * Advance @p ptr to the byte after the next null byte. + * + * @param[in,out] ptr Pointer to advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @return @c true if the pointer was advanced, @c false if no null byte was + * found. 
+ */ +static inline bool mread_skip_string(const char **ptr, const char *end) +{ + const char *nul = memchr(*ptr, 0, end - *ptr); + if (!nul) + return false; + *ptr = nul + 1; + return true; +} + +/** + * Read a null-terminated string in memory and advance @p ptr. + * + * @param[in,out] ptr Pointer to read from and advance. + * @param[in] end Pointer to one byte after the last valid byte. + * @param[out] str_ret Returned string. Equal to the initial value of + * *ptr. + * @param[out] len_ret Returned string length not including the null byte. + * @return @c true on success, @c false if no null byte was found. + */ +static inline bool mread_string(const char **ptr, const char *end, + const char **str_ret, size_t *len_ret) +{ + const char *nul = memchr(*ptr, 0, end - *ptr); + if (!nul) + return false; + *str_ret = *ptr; + *len_ret = nul - *ptr; + *ptr = nul + 1; + return true; +} + +/** @} */ + +#endif /* DRGN_MREAD_H */ diff --git a/libdrgn/object.c b/libdrgn/object.c index 4f0d57c02..f3c7871c5 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -1,24 +1,25 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ +#include #include -#include #include #include -#include "internal.h" +#include "drgn.h" +#include "error.h" #include "language.h" #include "memory_reader.h" #include "object.h" #include "program.h" #include "serialize.h" #include "type.h" -#include "type_index.h" +#include "util.h" -void drgn_object_init(struct drgn_object *obj, struct drgn_program *prog) +LIBDRGN_PUBLIC void drgn_object_init(struct drgn_object *obj, + struct drgn_program *prog) { - obj->prog = prog; - obj->type = drgn_void_type(drgn_program_language(prog)); + obj->type = drgn_void_type(prog, NULL); obj->bit_size = 0; obj->qualifiers = 0; obj->kind = DRGN_OBJECT_NONE; @@ -284,12 +285,7 @@ drgn_byte_order_to_little_endian(struct drgn_program *prog, *ret = true; return NULL; case DRGN_PROGRAM_ENDIAN: - if (!prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program byte order is not known"); - } - *ret = drgn_program_is_little_endian(prog); - return NULL; + return drgn_program_is_little_endian(prog, ret); default: return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "invalid byte order"); @@ -361,8 +357,8 @@ drgn_object_set_buffer(struct drgn_object *res, enum drgn_object_kind kind; uint64_t bit_size; - err = drgn_byte_order_to_little_endian(res->prog, byte_order, - &little_endian); + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &little_endian); if (err) return err; @@ -381,22 +377,18 @@ drgn_object_set_reference_internal(struct drgn_object *res, uint64_t bit_size, uint64_t address, uint64_t bit_offset, bool little_endian) { - struct drgn_error *err; - - if (!res->prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program word size is not known"); - } + bool is_64_bit; + struct drgn_error *err = + drgn_program_is_64_bit(drgn_object_program(res), &is_64_bit); + if (err) + return err; err = sanity_check_object(kind, type->bit_field_size, bit_size); if (err) return err; 
address += bit_offset / 8; - if (drgn_program_is_64_bit(res->prog)) - address &= UINT64_MAX; - else - address &= UINT32_MAX; + address &= is_64_bit ? UINT64_MAX : UINT32_MAX; bit_offset %= 8; if (bit_size > UINT64_MAX - bit_offset) { return drgn_error_format(DRGN_ERROR_OVERFLOW, @@ -423,8 +415,8 @@ drgn_object_set_reference(struct drgn_object *res, enum drgn_object_kind kind; uint64_t bit_size; - err = drgn_byte_order_to_little_endian(res->prog, byte_order, - &little_endian); + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &little_endian); if (err) return err; @@ -443,7 +435,7 @@ drgn_object_copy(struct drgn_object *res, const struct drgn_object *obj) if (res == obj) return NULL; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -537,7 +529,7 @@ drgn_object_slice(struct drgn_object *res, const struct drgn_object *obj, enum drgn_object_kind kind; uint64_t bit_size; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -600,8 +592,8 @@ drgn_object_read_reference(const struct drgn_object *obj, if (!buf) return &drgn_enomem; } - err = drgn_memory_reader_read(&obj->prog->reader, buf, - obj->reference.address, size, + err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, + buf, obj->reference.address, size, false); if (err) { if (buf != value->ibuf) @@ -617,8 +609,8 @@ drgn_object_read_reference(const struct drgn_object *obj, char buf[9]; assert(size <= sizeof(buf)); - err = drgn_memory_reader_read(&obj->prog->reader, buf, - obj->reference.address, size, + err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, + buf, obj->reference.address, size, false); if (err) return err; @@ -637,7 +629,7 @@ drgn_object_read(struct drgn_object *res, const 
struct drgn_object *obj) if (obj->is_reference) { union drgn_value value; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -827,8 +819,8 @@ drgn_object_read_c_string(const struct drgn_object *obj, char **ret) obj->type); } - return drgn_program_read_c_string(obj->prog, address, false, max_size, - ret); + return drgn_program_read_c_string(drgn_object_program(obj), address, + false, max_size, ret); } LIBDRGN_PUBLIC struct drgn_error * @@ -941,7 +933,7 @@ drgn_compound_object_is_zero(const struct drgn_object *obj, struct drgn_type_member *members; size_t num_members, i; - drgn_object_init(&member, obj->prog); + drgn_object_init(&member, drgn_object_program(obj)); members = drgn_type_members(underlying_type); num_members = drgn_type_num_members(underlying_type); for (i = 0; i < num_members; i++) { @@ -982,7 +974,7 @@ drgn_array_object_is_zero(const struct drgn_object *obj, if (err) return err; - drgn_object_init(&element, obj->prog); + drgn_object_init(&element, drgn_object_program(obj)); length = drgn_type_length(underlying_type); for (i = 0; i < length; i++) { err = drgn_object_slice(&element, obj, element_type, @@ -1075,7 +1067,7 @@ drgn_object_cast(struct drgn_object *res, { const struct drgn_language *lang = drgn_type_language(qualified_type.type); - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1094,13 +1086,13 @@ drgn_object_reinterpret(struct drgn_object *res, enum drgn_object_kind kind; uint64_t bit_size; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } - err = drgn_byte_order_to_little_endian(res->prog, byte_order, - &little_endian); + 
err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &little_endian); if (err) return err; @@ -1133,24 +1125,24 @@ drgn_object_reinterpret(struct drgn_object *res, LIBDRGN_PUBLIC struct drgn_error * drgn_object_integer_literal(struct drgn_object *res, uint64_t uvalue) { - const struct drgn_language *lang = drgn_program_language(res->prog); - + const struct drgn_language *lang = + drgn_program_language(drgn_object_program(res)); return lang->integer_literal(res, uvalue); } LIBDRGN_PUBLIC struct drgn_error * drgn_object_bool_literal(struct drgn_object *res, bool bvalue) { - const struct drgn_language *lang = drgn_program_language(res->prog); - + const struct drgn_language *lang = + drgn_program_language(drgn_object_program(res)); return lang->bool_literal(res, bvalue); } LIBDRGN_PUBLIC struct drgn_error * drgn_object_float_literal(struct drgn_object *res, double fvalue) { - const struct drgn_language *lang = drgn_program_language(res->prog); - + const struct drgn_language *lang = + drgn_program_language(drgn_object_program(res)); return lang->float_literal(res, fvalue); } @@ -1167,8 +1159,7 @@ LIBDRGN_PUBLIC struct drgn_error *drgn_object_cmp(const struct drgn_object *lhs, int *ret) { const struct drgn_language *lang = drgn_object_language(lhs); - - if (lhs->prog != rhs->prog) { + if (drgn_object_program(lhs) != drgn_object_program(rhs)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1234,7 +1225,8 @@ drgn_object_##op_name(struct drgn_object *res, const struct drgn_object *lhs, \ { \ const struct drgn_language *lang = drgn_object_language(lhs); \ \ - if (lhs->prog != res->prog || rhs->prog != res->prog) { \ + if (drgn_object_program(lhs) != drgn_object_program(res) || \ + drgn_object_program(rhs) != drgn_object_program(res)) { \ return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, \ "objects are from different programs");\ } \ @@ -1263,7 +1255,7 @@ drgn_object_##op_name(struct 
drgn_object *res, const struct drgn_object *obj) \ { \ const struct drgn_language *lang = drgn_object_language(obj); \ \ - if (res->prog != obj->prog) { \ + if (drgn_object_program(res) != drgn_object_program(obj)) { \ return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, \ "objects are from different programs");\ } \ @@ -1282,10 +1274,7 @@ UNARY_OP(not) LIBDRGN_PUBLIC struct drgn_error * drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) { - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1300,13 +1289,22 @@ drgn_object_address_of(struct drgn_object *res, const struct drgn_object *obj) "cannot take address of bit field"); } - err = drgn_type_index_pointer_type(&obj->prog->tindex, - drgn_object_qualified_type(obj), - NULL, &qualified_type.type); + struct drgn_qualified_type qualified_type = + drgn_object_qualified_type(obj); + uint8_t word_size; + struct drgn_error *err = + drgn_program_word_size(drgn_object_program(obj), &word_size); if (err) return err; - qualified_type.qualifiers = 0; - return drgn_object_set_unsigned(res, qualified_type, + struct drgn_qualified_type result_type; + err = drgn_pointer_type_create(drgn_object_program(obj), qualified_type, + word_size, + drgn_type_language(qualified_type.type), + &result_type.type); + if (err) + return err; + result_type.qualifiers = 0; + return drgn_object_set_unsigned(res, result_type, obj->reference.address, 0); } @@ -1317,12 +1315,13 @@ drgn_object_subscript(struct drgn_object *res, const struct drgn_object *obj, struct drgn_error *err; struct drgn_element_info element; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } - err = 
drgn_program_element_info(obj->prog, obj->type, &element); + err = drgn_program_element_info(drgn_object_program(obj), obj->type, + &element); if (err) return err; @@ -1344,13 +1343,13 @@ drgn_object_member(struct drgn_object *res, const struct drgn_object *obj, struct drgn_error *err; struct drgn_member_info member; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } - err = drgn_program_member_info(obj->prog, obj->type, member_name, - &member); + err = drgn_program_member_info(drgn_object_program(obj), obj->type, + member_name, &member); if (err) return err; return drgn_object_slice(res, obj, member.qualified_type, @@ -1366,7 +1365,7 @@ struct drgn_error *drgn_object_member_dereference(struct drgn_object *res, struct drgn_member_value *member; struct drgn_qualified_type qualified_type; - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1377,10 +1376,10 @@ struct drgn_error *drgn_object_member_dereference(struct drgn_object *res, obj->type); } - err = drgn_type_index_find_member(&obj->prog->tindex, - drgn_type_type(underlying_type).type, - member_name, strlen(member_name), - &member); + err = drgn_program_find_member(drgn_object_program(obj), + drgn_type_type(underlying_type).type, + member_name, strlen(member_name), + &member); if (err) return err; @@ -1398,12 +1397,7 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, struct drgn_qualified_type qualified_type, const char *member_designator) { - const struct drgn_language *lang = drgn_object_language(obj); - struct drgn_error *err; - uint64_t address, bit_offset; - struct drgn_qualified_type result_type; - - if (res->prog != obj->prog) { + if (drgn_object_program(res) != drgn_object_program(obj)) { return 
drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "objects are from different programs"); } @@ -1414,8 +1408,12 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, obj->type); } - err = lang->bit_offset(obj->prog, qualified_type.type, - member_designator, &bit_offset); + const struct drgn_language *lang = drgn_object_language(obj); + uint64_t bit_offset; + struct drgn_error *err = lang->bit_offset(drgn_object_program(obj), + qualified_type.type, + member_designator, + &bit_offset); if (err) return err; if (bit_offset % 8) { @@ -1423,12 +1421,20 @@ drgn_object_container_of(struct drgn_object *res, const struct drgn_object *obj, "container_of() member is not byte-aligned"); } + uint64_t address; err = drgn_object_value_unsigned(obj, &address); if (err) return err; - err = drgn_type_index_pointer_type(&obj->prog->tindex, qualified_type, - NULL, &result_type.type); + uint8_t word_size; + err = drgn_program_word_size(drgn_object_program(obj), &word_size); + if (err) + return err; + struct drgn_qualified_type result_type; + err = drgn_pointer_type_create(drgn_object_program(obj), qualified_type, + word_size, + drgn_type_language(qualified_type.type), + &result_type.type); if (err) return err; result_type.qualifiers = 0; diff --git a/libdrgn/object.h b/libdrgn/object.h index 6e6258c1b..670cf979a 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -12,8 +12,6 @@ #ifndef DRGN_OBJECT_H #define DRGN_OBJECT_H -#include - #include "drgn.h" #include "type.h" diff --git a/libdrgn/object_index.c b/libdrgn/object_index.c index 68f069b0c..ca513775c 100644 --- a/libdrgn/object_index.c +++ b/libdrgn/object_index.c @@ -2,10 +2,9 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include -#include "internal.h" #include "object_index.h" -#include "type.h" void drgn_object_index_init(struct drgn_object_index *oindex) { @@ -41,6 +40,13 @@ drgn_object_index_add_finder(struct drgn_object_index *oindex, return NULL; } +void 
drgn_object_index_remove_finder(struct drgn_object_index *oindex) +{ + struct drgn_object_finder *finder = oindex->finders->next; + free(oindex->finders); + oindex->finders = finder; +} + struct drgn_error *drgn_object_index_find(struct drgn_object_index *oindex, const char *name, const char *filename, diff --git a/libdrgn/object_index.h b/libdrgn/object_index.h index fcd384795..022fbc222 100644 --- a/libdrgn/object_index.h +++ b/libdrgn/object_index.h @@ -41,7 +41,7 @@ struct drgn_object_finder { * Object index. * * A object index is used to find objects (variables, constants, and functions) - * by name. The types are found using callbacks which are registered with @ref + * by name. The objects are found using callbacks which are registered with @ref * drgn_object_index_add_finder(). @ref drgn_object_index_find() searches for an * object. */ @@ -61,6 +61,9 @@ struct drgn_error * drgn_object_index_add_finder(struct drgn_object_index *oindex, drgn_object_find_fn fn, void *arg); +/** Remove the most recently added object finding callback. */ +void drgn_object_index_remove_finder(struct drgn_object_index *oindex); + /** * Find an object in a @ref drgn_object_index. * diff --git a/libdrgn/path.c b/libdrgn/path.c index 6aa5ed690..a5b7a7278 100644 --- a/libdrgn/path.c +++ b/libdrgn/path.c @@ -2,9 +2,12 @@ // SPDX-License-Identifier: GPL-3.0+ #include +#include +#include #include -#include "internal.h" +#include "path.h" +#include "util.h" bool path_iterator_next(struct path_iterator *it, const char **component, size_t *component_len) diff --git a/libdrgn/internal.h b/libdrgn/path.h similarity index 77% rename from libdrgn/internal.h rename to libdrgn/path.h index 0c4d37d1b..87bcce1e0 100644 --- a/libdrgn/internal.h +++ b/libdrgn/path.h @@ -4,46 +4,28 @@ /** * @file * - * Miscellanous internal drgn helpers. + * Paths. + * + * See @ref Paths. 
*/ -#ifndef DRGN_INTERNAL_H -#define DRGN_INTERNAL_H +#ifndef DRGN_PATH_H +#define DRGN_PATH_H #include -#include -#include -#include +#include -#include "drgn.h" -#include "error.h" -#include "util.h" +#include /** * - * @defgroup Internals Internals + * @defgroup Paths Paths * - * Internal implementation. + * Utilities for working with paths. * * @{ */ -#ifndef LIBDRGN_PUBLIC -#define LIBDRGN_PUBLIC __attribute__((visibility("default"))) -#endif - -struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret); - -struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, - const char * const *path_formats, ...); - -struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); - -struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, - uint64_t *start_ret, uint64_t *end_ret); - -bool die_matches_filename(Dwarf_Die *die, const char *filename); - /** Path iterator input component. */ struct path_iterator_component { /** @@ -131,11 +113,8 @@ bool path_iterator_next(struct path_iterator *it, const char **component, bool path_ends_with(struct path_iterator *haystack, struct path_iterator *needle); -/** @} */ +bool die_matches_filename(Dwarf_Die *die, const char *filename); -struct drgn_lexer; -struct drgn_token; -struct drgn_error *drgn_lexer_c(struct drgn_lexer *lexer, - struct drgn_token *token); +/** @} */ -#endif /* DRGN_INTERNAL_H */ +#endif /* DRGN_PATH_H */ diff --git a/libdrgn/platform.c b/libdrgn/platform.c index a2ba6df37..bee66ed5a 100644 --- a/libdrgn/platform.c +++ b/libdrgn/platform.c @@ -1,10 +1,11 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ -#include +#include +#include -#include "internal.h" #include "platform.h" +#include "util.h" const struct drgn_architecture_info arch_info_unknown = { .name = "unknown", diff --git a/libdrgn/platform.h b/libdrgn/platform.h index 8510cc45a..f3a76c384 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -5,6 +5,7 @@ #define DRGN_PLATFORM_H #include +#include #include "drgn.h" diff --git a/libdrgn/program.c b/libdrgn/program.c index bbd45f369..a0fbd1ef7 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1,32 +1,32 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include #include +#include +#include +#include +#include #include #include #include -#include #include #include #include +#include #include -#include -#include -#include -#include "internal.h" +#include "debug_info.h" #include "dwarf_index.h" -#include "dwarf_info_cache.h" +#include "error.h" #include "language.h" #include "linux_kernel.h" #include "memory_reader.h" #include "object_index.h" #include "program.h" -#include "read.h" -#include "string_builder.h" #include "symbol.h" -#include "type_index.h" #include "vector.h" +#include "util.h" DEFINE_VECTOR_FUNCTIONS(drgn_prstatus_vector) DEFINE_HASH_TABLE_FUNCTIONS(drgn_prstatus_map, hash_pair_int_type, @@ -63,8 +63,6 @@ void drgn_program_set_platform(struct drgn_program *prog, if (!prog->has_platform) { prog->platform = *platform; prog->has_platform = true; - prog->tindex.word_size = - platform->flags & DRGN_PLATFORM_IS_64_BIT ? 
8 : 4; } } @@ -73,7 +71,7 @@ void drgn_program_init(struct drgn_program *prog, { memset(prog, 0, sizeof(*prog)); drgn_memory_reader_init(&prog->reader); - drgn_type_index_init(&prog->tindex); + drgn_program_init_types(prog); drgn_object_index_init(&prog->oindex); prog->core_fd = -1; if (platform) @@ -82,7 +80,6 @@ void drgn_program_init(struct drgn_program *prog, void drgn_program_deinit(struct drgn_program *prog) { - free(prog->task_state_chars); if (prog->prstatus_cached) { if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) drgn_prstatus_vector_deinit(&prog->prstatus_vector); @@ -92,7 +89,7 @@ void drgn_program_deinit(struct drgn_program *prog) free(prog->pgtable_it); drgn_object_index_deinit(&prog->oindex); - drgn_type_index_deinit(&prog->tindex); + drgn_program_deinit_types(prog); drgn_memory_reader_deinit(&prog->reader); free(prog->file_segments); @@ -105,7 +102,7 @@ void drgn_program_deinit(struct drgn_program *prog) if (prog->core_fd != -1) close(prog->core_fd); - drgn_dwarf_info_cache_destroy(prog->_dicache); + drgn_debug_info_destroy(prog->_dbinfo); } LIBDRGN_PUBLIC struct drgn_error * @@ -139,13 +136,6 @@ drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, read_fn, arg, physical); } -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_add_type_finder(struct drgn_program *prog, drgn_type_find_fn fn, - void *arg) -{ - return drgn_type_index_add_finder(&prog->tindex, fn, arg); -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_add_object_finder(struct drgn_program *prog, drgn_object_find_fn fn, void *arg) @@ -536,138 +526,67 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) return err; } -static struct drgn_error *drgn_program_get_dindex(struct drgn_program *prog, - struct drgn_dwarf_index **ret) +struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, + struct drgn_debug_info **ret) { struct drgn_error *err; - if (!prog->_dicache) { - const Dwfl_Callbacks *dwfl_callbacks; - struct drgn_dwarf_info_cache 
*dicache; - - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - dwfl_callbacks = &drgn_dwfl_callbacks; - else if (prog->flags & DRGN_PROGRAM_IS_LIVE) - dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; - else - dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; - - err = drgn_dwarf_info_cache_create(&prog->tindex, - dwfl_callbacks, &dicache); + if (!prog->_dbinfo) { + struct drgn_debug_info *dbinfo; + err = drgn_debug_info_create(prog, &dbinfo); if (err) return err; - err = drgn_program_add_type_finder(prog, drgn_dwarf_type_find, - dicache); - if (err) { - drgn_dwarf_info_cache_destroy(dicache); - return err; - } err = drgn_program_add_object_finder(prog, - drgn_dwarf_object_find, - dicache); + drgn_debug_info_find_object, + dbinfo); if (err) { - drgn_type_index_remove_finder(&prog->tindex); - drgn_dwarf_info_cache_destroy(dicache); + drgn_debug_info_destroy(dbinfo); return err; } - prog->_dicache = dicache; - } - *ret = &prog->_dicache->dindex; - return NULL; -} - -struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret) -{ - struct drgn_error *err; - struct drgn_dwarf_index *dindex; - - err = drgn_program_get_dindex(prog, &dindex); - if (err) - return err; - *ret = dindex->dwfl; - return NULL; -} - -static struct drgn_error * -userspace_report_debug_info(struct drgn_program *prog, - struct drgn_dwarf_index *dindex, - const char **paths, size_t n, - bool report_default) -{ - struct drgn_error *err; - size_t i; - - for (i = 0; i < n; i++) { - int fd; - Elf *elf; - - err = open_elf_file(paths[i], &fd, &elf); + err = drgn_program_add_type_finder(prog, + drgn_debug_info_find_type, + dbinfo); if (err) { - err = drgn_dwarf_index_report_error(dindex, paths[i], - NULL, err); - if (err) - return err; - continue; - } - /* - * We haven't implemented a way to get the load address for - * anything reported here, so for now we report it as unloaded. 
- */ - err = drgn_dwarf_index_report_elf(dindex, paths[i], fd, elf, 0, - 0, NULL, NULL); - if (err) + drgn_object_index_remove_finder(&prog->oindex); + drgn_debug_info_destroy(dbinfo); return err; - } - - if (report_default) { - if (prog->flags & DRGN_PROGRAM_IS_LIVE) { - int ret; - - ret = dwfl_linux_proc_report(dindex->dwfl, prog->pid); - if (ret == -1) { - return drgn_error_libdwfl(); - } else if (ret) { - return drgn_error_create_os("dwfl_linux_proc_report", - ret, NULL); - } - } else if (dwfl_core_file_report(dindex->dwfl, prog->core, - NULL) == -1) { - return drgn_error_libdwfl(); } + prog->_dbinfo = dbinfo; } + *ret = prog->_dbinfo; return NULL; } /* Set the default language from the language of "main". */ -static void drgn_program_set_language_from_main(struct drgn_program *prog, - struct drgn_dwarf_index *dindex) +static void drgn_program_set_language_from_main(struct drgn_debug_info *dbinfo) { struct drgn_error *err; struct drgn_dwarf_index_iterator it; static const uint64_t tags[] = { DW_TAG_subprogram }; - - drgn_dwarf_index_iterator_init(&it, dindex, "main", strlen("main"), - tags, ARRAY_SIZE(tags)); - for (;;) { + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, + "main", strlen("main"), tags, + ARRAY_SIZE(tags)); + if (err) { + drgn_error_destroy(err); + return; + } + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { Dwarf_Die die; - const struct drgn_language *lang; - - err = drgn_dwarf_index_iterator_next(&it, &die, NULL); - if (err == &drgn_stop) { - break; - } else if (err) { + err = drgn_dwarf_index_get_die(index_die, &die, NULL); + if (err) { drgn_error_destroy(err); continue; } + const struct drgn_language *lang; err = drgn_language_from_die(&die, &lang); if (err) { drgn_error_destroy(err); continue; } - if (lang) { - prog->lang = lang; + dbinfo->prog->lang = lang; break; } } @@ -698,65 +617,56 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, 
size_t n, bool load_default, bool load_main) { struct drgn_error *err; - struct drgn_dwarf_index *dindex; - bool report_from_dwfl; if (!n && !load_default && !load_main) return NULL; - if (load_default) - load_main = true; - - err = drgn_program_get_dindex(prog, &dindex); + struct drgn_debug_info *dbinfo; + err = drgn_program_get_dbinfo(prog, &dbinfo); if (err) return err; - drgn_dwarf_index_report_begin(dindex); - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - err = linux_kernel_report_debug_info(prog, dindex, paths, n, - load_default, load_main); - } else { - err = userspace_report_debug_info(prog, dindex, paths, n, - load_default); - } - if (err) { - drgn_dwarf_index_report_abort(dindex); - return err; - } - report_from_dwfl = (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && - load_main); - err = drgn_dwarf_index_report_end(dindex, report_from_dwfl); + err = drgn_debug_info_load(dbinfo, paths, n, load_default, load_main); if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { if (!prog->lang && !(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) - drgn_program_set_language_from_main(prog, dindex); + drgn_program_set_language_from_main(dbinfo); if (!prog->has_platform) { - dwfl_getdwarf(dindex->dwfl, + dwfl_getdwarf(dbinfo->dwfl, drgn_set_platform_from_dwarf, prog, 0); } } return err; } -static uint32_t get_prstatus_pid(struct drgn_program *prog, const char *data, - size_t size) +static struct drgn_error *get_prstatus_pid(struct drgn_program *prog, const char *data, + size_t size, uint32_t *ret) { + bool is_64_bit, bswap; + struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); + if (err) + return err; + err = drgn_program_bswap(prog, &bswap); + if (err) + return err; + + size_t offset = is_64_bit ? 32 : 24; uint32_t pr_pid; - memcpy(&pr_pid, data + (drgn_program_is_64_bit(prog) ? 
32 : 24), - sizeof(pr_pid)); - if (drgn_program_bswap(prog)) + if (size < offset + sizeof(pr_pid)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "NT_PRSTATUS is truncated"); + } + memcpy(&pr_pid, data + offset, sizeof(pr_pid)); + if (bswap) pr_pid = bswap_32(pr_pid); - return pr_pid; + *ret = pr_pid; + return NULL; } struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, const char *data, size_t size) { - if (size < (drgn_program_is_64_bit(prog) ? 36 : 28)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "NT_PRSTATUS is truncated"); - } if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { struct string *entry = drgn_prstatus_vector_append_entry(&prog->prstatus_vector); @@ -766,9 +676,12 @@ struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, entry->len = size; } else { struct drgn_prstatus_map_entry entry = { - .key = get_prstatus_pid(prog, data, size), .value = { data, size }, }; + struct drgn_error *err = get_prstatus_pid(prog, data, size, + &entry.key); + if (err) + return err; if (drgn_prstatus_map_insert(&prog->prstatus_map, &entry, NULL) == -1) return &drgn_enomem; @@ -863,21 +776,19 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, struct string *ret, uint32_t *tid_ret) { - struct drgn_error *err; - assert(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL); - err = drgn_program_cache_prstatus(prog); + struct drgn_error *err = drgn_program_cache_prstatus(prog); if (err) return err; if (cpu < prog->prstatus_vector.size) { *ret = prog->prstatus_vector.data[cpu]; - *tid_ret = get_prstatus_pid(prog, ret->str, ret->len); + return get_prstatus_pid(prog, ret->str, ret->len, tid_ret); } else { ret->str = NULL; ret->len = 0; + return NULL; } - return NULL; } struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, @@ -1069,18 +980,16 @@ LIBDRGN_PUBLIC struct drgn_error * \ drgn_program_read_u##n(struct drgn_program *prog, uint64_t address, \ bool physical, uint##n##_t 
*ret) \ { \ - struct drgn_error *err; \ + bool bswap; \ + struct drgn_error *err = drgn_program_bswap(prog, &bswap); \ + if (err) \ + return err; \ uint##n##_t tmp; \ - \ - if (!prog->has_platform) { \ - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, \ - "program byte order is not known"); \ - } \ err = drgn_memory_reader_read(&prog->reader, &tmp, address, \ sizeof(tmp), physical); \ if (err) \ return err; \ - if (drgn_program_bswap(prog)) \ + if (bswap) \ tmp = bswap_##n(tmp); \ *ret = tmp; \ return NULL; \ @@ -1095,19 +1004,20 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_word(struct drgn_program *prog, uint64_t address, bool physical, uint64_t *ret) { - struct drgn_error *err; - - if (!prog->has_platform) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "program word size is not known"); - } - if (drgn_program_is_64_bit(prog)) { + bool is_64_bit, bswap; + struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); + if (err) + return err; + err = drgn_program_bswap(prog, &bswap); + if (err) + return err; + if (is_64_bit) { uint64_t tmp; err = drgn_memory_reader_read(&prog->reader, &tmp, address, sizeof(tmp), physical); if (err) return err; - if (drgn_program_bswap(prog)) + if (bswap) tmp = bswap_64(tmp); *ret = tmp; } else { @@ -1116,28 +1026,20 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, sizeof(tmp), physical); if (err) return err; - if (drgn_program_bswap(prog)) + if (bswap) tmp = bswap_32(tmp); *ret = tmp; } return NULL; } -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_find_type(struct drgn_program *prog, const char *name, - const char *filename, struct drgn_qualified_type *ret) -{ - return drgn_type_index_find(&prog->tindex, name, filename, - drgn_program_language(prog), ret); -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_find_object(struct drgn_program *prog, const char *name, const char *filename, enum drgn_find_object_flags flags, struct drgn_object *ret) { - if (ret && ret->prog != 
prog) { + if (ret && drgn_object_program(ret) != prog) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "object is from wrong program"); } @@ -1150,14 +1052,9 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, Dwfl_Module *module, struct drgn_symbol *ret) { - const char *name; - GElf_Off offset; - GElf_Sym elf_sym; - if (!module) { - if (prog->_dicache) { - module = dwfl_addrmodule(prog->_dicache->dindex.dwfl, - address); + if (prog->_dbinfo) { + module = dwfl_addrmodule(prog->_dbinfo->dwfl, address); if (!module) return false; } else { @@ -1165,8 +1062,10 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, } } - name = dwfl_module_addrinfo(module, address, &offset, &elf_sym, NULL, - NULL, NULL); + GElf_Off offset; + GElf_Sym elf_sym; + const char *name = dwfl_module_addrinfo(module, address, &offset, + &elf_sym, NULL, NULL, NULL); if (!name) return false; ret->name = name; @@ -1254,8 +1153,8 @@ drgn_program_find_symbol_by_name(struct drgn_program *prog, .ret = ret, }; - if (prog->_dicache && - dwfl_getmodules(prog->_dicache->dindex.dwfl, find_symbol_by_name_cb, + if (prog->_dbinfo && + dwfl_getmodules(prog->_dbinfo->dwfl, find_symbol_by_name_cb, &arg, 0)) return arg.err; return drgn_error_format(DRGN_ERROR_LOOKUP, @@ -1288,8 +1187,8 @@ drgn_program_member_info(struct drgn_program *prog, struct drgn_type *type, struct drgn_error *err; struct drgn_member_value *member; - err = drgn_type_index_find_member(&prog->tindex, type, member_name, - strlen(member_name), &member); + err = drgn_program_find_member(prog, type, member_name, + strlen(member_name), &member); if (err) return err; diff --git a/libdrgn/program.h b/libdrgn/program.h index 7efbdcf23..81707d642 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -13,17 +13,24 @@ #define DRGN_PROGRAM_H #include +#include +#include #ifdef WITH_LIBKDUMPFILE #include #endif +#include "drgn.h" #include "hash_table.h" +#include "language.h" #include 
"memory_reader.h" #include "object_index.h" #include "platform.h" -#include "type_index.h" +#include "type.h" #include "vector.h" +struct drgn_debug_info; +struct drgn_symbol; + /** * @ingroup Internals * @@ -54,46 +61,81 @@ struct vmcoreinfo { bool pgtable_l5_enabled; }; +DEFINE_VECTOR_TYPE(drgn_typep_vector, struct drgn_type *) DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct string) DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct string) -struct drgn_dwarf_info_cache; -struct drgn_dwarf_index; - struct drgn_program { /** @privatesection */ - struct drgn_memory_reader reader; - struct drgn_type_index tindex; - struct drgn_object_index oindex; - struct drgn_memory_file_segment *file_segments; - /* Default language of the program. */ - const struct drgn_language *lang; + /* - * Valid iff flags & DRGN_PROGRAM_IS_LINUX_KERNEL. + * Memory/core dump. */ - struct vmcoreinfo vmcoreinfo; - /* Cached PAGE_OFFSET. */ - uint64_t page_offset; - /* Cached vmemmap. */ - uint64_t vmemmap; - /* Cached THREAD_SIZE. */ - uint64_t thread_size; + struct drgn_memory_reader reader; + /* Elf core dump or /proc/pid/mem file segments. */ + struct drgn_memory_file_segment *file_segments; + /* Elf core dump. Not valid for live programs or kdump files. */ + Elf *core; + /* File descriptor for ELF core dump, kdump file, or /proc/pid/mem. */ + int core_fd; + /* PID of live userspace program. */ + pid_t pid; #ifdef WITH_LIBKDUMPFILE kdump_ctx_t *kdump_ctx; #endif + /* - * Valid iff !(flags & DRGN_PROGRAM_IS_LIVE), unless the file - * was a kdump file. + * Types. + */ + /** Callbacks for finding types. */ + struct drgn_type_finder *type_finders; + /** Void type for each language. */ + struct drgn_type void_types[DRGN_NUM_LANGUAGES]; + /** Cache of primitive types. */ + struct drgn_type *primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; + /** Cache of deduplicated types. */ + struct drgn_dedupe_type_set dedupe_types; + /** + * List of created types that cannot be deduplicated. 
+ * + * Complete structure, union, and class types, as well as function + * types, refer to lazily-evaluated types, so they cannot be easily + * deduplicated. + * + * Complete enumerated types could be deduplicated, but it's probably + * not worth the effort of hashing and comparing long lists of + * enumerators. + * + * All other types, including incomplete structure, union, class, and + * enumerated types, are deduplicated. + */ + struct drgn_typep_vector created_types; + /** Cache for @ref drgn_program_find_member(). */ + struct drgn_member_map members; + /** + * Set of types which have been already cached in @ref + * drgn_program::members. + */ + struct drgn_type_set members_cached; + + /* + * Debugging information. + */ + struct drgn_object_index oindex; + struct drgn_debug_info *_dbinfo; + + /* + * Program information. + */ + /* Default language of the program. */ + const struct drgn_language *lang; + struct drgn_platform platform; + bool has_platform; + enum drgn_program_flags flags; + + /* + * Stack traces. */ - Elf *core; - int core_fd; - /* - * Valid iff - * (flags & (DRGN_PROGRAM_IS_LINUX_KERNEL | DRGN_PROGRAM_IS_LIVE)) == - * DRGN_PROGRAM_IS_LIVE. - */ - pid_t pid; - struct drgn_dwarf_info_cache *_dicache; union { /* * For the Linux kernel, PRSTATUS notes indexed by CPU. See @ref @@ -109,22 +151,26 @@ struct drgn_program { /* See @ref drgn_object_stack_trace_next_thread(). */ const struct drgn_object *stack_trace_obj; uint32_t stack_trace_tid; - enum drgn_program_flags flags; - struct drgn_platform platform; - bool has_platform; - bool attached_dwfl_state; bool prstatus_cached; + bool attached_dwfl_state; + + /* + * Linux kernel-specific. + */ + struct vmcoreinfo vmcoreinfo; + /* Cached PAGE_OFFSET. */ + uint64_t page_offset; + /* Cached vmemmap. */ + uint64_t vmemmap; + /* Cached THREAD_SIZE. */ + uint64_t thread_size; + /* Page table iterator for linux_helper_read_vm(). 
*/ + struct pgtable_iterator *pgtable_it; /* * Whether @ref drgn_program::pgtable_it is currently being used. Used * to prevent address translation from recursing. */ bool pgtable_it_in_use; - - /* Page table iterator for linux_helper_read_vm(). */ - struct pgtable_iterator *pgtable_it; - /* Cache for @ref linux_helper_task_state_to_char(). */ - char *task_state_chars; - uint64_t task_report; }; /** Initialize a @ref drgn_program. */ @@ -159,29 +205,57 @@ struct drgn_error *drgn_program_init_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid); -static inline bool drgn_program_is_little_endian(struct drgn_program *prog) +static inline struct drgn_error * +drgn_program_is_little_endian(struct drgn_program *prog, bool *ret) { - assert(prog->has_platform); - return prog->platform.flags & DRGN_PLATFORM_IS_LITTLE_ENDIAN; + if (!prog->has_platform) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "program byte order is not known"); + } + *ret = prog->platform.flags & DRGN_PLATFORM_IS_LITTLE_ENDIAN; + return NULL; } /** * Return whether a @ref drgn_program has a different endianness than the host * system. 
*/ -static inline bool drgn_program_bswap(struct drgn_program *prog) +static inline struct drgn_error * +drgn_program_bswap(struct drgn_program *prog, bool *ret) +{ + bool is_little_endian; + struct drgn_error *err = drgn_program_is_little_endian(prog, + &is_little_endian); + if (err) + return err; + *ret = is_little_endian != (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__); + return NULL; +} + +static inline struct drgn_error * +drgn_program_is_64_bit(struct drgn_program *prog, bool *ret) { - return (drgn_program_is_little_endian(prog) != - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)); + if (!prog->has_platform) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "program word size is not known"); + } + *ret = prog->platform.flags & DRGN_PLATFORM_IS_64_BIT; + return NULL; } -static inline bool drgn_program_is_64_bit(struct drgn_program *prog) +static inline struct drgn_error * +drgn_program_word_size(struct drgn_program *prog, uint8_t *ret) { - assert(prog->has_platform); - return prog->platform.flags & DRGN_PLATFORM_IS_64_BIT; + bool is_64_bit; + struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); + if (err) + return err; + *ret = is_64_bit ? 8 : 4; + return NULL; } -struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret); +struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, + struct drgn_debug_info **ret); /** * Find the @c NT_PRSTATUS note for the given CPU. diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 0dd47174e..ee31e3afa 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -6,12 +6,15 @@ #define PY_SSIZE_T_CLEAN -#include +// IWYU pragma: begin_exports #include #include "structmember.h" #include "docstrings.h" #include "../drgn.h" +// IWYU pragma: end_exports + +#include "../hash_table.h" #include "../program.h" /* These were added in Python 3.7. 
*/ @@ -42,28 +45,14 @@ typedef struct { } DrgnObject; typedef struct { - PyObject_VAR_HEAD + PyObject_HEAD + struct drgn_type *type; enum drgn_qualifiers qualifiers; /* - * This serves two purposes: it caches attributes which were previously - * converted from a struct drgn_type member, and it keeps a reference to - * any objects which are referenced internally by _type. For example, in - * order to avoid doing a strdup(), we can set the name of a type - * directly to PyUnicode_AsUTF8(s). This is only valid as long as s is - * alive, so we store it here. + * Cache of attributes which were previously converted from a struct + * drgn_type member or used to create the type. */ PyObject *attr_cache; - /* - * A Type object can wrap a struct drgn_type created elsewhere, or it - * can have an embedded struct drgn_type. In the latter case, type - * points to _type. - */ - struct drgn_type *type; - union { - struct drgn_type _type[0]; - /* An object which must be kept alive for type to be valid. */ - PyObject *parent; - }; } DrgnType; typedef struct { @@ -88,11 +77,17 @@ typedef struct { struct drgn_platform *platform; } Platform; +DEFINE_HASH_SET_TYPE(pyobjectp_set, PyObject *) + typedef struct { PyObject_HEAD struct drgn_program prog; - PyObject *objects; PyObject *cache; + /* + * Set of objects that we need to hold a reference to during the + * lifetime of the Program. + */ + struct pyobjectp_set objects; } Program; typedef struct { @@ -119,38 +114,31 @@ typedef struct { PyObject *value; } TypeEnumerator; -/* - * LazyType.obj is a tagged pointer to a PyObject. If the - * DRGNPY_LAZY_TYPE_UNEVALUATED flag is unset, then LazyType.obj is the - * evaluated Type. If it is set and LazyType.lazy_type is set, then LazyType.obj - * is the parent Type and LazyType.lazy_type must be evaluated and wrapped. If - * the flag is set and LazyType.lazy_type is not set, then LazyType.obj is a - * Python callable that should return the Type. 
- */ -enum { - DRGNPY_LAZY_TYPE_UNEVALUATED = 1, - DRGNPY_LAZY_TYPE_MASK = ~(uintptr_t)1, -}; -static_assert(alignof(PyObject) >= 2, "PyObject is not aligned"); - -#define LazyType_HEAD \ - PyObject_HEAD \ - uintptr_t obj; \ - struct drgn_lazy_type *lazy_type; - typedef struct { - LazyType_HEAD + PyObject_HEAD + enum { + /* obj is the evaluated Type. */ + DRGNPY_LAZY_TYPE_EVALUATED, + /* lazy_type must be evaluated and wrapped. */ + DRGNPY_LAZY_TYPE_UNEVALUATED, + /* obj is a Python callable that should return the Type. */ + DRGNPY_LAZY_TYPE_CALLABLE, + } state; + union { + PyObject *obj; + struct drgn_lazy_type *lazy_type; + }; } LazyType; typedef struct { - LazyType_HEAD + LazyType lazy_type; PyObject *name; PyObject *bit_offset; PyObject *bit_field_size; } TypeMember; typedef struct { - LazyType_HEAD + LazyType lazy_type; PyObject *name; } TypeParameter; @@ -205,7 +193,7 @@ static inline DrgnObject *DrgnObject_alloc(Program *prog) } static inline Program *DrgnObject_prog(DrgnObject *obj) { - return container_of(obj->obj.prog, Program, prog); + return container_of(drgn_object_program(&obj->obj), Program, prog); } PyObject *DrgnObject_NULL(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *cast(PyObject *self, PyObject *args, PyObject *kwds); @@ -215,6 +203,8 @@ DrgnObject *DrgnObject_container_of(PyObject *self, PyObject *args, PyObject *Platform_wrap(const struct drgn_platform *platform); +int Program_hold_object(Program *prog, PyObject *obj); +bool Program_hold_reserve(Program *prog, size_t n); int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, struct drgn_qualified_type *ret); Program *program_from_core_dump(PyObject *self, PyObject *args, PyObject *kwds); @@ -223,29 +213,24 @@ Program *program_from_pid(PyObject *self, PyObject *args, PyObject *kwds); PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog); -static inline PyObject *DrgnType_parent(DrgnType *type) +static inline Program *DrgnType_prog(DrgnType *type) { - 
if (type->type == type->_type) - return (PyObject *)type; - else - return type->parent; + return container_of(drgn_type_program(type->type), Program, prog); } -PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type, - PyObject *parent); -int qualifiers_converter(PyObject *arg, void *result); -DrgnType *void_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *int_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *bool_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *float_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *complex_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *struct_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *union_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *class_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *enum_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *typedef_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *pointer_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *array_type(PyObject *self, PyObject *args, PyObject *kwds); -DrgnType *function_type(PyObject *self, PyObject *args, PyObject *kwds); +PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type); +DrgnType *Program_void_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_int_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_bool_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_float_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_complex_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_struct_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_union_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_class_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_enum_type(Program *self, PyObject *args, PyObject 
*kwds); +DrgnType *Program_typedef_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_pointer_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds); +DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds); int append_string(PyObject *parts, const char *s); int append_format(PyObject *parts, const char *format, ...); diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 433e78310..3e38bb90c 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -3,6 +3,7 @@ #include "drgnpy.h" #include "../helpers.h" +#include "../program.h" PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, PyObject *kwds) @@ -141,7 +142,7 @@ static int prog_or_pid_ns_converter(PyObject *o, void *p) DrgnObject *drgnpy_linux_helper_find_pid(PyObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"ns", "pid", NULL}; + static char *keywords[] = {"prog_or_ns", "pid", NULL}; struct drgn_error *err; struct prog_or_ns_arg prog_or_ns; struct index_arg pid = {}; @@ -219,24 +220,6 @@ DrgnObject *drgnpy_linux_helper_find_task(PyObject *self, PyObject *args, return res; } -PyObject *drgnpy_linux_helper_task_state_to_char(PyObject *self, PyObject *args, - PyObject *kwds) -{ - static char *keywords[] = {"task", NULL}; - struct drgn_error *err; - DrgnObject *task; - char c; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!:task_state_to_char", - keywords, &DrgnObject_type, &task)) - return NULL; - - err = linux_helper_task_state_to_char(&task->obj, &c); - if (err) - return set_drgn_error(err); - return PyUnicode_FromStringAndSize(&c, 1); -} - PyObject *drgnpy_linux_helper_kaslr_offset(PyObject *self, PyObject *args, PyObject *kwds) diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 43f182213..d79b939ff 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -1,12 +1,13 @@ // Copyright 
(c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include "drgnpy.h" -#include "../internal.h" #ifdef WITH_KDUMPFILE #include #endif +#include "drgnpy.h" +#include "../path.h" + PyObject *MissingDebugInfoError; PyObject *OutOfBoundsError; @@ -88,33 +89,6 @@ static PyMethodDef drgn_methods[] = { METH_NOARGS, drgn_program_from_kernel_DOC}, {"program_from_pid", (PyCFunction)program_from_pid, METH_VARARGS | METH_KEYWORDS, drgn_program_from_pid_DOC}, - {"void_type", (PyCFunction)void_type, METH_VARARGS | METH_KEYWORDS, - drgn_void_type_DOC}, - {"int_type", (PyCFunction)int_type, METH_VARARGS | METH_KEYWORDS, - drgn_int_type_DOC}, - {"bool_type", (PyCFunction)bool_type, METH_VARARGS | METH_KEYWORDS, - drgn_bool_type_DOC}, - {"float_type", (PyCFunction)float_type, METH_VARARGS | METH_KEYWORDS, - drgn_float_type_DOC}, - {"complex_type", (PyCFunction)complex_type, - METH_VARARGS | METH_KEYWORDS, drgn_complex_type_DOC}, - {"struct_type", (PyCFunction)struct_type, METH_VARARGS | METH_KEYWORDS, - drgn_struct_type_DOC}, - {"union_type", (PyCFunction)union_type, METH_VARARGS | METH_KEYWORDS, - drgn_union_type_DOC}, - {"class_type", (PyCFunction)class_type, METH_VARARGS | METH_KEYWORDS, - drgn_class_type_DOC}, - {"enum_type", (PyCFunction)enum_type, METH_VARARGS | METH_KEYWORDS, - drgn_enum_type_DOC}, - {"typedef_type", (PyCFunction)typedef_type, - METH_VARARGS | METH_KEYWORDS, - drgn_typedef_type_DOC}, - {"pointer_type", (PyCFunction)pointer_type, - METH_VARARGS | METH_KEYWORDS, drgn_pointer_type_DOC}, - {"array_type", (PyCFunction)array_type, METH_VARARGS | METH_KEYWORDS, - drgn_array_type_DOC}, - {"function_type", (PyCFunction)function_type, - METH_VARARGS | METH_KEYWORDS, drgn_function_type_DOC}, {"_linux_helper_read_vm", (PyCFunction)drgnpy_linux_helper_read_vm, METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_radix_tree_lookup", @@ -128,9 +102,6 @@ static PyMethodDef drgn_methods[] = { METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_find_task", 
(PyCFunction)drgnpy_linux_helper_find_task, METH_VARARGS | METH_KEYWORDS}, - {"_linux_helper_task_state_to_char", - (PyCFunction)drgnpy_linux_helper_task_state_to_char, - METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_kaslr_offset", (PyCFunction)drgnpy_linux_helper_kaslr_offset, METH_VARARGS | METH_KEYWORDS}, @@ -148,6 +119,66 @@ static struct PyModuleDef drgnmodule = { drgn_methods, }; +/* + * These are for type checking and aren't strictly required at runtime, but + * adding them anyways results in better pydoc output and saves us from fiddling + * with typing.TYPE_CHECKING/forward references. + */ +static int add_type_aliases(PyObject *m) +{ + /* + * This should be a subclass of typing.Protocol, but that is only + * available since Python 3.8. + */ + PyObject *IntegerLike = PyType_FromSpec(&(PyType_Spec){ + .name = "_drgn.IntegerLike", + .flags = Py_TPFLAGS_DEFAULT, + .slots = (PyType_Slot []){{0, NULL}}, + }); + if (!IntegerLike) + return -1; + if (PyModule_AddObject(m, "IntegerLike", IntegerLike) == -1) { + Py_DECREF(IntegerLike); + return -1; + } + + PyObject *os_module = PyImport_ImportModule("os"); + if (!os_module) + return -1; + PyObject *os_PathLike = PyObject_GetAttrString(os_module, "PathLike"); + Py_DECREF(os_module); + if (!os_PathLike) + return -1; + PyObject *item = Py_BuildValue("OOO", &PyUnicode_Type, &PyBytes_Type, + os_PathLike); + Py_DECREF(os_PathLike); + if (!item) + return -1; + + PyObject *typing_module = PyImport_ImportModule("typing"); + if (!typing_module) { + Py_DECREF(item); + return -1; + } + PyObject *typing_Union = PyObject_GetAttrString(typing_module, "Union"); + Py_DECREF(typing_module); + if (!typing_Union) { + Py_DECREF(item); + return -1; + } + + PyObject *Path = PyObject_GetItem(typing_Union, item); + Py_DECREF(typing_Union); + Py_DECREF(item); + if (!Path) + return -1; + if (PyModule_AddObject(m, "Path", Path) == -1) { + Py_DECREF(Path); + return -1; + } + return 0; +} + DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) { 
PyObject *m; @@ -158,7 +189,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) if (!m) return NULL; - if (add_module_constants(m) == -1) + if (add_module_constants(m) == -1 || add_type_aliases(m) == -1) goto err; FaultError_type.tp_base = (PyTypeObject *)PyExc_Exception; diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index 117c99c61..73a81a69d 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -1,13 +1,16 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include +#include +#include #include "drgnpy.h" #include "../error.h" #include "../object.h" +#include "../program.h" #include "../serialize.h" #include "../type.h" +#include "../util.h" static int DrgnObject_literal(struct drgn_object *res, PyObject *literal) { @@ -312,7 +315,8 @@ static int buffer_object_from_value(struct drgn_object *res, uint64_t bit_size, size; char *buf; - err = drgn_byte_order_to_little_endian(res->prog, byte_order, + err = drgn_byte_order_to_little_endian(drgn_object_program(res), + byte_order, &value.little_endian); if (err) { set_drgn_error(err); @@ -354,9 +358,9 @@ static int buffer_object_from_value(struct drgn_object *res, memset(buf, 0, size); value.bit_offset = bit_offset; - if (serialize_py_object(res->prog, buf, bit_offset + bit_size, - bit_offset, value_obj, &type, - value.little_endian) == -1) { + if (serialize_py_object(drgn_object_program(res), buf, + bit_offset + bit_size, bit_offset, value_obj, + &type, value.little_endian) == -1) { if (buf != value.ibuf) free(buf); return -1; @@ -590,7 +594,7 @@ static PyObject *DrgnObject_compound_value(struct drgn_object *obj, if (!dict) return NULL; - drgn_object_init(&member, obj->prog); + drgn_object_init(&member, drgn_object_program(obj)); members = drgn_type_members(underlying_type); num_members = drgn_type_num_members(underlying_type); for (i = 0; i < num_members; i++) { @@ -662,7 +666,7 @@ static PyObject *DrgnObject_array_value(struct drgn_object *obj, 
if (!list) return NULL; - drgn_object_init(&element, obj->prog); + drgn_object_init(&element, drgn_object_program(obj)); for (i = 0; i < length; i++) { PyObject *element_value; @@ -866,16 +870,28 @@ static PyObject *DrgnObject_repr(DrgnObject *self) Py_DECREF(tmp); } - if ((self->obj.is_reference || self->obj.kind == DRGN_OBJECT_BUFFER) && - self->obj.prog->has_platform) { + if (self->obj.is_reference || self->obj.kind == DRGN_OBJECT_BUFFER) { bool little_endian; - if (self->obj.is_reference) little_endian = self->obj.reference.little_endian; else little_endian = self->obj.value.little_endian; - if (little_endian != - drgn_program_is_little_endian(self->obj.prog) && + + bool print_byteorder; + if (drgn_object_program(&self->obj)->has_platform) { + bool prog_little_endian; + err = drgn_program_is_little_endian(drgn_object_program(&self->obj), + &prog_little_endian); + if (err) { + set_drgn_error(err); + goto out; + } + print_byteorder = little_endian != prog_little_endian; + } else { + print_byteorder = true; + } + + if (print_byteorder && append_format(parts, ", byteorder='%s'", little_endian ? "little" : "big") == -1) goto out; @@ -1025,8 +1041,7 @@ static Program *DrgnObject_get_prog(DrgnObject *self, void *arg) static PyObject *DrgnObject_get_type(DrgnObject *self, void *arg) { - return DrgnType_wrap(drgn_object_qualified_type(&self->obj), - (PyObject *)DrgnObject_prog(self)); + return DrgnType_wrap(drgn_object_qualified_type(&self->obj)); } static PyObject *DrgnObject_get_address(DrgnObject *self, void *arg) @@ -1075,53 +1090,55 @@ static int DrgnObject_binary_operand(PyObject *self, PyObject *other, } else { *obj = tmp; /* If self isn't a DrgnObject, then other must be. 
*/ - drgn_object_init(tmp, ((DrgnObject *)other)->obj.prog); + drgn_object_init(tmp, + drgn_object_program(&((DrgnObject *)other)->obj)); return DrgnObject_literal(tmp, self); } } -#define DrgnObject_BINARY_OP(op) \ -static PyObject *DrgnObject_##op(PyObject *left, PyObject *right) \ -{ \ - struct drgn_error *err; \ - struct drgn_object *lhs, lhs_tmp, *rhs, rhs_tmp; \ - DrgnObject *res = NULL; \ - int ret; \ - \ - ret = DrgnObject_binary_operand(left, right, &lhs, &lhs_tmp); \ - if (ret) \ - goto out; \ - ret = DrgnObject_binary_operand(right, left, &rhs, &rhs_tmp); \ - if (ret) \ - goto out_lhs; \ - \ - res = DrgnObject_alloc(container_of(lhs->prog, Program, prog)); \ - if (!res) { \ - ret = -1; \ - goto out_rhs; \ - } \ - \ - err = drgn_object_##op(&res->obj, lhs, rhs); \ - if (err) { \ - set_drgn_error(err); \ - Py_DECREF(res); \ - ret = -1; \ - goto out_rhs; \ - } \ - \ -out_rhs: \ - if (rhs == &rhs_tmp) \ - drgn_object_deinit(&rhs_tmp); \ -out_lhs: \ - if (lhs == &lhs_tmp) \ - drgn_object_deinit(&lhs_tmp); \ -out: \ - if (ret == -1) \ - return NULL; \ - else if (ret) \ - Py_RETURN_NOTIMPLEMENTED; \ - else \ - return (PyObject *)res; \ +#define DrgnObject_BINARY_OP(op) \ +static PyObject *DrgnObject_##op(PyObject *left, PyObject *right) \ +{ \ + struct drgn_error *err; \ + struct drgn_object *lhs, lhs_tmp, *rhs, rhs_tmp; \ + DrgnObject *res = NULL; \ + int ret; \ + \ + ret = DrgnObject_binary_operand(left, right, &lhs, &lhs_tmp); \ + if (ret) \ + goto out; \ + ret = DrgnObject_binary_operand(right, left, &rhs, &rhs_tmp); \ + if (ret) \ + goto out_lhs; \ + \ + res = DrgnObject_alloc(container_of(drgn_object_program(lhs), Program, \ + prog)); \ + if (!res) { \ + ret = -1; \ + goto out_rhs; \ + } \ + \ + err = drgn_object_##op(&res->obj, lhs, rhs); \ + if (err) { \ + set_drgn_error(err); \ + Py_DECREF(res); \ + ret = -1; \ + goto out_rhs; \ + } \ + \ +out_rhs: \ + if (rhs == &rhs_tmp) \ + drgn_object_deinit(&rhs_tmp); \ +out_lhs: \ + if (lhs == &lhs_tmp) \ + 
drgn_object_deinit(&lhs_tmp); \ +out: \ + if (ret == -1) \ + return NULL; \ + else if (ret) \ + Py_RETURN_NOTIMPLEMENTED; \ + else \ + return (PyObject *)res; \ } DrgnObject_BINARY_OP(add) DrgnObject_BINARY_OP(sub) diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 479654551..bd9948354 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -2,31 +2,26 @@ // SPDX-License-Identifier: GPL-3.0+ #include "drgnpy.h" +#include "../hash_table.h" +#include "../program.h" #include "../vector.h" +#include "../util.h" -static int Program_hold_object(Program *prog, PyObject *obj) -{ - PyObject *key; - int ret; +DEFINE_HASH_TABLE_FUNCTIONS(pyobjectp_set, hash_pair_ptr_type, + hash_table_scalar_eq) - key = PyLong_FromVoidPtr(obj); - if (!key) +int Program_hold_object(Program *prog, PyObject *obj) +{ + if (pyobjectp_set_insert(&prog->objects, &obj, NULL) == -1) return -1; - - ret = PyDict_SetItem(prog->objects, key, obj); - Py_DECREF(key); - return ret; + Py_INCREF(obj); + return 0; } -static int Program_hold_type(Program *prog, DrgnType *type) +bool Program_hold_reserve(Program *prog, size_t n) { - PyObject *parent; - - parent = DrgnType_parent(type); - if (parent && parent != (PyObject *)prog) - return Program_hold_object(prog, parent); - else - return 0; + return pyobjectp_set_reserve(&prog->objects, + pyobjectp_set_size(&prog->objects) + n); } int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, @@ -35,8 +30,11 @@ int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, struct drgn_error *err; if (PyObject_TypeCheck(type_obj, &DrgnType_type)) { - if (Program_hold_type(prog, (DrgnType *)type_obj) == -1) + if (DrgnType_prog((DrgnType *)type_obj) != prog) { + PyErr_SetString(PyExc_ValueError, + "type is from different program"); return -1; + } ret->type = ((DrgnType *)type_obj)->type; ret->qualifiers = ((DrgnType *)type_obj)->qualifiers; } else if (PyUnicode_Check(type_obj)) { @@ -66,15 +64,13 @@ int 
Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, static Program *Program_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"platform", NULL}; - PyObject *platform_obj = NULL, *objects, *cache; - struct drgn_platform *platform; - Program *prog; - + static char *keywords[] = { "platform", NULL }; + PyObject *platform_obj = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:Program", keywords, &platform_obj)) return NULL; + struct drgn_platform *platform; if (!platform_obj || platform_obj == Py_None) { platform = NULL; } else if (PyObject_TypeCheck(platform_obj, &Platform_type)) { @@ -85,22 +81,17 @@ static Program *Program_new(PyTypeObject *subtype, PyObject *args, return NULL; } - objects = PyDict_New(); - if (!objects) - return NULL; - - cache = PyDict_New(); + PyObject *cache = PyDict_New(); if (!cache) return NULL; - prog = (Program *)Program_type.tp_alloc(&Program_type, 0); + Program *prog = (Program *)Program_type.tp_alloc(&Program_type, 0); if (!prog) { Py_DECREF(cache); - Py_DECREF(objects); return NULL; } - prog->objects = objects; prog->cache = cache; + pyobjectp_set_init(&prog->objects); drgn_program_init(&prog->prog, platform); return prog; } @@ -108,21 +99,33 @@ static Program *Program_new(PyTypeObject *subtype, PyObject *args, static void Program_dealloc(Program *self) { drgn_program_deinit(&self->prog); - Py_XDECREF(self->objects); + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&self->objects); it.entry; + it = pyobjectp_set_next(it)) + Py_DECREF(*it.entry); + pyobjectp_set_deinit(&self->objects); Py_XDECREF(self->cache); Py_TYPE(self)->tp_free((PyObject *)self); } static int Program_traverse(Program *self, visitproc visit, void *arg) { - Py_VISIT(self->objects); + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&self->objects); it.entry; + it = pyobjectp_set_next(it)) + Py_VISIT(*it.entry); Py_VISIT(self->cache); return 0; } static int Program_clear(Program 
*self) { - Py_CLEAR(self->objects); + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&self->objects); it.entry; + it = pyobjectp_set_next(it)) + Py_DECREF(*it.entry); + pyobjectp_set_deinit(&self->objects); + pyobjectp_set_init(&self->objects); Py_CLEAR(self->cache); return 0; } @@ -239,8 +242,15 @@ static struct drgn_error *py_type_find_fn(enum drgn_type_kind kind, err = drgn_error_from_python(); goto out_type_obj; } - if (Program_hold_type((Program *)PyTuple_GET_ITEM(arg, 0), - (DrgnType *)type_obj) == -1) { + /* + * This check is also done in libdrgn, but we need it here because if + * the type isn't from this program, then there's no guarantee that it + * will remain valid after we decrement its reference count. + */ + if (DrgnType_prog((DrgnType *)type_obj) != + (Program *)PyTuple_GET_ITEM(arg, 0)) { + PyErr_SetString(PyExc_ValueError, + "type find callback returned type from wrong program"); err = drgn_error_from_python(); goto out_type_obj; } @@ -594,36 +604,7 @@ static PyObject *Program_find_type(Program *self, PyObject *args, PyObject *kwds path_cleanup(&filename); if (err) return set_drgn_error(err); - return DrgnType_wrap(qualified_type, (PyObject *)self); -} - -static PyObject *Program_pointer_type(Program *self, PyObject *args, - PyObject *kwds) -{ - static char *keywords[] = {"type", "qualifiers", "language", NULL}; - struct drgn_error *err; - PyObject *referenced_type_obj; - struct drgn_qualified_type referenced_type; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - struct drgn_qualified_type qualified_type; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&$O&:pointer_type", - keywords, &referenced_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) - return NULL; - - if (Program_type_arg(self, referenced_type_obj, false, - &referenced_type) == -1) - return NULL; - - err = drgn_type_index_pointer_type(&self->prog.tindex, referenced_type, - language, 
&qualified_type.type); - if (err) - return set_drgn_error(err); - qualified_type.qualifiers = qualifiers; - return DrgnType_wrap(qualified_type, (PyObject *)self); + return DrgnType_wrap(qualified_type); } static DrgnObject *Program_find_object(Program *self, const char *name, @@ -659,7 +640,6 @@ static DrgnObject *Program_object(Program *self, PyObject *args, struct enum_arg flags = { .type = FindObjectFlags_class, .value = DRGN_FIND_OBJECT_ANY, - .allow_none = true, }; struct path_arg filename = {.allow_none = true}; @@ -912,8 +892,6 @@ static PyMethodDef Program_methods[] = { #undef METHOD_READ_U {"type", (PyCFunction)Program_find_type, METH_VARARGS | METH_KEYWORDS, drgn_Program_type_DOC}, - {"pointer_type", (PyCFunction)Program_pointer_type, - METH_VARARGS | METH_KEYWORDS, drgn_Program_pointer_type_DOC}, {"object", (PyCFunction)Program_object, METH_VARARGS | METH_KEYWORDS, drgn_Program_object_DOC}, {"constant", (PyCFunction)Program_constant, @@ -926,6 +904,32 @@ static PyMethodDef Program_methods[] = { METH_VARARGS | METH_KEYWORDS, drgn_Program_stack_trace_DOC}, {"symbol", (PyCFunction)Program_symbol, METH_O, drgn_Program_symbol_DOC}, + {"void_type", (PyCFunction)Program_void_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_void_type_DOC}, + {"int_type", (PyCFunction)Program_int_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_int_type_DOC}, + {"bool_type", (PyCFunction)Program_bool_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_bool_type_DOC}, + {"float_type", (PyCFunction)Program_float_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_float_type_DOC}, + {"complex_type", (PyCFunction)Program_complex_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_complex_type_DOC}, + {"struct_type", (PyCFunction)Program_struct_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_struct_type_DOC}, + {"union_type", (PyCFunction)Program_union_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_union_type_DOC}, + {"class_type", (PyCFunction)Program_class_type, + 
METH_VARARGS | METH_KEYWORDS, drgn_Program_class_type_DOC}, + {"enum_type", (PyCFunction)Program_enum_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_enum_type_DOC}, + {"typedef_type", (PyCFunction)Program_typedef_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_typedef_type_DOC}, + {"pointer_type", (PyCFunction)Program_pointer_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_pointer_type_DOC}, + {"array_type", (PyCFunction)Program_array_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_array_type_DOC}, + {"function_type", (PyCFunction)Program_function_type, + METH_VARARGS | METH_KEYWORDS, drgn_Program_function_type_DOC}, {}, }; diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index 8fc2bfa26..2205b74c5 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -1,6 +1,8 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include + #include "drgnpy.h" PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog) diff --git a/libdrgn/python/test.c b/libdrgn/python/test.c index 2077a31ed..264d8b020 100644 --- a/libdrgn/python/test.c +++ b/libdrgn/python/test.c @@ -10,9 +10,8 @@ */ #include "drgnpy.h" - -#include "../internal.h" #include "../lexer.h" +#include "../path.h" #include "../serialize.h" DRGNPY_PUBLIC void drgn_test_lexer_init(struct drgn_lexer *lexer, diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 649dcfff2..946f108ca 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -1,140 +1,52 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ +#include + #include "drgnpy.h" +#include "../program.h" #include "../type.h" +#include "../util.h" static const char *drgn_type_kind_str(struct drgn_type *type) { return drgn_type_kind_spelling[drgn_type_kind(type)]; } -static DrgnType *DrgnType_new(enum drgn_qualifiers qualifiers) -{ - DrgnType *type_obj; - - type_obj = (DrgnType *)DrgnType_type.tp_alloc(&DrgnType_type, 1); - if (!type_obj) - return NULL; - type_obj->qualifiers = qualifiers; - type_obj->attr_cache = PyDict_New(); - if (!type_obj->attr_cache) { - Py_DECREF(type_obj); - return NULL; - } - type_obj->type = type_obj->_type; - return type_obj; -} - -DRGNPY_PUBLIC PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type, - PyObject *parent) +DRGNPY_PUBLIC PyObject *DrgnType_wrap(struct drgn_qualified_type qualified_type) { - DrgnType *type_obj; - - type_obj = (DrgnType *)DrgnType_type.tp_alloc(&DrgnType_type, 0); + DrgnType *type_obj = (DrgnType *)DrgnType_type.tp_alloc(&DrgnType_type, + 0); if (!type_obj) return NULL; + type_obj->type = qualified_type.type; type_obj->qualifiers = qualified_type.qualifiers; + Py_INCREF(DrgnType_prog(type_obj)); type_obj->attr_cache = PyDict_New(); if (!type_obj->attr_cache) { Py_DECREF(type_obj); return NULL; } - type_obj->type = qualified_type.type; - if (parent) { - Py_INCREF(parent); - type_obj->parent = parent; - } return (PyObject *)type_obj; } -static DrgnType *LazyType_get_borrowed(LazyType *self) -{ - if (unlikely(self->obj & DRGNPY_LAZY_TYPE_UNEVALUATED)) { - PyObject *obj; - PyObject *type; - - obj = (PyObject *)(self->obj & DRGNPY_LAZY_TYPE_MASK); - if (self->lazy_type) { - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - bool clear = false; - - /* Avoid the thread state overhead if we can. 
*/ - if (!drgn_lazy_type_is_evaluated(self->lazy_type)) - clear = set_drgn_in_python(); - err = drgn_lazy_type_evaluate(self->lazy_type, - &qualified_type); - if (clear) - clear_drgn_in_python(); - if (err) { - set_drgn_error(err); - return NULL; - } - type = DrgnType_wrap(qualified_type, obj); - if (!type) - return NULL; - } else { - type = PyObject_CallObject(obj, NULL); - if (!type) - return NULL; - if (!PyObject_TypeCheck(type, &DrgnType_type)) { - Py_DECREF(type); - PyErr_SetString(PyExc_TypeError, - "type callable must return Type"); - return NULL; - } - } - Py_DECREF(obj); - self->obj = (uintptr_t)type; - } - return (DrgnType *)self->obj; -} - -static DrgnType *LazyType_get(LazyType *self, void *arg) -{ - DrgnType *ret; - - ret = LazyType_get_borrowed(self); - Py_XINCREF(ret); - return ret; -} - -struct py_type_thunk { - struct drgn_type_thunk thunk; - LazyType *lazy_type; -}; - -static struct drgn_error * -py_type_thunk_evaluate_fn(struct drgn_type_thunk *thunk, - struct drgn_qualified_type *ret) +static inline struct drgn_qualified_type DrgnType_unwrap(DrgnType *type) { - struct py_type_thunk *t = container_of(thunk, struct py_type_thunk, thunk); - PyGILState_STATE gstate; - struct drgn_error *err = NULL; - DrgnType *type; - - gstate = PyGILState_Ensure(); - type = LazyType_get_borrowed(t->lazy_type); - if (!type) { - err = drgn_error_from_python(); - goto out; - } - ret->type = type->type; - ret->qualifiers = type->qualifiers; -out: - PyGILState_Release(gstate); - return err; + return (struct drgn_qualified_type){ + .type = type->type, + .qualifiers = type->qualifiers, + }; } -static void py_type_thunk_free_fn(struct drgn_type_thunk *thunk) +static PyObject *DrgnType_get_ptr(DrgnType *self, void *arg) { - free(container_of(thunk, struct py_type_thunk, thunk)); + return PyLong_FromVoidPtr(self->type); } -static PyObject *DrgnType_get_ptr(DrgnType *self, void *arg) +static Program *DrgnType_get_prog(DrgnType *self, void *arg) { - return 
PyLong_FromVoidPtr(self->type); + Py_INCREF(DrgnType_prog(self)); + return DrgnType_prog(self); } static PyObject *DrgnType_get_kind(DrgnType *self) @@ -232,12 +144,10 @@ static PyObject *DrgnType_get_type(DrgnType *self) drgn_type_kind_str(self->type)); } if (drgn_type_kind(self->type) == DRGN_TYPE_ENUM && - !drgn_type_is_complete(self->type)) { + !drgn_type_is_complete(self->type)) Py_RETURN_NONE; - } else { - return DrgnType_wrap(drgn_type_type(self->type), - (PyObject *)self); - } + else + return DrgnType_wrap(drgn_type_type(self->type)); } static PyObject *DrgnType_get_members(DrgnType *self) @@ -270,9 +180,8 @@ static PyObject *DrgnType_get_members(DrgnType *self) if (!item) goto err; PyTuple_SET_ITEM(members_obj, i, (PyObject *)item); - Py_INCREF(self); - item->obj = (uintptr_t)self | DRGNPY_LAZY_TYPE_UNEVALUATED; - item->lazy_type = &member->type; + item->lazy_type.state = DRGNPY_LAZY_TYPE_UNEVALUATED; + item->lazy_type.lazy_type = &member->type; if (member->name) { item->name = PyUnicode_FromString(member->name); if (!item->name) @@ -370,9 +279,8 @@ static PyObject *DrgnType_get_parameters(DrgnType *self) if (!item) goto err; PyTuple_SET_ITEM(parameters_obj, i, (PyObject *)item); - Py_INCREF(self); - item->obj = (uintptr_t)self | DRGNPY_LAZY_TYPE_UNEVALUATED; - item->lazy_type = ¶meter->type; + item->lazy_type.state = DRGNPY_LAZY_TYPE_UNEVALUATED; + item->lazy_type.lazy_type = ¶meter->type; if (parameter->name) { item->name = PyUnicode_FromString(parameter->name); if (!item->name) @@ -452,6 +360,7 @@ static PyGetSetDef DrgnType_getset[] = { "This is used for testing.\n" "\n" ":vartype: int"}, + {"prog", (getter)DrgnType_get_prog, NULL, drgn_Type_prog_DOC}, {"kind", (getter)DrgnType_getter, NULL, drgn_Type_kind_DOC, &DrgnType_attr_kind}, {"primitive", (getter)DrgnType_getter, NULL, drgn_Type_primitive_DOC, @@ -483,103 +392,32 @@ static PyGetSetDef DrgnType_getset[] = { {}, }; -static int type_arg(PyObject *arg, struct drgn_qualified_type *qualified_type, - 
DrgnType *type_obj) -{ - Py_INCREF(arg); - if (!PyObject_IsInstance(arg, (PyObject *)&DrgnType_type)) { - Py_DECREF(arg); - PyErr_SetString(PyExc_TypeError, "type must be Type"); - return -1; - } - - if (type_obj) { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_type.id, arg) == -1) { - Py_DECREF(arg); - return -1; - } - } - qualified_type->type = ((DrgnType *)arg)->type; - qualified_type->qualifiers = ((DrgnType *)arg)->qualifiers; - Py_DECREF(arg); - return 0; -} - -static int lazy_type_from_py(struct drgn_lazy_type *lazy_type, LazyType *obj) -{ - if (obj->obj & DRGNPY_LAZY_TYPE_UNEVALUATED) { - struct py_type_thunk *thunk; - - thunk = malloc(sizeof(*thunk)); - if (!thunk) { - PyErr_NoMemory(); - return -1; - } - thunk->thunk.evaluate_fn = py_type_thunk_evaluate_fn; - thunk->thunk.free_fn = py_type_thunk_free_fn; - thunk->lazy_type = obj; - drgn_lazy_type_init_thunk(lazy_type, &thunk->thunk); - } else { - DrgnType *type = (DrgnType *)obj->obj; - - drgn_lazy_type_init_evaluated(lazy_type, type->type, - type->qualifiers); - } - return 0; -} - static void DrgnType_dealloc(DrgnType *self) { - if (self->type != self->_type) { - Py_XDECREF(self->parent); - } else if (drgn_type_is_complete(self->type)) { - if (drgn_type_has_members(self->type)) { - struct drgn_type_member *members; - size_t num_members, i; - - members = drgn_type_members(self->type); - num_members = drgn_type_num_members(self->type); - for (i = 0; i < num_members; i++) - drgn_type_member_deinit(&members[i]); - free(members); - } - if (drgn_type_has_parameters(self->type)) { - struct drgn_type_parameter *parameters; - size_t num_parameters, i; - - parameters = drgn_type_parameters(self->type); - num_parameters = drgn_type_num_parameters(self->type); - for (i = 0; i < num_parameters; i++) - drgn_type_parameter_deinit(¶meters[i]); - free(parameters); - } - if (drgn_type_has_enumerators(self->type)) - free(drgn_type_enumerators(self->type)); - } Py_XDECREF(self->attr_cache); + if 
(self->type) + Py_DECREF(DrgnType_prog(self)); Py_TYPE(self)->tp_free((PyObject *)self); } static int DrgnType_traverse(DrgnType *self, visitproc visit, void *arg) { - if (self->type != self->_type) - Py_VISIT(self->parent); Py_VISIT(self->attr_cache); + if (self->type) + Py_VISIT(DrgnType_prog(self)); return 0; } static int DrgnType_clear(DrgnType *self) { - if (self->type != self->_type) - Py_CLEAR(self->parent); Py_CLEAR(self->attr_cache); + if (self->type) { + Py_DECREF(DrgnType_prog(self)); + self->type = NULL; + } return 0; } -#undef visit_type_thunks -#undef visit_lazy_type - static int append_field(PyObject *parts, bool *first, const char *format, ...) { va_list ap; @@ -677,7 +515,7 @@ static PyObject *DrgnType_repr(DrgnType *self) if (!parts) return NULL; - if (append_format(parts, "%s_type(", + if (append_format(parts, "prog.%s_type(", drgn_type_kind_str(self->type)) == -1) goto out; if (append_member(parts, self, &first, name) == -1) @@ -694,14 +532,34 @@ static PyObject *DrgnType_repr(DrgnType *self) goto join; } - if (append_member(parts, self, &first, size) == -1) - goto out_repr_leave; - if (append_member(parts, self, &first, length) == -1) + if (drgn_type_kind(self->type) != DRGN_TYPE_POINTER && + append_member(parts, self, &first, size) == -1) goto out_repr_leave; if (append_member(parts, self, &first, is_signed) == -1) goto out_repr_leave; if (append_member(parts, self, &first, type) == -1) goto out_repr_leave; + if (drgn_type_kind(self->type) == DRGN_TYPE_POINTER) { + bool print_size; + if (drgn_type_program(self->type)->has_platform) { + uint8_t word_size; + struct drgn_error *err = + drgn_program_word_size(drgn_type_program(self->type), + &word_size); + if (err) { + set_drgn_error(err); + goto out_repr_leave; + } + print_size = drgn_type_size(self->type) != word_size; + } else { + print_size = true; + } + if (print_size && + append_member(parts, self, &first, size) == -1) + goto out_repr_leave; + } + if (append_member(parts, self, &first, 
length) == -1) + goto out_repr_leave; if (append_member(parts, self, &first, members) == -1) goto out_repr_leave; if (append_member(parts, self, &first, enumerators) == -1) @@ -723,6 +581,15 @@ static PyObject *DrgnType_repr(DrgnType *self) } Py_DECREF(obj); } + if (drgn_type_language(self->type) != + drgn_program_language(drgn_type_program(self->type))) { + PyObject *obj = DrgnType_get_language(self, NULL); + if (append_field(parts, &first, "language=%R", obj) == -1) { + Py_DECREF(obj); + goto out_repr_leave; + } + Py_DECREF(obj); + } if (append_string(parts, ")") == -1) goto out_repr_leave; @@ -742,38 +609,25 @@ static PyObject *DrgnType_repr(DrgnType *self) static PyObject *DrgnType_str(DrgnType *self) { - struct drgn_qualified_type qualified_type = { - .type = self->type, - .qualifiers = self->qualifiers, - }; - struct drgn_error *err; - PyObject *ret; char *str; - - err = drgn_format_type(qualified_type, &str); + struct drgn_error *err = drgn_format_type(DrgnType_unwrap(self), &str); if (err) return set_drgn_error(err); - ret = PyUnicode_FromString(str); + PyObject *ret = PyUnicode_FromString(str); free(str); return ret; } static PyObject *DrgnType_type_name(DrgnType *self) { - struct drgn_qualified_type qualified_type = { - .type = self->type, - .qualifiers = self->qualifiers, - }; - struct drgn_error *err; - PyObject *ret; char *str; - - err = drgn_format_type_name(qualified_type, &str); + struct drgn_error *err = drgn_format_type_name(DrgnType_unwrap(self), + &str); if (err) return set_drgn_error(err); - ret = PyUnicode_FromString(str); + PyObject *ret = PyUnicode_FromString(str); free(str); return ret; } @@ -783,17 +637,15 @@ static PyObject *DrgnType_is_complete(DrgnType *self) return PyBool_FromLong(drgn_type_is_complete(self->type)); } -int qualifiers_converter(PyObject *o, void *p) +static int qualifiers_converter(PyObject *o, void *p) { struct enum_arg arg = { .type = Qualifiers_class, .value = 0, - .allow_none = true, }; - if (!enum_converter(o, 
&arg)) return 0; - *(unsigned char *)p = arg.value; + *(enum drgn_qualifiers *)p = arg.value; return 1; } @@ -801,44 +653,35 @@ static PyObject *DrgnType_qualified(DrgnType *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { "qualifiers", NULL, }; - unsigned char qualifiers; - struct drgn_qualified_type qualified_type; - + enum drgn_qualifiers qualifiers; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:qualified", keywords, qualifiers_converter, &qualifiers)) return NULL; - qualified_type.type = self->type; - qualified_type.qualifiers = qualifiers; - return DrgnType_wrap(qualified_type, DrgnType_parent(self)); + struct drgn_qualified_type qualified_type = { + .type = self->type, + .qualifiers = qualifiers, + }; + return DrgnType_wrap(qualified_type); } static PyObject *DrgnType_unqualified(DrgnType *self) { - struct drgn_qualified_type qualified_type; - - qualified_type.type = self->type; - qualified_type.qualifiers = 0; - return DrgnType_wrap(qualified_type, DrgnType_parent(self)); + struct drgn_qualified_type qualified_type = { .type = self->type }; + return DrgnType_wrap(qualified_type); } static PyObject *DrgnType_richcompare(DrgnType *self, PyObject *other, int op) { - struct drgn_error *err; - struct drgn_qualified_type qualified_type1, qualified_type2; - bool clear; - bool ret; - if (!PyObject_TypeCheck(other, &DrgnType_type) || (op != Py_EQ && op != Py_NE)) Py_RETURN_NOTIMPLEMENTED; - clear = set_drgn_in_python(); - qualified_type1.type = self->type; - qualified_type1.qualifiers = self->qualifiers; - qualified_type2.type = ((DrgnType *)other)->type; - qualified_type2.qualifiers = ((DrgnType *)other)->qualifiers; - err = drgn_qualified_type_eq(qualified_type1, qualified_type2, &ret); + bool clear = set_drgn_in_python(); + bool ret; + struct drgn_error *err = drgn_qualified_type_eq(DrgnType_unwrap(self), + DrgnType_unwrap((DrgnType *)other), + &ret); if (clear) clear_drgn_in_python(); if (err) @@ -867,8 +710,6 @@ PyTypeObject DrgnType_type = 
{ PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn.Type", .tp_basicsize = sizeof(DrgnType), - /* The "item" of a Type object is an optional struct drgn_type. */ - .tp_itemsize = sizeof(struct drgn_type), .tp_dealloc = (destructor)DrgnType_dealloc, .tp_repr = (reprfunc)DrgnType_repr, .tp_str = (reprfunc)DrgnType_str, @@ -982,6 +823,58 @@ PyTypeObject TypeEnumerator_type = { .tp_new = (newfunc)TypeEnumerator_new, }; +static DrgnType *LazyType_get_borrowed(LazyType *self) +{ + if (unlikely(self->state != DRGNPY_LAZY_TYPE_EVALUATED)) { + PyObject *type; + if (self->state == DRGNPY_LAZY_TYPE_UNEVALUATED) { + bool clear = false; + /* Avoid the thread state overhead if we can. */ + if (!drgn_lazy_type_is_evaluated(self->lazy_type)) + clear = set_drgn_in_python(); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = + drgn_lazy_type_evaluate(self->lazy_type, + &qualified_type); + if (clear) + clear_drgn_in_python(); + if (err) + return set_drgn_error(err); + type = DrgnType_wrap(qualified_type); + if (!type) + return NULL; + } else { /* (self->state == DRGNPY_LAZY_TYPE_CALLABLE) */ + type = PyObject_CallObject(self->obj, NULL); + if (!type) + return NULL; + if (!PyObject_TypeCheck(type, &DrgnType_type)) { + Py_DECREF(type); + PyErr_SetString(PyExc_TypeError, + "type callable must return Type"); + return NULL; + } + Py_DECREF(self->obj); + } + self->state = DRGNPY_LAZY_TYPE_EVALUATED; + self->obj = type; + } + return (DrgnType *)self->obj; +} + +static DrgnType *LazyType_get(LazyType *self, void *arg) +{ + DrgnType *ret = LazyType_get_borrowed(self); + Py_XINCREF(ret); + return ret; +} + +static void LazyType_dealloc(LazyType *self) +{ + if (self->state != DRGNPY_LAZY_TYPE_UNEVALUATED) + Py_XDECREF(self->obj); + Py_TYPE(self)->tp_free((PyObject *)self); +} + static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { @@ -989,7 +882,7 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, "type", "name", 
"bit_offset", "bit_field_size", NULL }; PyObject *type_arg, *name = Py_None, *bit_offset = NULL, *bit_field_size = NULL; - uintptr_t obj; + int type_state; TypeMember *member; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO!O!:TypeMember", @@ -999,9 +892,9 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, return NULL; if (PyCallable_Check(type_arg)) { - obj = (uintptr_t)type_arg | DRGNPY_LAZY_TYPE_UNEVALUATED; + type_state = DRGNPY_LAZY_TYPE_CALLABLE; } else if (PyObject_TypeCheck(type_arg, &DrgnType_type)) { - obj = (uintptr_t)type_arg; + type_state = DRGNPY_LAZY_TYPE_EVALUATED; } else { PyErr_SetString(PyExc_TypeError, "TypeMember type must be type or callable returning Type"); @@ -1018,8 +911,9 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, if (!member) return NULL; + member->lazy_type.state = type_state; Py_INCREF(type_arg); - member->obj = obj; + member->lazy_type.obj = type_arg; Py_INCREF(name); member->name = name; @@ -1052,8 +946,7 @@ static void TypeMember_dealloc(TypeMember *self) Py_XDECREF(self->bit_field_size); Py_XDECREF(self->bit_offset); Py_XDECREF(self->name); - Py_XDECREF((PyObject *)(self->obj & DRGNPY_LAZY_TYPE_MASK)); - Py_TYPE(self)->tp_free((PyObject *)self); + LazyType_dealloc((LazyType *)self); } static PyObject *TypeMember_get_offset(TypeMember *self, void *arg) @@ -1163,7 +1056,7 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, { static char *keywords[] = {"type", "name", NULL}; PyObject *type_arg, *name = Py_None; - uintptr_t obj; + int type_state; TypeParameter *parameter; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:TypeParameter", @@ -1171,9 +1064,9 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, return NULL; if (PyCallable_Check(type_arg)) { - obj = (uintptr_t)type_arg | DRGNPY_LAZY_TYPE_UNEVALUATED; + type_state = DRGNPY_LAZY_TYPE_CALLABLE; } else if (PyObject_TypeCheck(type_arg, &DrgnType_type)) { - obj 
= (uintptr_t)type_arg; + type_state = DRGNPY_LAZY_TYPE_EVALUATED; } else { PyErr_SetString(PyExc_TypeError, "TypeParameter type must be type or callable returning Type"); @@ -1188,8 +1081,9 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, parameter = (TypeParameter *)subtype->tp_alloc(subtype, 0); if (parameter) { + parameter->lazy_type.state = type_state; Py_INCREF(type_arg); - parameter->obj = obj; + parameter->lazy_type.obj = type_arg; Py_INCREF(name); parameter->name = name; } @@ -1199,8 +1093,7 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, static void TypeParameter_dealloc(TypeParameter *self) { Py_XDECREF(self->name); - Py_XDECREF((PyObject *)(self->obj & DRGNPY_LAZY_TYPE_MASK)); - Py_TYPE(self)->tp_free((PyObject *)self); + LazyType_dealloc((LazyType *)self); } static PyObject *TypeParameter_repr(TypeParameter *self) @@ -1272,55 +1165,65 @@ PyTypeObject TypeParameter_type = { .tp_new = (newfunc)TypeParameter_new, }; -DrgnType *void_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_void_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { "qualifiers", "language", NULL, }; - unsigned char qualifiers = 0; + static char *keywords[] = { "qualifiers", "language", NULL }; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - struct drgn_qualified_type qualified_type; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&$O&:void_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$O&O&:void_type", keywords, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; - qualified_type.type = drgn_void_type(language); - qualified_type.qualifiers = qualifiers; - return (DrgnType *)DrgnType_wrap(qualified_type, NULL); + struct drgn_qualified_type qualified_type = { + .type = drgn_void_type(&self->prog, language), + .qualifiers = qualifiers, + }; + return (DrgnType 
*)DrgnType_wrap(qualified_type); } -DrgnType *int_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_int_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "size", "is_signed", "qualifiers", "language", NULL, + "name", "size", "is_signed", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - unsigned long size; + struct index_arg size = {}; int is_signed; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!kp|O&$O&:int_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&p|$O&O&:int_type", keywords, &PyUnicode_Type, &name_obj, - &size, &is_signed, + index_converter, &size, &is_signed, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_int_type_init(type_obj->type, name, size, is_signed, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_int_type_create(&self->prog, name, + size.uvalue, is_signed, + language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + return NULL; - if (drgn_type_name(type_obj->type) == name && + if (drgn_type_name(qualified_type.type) == name && _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); @@ -1330,36 +1233,45 @@ DrgnType *int_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -DrgnType *bool_type(PyObject *self, PyObject 
*args, PyObject *kwds) +DrgnType *Program_bool_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "size", "qualifiers", "language", NULL, + "name", "size", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - unsigned long size; - unsigned char qualifiers = 0; + struct index_arg size = {}; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!k|O&$O&:bool_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&|$O&O&:bool_type", keywords, &PyUnicode_Type, &name_obj, - &size, qualifiers_converter, - &qualifiers, language_converter, - &language)) + index_converter, &size, + qualifiers_converter, &qualifiers, + language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_bool_type_init(type_obj->type, name, size, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_bool_type_create(&self->prog, name, + size.uvalue, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + return NULL; - if (drgn_type_name(type_obj->type) == name && + if (drgn_type_name(qualified_type.type) == name && _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); @@ -1369,36 +1281,46 @@ DrgnType *bool_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -DrgnType *float_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_float_type(Program *self, PyObject *args, 
PyObject *kwds) { static char *keywords[] = { - "name", "size", "qualifiers", "language", NULL, + "name", "size", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - unsigned long size; - unsigned char qualifiers = 0; + struct index_arg size = {}; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!k|O&$O&:float_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&|$O&O&:float_type", keywords, &PyUnicode_Type, &name_obj, - &size, qualifiers_converter, - &qualifiers, language_converter, - &language)) + index_converter, &size, + qualifiers_converter, &qualifiers, + language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_float_type_init(type_obj->type, name, size, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_float_type_create(&self->prog, name, + size.uvalue, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + return NULL; - if (drgn_type_name(type_obj->type) == name && + if (drgn_type_name(qualified_type.type) == name && _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); @@ -1408,61 +1330,65 @@ DrgnType *float_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -DrgnType *complex_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_complex_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { "name", "size", "type", 
"qualifiers", NULL, }; - DrgnType *type_obj; + static char *keywords[] = { + "name", "size", "type", "qualifiers", "language", NULL + }; PyObject *name_obj; - const char *name; - unsigned long size; - PyObject *real_type_obj; - struct drgn_type *real_type; - unsigned char qualifiers = 0; + struct index_arg size = {}; + DrgnType *real_type_obj; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!kO|O&$O&:complex_type", - keywords, &PyUnicode_Type, &name_obj, - &size, &real_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O!O&O!|$O&O&:complex_type", keywords, + &PyUnicode_Type, &name_obj, + index_converter, &size, &DrgnType_type, + &real_type_obj, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - if (!PyObject_TypeCheck(real_type_obj, &DrgnType_type)) { - PyErr_SetString(PyExc_TypeError, - "complex_type() real type must be Type"); - return NULL; - } - real_type = ((DrgnType *)real_type_obj)->type; + struct drgn_type *real_type = real_type_obj->type; if (drgn_type_kind(real_type) != DRGN_TYPE_FLOAT && drgn_type_kind(real_type) != DRGN_TYPE_INT) { PyErr_SetString(PyExc_ValueError, "complex_type() real type must be floating-point or integer type"); return NULL; } - if (((DrgnType *)real_type_obj)->qualifiers) { + if (real_type_obj->qualifiers) { PyErr_SetString(PyExc_ValueError, "complex_type() real type must be unqualified"); return NULL; } - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - drgn_complex_type_init(type_obj->type, name, size, real_type, language); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_complex_type_create(&self->prog, name, + size.uvalue, + real_type, 
language, + &qualified_type.type); + if (err) + return set_drgn_error(err); - if (drgn_type_name(type_obj->type) == name && - _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, - name_obj) == -1) { - Py_DECREF(type_obj); + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) return NULL; - } + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, - real_type_obj) == -1) { + (PyObject *)real_type_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, + name_obj) == -1) { Py_DECREF(type_obj); return NULL; } @@ -1470,55 +1396,128 @@ DrgnType *complex_type(PyObject *self, PyObject *args, PyObject *kwds) return type_obj; } -static int unpack_member(struct drgn_type_member *members, - PyObject *cached_members_obj, size_t i) +struct py_type_thunk { + struct drgn_type_thunk thunk; + LazyType *lazy_type; +}; + +static struct drgn_error * +py_type_thunk_evaluate_fn(struct drgn_type_thunk *thunk, + struct drgn_qualified_type *ret) { - TypeMember *item; - const char *name; - unsigned long long bit_offset, bit_field_size; - struct drgn_lazy_type member_type; + struct py_type_thunk *t = container_of(thunk, struct py_type_thunk, thunk); + PyGILState_STATE gstate = PyGILState_Ensure(); + DrgnType *type = LazyType_get_borrowed(t->lazy_type); + struct drgn_error *err; + if (type) { + ret->type = type->type; + ret->qualifiers = type->qualifiers; + err = NULL; + } else { + err = drgn_error_from_python(); + } + PyGILState_Release(gstate); + return err; +} + +static void py_type_thunk_free_fn(struct drgn_type_thunk *thunk) +{ + free(container_of(thunk, struct py_type_thunk, thunk)); +} + +static int lazy_type_from_py(struct drgn_lazy_type *lazy_type, LazyType *obj, + struct drgn_program *prog, bool *can_cache) +{ + if (obj->state == DRGNPY_LAZY_TYPE_EVALUATED) { + DrgnType 
*type = (DrgnType *)obj->obj; + drgn_lazy_type_init_evaluated(lazy_type, type->type, + type->qualifiers); + } else { + struct py_type_thunk *thunk = malloc(sizeof(*thunk)); + if (!thunk) { + PyErr_NoMemory(); + return -1; + } + thunk->thunk.prog = prog; + thunk->thunk.evaluate_fn = py_type_thunk_evaluate_fn; + thunk->thunk.free_fn = py_type_thunk_free_fn; + thunk->lazy_type = obj; + drgn_lazy_type_init_thunk(lazy_type, &thunk->thunk); + /* + * We created a new thunk, so we can't reuse the passed + * LazyType. Don't cache the container so we create a new one + * when it's accessed. + */ + *can_cache = false; + } + return 0; +} - item = (TypeMember *)PyTuple_GET_ITEM(cached_members_obj, i); +static int unpack_member(struct drgn_compound_type_builder *builder, + PyObject *item, bool *can_cache) +{ if (!PyObject_TypeCheck((PyObject *)item, &TypeMember_type)) { PyErr_SetString(PyExc_TypeError, "member must be TypeMember"); return -1; } + TypeMember *member = (TypeMember *)item; - if (item->name == Py_None) { + const char *name; + if (member->name == Py_None) { name = NULL; } else { - name = PyUnicode_AsUTF8(item->name); + name = PyUnicode_AsUTF8(member->name); if (!name) return -1; } - bit_offset = PyLong_AsUnsignedLongLong(item->bit_offset); + unsigned long long bit_offset = + PyLong_AsUnsignedLongLong(member->bit_offset); if (bit_offset == (unsigned long long)-1 && PyErr_Occurred()) return -1; - bit_field_size = PyLong_AsUnsignedLongLong(item->bit_field_size); + unsigned long long bit_field_size = + PyLong_AsUnsignedLongLong(member->bit_field_size); if (bit_field_size == (unsigned long long)-1 && PyErr_Occurred()) return -1; - if (lazy_type_from_py(&member_type, (LazyType *)item) == -1) + struct drgn_lazy_type member_type; + if (lazy_type_from_py(&member_type, (LazyType *)member, + builder->prog, can_cache) == -1) + return -1; + struct drgn_error *err = + drgn_compound_type_builder_add_member(builder, member_type, + name, bit_offset, + bit_field_size); + if (err) { + 
drgn_lazy_type_deinit(&member_type); + set_drgn_error(err); return -1; - drgn_type_member_init(&members[i], member_type, name, bit_offset, - bit_field_size); + } return 0; } -static DrgnType *compound_type(PyObject *tag_obj, PyObject *size_obj, - PyObject *members_obj, - enum drgn_qualifiers qualifiers, - const struct drgn_language *language, - enum drgn_type_kind kind) +#define compound_type_arg_format "O|O&O$O&O&" + +static DrgnType *Program_compound_type(Program *self, PyObject *args, + PyObject *kwds, const char *arg_format, + enum drgn_type_kind kind) { - const char *tag; - DrgnType *type_obj = NULL; - unsigned long long size; - PyObject *cached_members_obj = NULL; - struct drgn_type_member *members = NULL; - size_t num_members; + static char *keywords[] = { + "tag", "size", "members", "qualifiers", "language", NULL + }; + PyObject *tag_obj; + struct index_arg size = { .allow_none = true, .is_none = true }; + PyObject *members_obj = Py_None; + enum drgn_qualifiers qualifiers = 0; + const struct drgn_language *language = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_format, keywords, + &tag_obj, index_converter, &size, + &members_obj, qualifiers_converter, + &qualifiers, language_converter, + &language)) + return NULL; + const char *tag; if (tag_obj == Py_None) { tag = NULL; } else if (PyUnicode_Check(tag_obj)) { @@ -1532,236 +1531,169 @@ static DrgnType *compound_type(PyObject *tag_obj, PyObject *size_obj, return NULL; } + PyObject *cached_members; + bool can_cache_members = true; + struct drgn_qualified_type qualified_type; + struct drgn_error *err; if (members_obj == Py_None) { - if (size_obj != Py_None) { + if (!size.is_none) { PyErr_Format(PyExc_ValueError, "incomplete %s type must not have size", drgn_type_kind_spelling[kind]); return NULL; } - } else { - size_t i; - if (size_obj == Py_None) { + if (!Program_hold_reserve(self, tag_obj != Py_None)) + return NULL; + + err = drgn_incomplete_compound_type_create(&self->prog, kind, + tag, language, 
+ &qualified_type.type); + if (err) + return set_drgn_error(err); + + cached_members = NULL; + } else { + if (size.is_none) { PyErr_Format(PyExc_ValueError, "%s type must have size", drgn_type_kind_spelling[kind]); return NULL; } - size = PyLong_AsUnsignedLongLong(size_obj); - if (size == (unsigned long long)-1) - return NULL; - if (!PySequence_Check(members_obj)) { PyErr_SetString(PyExc_TypeError, "members must be sequence or None"); return NULL; } - cached_members_obj = PySequence_Tuple(members_obj); - if (!cached_members_obj) + cached_members = PySequence_Tuple(members_obj); + if (!cached_members) return NULL; - num_members = PyTuple_GET_SIZE(cached_members_obj); - members = malloc_array(num_members, - sizeof(struct drgn_type_member)); - if (!members) - goto err; + size_t num_members = PyTuple_GET_SIZE(cached_members); + + struct drgn_compound_type_builder builder; + drgn_compound_type_builder_init(&builder, &self->prog, kind); + for (size_t i = 0; i < num_members; i++) { + if (unpack_member(&builder, + PyTuple_GET_ITEM(cached_members, i), + &can_cache_members) == -1) + goto err_builder; + } - for (i = 0; i < num_members; i++) { - if (unpack_member(members, cached_members_obj, i) == -1) - goto err; + if (!Program_hold_reserve(self, 1 + (tag_obj != Py_None))) + goto err_builder; + + err = drgn_compound_type_create(&builder, tag, size.uvalue, + language, &qualified_type.type); + if (err) { + set_drgn_error(err); +err_builder: + drgn_compound_type_builder_deinit(&builder); + goto err_members; } + + Program_hold_object(self, cached_members); } - type_obj = DrgnType_new(qualifiers); + if (tag_obj != Py_None && drgn_type_tag(qualified_type.type) == tag) + Program_hold_object(self, tag_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) - goto err; + goto err_members; if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_tag.id, - tag_obj) == -1) - goto err; - - if (members_obj == 
Py_None) { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_members.id, Py_None) == -1) - goto err; + tag_obj) == -1 || + (can_cache_members && + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_members.id, + cached_members ? + cached_members : Py_None) == -1)) + goto err_type; + Py_XDECREF(cached_members); - switch (kind) { - case DRGN_TYPE_STRUCT: - drgn_struct_type_init_incomplete(type_obj->type, tag, - language); - break; - case DRGN_TYPE_UNION: - drgn_union_type_init_incomplete(type_obj->type, tag, - language); - break; - case DRGN_TYPE_CLASS: - drgn_class_type_init_incomplete(type_obj->type, tag, - language); - break; - default: - UNREACHABLE(); - } - } else { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_members.id, - cached_members_obj) == -1) - goto err; - Py_DECREF(cached_members_obj); - - switch (kind) { - case DRGN_TYPE_STRUCT: - drgn_struct_type_init(type_obj->type, tag, size, - members, num_members, language); - break; - case DRGN_TYPE_UNION: - drgn_union_type_init(type_obj->type, tag, size, members, - num_members, language); - break; - case DRGN_TYPE_CLASS: - drgn_class_type_init(type_obj->type, tag, size, members, - num_members, language); - break; - default: - UNREACHABLE(); - } - } return type_obj; -err: - Py_XDECREF(type_obj); - free(members); - Py_XDECREF(cached_members_obj); +err_type: + Py_DECREF(type_obj); +err_members: + Py_XDECREF(cached_members); return NULL; } -DrgnType *struct_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_struct_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { - "tag", "size", "members", "qualifiers", "language", NULL, - }; - PyObject *tag_obj; - PyObject *size_obj = Py_None; - PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:struct_type", - keywords, &tag_obj, &size_obj, - &members_obj, 
qualifiers_converter, - &qualifiers, language_converter, - &language)) - return NULL; - - return compound_type(tag_obj, size_obj, members_obj, qualifiers, - language, DRGN_TYPE_STRUCT); + return Program_compound_type(self, args, kwds, + compound_type_arg_format ":struct_type", + DRGN_TYPE_STRUCT); } -DrgnType *union_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_union_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { - "tag", "size", "members", "qualifiers", "language", NULL, - }; - PyObject *tag_obj; - PyObject *size_obj = Py_None; - PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:union_type", - keywords, &tag_obj, &size_obj, - &members_obj, qualifiers_converter, - &qualifiers, language_converter, - &language)) - return NULL; - - return compound_type(tag_obj, size_obj, members_obj, qualifiers, - language, DRGN_TYPE_UNION); + return Program_compound_type(self, args, kwds, + compound_type_arg_format ":union_type", + DRGN_TYPE_UNION); } -DrgnType *class_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_class_type(Program *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = { - "tag", "size", "members", "qualifiers", "language", NULL, - }; - PyObject *tag_obj; - PyObject *size_obj = Py_None; - PyObject *members_obj = Py_None; - unsigned char qualifiers = 0; - const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:class_type", - keywords, &tag_obj, &size_obj, - &members_obj, qualifiers_converter, - &qualifiers, language_converter, - &language)) - return NULL; - - return compound_type(tag_obj, size_obj, members_obj, qualifiers, - language, DRGN_TYPE_CLASS); + return Program_compound_type(self, args, kwds, + compound_type_arg_format ":class_type", + DRGN_TYPE_CLASS); } -static int unpack_enumerator(struct 
drgn_type_enumerator *enumerators, - PyObject *cached_enumerators_obj, - size_t i, bool is_signed) +static int unpack_enumerator(struct drgn_enum_type_builder *builder, + PyObject *item, bool is_signed) { - TypeEnumerator *item; - const char *name; - - item = (TypeEnumerator *)PyTuple_GET_ITEM(cached_enumerators_obj, i); - if (!PyObject_TypeCheck((PyObject *)item, &TypeEnumerator_type)) { + if (!PyObject_TypeCheck(item, &TypeEnumerator_type)) { PyErr_SetString(PyExc_TypeError, "enumerator must be TypeEnumerator"); return -1; } + TypeEnumerator *enumerator = (TypeEnumerator *)item; - name = PyUnicode_AsUTF8(item->name); + const char *name = PyUnicode_AsUTF8(enumerator->name); if (!name) return -1; + struct drgn_error *err; if (is_signed) { - long long svalue; - - svalue = PyLong_AsLongLong(item->value); + long long svalue = PyLong_AsLongLong(enumerator->value); if (svalue == -1 && PyErr_Occurred()) return -1; - drgn_type_enumerator_init_signed(&enumerators[i], name, - svalue); + err = drgn_enum_type_builder_add_signed(builder, name, svalue); } else { - unsigned long long uvalue; - - uvalue = PyLong_AsUnsignedLongLong(item->value); + unsigned long long uvalue = + PyLong_AsUnsignedLongLong(enumerator->value); if (uvalue == (unsigned long long)-1 && PyErr_Occurred()) return -1; - drgn_type_enumerator_init_unsigned(&enumerators[i], name, - uvalue); + err = drgn_enum_type_builder_add_unsigned(builder, name, + uvalue); + } + if (err) { + set_drgn_error(err); + return -1; } return 0; } -DrgnType *enum_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_enum_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "tag", "type", "enumerators", "qualifiers", "language", NULL, + "tag", "type", "enumerators", "qualifiers", "language", NULL }; - DrgnType *type_obj = NULL; PyObject *tag_obj; - const char *tag; PyObject *compatible_type_obj = Py_None; - struct drgn_type *compatible_type; PyObject *enumerators_obj = Py_None; - 
unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - PyObject *cached_enumerators_obj = NULL; - struct drgn_type_enumerator *enumerators = NULL; - size_t num_enumerators; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO&$O&:enum_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO$O&O&:enum_type", keywords, &tag_obj, &compatible_type_obj, &enumerators_obj, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; + const char *tag; if (tag_obj == Py_None) { tag = NULL; } else if (PyUnicode_Check(tag_obj)) { @@ -1774,316 +1706,369 @@ DrgnType *enum_type(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (compatible_type_obj == Py_None) { - compatible_type = NULL; - } else if (PyObject_TypeCheck(compatible_type_obj, &DrgnType_type)) { - compatible_type = ((DrgnType *)compatible_type_obj)->type; - if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { - PyErr_SetString(PyExc_ValueError, - "enum_type() compatible type must be integer type"); - return NULL; - } - if (((DrgnType *)compatible_type_obj)->qualifiers) { - PyErr_SetString(PyExc_ValueError, - "enum_type() compatible type must be unqualified"); - return NULL; - } - } else { + if (compatible_type_obj != Py_None && + !PyObject_TypeCheck(compatible_type_obj, &DrgnType_type)) { PyErr_SetString(PyExc_TypeError, "enum_type() compatible type must be Type or None"); return NULL; } + PyObject *cached_enumerators; + struct drgn_qualified_type qualified_type; + struct drgn_error *err; if (enumerators_obj == Py_None) { - if (compatible_type) { + if (compatible_type_obj != Py_None) { PyErr_SetString(PyExc_ValueError, "incomplete enum type must not have compatible type"); return NULL; } - num_enumerators = 0; - } else { - bool is_signed; - size_t i; - if (!compatible_type) { + if (!Program_hold_reserve(self, tag_obj != Py_None)) + return NULL; + + err = drgn_incomplete_enum_type_create(&self->prog, tag, + 
language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + cached_enumerators = NULL; + } else { + if (compatible_type_obj == Py_None) { PyErr_SetString(PyExc_ValueError, "enum type must have compatible type"); return NULL; } + struct drgn_type *compatible_type = + ((DrgnType *)compatible_type_obj)->type; + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + PyErr_SetString(PyExc_ValueError, + "enum_type() compatible type must be integer type"); + return NULL; + } + if (((DrgnType *)compatible_type_obj)->qualifiers) { + PyErr_SetString(PyExc_ValueError, + "enum_type() compatible type must be unqualified"); + return NULL; + } + if (!PySequence_Check(enumerators_obj)) { PyErr_SetString(PyExc_TypeError, "enumerators must be sequence or None"); return NULL; } - cached_enumerators_obj = PySequence_Tuple(enumerators_obj); - if (!cached_enumerators_obj) + cached_enumerators = PySequence_Tuple(enumerators_obj); + if (!cached_enumerators) return NULL; - - num_enumerators = PyTuple_GET_SIZE(cached_enumerators_obj); - enumerators = malloc_array(num_enumerators, - sizeof(struct drgn_type_enumerator)); - if (!enumerators) - goto err; - is_signed = drgn_type_is_signed(compatible_type); - for (i = 0; i < num_enumerators; i++) { - if (unpack_enumerator(enumerators, - cached_enumerators_obj, i, + size_t num_enumerators = PyTuple_GET_SIZE(cached_enumerators); + + struct drgn_enum_type_builder builder; + drgn_enum_type_builder_init(&builder, &self->prog); + bool is_signed = drgn_type_is_signed(compatible_type); + for (size_t i = 0; i < num_enumerators; i++) { + if (unpack_enumerator(&builder, + PyTuple_GET_ITEM(cached_enumerators, i), is_signed) == -1) - goto err; + goto err_enumerators; + } + + if (!Program_hold_reserve(self, 1 + (tag_obj != Py_None))) + goto err_builder; + + err = drgn_enum_type_create(&builder, tag, compatible_type, + language, &qualified_type.type); + if (err) { + set_drgn_error(err); +err_builder: + 
drgn_enum_type_builder_deinit(&builder); + goto err_enumerators; } + + Program_hold_object(self, cached_enumerators); } - type_obj = DrgnType_new(qualifiers); + if (tag_obj != Py_None && drgn_type_tag(qualified_type.type) == tag) + Program_hold_object(self, tag_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) - goto err; + goto err_enumerators; if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_tag.id, - tag_obj) == -1) - goto err; - if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, - compatible_type_obj) == -1) - goto err; + tag_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + compatible_type_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, + &DrgnType_attr_enumerators.id, + cached_enumerators ? + cached_enumerators : Py_None) == -1) + goto err_type; + Py_XDECREF(cached_enumerators); - if (enumerators_obj == Py_None) { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_enumerators.id, - Py_None) == -1) - goto err; - - drgn_enum_type_init_incomplete(type_obj->type, tag, language); - } else { - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_enumerators.id, - cached_enumerators_obj) == -1) - goto err; - Py_DECREF(cached_enumerators_obj); - - drgn_enum_type_init(type_obj->type, tag, compatible_type, - enumerators, num_enumerators, language); - } return type_obj; -err: - Py_XDECREF(type_obj); - free(enumerators); - Py_XDECREF(cached_enumerators_obj); +err_type: + Py_DECREF(type_obj); +err_enumerators: + Py_XDECREF(cached_enumerators); return NULL; } -DrgnType *typedef_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_typedef_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "name", "type", "qualifiers", "language", NULL, + "name", "type", "qualifiers", "language", NULL }; - DrgnType *type_obj; PyObject *name_obj; - const char *name; - PyObject 
*aliased_type_obj; - struct drgn_qualified_type aliased_type; - unsigned char qualifiers = 0; + DrgnType *aliased_type_obj; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|O&$O&:typedef_type", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!|$O&O&:typedef_type", keywords, &PyUnicode_Type, &name_obj, - &aliased_type_obj, + &DrgnType_type, &aliased_type_obj, qualifiers_converter, &qualifiers, language_converter, &language)) return NULL; - name = PyUnicode_AsUTF8(name_obj); + const char *name = PyUnicode_AsUTF8(name_obj); if (!name) return NULL; - type_obj = DrgnType_new(qualifiers); - if (!type_obj) + if (!Program_hold_reserve(self, 1)) return NULL; - if (type_arg(aliased_type_obj, &aliased_type, type_obj) == -1) { - Py_DECREF(type_obj); + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_typedef_type_create(&self->prog, name, + DrgnType_unwrap(aliased_type_obj), + language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + + if (drgn_type_name(qualified_type.type) == name) + Program_hold_object(self, name_obj); + + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) return NULL; - } - if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)aliased_type_obj) == -1 || + _PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_name.id, name_obj) == -1) { Py_DECREF(type_obj); return NULL; } - drgn_typedef_type_init(type_obj->type, name, aliased_type, language); return type_obj; } -DrgnType *pointer_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_pointer_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "size", "type", "qualifiers", "language", NULL, + "type", "size", "qualifiers", "language", NULL }; - DrgnType 
*type_obj; - unsigned long size; - PyObject *referenced_type_obj; - struct drgn_qualified_type referenced_type; - unsigned char qualifiers = 0; + DrgnType *referenced_type_obj; + struct index_arg size = { .allow_none = true, .is_none = true }; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "kO|O&$O&:pointer_type", - keywords, &size, &referenced_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O&$O&O&:pointer_type", + keywords, &DrgnType_type, + &referenced_type_obj, index_converter, + &size, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; - type_obj = DrgnType_new(qualifiers); + if (size.is_none) { + uint8_t word_size; + struct drgn_error *err = drgn_program_word_size(&self->prog, + &word_size); + if (err) + return set_drgn_error(err); + size.uvalue = word_size; + } + + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_pointer_type_create(&self->prog, + DrgnType_unwrap(referenced_type_obj), + size.uvalue, language, + &qualified_type.type); + if (err) + return set_drgn_error(err); + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) return NULL; - if (type_arg(referenced_type_obj, &referenced_type, type_obj) == -1) { + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)referenced_type_obj) == -1) { Py_DECREF(type_obj); return NULL; } - drgn_pointer_type_init(type_obj->type, size, referenced_type, language); return type_obj; } -DrgnType *array_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "length", "type", "qualifiers", "language", NULL, + "type", "length", "qualifiers", "language", NULL }; - DrgnType *type_obj; - PyObject 
*length_obj; - unsigned long long length; - PyObject *element_type_obj; - struct drgn_qualified_type element_type; - unsigned char qualifiers = 0; + DrgnType *element_type_obj; + struct index_arg length = { .allow_none = true, .is_none = true }; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O&$O&:array_type", - keywords, &length_obj, - &element_type_obj, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O&$O&O&:array_type", + keywords, &DrgnType_type, + &element_type_obj, index_converter, + &length, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; - if (length_obj == Py_None) { - length = 0; + struct drgn_qualified_type qualified_type; + struct drgn_error *err; + if (length.is_none) { + err = drgn_incomplete_array_type_create(&self->prog, + DrgnType_unwrap(element_type_obj), + language, + &qualified_type.type); } else { - if (!PyLong_Check(length_obj)) { - PyErr_SetString(PyExc_TypeError, - "length must be integer or None"); - return NULL; - } - length = PyLong_AsUnsignedLongLong(length_obj); - if (length == (unsigned long long)-1 && PyErr_Occurred()) - return NULL; + err = drgn_array_type_create(&self->prog, + DrgnType_unwrap(element_type_obj), + length.uvalue, language, + &qualified_type.type); } - - type_obj = DrgnType_new(qualifiers); + if (err) + return set_drgn_error(err); + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); if (!type_obj) return NULL; - if (type_arg(element_type_obj, &element_type, type_obj) == -1) { + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)element_type_obj) == -1) { Py_DECREF(type_obj); return NULL; } - if (length_obj == Py_None) { - drgn_array_type_init_incomplete(type_obj->type, element_type, - language); - } else { - 
drgn_array_type_init(type_obj->type, length, element_type, - language); - } return type_obj; } -static int unpack_parameter(struct drgn_type_parameter *parameters, - PyObject *cached_parameters_obj, size_t i) +static int unpack_parameter(struct drgn_function_type_builder *builder, + PyObject *item, bool *can_cache) { - TypeParameter *item; - const char *name; - struct drgn_lazy_type parameter_type; - - item = (TypeParameter *)PyTuple_GET_ITEM(cached_parameters_obj, i); - if (!PyObject_TypeCheck((PyObject *)item, &TypeParameter_type)) { - PyErr_SetString(PyExc_TypeError, "parameter must be TypeParameter"); + if (!PyObject_TypeCheck(item, &TypeParameter_type)) { + PyErr_SetString(PyExc_TypeError, + "parameter must be TypeParameter"); return -1; } + TypeParameter *parameter = (TypeParameter *)item; - if (item->name == Py_None) { + const char *name; + if (parameter->name == Py_None) { name = NULL; } else { - name = PyUnicode_AsUTF8(item->name); + name = PyUnicode_AsUTF8(parameter->name); if (!name) return -1; } - if (lazy_type_from_py(¶meter_type, (LazyType *)item) == -1) + struct drgn_lazy_type parameter_type; + if (lazy_type_from_py(¶meter_type, (LazyType *)parameter, + builder->prog, can_cache) == -1) return -1; - drgn_type_parameter_init(¶meters[i], parameter_type, name); + struct drgn_error *err = + drgn_function_type_builder_add_parameter(builder, + parameter_type, name); + if (err) { + drgn_lazy_type_deinit(¶meter_type); + set_drgn_error(err); + return -1; + } return 0; } -DrgnType *function_type(PyObject *self, PyObject *args, PyObject *kwds) +DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { "type", "parameters", "is_variadic", "qualifiers", "language", NULL, }; - DrgnType *type_obj = NULL; - PyObject *return_type_obj; - struct drgn_qualified_type return_type; - PyObject *parameters_obj, *cached_parameters_obj = NULL; - struct drgn_type_parameter *parameters = NULL; - size_t num_parameters, i; + 
DrgnType *return_type_obj; + PyObject *parameters_obj; int is_variadic = 0; - unsigned char qualifiers = 0; + enum drgn_qualifiers qualifiers = 0; const struct drgn_language *language = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|pO&$O&:function_type", - keywords, &return_type_obj, - ¶meters_obj, &is_variadic, - qualifiers_converter, &qualifiers, - language_converter, &language)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|p$O&O&:function_type", + keywords, &DrgnType_type, + &return_type_obj, ¶meters_obj, + &is_variadic, qualifiers_converter, + &qualifiers, language_converter, + &language)) return NULL; if (!PySequence_Check(parameters_obj)) { PyErr_SetString(PyExc_TypeError, "parameters must be sequence"); return NULL; } - cached_parameters_obj = PySequence_Tuple(parameters_obj); - if (!cached_parameters_obj) + + PyObject *cached_parameters = PySequence_Tuple(parameters_obj); + if (!cached_parameters) return NULL; + size_t num_parameters = PyTuple_GET_SIZE(cached_parameters); + bool can_cache_parameters = true; - num_parameters = PyTuple_GET_SIZE(cached_parameters_obj); - parameters = malloc_array(num_parameters, - sizeof(struct drgn_type_parameter)); - if (!parameters) - goto err; - for (i = 0; i < num_parameters; i++) { - if (unpack_parameter(parameters, cached_parameters_obj, i) == -1) - goto err; + struct drgn_function_type_builder builder; + drgn_function_type_builder_init(&builder, &self->prog); + for (size_t i = 0; i < num_parameters; i++) { + if (unpack_parameter(&builder, + PyTuple_GET_ITEM(cached_parameters, i), + &can_cache_parameters) == -1) + goto err_builder; } - type_obj = DrgnType_new(qualifiers); - if (!type_obj) - goto err; + if (!Program_hold_reserve(self, 1)) + goto err_builder; - if (type_arg(return_type_obj, &return_type, type_obj) == -1) - goto err; + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_function_type_create(&builder, + DrgnType_unwrap(return_type_obj), + is_variadic, + language, + 
&qualified_type.type); + if (err) { + set_drgn_error(err); +err_builder: + drgn_function_type_builder_deinit(&builder); + goto err_parameters; + } + + Program_hold_object(self, cached_parameters); - if (_PyDict_SetItemId(type_obj->attr_cache, - &DrgnType_attr_parameters.id, - cached_parameters_obj) == -1) - goto err; - Py_DECREF(cached_parameters_obj); + qualified_type.qualifiers = qualifiers; + DrgnType *type_obj = (DrgnType *)DrgnType_wrap(qualified_type); + if (!type_obj) + goto err_parameters; + + if (_PyDict_SetItemId(type_obj->attr_cache, &DrgnType_attr_type.id, + (PyObject *)return_type_obj) == -1 || + (can_cache_parameters && + _PyDict_SetItemId(type_obj->attr_cache, + &DrgnType_attr_parameters.id, + cached_parameters) == -1)) + goto err_type; + Py_DECREF(cached_parameters); - drgn_function_type_init(type_obj->type, return_type, parameters, - num_parameters, is_variadic, language); return type_obj; -err: - Py_XDECREF(type_obj); - free(parameters); - Py_XDECREF(cached_parameters_obj); +err_type: + Py_DECREF(type_obj); +err_parameters: + Py_DECREF(cached_parameters); return NULL; } diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index aa419c4bb..a84602c41 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -1,6 +1,8 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include + #include "drgnpy.h" int append_string(PyObject *parts, const char *s) diff --git a/libdrgn/read.h b/libdrgn/read.h deleted file mode 100644 index 7b9b16ff5..000000000 --- a/libdrgn/read.h +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -/** - * @file - * - * Helpers for parsing values in memory. - * - * See @ref MemoryParsing. - */ - -#ifndef DRGN_READ_H -#define DRGN_READ_H - -#include -#include - -/** - * @ingroup Internals - * - * @defgroup MemoryParsing Memory parsing - * - * Helpers for parsing values in memory. 
- * - * This provides helpers for parsing values in memory (e.g., from an mmap'd - * file) with safe bounds checking. - * - * These helpers take a cursor (@p ptr) which is read from and advanced. They - * are bounds-checked against an end pointer (@p end). If desired, they will - * swap the byte order of the read value. The @c readN helpers are defined for N - * of 16, 32, and 64. - * - * @{ - */ - -/** Return whether ptr + size is within @p end. */ -static inline bool read_in_bounds(const char *ptr, const char *end, size_t size) -{ - return ptr <= end && (size_t)(end - ptr) >= size; -} - -/** Parse an unsigned 8-bit integer in memory. */ -static inline bool read_u8(const char **ptr, const char *end, uint8_t *ret) -{ - if (!read_in_bounds(*ptr, end, sizeof(uint8_t))) - return false; - *ret = *(const uint8_t *)*ptr; - *ptr += sizeof(uint8_t); - return true; -} - -/** Parse an unsigned 8-bit integer in memory into a @c size_t. */ -static inline bool read_u8_into_size_t(const char **ptr, const char *end, - size_t *ret) -{ - uint8_t tmp; - - if (!read_u8(ptr, end, &tmp)) - return false; - if (tmp > SIZE_MAX) - return false; - *ret = tmp; - return true; -} - -#ifdef DOXYGEN -/** - * Parse an unsigned N-bit integer in memory. - * - * This does not perform any bounds checking, so it should only be used if - * bounds checking was already done. - * - * This is defined for N of 16, 32, and 64. - * - * @param[in,out] ptr Pointer to read from and advance. - * @param[in] bswap Whether to swap the byte order of the read value. - * @param[out] ret Returned value. - */ -void read_uN_nocheck(const char **ptr, bool bswap, uintN_t *ret); - -/** - * Parse an unsigned N-bit integer in memory, checking bounds. - * - * @sa read_uN_nocheck(). - * - * @param[in] end Pointer to one after the last valid address. - * @return Whether the read was in bounds. 
- */ -bool read_uN(const char **ptr, const char *end, bool bswap, uintN_t *ret); - -/** - * Parse an unsigned N-bit integer in memory into a @c uint64_t. - * - * @sa read_uN_nocheck(). - */ -void read_uN_into_u64_nocheck(const char **ptr, bool bswap, uint64_t *ret); - -/** - * Parse an unsigned N-bit integer in memory into a @c uint64_t, checking - * bounds. - * - * @sa read_uN(). - */ -bool read_uN_into_u64(const char **ptr, const char *end, bool bswap, - uint64_t *ret); - -/** - * Parse an unsigned N-bit integer in memory into a @c size_t, checking bounds. - * - * @sa read_uN(). - * - * @return Whether the read was in bounds and the value was less than or equal - * to @c SIZE_MAX. - */ -bool read_uN_into_u64(const char **ptr, const char *end, bool bswap, - uint64_t *ret); -#endif - -#define DEFINE_READ(size) \ -static inline void read_u##size##_nocheck(const char **ptr, bool bswap, \ - uint##size##_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - memcpy(&tmp, *ptr, sizeof(tmp)); \ - if (bswap) \ - tmp = bswap_##size(tmp); \ - *ret = tmp; \ - *ptr += sizeof(uint##size##_t); \ -} \ - \ -static inline bool read_u##size(const char **ptr, const char *end, \ - bool bswap, uint##size##_t *ret) \ -{ \ - if (!read_in_bounds(*ptr, end, sizeof(uint##size##_t))) \ - return false; \ - read_u##size##_nocheck(ptr, bswap, ret); \ - return true; \ -} \ - \ -static inline void read_u##size##_into_u64_nocheck(const char **ptr, \ - bool bswap, \ - uint64_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - read_u##size##_nocheck(ptr, bswap, &tmp); \ - *ret = tmp; \ -} \ - \ -static inline bool read_u##size##_into_u64(const char **ptr, \ - const char *end, bool bswap, \ - uint64_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - if (!read_u##size(ptr, end, bswap, &tmp)) \ - return false; \ - *ret = tmp; \ - return true; \ -} \ - \ -static inline bool read_u##size##_into_size_t(const char **ptr, \ - const char *end, \ - bool bswap, size_t *ret) \ -{ \ - uint##size##_t tmp; \ - \ - if 
(!read_u##size(ptr, end, bswap, &tmp)) \ - return false; \ - if (tmp > SIZE_MAX) \ - return false; \ - *ret = tmp; \ - return true; \ -} - -DEFINE_READ(16) -DEFINE_READ(32) -DEFINE_READ(64) - -static inline bool read_be32(const char **ptr, const char *end, uint32_t *ret) -{ - return read_u32(ptr, end, __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__, - ret); -} - -/** Advance @p ptr to the byte after the next null byte. */ -static inline bool skip_string(const char **ptr, const char *end) -{ - const char *nul; - - if (*ptr >= end) - return false; - - nul = memchr(*ptr, 0, end - *ptr); - if (!nul) - return false; - - *ptr = nul + 1; - return true; -} - -/** - * Parse a null terminated string in memory. - * - * @param[in,out] ptr Pointer to read from and advance. - * @param[in] end Pointer to one after the last valid address. - * @param[out] str_ret Returned string. Equal to the initial value of - * *ptr. - * @param[out] len_ret Returned string length not including the null byte. - */ -static inline bool read_string(const char **ptr, const char *end, - const char **str_ret, size_t *len_ret) -{ - const char *nul; - - if (*ptr >= end) - return false; - - nul = memchr(*ptr, 0, end - *ptr); - if (!nul) - return false; - - *str_ret = *ptr; - *len_ret = nul - *ptr; - *ptr = nul + 1; - return true; -} - -/** @} */ - -#endif /* DRGN_READ_H */ diff --git a/libdrgn/serialize.c b/libdrgn/serialize.c index 91cfa0f35..7aed8a842 100644 --- a/libdrgn/serialize.c +++ b/libdrgn/serialize.c @@ -2,11 +2,11 @@ // SPDX-License-Identifier: GPL-3.0+ #include -#include +#include #include -#include "internal.h" #include "serialize.h" +#include "util.h" void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, uint8_t bit_size, bool little_endian) diff --git a/libdrgn/splay_tree.c b/libdrgn/splay_tree.c index 7b216fc73..8241b8d9f 100644 --- a/libdrgn/splay_tree.c +++ b/libdrgn/splay_tree.c @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ -#include "binary_search_tree.h" +#include "binary_search_tree.h" // IWYU pragma: associated /* * Binary search tree splay operation based on the original paper [1]. Rotations diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 4224e9a05..539f9dda7 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -1,19 +1,25 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ -#include -#include +#include +#include #include -#include #include #include +#include +#include -#include "internal.h" +#include "debug_info.h" +#include "drgn.h" +#include "error.h" +#include "hash_table.h" #include "helpers.h" +#include "platform.h" #include "program.h" -#include "read.h" #include "string_builder.h" #include "symbol.h" +#include "type.h" +#include "util.h" struct drgn_stack_trace { struct drgn_program *prog; @@ -454,9 +460,6 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, struct drgn_stack_trace **ret) { struct drgn_error *err; - Dwfl *dwfl; - Dwfl_Thread *thread; - struct drgn_stack_trace *trace; if (!prog->has_platform) { return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, @@ -468,11 +471,12 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, "stack unwinding is not yet supported for live processes"); } - err = drgn_program_get_dwfl(prog, &dwfl); + struct drgn_debug_info *dbinfo; + err = drgn_program_get_dbinfo(prog, &dbinfo); if (err) return err; if (!prog->attached_dwfl_state) { - if (!dwfl_attach_state(dwfl, NULL, 0, + if (!dwfl_attach_state(dbinfo->dwfl, NULL, 0, &drgn_linux_kernel_thread_callbacks, prog)) return drgn_error_libdwfl(); @@ -481,7 +485,8 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, prog->stack_trace_tid = tid; prog->stack_trace_obj = obj; - thread = dwfl_attach_thread(dwfl, STACK_TRACE_OBJ_TID); + Dwfl_Thread *thread = dwfl_attach_thread(dbinfo->dwfl, + STACK_TRACE_OBJ_TID); 
prog->stack_trace_obj = NULL; prog->stack_trace_tid = 0; if (prog->stack_trace_err) @@ -491,7 +496,8 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, goto err; } - trace = malloc(sizeof(*trace) + sizeof(trace->frames[0])); + struct drgn_stack_trace *trace = malloc(sizeof(*trace) + + sizeof(trace->frames[0])); if (!trace) { err = &drgn_enomem; goto err; @@ -553,8 +559,10 @@ drgn_object_stack_trace(const struct drgn_object *obj, err = drgn_object_read_integer(obj, &value); if (err) return err; - return drgn_get_stack_trace(obj->prog, value.uvalue, NULL, ret); + return drgn_get_stack_trace(drgn_object_program(obj), + value.uvalue, NULL, ret); } else { - return drgn_get_stack_trace(obj->prog, 0, obj, ret); + return drgn_get_stack_trace(drgn_object_program(obj), 0, obj, + ret); } } diff --git a/libdrgn/string_builder.c b/libdrgn/string_builder.c index 1fbf86b3c..21f9499b6 100644 --- a/libdrgn/string_builder.c +++ b/libdrgn/string_builder.c @@ -4,8 +4,8 @@ #include #include -#include "internal.h" #include "string_builder.h" +#include "util.h" bool string_builder_finalize(struct string_builder *sb, char **ret) { diff --git a/libdrgn/string_builder.h b/libdrgn/string_builder.h index a517c0686..ac35dcf99 100644 --- a/libdrgn/string_builder.h +++ b/libdrgn/string_builder.h @@ -13,6 +13,7 @@ #define DRGN_STRING_BUILDER_H #include +#include #include #include diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 369452c21..14278e33e 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -1,10 +1,12 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0+ +#include +#include #include -#include "internal.h" #include "symbol.h" +#include "util.h" LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) { diff --git a/libdrgn/type.c b/libdrgn/type.c index b9e537efd..6917beae1 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1,13 +1,16 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0+ +#include #include -#include "internal.h" +#include "cityhash.h" +#include "error.h" #include "hash_table.h" #include "language.h" +#include "program.h" #include "type.h" -#include "type_index.h" +#include "util.h" const char * const drgn_type_kind_spelling[] = { [DRGN_TYPE_VOID] = "void", @@ -25,7 +28,17 @@ const char * const drgn_type_kind_spelling[] = { [DRGN_TYPE_FUNCTION] = "function", }; -const char * const * const +/** + * Names of primitive types. + * + * In some languages, like C, the same primitive type can be spelled in multiple + * ways. For example, "int" can also be spelled "signed int" or "int signed". + * + * This maps each @ref drgn_primitive_type to a ``NULL``-terminated array of the + * different ways to spell that type. The spelling at index zero is the + * preferred spelling. + */ +static const char * const * const drgn_primitive_type_spellings[DRGN_PRIMITIVE_TYPE_NUM] = { [DRGN_C_TYPE_VOID] = (const char * []){ "void", NULL, }, [DRGN_C_TYPE_CHAR] = (const char * []){ "char", NULL, }, @@ -93,7 +106,11 @@ drgn_primitive_type_spellings[DRGN_PRIMITIVE_TYPE_NUM] = { [DRGN_C_TYPE_PTRDIFF_T] = (const char * []){ "ptrdiff_t", NULL, }, }; -const enum drgn_type_kind +/** + * Mapping from a @ref drgn_primitive_type to the corresponding @ref + * drgn_type_kind.
+ */ +static const enum drgn_type_kind drgn_primitive_type_kind[DRGN_PRIMITIVE_TYPE_NUM + 1] = { [DRGN_C_TYPE_CHAR] = DRGN_TYPE_INT, [DRGN_C_TYPE_SIGNED_CHAR] = DRGN_TYPE_INT, @@ -133,27 +150,48 @@ drgn_primitive_type_is_signed(enum drgn_primitive_type primitive) } } -void drgn_type_thunk_free(struct drgn_type_thunk *thunk) +static struct hash_pair drgn_member_hash_pair(const struct drgn_member_key *key) { - thunk->free_fn(thunk); + size_t hash; + if (key->name) + hash = cityhash_size_t(key->name, key->name_len); + else + hash = 0; + hash = hash_combine((uintptr_t)key->type, hash); + return hash_pair_from_avalanching_hash(hash); +} + +static bool drgn_member_eq(const struct drgn_member_key *a, + const struct drgn_member_key *b) +{ + return (a->type == b->type && a->name_len == b->name_len && + (!a->name_len || memcmp(a->name, b->name, a->name_len) == 0)); } +DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_hash_pair, + drgn_member_eq) + +DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, hash_pair_ptr_type, + hash_table_scalar_eq) + struct drgn_error *drgn_lazy_type_evaluate(struct drgn_lazy_type *lazy_type, - struct drgn_qualified_type *qualified_type) + struct drgn_qualified_type *ret) { if (drgn_lazy_type_is_evaluated(lazy_type)) { - qualified_type->type = lazy_type->type; - qualified_type->qualifiers = lazy_type->qualifiers; + ret->type = lazy_type->type; + ret->qualifiers = lazy_type->qualifiers; } else { - struct drgn_error *err; struct drgn_type_thunk *thunk_ptr = lazy_type->thunk; struct drgn_type_thunk thunk = *thunk_ptr; - - err = thunk.evaluate_fn(thunk_ptr, qualified_type); + struct drgn_error *err = thunk.evaluate_fn(thunk_ptr, ret); if (err) return err; - drgn_lazy_type_init_evaluated(lazy_type, qualified_type->type, - qualified_type->qualifiers); + if (drgn_type_program(ret->type) != thunk.prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + drgn_lazy_type_init_evaluated(lazy_type, ret->type, + 
ret->qualifiers); thunk.free_fn(thunk_ptr); } return NULL; @@ -165,6 +203,19 @@ void drgn_lazy_type_deinit(struct drgn_lazy_type *lazy_type) drgn_type_thunk_free(lazy_type->thunk); } +static inline struct drgn_error * +drgn_lazy_type_check_prog(struct drgn_lazy_type *lazy_type, + struct drgn_program *prog) +{ + if ((drgn_lazy_type_is_evaluated(lazy_type) ? + drgn_type_program(lazy_type->type) : + lazy_type->thunk->prog) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + return NULL; +} + LIBDRGN_PUBLIC struct drgn_error * drgn_member_type(struct drgn_type_member *member, struct drgn_qualified_type *ret) @@ -179,272 +230,605 @@ drgn_parameter_type(struct drgn_type_parameter *parameter, return drgn_lazy_type_evaluate(¶meter->type, ret); } -void drgn_int_type_init(struct drgn_type *type, const char *name, uint64_t size, - bool is_signed, const struct drgn_language *lang) +static struct hash_pair drgn_type_dedupe_hash(struct drgn_type * const *entry) { - enum drgn_primitive_type primitive; + struct drgn_type *type = *entry; + size_t hash = hash_combine(drgn_type_kind(type), + (uintptr_t)drgn_type_language(type)); + /* + * We don't dedupe complete compound or enumerated types, and typedefs + * inherit is_complete from the aliased type, so is_complete can only + * differ for otherwise equal array types. We implicitly include that in + * the hash with the is_complete check below, so we don't need to hash + * it explicitly. 
+ */ + if (drgn_type_has_name(type)) { + const char *name = drgn_type_name(type); + hash = hash_combine(hash, cityhash_size_t(name, strlen(name))); + } + if (drgn_type_has_size(type)) + hash = hash_combine(hash, drgn_type_size(type)); + if (drgn_type_has_is_signed(type)) + hash = hash_combine(hash, drgn_type_is_signed(type)); + const char *tag; + if (drgn_type_has_tag(type) && (tag = drgn_type_tag(type))) + hash = hash_combine(hash, cityhash_size_t(tag, strlen(tag))); + if (drgn_type_has_type(type)) { + struct drgn_qualified_type qualified_type = + drgn_type_type(type); + hash = hash_combine(hash, (uintptr_t)qualified_type.type); + hash = hash_combine(hash, qualified_type.qualifiers); + } + if (drgn_type_has_length(type) && drgn_type_is_complete(type)) + hash = hash_combine(hash, drgn_type_length(type)); + return hash_pair_from_avalanching_hash(hash); +} - assert(name); - type->_private.kind = DRGN_TYPE_INT; - type->_private.is_complete = true; - primitive = c_parse_specifier_list(name); - if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_INT && - (primitive == DRGN_C_TYPE_CHAR || - is_signed == drgn_primitive_type_is_signed(primitive))) { - type->_private.primitive = primitive; - type->_private.name = - drgn_primitive_type_spellings[primitive][0]; - } else { - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; +static bool drgn_type_dedupe_eq(struct drgn_type * const *entry_a, + struct drgn_type * const *entry_b) +{ + struct drgn_type *a = *entry_a; + struct drgn_type *b = *entry_b; + + if (drgn_type_kind(a) != drgn_type_kind(b) || + drgn_type_language(a) != drgn_type_language(b) || + drgn_type_is_complete(a) != drgn_type_is_complete(b)) + return false; + if (drgn_type_has_name(a) && + strcmp(drgn_type_name(a), drgn_type_name(b)) != 0) + return false; + if (drgn_type_has_size(a) && drgn_type_size(a) != drgn_type_size(b)) + return false; + if (drgn_type_has_is_signed(a) && + drgn_type_is_signed(a) != drgn_type_is_signed(b)) + 
return false; + if (drgn_type_has_tag(a)) { + const char *tag_a = drgn_type_tag(a); + const char *tag_b = drgn_type_tag(b); + if ((!tag_a != !tag_b) || (tag_a && strcmp(tag_a, tag_b) != 0)) + return false; } - type->_private.size = size; - type->_private.is_signed = is_signed; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_has_type(a)) { + struct drgn_qualified_type type_a = drgn_type_type(a); + struct drgn_qualified_type type_b = drgn_type_type(b); + if (type_a.type != type_b.type || + type_a.qualifiers != type_b.qualifiers) + return false; + } + if (drgn_type_has_length(a) && + drgn_type_length(a) != drgn_type_length(b)) + return false; + return true; } -void drgn_bool_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang) +/* + * We don't deduplicate complete compound types, complete enumerated types, or + * function types, so the hash and comparison functions ignore members, + * enumerators, parameters, and is_variadic. 
+ */ +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dedupe_type_set, drgn_type_dedupe_hash, + drgn_type_dedupe_eq) + +DEFINE_VECTOR_FUNCTIONS(drgn_typep_vector) + +static struct drgn_error *find_or_create_type(struct drgn_type *key, + struct drgn_type **ret) { - assert(name); - type->_private.kind = DRGN_TYPE_BOOL; - type->_private.is_complete = true; - if (c_parse_specifier_list(name) == DRGN_C_TYPE_BOOL) { - type->_private.primitive = DRGN_C_TYPE_BOOL; - type->_private.name = - drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0]; - } else { - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; + struct drgn_program *prog = key->_private.program; + struct hash_pair hp = drgn_dedupe_type_set_hash(&key); + struct drgn_dedupe_type_set_iterator it = + drgn_dedupe_type_set_search_hashed(&prog->dedupe_types, &key, + hp); + if (it.entry) { + *ret = *it.entry; + return NULL; } - type->_private.size = size; - type->_private.language = drgn_language_or_default(lang); + + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + + *type = *key; + if (!drgn_dedupe_type_set_insert_searched(&prog->dedupe_types, &type, + hp, NULL)) { + free(type); + return &drgn_enomem; + } + *ret = type; + return NULL; } -void drgn_float_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang) +struct drgn_type *drgn_void_type(struct drgn_program *prog, + const struct drgn_language *lang) { - enum drgn_primitive_type primitive; + if (!lang) + lang = drgn_program_language(prog); + return &prog->void_types[lang - drgn_languages]; +} - assert(name); - type->_private.kind = DRGN_TYPE_FLOAT; - type->_private.is_complete = true; - primitive = c_parse_specifier_list(name); - if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_FLOAT) { - type->_private.primitive = primitive; - type->_private.name = - drgn_primitive_type_spellings[primitive][0]; - } else { - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - 
type->_private.name = name; +struct drgn_error *drgn_int_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + enum drgn_primitive_type primitive = c_parse_specifier_list(name); + if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_INT && + (primitive == DRGN_C_TYPE_CHAR || + is_signed == drgn_primitive_type_is_signed(primitive))) + name = drgn_primitive_type_spellings[primitive][0]; + else + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_INT, + .is_complete = true, + .primitive = primitive, + .name = name, + .size = size, + .is_signed = is_signed, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +struct drgn_error *drgn_bool_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + enum drgn_primitive_type primitive = c_parse_specifier_list(name); + if (primitive == DRGN_C_TYPE_BOOL) + name = drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0]; + else + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_BOOL, + .is_complete = true, + .primitive = primitive, + .name = name, + .size = size, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +struct drgn_error *drgn_float_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + enum drgn_primitive_type primitive = c_parse_specifier_list(name); + if (drgn_primitive_type_kind[primitive] == DRGN_TYPE_FLOAT) + name = drgn_primitive_type_spellings[primitive][0]; + else + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_FLOAT, + .is_complete = true, + .primitive = primitive, + .name = name, + .size = size, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +struct drgn_error *drgn_complex_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + struct drgn_type *real_type, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + if (drgn_type_program(real_type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); } - type->_private.size = size; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_kind(real_type) != DRGN_TYPE_FLOAT && + drgn_type_kind(real_type) != DRGN_TYPE_INT) { + return drgn_error_create(DRGN_ERROR_TYPE, + "real type of complex type must be floating-point or integer type"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_COMPLEX, + .is_complete = true, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .name = name, + .size = size, + .type = real_type, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_complex_type_init(struct drgn_type *type, const char *name, - uint64_t size, struct drgn_type *real_type, - const struct drgn_language *lang) +DEFINE_VECTOR_FUNCTIONS(drgn_type_member_vector) + +void drgn_compound_type_builder_init(struct drgn_compound_type_builder *builder, + struct drgn_program *prog, + enum drgn_type_kind kind) { - assert(name); - assert(real_type); - assert(drgn_type_kind(real_type) == DRGN_TYPE_FLOAT || - drgn_type_kind(real_type) == DRGN_TYPE_INT); - type->_private.kind = DRGN_TYPE_COMPLEX; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; - type->_private.size = size; - type->_private.type = real_type; - type->_private.qualifiers = 0; - type->_private.language = drgn_language_or_default(lang); + assert(kind == DRGN_TYPE_STRUCT || + kind == DRGN_TYPE_UNION || + kind == DRGN_TYPE_CLASS); + builder->prog = prog; + builder->kind = kind; + drgn_type_member_vector_init(&builder->members); } -void drgn_struct_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang) +void +drgn_compound_type_builder_deinit(struct drgn_compound_type_builder *builder) { - type->_private.kind = DRGN_TYPE_STRUCT; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = size; - type->_private.members = members; - type->_private.num_members = num_members; - type->_private.language = drgn_language_or_default(lang); + for (size_t i = 0; i < builder->members.size; i++) + drgn_lazy_type_deinit(&builder->members.data[i].type); + drgn_type_member_vector_deinit(&builder->members); } -void drgn_struct_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * 
+drgn_compound_type_builder_add_member(struct drgn_compound_type_builder *builder, + struct drgn_lazy_type type, + const char *name, uint64_t bit_offset, + uint64_t bit_field_size) { - type->_private.kind = DRGN_TYPE_STRUCT; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = 0; - type->_private.members = NULL; - type->_private.num_members = 0; - type->_private.language = drgn_language_or_default(lang); + struct drgn_error *err = drgn_lazy_type_check_prog(&type, + builder->prog); + if (err) + return err; + struct drgn_type_member *member = + drgn_type_member_vector_append_entry(&builder->members); + if (!member) + return &drgn_enomem; + member->type = type; + member->name = name; + member->bit_offset = bit_offset; + member->bit_field_size = bit_field_size; + return NULL; } -void drgn_union_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang) +struct drgn_error * +drgn_compound_type_create(struct drgn_compound_type_builder *builder, + const char *tag, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_UNION; + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + if (!drgn_typep_vector_append(&builder->prog->created_types, &type)) { + free(type); + return &drgn_enomem; + } + + drgn_type_member_vector_shrink_to_fit(&builder->members); + + type->_private.kind = builder->kind; type->_private.is_complete = true; type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; type->_private.tag = tag; type->_private.size = size; - type->_private.members = members; - type->_private.num_members = num_members; - type->_private.language = drgn_language_or_default(lang); + type->_private.members = builder->members.data; + type->_private.num_members = builder->members.size; + type->_private.program = 
builder->prog; + type->_private.language = + lang ? lang : drgn_program_language(builder->prog); + *ret = type; + return NULL; } -void drgn_union_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * +drgn_incomplete_compound_type_create(struct drgn_program *prog, + enum drgn_type_kind kind, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_UNION; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = 0; - type->_private.members = NULL; - type->_private.num_members = 0; - type->_private.language = drgn_language_or_default(lang); + assert(kind == DRGN_TYPE_STRUCT || + kind == DRGN_TYPE_UNION || + kind == DRGN_TYPE_CLASS); + struct drgn_type key = { + { + .kind = kind, + .is_complete = false, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .tag = tag, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_class_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang) +DEFINE_VECTOR_FUNCTIONS(drgn_type_enumerator_vector) + +void drgn_enum_type_builder_init(struct drgn_enum_type_builder *builder, + struct drgn_program *prog) { - type->_private.kind = DRGN_TYPE_CLASS; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = size; - type->_private.members = members; - type->_private.num_members = num_members; - type->_private.language = drgn_language_or_default(lang); + builder->prog = prog; + drgn_type_enumerator_vector_init(&builder->enumerators); } -void drgn_class_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +void drgn_enum_type_builder_deinit(struct drgn_enum_type_builder *builder) { - type->_private.kind = DRGN_TYPE_CLASS; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.size = 0; - type->_private.members = NULL; - type->_private.num_members = 0; - type->_private.language = drgn_language_or_default(lang); + drgn_type_enumerator_vector_deinit(&builder->enumerators); +} + +struct drgn_error * +drgn_enum_type_builder_add_signed(struct drgn_enum_type_builder *builder, + const char *name, int64_t svalue) +{ + struct drgn_type_enumerator *enumerator = + drgn_type_enumerator_vector_append_entry(&builder->enumerators); + if (!enumerator) + return &drgn_enomem; + enumerator->name = name; + enumerator->svalue = svalue; + return NULL; +} + +struct drgn_error * +drgn_enum_type_builder_add_unsigned(struct drgn_enum_type_builder *builder, + const char *name, uint64_t uvalue) +{ + struct drgn_type_enumerator *enumerator = + 
drgn_type_enumerator_vector_append_entry(&builder->enumerators); + if (!enumerator) + return &drgn_enomem; + enumerator->name = name; + enumerator->uvalue = uvalue; + return NULL; } -void drgn_enum_type_init(struct drgn_type *type, const char *tag, - struct drgn_type *compatible_type, - struct drgn_type_enumerator *enumerators, - size_t num_enumerators, - const struct drgn_language *lang) +struct drgn_error *drgn_enum_type_create(struct drgn_enum_type_builder *builder, + const char *tag, + struct drgn_type *compatible_type, + const struct drgn_language *lang, + struct drgn_type **ret) { - assert(drgn_type_kind(compatible_type) == DRGN_TYPE_INT); + if (drgn_type_program(compatible_type) != builder->prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + return drgn_error_create(DRGN_ERROR_TYPE, + "compatible type of enum type must be integer type"); + } + + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + if (!drgn_typep_vector_append(&builder->prog->created_types, &type)) { + free(type); + return &drgn_enomem; + } + + drgn_type_enumerator_vector_shrink_to_fit(&builder->enumerators); + type->_private.kind = DRGN_TYPE_ENUM; type->_private.is_complete = true; type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; type->_private.tag = tag; type->_private.type = compatible_type; type->_private.qualifiers = 0; - type->_private.enumerators = enumerators; - type->_private.num_enumerators = num_enumerators; - type->_private.language = drgn_language_or_default(lang); + type->_private.enumerators = builder->enumerators.data; + type->_private.num_enumerators = builder->enumerators.size; + type->_private.program = builder->prog; + type->_private.language = + lang ? 
lang : drgn_program_language(builder->prog); + *ret = type; + return NULL; } -void drgn_enum_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang) +struct drgn_error * +drgn_incomplete_enum_type_create(struct drgn_program *prog, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_ENUM; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.tag = tag; - type->_private.type = NULL; - type->_private.qualifiers = 0; - type->_private.enumerators = NULL; - type->_private.num_enumerators = 0; - type->_private.language = drgn_language_or_default(lang); + struct drgn_type key = { + { + .kind = DRGN_TYPE_ENUM, + .is_complete = false, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .tag = tag, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_typedef_type_init(struct drgn_type *type, const char *name, - struct drgn_qualified_type aliased_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_typedef_type_create(struct drgn_program *prog, const char *name, + struct drgn_qualified_type aliased_type, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_TYPEDEF; - type->_private.is_complete = drgn_type_is_complete(aliased_type.type); + if (drgn_type_program(aliased_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + enum drgn_primitive_type primitive; if (strcmp(name, "size_t") == 0) - type->_private.primitive = DRGN_C_TYPE_SIZE_T; + primitive = DRGN_C_TYPE_SIZE_T; else if (strcmp(name, "ptrdiff_t") == 0) - type->_private.primitive = DRGN_C_TYPE_PTRDIFF_T; + primitive = DRGN_C_TYPE_PTRDIFF_T; else - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.name = name; - type->_private.type = 
aliased_type.type; - type->_private.qualifiers = aliased_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + primitive = DRGN_NOT_PRIMITIVE_TYPE; + + struct drgn_type key = { + { + .kind = DRGN_TYPE_TYPEDEF, + .is_complete = drgn_type_is_complete(aliased_type.type), + .primitive = primitive, + .name = name, + .type = aliased_type.type, + .qualifiers = aliased_type.qualifiers, + .program = prog, + .language = lang ? lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_pointer_type_init(struct drgn_type *type, uint64_t size, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_pointer_type_create(struct drgn_program *prog, + struct drgn_qualified_type referenced_type, + uint64_t size, const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_POINTER; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.size = size; - type->_private.type = referenced_type.type; - type->_private.qualifiers = referenced_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_program(referenced_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_POINTER, + .is_complete = true, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .size = size, + .type = referenced_type.type, + .qualifiers = referenced_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_array_type_init(struct drgn_type *type, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + uint64_t length, const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_ARRAY; - type->_private.is_complete = true; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.length = length; - type->_private.type = element_type.type; - type->_private.qualifiers = element_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_program(element_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_ARRAY, + .is_complete = true, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .length = length, + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); } -void drgn_array_type_init_incomplete(struct drgn_type *type, - struct drgn_qualified_type element_type, - const struct drgn_language *lang) +struct drgn_error * +drgn_incomplete_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret) { - type->_private.kind = DRGN_TYPE_ARRAY; - type->_private.is_complete = false; - type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; - type->_private.length = 0; - type->_private.type = element_type.type; - type->_private.qualifiers = element_type.qualifiers; - type->_private.language = drgn_language_or_default(lang); + if (drgn_type_program(element_type.type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type key = { + { + .kind = DRGN_TYPE_ARRAY, + .is_complete = false, + .primitive = DRGN_NOT_PRIMITIVE_TYPE, + .type = element_type.type, + .qualifiers = element_type.qualifiers, + .program = prog, + .language = lang ? 
lang : drgn_program_language(prog), + } + }; + return find_or_create_type(&key, ret); +} + +DEFINE_VECTOR_FUNCTIONS(drgn_type_parameter_vector) + +void drgn_function_type_builder_init(struct drgn_function_type_builder *builder, + struct drgn_program *prog) +{ + builder->prog = prog; + drgn_type_parameter_vector_init(&builder->parameters); } -void drgn_function_type_init(struct drgn_type *type, - struct drgn_qualified_type return_type, - struct drgn_type_parameter *parameters, - size_t num_parameters, bool is_variadic, - const struct drgn_language *lang) +void +drgn_function_type_builder_deinit(struct drgn_function_type_builder *builder) { + for (size_t i = 0; i < builder->parameters.size; i++) + drgn_lazy_type_deinit(&builder->parameters.data[i].type); + drgn_type_parameter_vector_deinit(&builder->parameters); +} + +struct drgn_error * +drgn_function_type_builder_add_parameter(struct drgn_function_type_builder *builder, + struct drgn_lazy_type type, + const char *name) +{ + struct drgn_error *err = drgn_lazy_type_check_prog(&type, + builder->prog); + if (err) + return err; + struct drgn_type_parameter *parameter = + drgn_type_parameter_vector_append_entry(&builder->parameters); + if (!parameter) + return &drgn_enomem; + parameter->type = type; + parameter->name = name; + return NULL; +} + +struct drgn_error * +drgn_function_type_create(struct drgn_function_type_builder *builder, + struct drgn_qualified_type return_type, + bool is_variadic, const struct drgn_language *lang, + struct drgn_type **ret) +{ + if (drgn_type_program(return_type.type) != builder->prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type is from different program"); + } + + struct drgn_type *type = malloc(sizeof(*type)); + if (!type) + return &drgn_enomem; + if (!drgn_typep_vector_append(&builder->prog->created_types, &type)) { + free(type); + return &drgn_enomem; + } + + drgn_type_parameter_vector_shrink_to_fit(&builder->parameters); + type->_private.kind = DRGN_TYPE_FUNCTION; 
type->_private.is_complete = true; type->_private.primitive = DRGN_NOT_PRIMITIVE_TYPE; type->_private.type = return_type.type; type->_private.qualifiers = return_type.qualifiers; - type->_private.parameters = parameters; - type->_private.num_parameters = num_parameters; + type->_private.parameters = builder->parameters.data; + type->_private.num_parameters = builder->parameters.size; type->_private.is_variadic = is_variadic; - type->_private.language = drgn_language_or_default(lang); + type->_private.program = builder->prog; + type->_private.language = + lang ? lang : drgn_program_language(builder->prog); + *ret = type; + return NULL; } struct drgn_type_pair { @@ -609,8 +993,6 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, int *depth, bool *ret) { struct drgn_error *err; - struct drgn_type_pair pair = { a, b }; - struct hash_pair hp; if (*depth >= 1000) { return drgn_error_create(DRGN_ERROR_RECURSION, @@ -630,7 +1012,8 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, * Cache this comparison so that we don't do it again. We insert the * cache entry before doing the comparison in order to break cycles. */ - hp = drgn_type_pair_set_hash(&pair); + struct drgn_type_pair pair = { a, b }; + struct hash_pair hp = drgn_type_pair_set_hash(&pair); switch (drgn_type_pair_set_insert_hashed(cache, &pair, hp, NULL)) { case 1: /* These types haven't been compared yet. */ @@ -651,18 +1034,48 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, (*depth)++; if (drgn_type_kind(a) != drgn_type_kind(b) || - drgn_type_language(a) != drgn_type_language(b) || drgn_type_is_complete(a) != drgn_type_is_complete(b)) goto out_false; + switch (drgn_type_kind(a)) { + /* + * These types are uniquely deduplicated, so if their pointers did not + * compare equal then they are not equal. 
+ */ + case DRGN_TYPE_VOID: + case DRGN_TYPE_INT: + case DRGN_TYPE_BOOL: + case DRGN_TYPE_FLOAT: + case DRGN_TYPE_COMPLEX: + goto out_false; + /* These types are uniquely deduplicated only if incomplete. */ + case DRGN_TYPE_STRUCT: + case DRGN_TYPE_UNION: + case DRGN_TYPE_CLASS: + case DRGN_TYPE_ENUM: + if (!drgn_type_is_complete(a)) + goto out_false; + break; + /* + * These types are not uniquely deduplicated because they can refer to + * types that are not deduplicated. + */ + case DRGN_TYPE_TYPEDEF: + case DRGN_TYPE_POINTER: + case DRGN_TYPE_ARRAY: + case DRGN_TYPE_FUNCTION: + break; + } + + if (drgn_type_language(a) != drgn_type_language(b)) + goto out_false; + if (drgn_type_has_name(a) && strcmp(drgn_type_name(a), drgn_type_name(b)) != 0) goto out_false; if (drgn_type_has_tag(a)) { - const char *tag_a, *tag_b; - - tag_a = drgn_type_tag(a); - tag_b = drgn_type_tag(b); + const char *tag_a = drgn_type_tag(a); + const char *tag_b = drgn_type_tag(b); if ((!tag_a != !tag_b) || (tag_a && strcmp(tag_a, tag_b) != 0)) goto out_false; } @@ -671,14 +1084,10 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, if (drgn_type_has_length(a) && drgn_type_length(a) != drgn_type_length(b)) goto out_false; - if (drgn_type_has_is_signed(a) && - drgn_type_is_signed(a) != drgn_type_is_signed(b)) - goto out_false; + assert(!drgn_type_has_is_signed(a)); if (drgn_type_has_type(a)) { - struct drgn_qualified_type type_a, type_b; - - type_a = drgn_type_type(a); - type_b = drgn_type_type(b); + struct drgn_qualified_type type_a = drgn_type_type(a); + struct drgn_qualified_type type_b = drgn_type_type(b); err = drgn_qualified_type_eq_impl(&type_a, &type_b, cache, depth, ret); if (err || !*ret) @@ -714,6 +1123,10 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, LIBDRGN_PUBLIC struct drgn_error *drgn_type_eq(struct drgn_type *a, struct drgn_type *b, bool *ret) { + if (drgn_type_program(a) != drgn_type_program(b)) { + return 
drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "types are from different programs"); + } struct drgn_type_pair_set cache = HASH_TABLE_INIT; int depth = 0; struct drgn_error *err = drgn_type_eq_impl(a, b, &cache, &depth, ret); @@ -948,3 +1361,390 @@ struct drgn_error *drgn_error_member_not_found(struct drgn_type *type, free(name); return err; } + +void drgn_program_init_types(struct drgn_program *prog) +{ + for (size_t i = 0; i < ARRAY_SIZE(prog->void_types); i++) { + struct drgn_type *type = &prog->void_types[i]; + type->_private.kind = DRGN_TYPE_VOID; + type->_private.is_complete = false; + type->_private.primitive = DRGN_C_TYPE_VOID; + type->_private.program = prog; + type->_private.language = &drgn_languages[i]; + } + drgn_dedupe_type_set_init(&prog->dedupe_types); + drgn_typep_vector_init(&prog->created_types); + drgn_member_map_init(&prog->members); + drgn_type_set_init(&prog->members_cached); +} + +void drgn_program_deinit_types(struct drgn_program *prog) +{ + drgn_member_map_deinit(&prog->members); + drgn_type_set_deinit(&prog->members_cached); + + for (size_t i = 0; i < prog->created_types.size; i++) { + struct drgn_type *type = prog->created_types.data[i]; + if (drgn_type_has_members(type)) { + struct drgn_type_member *members = + drgn_type_members(type); + size_t num_members = drgn_type_num_members(type); + for (size_t j = 0; j < num_members; j++) + drgn_lazy_type_deinit(&members[j].type); + free(members); + } + if (drgn_type_has_enumerators(type)) + free(drgn_type_enumerators(type)); + if (drgn_type_has_parameters(type)) { + struct drgn_type_parameter *parameters = + drgn_type_parameters(type); + size_t num_parameters = drgn_type_num_parameters(type); + for (size_t j = 0; j < num_parameters; j++) + drgn_lazy_type_deinit(&parameters[j].type); + free(parameters); + } + free(type); + } + drgn_typep_vector_deinit(&prog->created_types); + + for (struct drgn_dedupe_type_set_iterator it = + drgn_dedupe_type_set_first(&prog->dedupe_types); + it.entry; it = 
drgn_dedupe_type_set_next(it)) + free(*it.entry); + drgn_dedupe_type_set_deinit(&prog->dedupe_types); + + struct drgn_type_finder *finder = prog->type_finders; + while (finder) { + struct drgn_type_finder *next = finder->next; + free(finder); + finder = next; + } +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_add_type_finder(struct drgn_program *prog, drgn_type_find_fn fn, + void *arg) +{ + struct drgn_type_finder *finder = malloc(sizeof(*finder)); + if (!finder) + return &drgn_enomem; + finder->fn = fn; + finder->arg = arg; + finder->next = prog->type_finders; + prog->type_finders = finder; + return NULL; +} + +struct drgn_error * +drgn_program_find_type_impl(struct drgn_program *prog, + enum drgn_type_kind kind, const char *name, + size_t name_len, const char *filename, + struct drgn_qualified_type *ret) +{ + struct drgn_type_finder *finder = prog->type_finders; + while (finder) { + struct drgn_error *err = + finder->fn(kind, name, name_len, filename, finder->arg, + ret); + if (!err) { + if (drgn_type_program(ret->type) != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "type find callback returned type from wrong program"); + } + if (drgn_type_kind(ret->type) != kind) { + return drgn_error_create(DRGN_ERROR_TYPE, + "type find callback returned wrong kind of type"); + } + return NULL; + } + if (err != &drgn_not_found) + return err; + finder = finder->next; + } + return &drgn_not_found; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_find_type(struct drgn_program *prog, const char *name, + const char *filename, struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + err = drgn_program_language(prog)->find_type(prog, name, filename, ret); + if (err != &drgn_not_found) + return err; + + if (filename) { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s' in '%s'", name, + filename); + } else { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s'", name); + } +} + +/* + * size_t and ptrdiff_t 
default to typedefs of whatever integer type matches the + * word size. + */ +static struct drgn_error * +default_size_t_or_ptrdiff_t(struct drgn_program *prog, + enum drgn_primitive_type type, + struct drgn_type **ret) +{ + static const enum drgn_primitive_type integer_types[2][3] = { + { + DRGN_C_TYPE_UNSIGNED_LONG, + DRGN_C_TYPE_UNSIGNED_LONG_LONG, + DRGN_C_TYPE_UNSIGNED_INT, + }, + { + DRGN_C_TYPE_LONG, + DRGN_C_TYPE_LONG_LONG, + DRGN_C_TYPE_INT, + }, + }; + struct drgn_error *err; + uint8_t word_size; + + err = drgn_program_word_size(prog, &word_size); + if (err) + return err; + for (size_t i = 0; i < ARRAY_SIZE(integer_types[0]); i++) { + enum drgn_primitive_type integer_type; + struct drgn_qualified_type qualified_type; + + integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; + err = drgn_program_find_primitive_type(prog, integer_type, + &qualified_type.type); + if (err) + return err; + if (drgn_type_size(qualified_type.type) == word_size) { + qualified_type.qualifiers = 0; + return drgn_typedef_type_create(prog, + drgn_primitive_type_spellings[type][0], + qualified_type, + &drgn_language_c, ret); + } + } + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "no suitable integer type for %s", + drgn_primitive_type_spellings[type][0]); +} + +struct drgn_error * +drgn_program_find_primitive_type(struct drgn_program *prog, + enum drgn_primitive_type type, + struct drgn_type **ret) +{ + struct drgn_error *err; + struct drgn_qualified_type qualified_type; + enum drgn_type_kind kind; + const char * const *spellings; + size_t i; + + if (prog->primitive_types[type]) { + *ret = prog->primitive_types[type]; + return NULL; + } + + kind = drgn_primitive_type_kind[type]; + if (kind == DRGN_TYPE_VOID) { + *ret = drgn_void_type(prog, &drgn_language_c); + goto out; + } + + spellings = drgn_primitive_type_spellings[type]; + for (i = 0; spellings[i]; i++) { + err = drgn_program_find_type_impl(prog, kind, spellings[i], + strlen(spellings[i]), NULL, + 
&qualified_type); + if (!err && drgn_type_primitive(qualified_type.type) == type) { + *ret = qualified_type.type; + goto out; + } else if (err && err != &drgn_not_found) { + return err; + } + } + + switch (type) { + case DRGN_C_TYPE_CHAR: + case DRGN_C_TYPE_SIGNED_CHAR: + err = drgn_int_type_create(prog, spellings[0], 1, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_CHAR: + err = drgn_int_type_create(prog, spellings[0], 1, false, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_SHORT: + err = drgn_int_type_create(prog, spellings[0], 2, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_SHORT: + err = drgn_int_type_create(prog, spellings[0], 2, false, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_INT: + err = drgn_int_type_create(prog, spellings[0], 4, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_INT: + err = drgn_int_type_create(prog, spellings[0], 4, false, + &drgn_language_c, ret); + break; + /* long and unsigned long default to the word size. 
*/ + case DRGN_C_TYPE_LONG: + case DRGN_C_TYPE_UNSIGNED_LONG: { + uint8_t word_size; + + err = drgn_program_word_size(prog, &word_size); + if (err) + break; + err = drgn_int_type_create(prog, spellings[0], word_size, + type == DRGN_C_TYPE_LONG, + &drgn_language_c, ret); + break; + } + case DRGN_C_TYPE_LONG_LONG: + err = drgn_int_type_create(prog, spellings[0], 8, true, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_UNSIGNED_LONG_LONG: + err = drgn_int_type_create(prog, spellings[0], 8, false, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_BOOL: + err = drgn_bool_type_create(prog, spellings[0], 1, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_FLOAT: + err = drgn_float_type_create(prog, spellings[0], 4, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_DOUBLE: + err = drgn_float_type_create(prog, spellings[0], 8, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_LONG_DOUBLE: + err = drgn_float_type_create(prog, spellings[0], 16, + &drgn_language_c, ret); + break; + case DRGN_C_TYPE_SIZE_T: + case DRGN_C_TYPE_PTRDIFF_T: + err = default_size_t_or_ptrdiff_t(prog, type, ret); + break; + default: + UNREACHABLE(); + } + if (err) + return err; + assert(drgn_type_primitive(*ret) == type); + +out: + prog->primitive_types[type] = *ret; + return NULL; +} + +static struct drgn_error * +drgn_program_cache_members(struct drgn_program *prog, + struct drgn_type *outer_type, + struct drgn_type *type, uint64_t bit_offset) +{ + if (!drgn_type_has_members(type)) + return NULL; + + struct drgn_type_member *members = drgn_type_members(type); + size_t num_members = drgn_type_num_members(type); + for (size_t i = 0; i < num_members; i++) { + struct drgn_type_member *member = &members[i]; + if (member->name) { + struct drgn_member_map_entry entry = { + .key = { + .type = outer_type, + .name = member->name, + .name_len = strlen(member->name), + }, + .value = { + .type = &member->type, + .bit_offset = + bit_offset + member->bit_offset, + .bit_field_size = + 
member->bit_field_size, + }, + }; + if (drgn_member_map_insert(&prog->members, &entry, + NULL) == -1) + return &drgn_enomem; + } else { + struct drgn_qualified_type member_type; + struct drgn_error *err = drgn_member_type(member, + &member_type); + if (err) + return err; + err = drgn_program_cache_members(prog, outer_type, + member_type.type, + bit_offset + + member->bit_offset); + if (err) + return err; + } + } + return NULL; +} + +struct drgn_error *drgn_program_find_member(struct drgn_program *prog, + struct drgn_type *type, + const char *member_name, + size_t member_name_len, + struct drgn_member_value **ret) +{ + const struct drgn_member_key key = { + .type = drgn_underlying_type(type), + .name = member_name, + .name_len = member_name_len, + }; + struct hash_pair hp = drgn_member_map_hash(&key); + struct drgn_member_map_iterator it = + drgn_member_map_search_hashed(&prog->members, &key, hp); + if (it.entry) { + *ret = &it.entry->value; + return NULL; + } + + /* + * Cache miss. One of the following is true: + * + * 1. The type isn't a structure, union, or class, which is a type + * error. + * 2. The type hasn't been cached, which means we need to cache it and + * check again. + * 3. The type has already been cached, which means the member doesn't + * exist. 
+ */ + if (!drgn_type_has_members(key.type)) { + return drgn_type_error("'%s' is not a structure, union, or class", + type); + } + struct hash_pair cached_hp = drgn_type_set_hash(&key.type); + if (drgn_type_set_search_hashed(&prog->members_cached, &key.type, + cached_hp).entry) + return drgn_error_member_not_found(type, member_name); + + struct drgn_error *err = drgn_program_cache_members(prog, key.type, + key.type, 0); + if (err) + return err; + + if (drgn_type_set_insert_searched(&prog->members_cached, &key.type, + cached_hp, NULL) == -1) + return &drgn_enomem; + + it = drgn_member_map_search_hashed(&prog->members, &key, hp); + if (it.entry) { + *ret = &it.entry->value; + return NULL; + } + + return drgn_error_member_not_found(type, member_name); +} diff --git a/libdrgn/type.h b/libdrgn/type.h index 4acae99e2..f5042310c 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -12,8 +12,13 @@ #ifndef DRGN_TYPE_H #define DRGN_TYPE_H +#include + #include "drgn.h" -#include "language.h" +#include "hash_table.h" +#include "vector.h" + +struct drgn_language; /** * @ingroup Internals @@ -29,6 +34,49 @@ * @{ */ +/** Registered type finding callback in a @ref drgn_program. */ +struct drgn_type_finder { + /** The callback. */ + drgn_type_find_fn fn; + /** Argument to pass to @ref drgn_type_finder::fn. */ + void *arg; + /** Next callback to try. */ + struct drgn_type_finder *next; +}; + +DEFINE_HASH_SET_TYPE(drgn_dedupe_type_set, struct drgn_type *) + +/** (type, member name) pair. */ +struct drgn_member_key { + struct drgn_type *type; + const char *name; + size_t name_len; +}; + +/** Type, offset, and bit field size of a type member. */ +struct drgn_member_value { + struct drgn_lazy_type *type; + uint64_t bit_offset, bit_field_size; +}; + +#ifdef DOXYGEN +/** + * @struct drgn_member_map + * + * Map of compound type members. + * + * The key is a @ref drgn_member_key, and the value is a @ref drgn_member_value. 
+ * + * @struct drgn_type_set + * + * Set of types compared by address. + */ +#else +DEFINE_HASH_MAP_TYPE(drgn_member_map, struct drgn_member_key, + struct drgn_member_value) +DEFINE_HASH_SET_TYPE(drgn_type_set, struct drgn_type *) +#endif + /** * @defgroup LazyTypes Lazy types * @@ -52,6 +100,8 @@ * like @c container_of(). */ struct drgn_type_thunk { + /** Program owning this thunk. */ + struct drgn_program *prog; /** * Callback to evaluate this thunk to a @ref drgn_qualified_type. * @@ -74,7 +124,10 @@ struct drgn_type_thunk { * * @param[in] thunk Thunk to free. */ -void drgn_type_thunk_free(struct drgn_type_thunk *thunk); +static inline void drgn_type_thunk_free(struct drgn_type_thunk *thunk) +{ + thunk->free_fn(thunk); +} /** * Create a @ref drgn_lazy_type from a @ref drgn_type_thunk. @@ -128,11 +181,11 @@ static inline bool drgn_lazy_type_is_evaluated(struct drgn_lazy_type *lazy_type) * remains in a valid, unevaluated state. * * @param[in] lazy_type Lazy type to evaluate. - * @param[out] qualified_type Evaluated type. + * @param[out] ret Evaluated type. * @return @c NULL on success, non-@c NULL on error. */ struct drgn_error *drgn_lazy_type_evaluate(struct drgn_lazy_type *lazy_type, - struct drgn_qualified_type *qualified_type); + struct drgn_qualified_type *ret); /** * Free a @ref drgn_lazy_type. @@ -151,14 +204,13 @@ void drgn_lazy_type_deinit(struct drgn_lazy_type *lazy_type); * * Creating type descriptors. * - * libdrgn does not provide a way to allocate a @ref drgn_type. Instead, a type - * can be allocated in any way deemed appropriate (e.g., with @c malloc(), on - * the stack, embedded in another structure). These helpers initialize an - * allocated type. + * These functions create type descriptors. They are valid for the lifetime of + * the program that owns them. * - * Note that structure, union, enumerated, and function types end with a - * variable-length array. The caller must allocate the necessary number of - * elements. 
+ * A few kinds of types have variable-length fields: structure, union, and class + * types have members, enumerated types have enumerators, and function types + * have parameters. These fields are constructed with a @em builder before + * creating the type. * * @{ */ @@ -166,342 +218,370 @@ void drgn_lazy_type_deinit(struct drgn_lazy_type *lazy_type); /** * Get the void type for the given @ref drgn_language. * - * The void type does not have any fields, so there is a single type - * descriptor per language to represent it. + * The void type does not have any fields, so a program has a single type + * descriptor per language to represent it. This function cannot fail. + * + * @param[in] prog Program owning type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. */ -static inline struct drgn_type * -drgn_void_type(const struct drgn_language *lang) -{ - return (struct drgn_type *)&drgn_language_or_default(lang)->void_type; -} +struct drgn_type *drgn_void_type(struct drgn_program *prog, + const struct drgn_language *lang); /** - * Initialize an integer type. + * Create an integer type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. * @param[in] is_signed Whether the type is signed. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
*/ -void drgn_int_type_init(struct drgn_type *type, const char *name, uint64_t size, - bool is_signed, const struct drgn_language *lang); +struct drgn_error *drgn_int_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a boolean type. + * Create a boolean type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_bool_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang); +struct drgn_error *drgn_bool_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a floating-point type. + * Create a floating-point type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
*/ -void drgn_float_type_init(struct drgn_type *type, const char *name, - uint64_t size, const struct drgn_language *lang); +struct drgn_error *drgn_float_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a complex type. + * Create a complex type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. * @param[in] size Size of the type in bytes. - * @param[in] real_type The corresponding real type. It must not be @c NULL and - * must be a floating-point or integer type. - * @param[in] lang Language of this type. + * @param[in] real_type Corresponding real type. Must not be @c NULL and must be + * a floating-point or integer type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_complex_type_init(struct drgn_type *type, const char *name, - uint64_t size, struct drgn_type *real_type, - const struct drgn_language *lang); +struct drgn_error *drgn_complex_type_create(struct drgn_program *prog, + const char *name, uint64_t size, + struct drgn_type *real_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +DEFINE_VECTOR_TYPE(drgn_type_member_vector, struct drgn_type_member) + +/** Builder for members of a structure, union, or class type. */ +struct drgn_compound_type_builder { + struct drgn_program *prog; + enum drgn_type_kind kind; + struct drgn_type_member_vector members; +}; /** - * Initialize a member of a type. + * Initialize a @ref drgn_compound_type_builder. * - * @param[out] members Member to initialize. 
- * @param[in] member_type See @ref drgn_type_member::type. - * @param[in] name See @ref drgn_type_member::name. - * @param[in] bit_offset See @ref drgn_type_member::bit_offset. - * @param[in] bit_field_size See @ref drgn_type_member::bit_field_size. + * @param[in] kind One of @ref DRGN_TYPE_STRUCT, @ref DRGN_TYPE_UNION, or @ref + * DRGN_TYPE_CLASS. */ -static inline void drgn_type_member_init(struct drgn_type_member *member, - struct drgn_lazy_type member_type, - const char *name, uint64_t bit_offset, - uint64_t bit_field_size) -{ - member->type = member_type; - member->name = name; - member->bit_offset = bit_offset; - member->bit_field_size = bit_field_size; -} +void drgn_compound_type_builder_init(struct drgn_compound_type_builder *builder, + struct drgn_program *prog, + enum drgn_type_kind kind); /** - * Free a member of a type. + * Deinitialize a @ref drgn_compound_type_builder. * - * This only frees @ref drgn_type_member::type. + * Don't call this if @ref drgn_compound_type_create() succeeded. + */ +void +drgn_compound_type_builder_deinit(struct drgn_compound_type_builder *builder); + +/** + * Add a @ref drgn_type_member to a @ref drgn_compound_type_builder. * - * @param[out] member Member to free. + * On success, @p builder takes ownership of @p type. */ -static inline void drgn_type_member_deinit(struct drgn_type_member *member) -{ - drgn_lazy_type_deinit(&member->type); -} +struct drgn_error * +drgn_compound_type_builder_add_member(struct drgn_compound_type_builder *builder, + struct drgn_lazy_type type, + const char *name, uint64_t bit_offset, + uint64_t bit_field_size); /** - * Initialize a structure type. + * Create a structure, union, or class type. * - * @param[out] type Type to initialize. - * @param[in] tag Name of the type. This string is not copied. It may be @c NULL - * if the type is anonymous. + * On success, this takes ownership of @p builder. + * + * @param[in] builder Builder containing members. 
@c type and @c name of each + * member must remain valid for the lifetime of @c builder->prog. + * @param[in] tag Name of the type. Not copied; must remain valid for the + * lifetime of @c builder->prog. May be @c NULL if the type is anonymous. * @param[in] size Size of the type in bytes. - * @param[in] members Members of the type. - * @param[in] num_members The number of members in the type. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @c builder->prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_struct_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang); +struct drgn_error * +drgn_compound_type_create(struct drgn_compound_type_builder *builder, + const char *tag, uint64_t size, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an incomplete structure type. + * Create an incomplete structure, union, or class type. * * @c size and @c num_members are set to zero and @c is_complete is set to @c * false. * - * @param[out] type Type to initialize. - * @param[in] tag Name of the type. This string is not copied. It may be @c NULL - * if the type is anonymous. - * @param[in] lang Language of this type. + * @param[in] prog Program owning type. + * @param[in] kind One of @ref DRGN_TYPE_STRUCT, @ref DRGN_TYPE_UNION, or @ref + * DRGN_TYPE_CLASS. + * @param[in] tag Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. May be @c NULL if the type is anonymous. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
*/ -void drgn_struct_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +struct drgn_error * +drgn_incomplete_compound_type_create(struct drgn_program *prog, + enum drgn_type_kind kind, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret); -/** - * Initialize a union type. - * - * @sa drgn_struct_type_init(). - */ -void drgn_union_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang); +DEFINE_VECTOR_TYPE(drgn_type_enumerator_vector, struct drgn_type_enumerator) -/** - * Initialize an incomplete union type. - * - * @sa drgn_struct_type_init_incomplete(). - */ -void drgn_union_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +/** Builder for enumerators of an enumerated type. */ +struct drgn_enum_type_builder { + struct drgn_program *prog; + struct drgn_type_enumerator_vector enumerators; +}; -/** - * Initialize a class type. - * - * @sa drgn_struct_type_init(). - */ -void drgn_class_type_init(struct drgn_type *type, const char *tag, - uint64_t size, struct drgn_type_member *members, - size_t num_members, const struct drgn_language *lang); +/** Initialize a @ref drgn_enum_type_builder. */ +void drgn_enum_type_builder_init(struct drgn_enum_type_builder *builder, + struct drgn_program *prog); /** - * Initialize an incomplete class type. + * Deinitialize a @ref drgn_enum_type_builder. * - * @sa drgn_struct_type_init_incomplete(). + * Don't call this if @ref drgn_enum_type_create() succeeded. */ -void drgn_class_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +void drgn_enum_type_builder_deinit(struct drgn_enum_type_builder *builder); /** - * Initialize a signed enumerator of a type. - * - * @param[out] enumerator Enumerator to initialize. - * @param[in] name See @ref drgn_type_enumerator::name. 
- * @param[in] svalue See @ref drgn_type_enumerator::svalue. + * Add a @ref drgn_type_enumerator with a signed value to a @ref + * drgn_enum_type_builder. */ -static inline void -drgn_type_enumerator_init_signed(struct drgn_type_enumerator *enumerator, - const char *name, int64_t svalue) -{ - enumerator->name = name; - enumerator->svalue = svalue; -} +struct drgn_error * +drgn_enum_type_builder_add_signed(struct drgn_enum_type_builder *builder, + const char *name, int64_t svalue); /** - * Initialize an unsigned enumerator of a type. - * - * @param[out] enumerator Enumerator to initialize. - * @param[in] name See @ref drgn_type_enumerator::name. - * @param[in] uvalue See @ref drgn_type_enumerator::uvalue. + * Add a @ref drgn_type_enumerator with an unsigned value to a @ref + * drgn_enum_type_builder. */ -static inline void -drgn_type_enumerator_init_unsigned(struct drgn_type_enumerator *enumerator, - const char *name, uint64_t uvalue) -{ - enumerator->name = name; - enumerator->uvalue = uvalue; -} +struct drgn_error * +drgn_enum_type_builder_add_unsigned(struct drgn_enum_type_builder *builder, + const char *name, uint64_t uvalue); /** - * Initialize an enumerated type. + * Create an enumerated type. + * + * On success, this takes ownership of @p builder. * - * @param[out] type Type to initialize. + * @param[in] builder Builder containing enumerators. @c name of each enumerator + * must remain valid for the lifetime of @c builder->prog. * @param[in] tag Name of the type. This string is not copied. It may be @c NULL * if the type is anonymous. - * @param[in] compatible_type Type compatible with this enumerated type. It must - * be an integer type. - * @param[in] enumerators Enumerators of the type. - * @param[in] num_enumerators The number of enumerators in the type. - * @param[in] lang Language of this type. + * @param[in] compatible_type Type compatible with this enumerated type. Must be + * an integer type. 
+ * @param[in] lang Language of the type or @c NULL for the default language of + * @c builder->prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_enum_type_init(struct drgn_type *type, const char *tag, - struct drgn_type *compatible_type, - struct drgn_type_enumerator *enumerators, - size_t num_enumerators, - const struct drgn_language *lang); +struct drgn_error *drgn_enum_type_create(struct drgn_enum_type_builder *builder, + const char *tag, + struct drgn_type *compatible_type, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an incomplete enumerated type. + * Create an incomplete enumerated type. * * @c compatible_type is set to @c NULL and @c num_enumerators is set to zero. * - * @param[out] type Type to initialize. - * @param[in] tag Name of the type. This string is not copied. It may be @c NULL - * if the type is anonymous. - * @param[in] lang Language of this type. + * @param[in] prog Program owning type. + * @param[in] tag Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. May be @c NULL if the type is anonymous. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_enum_type_init_incomplete(struct drgn_type *type, const char *tag, - const struct drgn_language *lang); +struct drgn_error * +drgn_incomplete_enum_type_create(struct drgn_program *prog, const char *tag, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a typedef type. + * Create a typedef type. * - * @param[out] type Type to initialize. - * @param[in] name Name of the type. This string is not copied. It must not be - * @c NULL. + * @param[in] prog Program owning type. + * @param[in] name Name of the type. Not copied; must remain valid for the + * lifetime of @p prog. Must not be @c NULL. 
* @param[in] aliased_type Type aliased by the typedef. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_typedef_type_init(struct drgn_type *type, const char *name, - struct drgn_qualified_type aliased_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_typedef_type_create(struct drgn_program *prog, const char *name, + struct drgn_qualified_type aliased_type, + const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize a pointer type. + * Create a pointer type. * - * @param[out] type Type to initialize. - * @param[in] size Size of the type in bytes. + * @param[in] prog Program owning type. * @param[in] referenced_type Type referenced by the pointer type. - * @param[in] lang Language of this type. + * @param[in] size Size of the type in bytes. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_pointer_type_init(struct drgn_type *type, uint64_t size, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_pointer_type_create(struct drgn_program *prog, + struct drgn_qualified_type referenced_type, + uint64_t size, const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an array type. + * Create an array type. * - * @param[out] type Type to initialize. - * @param[in] length Number of elements in the array type. + * @param[in] prog Program owning type. * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of this type. + * @param[in] length Number of elements in the array type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. 
+ * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_array_type_init(struct drgn_type *type, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + uint64_t length, const struct drgn_language *lang, + struct drgn_type **ret); /** - * Initialize an incomplete array type. + * Create an incomplete array type. * * @c length is set to zero. * - * @param[out] type Type to initialize. + * @param[in] prog Program owning type. * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @p prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. */ -void drgn_array_type_init_incomplete(struct drgn_type *type, - struct drgn_qualified_type element_type, - const struct drgn_language *lang); +struct drgn_error * +drgn_incomplete_array_type_create(struct drgn_program *prog, + struct drgn_qualified_type element_type, + const struct drgn_language *lang, + struct drgn_type **ret); + +DEFINE_VECTOR_TYPE(drgn_type_parameter_vector, struct drgn_type_parameter) + +/** Builder for parameters of a function type. */ +struct drgn_function_type_builder { + struct drgn_program *prog; + struct drgn_type_parameter_vector parameters; +}; + +/** Initialize a @ref drgn_function_type_builder. */ +void drgn_function_type_builder_init(struct drgn_function_type_builder *builder, + struct drgn_program *prog); /** - * Initialize a parameter of a type. + * Deinitialize a @ref drgn_function_type_builder. * - * @param[out] parameter Parameter to initialize. - * @param[in] parameter_type See @ref drgn_type_parameter::type. - * @param[in] name See @ref drgn_type_parameter::name. 
+ * Don't call this if @ref drgn_function_type_create() succeeded. */ -static inline void -drgn_type_parameter_init(struct drgn_type_parameter *parameter, - struct drgn_lazy_type parameter_type, const char *name) -{ - parameter->type = parameter_type; - parameter->name = name; -} +void +drgn_function_type_builder_deinit(struct drgn_function_type_builder *builder); /** - * Free a parameter of a type. - * - * This only frees @ref drgn_type_parameter::type. + * Add a @ref drgn_type_parameter to a @ref drgn_function_type_builder. * - * @param[out] parameter Parameter to free. + * On success, @p builder takes ownership of @p type. */ -static inline void drgn_type_parameter_deinit(struct drgn_type_parameter *parameter) -{ - drgn_lazy_type_deinit(&parameter->type); -} +struct drgn_error * +drgn_function_type_builder_add_parameter(struct drgn_function_type_builder *builder, + struct drgn_lazy_type type, + const char *name); /** - * Initialize a function type. + * Create a function type. * - * @param[out] type Type to initialize. + * On success, this takes ownership of @p builder. + * + * @param[in] builder Builder containing parameters. @c type and @c name of each + * parameter must remain valid for the lifetime of @c builder->prog. * @param[in] return_type Type returned by the function type. - * @param[in] parameters Parameters of the function type. - * @param[in] num_parameters The number of parameters accepted by the function - * type. * @param[in] is_variadic Whether the function type is variadic. - * @param[in] lang Language of this type. + * @param[in] lang Language of the type or @c NULL for the default language of + * @c builder->prog. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error.
*/ -void drgn_function_type_init(struct drgn_type *type, - struct drgn_qualified_type return_type, - struct drgn_type_parameter *parameters, - size_t num_parameters, bool is_variadic, - const struct drgn_language *lang); +struct drgn_error * +drgn_function_type_create(struct drgn_function_type_builder *builder, + struct drgn_qualified_type return_type, + bool is_variadic, const struct drgn_language *lang, + struct drgn_type **ret); /** @} */ /** Mapping from @ref drgn_type_kind to the spelling of that kind. */ extern const char * const drgn_type_kind_spelling[]; -/** - * Names of primitive types. - * - * In some languages, like C, the same primitive type can be spelled in multiple - * ways. For example, "int" can also be spelled "signed int" or "int signed". - * - * This maps each @ref drgn_primitive_type to a ``NULL``-terminated array of the - * different ways to spell that type. The spelling at index zero is the - * preferred spelling. - */ -extern const char * const * const -drgn_primitive_type_spellings[DRGN_PRIMITIVE_TYPE_NUM]; - -/** - * Mapping from a @ref drgn_type_primitive to the corresponding @ref - * drgn_type_kind. - */ -extern const enum drgn_type_kind -drgn_primitive_type_kind[DRGN_PRIMITIVE_TYPE_NUM + 1]; - /** * Parse the name of an unqualified primitive C type. * @@ -594,6 +674,59 @@ struct drgn_error *drgn_type_bit_size(struct drgn_type *type, /** Get the appropriate @ref drgn_object_kind for a @ref drgn_type. */ enum drgn_object_kind drgn_type_object_kind(struct drgn_type *type); +/** Initialize type-related fields in a @ref drgn_program. */ +void drgn_program_init_types(struct drgn_program *prog); +/** Deinitialize type-related fields in a @ref drgn_program. */ +void drgn_program_deinit_types(struct drgn_program *prog); + +/** + * Find a parsed type in a @ref drgn_program. + * + * This should only be called by implementations of @ref + * drgn_language::find_type() + * + * @param[in] kind Kind of type to find. 
Must be @ref DRGN_TYPE_STRUCT, @ref + * DRGN_TYPE_UNION, @ref DRGN_TYPE_CLASS, @ref DRGN_TYPE_ENUM, or @ref + * DRGN_TYPE_TYPEDEF. + * @param[in] name Name of the type. + * @param[in] name_len Length of @p name in bytes. + * @param[in] filename See @ref drgn_program_find_type(). + * @param[out] ret Returned type. + * @return @c NULL on success, &@ref drgn_not_found if the type wasn't found, + * non-@c NULL on other error. + */ +struct drgn_error * +drgn_program_find_type_impl(struct drgn_program *prog, + enum drgn_type_kind kind, const char *name, + size_t name_len, const char *filename, + struct drgn_qualified_type *ret); + +/** Find a primitive type in a @ref drgn_program. */ +struct drgn_error * +drgn_program_find_primitive_type(struct drgn_program *prog, + enum drgn_primitive_type type, + struct drgn_type **ret); + +/** + * Find the type, offset, and bit field size of a type member. + * + * This matches the members of the type itself as well as the members of any + * unnamed members of the type. + * + * This caches all members of @p type for subsequent calls. + * + * @param[in] type Compound type to search in. + * @param[in] member_name Name of member. + * @param[in] member_name_len Length of @p member_name + * @param[out] ret Returned member information. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error *drgn_program_find_member(struct drgn_program *prog, + struct drgn_type *type, + const char *member_name, + size_t member_name_len, + struct drgn_member_value **ret); + /** @} */ #endif /* DRGN_TYPE_H */ diff --git a/libdrgn/type_index.c b/libdrgn/type_index.c deleted file mode 100644 index 5b5113fef..000000000 --- a/libdrgn/type_index.c +++ /dev/null @@ -1,622 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -#include -#include - -#include "internal.h" -#include "language.h" -#include "type_index.h" - -/* These functions compare the underlying type by reference, not by value. 
*/ - -static struct hash_pair -drgn_pointer_type_key_hash(const struct drgn_pointer_type_key *key) -{ - size_t hash; - - hash = hash_combine((uintptr_t)key->type, key->qualifiers); - hash = hash_combine(hash, (uintptr_t)key->lang); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_pointer_type_key_eq(const struct drgn_pointer_type_key *a, - const struct drgn_pointer_type_key *b) -{ - return (a->type == b->type && a->qualifiers == b->qualifiers && - a->lang == b->lang); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_pointer_type_table, drgn_pointer_type_key_hash, - drgn_pointer_type_key_eq) - -static struct hash_pair -drgn_array_type_key_hash(const struct drgn_array_type_key *key) -{ - size_t hash; - - hash = hash_combine((uintptr_t)key->type, key->qualifiers); - hash = hash_combine(hash, key->is_complete); - hash = hash_combine(hash, key->length); - hash = hash_combine(hash, (uintptr_t)key->lang); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_array_type_key_eq(const struct drgn_array_type_key *a, - const struct drgn_array_type_key *b) -{ - return (a->type == b->type && a->qualifiers == b->qualifiers && - a->is_complete == b->is_complete && a->length == b->length && - a->lang == b->lang); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_array_type_table, drgn_array_type_key_hash, - drgn_array_type_key_eq) - -static struct hash_pair drgn_member_hash_pair(const struct drgn_member_key *key) -{ - size_t hash; - - if (key->name) - hash = cityhash_size_t(key->name, key->name_len); - else - hash = 0; - hash = hash_combine((uintptr_t)key->type, hash); - return hash_pair_from_avalanching_hash(hash); -} - -static bool drgn_member_eq(const struct drgn_member_key *a, - const struct drgn_member_key *b) -{ - return (a->type == b->type && a->name_len == b->name_len && - (!a->name_len || memcmp(a->name, b->name, a->name_len) == 0)); -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_hash_pair, - drgn_member_eq) - 
-DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, hash_pair_ptr_type, - hash_table_scalar_eq) - -void drgn_type_index_init(struct drgn_type_index *tindex) -{ - tindex->finders = NULL; - memset(tindex->primitive_types, 0, sizeof(tindex->primitive_types)); - drgn_pointer_type_table_init(&tindex->pointer_types); - drgn_array_type_table_init(&tindex->array_types); - drgn_member_map_init(&tindex->members); - drgn_type_set_init(&tindex->members_cached); - tindex->word_size = 0; -} - -static void free_pointer_types(struct drgn_type_index *tindex) -{ - struct drgn_pointer_type_table_iterator it; - - for (it = drgn_pointer_type_table_first(&tindex->pointer_types); - it.entry; it = drgn_pointer_type_table_next(it)) - free(*it.entry); - drgn_pointer_type_table_deinit(&tindex->pointer_types); -} - -static void free_array_types(struct drgn_type_index *tindex) -{ - struct drgn_array_type_table_iterator it; - - for (it = drgn_array_type_table_first(&tindex->array_types); it.entry; - it = drgn_array_type_table_next(it)) - free(*it.entry); - drgn_array_type_table_deinit(&tindex->array_types); -} - -void drgn_type_index_deinit(struct drgn_type_index *tindex) -{ - struct drgn_type_finder *finder; - - drgn_member_map_deinit(&tindex->members); - drgn_type_set_deinit(&tindex->members_cached); - free_array_types(tindex); - free_pointer_types(tindex); - - finder = tindex->finders; - while (finder) { - struct drgn_type_finder *next = finder->next; - - free(finder); - finder = next; - } -} - -struct drgn_error *drgn_type_index_add_finder(struct drgn_type_index *tindex, - drgn_type_find_fn fn, void *arg) -{ - struct drgn_type_finder *finder; - - finder = malloc(sizeof(*finder)); - if (!finder) - return &drgn_enomem; - finder->fn = fn; - finder->arg = arg; - finder->next = tindex->finders; - tindex->finders = finder; - return NULL; -} - -void drgn_type_index_remove_finder(struct drgn_type_index *tindex) -{ - struct drgn_type_finder *finder; - - finder = tindex->finders->next; - 
free(tindex->finders); - tindex->finders = finder; -} - -/* Default long and unsigned long are 64 bits. */ -static struct drgn_type default_primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; -/* 32-bit versions of long and unsigned long. */ -static struct drgn_type default_long_32bit; -static struct drgn_type default_unsigned_long_32bit; - -__attribute__((constructor(200))) -static void default_primitive_types_init(void) -{ - size_t i; - - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_CHAR][0], - 1, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SIGNED_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_SIGNED_CHAR][0], - 1, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_CHAR], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_CHAR][0], - 1, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_SHORT], - drgn_primitive_type_spellings[DRGN_C_TYPE_SHORT][0], - 2, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_SHORT], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_SHORT][0], - 2, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_INT], - drgn_primitive_type_spellings[DRGN_C_TYPE_INT][0], 4, - true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_INT], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_INT][0], - 4, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], - 8, true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], - 8, false, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_LONG][0], - 8, 
true, &drgn_language_c); - drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG_LONG], - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG_LONG][0], - 8, false, &drgn_language_c); - drgn_bool_type_init(&default_primitive_types[DRGN_C_TYPE_BOOL], - drgn_primitive_type_spellings[DRGN_C_TYPE_BOOL][0], - 1, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_FLOAT], - drgn_primitive_type_spellings[DRGN_C_TYPE_FLOAT][0], - 4, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_DOUBLE], - drgn_primitive_type_spellings[DRGN_C_TYPE_DOUBLE][0], - 8, &drgn_language_c); - drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_DOUBLE], - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_DOUBLE][0], - 16, &drgn_language_c); - for (i = 0; i < ARRAY_SIZE(default_primitive_types); i++) { - if (drgn_primitive_type_kind[i] == DRGN_TYPE_VOID || - i == DRGN_C_TYPE_SIZE_T || i == DRGN_C_TYPE_PTRDIFF_T) - continue; - assert(drgn_type_primitive(&default_primitive_types[i]) == i); - } - - drgn_int_type_init(&default_long_32bit, - drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], - 4, true, &drgn_language_c); - assert(drgn_type_primitive(&default_long_32bit) == - DRGN_C_TYPE_LONG); - - drgn_int_type_init(&default_unsigned_long_32bit, - drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], - 4, false, &drgn_language_c); - assert(drgn_type_primitive(&default_unsigned_long_32bit) == - DRGN_C_TYPE_UNSIGNED_LONG); -} - -/* - * Like @ref drgn_type_index_find_parsed(), but returns - * &drgn_error_not_found instead of a more informative error message. 
- */ -static struct drgn_error * -drgn_type_index_find_parsed_internal(struct drgn_type_index *tindex, - enum drgn_type_kind kind, const char *name, - size_t name_len, const char *filename, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - struct drgn_type_finder *finder; - - finder = tindex->finders; - while (finder) { - err = finder->fn(kind, name, name_len, filename, finder->arg, - ret); - if (!err) { - if (drgn_type_kind(ret->type) != kind) { - return drgn_error_create(DRGN_ERROR_TYPE, - "type find callback returned wrong kind of type"); - } - return NULL; - } - if (err != &drgn_not_found) - return err; - finder = finder->next; - } - return &drgn_not_found; -} - -struct drgn_error * -drgn_type_index_find_primitive(struct drgn_type_index *tindex, - enum drgn_primitive_type type, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - enum drgn_type_kind kind; - const char * const *spellings; - size_t i; - - if (tindex->primitive_types[type]) { - *ret = tindex->primitive_types[type]; - return NULL; - } - - kind = drgn_primitive_type_kind[type]; - if (kind == DRGN_TYPE_VOID) { - *ret = drgn_void_type(&drgn_language_c); - goto out; - } - - spellings = drgn_primitive_type_spellings[type]; - for (i = 0; spellings[i]; i++) { - err = drgn_type_index_find_parsed_internal(tindex, kind, - spellings[i], - strlen(spellings[i]), - NULL, - &qualified_type); - if (!err && drgn_type_primitive(qualified_type.type) == type) { - *ret = qualified_type.type; - goto out; - } else if (err && err != &drgn_not_found) { - return err; - } - } - - /* long and unsigned long default to the word size. */ - if (type == DRGN_C_TYPE_LONG || type == DRGN_C_TYPE_UNSIGNED_LONG) { - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - if (tindex->word_size == 4) { - *ret = (type == DRGN_C_TYPE_LONG ? 
- &default_long_32bit : - &default_unsigned_long_32bit); - goto out; - } - } - /* - * size_t and ptrdiff_t default to typedefs of whatever integer type - * matches the word size. - */ - if (type == DRGN_C_TYPE_SIZE_T || type == DRGN_C_TYPE_PTRDIFF_T) { - static enum drgn_primitive_type integer_types[2][3] = { - { - DRGN_C_TYPE_UNSIGNED_LONG, - DRGN_C_TYPE_UNSIGNED_LONG_LONG, - DRGN_C_TYPE_UNSIGNED_INT, - }, - { - DRGN_C_TYPE_LONG, - DRGN_C_TYPE_LONG_LONG, - DRGN_C_TYPE_INT, - }, - }; - - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - for (i = 0; i < 3; i++) { - enum drgn_primitive_type integer_type; - - integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; - err = drgn_type_index_find_primitive(tindex, - integer_type, - &qualified_type.type); - if (err) - return err; - if (drgn_type_size(qualified_type.type) == - tindex->word_size) { - qualified_type.qualifiers = 0; - *ret = (type == DRGN_C_TYPE_SIZE_T ? - &tindex->default_size_t : - &tindex->default_ptrdiff_t); - drgn_typedef_type_init(*ret, spellings[0], - qualified_type, &drgn_language_c); - goto out; - } - } - return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, - "no suitable integer type for %s", - spellings[0]); - } - - *ret = &default_primitive_types[type]; - -out: - tindex->primitive_types[type] = *ret; - return NULL; -} - -struct drgn_error * -drgn_type_index_find_parsed(struct drgn_type_index *tindex, - enum drgn_type_kind kind, const char *name, - size_t name_len, const char *filename, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - int precision; - - err = drgn_type_index_find_parsed_internal(tindex, kind, name, name_len, - filename, ret); - if (err != &drgn_not_found) - return err; - - precision = name_len < INT_MAX ? 
(int)name_len : INT_MAX; - if (filename) { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find '%s %.*s' in '%s'", - drgn_type_kind_spelling[kind], precision, name, - filename); - } else { - return drgn_error_format(DRGN_ERROR_LOOKUP, - "could not find '%s %.*s'", - drgn_type_kind_spelling[kind], precision, name); - } -} - -struct drgn_error * -drgn_type_index_pointer_type(struct drgn_type_index *tindex, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_pointer_type_key key = { - .type = referenced_type.type, - .qualifiers = referenced_type.qualifiers, - .lang = lang ? lang : drgn_type_language(referenced_type.type), - }; - struct drgn_pointer_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - - hp = drgn_pointer_type_table_hash(&key); - it = drgn_pointer_type_table_search_hashed(&tindex->pointer_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_pointer_type_init(type, tindex->word_size, referenced_type, - key.lang); - if (drgn_pointer_type_table_insert_searched(&tindex->pointer_types, - &type, hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -struct drgn_error * -drgn_type_index_array_type(struct drgn_type_index *tindex, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_array_type_key key = { - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = true, - .length = length, - .lang = lang ? 
lang : drgn_type_language(element_type.type), - }; - struct drgn_array_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - hp = drgn_array_type_table_hash(&key); - it = drgn_array_type_table_search_hashed(&tindex->array_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_array_type_init(type, length, element_type, key.lang); - if (drgn_array_type_table_insert_searched(&tindex->array_types, &type, - hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -struct drgn_error * -drgn_type_index_incomplete_array_type(struct drgn_type_index *tindex, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - const struct drgn_array_type_key key = { - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = false, - .lang = lang ? lang : drgn_type_language(element_type.type), - }; - struct drgn_array_type_table_iterator it; - struct drgn_type *type; - struct hash_pair hp; - - hp = drgn_array_type_table_hash(&key); - it = drgn_array_type_table_search_hashed(&tindex->array_types, &key, - hp); - if (it.entry) { - type = *it.entry; - goto out; - } - - type = malloc(sizeof(*type)); - if (!type) - return &drgn_enomem; - drgn_array_type_init_incomplete(type, element_type, key.lang); - if (drgn_array_type_table_insert_searched(&tindex->array_types, &type, - hp, NULL) == -1) { - free(type); - return &drgn_enomem; - } -out: - *ret = type; - return NULL; -} - -static struct drgn_error * -drgn_type_index_cache_members(struct drgn_type_index *tindex, - struct drgn_type *outer_type, - struct drgn_type *type, uint64_t bit_offset) -{ - struct drgn_error *err; - struct drgn_type_member *members; - size_t num_members, i; - - if (!drgn_type_has_members(type)) - return NULL; - - members = drgn_type_members(type); - num_members = 
drgn_type_num_members(type); - for (i = 0; i < num_members; i++) { - struct drgn_type_member *member; - - member = &members[i]; - if (member->name) { - struct drgn_member_map_entry entry = { - .key = { - .type = outer_type, - .name = member->name, - .name_len = strlen(member->name), - }, - .value = { - .type = &member->type, - .bit_offset = - bit_offset + member->bit_offset, - .bit_field_size = - member->bit_field_size, - }, - }; - - if (drgn_member_map_insert(&tindex->members, &entry, - NULL) == -1) - return &drgn_enomem; - } else { - struct drgn_qualified_type member_type; - - err = drgn_member_type(member, &member_type); - if (err) - return err; - err = drgn_type_index_cache_members(tindex, outer_type, - member_type.type, - bit_offset + - member->bit_offset); - if (err) - return err; - } - } - return NULL; -} - -struct drgn_error *drgn_type_index_find_member(struct drgn_type_index *tindex, - struct drgn_type *type, - const char *member_name, - size_t member_name_len, - struct drgn_member_value **ret) -{ - struct drgn_error *err; - const struct drgn_member_key key = { - .type = drgn_underlying_type(type), - .name = member_name, - .name_len = member_name_len, - }; - struct hash_pair hp, cached_hp; - struct drgn_member_map_iterator it; - - hp = drgn_member_map_hash(&key); - it = drgn_member_map_search_hashed(&tindex->members, &key, hp); - if (it.entry) { - *ret = &it.entry->value; - return NULL; - } - - /* - * Cache miss. One of the following is true: - * - * 1. The type isn't a structure, union, or class, which is a type - * error. - * 2. The type hasn't been cached, which means we need to cache it and - * check again. - * 3. The type has already been cached, which means the member doesn't - * exist. 
- */ - if (!drgn_type_has_members(key.type)) { - return drgn_type_error("'%s' is not a structure, union, or class", - type); - } - cached_hp = drgn_type_set_hash(&key.type); - if (drgn_type_set_search_hashed(&tindex->members_cached, &key.type, - cached_hp).entry) - return drgn_error_member_not_found(type, member_name); - - err = drgn_type_index_cache_members(tindex, key.type, key.type, 0); - if (err) - return err; - - if (drgn_type_set_insert_searched(&tindex->members_cached, &key.type, - cached_hp, NULL) == -1) - return &drgn_enomem; - - it = drgn_member_map_search_hashed(&tindex->members, &key, hp); - if (it.entry) { - *ret = &it.entry->value; - return NULL; - } - - return drgn_error_member_not_found(type, member_name); -} diff --git a/libdrgn/type_index.h b/libdrgn/type_index.h deleted file mode 100644 index 46425ac2f..000000000 --- a/libdrgn/type_index.h +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0+ - -/** - * @file - * - * Type lookup and caching. - * - * See @ref TypeIndex. - */ - -#ifndef DRGN_TYPE_INDEX_H -#define DRGN_TYPE_INDEX_H - -#include - -#include "drgn.h" -#include "hash_table.h" -#include "language.h" -#include "type.h" - -/** - * @ingroup Internals - * - * @defgroup TypeIndex Type index - * - * Type lookup and caching. - * - * @ref drgn_type_index provides a common interface for finding types in a - * program. 
- * - * @{ - */ - -struct drgn_pointer_type_key { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - const struct drgn_language *lang; -}; - -static struct drgn_pointer_type_key -drgn_pointer_type_entry_to_key(struct drgn_type * const *entry) -{ - struct drgn_qualified_type referenced_type = drgn_type_type(*entry); - - return (struct drgn_pointer_type_key){ - .type = referenced_type.type, - .qualifiers = referenced_type.qualifiers, - .lang = drgn_type_language(*entry), - }; -} - -struct drgn_array_type_key { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - bool is_complete; - uint64_t length; - const struct drgn_language *lang; -}; - -static struct drgn_array_type_key -drgn_array_type_entry_to_key(struct drgn_type * const *entry) -{ - struct drgn_qualified_type element_type = drgn_type_type(*entry); - - return (struct drgn_array_type_key){ - .type = element_type.type, - .qualifiers = element_type.qualifiers, - .is_complete = drgn_type_is_complete(*entry), - .length = drgn_type_length(*entry), - .lang = drgn_type_language(*entry), - }; -} - -DEFINE_HASH_TABLE_TYPE(drgn_pointer_type_table, struct drgn_type *, - drgn_pointer_type_entry_to_key) -DEFINE_HASH_TABLE_TYPE(drgn_array_type_table, struct drgn_type *, - drgn_array_type_entry_to_key) - -/** (type, member name) pair. */ -struct drgn_member_key { - struct drgn_type *type; - const char *name; - size_t name_len; -}; - -/** Type, offset, and bit field size of a type member. */ -struct drgn_member_value { - struct drgn_lazy_type *type; - uint64_t bit_offset, bit_field_size; -}; - -#ifdef DOXYGEN -/** - * @struct drgn_member_map - * - * Map of compound type members. - * - * The key is a @ref drgn_member_key, and the value is a @ref drgn_member_value. - * - * @struct drgn_type_set - * - * Set of types compared by address. 
- */ -#else -DEFINE_HASH_MAP_TYPE(drgn_member_map, struct drgn_member_key, - struct drgn_member_value) -DEFINE_HASH_SET_TYPE(drgn_type_set, struct drgn_type *) -#endif - -/** Registered callback in a @ref drgn_type_index. */ -struct drgn_type_finder { - /** The callback. */ - drgn_type_find_fn fn; - /** Argument to pass to @ref drgn_type_finder::fn. */ - void *arg; - /** Next callback to try. */ - struct drgn_type_finder *next; -}; - -/** - * Type index. - * - * A type index is used to find types by name and cache the results. The types - * are found using callbacks which are registered with @ref - * drgn_type_index_add_finder(). - * - * @ref drgn_type_index_find() searches for a type. @ref - * drgn_type_index_pointer_type(), @ref drgn_type_index_array_type(), and @ref - * drgn_type_index_incomplete_array_type() create derived types. Any type - * returned by these is valid until the type index is destroyed with @ref - * drgn_type_index_destroy(). - */ -struct drgn_type_index { - /** Callbacks for finding types. */ - struct drgn_type_finder *finders; - /** Cache of primitive types. */ - struct drgn_type *primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; - struct drgn_type default_size_t; - struct drgn_type default_ptrdiff_t; - /** Cache of created pointer types. */ - struct drgn_pointer_type_table pointer_types; - /** Cache of created array types. */ - struct drgn_array_type_table array_types; - /** Cache for @ref drgn_type_index_find_member(). */ - struct drgn_member_map members; - /** - * Set of types which have been already cached in @ref - * drgn_type_index::members. - */ - struct drgn_type_set members_cached; - /** - * Size of a pointer in bytes. - * - * This is zero if it has not been set yet. - */ - uint8_t word_size; -}; - -/** - * Initialize a @ref drgn_type_index. - * - * @param[in] tindex Type index to initialize. - */ -void drgn_type_index_init(struct drgn_type_index *tindex); - -/** Deinitialize a @ref drgn_type_index. 
*/ -void drgn_type_index_deinit(struct drgn_type_index *tindex); - -/** @sa drgn_program_add_type_finder() */ -struct drgn_error *drgn_type_index_add_finder(struct drgn_type_index *tindex, - drgn_type_find_fn fn, void *arg); - -/** - * Remove the most recently added type finding callback. - * - * This must only be called if the type index hasn't been used since the finder - * was added. - */ -void drgn_type_index_remove_finder(struct drgn_type_index *tindex); - -/** Find a primitive type in a @ref drgn_type_index. */ -struct drgn_error * -drgn_type_index_find_primitive(struct drgn_type_index *tindex, - enum drgn_primitive_type type, - struct drgn_type **ret); - -/** - * Find a parsed type in a @ref drgn_type_index. - * - * This should only be called by implementations of @ref - * drgn_language::find_type(). - * - * @param[in] kind Kind of type to find. Must be @ref DRGN_TYPE_STRUCT, @ref - * DRGN_TYPE_UNION, @ref DRGN_TYPE_CLASS, @ref DRGN_TYPE_ENUM, or @ref - * DRGN_TYPE_TYPEDEF. - * @param[in] name Name of the type. - * @param[in] name_len Length of @p name in bytes. - * @param[in] filename See @ref drgn_type_index_find(). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_find_parsed(struct drgn_type_index *tindex, - enum drgn_type_kind kind, const char *name, - size_t name_len, const char *filename, - struct drgn_qualified_type *ret); - -/** - * Find a type in a @ref drgn_type_index. - * - * The returned type is valid for the lifetime of the @ref drgn_type_index. - * - * @param[in] tindex Type index. - * @param[in] name Name of the type. - * @param[in] filename Exact filename containing the type definition, or @c NULL - * for any definition. - * @param[in] lang Language to use to parse @p name. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static inline struct drgn_error * -drgn_type_index_find(struct drgn_type_index *tindex, const char *name, - const char *filename, const struct drgn_language *lang, - struct drgn_qualified_type *ret) -{ - return lang->find_type(tindex, name, filename, ret); -} - -/** - * Create a pointer type. - * - * The created type is cached for the lifetime of the @ref drgn_type_index. If - * the same @p referenced_type and @p lang are passed, the same type will be - * returned. - * - * If this succeeds, @p referenced_type must remain valid until @p tindex is - * destroyed. - * - * @param[in] tindex Type index. - * @param[in] referenced_type Type referenced by the pointer type. - * @param[in] lang Language of the pointer type. If @c NULL, the language of @p - * referenced_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_pointer_type(struct drgn_type_index *tindex, - struct drgn_qualified_type referenced_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Create an array type. - * - * The created type is cached for the lifetime of the @ref drgn_type_index. If - * the same @p length, @p element_type, and @p lang are passed, the same type - * will be returned. - * - * If this succeeds, @p element_type must remain valid until @p tindex is - * destroyed. - * - * @param[in] tindex Type index. - * @param[in] length Number of elements in the array type. - * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of the array type. If @c NULL, the language of @p - * element_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_array_type(struct drgn_type_index *tindex, uint64_t length, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Create an incomplete array type. 
- * - * The created type is cached for the lifetime of the @ref drgn_type_index. If - * the same @p element_type and @p lang are passed, the same type will be - * returned. - * - * If this succeeds, @p element_type must remain valid until @p tindex is - * destroyed. - * - * @param[in] tindex Type index. - * @param[in] element_type Type of an element in the array type. - * @param[in] lang Language of the array type. If @c NULL, the language of @p - * element_type is used. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_type_index_incomplete_array_type(struct drgn_type_index *tindex, - struct drgn_qualified_type element_type, - const struct drgn_language *lang, - struct drgn_type **ret); - -/** - * Find the type, offset, and bit field size of a type member. - * - * This matches the members of the type itself as well as the members of any - * unnamed members of the type. - * - * This caches all members of @p type for subsequent calls. - * - * @param[in] tindex Type index. - * @param[in] type Compound type to search in. - * @param[in] member_name Name of member. - * @param[in] member_name_len Length of @p member_name - * @param[out] ret Returned member information. - * @return @c NULL on success, non-@c NULL on error. 
- */ -struct drgn_error *drgn_type_index_find_member(struct drgn_type_index *tindex, - struct drgn_type *type, - const char *member_name, - size_t member_name_len, - struct drgn_member_value **ret); - -/** @} */ - -#endif /* DRGN_TYPE_INDEX_H */ diff --git a/libdrgn/util.h b/libdrgn/util.h index 085999bdd..902b7b5f6 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -19,6 +19,10 @@ #include #include +#ifndef LIBDRGN_PUBLIC +#define LIBDRGN_PUBLIC __attribute__((visibility("default"))) +#endif + #ifdef NDEBUG #define UNREACHABLE() __builtin_unreachable() #else diff --git a/libdrgn/vector.h b/libdrgn/vector.h index f8771715c..d2ae7344a 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -13,8 +13,8 @@ #define DRGN_VECTOR_H #include -#include -#include +#include // IWYU pragma: keep +#include // IWYU pragma: keep /** * @ingroup Internals diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..5e9b80cdc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[tool.isort] +profile = "black" +known_first_party = ["drgn", "_drgn", "drgndoc", "vmtest"] +combine_as_imports = true +force_sort_within_sections = true diff --git a/scripts/cscope.sh b/scripts/cscope.sh index 52f0b252b..ac81aff76 100755 --- a/scripts/cscope.sh +++ b/scripts/cscope.sh @@ -1,3 +1,15 @@ -#!/bin/sh +#!/bin/bash -find libdrgn -name '*.[ch]' -o -name '*.[ch].in' | cscope -bq -i- +: ${PYTHON:=python3} +cscope_args=(-bq -i-) + +python_include="$("$PYTHON" -c 'import sysconfig; print(sysconfig.get_path("include"))' 2>/dev/null)" +if [[ -n $python_include ]] ; then + cscope_args+=("-I$python_include") +fi +python_platinclude="$("$PYTHON" -c 'import sysconfig; print(sysconfig.get_path("platinclude"))' 2>/dev/null)" +if [[ -n $python_platinclude && $python_platinclude != $python_include ]] ; then + cscope_args+=("-I$python_platinclude") +fi + +find libdrgn -name '*.[ch]' -o -name '*.[ch].in' | cscope "${cscope_args[@]}" diff --git a/scripts/generate_dwarf_constants.py 
b/scripts/generate_dwarf_constants.py index ae62a6300..f6a510b25 100755 --- a/scripts/generate_dwarf_constants.py +++ b/scripts/generate_dwarf_constants.py @@ -5,7 +5,6 @@ import keyword import re - prefixes = [ "DW_AT", "DW_ATE", diff --git a/scripts/generate_primitive_type_spellings.py b/scripts/generate_primitive_type_spellings.py index 03b943dad..7708efea5 100755 --- a/scripts/generate_primitive_type_spellings.py +++ b/scripts/generate_primitive_type_spellings.py @@ -5,7 +5,6 @@ import itertools import sys - SPELLINGS = [ ("DRGN_C_TYPE_VOID", ["void"]), ("DRGN_C_TYPE_CHAR", ["char"]), diff --git a/scripts/iwyu.py b/scripts/iwyu.py new file mode 100755 index 000000000..262204eca --- /dev/null +++ b/scripts/iwyu.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0+ + +import argparse +import json +import os +import os.path +import re +import subprocess +import sys +import sysconfig +import tempfile + +import yaml + +BUILD_BASE = "build/compile_commands" +CDB = BUILD_BASE + "/compile_commands.json" + +IWYU_REGEXES = [ + ("add", r"(.*) should add these lines:"), + ("remove", r"(.*) should remove these lines:"), + ("include_list", r"The full include-list for (.*):"), + ("none", r"---"), + ("none", r"\(.* has correct #includes/fwd-decls\)"), +] + + +# Python.h is the canonical header for the Python C API. The actual definitions +# come from internal header files, so we need an IWYU mapping file. Ideally we +# could do this with include mappings. Unfortunately, Python.h uses ""-style +# includes for those headers, one of which is "object.h". This conflicts with +# libdrgn's "object.h", and IWYU doesn't seem to have a way to distinguish +# between those in the mapping file. So, we generate symbol mappings with the +# find-all-symbols Clang tool. +def gen_python_mapping_file(mapping_path): + # These headers are guaranteed to be included by Python.h. 
See + # https://docs.python.org/3/c-api/intro.html#include-files. + IMPLIED_HEADERS = ( + "", + "", + "", + "", + "", + "", + ) + + include = sysconfig.get_path("include") + platinclude = sysconfig.get_path("platinclude") + + with open( + mapping_path + ".tmp", "w" + ) as imp, tempfile.TemporaryDirectory() as tmpdir: + imp.write("[\n") + for header in IMPLIED_HEADERS: + imp.write( + f' {{"include": ["{header}", "public", "", "public"]}},\n' + ) + + build_dir = os.path.join(tmpdir, "build") + os.mkdir(build_dir) + source = os.path.join(build_dir, "python.c") + with open(source, "w") as f: + f.write("#include ") + + commands = [ + { + "arguments": [ + "clang", + "-I", + include, + "-I", + platinclude, + "-c", + "python.c", + ], + "directory": build_dir, + "file": "python.c", + } + ] + with open(os.path.join(build_dir, "compile_commands.json"), "w") as f: + json.dump(commands, f) + + symbols_dir = os.path.join(tmpdir, "find_all_symbols") + os.mkdir(symbols_dir) + subprocess.check_call( + [ + "find-all-symbols", + "-p=" + build_dir, + "--output-dir=" + symbols_dir, + source, + ] + ) + + find_all_symbols_db = os.path.join(tmpdir, "find_all_symbols_db.yaml") + subprocess.check_call( + [ + "find-all-symbols", + "-p=" + build_dir, + "--merge-dir=" + symbols_dir, + find_all_symbols_db, + ] + ) + + with open(find_all_symbols_db, "r") as f: + for document in yaml.safe_load_all(f): + name = document["Name"] + path = document["FilePath"] + if path.startswith(include + "/"): + header = path[len(include) + 1 :] + elif path.startswith(platinclude + "/"): + header = path[len(platinclude) + 1 :] + else: + continue + if header == "pyconfig.h": + # Probably best not to use these. + continue + imp.write( + f' {{"symbol": ["{name}", "private", "", "public"]}}, # From {header}\n' + ) + # "cpython/object.h" defines struct _typeobject { ... } PyTypeObject. + # For some reason, include-what-you-mean wants struct _typeobject, but + # find-all-symbols only reports PyTypeObject. 
Add it manually. + imp.write( + f' {{"symbol": ["_typeobject", "private", "", "public"]}}, # From cpython/object.h\n' + ) + + imp.write("]\n") + + os.rename(mapping_path + ".tmp", mapping_path) + + +def main(): + parser = argparse.ArgumentParser(description="run include-what-you-use on drgn") + parser.add_argument( + "source", nargs="*", help="run on given file instead of all source files" + ) + args = parser.parse_args() + + if args.source: + sources = {os.path.realpath(source) for source in args.source} + + subprocess.check_call( + [ + "bear", + "--cdb", + CDB, + "-a", + sys.executable, + "setup.py", + "build", + "-b", + BUILD_BASE, + "build_ext", + ] + ) + + python_mapping_file = os.path.join( + BUILD_BASE, + f"python.{sysconfig.get_platform()}.{sysconfig.get_python_version()}.imp", + ) + if not os.path.exists(python_mapping_file): + gen_python_mapping_file(python_mapping_file) + + with open(CDB, "r") as f: + commands = json.load(f) + + for command in commands: + if "elfutils" in os.path.relpath(command["directory"]): + continue + + if ( + args.source + and os.path.realpath(os.path.join(command["directory"], command["file"])) + not in sources + ): + continue + + with subprocess.Popen( + ["include-what-you-use"] + + command["arguments"][1:] + + [ + "-Xiwyu", + "--mapping_file=" + os.path.abspath(python_mapping_file), + "-w", # We don't want warnings from Clang. 
+ ], + cwd=command["directory"], + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as proc: + state = "none" + header = None + lines = [] + for line in proc.stdout: + line = line.rstrip("\n") + match = None + for new_state, regex in IWYU_REGEXES: + match = re.fullmatch(regex, line) + if match: + break + if match: + state = new_state + if state != "none": + path = os.path.relpath( + os.path.join(command["directory"], match.group(1)) + ) + if state in ("add", "remove"): + header = f"{path} should {state} these lines:" + else: + header = None + lines.clear() + elif state != "include_list" and line: + if header is not None: + print("\n" + header) + header = None + print(line) + print( + "Please ignore suggestions to declare opaque types if the appropriate header has already been included." + ) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index f3c9ee6cf..c7db3efc9 100755 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: GPL-3.0+ # setuptools must be imported before distutils (see pypa/setuptools#2230). 
-from setuptools import setup, find_packages, Command +import setuptools # isort: skip import contextlib from distutils import log @@ -14,13 +14,15 @@ import os import os.path import re +import shlex +import subprocess +import sys + import pkg_resources +from setuptools import Command, find_packages, setup from setuptools.command.build_ext import build_ext as _build_ext from setuptools.command.egg_info import egg_info as _egg_info from setuptools.extension import Extension -import shlex -import subprocess -import sys from util import nproc, out_of_date @@ -126,7 +128,7 @@ def run(self): class test(Command): description = "run unit tests after in-place build" - KERNELS = ["5.8", "5.7", "5.6", "5.5", "5.4", "4.19", "4.14", "4.9", "4.4"] + KERNELS = ["5.9", "5.8", "5.7", "5.6", "5.5", "5.4", "4.19", "4.14", "4.9", "4.4"] user_options = [ ( @@ -302,7 +304,7 @@ def get_version(): "egg_info": egg_info, "test": test, }, - entry_points={"console_scripts": ["drgn=drgn.internal.cli:main"],}, + entry_points={"console_scripts": ["drgn=drgn.internal.cli:main"]}, python_requires=">=3.6", author="Omar Sandoval", author_email="osandov@osandov.com", diff --git a/tests/__init__.py b/tests/__init__.py index 96d7366ca..b9b1aaee1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,55 +17,11 @@ TypeEnumerator, TypeKind, TypeMember, - class_type, - enum_type, - float_type, - int_type, - struct_type, - typedef_type, - union_type, ) - DEFAULT_LANGUAGE = Language.C -coord_type = class_type( - "coord", - 12, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), - ), -) -point_type = struct_type( - "point", - 8, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - ), -) -line_segment_type = struct_type( - "line_segment", 16, (TypeMember(point_type, "a"), TypeMember(point_type, "b", 64)) -) -option_type = union_type( - "option", - 4, - ( - 
TypeMember(int_type("int", 4, True), "i"), - TypeMember(float_type("float", 4), "f"), - ), -) -color_type = enum_type( - "color", - int_type("unsigned int", 4, False), - (TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), TypeEnumerator("BLUE", 2)), -) -pid_type = typedef_type("pid_t", int_type("int", 4, True)) - - MOCK_32BIT_PLATFORM = Platform(Architecture.UNKNOWN, PlatformFlags.IS_LITTLE_ENDIAN) MOCK_PLATFORM = Platform( Architecture.UNKNOWN, PlatformFlags.IS_64_BIT | PlatformFlags.IS_LITTLE_ENDIAN @@ -146,10 +102,9 @@ def mock_object_find(prog, name, flags, filename): return prog -class ObjectTestCase(unittest.TestCase): +class TestCase(unittest.TestCase): def setUp(self): super().setUp() - self.prog = mock_program() # For testing, we want to compare the raw objects rather than using the # language's equality operator. def object_equality_func(a, b, msg=None): @@ -217,3 +172,63 @@ def long(self, value): def double(self, value): return Object(self.prog, "double", value=value) + + +class MockProgramTestCase(TestCase): + def setUp(self): + super().setUp() + self.types = [] + self.objects = [] + self.prog = mock_program(types=self.types, objects=self.objects) + self.coord_type = self.prog.class_type( + "coord", + 12, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), + ), + ) + self.point_type = self.prog.struct_type( + "point", + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), + ) + self.line_segment_type = self.prog.struct_type( + "line_segment", + 16, + (TypeMember(self.point_type, "a"), TypeMember(self.point_type, "b", 64)), + ) + self.option_type = self.prog.union_type( + "option", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "i"), + TypeMember(self.prog.float_type("float", 4), "f"), + ), + ) + self.color_type = self.prog.enum_type( + 
"color", + self.prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ) + self.pid_type = self.prog.typedef_type( + "pid_t", self.prog.int_type("int", 4, True) + ) + + def add_memory_segment(self, buf, virt_addr=None, phys_addr=None): + if virt_addr is not None: + self.prog.add_memory_segment( + virt_addr, len(buf), functools.partial(mock_memory_read, buf) + ) + if phys_addr is not None: + self.prog.add_memory_segment( + phys_addr, len(buf), functools.partial(mock_memory_read, buf), True + ) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 268a7ba44..b9dfcc3e6 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -4,10 +4,9 @@ from collections import namedtuple import os.path +from tests.dwarf import DW_AT, DW_FORM, DW_TAG from tests.elf import ET, PT, SHT from tests.elfwriter import ElfSection, create_elf_file -from tests.dwarf import DW_AT, DW_FORM, DW_TAG - DwarfAttrib = namedtuple("DwarfAttrib", ["name", "form", "value"]) DwarfDie = namedtuple("DwarfAttrib", ["tag", "attribs", "children"]) @@ -211,7 +210,7 @@ def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): return create_elf_file( ET.EXEC, [ - ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b"",), + ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), ElfSection( name=".debug_abbrev", sh_type=SHT.PROGBITS, @@ -227,7 +226,7 @@ def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): sh_type=SHT.PROGBITS, data=_compile_debug_line(cu_die, little_endian), ), - ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0",), + ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0"), ], little_endian=little_endian, bits=bits, diff --git a/tests/elfwriter.py b/tests/elfwriter.py index b96246eb7..52a0caaa9 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -52,7 +52,7 @@ def create_elf_file( phdr_struct = struct.Struct(endian + "8I") e_machine = 3 if 
little_endian else 8 # EM_386 or EM_MIPS - shstrtab = ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=bytearray(1),) + shstrtab = ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=bytearray(1)) tmp = [shstrtab] tmp.extend(sections) sections = tmp diff --git a/tests/helpers/linux/test_block.py b/tests/helpers/linux/test_block.py index b77b77b5e..c0b374eaa 100644 --- a/tests/helpers/linux/test_block.py +++ b/tests/helpers/linux/test_block.py @@ -19,7 +19,6 @@ from drgn.helpers.linux.device import MAJOR, MINOR, MKDEV from tests.helpers.linux import LinuxHelperTestCase - LOOP_SET_FD = 0x4C00 LOOP_SET_STATUS64 = 0x4C04 LOOP_GET_STATUS64 = 0x4C05 diff --git a/tests/helpers/linux/test_cgroup.py b/tests/helpers/linux/test_cgroup.py index 686657af1..43cd4baa9 100644 --- a/tests/helpers/linux/test_cgroup.py +++ b/tests/helpers/linux/test_cgroup.py @@ -3,7 +3,6 @@ import os - from drgn.helpers.linux.cgroup import ( cgroup_name, cgroup_path, diff --git a/tests/helpers/linux/test_fs.py b/tests/helpers/linux/test_fs.py index dbcccb394..c1bc6a813 100644 --- a/tests/helpers/linux/test_fs.py +++ b/tests/helpers/linux/test_fs.py @@ -17,7 +17,7 @@ path_lookup, ) from drgn.helpers.linux.pid import find_task -from tests.helpers.linux import LinuxHelperTestCase, MS_BIND, mount, umount +from tests.helpers.linux import MS_BIND, LinuxHelperTestCase, mount, umount class TestFs(LinuxHelperTestCase): diff --git a/tests/helpers/linux/test_kconfig.py b/tests/helpers/linux/test_kconfig.py index 83d0d80ac..8993c9aaa 100644 --- a/tests/helpers/linux/test_kconfig.py +++ b/tests/helpers/linux/test_kconfig.py @@ -4,7 +4,6 @@ import os.path from drgn.helpers.linux.kconfig import get_kconfig - from tests.helpers.linux import LinuxHelperTestCase diff --git a/tests/helpers/linux/test_pid.py b/tests/helpers/linux/test_pid.py index 7fe870ed5..ae0c90fac 100644 --- a/tests/helpers/linux/test_pid.py +++ b/tests/helpers/linux/test_pid.py @@ -3,12 +3,7 @@ import os -from drgn.helpers.linux.pid import 
( - find_pid, - find_task, - for_each_pid, - for_each_task, -) +from drgn.helpers.linux.pid import find_pid, find_task, for_each_pid, for_each_task from tests.helpers.linux import LinuxHelperTestCase diff --git a/tests/helpers/linux/test_user.py b/tests/helpers/linux/test_user.py index 47a9c0555..3ba1be632 100644 --- a/tests/helpers/linux/test_user.py +++ b/tests/helpers/linux/test_user.py @@ -6,7 +6,6 @@ import signal from drgn.helpers.linux.user import find_user, for_each_user - from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, diff --git a/tests/helpers/linux/test_uts.py b/tests/helpers/linux/test_uts.py index 033262ef0..0f4db8f96 100644 --- a/tests/helpers/linux/test_uts.py +++ b/tests/helpers/linux/test_uts.py @@ -4,19 +4,16 @@ import os import drgn - from tests.helpers.linux import LinuxHelperTestCase class TestUts(LinuxHelperTestCase): def test_uts_release(self): self.assertEqual( - self.prog["UTS_RELEASE"].string_().decode(), os.uname().release, + self.prog["UTS_RELEASE"].string_().decode(), os.uname().release ) def test_uts_release_no_debug_info(self): prog = drgn.Program() prog.set_kernel() - self.assertEqual( - prog["UTS_RELEASE"].string_().decode(), os.uname().release, - ) + self.assertEqual(prog["UTS_RELEASE"].string_().decode(), os.uname().release) diff --git a/tests/libdrgn.py b/tests/libdrgn.py index 8138a945f..e5059bded 100644 --- a/tests/libdrgn.py +++ b/tests/libdrgn.py @@ -6,9 +6,8 @@ from enum import auto import os -import drgn import _drgn - +import drgn _drgn_pydll = ctypes.PyDLL(_drgn.__file__) _drgn_cdll = ctypes.CDLL(_drgn.__file__) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index ceecb5039..449818dbf 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -16,31 +16,10 @@ TypeEnumerator, TypeMember, TypeParameter, - array_type, - class_type, - complex_type, - enum_type, - float_type, - function_type, - int_type, - pointer_type, - struct_type, - typedef_type, - union_type, - void_type, -) -from tests 
import ( - DEFAULT_LANGUAGE, - ObjectTestCase, - color_type, - coord_type, - option_type, - pid_type, - point_type, ) +from tests import DEFAULT_LANGUAGE, TestCase from tests.dwarf import DW_AT, DW_ATE, DW_FORM, DW_LANG, DW_TAG -from tests.dwarfwriter import compile_dwarf, DwarfDie, DwarfAttrib - +from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf bool_die = DwarfDie( DW_TAG.base_type, @@ -210,336 +189,632 @@ def dwarf_program(*args, **kwds): return prog -class TestTypes(unittest.TestCase): - @staticmethod - def type_from_dwarf(dies, *args, **kwds): - if isinstance(dies, DwarfDie): - dies = (dies,) - dies = tuple(dies) + ( - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "__TEST__"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], - ), - ) - prog = dwarf_program(dies, *args, **kwds) - return prog.type("__TEST__").type +def test_type_dies(dies): + if isinstance(dies, DwarfDie): + dies = (dies,) + return tuple(dies) + ( + DwarfDie( + DW_TAG.typedef, + [ + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ], + ), + ) - def assertFromDwarf(self, dies, type, *args, **kwds): - self.assertEqual(self.type_from_dwarf(dies, *args, **kwds), type) +class TestTypes(TestCase): def test_unknown_tag(self): - die = DwarfDie(0x9999, ()) + prog = dwarf_program(test_type_dies(DwarfDie(0x9999, ()))) self.assertRaisesRegex( - Exception, "unknown DWARF type tag 0x9999", self.type_from_dwarf, die + Exception, "unknown DWARF type tag 0x9999", prog.type, "TEST" ) - def test_bad_base_type(self): - die = DwarfDie( - DW_TAG.base_type, - [ - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), - DwarfAttrib(DW_AT.name, DW_FORM.string, "bad egg"), - ], + def test_base_type_missing_byte_size(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + 
DwarfAttrib(DW_AT.name, DW_FORM.string, "bad egg"), + ), + ) + ) ) - - byte_size = die.attribs.pop(0) self.assertRaisesRegex( Exception, "DW_TAG_base_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - die, + prog.type, + "TEST", ) - die.attribs.insert(0, byte_size) - encoding = die.attribs.pop(1) + def test_base_type_missing_encoding(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.name, DW_FORM.string, "bad egg"), + ), + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_base_type has missing or invalid DW_AT_encoding", - self.type_from_dwarf, - die, + prog.type, + "TEST", ) - die.attribs.insert(1, encoding) - del die.attribs[2] + def test_base_type_missing_name(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + ), + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_base_type has missing or invalid DW_AT_name", - self.type_from_dwarf, - die, + prog.type, + "TEST", ) def test_complex(self): - dies = [ - DwarfDie( - DW_TAG.base_type, + prog = dwarf_program( + test_type_dies( ( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 16), - DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.complex_float), - DwarfAttrib(DW_AT.name, DW_FORM.string, "double _Complex"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ), - ), - double_die, - ] - self.assertFromDwarf( - dies, complex_type("double _Complex", 16, float_type("double", 8)) + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 16), + DwarfAttrib( + DW_AT.encoding, DW_FORM.data1, DW_ATE.complex_float + ), + DwarfAttrib(DW_AT.name, DW_FORM.string, "double _Complex"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + double_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.complex_type("double 
_Complex", 16, prog.float_type("double", 8)), ) def test_unknown_base_type_encoding(self): - die = DwarfDie( - DW_TAG.base_type, - ( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.encoding, DW_FORM.data1, 99), - DwarfAttrib(DW_AT.name, DW_FORM.string, "magic int"), - ), - ) - self.assertRaisesRegex( - Exception, "unknown DWARF encoding", self.type_from_dwarf, die + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, 99), + DwarfAttrib(DW_AT.name, DW_FORM.string, "magic int"), + ), + ) + ) ) + self.assertRaisesRegex(Exception, "unknown DWARF encoding", prog.type, "TEST") - def test_qualifiers(self): - dies = [ - DwarfDie(DW_TAG.const_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)],), - int_die, - ] - self.assertFromDwarf(dies, int_type("int", 4, True, Qualifiers.CONST)) - - del dies[0].attribs[0] - self.assertFromDwarf(dies, void_type(Qualifiers.CONST)) + def test_qualifier(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.const_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + ) - dies = [ - DwarfDie(DW_TAG.const_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)],), - DwarfDie(DW_TAG.restrict_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)],), - DwarfDie(DW_TAG.volatile_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)],), - DwarfDie(DW_TAG.atomic_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 4)],), - int_die, - ] - self.assertFromDwarf( - dies, - int_type( + def test_multiple_qualifiers(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.const_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + DwarfDie( + DW_TAG.restrict_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + DwarfDie( + DW_TAG.volatile_type, + (DwarfAttrib(DW_AT.type, 
DW_FORM.ref4, 3),), + ), + DwarfDie( + DW_TAG.atomic_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 4),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.int_type( "int", 4, True, - Qualifiers.CONST + qualifiers=Qualifiers.CONST | Qualifiers.RESTRICT | Qualifiers.VOLATILE | Qualifiers.ATOMIC, ), ) - del dies[3].attribs[0] - self.assertFromDwarf( - dies, - void_type( - Qualifiers.CONST + def test_qualifier_void(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.const_type, ()))) + self.assertEqual( + prog.type("TEST").type, prog.void_type(qualifiers=Qualifiers.CONST) + ) + + def test_multiple_qualifiers_void(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.const_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + DwarfDie( + DW_TAG.restrict_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + DwarfDie( + DW_TAG.volatile_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + ), + DwarfDie(DW_TAG.atomic_type, ()), + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.void_type( + qualifiers=Qualifiers.CONST | Qualifiers.RESTRICT | Qualifiers.VOLATILE - | Qualifiers.ATOMIC + | Qualifiers.ATOMIC, ), ) def test_struct(self): - dies = [ - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - ], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + 
DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ), + ) + + def test_struct_anonymous(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), ), - int_die, - ] - - self.assertFromDwarf(dies, point_type) + ) - tag = dies[0].attribs.pop(0) - self.assertFromDwarf( - dies, struct_type(None, point_type.size, point_type.members) + def test_struct_no_members(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0),), + ) + ) ) - dies[0].attribs.insert(0, tag) + self.assertEqual(prog.type("TEST").type, prog.struct_type(None, 0, ())) - children = list(dies[0].children) - dies[0].children.clear() - self.assertFromDwarf(dies, struct_type("point", point_type.size, ())) - 
size = dies[0].attribs.pop(1) - dies[0].attribs.append( - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True) + def test_struct_incomplete(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), + ) + ) ) - self.assertFromDwarf(dies, struct_type("point")) - del dies[0].attribs[-1] - dies[0].attribs.insert(1, size) - dies[0].children.extend(children) + self.assertEqual(prog.type("TEST").type, prog.struct_type("point")) - name = dies[0].children[0].attribs.pop(0) - self.assertFromDwarf( - dies, - struct_type( + def test_struct_unnamed_member(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( "point", - point_type.size, + 8, ( - TypeMember(int_type("int", 4, True), None, 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(prog.int_type("int", 4, True), None), + TypeMember(prog.int_type("int", 4, True), "y", 32), ), ), ) - dies[0].children[0].attribs.insert(0, name) - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_structure_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, + def test_struct_member_missing_type(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + 
DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + ), + ), + ), + ), + ) ) - dies[0].attribs[0] = tag - - size = dies[0].attribs.pop(1) self.assertRaisesRegex( - Exception, - "DW_TAG_structure_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_member is missing DW_AT_type", prog.type, "TEST" ) - dies[0].attribs.insert(1, size) - name = dies[0].children[0].attribs.pop(0) - dies[0].children[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) + def test_struct_member_invalid_type(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.string, "foo"), + ), + ), + ), + ), + ) + ) self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_name", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_member has invalid DW_AT_type", prog.type, "TEST" ) - dies[0].children[0].attribs[0] = name - location = dies[0].children[0].attribs[1] - dies[0].children[0].attribs[1] = DwarfAttrib( - DW_AT.data_member_location, DW_FORM.string, "foo" + def test_struct_member_invalid_location(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.string, + "foo", + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) ) self.assertRaisesRegex( Exception, "DW_TAG_member has invalid DW_AT_data_member_location", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs[1] = location - type_ = dies[0].children[0].attribs.pop(2) + def 
test_struct_missing_size(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.structure_type, ()))) self.assertRaisesRegex( - Exception, "DW_TAG_member is missing DW_AT_type", self.type_from_dwarf, dies + Exception, + "DW_TAG_structure_type has missing or invalid DW_AT_byte_size", + prog.type, + "TEST", ) - dies[0].children[0].attribs.insert( - 2, DwarfAttrib(DW_AT.type, DW_FORM.string, "foo") + + def test_struct_invalid_name(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), + ), + ) + ) ) self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_type", - self.type_from_dwarf, - dies, + Exception, "DW_TAG_structure_type has invalid DW_AT_name", prog.type, "TEST" ) - dies[0].children[0].attribs[2] = type_ def test_incomplete_to_complete(self): - dies = [ - DwarfDie( - DW_TAG.pointer_type, - [ - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), - ], - ), - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "foo.c"), - ], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + 
DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), - ], + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "foo.c"), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) ), - int_die, - ] - self.assertFromDwarf(dies, pointer_type(8, point_type)) + ) - # Ambiguous incomplete type. 
- dies.append( - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "bar.c"), - ], - [ + def test_incomplete_to_complete_ambiguous(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "b"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), - ], + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "foo.c"), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + ), + ), + int_die, + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.decl_file, DW_FORM.udata, "bar.c"), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, 
DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "b"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + ), + ), + ) ) ) - type_ = pointer_type(8, struct_type("point")) - self.assertFromDwarf(dies, type_) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.struct_type("point")) + ) def test_filename(self): dies = list(base_type_dies) + [ @@ -605,12 +880,20 @@ def test_filename(self): ), ] - other_point_type = struct_type( + point_type = lambda prog: prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "a"), - TypeMember(int_type("int", 4, True), "b", 32), + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + other_point_type = lambda prog: prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "a"), + TypeMember(prog.int_type("int", 4, True), "b", 32), ), ) @@ -618,13 +901,14 @@ def test_filename(self): for dir in ["", "src", "usr/src", "/usr/src"]: with self.subTest(dir=dir): self.assertEqual( - prog.type("struct point", os.path.join(dir, "foo.c")), point_type + prog.type("struct point", os.path.join(dir, "foo.c")), + point_type(prog), ) for dir in ["", "bar", "src/bar", "usr/src/bar", "/usr/src/bar"]: with self.subTest(dir=dir): self.assertEqual( prog.type("struct point", os.path.join(dir, "baz.c")), - other_point_type, + other_point_type(prog), ) dies[len(base_type_dies)].attribs[-1] = DwarfAttrib( @@ -637,406 +921,717 @@ def test_filename(self): for dir in ["xy", "src/xy", "usr/src/xy", "/usr/src/xy"]: with self.subTest(dir=dir): self.assertEqual( - prog.type("struct point", os.path.join(dir, "foo.h")), point_type + prog.type("struct point", os.path.join(dir, "foo.h")), + point_type(prog), ) for dir in ["ab", "include/ab", "usr/include/ab", "/usr/include/ab"]: with self.subTest(dir=dir): self.assertEqual( 
prog.type("struct point", os.path.join(dir, "foo.h")), - other_point_type, + other_point_type(prog), ) for filename in [None, "foo.h"]: with self.subTest(filename=filename): self.assertIn( - prog.type("struct point", filename), (point_type, other_point_type) + prog.type("struct point", filename), + (point_type(prog), other_point_type(prog)), ) - def test_bit_field(self): - dies = [ + def test_bit_field_data_bit_offset(self): + dies = ( DwarfDie( DW_TAG.structure_type, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - ], - [ + ), + ( DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), DwarfAttrib(DW_AT.data_bit_offset, DW_FORM.data1, 32), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), DwarfAttrib(DW_AT.data_bit_offset, DW_FORM.data1, 44), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), - ], + ), ), int_die, - ] - - t = struct_type( - "point", - 8, - [ - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32, 12), - TypeMember(int_type("int", 4, True), "z", 44, 20), - ], ) - # With DW_AT_data_bit_offset. - self.assertFromDwarf(dies, t, little_endian=True) - self.assertFromDwarf(dies, t, little_endian=False) - - # With DW_AT_bit_offset on big-endian. 
- dies[0].children[1].attribs[2] = DwarfAttrib( - DW_AT.bit_offset, DW_FORM.data1, 32 - ) - dies[0].children[2].attribs[2] = DwarfAttrib( - DW_AT.bit_offset, DW_FORM.data1, 44 - ) - self.assertFromDwarf(dies, t, little_endian=False) + for little_endian in [True, False]: + prog = dwarf_program(test_type_dies(dies), little_endian=little_endian) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + [ + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), + ], + ), + ) - # With DW_AT_data_member_location and DW_AT_bit_offset on big-endian. - dies[0].children[1].attribs.append( - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4) - ) - dies[0].children[1].attribs[2] = DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0) - dies[0].children[2].attribs.append( - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4) + def test_bit_field_bit_offset_big_endian(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 32), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 44), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ), + little_endian=False, ) - dies[0].children[2].attribs[2] = 
DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 4) - - # With DW_AT_data_member_location and DW_AT_bit_offset on little-endian. - dies[0].children[1].attribs[2] = DwarfAttrib( - DW_AT.bit_offset, DW_FORM.data1, 20 + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + [ + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), + ], + ), ) - dies[0].children[2].attribs[2] = DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0) - self.assertFromDwarf(dies, t, little_endian=True) - # With DW_AT_data_member_location, DW_AT_bit_offset, and - # DW_AT_byte_size on little-endian. - dies[0].children[1].attribs.append( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4) - ) - dies[0].children[2].attribs.append( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4) + def test_bit_field_data_member_location_and_bit_offset_big_endian(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 12), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ), 
+ little_endian=False, ) - self.assertFromDwarf(dies, t, little_endian=True) - - def test_union(self): - dies = [ - DwarfDie( - DW_TAG.union_type, + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "option"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), ], - [ - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), + ), + ) + + def test_bit_field_data_member_location_and_bit_offset_little_endian(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "f"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 20), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + [ + 
TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), ], ), - int_die, - float_die, - ] - - self.assertFromDwarf(dies, option_type) - - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_union_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, ) - dies[0].attribs[0] = tag - size = dies[0].attribs.pop(1) - self.assertRaisesRegex( - Exception, - "DW_TAG_union_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + def test_bit_field_data_member_location_and_bit_offset_with_byte_size_little_endian( + self, + ): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 12), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 20), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib(DW_AT.bit_size, DW_FORM.data1, 20), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.bit_offset, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) ) - dies[0].attribs.insert(1, size) - - def test_class(self): - dies = [ - DwarfDie( - 
DW_TAG.class_type, + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "coord"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 12), + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32, 12), + TypeMember(prog.int_type("int", 4, True), "z", 44, 20), ], - [ - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), - DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - ), + ), + ) + + def test_union(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.union_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "option"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "f"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + ), ), - ], + int_die, + float_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.union_type( + "option", + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.float_type("float", 4), "f"), + ), ), - int_die, - ] - - self.assertFromDwarf(dies, coord_type) - - tag = dies[0].attribs.pop(0) - self.assertFromDwarf( - dies, class_type(None, coord_type.size, coord_type.members) ) - dies[0].attribs.insert(0, tag) - children = list(dies[0].children) - dies[0].children.clear() - self.assertFromDwarf(dies, 
class_type("coord", coord_type.size, ())) - size = dies[0].attribs.pop(1) - dies[0].attribs.append( - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True) + def test_class(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.class_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "coord"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 12), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "z"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 8 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ) + ) ) - self.assertFromDwarf(dies, class_type("coord")) - del dies[0].attribs[-1] - dies[0].attribs.insert(1, size) - dies[0].children.extend(children) - - name = dies[0].children[0].attribs.pop(0) - self.assertFromDwarf( - dies, - class_type( + self.assertEqual( + prog.type("TEST").type, + prog.class_type( "coord", - coord_type.size, + 12, ( - TypeMember(int_type("int", 4, True), None, 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + TypeMember(prog.int_type("int", 4, True), "z", 64), ), ), ) - dies[0].children[0].attribs.insert(0, name) - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_class_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, - ) - dies[0].attribs[0] = tag - - size = 
dies[0].attribs.pop(1) - self.assertRaisesRegex( - Exception, - "DW_TAG_class_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + def test_lazy_cycle(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "next"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ) + ) ) - dies[0].attribs.insert(1, size) - - name = dies[0].children[0].attribs.pop(0) - dies[0].children[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) - self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_name", - self.type_from_dwarf, - dies, + type_ = prog.struct_type( + "foo", 8, (TypeMember(lambda: prog.pointer_type(type_), "next"),) ) - dies[0].children[0].attribs[0] = name + self.assertEqual(prog.type("TEST").type, type_) - location = dies[0].children[0].attribs[1] - dies[0].children[0].attribs[1] = DwarfAttrib( - DW_AT.data_member_location, DW_FORM.string, "foo" - ) - self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_data_member_location", - self.type_from_dwarf, - dies, + def test_infinite_cycle(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ) + ) ) - dies[0].children[0].attribs[1] = location + self.assertRaisesRegex(Exception, "maximum.*depth exceeded", prog.type, "TEST") - type_ = dies[0].children[0].attribs.pop(2) - self.assertRaisesRegex( - Exception, "DW_TAG_member is missing DW_AT_type", self.type_from_dwarf, dies - ) - 
dies[0].children[0].attribs.insert( - 2, DwarfAttrib(DW_AT.type, DW_FORM.string, "foo") - ) - self.assertRaisesRegex( - Exception, - "DW_TAG_member has invalid DW_AT_type", - self.type_from_dwarf, - dies, + def test_enum(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), + ), + ), + unsigned_int_die, + ) + ) ) - dies[0].children[0].attribs[2] = type_ - - def test_lazy_cycle(self): - dies = [ - DwarfDie( - DW_TAG.structure_type, + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), ), + ), + ) + + def test_enum_anonymous(self): + prog = dwarf_program( + test_type_dies( ( DwarfDie( - DW_TAG.member, + DW_TAG.enumeration_type, ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "next"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + 
DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), ), ), - ), - ), - DwarfDie( - DW_TAG.pointer_type, + unsigned_int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( + None, + prog.int_type("unsigned int", 4, False), ( - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), ), ), - ] - - type_ = struct_type( - "foo", 8, (TypeMember(lambda: pointer_type(8, type_), "next"),) ) - self.assertFromDwarf(dies, type_) - def test_infinite_cycle(self): - dies = [ - DwarfDie( - DW_TAG.pointer_type, - [ - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], - ), - ] - self.assertRaisesRegex( - Exception, "maximum.*depth exceeded", self.type_from_dwarf, dies + def test_enum_no_enumerators(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + unsigned_int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type("color", prog.int_type("unsigned int", 4, False), ()), ) - def test_enum(self): - dies = [ - DwarfDie( - DW_TAG.enumeration_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ], - [ - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), - ], + def test_enum_incomplete(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + 
DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), - ], + ) + ) + ) + self.assertEqual(prog.type("TEST").type, prog.enum_type("color")) + + def test_enum_old_gcc(self): + # GCC < 5.1 + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), - ], + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), ), - ], + ), + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), ), - unsigned_int_die, - double_die, - ] + ) - self.assertFromDwarf(dies, color_type) - - tag = dies[0].attribs.pop(0) - self.assertFromDwarf( - dies, enum_type(None, color_type.type, color_type.enumerators) - ) - dies[0].attribs.insert(0, tag) - - children = list(dies[0].children) - dies[0].children.clear() - self.assertFromDwarf(dies, enum_type("color", color_type.type, ())) - type_ = dies[0].attribs.pop(1) - dies[0].attribs.append( - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True) - ) - self.assertFromDwarf(dies, enum_type("color")) - del dies[0].attribs[-1] - dies[0].attribs.insert(1, type_) - dies[0].children.extend(children) - 
- # A la GCC before 5.1. - del dies[0].attribs[1] - self.assertFromDwarf( - dies, - enum_type("color", int_type("", 4, False), color_type.enumerators), - ) - for i, child in enumerate(dies[0].children): - child.attribs[1] = DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, -i) - self.assertFromDwarf( - dies, - enum_type( + def test_enum_old_gcc_signed(self): + # GCC < 5.1 + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, -1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.sdata, -2), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.enum_type( "color", - int_type("", 4, True), + prog.int_type("", 4, True), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", -1), @@ -1045,60 +1640,148 @@ def test_enum(self): ), ) - dies[0].attribs.insert(1, DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)) + def test_enum_compatible_type_not_integer(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + float_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_AT_type of DW_TAG_enumeration_type is not an integer type", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - del dies[0].attribs[1] - size = dies[0].attribs.pop(1) + def test_enum_missing_compatible_type_and_byte_size(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.enumeration_type, ()))) self.assertRaisesRegex( Exception, 
"DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].attribs.insert(1, size) - tag = dies[0].attribs.pop(0) - dies[0].attribs.insert(0, DwarfAttrib(DW_AT.name, DW_FORM.data1, 0)) + def test_enum_invalid_name(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + unsigned_int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_enumeration_type has invalid DW_AT_name", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].attribs[0] = tag - name = dies[0].children[0].attribs.pop(0) + def test_enum_enumerator_missing_name(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + (DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0),), + ), + ), + ), + unsigned_int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_enumerator has missing or invalid DW_AT_name", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs.insert(0, name) - const_value = dies[0].children[0].attribs.pop(1) + def test_enum_enumerator_missing_const_value(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "FOO"),), + ), + ), + ), + unsigned_int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_enumerator is missing DW_AT_const_value", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs.insert( - 1, DwarfAttrib(DW_AT.const_value, 
DW_FORM.string, "asdf") + + def test_enum_enumerator_invalid_const_value(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "FOO"), + DwarfAttrib( + DW_AT.const_value, DW_FORM.string, "FOO" + ), + ), + ), + ), + ), + unsigned_int_die, + ) + ) ) self.assertRaisesRegex( Exception, "DW_TAG_enumerator has invalid DW_AT_const_value", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - dies[0].children[0].attribs[1] = const_value def test_tagged_by_name(self): prog = dwarf_program( @@ -1210,339 +1893,612 @@ def test_tagged_by_name(self): ) ) - self.assertEqual(prog.type("struct point"), point_type) + self.assertEqual( + prog.type("struct point"), + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ), + ) self.assertRaisesRegex(LookupError, "could not find", prog.type, "union point") - self.assertEqual(prog.type("union option"), option_type) + self.assertEqual( + prog.type("union option"), + prog.union_type( + "option", + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.float_type("float", 4), "f"), + ), + ), + ) self.assertRaisesRegex( LookupError, "could not find", prog.type, "struct option" ) - self.assertEqual(prog.type("enum color"), color_type) + self.assertEqual( + prog.type("enum color"), + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ), + ) self.assertRaisesRegex(LookupError, "could not find", prog.type, "struct color") def test_typedef(self): - dies = [ - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "INT"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], - 
), - int_die, - ] - self.assertFromDwarf(dies, typedef_type("INT", int_type("int", 4, True))) + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "INT"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.typedef_type("INT", prog.int_type("int", 4, True)), + ) - dies[0].attribs.pop(0) + def test_typedef_missing_name(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.typedef, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) + ), + int_die, + ) + ) + ) self.assertRaisesRegex( Exception, "DW_TAG_typedef has missing or invalid DW_AT_name", - self.type_from_dwarf, - dies, + prog.type, + "TEST", ) - def test_void_typedef(self): - dies = [ - DwarfDie( - DW_TAG.typedef, [DwarfAttrib(DW_AT.name, DW_FORM.string, "VOID"),], - ), - ] - self.assertFromDwarf(dies, typedef_type("VOID", void_type())) + def test_typedef_void(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.typedef, (DwarfAttrib(DW_AT.name, DW_FORM.string, "VOID"),) + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.typedef_type("VOID", prog.void_type()) + ) - dies[0].attribs.pop(0) - self.assertRaisesRegex( - Exception, - "DW_TAG_typedef has missing or invalid DW_AT_name", - self.type_from_dwarf, - dies, + def test_typedef_by_name(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "pid_t"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + int_die, + ) + ) + self.assertEqual( + prog.type("pid_t"), + prog.typedef_type("pid_t", prog.int_type("int", 4, True)), + ) + + def test_pointer(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.pointer_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.int_type("int", 4, True)) + ) + 
self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.int_type("int", 4, True), 8) + ) + + def test_pointer_explicit_size(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.pointer_type(prog.int_type("int", 4, True), 4) + ) + + def test_pointer_void(self): + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.pointer_type, ()))) + self.assertEqual(prog.type("TEST").type, prog.pointer_type(prog.void_type())) + + def test_array(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True), 2) ) - def test_typedef_by_name(self): + def test_array_two_dimensional(self): prog = dwarf_program( - base_type_dies - + ( - DwarfDie( - DW_TAG.typedef, - ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "pid_t"), - DwarfAttrib( - DW_AT.type, DW_FORM.ref4, base_type_dies.index(int_die) + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), ), ), - ), + int_die, + ) ) ) - self.assertEqual(prog.type("pid_t"), pid_type) - - def test_pointer(self): - dies = [ - DwarfDie(DW_TAG.pointer_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),],), - int_die, - ] - self.assertFromDwarf(dies, pointer_type(8, int_type("int", 4, True))) - - del dies[0].attribs[0] - self.assertFromDwarf(dies, pointer_type(8, void_type())) + self.assertEqual( + 
prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 3), 2), + ) - def test_array(self): - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + def test_array_three_dimensional(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 4),), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type( + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 4), 3), 2 ), - int_die, - ] - self.assertFromDwarf(dies, array_type(2, int_type("int", 4, True))) + ) - dies[0].children.append( - DwarfDie( - DW_TAG.subrange_type, [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)] - ), + def test_array_missing_type(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + int_die, + ) + ) ) - self.assertFromDwarf( - dies, array_type(2, array_type(3, int_type("int", 4, True))) + self.assertRaisesRegex( + Exception, "DW_TAG_array_type is missing DW_AT_type", prog.type, "TEST" ) - dies[0].children.append( - DwarfDie( - DW_TAG.subrange_type, [DwarfAttrib(DW_AT.count, DW_FORM.data1, 4)] - ), + def test_array_zero_length_count(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), + ), + int_die, + ) + ) ) - self.assertFromDwarf( - 
dies, array_type(2, array_type(3, array_type(4, int_type("int", 4, True)))) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True), 0) ) - del dies[0].attribs[0] - self.assertRaisesRegex( - Exception, - "DW_TAG_array_type is missing DW_AT_type", - self.type_from_dwarf, - dies, + def test_array_zero_length_upper_bound(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.sdata, -1),), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True), 0) ) - def test_zero_length_array(self): - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + def test_incomplete_array_no_subrange(self): + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),) ), - ], - ), - int_die, - ] - self.assertFromDwarf(dies, array_type(0, int_type("int", 4, True))) - - dies[0].children[0].attribs[0] = DwarfAttrib( - DW_AT.upper_bound, DW_FORM.sdata, -1 + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True)) ) - self.assertFromDwarf(dies, array_type(0, int_type("int", 4, True))) - - def test_incomplete_array(self): - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [DwarfDie(DW_TAG.subrange_type, [])], - ), - int_die, - ] - self.assertFromDwarf(dies, array_type(None, int_type("int", 4, True))) - del dies[0].children[0] - self.assertFromDwarf(dies, array_type(None, int_type("int", 4, True))) + def test_incomplete_array_empty_subrange(self): + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 
1),), + (DwarfDie(DW_TAG.subrange_type, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, prog.array_type(prog.int_type("int", 4, True)) + ) def test_incomplete_array_of_array(self): - # int [3][] - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ - DwarfDie(DW_TAG.subrange_type, []), + prog = dwarf_program( + test_type_dies( + # int [3][] + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie(DW_TAG.subrange_type, ()), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), ), - ], - ), - int_die, - ] - self.assertFromDwarf( - dies, array_type(None, array_type(3, int_type("int", 4, True))) + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 3)), ) def test_array_of_zero_length_array(self): - # int [3][0] - dies = [ - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + # int [3][0] + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 0), 3), + ) + + def test_array_of_zero_length_array_old_gcc(self): + # GCC < 9.0 + prog = dwarf_program( + test_type_dies( + # int [3][0] + ( DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + 
(DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + DwarfDie(DW_TAG.subrange_type, ()), + ), ), - ], - ), - int_die, - ] - - type_ = array_type(3, array_type(0, int_type("int", 4, True))) - self.assertFromDwarf(dies, type_) - - # GCC < 9.0. - del dies[0].children[1].attribs[0] - self.assertFromDwarf(dies, type_) + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type(prog.array_type(prog.int_type("int", 4, True), 0), 3), + ) def test_array_of_zero_length_array_typedef(self): - dies = [ - # ZARRAY [3] - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + ( + # ZARRAY [3] DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 3)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), ), - ], - ), - # typedef int ZARRAY[0]; - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], - ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)], - [ + # typedef int ZARRAY[0]; DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), ), - ], + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + 3, ), - int_die, - ] - - type_ = array_type( - 3, typedef_type("ZARRAY", array_type(0, int_type("int", 4, True))) ) - self.assertFromDwarf(dies, type_) + def 
test_array_of_zero_length_array_typedef_old_gcc(self): # GCC actually squashes arrays of typedef arrays into one array type, # but let's handle it like GCC < 9.0 anyways. - del dies[2].children[0] - self.assertFromDwarf(dies, type_) + prog = dwarf_program( + test_type_dies( + ( + # ZARRAY [3] + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 3),), + ), + ), + ), + # typedef int ZARRAY[0]; + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),), + (DwarfDie(DW_TAG.subrange_type, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.array_type( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + 3, + ), + ) def test_flexible_array_member(self): # struct { # int i; # int a[]; # }; - dies = [ - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + ), + ), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - ], + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),) ), 
- ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)],), - int_die, - ] - - self.assertFromDwarf( - dies, - struct_type( + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( None, 4, ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(None, int_type("int", 4, True)), "a", 32), + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.array_type(prog.int_type("int", 4, True)), "a", 32), ), ), ) def test_typedef_flexible_array_member(self): - dies = [ - # struct { - # int i; - # FARRAY a; - # }; - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( + # struct { + # int i; + # FARRAY a; + # }; DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + ), + ), + ), + ), + # typedef int FARRAY[]; + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "FARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - ], + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),) ), - ], - ), - # typedef int FARRAY[]; - DwarfDie( - DW_TAG.typedef, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "FARRAY"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, 
DW_FORM.ref4, 3)],), - int_die, - ] - - self.assertFromDwarf( - dies, - struct_type( + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( None, 4, ( - TypeMember(int_type("int", 4, True), "i"), + TypeMember(prog.int_type("int", 4, True), "i"), TypeMember( - typedef_type( - "FARRAY", array_type(None, int_type("int", 4, True)) + prog.typedef_type( + "FARRAY", prog.array_type(prog.int_type("int", 4, True)) ), "a", 32, @@ -1555,303 +2511,602 @@ def test_zero_length_array_only_member(self): # struct { # int a[0]; # }; - dies = [ - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), - ], - ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], - [ DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + (TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"),), ), - int_die, - ] - - type_ = struct_type( - None, 4, (TypeMember(array_type(0, int_type("int", 4, True)), "a"),) ) - self.assertFromDwarf(dies, type_) + def test_zero_length_array_only_member_old_gcc(self): # GCC < 9.0. 
- del dies[1].children[0].attribs[0] - self.assertFromDwarf(dies, type_) + # struct { + # int a[0]; + # }; + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + (DwarfDie(DW_TAG.subrange_type, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + (TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"),), + ), + ) def test_typedef_zero_length_array_only_member(self): - dies = [ + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + # struct foo { + # ZARRAY a; + # }; + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + # typedef int ZARRAY[0]; + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), + ), + DwarfDie( + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "foo", + 4, + ( + TypeMember( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + "a", + ), + ), + ), + ) + + def test_typedef_zero_length_array_only_member_old_gcc(self): + # GCC < 9.0. 
+ dies = ( DwarfDie( # struct foo { # ZARRAY a; # }; DW_TAG.structure_type, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"), DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ], - [ + ), + ( DwarfDie( DW_TAG.member, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + ), ), - ], + ), ), # typedef int ZARRAY[0]; DwarfDie( DW_TAG.typedef, - [ + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "ZARRAY"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], - [ - DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], - ), - ], + ), ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3)],), + DwarfDie(DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 3),)), int_die, - ] + ) - type_ = struct_type( - "foo", - 4, - ( - TypeMember( - typedef_type("ZARRAY", array_type(0, int_type("int", 4, True))), "a" + prog = dwarf_program(test_type_dies(dies)) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "foo", + 4, + ( + TypeMember( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + "a", + ), ), ), ) - self.assertFromDwarf(dies, type_) - - farray_zarray = typedef_type( - "ZARRAY", array_type(None, int_type("int", 4, True)) - ) - - # GCC < 9.0. - del dies[1].children[0] - prog = dwarf_program(dies) - self.assertEqual(prog.type("struct foo"), type_) # Although the ZARRAY type must be a zero-length array in the context # of the structure, it could still be an incomplete array if used # elsewhere. - self.assertEqual(prog.type("ZARRAY"), farray_zarray) + self.assertEqual( + prog.type("ZARRAY"), + prog.typedef_type("ZARRAY", prog.array_type(prog.int_type("int", 4, True))), + ) # Make sure it still works if we parse the array type first. 
- prog = dwarf_program(dies) - self.assertEqual(prog.type("ZARRAY"), farray_zarray) - self.assertEqual(prog.type("struct foo"), type_) + prog = dwarf_program(test_type_dies(dies)) + self.assertEqual( + prog.type("ZARRAY"), + prog.typedef_type("ZARRAY", prog.array_type(prog.int_type("int", 4, True))), + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + "foo", + 4, + ( + TypeMember( + prog.typedef_type( + "ZARRAY", prog.array_type(prog.int_type("int", 4, True), 0) + ), + "a", + ), + ), + ), + ) def test_zero_length_array_not_last_member(self): # struct { # int a[0]; # int i; # }; - dies = [ - DwarfDie( - DW_TAG.structure_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), - ], + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + ( + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + TypeMember(prog.int_type("int", 4, True), "i"), + ), ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], - [ + ) + + def test_zero_length_array_not_last_member_old_gcc(self): + # GCC < 9.0. 
+ # struct { + # int a[0]; + # int i; + # }; + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + ), + ), DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + (DwarfDie(DW_TAG.subrange_type, ()),), ), - ], - ), - int_die, - ] - - type_ = struct_type( - None, - 4, - ( - TypeMember(array_type(0, int_type("int", 4, True)), "a"), - TypeMember(int_type("int", 4, True), "i"), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.struct_type( + None, + 4, + ( + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + TypeMember(prog.int_type("int", 4, True), "i"), + ), ), ) - self.assertFromDwarf(dies, type_) - - # GCC < 9.0. 
- del dies[1].children[0].attribs[0] - self.assertFromDwarf(dies, type_) def test_zero_length_array_in_union(self): # union { # int i; # int a[0]; # }; - dies = [ - DwarfDie( - DW_TAG.union_type, - [DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), - ], + DW_TAG.union_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.count, DW_FORM.data1, 0),), + ), + ), ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.union_type( + None, + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + ), + ), + ) + + def test_zero_length_array_in_union_old_gcc(self): + # GCC < 9.0. 
+ # union { + # int i; + # int a[0]; + # }; + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - ], + DW_TAG.union_type, + (DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4),), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "i"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "a"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), ), - ], - ), - DwarfDie( - DW_TAG.array_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], - [ DwarfDie( - DW_TAG.subrange_type, - [DwarfAttrib(DW_AT.count, DW_FORM.data1, 0)], + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + (DwarfDie(DW_TAG.subrange_type, ()),), ), - ], - ), - int_die, - ] - - type_ = union_type( - None, - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(0, int_type("int", 4, True)), "a"), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.union_type( + None, + 4, + ( + TypeMember(prog.int_type("int", 4, True), "i"), + TypeMember(prog.array_type(prog.int_type("int", 4, True), 0), "a"), + ), ), ) - self.assertFromDwarf(dies, type_) - - # GCC < 9.0. 
- del dies[1].children[0].attribs[0] - self.assertFromDwarf(dies, type_) def test_pointer_size(self): prog = dwarf_program(base_type_dies, bits=32) - self.assertEqual(prog.type("int *"), pointer_type(4, int_type("int", 4, True))) + self.assertEqual( + prog.type("int *"), prog.pointer_type(prog.int_type("int", 4, True), 4) + ) - def test_function(self): + def test_function_no_parameters(self): + # int foo(void) + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type(prog.int_type("int", 4, True), (), False), + ) + + def test_function_void_return(self): + # void foo(void) + prog = dwarf_program(test_type_dies(DwarfDie(DW_TAG.subroutine_type, ()))) + self.assertEqual( + prog.type("TEST").type, prog.function_type(prog.void_type(), (), False) + ) + + def test_function_unnamed_parameter(self): # int foo(char) - dies = [ - DwarfDie( - DW_TAG.subroutine_type, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.formal_parameter, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)], + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + ), ), - ], - ), - int_die, - char_die, - ] - self.assertFromDwarf( - dies, - function_type( - int_type("int", 4, True), - (TypeParameter(int_type("char", 1, True)),), + int_die, + char_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("char", 1, True)),), False, ), ) + def test_function_named_parameter(self): # int foo(char c) - dies[0].children[0].attribs.append(DwarfAttrib(DW_AT.name, DW_FORM.string, "c")) - self.assertFromDwarf( - dies, - function_type( - int_type("int", 4, True), - 
(TypeParameter(int_type("char", 1, True), "c"),), + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.formal_parameter, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + DwarfAttrib(DW_AT.name, DW_FORM.string, "c"), + ), + ), + ), + ), + int_die, + char_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("char", 1, True), "c"),), False, ), ) + def test_function_unspecified_parameters(self): + # int foo() + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + (DwarfDie(DW_TAG.unspecified_parameters, ()),), + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type(prog.int_type("int", 4, True), (), True), + ) + + def test_function_variadic(self): # int foo(char, ...) - del dies[0].children[0].attribs[-1] - dies[0].children.append(DwarfDie(DW_TAG.unspecified_parameters, [])) - self.assertFromDwarf( - dies, - function_type( - int_type("int", 4, True), - (TypeParameter(int_type("char", 1, True)),), + prog = dwarf_program( + test_type_dies( + ( + DwarfDie( + DW_TAG.subroutine_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),), + ), + DwarfDie(DW_TAG.unspecified_parameters, ()), + ), + ), + int_die, + char_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("char", 1, True)),), True, ), ) - # int foo() - del dies[0].children[0] - self.assertFromDwarf(dies, function_type(int_type("int", 4, True), (), True)) - - # int foo(void) - del dies[0].children[0] - self.assertFromDwarf(dies, function_type(int_type("int", 4, True), (), False)) - - # void foo(void) - del dies[0].attribs[0] - 
self.assertFromDwarf(dies, function_type(void_type(), (), False)) - - def test_incomplete_array_parameter(self): + def test_function_incomplete_array_parameter(self): # void foo(int []) # Note that in C, this is equivalent to void foo(int *), so GCC and # Clang emit the DWARF for the latter. - dies = [ - DwarfDie( - DW_TAG.subroutine_type, - [], - [ + prog = dwarf_program( + test_type_dies( + ( DwarfDie( - DW_TAG.formal_parameter, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1)], + DW_TAG.subroutine_type, + (), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1),), + ), + ), ), - ], - ), - DwarfDie(DW_TAG.array_type, [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2)],), - int_die, - ] - self.assertFromDwarf( - dies, - function_type( - void_type(), - (TypeParameter(array_type(None, int_type("int", 4, True))),), + DwarfDie( + DW_TAG.array_type, (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2),) + ), + int_die, + ) + ) + ) + self.assertEqual( + prog.type("TEST").type, + prog.function_type( + prog.void_type(), + (TypeParameter(prog.array_type(prog.int_type("int", 4, True))),), False, ), ) @@ -1859,132 +3114,151 @@ def test_incomplete_array_parameter(self): def test_language(self): for name, lang in DW_LANG.__members__.items(): if re.fullmatch("C[0-9]*", name): - self.assertFromDwarf( - (int_die,), - int_type("int", 4, True, language=Language.C), - lang=lang, + prog = dwarf_program(test_type_dies(int_die), lang=lang) + self.assertEqual( + prog.type("TEST").type, + prog.int_type("int", 4, True, language=Language.C), ) - - self.assertFromDwarf( - (int_die,), - int_type("int", 4, True, language=DEFAULT_LANGUAGE), - lang=DW_LANG.BLISS, + prog = dwarf_program(test_type_dies(int_die), lang=DW_LANG.BLISS) + self.assertEqual( + prog.type("TEST").type, + prog.int_type("int", 4, True, language=DEFAULT_LANGUAGE), ) -class TestObjects(ObjectTestCase): - def test_constant(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.enumeration_type, - [ - 
DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ], - [ - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), - ], - ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), - ], +class TestObjects(TestCase): + def test_constant_signed_enum(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), ), - DwarfDie( - DW_TAG.enumerator, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), - DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), - ], + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), ), - ], - ), - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib( - DW_AT.location, - DW_FORM.exprloc, - b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), ), - ], - ), - ] - - type_ = enum_type( - "color", - int_type("int", 4, True), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), + ), + 
) ) - prog = dwarf_program(dies) - self.assertEqual(prog["BLUE"], Object(prog, type_, value=2)) - - dies[0] = unsigned_int_die - type_ = enum_type( + type_ = prog.enum_type( "color", - int_type("unsigned int", 4, False), + prog.int_type("int", 4, True), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), TypeEnumerator("BLUE", 2), ), ) - prog = dwarf_program(dies) - self.assertEqual(prog["GREEN"], Object(prog, type_, value=1)) + self.assertEqual( + prog.object("RED", FindObjectFlags.CONSTANT), Object(prog, type_, value=0) + ) + self.assertEqual(prog["BLUE"], Object(prog, type_, value=2)) - del dies[1].attribs[0] - type_ = enum_type( - None, - int_type("unsigned int", 4, False), + def test_constant_unsigned_enum(self): + prog = dwarf_program( ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), + unsigned_int_die, + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "FLAG"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data2, 1 << 12), + ), + ), + ), + ), + ) ) - prog = dwarf_program(dies) self.assertEqual( - prog.object("RED", FindObjectFlags.CONSTANT), Object(prog, type_, value=0) + prog["FLAG"], + Object( + prog, + prog.enum_type( + None, + prog.int_type("unsigned int", 4, False), + (TypeEnumerator("FLAG", 4096),), + ), + 4096, + ), ) def test_function(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.subprogram, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "abs"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), - ], - [ + prog = dwarf_program( + test_type_dies( + ( + int_die, DwarfDie( - DW_TAG.formal_parameter, - [DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0)], + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "abs"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + 
DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), + ), + ( + DwarfDie( + DW_TAG.formal_parameter, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ), + ), ), - ], + ) + ) + ) + self.assertEqual( + prog["abs"], + Object( + prog, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("int", 1, True)),), + False, + ), + address=0x7FC3EB9B1C30, ), - ] - type_ = function_type( - int_type("int", 4, True), (TypeParameter(int_type("int", 1, True)),), False ) - - prog = dwarf_program(dies) - self.assertEqual(prog["abs"], Object(prog, type_, address=0x7FC3EB9B1C30)) self.assertEqual(prog.object("abs", FindObjectFlags.FUNCTION), prog["abs"]) self.assertRaisesRegex( LookupError, @@ -1994,33 +3268,42 @@ def test_function(self): FindObjectFlags.VARIABLE, ) - del dies[1].attribs[2] - prog = dwarf_program(dies) + def test_function_no_address(self): + prog = dwarf_program( + test_type_dies( + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "abort"),), + ) + ) + ) self.assertRaisesRegex( - LookupError, "could not find address", prog.object, "abs" + LookupError, "could not find address", prog.object, "abort" ) def test_variable(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib( - DW_AT.location, - DW_FORM.exprloc, - b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), ), - ], - ), - ] - - prog = dwarf_program(dies) + ) + ) + ) self.assertEqual( prog["x"], - Object(prog, int_type("int", 4, True), address=0xFFFFFFFF01020304), + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), ) 
self.assertEqual(prog.object("x", FindObjectFlags.VARIABLE), prog["x"]) self.assertRaisesRegex( @@ -2031,17 +3314,44 @@ def test_variable(self): FindObjectFlags.CONSTANT, ) - del dies[1].attribs[2] - prog = dwarf_program(dies) + def test_variable_no_address(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ) + ) + ) self.assertRaisesRegex( LookupError, "could not find address or value", prog.object, "x" ) - dies[1].attribs.insert(2, DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xe0")) - prog = dwarf_program(dies) + def test_variable_unimplemented_location(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xe0"), + ), + ), + ) + ) + ) self.assertRaisesRegex(Exception, "unimplemented operation", prog.object, "x") - def test_const_signed(self): + def test_variable_const_signed(self): for form in ( DW_FORM.data1, DW_FORM.data2, @@ -2049,23 +3359,25 @@ def test_const_signed(self): DW_FORM.data8, DW_FORM.sdata, ): - dies = [ - int_die, - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1,), - ], - ), - ] - prog = dwarf_program(dies) - self.assertEqual( - prog["x"], Object(prog, int_type("int", 4, True), 1), + + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, form, 1), + ), + ), + ) + ) ) + self.assertEqual(prog["x"], Object(prog, prog.int_type("int", 4, True), 1)) - def test_const_unsigned(self): + def test_variable_const_unsigned(self): for form in ( 
DW_FORM.data1, DW_FORM.data2, @@ -2073,80 +3385,287 @@ def test_const_unsigned(self): DW_FORM.data8, DW_FORM.udata, ): - dies = [ - unsigned_int_die, - DwarfDie( - DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1), - ], - ), - ] - prog = dwarf_program(dies) + prog = dwarf_program( + test_type_dies( + ( + unsigned_int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, form, 1), + ), + ), + ) + ) + ) self.assertEqual( - prog["x"], Object(prog, int_type("unsigned int", 4, False), 1), + prog["x"], Object(prog, prog.int_type("unsigned int", 4, False), 1) ) - def test_const_block(self): - dies = [ - int_die, - DwarfDie( - DW_TAG.structure_type, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), - ], - [ + def test_variable_const_block(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, DwarfDie( - DW_TAG.member, - [ + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.const_value, + DW_FORM.block1, + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ), + ), + ), + ) + ) + ) + self.assertEqual( + prog["p"], + Object(prog, prog.array_type(prog.int_type("int", 4, True), 2), [1, 2]), + ) + + def test_variable_const_block_too_small(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + DwarfDie( + DW_TAG.variable, + ( + 
DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.const_value, + DW_FORM.block1, + b"\x01\x00\x00\x00\x02\x00\x00", + ), + ), + ), + ) + ) + ) + self.assertRaisesRegex(Exception, "too small", prog.variable, "p") + + def test_specification(self): + prog = dwarf_program( + test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 0), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), DwarfDie( - DW_TAG.member, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), - DwarfAttrib(DW_AT.data_member_location, DW_FORM.data1, 4), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ], + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), ), + ) + ) + ) + + self.assertEqual( + prog["x"], + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), + ) + + def test_namespace_reverse_specification(self): + """Test specification inside namespace while declaration is outside of it.""" + dies = ( + int_die, + DwarfDie( + DW_TAG.namespace, + [ + DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 2), + ], + [ + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 2), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), + ) ], ), DwarfDie( DW_TAG.variable, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib( - DW_AT.const_value, - DW_FORM.block1, - b"\x01\x00\x00\x00\x02\x00\x00\x00", - ), - ], + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.declaration, 
DW_FORM.flag_present, True), + ), ), - ] - prog = dwarf_program(dies) - self.assertEqual( - prog["p"], Object(prog, point_type, {"x": 1, "y": 2}), ) - dies[2].attribs[2] = DwarfAttrib( - DW_AT.const_value, DW_FORM.block1, b"\x01\x00\x00\x00\x02\x00\x00", - ) prog = dwarf_program(dies) - self.assertRaisesRegex(Exception, "too small", prog.variable, "p") + self.assertEqual( + prog["x"], + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), + ) def test_not_found(self): - prog = dwarf_program([int_die]) + prog = dwarf_program(int_die) self.assertRaisesRegex(LookupError, "could not find", prog.object, "y") -class TestProgram(unittest.TestCase): +class TestScopes(TestCase): + def test_global_namespace(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "target"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 123), + ), + ), + ) + ) + self.assertEqual( + prog["::target"], Object(prog, prog.int_type("int", 4, True), 123) + ) + self.assertEqual(prog["::target"], prog["target"]) + + def test_namespaces_single(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"),), + ( + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "target"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 123), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog["moho::target"], Object(prog, prog.int_type("int", 4, True), 123) + ) + + def test_namespaces_gcc(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"),), + ( + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "target"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 123), + ), + ), + ), + ), + ) + ) + self.assertEqual( 
+ prog["moho::target"], Object(prog, prog.int_type("int", 4, True), 123) + ) + + def test_namespaces_nested(self): + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"),), + ( + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "eve"),), + ( + DwarfDie( + DW_TAG.namespace, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "kerbin" + ), + ), + ( + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "minmus" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, 0 + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 47 + ), + ), + ), + ), + ), + ), + ), + ), + ), + ) + ) + self.assertEqual( + prog["moho::eve::kerbin::minmus"], + Object(prog, prog.int_type("int", 4, True), 47), + ) + + +class TestProgram(TestCase): def test_language(self): dies = ( DwarfDie( diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 3aee580a3..fe4090887 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -5,30 +5,12 @@ import operator import unittest -from drgn import ( - Qualifiers, - TypeEnumerator, - TypeMember, - TypeParameter, - array_type, - bool_type, - class_type, - complex_type, - enum_type, - float_type, - function_type, - int_type, - pointer_type, - struct_type, - typedef_type, - union_type, - void_type, -) -from tests import coord_type, point_type -from tests.libdrgn import C_TOKEN, drgn_lexer_c, Lexer - - -class TestPrettyPrintTypeName(unittest.TestCase): +from drgn import Qualifiers, TypeEnumerator, TypeMember, TypeParameter +from tests import MockProgramTestCase +from tests.libdrgn import C_TOKEN, Lexer, drgn_lexer_c + + +class TestPrettyPrintTypeName(MockProgramTestCase): def assertTypeName(self, type, expected, same_as_definition=False): self.assertEqual(type.type_name(), expected) if same_as_definition: @@ -58,120 +40,175 @@ def assertQualifiedTypeName(self, expected, same_as_definition, constructor, *ar ) def 
test_void(self): - self.assertQualifiedTypeName("void", True, void_type) + self.assertQualifiedTypeName("void", True, self.prog.void_type) def test_int(self): - self.assertQualifiedTypeName("int", True, int_type, "int", 4, True) + self.assertQualifiedTypeName("int", True, self.prog.int_type, "int", 4, True) def test_bool(self): - self.assertQualifiedTypeName("_Bool", True, bool_type, "_Bool", 1) + self.assertQualifiedTypeName("_Bool", True, self.prog.bool_type, "_Bool", 1) def test_float(self): - self.assertQualifiedTypeName("float", True, float_type, "float", 4) + self.assertQualifiedTypeName("float", True, self.prog.float_type, "float", 4) def test_complex(self): self.assertQualifiedTypeName( "double _Complex", True, - complex_type, + self.prog.complex_type, "double _Complex", 16, - float_type("double", 8), + self.prog.float_type("double", 8), ) def test_struct(self): - self.assertQualifiedTypeName("struct point", True, struct_type, "point") - self.assertQualifiedTypeName("struct ", False, struct_type, None) + self.assertQualifiedTypeName( + "struct point", True, self.prog.struct_type, "point" + ) + self.assertQualifiedTypeName( + "struct ", False, self.prog.struct_type, None + ) def test_union(self): - self.assertQualifiedTypeName("union option", True, union_type, "option"), - self.assertQualifiedTypeName("union ", False, union_type, None) + self.assertQualifiedTypeName( + "union option", True, self.prog.union_type, "option" + ), + self.assertQualifiedTypeName( + "union ", False, self.prog.union_type, None + ) def test_class(self): - self.assertQualifiedTypeName("class coord", True, class_type, "coord") - self.assertQualifiedTypeName("class ", False, class_type, None) + self.assertQualifiedTypeName("class coord", True, self.prog.class_type, "coord") + self.assertQualifiedTypeName( + "class ", False, self.prog.class_type, None + ) def test_enum(self): self.assertQualifiedTypeName( - "enum color", True, enum_type, "color", None, None + "enum color", True, 
self.prog.enum_type, "color", None, None ), self.assertQualifiedTypeName( - "enum ", False, enum_type, None, None, None + "enum ", False, self.prog.enum_type, None, None, None ) def test_typedef(self): self.assertQualifiedTypeName( - "bool", False, typedef_type, "bool", bool_type("_Bool", 1) + "bool", + False, + self.prog.typedef_type, + "bool", + self.prog.bool_type("_Bool", 1), ) def test_pointer(self): - self.assertTypeName(pointer_type(8, void_type()), "void *", True) - t = pointer_type(8, void_type(Qualifiers.VOLATILE)) + self.assertTypeName( + self.prog.pointer_type(self.prog.void_type()), "void *", True + ) + t = self.prog.pointer_type(self.prog.void_type(qualifiers=Qualifiers.VOLATILE)) self.assertTypeName(t, "volatile void *", True) - t = pointer_type(8, void_type(Qualifiers.VOLATILE), Qualifiers.CONST) + t = self.prog.pointer_type( + self.prog.void_type(qualifiers=Qualifiers.VOLATILE), + qualifiers=Qualifiers.CONST, + ) self.assertTypeName(t, "volatile void * const", True) - t = pointer_type(8, t) + t = self.prog.pointer_type(t) self.assertTypeName(t, "volatile void * const *", True) def test_array(self): - i = int_type("int", 4, True) - self.assertTypeName(array_type(None, i), "int []", True) - self.assertTypeName(array_type(2, i), "int [2]", True) - self.assertTypeName(array_type(2, array_type(3, i)), "int [2][3]", True) + i = self.prog.int_type("int", 4, True) + self.assertTypeName(self.prog.array_type(i), "int []", True) + self.assertTypeName(self.prog.array_type(i, 2), "int [2]", True) + self.assertTypeName( + self.prog.array_type(self.prog.array_type(i, 3), 2), "int [2][3]", True + ) self.assertTypeName( - array_type(2, array_type(3, array_type(4, i))), "int [2][3][4]", True + self.prog.array_type( + self.prog.array_type(self.prog.array_type(i, 4), 3), 2 + ), + "int [2][3][4]", + True, ) def test_array_of_pointers(self): self.assertTypeName( - array_type(2, array_type(3, pointer_type(8, int_type("int", 4, True)))), + self.prog.array_type( + 
self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 3 + ), + 2, + ), "int *[2][3]", True, ) def test_pointer_to_array(self): self.assertTypeName( - pointer_type(8, array_type(2, int_type("int", 4, True))), "int (*)[2]", True + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ), + "int (*)[2]", + True, ) def test_pointer_to_pointer_to_array(self): self.assertTypeName( - pointer_type(8, pointer_type(8, array_type(2, int_type("int", 4, True)))), + self.prog.pointer_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ) + ), "int (**)[2]", True, ) def test_pointer_to_array_of_pointers(self): self.assertTypeName( - pointer_type(8, array_type(2, pointer_type(8, int_type("int", 4, True)))), + self.prog.pointer_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 2 + ) + ), "int *(*)[2]", True, ) def test_array_of_pointers_to_array(self): self.assertTypeName( - array_type(2, pointer_type(8, array_type(3, int_type("int", 4, True)))), + self.prog.array_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3) + ), + 2, + ), "int (*[2])[3]", True, ) def test_pointer_to_function(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type(8, function_type(i, (TypeParameter(i),), False)), + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), False) + ), "int (*)(int)", True, ) self.assertTypeName( - pointer_type(8, function_type(i, (TypeParameter(i, "x"),), False)), + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i, "x"),), False) + ), "int (*)(int x)", True, ) self.assertTypeName( - pointer_type( - 8, - function_type( - i, (TypeParameter(i), TypeParameter(float_type("float", 4),)), False + self.prog.pointer_type( + self.prog.function_type( + i, + ( + TypeParameter(i), + 
TypeParameter(self.prog.float_type("float", 4)), + ), + False, ), ), "int (*)(int, float)", @@ -179,19 +216,22 @@ def test_pointer_to_function(self): ) def test_pointer_to_function_returning_pointer(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, function_type(pointer_type(8, i), (TypeParameter(i),), False) + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i), (TypeParameter(i),), False + ) ), "int *(*)(int)", True, ) self.assertTypeName( - pointer_type( - 8, - function_type( - pointer_type(8, i), (TypeParameter(pointer_type(8, i)),), False + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i), + (TypeParameter(self.prog.pointer_type(i)),), + False, ), ), "int *(*)(int *)", @@ -199,12 +239,13 @@ def test_pointer_to_function_returning_pointer(self): ) def test_pointer_to_function_returning_pointer_to_const(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, - function_type( - pointer_type(8, int_type("int", 4, True, Qualifiers.CONST)), + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), (TypeParameter(i),), False, ), @@ -214,12 +255,13 @@ def test_pointer_to_function_returning_pointer_to_const(self): ) def test_pointer_to_function_returning_const_pointer(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, - function_type( - pointer_type(8, i, Qualifiers.CONST), (TypeParameter(i),), False + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i, qualifiers=Qualifiers.CONST), + (TypeParameter(i),), + False, ), ), "int * const (*)(int)", @@ -227,34 +269,38 @@ def test_pointer_to_function_returning_const_pointer(self): ) def test_const_pointer_to_function_returning_pointer(self): - i = 
int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type( - 8, - function_type(pointer_type(8, i), (TypeParameter(i),), False), - Qualifiers.CONST, + self.prog.pointer_type( + self.prog.function_type( + self.prog.pointer_type(i), (TypeParameter(i),), False + ), + qualifiers=Qualifiers.CONST, ), "int *(* const)(int)", True, ) def test_array_of_pointers_to_functions(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - array_type( - 4, pointer_type(8, function_type(i, (TypeParameter(i),), False)) + self.prog.array_type( + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), False) + ), + 4, ), "int (*[4])(int)", True, ) def test_array_of_const_pointers_to_functions(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - array_type( - None, - pointer_type( - 8, function_type(i, (TypeParameter(i),), False), Qualifiers.CONST + self.prog.array_type( + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), False), + qualifiers=Qualifiers.CONST, ), ), "int (* const [])(int)", @@ -262,40 +308,47 @@ def test_array_of_const_pointers_to_functions(self): ) def test_pointer_to_variadic_function(self): - i = int_type("int", 4, True) + i = self.prog.int_type("int", 4, True) self.assertTypeName( - pointer_type(8, function_type(i, (TypeParameter(i),), True)), + self.prog.pointer_type( + self.prog.function_type(i, (TypeParameter(i),), True) + ), "int (*)(int, ...)", True, ) def test_pointer_to_function_with_no_parameters(self): self.assertTypeName( - pointer_type(8, function_type(int_type("int", 4, True), (), False)), + self.prog.pointer_type( + self.prog.function_type(self.prog.int_type("int", 4, True), (), False) + ), "int (*)(void)", True, ) def test_pointer_to_function_with_no_parameter_specification(self): self.assertTypeName( - pointer_type(8, function_type(int_type("int", 4, True), (), True)), + 
self.prog.pointer_type( + self.prog.function_type(self.prog.int_type("int", 4, True), (), True) + ), "int (*)()", True, ) def test_function(self): self.assertTypeName( - function_type(int_type("int", 4, True), (), False), "int (void)" + self.prog.function_type(self.prog.int_type("int", 4, True), (), False), + "int (void)", ) -class TestPrettyPrintType(unittest.TestCase): +class TestPrettyPrintType(MockProgramTestCase): def assertPrettyPrint(self, type, expected): self.assertEqual(str(type), expected) def test_struct(self): self.assertPrettyPrint( - point_type, + self.point_type, """\ struct point { int x; @@ -303,13 +356,9 @@ def test_struct(self): }""", ) - line_segment = struct_type( - "line_segment", - 16, - (TypeMember(point_type, "a", 0), TypeMember(point_type, "b", 8)), - ) + def test_struct_member(self): self.assertPrettyPrint( - line_segment, + self.line_segment_type, """\ struct line_segment { struct point a; @@ -317,16 +366,16 @@ def test_struct(self): }""", ) - anonymous_point = struct_type( - None, - 8, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 4), - ), - ) + def test_anonymous_struct(self): self.assertPrettyPrint( - anonymous_point, + self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), + ), """\ struct { int x; @@ -334,15 +383,25 @@ def test_struct(self): }""", ) + def test_anonymous_struct_member(self): # Member with anonymous struct type. 
- line_segment = struct_type( - "line_segment", - 16, - (TypeMember(anonymous_point, "a", 0), TypeMember(anonymous_point, "b", 8),), + anonymous_struct = self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), ) - self.assertPrettyPrint( - line_segment, + self.prog.struct_type( + "line_segment", + 16, + ( + TypeMember(anonymous_struct, "a", 0), + TypeMember(anonymous_struct, "b", 64), + ), + ), """\ struct line_segment { struct { @@ -356,17 +415,25 @@ def test_struct(self): }""", ) - # Unnamed member. - point3 = struct_type( - "point3", - 0, - ( - TypeMember(anonymous_point, None, 0), - TypeMember(int_type("int", 4, True), "z", 8), - ), - ) + def test_struct_unnamed_member(self): self.assertPrettyPrint( - point3, + self.prog.struct_type( + "point3", + 0, + ( + TypeMember( + self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), + ), + ), """\ struct point3 { struct { @@ -378,16 +445,15 @@ def test_struct(self): ) def test_bit_field(self): - point = struct_type( - "point", - 4, - ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 4, 8), - ), - ) self.assertPrettyPrint( - point, + self.prog.struct_type( + "point", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 4, 8), + ), + ), """\ struct point { int x : 4; @@ -396,16 +462,20 @@ def test_bit_field(self): ) def test_union(self): - t = union_type( - "foo", - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(4, int_type("unsigned char", 1, False)), "a"), - ), - ) self.assertPrettyPrint( - t, + self.prog.union_type( + "foo", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "i"), + TypeMember( 
+ self.prog.array_type( + self.prog.int_type("unsigned char", 1, False), 4 + ), + "a", + ), + ), + ), """\ union foo { int i; @@ -413,17 +483,22 @@ def test_union(self): }""", ) - t = union_type( - "foo", - 4, - ( - TypeMember(int_type("int", 4, True), "i"), - TypeMember(array_type(4, int_type("unsigned char", 1, False)), "a"), - ), - Qualifiers.CONST, - ) + def test_union_qualified(self): self.assertPrettyPrint( - t, + self.prog.union_type( + "foo", + 4, + ( + TypeMember(self.prog.int_type("int", 4, True), "i"), + TypeMember( + self.prog.array_type( + self.prog.int_type("unsigned char", 1, False), 4 + ), + "a", + ), + ), + qualifiers=Qualifiers.CONST, + ), """\ const union foo { int i; @@ -433,7 +508,7 @@ def test_union(self): def test_class(self): self.assertPrettyPrint( - coord_type, + self.coord_type, """\ class coord { int x; @@ -443,17 +518,8 @@ class coord { ) def test_enum(self): - t = enum_type( - "color", - int_type("unsigned int", 4, False), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), - ) self.assertPrettyPrint( - t, + self.color_type, """\ enum color { RED = 0, @@ -462,18 +528,9 @@ def test_enum(self): }""", ) - t = enum_type( - "color", - int_type("unsigned int", 4, False), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", 1), - TypeEnumerator("BLUE", 2), - ), - Qualifiers.CONST, - ) + def test_enum_qualified(self): self.assertPrettyPrint( - t, + self.color_type.qualified(Qualifiers.CONST), """\ const enum color { RED = 0, @@ -482,17 +539,17 @@ def test_enum(self): }""", ) - t = enum_type( - None, - int_type("int", 4, True), - ( - TypeEnumerator("RED", 0), - TypeEnumerator("GREEN", -1), - TypeEnumerator("BLUE", -2), - ), - ) + def test_enum_anonymous(self): self.assertPrettyPrint( - t, + self.prog.enum_type( + None, + self.prog.int_type("int", 4, True), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", -1), + TypeEnumerator("BLUE", -2), + ), + ), """\ enum { RED = 0, @@ -503,34 +560,47 
@@ def test_enum(self): def test_typedef(self): self.assertPrettyPrint( - typedef_type("INT", int_type("int", 4, True)), "typedef int INT" + self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)), + "typedef int INT", ) + + def test_typedef_const(self): self.assertPrettyPrint( - typedef_type("CINT", int_type("int", 4, True, Qualifiers.CONST)), + self.prog.typedef_type( + "CINT", self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), "typedef const int CINT", ) + + def test_const_typedef(self): self.assertPrettyPrint( - typedef_type("INT", int_type("int", 4, True), Qualifiers.CONST), + self.prog.typedef_type( + "INT", self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ), "const typedef int INT", ) + + def test_typedef_pointer(self): self.assertPrettyPrint( - typedef_type("string", pointer_type(8, int_type("char", 1, True))), + self.prog.typedef_type( + "string", self.prog.pointer_type(self.prog.int_type("char", 1, True)) + ), "typedef char *string", ) - t = typedef_type( - "Point", - struct_type( - None, - 8, - ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 4), + def test_typedef_struct(self): + self.assertPrettyPrint( + self.prog.typedef_type( + "Point", + self.prog.struct_type( + None, + 8, + ( + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + ), ), ), - ) - self.assertPrettyPrint( - t, """\ typedef struct { int x; @@ -538,9 +608,12 @@ def test_typedef(self): } Point""", ) - def test_function_typedef(self): + def test_typedef_function(self): self.assertPrettyPrint( - typedef_type("fn", function_type(int_type("int", 4, True), (), False)), + self.prog.typedef_type( + "fn", + self.prog.function_type(self.prog.int_type("int", 4, True), (), False), + ), "typedef int fn(void)", ) @@ -549,10 +622,16 @@ def test_function_no_name(self): ValueError, "function must have name", str, - struct_type( + self.prog.struct_type( 
"foo", 8, - (TypeMember(function_type(int_type("int", 4, True), (), False), None),), + ( + TypeMember( + self.prog.function_type( + self.prog.int_type("int", 4, True), (), False + ) + ), + ), ), ) diff --git a/tests/test_lexer.py b/tests/test_lexer.py index c2ff473b7..db92b4481 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -3,7 +3,7 @@ import unittest -from tests.libdrgn import drgn_test_lexer_func, Lexer +from tests.libdrgn import Lexer, drgn_test_lexer_func class TestLexer(unittest.TestCase): diff --git a/tests/test_object.py b/tests/test_object.py index c07dce858..1180b81c8 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -13,43 +13,21 @@ Type, TypeEnumerator, TypeMember, - array_type, cast, container_of, - enum_type, - float_type, - function_type, - int_type, - pointer_type, reinterpret, sizeof, - struct_type, - typedef_type, - union_type, - void_type, -) -from tests import ( - MockMemorySegment, - ObjectTestCase, - color_type, - coord_type, - line_segment_type, - mock_program, - option_type, - pid_type, - point_type, ) +from tests import MockMemorySegment, MockProgramTestCase, mock_program -class TestInit(ObjectTestCase): +class TestInit(MockProgramTestCase): def test_type_stays_alive(self): - obj = Object(self.prog, int_type("int", 4, True), value=0) - self.assertEqual(obj.type_, int_type("int", 4, True)) + obj = Object(self.prog, self.prog.int_type("int", 4, True), value=0) + self.assertEqual(obj.type_, self.prog.int_type("int", 4, True)) type_ = obj.type_ del obj - self.assertEqual(type_, int_type("int", 4, True)) - del self.prog - self.assertEqual(type_, int_type("int", 4, True)) + self.assertEqual(type_, self.prog.int_type("int", 4, True)) def test_type(self): self.assertRaisesRegex( @@ -132,16 +110,13 @@ def test_bit_offset(self): ) -class TestReference(ObjectTestCase): +class TestReference(MockProgramTestCase): def test_basic(self): - prog = mock_program( - segments=[ - MockMemorySegment((1000).to_bytes(4, "little"), 
virt_addr=0xFFFF0000), - ] - ) - obj = Object(prog, "int", address=0xFFFF0000) - self.assertIs(obj.prog_, prog) - self.assertEqual(obj.type_, prog.type("int")) + self.add_memory_segment((1000).to_bytes(4, "little"), virt_addr=0xFFFF0000) + + obj = Object(self.prog, "int", address=0xFFFF0000) + self.assertIs(obj.prog_, self.prog) + self.assertEqual(obj.type_, self.prog.type("int")) self.assertEqual(obj.address_, 0xFFFF0000) self.assertEqual(obj.byteorder_, "little") self.assertEqual(obj.bit_offset_, 0) @@ -149,9 +124,9 @@ def test_basic(self): self.assertEqual(obj.value_(), 1000) self.assertEqual(repr(obj), "Object(prog, 'int', address=0xffff0000)") - self.assertEqual(obj.read_(), Object(prog, "int", value=1000)) + self.assertEqual(obj.read_(), Object(self.prog, "int", value=1000)) - obj = Object(prog, "int", address=0xFFFF0000, byteorder="big") + obj = Object(self.prog, "int", address=0xFFFF0000, byteorder="big") self.assertEqual(obj.byteorder_, "big") self.assertEqual(obj.value_(), -402456576) self.assertEqual( @@ -159,7 +134,7 @@ def test_basic(self): ) self.assertEqual(sizeof(obj), 4) - obj = Object(prog, "unsigned int", address=0xFFFF0000, bit_field_size=4) + obj = Object(self.prog, "unsigned int", address=0xFFFF0000, bit_field_size=4) self.assertEqual(obj.bit_offset_, 0) self.assertEqual(obj.bit_field_size_, 4) self.assertEqual(obj.value_(), 8) @@ -170,7 +145,11 @@ def test_basic(self): self.assertRaises(TypeError, sizeof, obj) obj = Object( - prog, "unsigned int", address=0xFFFF0000, bit_field_size=4, bit_offset=4 + self.prog, + "unsigned int", + address=0xFFFF0000, + bit_field_size=4, + bit_offset=4, ) self.assertEqual(obj.bit_offset_, 4) self.assertEqual(obj.bit_field_size_, 4) @@ -245,33 +224,32 @@ def test_read_float(self): self.assertEqual(obj.value_(), expected) def test_struct(self): - segment = ( - (99).to_bytes(4, "little") - + (-1).to_bytes(4, "little", signed=True) - + (12345).to_bytes(4, "little") - + (0).to_bytes(4, "little") - ) - prog = 
mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type], + self.add_memory_segment( + ( + (99).to_bytes(4, "little") + + (-1).to_bytes(4, "little", signed=True) + + (12345).to_bytes(4, "little") + + (0).to_bytes(4, "little") + ), + virt_addr=0xFFFF0000, ) - - obj = Object(prog, "struct point", address=0xFFFF0000) + self.types.append(self.point_type) + obj = Object(self.prog, "struct point", address=0xFFFF0000) self.assertEqual(obj.value_(), {"x": 99, "y": -1}) self.assertEqual(sizeof(obj), 8) - type_ = struct_type( + type_ = self.prog.struct_type( "foo", 16, ( - TypeMember(point_type, "point"), + TypeMember(self.point_type, "point"), TypeMember( - struct_type( + self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "bar"), - TypeMember(int_type("int", 4, True), "baz", 32), + TypeMember(self.prog.int_type("int", 4, True), "bar"), + TypeMember(self.prog.int_type("int", 4, True), "baz", 32), ), ), None, @@ -279,7 +257,7 @@ def test_struct(self): ), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) self.assertEqual( obj.value_(), {"point": {"x": 99, "y": -1}, "bar": 12345, "baz": 0} ) @@ -288,24 +266,21 @@ def test_array(self): segment = bytearray() for i in range(10): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - - obj = Object(prog, "int [5]", address=0xFFFF0000) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + obj = Object(self.prog, "int [5]", address=0xFFFF0000) self.assertEqual(obj.value_(), [0, 1, 2, 3, 4]) self.assertEqual(sizeof(obj), 20) - obj = Object(prog, "int [2][5]", address=0xFFFF0000) + obj = Object(self.prog, "int [2][5]", address=0xFFFF0000) self.assertEqual(obj.value_(), [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) - obj = Object(prog, "int [2][2][2]", address=0xFFFF0000) + obj = Object(self.prog, "int [2][2][2]", address=0xFFFF0000) 
self.assertEqual(obj.value_(), [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]) def test_void(self): - obj = Object(self.prog, void_type(), address=0) + obj = Object(self.prog, self.prog.void_type(), address=0) self.assertIs(obj.prog_, self.prog) - self.assertEqual(obj.type_, void_type()) + self.assertEqual(obj.type_, self.prog.void_type()) self.assertEqual(obj.address_, 0) self.assertEqual(obj.byteorder_, "little") self.assertEqual(obj.bit_offset_, 0) @@ -319,9 +294,15 @@ def test_void(self): self.assertRaises(TypeError, sizeof, obj) def test_function(self): - obj = Object(self.prog, function_type(void_type(), (), False), address=0) + obj = Object( + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0, + ) self.assertIs(obj.prog_, self.prog) - self.assertEqual(obj.type_, function_type(void_type(), (), False)) + self.assertEqual( + obj.type_, self.prog.function_type(self.prog.void_type(), (), False) + ) self.assertEqual(obj.address_, 0) self.assertEqual(obj.byteorder_, "little") self.assertEqual(obj.bit_offset_, 0) @@ -337,7 +318,7 @@ def test_function(self): def test_incomplete(self): # It's valid to create references with incomplete type, but not to read # from them. 
- obj = Object(self.prog, struct_type("foo"), address=0) + obj = Object(self.prog, self.prog.struct_type("foo"), address=0) self.assertRaisesRegex( TypeError, "cannot read object with incomplete structure type", obj.value_ ) @@ -346,7 +327,7 @@ def test_incomplete(self): ) self.assertRaises(TypeError, sizeof, obj) - obj = Object(self.prog, union_type("foo"), address=0) + obj = Object(self.prog, self.prog.union_type("foo"), address=0) self.assertRaisesRegex( TypeError, "cannot read object with incomplete union type", obj.value_ ) @@ -354,7 +335,7 @@ def test_incomplete(self): TypeError, "cannot read object with incomplete union type", obj.read_ ) - obj = Object(self.prog, enum_type("foo"), address=0) + obj = Object(self.prog, self.prog.enum_type("foo"), address=0) self.assertRaisesRegex( TypeError, "cannot read object with incomplete enumerated type", obj.value_ ) @@ -362,7 +343,11 @@ def test_incomplete(self): TypeError, "cannot read object with incomplete enumerated type", obj.read_ ) - obj = Object(self.prog, array_type(None, int_type("int", 4, True)), address=0) + obj = Object( + self.prog, + self.prog.array_type(self.prog.int_type("int", 4, True)), + address=0, + ) self.assertRaisesRegex( TypeError, "cannot read object with incomplete array type", obj.value_ ) @@ -371,7 +356,7 @@ def test_incomplete(self): ) -class TestValue(ObjectTestCase): +class TestValue(MockProgramTestCase): def test_positional(self): self.assertEqual(Object(self.prog, "int", 1), Object(self.prog, "int", value=1)) @@ -495,73 +480,90 @@ def test_float(self): ) def test_enum(self): - self.assertEqual(Object(self.prog, color_type, value=0).value_(), 0) + self.assertEqual(Object(self.prog, self.color_type, value=0).value_(), 0) - def test_incomplete(self): + def test_incomplete_struct(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete structure type", Object, self.prog, - struct_type("foo"), + self.prog.struct_type("foo"), value={}, ) + def 
test_incomplete_union(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete union type", Object, self.prog, - union_type("foo"), + self.prog.union_type("foo"), value={}, ) + def test_incomplete_class(self): + self.assertRaisesRegex( + TypeError, + "cannot create object with incomplete class type", + Object, + self.prog, + self.prog.class_type("foo"), + value={}, + ) + + def test_incomplete_enum(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete enumerated type", Object, self.prog, - enum_type("foo"), + self.prog.enum_type("foo"), value=0, ) + def test_incomplete_array(self): self.assertRaisesRegex( TypeError, "cannot create object with incomplete array type", Object, self.prog, - array_type(None, int_type("int", 4, True)), + self.prog.array_type(self.prog.int_type("int", 4, True)), value=[], ) def test_compound(self): - obj = Object(self.prog, point_type, value={"x": 100, "y": -5}) + obj = Object(self.prog, self.point_type, value={"x": 100, "y": -5}) self.assertEqual(obj.x, Object(self.prog, "int", value=100)) self.assertEqual(obj.y, Object(self.prog, "int", value=-5)) self.assertEqual( - Object(self.prog, point_type, value={}), - Object(self.prog, point_type, value={"x": 0, "y": 0}), + Object(self.prog, self.point_type, value={}), + Object(self.prog, self.point_type, value={"x": 0, "y": 0}), ) value = { "a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}, } - obj = Object(self.prog, line_segment_type, value=value) - self.assertEqual(obj.a, Object(self.prog, point_type, value={"x": 1, "y": 2})) - self.assertEqual(obj.b, Object(self.prog, point_type, value={"x": 3, "y": 4})) + obj = Object(self.prog, self.line_segment_type, value=value) + self.assertEqual( + obj.a, Object(self.prog, self.point_type, value={"x": 1, "y": 2}) + ) + self.assertEqual( + obj.b, Object(self.prog, self.point_type, value={"x": 3, "y": 4}) + ) self.assertEqual(obj.value_(), value) - invalid_struct = struct_type( + invalid_struct = 
self.prog.struct_type( "foo", 4, ( - TypeMember(int_type("short", 2, True), "a"), + TypeMember(self.prog.int_type("short", 2, True), "a"), # Straddles the end of the structure. - TypeMember(int_type("int", 4, True), "b", 16), + TypeMember(self.prog.int_type("int", 4, True), "b", 16), # Beyond the end of the structure. - TypeMember(int_type("int", 4, True), "c", 32), + TypeMember(self.prog.int_type("int", 4, True), "c", 32), ), ) @@ -588,7 +590,7 @@ def test_compound(self): "must be dictionary or mapping", Object, self.prog, - point_type, + self.point_type, value=1, ) self.assertRaisesRegex( @@ -596,18 +598,23 @@ def test_compound(self): "member key must be string", Object, self.prog, - point_type, + self.point_type, value={0: 0}, ) self.assertRaisesRegex( - TypeError, "must be number", Object, self.prog, point_type, value={"x": []} + TypeError, + "must be number", + Object, + self.prog, + self.point_type, + value={"x": []}, ) self.assertRaisesRegex( LookupError, "has no member 'z'", Object, self.prog, - point_type, + self.point_type, value={"z": 999}, ) @@ -617,8 +624,11 @@ def test_pointer(self): self.assertEqual(obj.value_(), 0xFFFF0000) self.assertEqual(repr(obj), "Object(prog, 'int *', value=0xffff0000)") + def test_pointer_typedef(self): obj = Object( - self.prog, typedef_type("INTP", self.prog.type("int *")), value=0xFFFF0000 + self.prog, + self.prog.typedef_type("INTP", self.prog.type("int *")), + value=0xFFFF0000, ) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 0xFFFF0000) @@ -647,7 +657,7 @@ def test_array(self): ) -class TestConversions(ObjectTestCase): +class TestConversions(MockProgramTestCase): def test_bool(self): self.assertTrue(Object(self.prog, "int", value=-1)) self.assertFalse(Object(self.prog, "int", value=0)) @@ -667,7 +677,7 @@ def test_bool(self): TypeError, "cannot convert 'struct point' to bool", bool, - Object(self.prog, point_type, address=0), + Object(self.prog, self.point_type, address=0), ) def test_int(self): @@ 
-720,7 +730,7 @@ def test_index(self): ) -class TestInvalidBitField(ObjectTestCase): +class TestInvalidBitField(MockProgramTestCase): def test_integer(self): self.assertRaisesRegex( ValueError, @@ -785,7 +795,7 @@ def test_reference(self): "bit field must be integer", Object, self.prog, - point_type, + self.point_type, address=0, bit_field_size=4, ) @@ -794,20 +804,22 @@ def test_reference(self): "bit field must be integer", Object, self.prog, - point_type, + self.point_type, value={}, bit_field_size=4, ) def test_member(self): - type_ = struct_type("foo", 8, (TypeMember(point_type, "p", 0, 4),)) + type_ = self.prog.struct_type( + "foo", 8, (TypeMember(self.point_type, "p", 0, 4),) + ) obj = Object(self.prog, type_, address=0) self.assertRaisesRegex( ValueError, "bit field must be integer", obj.member_, "p" ) -class TestCLiteral(ObjectTestCase): +class TestCLiteral(MockProgramTestCase): def test_int(self): self.assertEqual(Object(self.prog, value=1), Object(self.prog, "int", value=1)) self.assertEqual( @@ -863,7 +875,7 @@ class Foo: ) -class TestCIntegerPromotion(ObjectTestCase): +class TestCIntegerPromotion(MockProgramTestCase): def test_conversion_rank_less_than_int(self): self.assertEqual(+self.bool(False), self.int(0)) @@ -890,11 +902,11 @@ def test_conversion_rank_less_than_int(self): # If short is the same size as int, then int can't represent all of the # values of unsigned short. 
self.assertEqual( - +Object(self.prog, int_type("short", 4, True), value=1), + +Object(self.prog, self.prog.int_type("short", 4, True), value=1), Object(self.prog, "int", value=1), ) self.assertEqual( - +Object(self.prog, int_type("unsigned short", 4, False), value=2), + +Object(self.prog, self.prog.int_type("unsigned short", 4, False), value=2), Object(self.prog, "unsigned int", value=2), ) @@ -930,20 +942,20 @@ def test_conversion_rank_greater_than_int(self): def test_extended_integer(self): self.assertEqual( - +Object(self.prog, int_type("byte", 1, True), value=1), + +Object(self.prog, self.prog.int_type("byte", 1, True), value=1), Object(self.prog, "int", value=1), ) self.assertEqual( - +Object(self.prog, int_type("ubyte", 1, False), value=-1), + +Object(self.prog, self.prog.int_type("ubyte", 1, False), value=-1), Object(self.prog, "int", value=0xFF), ) self.assertEqual( - +Object(self.prog, int_type("qword", 8, True), value=1), - Object(self.prog, int_type("qword", 8, True), value=1), + +Object(self.prog, self.prog.int_type("qword", 8, True), value=1), + Object(self.prog, self.prog.int_type("qword", 8, True), value=1), ) self.assertEqual( - +Object(self.prog, int_type("qword", 8, False), value=1), - Object(self.prog, int_type("qword", 8, False), value=1), + +Object(self.prog, self.prog.int_type("qword", 8, False), value=1), + Object(self.prog, self.prog.int_type("qword", 8, False), value=1), ) def test_bit_field(self): @@ -996,11 +1008,11 @@ def test_bit_field(self): def test_enum(self): # Enums should be converted to their compatible type and then promoted. 
self.assertEqual( - +Object(self.prog, color_type, value=1), + +Object(self.prog, self.color_type, value=1), Object(self.prog, "unsigned int", value=1), ) - type_ = enum_type( + type_ = self.prog.enum_type( "color", self.prog.type("unsigned long long"), ( @@ -1014,7 +1026,7 @@ def test_enum(self): Object(self.prog, "unsigned long long", value=1), ) - type_ = enum_type( + type_ = self.prog.enum_type( "color", self.prog.type("char"), ( @@ -1028,13 +1040,13 @@ def test_enum(self): ) def test_typedef(self): - type_ = typedef_type("SHORT", self.prog.type("short")) + type_ = self.prog.typedef_type("SHORT", self.prog.type("short")) self.assertEqual( +Object(self.prog, type_, value=5), Object(self.prog, "int", value=5) ) # Typedef should be preserved if the type wasn't promoted. - type_ = typedef_type("self.int", self.prog.type("int")) + type_ = self.prog.typedef_type("self.int", self.prog.type("int")) self.assertEqual( +Object(self.prog, type_, value=5), Object(self.prog, type_, value=5) ) @@ -1047,7 +1059,7 @@ def test_non_integer(self): ) -class TestCCommonRealType(ObjectTestCase): +class TestCCommonRealType(MockProgramTestCase): def assertCommonRealType(self, lhs, rhs, expected, commutative=True): if isinstance(lhs, (str, Type)): obj1 = Object(self.prog, lhs, value=1) @@ -1076,7 +1088,7 @@ def test_float(self): self.assertCommonRealType("double", "double", "double") # Floating type not in the standard. 
- float64 = float_type("float64", 8) + float64 = self.prog.float_type("float64", 8) self.assertCommonRealType(float64, "long long", float64) self.assertCommonRealType(float64, "float", float64) self.assertCommonRealType(float64, "double", float64) @@ -1127,8 +1139,8 @@ def test_same_sign(self): "unsigned long long", "unsigned long", "unsigned long long" ) - int64 = int_type("int64", 8, True) - qword = int_type("qword", 8, True) + int64 = self.prog.int_type("int64", 8, True) + qword = self.prog.int_type("qword", 8, True) self.assertCommonRealType("long", int64, "long") self.assertCommonRealType(int64, qword, qword, commutative=False) self.assertCommonRealType(qword, int64, int64, commutative=False) @@ -1139,8 +1151,8 @@ def test_unsigned_greater_rank(self): self.assertCommonRealType("unsigned long long", "long", "unsigned long long") self.assertCommonRealType("unsigned long long", "int", "unsigned long long") - int64 = int_type("int64", 8, True) - uint64 = int_type("uint64", 8, False) + int64 = self.prog.int_type("int64", 8, True) + uint64 = self.prog.int_type("uint64", 8, False) self.assertCommonRealType(uint64, "int", uint64) self.assertCommonRealType("unsigned long", int64, "unsigned long") @@ -1148,8 +1160,8 @@ def test_signed_can_represent_unsigned(self): self.assertCommonRealType("long", "unsigned int", "long") self.assertCommonRealType("long long", "unsigned int", "long long") - int64 = int_type("int64", 8, True) - weirduint = int_type("weirduint", 6, False) + int64 = self.prog.int_type("int64", 8, True) + weirduint = self.prog.int_type("weirduint", 6, False) self.assertCommonRealType(int64, "unsigned int", int64) self.assertCommonRealType("long", weirduint, "long") @@ -1158,19 +1170,19 @@ def test_corresponding_unsigned(self): self.assertCommonRealType("long long", "unsigned long", "unsigned long long") def test_enum(self): - self.assertCommonRealType(color_type, color_type, "unsigned int") + self.assertCommonRealType(self.color_type, self.color_type, 
"unsigned int") def test_typedef(self): - type_ = typedef_type("INT", self.prog.type("int")) + type_ = self.prog.typedef_type("INT", self.prog.type("int")) self.assertCommonRealType(type_, type_, type_) self.assertCommonRealType("int", type_, type_, commutative=False) self.assertCommonRealType(type_, "int", "int", commutative=False) - type_ = typedef_type("LONG", self.prog.type("long")) + type_ = self.prog.typedef_type("LONG", self.prog.type("long")) self.assertCommonRealType(type_, "int", type_) -class TestCOperators(ObjectTestCase): +class TestCOperators(MockProgramTestCase): def test_cast_array(self): obj = Object(self.prog, "int []", address=0xFFFF0000) self.assertEqual( @@ -1189,7 +1201,9 @@ def test_cast_array(self): def test_cast_function(self): func = Object( - self.prog, function_type(void_type(), (), False), address=0xFFFF0000 + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, ) self.assertEqual( cast("void *", func), Object(self.prog, "void *", value=0xFFFF0000) @@ -1315,7 +1329,9 @@ def test_ptr_relational(self): self.assertRaises(TypeError, operator.lt, ptr0, self.int(1)) func = Object( - self.prog, function_type(void_type(), (), False), address=0xFFFF0000 + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, ) self.assertTrue(func == func) self.assertTrue(func == ptr0) @@ -1331,7 +1347,9 @@ def test_ptr_relational(self): self.assertRaises( TypeError, operator.eq, - Object(self.prog, struct_type("foo", None, None), address=0xFFFF0000), + Object( + self.prog, self.prog.struct_type("foo", None, None), address=0xFFFF0000 + ), ptr0, ) @@ -1519,39 +1537,51 @@ def test_not(self): def test_container_of(self): obj = Object(self.prog, "int *", value=0xFFFF000C) - container_of(obj, point_type, "x") + container_of(obj, self.point_type, "x") self.assertEqual( - container_of(obj, point_type, "x"), - Object(self.prog, pointer_type(8, point_type), value=0xFFFF000C), + 
container_of(obj, self.point_type, "x"), + Object( + self.prog, self.prog.pointer_type(self.point_type), value=0xFFFF000C + ), ) self.assertEqual( - container_of(obj, point_type, "y"), - Object(self.prog, pointer_type(8, point_type), value=0xFFFF0008), + container_of(obj, self.point_type, "y"), + Object( + self.prog, self.prog.pointer_type(self.point_type), value=0xFFFF0008 + ), ) self.assertEqual( - container_of(obj, line_segment_type, "a.x"), - Object(self.prog, pointer_type(8, line_segment_type), value=0xFFFF000C), + container_of(obj, self.line_segment_type, "a.x"), + Object( + self.prog, + self.prog.pointer_type(self.line_segment_type), + value=0xFFFF000C, + ), ) self.assertEqual( - container_of(obj, line_segment_type, "b.x"), - Object(self.prog, pointer_type(8, line_segment_type), value=0xFFFF0004), + container_of(obj, self.line_segment_type, "b.x"), + Object( + self.prog, + self.prog.pointer_type(self.line_segment_type), + value=0xFFFF0004, + ), ) - polygon_type = struct_type( - "polygon", 0, (TypeMember(array_type(None, point_type), "points"),) + polygon_type = self.prog.struct_type( + "polygon", 0, (TypeMember(self.prog.array_type(self.point_type), "points"),) ) self.assertEqual( container_of(obj, polygon_type, "points[3].x"), - Object(self.prog, pointer_type(8, polygon_type), value=0xFFFEFFF4), + Object(self.prog, self.prog.pointer_type(polygon_type), value=0xFFFEFFF4), ) - small_point_type = struct_type( + small_point_type = self.prog.struct_type( "small_point", 1, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 4, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 4, 4), ), ) self.assertRaisesRegex( @@ -1568,7 +1598,7 @@ def test_container_of(self): r"container_of\(\) argument must be a pointer", container_of, obj[0], - point_type, + self.point_type, "x", ) @@ -1581,12 +1611,14 @@ def test_container_of(self): "x", ), - type_ = struct_type( 
+ type_ = self.prog.struct_type( "foo", 16, ( - TypeMember(array_type(8, int_type("int", 4, True)), "arr"), - TypeMember(point_type, "point", 256), + TypeMember( + self.prog.array_type(self.prog.int_type("int", 4, True), 8), "arr" + ), + TypeMember(self.point_type, "point", 256), ), ) syntax_errors = [ @@ -1604,7 +1636,7 @@ def test_container_of(self): ) -class TestCPretty(ObjectTestCase): +class TestCPretty(MockProgramTestCase): def test_int(self): obj = Object(self.prog, "int", value=99) self.assertEqual(str(obj), "(int)99") @@ -1628,7 +1660,7 @@ def test_char(self): self.assertEqual( Object( self.prog, - typedef_type("uint8_t", self.prog.type("unsigned char")), + self.prog.typedef_type("uint8_t", self.prog.type("unsigned char")), value=65, ).format_(char=True), "(uint8_t)65", @@ -1645,13 +1677,17 @@ def test_float(self): self.assertEqual(str(Object(self.prog, "float", value=0.5)), "(float)0.5") def test_typedef(self): - type_ = typedef_type("INT", int_type("int", 4, True)) + type_ = self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)) self.assertEqual(str(Object(self.prog, type_, value=99)), "(INT)99") - type_ = typedef_type("INT", int_type("int", 4, True), Qualifiers.CONST) + type_ = self.prog.typedef_type( + "INT", self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ) self.assertEqual(str(Object(self.prog, type_, value=99)), "(const INT)99") - type_ = typedef_type("CINT", int_type("int", 4, True, Qualifiers.CONST)) + type_ = self.prog.typedef_type( + "CINT", self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ) self.assertEqual(str(Object(self.prog, type_, value=99)), "(CINT)99") def test_struct(self): @@ -1661,12 +1697,10 @@ def test_struct(self): + (12345).to_bytes(4, "little", signed=True) + (0).to_bytes(4, "little", signed=True) ) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type], - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + 
self.types.append(self.point_type) - obj = Object(prog, "struct point", address=0xFFFF0000) + obj = Object(self.prog, "struct point", address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -1700,18 +1734,18 @@ def test_struct(self): "(struct point){ (int)99, (int)-1 }", ) - type_ = struct_type( + type_ = self.prog.struct_type( "foo", 16, ( - TypeMember(point_type, "point"), + TypeMember(self.point_type, "point"), TypeMember( - struct_type( + self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "bar"), - TypeMember(int_type("int", 4, True), "baz", 32), + TypeMember(self.prog.int_type("int", 4, True), "bar"), + TypeMember(self.prog.int_type("int", 4, True), "baz", 32), ), ), None, @@ -1719,7 +1753,7 @@ def test_struct(self): ), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) expected = """\ (struct foo){ .point = (struct point){ @@ -1732,36 +1766,36 @@ def test_struct(self): self.assertEqual(str(obj), expected) self.assertEqual(str(obj.read_()), expected) - segment = ( - (99).to_bytes(8, "little") - + (-1).to_bytes(8, "little", signed=True) - + (12345).to_bytes(8, "little", signed=True) - + (0).to_bytes(8, "little", signed=True) - ) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] + self.add_memory_segment( + ( + (99).to_bytes(8, "little") + + (-1).to_bytes(8, "little", signed=True) + + (12345).to_bytes(8, "little", signed=True) + + (0).to_bytes(8, "little", signed=True) + ), + virt_addr=0xFFFF8000, ) - type_ = struct_type( + type_ = self.prog.struct_type( "foo", 32, ( TypeMember( - struct_type( + self.prog.struct_type( "long_point", 16, ( - TypeMember(int_type("long", 8, True), "x"), - TypeMember(int_type("long", 8, True), "y", 64), + TypeMember(self.prog.int_type("long", 8, True), "x"), + TypeMember(self.prog.int_type("long", 8, True), "y", 64), ), ), "point", ), - TypeMember(int_type("long", 8, True), "bar", 128), - TypeMember(int_type("long", 8, 
True), "baz", 192), + TypeMember(self.prog.int_type("long", 8, True), "bar", 128), + TypeMember(self.prog.int_type("long", 8, True), "baz", 192), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF8000) expected = """\ (struct foo){ .point = (struct long_point){ @@ -1774,10 +1808,10 @@ def test_struct(self): self.assertEqual(str(obj), expected) self.assertEqual(str(obj.read_()), expected) - type_ = struct_type("foo", 0, ()) - self.assertEqual(str(Object(prog, type_, address=0)), "(struct foo){}") + type_ = self.prog.struct_type("foo", 0, ()) + self.assertEqual(str(Object(self.prog, type_, address=0)), "(struct foo){}") - obj = Object(prog, point_type, value={"x": 1}) + obj = Object(self.prog, self.point_type, value={"x": 1}) self.assertEqual( obj.format_(implicit_members=False), """\ @@ -1792,7 +1826,7 @@ def test_struct(self): (int)1, }""", ) - obj = Object(prog, point_type, value={"y": 1}) + obj = Object(self.prog, self.point_type, value={"y": 1}) self.assertEqual( obj.format_(implicit_members=False), """\ @@ -1810,22 +1844,23 @@ def test_struct(self): ) def test_bit_field(self): - segment = b"\x07\x10\x5e\x5f\x1f\0\0\0" - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - - type_ = struct_type( + self.add_memory_segment(b"\x07\x10\x5e\x5f\x1f\0\0\0", virt_addr=0xFFFF0000) + type_ = self.prog.struct_type( "bits", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True, Qualifiers.CONST), "y", 4, 28), - TypeMember(int_type("int", 4, True), "z", 32, 5), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + "y", + 4, + 28, + ), + TypeMember(self.prog.int_type("int", 4, True), "z", 32, 5), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -1841,13 +1876,10 @@ def 
test_bit_field(self): self.assertEqual(str(obj.z), "(int)-1") def test_union(self): - segment = b"\0\0\x80?" - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[option_type], - ) + self.add_memory_segment(b"\0\0\x80?", virt_addr=0xFFFF0000) + self.types.append(self.option_type) self.assertEqual( - str(Object(prog, "union option", address=0xFFFF0000)), + str(Object(self.prog, "union option", address=0xFFFF0000)), """\ (union option){ .i = (int)1065353216, @@ -1856,71 +1888,62 @@ def test_union(self): ) def test_enum(self): - self.assertEqual(str(Object(self.prog, color_type, value=0)), "(enum color)RED") self.assertEqual( - str(Object(self.prog, color_type, value=1)), "(enum color)GREEN" + str(Object(self.prog, self.color_type, value=0)), "(enum color)RED" + ) + self.assertEqual( + str(Object(self.prog, self.color_type, value=1)), "(enum color)GREEN" + ) + self.assertEqual( + str(Object(self.prog, self.color_type, value=4)), "(enum color)4" ) - self.assertEqual(str(Object(self.prog, color_type, value=4)), "(enum color)4") - obj = Object(self.prog, enum_type("color"), address=0) + obj = Object(self.prog, self.prog.enum_type("color"), address=0) self.assertRaisesRegex(TypeError, "cannot format incomplete enum", str, obj) def test_pointer(self): - prog = mock_program( - segments=[ - MockMemorySegment((99).to_bytes(4, "little"), virt_addr=0xFFFF0000), - ] - ) - obj = Object(prog, "int *", value=0xFFFF0000) + self.add_memory_segment((99).to_bytes(4, "little"), virt_addr=0xFFFF0000) + obj = Object(self.prog, "int *", value=0xFFFF0000) self.assertEqual(str(obj), "*(int *)0xffff0000 = 99") self.assertEqual(obj.format_(dereference=False), "(int *)0xffff0000") self.assertEqual( - str(Object(prog, "int *", value=0x7FFFFFFF)), "(int *)0x7fffffff" + str(Object(self.prog, "int *", value=0x7FFFFFFF)), "(int *)0x7fffffff" ) def test_void_pointer(self): - prog = mock_program( - segments=[ - MockMemorySegment((99).to_bytes(8, "little"), 
virt_addr=0xFFFF0000), - ] - ) + self.add_memory_segment((99).to_bytes(4, "little"), virt_addr=0xFFFF0000) self.assertEqual( - str(Object(prog, "void *", value=0xFFFF0000)), "(void *)0xffff0000" + str(Object(self.prog, "void *", value=0xFFFF0000)), "(void *)0xffff0000" ) def test_pointer_typedef(self): - prog = mock_program( - segments=[ - MockMemorySegment( - (0xFFFF00F0).to_bytes(8, "little"), virt_addr=0xFFFF0000 - ), - ] + self.add_memory_segment( + (0xFFFF00F0).to_bytes(8, "little"), virt_addr=0xFFFF0000 + ) + type_ = self.prog.typedef_type( + "HANDLE", + self.prog.pointer_type(self.prog.pointer_type(self.prog.void_type())), ) - type_ = typedef_type("HANDLE", pointer_type(8, pointer_type(8, void_type()))) self.assertEqual( - str(Object(prog, type_, value=0xFFFF0000)), + str(Object(self.prog, type_, value=0xFFFF0000)), "*(HANDLE)0xffff0000 = 0xffff00f0", ) # TODO: test symbolize. def test_c_string(self): - prog = mock_program( - segments=[ - MockMemorySegment(b"hello\0", virt_addr=0xFFFF0000), - MockMemorySegment(b"unterminated", virt_addr=0xFFFF0010), - MockMemorySegment(b'"escape\tme\\\0', virt_addr=0xFFFF0020), - ] - ) + self.add_memory_segment(b"hello\0", virt_addr=0xFFFF0000) + self.add_memory_segment(b"unterminated", virt_addr=0xFFFF0010) + self.add_memory_segment(b'"escape\tme\\\0', virt_addr=0xFFFF0020) - obj = Object(prog, "char *", value=0xFFFF0000) + obj = Object(self.prog, "char *", value=0xFFFF0000) self.assertEqual(str(obj), '(char *)0xffff0000 = "hello"') self.assertEqual(obj.format_(string=False), "*(char *)0xffff0000 = 104") - self.assertEqual(str(Object(prog, "char *", value=0x0)), "(char *)0x0") + self.assertEqual(str(Object(self.prog, "char *", value=0x0)), "(char *)0x0") self.assertEqual( - str(Object(prog, "char *", value=0xFFFF0010)), "(char *)0xffff0010" + str(Object(self.prog, "char *", value=0xFFFF0010)), "(char *)0xffff0010" ) self.assertEqual( - str(Object(prog, "char *", value=0xFFFF0020)), + str(Object(self.prog, "char *", 
value=0xFFFF0020)), r'(char *)0xffff0020 = "\"escape\tme\\"', ) @@ -1928,10 +1951,8 @@ def test_basic_array(self): segment = bytearray() for i in range(5): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - obj = Object(prog, "int [5]", address=0xFFFF0000) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + obj = Object(self.prog, "int [5]", address=0xFFFF0000) self.assertEqual(str(obj), "(int [5]){ 0, 1, 2, 3, 4 }") self.assertEqual( @@ -2012,10 +2033,8 @@ def test_nested_array(self): segment = bytearray() for i in range(10): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - obj = Object(prog, "int [2][5]", address=0xFFFF0000) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + obj = Object(self.prog, "int [2][5]", address=0xFFFF0000) self.assertEqual( str(obj), "(int [2][5]){ { 0, 1, 2, 3, 4 }, { 5, 6, 7, 8, 9 } }" @@ -2096,14 +2115,18 @@ def test_array_member(self): segment = bytearray() for i in range(5): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) - type_ = struct_type( - None, 20, (TypeMember(array_type(5, int_type("int", 4, True)), "arr"),) + type_ = self.prog.struct_type( + None, + 20, + ( + TypeMember( + self.prog.array_type(self.prog.int_type("int", 4, True), 5), "arr" + ), + ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF0000) self.assertEqual( str(obj), @@ -2142,12 +2165,10 @@ def test_array_of_struct(self): segment = bytearray() for i in range(1, 5): segment.extend(i.to_bytes(4, "little")) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type], - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + 
self.types.append(self.point_type) - obj = Object(prog, "struct point [2]", address=0xFFFF0000) + obj = Object(self.prog, "struct point [2]", address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -2169,12 +2190,11 @@ def test_zero_length_array(self): def test_array_zeroes(self): segment = bytearray(16) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[point_type, struct_type("empty", 0, ()),], - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) + self.types.append(self.point_type) + self.types.append(self.prog.struct_type("empty", 0, ())) - obj = Object(prog, "int [2]", address=0xFFFF0000) + obj = Object(self.prog, "int [2]", address=0xFFFF0000) self.assertEqual(str(obj), "(int [2]){}") self.assertEqual(obj.format_(implicit_elements=True), "(int [2]){ 0, 0 }") segment[:4] = (99).to_bytes(4, "little") @@ -2183,7 +2203,7 @@ def test_array_zeroes(self): segment[4:8] = (99).to_bytes(4, "little") self.assertEqual(str(obj), "(int [2]){ 0, 99 }") - obj = Object(prog, "struct point [2]", address=0xFFFF0000) + obj = Object(self.prog, "struct point [2]", address=0xFFFF0000) self.assertEqual( str(obj), """\ @@ -2195,16 +2215,14 @@ def test_array_zeroes(self): }""", ) - obj = Object(prog, "struct empty [2]", address=0) + obj = Object(self.prog, "struct empty [2]", address=0) self.assertEqual(str(obj), "(struct empty [2]){}") def test_char_array(self): segment = bytearray(16) - prog = mock_program( - segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) + self.add_memory_segment(segment, virt_addr=0xFFFF0000) - obj = Object(prog, "char [4]", address=0xFFFF0000) + obj = Object(self.prog, "char [4]", address=0xFFFF0000) segment[:16] = b"hello, world\0\0\0\0" self.assertEqual(str(obj), '(char [4])"hell"') self.assertEqual(obj.format_(string=False), "(char [4]){ 104, 101, 108, 108 }") @@ -2214,29 +2232,26 @@ def test_char_array(self): self.assertEqual(str(obj.read_()), str(obj)) self.assertEqual( - str(Object(prog, 
"char [0]", address=0xFFFF0000)), "(char [0]){}" + str(Object(self.prog, "char [0]", address=0xFFFF0000)), "(char [0]){}" ) self.assertEqual( - str(Object(prog, "char []", address=0xFFFF0000)), "(char []){}" + str(Object(self.prog, "char []", address=0xFFFF0000)), "(char []){}" ) def test_function(self): obj = Object( - self.prog, function_type(void_type(), (), False), address=0xFFFF0000 + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, ) self.assertEqual(str(obj), "(void (void))0xffff0000") -class TestGenericOperators(ObjectTestCase): +class TestGenericOperators(MockProgramTestCase): def setUp(self): super().setUp() - self.prog = mock_program( - segments=[ - MockMemorySegment( - b"".join(i.to_bytes(4, "little") for i in range(4)), - virt_addr=0xFFFF0000, - ), - ] + self.add_memory_segment( + b"".join(i.to_bytes(4, "little") for i in range(4)), virt_addr=0xFFFF0000 ) def test_len(self): @@ -2303,14 +2318,14 @@ def test_cast_primitive_value(self): TypeError, "cannot convert 'int' to 'struct point'", cast, - point_type, + self.point_type, Object(self.prog, "int", value=1), ) def test_cast_compound_value(self): - obj = Object(self.prog, point_type, address=0xFFFF0000).read_() - self.assertEqual(cast(point_type, obj), obj) - const_point_type = point_type.qualified(Qualifiers.CONST) + obj = Object(self.prog, self.point_type, address=0xFFFF0000).read_() + self.assertEqual(cast(self.point_type, obj), obj) + const_point_type = self.point_type.qualified(Qualifiers.CONST) self.assertEqual( cast(const_point_type, obj), Object(self.prog, const_point_type, address=0xFFFF0000).read_(), @@ -2319,7 +2334,7 @@ def test_cast_compound_value(self): TypeError, "cannot convert 'struct point' to 'enum color'", cast, - color_type, + self.color_type, obj, ) @@ -2342,39 +2357,43 @@ def test_reinterpret_reference(self): ) def test_reinterpret_value(self): - segment = (1).to_bytes(4, "little") + (2).to_bytes(4, "little") - prog = mock_program( - 
segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),], - types=[ - point_type, - struct_type( - "foo", 8, (TypeMember(int_type("long", 8, True), "counter"),) - ), - ], + self.types.append(self.point_type) + self.types.append( + self.prog.struct_type( + "foo", 8, (TypeMember(self.prog.int_type("long", 8, True), "counter"),) + ), ) - obj = Object(prog, "struct point", address=0xFFFF0000).read_() + obj = Object(self.prog, "struct point", address=0xFFFF0008).read_() self.assertEqual( reinterpret("struct foo", obj), - Object(prog, "struct foo", address=0xFFFF0000).read_(), + Object(self.prog, "struct foo", address=0xFFFF0008).read_(), ) self.assertEqual( reinterpret(obj.type_, obj, byteorder="big"), - Object(prog, "struct point", address=0xFFFF0000, byteorder="big").read_(), + Object( + self.prog, "struct point", address=0xFFFF0008, byteorder="big" + ).read_(), ) - self.assertEqual(reinterpret("int", obj), Object(prog, "int", value=1)) + self.assertEqual(reinterpret("int", obj), Object(self.prog, "int", value=2)) def test_member(self): - reference = Object(self.prog, point_type, address=0xFFFF0000) + reference = Object(self.prog, self.point_type, address=0xFFFF0000) unnamed_reference = Object( self.prog, - struct_type( + self.prog.struct_type( "point", 8, - (TypeMember(struct_type(None, 8, point_type.members), None),), + ( + TypeMember( + self.prog.struct_type(None, 8, self.point_type.members), None + ), + ), ), address=0xFFFF0000, ) - ptr = Object(self.prog, pointer_type(8, point_type), value=0xFFFF0000) + ptr = Object( + self.prog, self.prog.pointer_type(self.point_type), value=0xFFFF0000 + ) for obj in [reference, unnamed_reference, ptr]: self.assertEqual( obj.member_("x"), Object(self.prog, "int", address=0xFFFF0000) @@ -2403,34 +2422,38 @@ def test_member(self): self.assertRaisesRegex(AttributeError, "no attribute", getattr, obj, "x") def test_bit_field_member(self): - segment = b"\x07\x10\x5e\x5f\x1f\0\0\0" - prog = mock_program( - 
segments=[MockMemorySegment(segment, virt_addr=0xFFFF0000),] - ) - - type_ = struct_type( + self.add_memory_segment(b"\x07\x10\x5e\x5f\x1f\0\0\0", virt_addr=0xFFFF8000) + type_ = self.prog.struct_type( "bits", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True, Qualifiers.CONST), "y", 4, 28), - TypeMember(int_type("int", 4, True), "z", 32, 5), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + "y", + 4, + 28, + ), + TypeMember(self.prog.int_type("int", 4, True), "z", 32, 5), ), ) - obj = Object(prog, type_, address=0xFFFF0000) + obj = Object(self.prog, type_, address=0xFFFF8000) self.assertEqual( obj.x, Object( - prog, int_type("int", 4, True), address=0xFFFF0000, bit_field_size=4 + self.prog, + self.prog.int_type("int", 4, True), + address=0xFFFF8000, + bit_field_size=4, ), ) self.assertEqual( obj.y, Object( - prog, - int_type("int", 4, True, Qualifiers.CONST), - address=0xFFFF0000, + self.prog, + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST), + address=0xFFFF8000, bit_field_size=28, bit_offset=4, ), @@ -2438,29 +2461,30 @@ def test_bit_field_member(self): self.assertEqual( obj.z, Object( - prog, int_type("int", 4, True), address=0xFFFF0004, bit_field_size=5 + self.prog, + self.prog.int_type("int", 4, True), + address=0xFFFF8004, + bit_field_size=5, ), ) def test_member_out_of_bounds(self): obj = Object( - self.prog, struct_type("foo", 4, point_type.members), address=0xFFFF0000 + self.prog, + self.prog.struct_type("foo", 4, self.point_type.members), + address=0xFFFF0000, ).read_() self.assertRaisesRegex(OutOfBoundsError, "out of bounds", getattr, obj, "y") def test_string(self): - prog = mock_program( - segments=[ - MockMemorySegment( - b"\x00\x00\xff\xff\x00\x00\x00\x00", virt_addr=0xFFFEFFF8 - ), - MockMemorySegment(b"hello\0world\0", virt_addr=0xFFFF0000), - ] + self.add_memory_segment( + 
b"\x00\x00\xff\xff\x00\x00\x00\x00", virt_addr=0xFFFEFFF8 ) + self.add_memory_segment(b"hello\0world\0", virt_addr=0xFFFF0000) strings = [ - (Object(prog, "char *", address=0xFFFEFFF8), b"hello"), - (Object(prog, "char [2]", address=0xFFFF0000), b"he"), - (Object(prog, "char [8]", address=0xFFFF0000), b"hello"), + (Object(self.prog, "char *", address=0xFFFEFFF8), b"hello"), + (Object(self.prog, "char [2]", address=0xFFFF0000), b"he"), + (Object(self.prog, "char [8]", address=0xFFFF0000), b"hello"), ] for obj, expected in strings: with self.subTest(obj=obj): @@ -2468,10 +2492,10 @@ def test_string(self): self.assertEqual(obj.read_().string_(), expected) strings = [ - Object(prog, "char []", address=0xFFFF0000), - Object(prog, "int []", address=0xFFFF0000), - Object(prog, "int [2]", address=0xFFFF0000), - Object(prog, "int *", value=0xFFFF0000), + Object(self.prog, "char []", address=0xFFFF0000), + Object(self.prog, "int []", address=0xFFFF0000), + Object(self.prog, "int [2]", address=0xFFFF0000), + Object(self.prog, "int *", value=0xFFFF0000), ] for obj in strings: self.assertEqual(obj.string_(), b"hello") @@ -2479,16 +2503,16 @@ def test_string(self): self.assertRaisesRegex( TypeError, "must be an array or pointer", - Object(prog, "int", value=1).string_, + Object(self.prog, "int", value=1).string_, ) -class TestSpecialMethods(ObjectTestCase): +class TestSpecialMethods(MockProgramTestCase): def test_dir(self): obj = Object(self.prog, "int", value=0) self.assertEqual(dir(obj), sorted(object.__dir__(obj))) - obj = Object(self.prog, point_type, address=0xFFFF0000) + obj = Object(self.prog, self.point_type, address=0xFFFF0000) self.assertEqual(dir(obj), sorted(object.__dir__(obj) + ["x", "y"])) self.assertEqual(dir(obj.address_of_()), dir(obj)) diff --git a/tests/test_program.py b/tests/test_program.py index 52c32f4ef..b97c86a33 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -5,7 +5,6 @@ import itertools import os import tempfile -import unittest 
import unittest.mock from drgn import ( @@ -18,15 +17,8 @@ Program, ProgramFlags, Qualifiers, - array_type, - bool_type, - float_type, - function_type, + TypeKind, host_platform, - int_type, - pointer_type, - typedef_type, - void_type, ) from tests import ( DEFAULT_LANGUAGE, @@ -34,12 +26,9 @@ MOCK_PLATFORM, MockMemorySegment, MockObject, - ObjectTestCase, - color_type, + MockProgramTestCase, + TestCase, mock_program, - option_type, - pid_type, - point_type, ) from tests.elf import ET, PT from tests.elfwriter import ElfSection, create_elf_file @@ -90,15 +79,9 @@ def test_lookup_error(self): "foo", "foo.c", ) + self.assertRaisesRegex(LookupError, "^could not find 'foo'$", prog.type, "foo") self.assertRaisesRegex( - LookupError, "^could not find 'typedef foo'$", prog.type, "foo" - ) - self.assertRaisesRegex( - LookupError, - "^could not find 'typedef foo' in 'foo.c'$", - prog.type, - "foo", - "foo.c", + LookupError, "^could not find 'foo' in 'foo.c'$", prog.type, "foo", "foo.c" ) self.assertRaisesRegex( LookupError, "^could not find variable 'foo'$", prog.variable, "foo" @@ -118,15 +101,6 @@ def test_lookup_error(self): def test_flags(self): self.assertIsInstance(mock_program().flags, ProgramFlags) - def test_pointer_type(self): - prog = mock_program() - self.assertEqual(prog.pointer_type(prog.type("int")), prog.type("int *")) - self.assertEqual(prog.pointer_type("int"), prog.type("int *")) - self.assertEqual( - prog.pointer_type(prog.type("int"), Qualifiers.CONST), - prog.type("int * const"), - ) - def test_debug_info(self): Program().load_debug_info([]) @@ -134,7 +108,7 @@ def test_language(self): self.assertEqual(Program().language, DEFAULT_LANGUAGE) -class TestMemory(unittest.TestCase): +class TestMemory(TestCase): def test_simple_read(self): data = b"hello, world" prog = mock_program(segments=[MockMemorySegment(data, 0xFFFF0000, 0xA0)]) @@ -164,7 +138,7 @@ def test_read_unsigned(self): self.assertEqual(prog.read_word(0xA0, True), value) prog = mock_program( - 
MOCK_32BIT_PLATFORM, segments=[MockMemorySegment(data, 0xFFFF0000, 0xA0)], + MOCK_32BIT_PLATFORM, segments=[MockMemorySegment(data, 0xFFFF0000, 0xA0)] ) def test_bad_address(self): @@ -353,24 +327,37 @@ def test_invalid_read_fn(self): ) -class TestTypes(unittest.TestCase): +class TestTypes(MockProgramTestCase): def test_invalid_finder(self): - self.assertRaises(TypeError, mock_program().add_type_finder, "foo") + self.assertRaises(TypeError, self.prog.add_type_finder, "foo") - prog = mock_program() - prog.add_type_finder(lambda kind, name, filename: "foo") - self.assertRaises(TypeError, prog.type, "int") + self.prog.add_type_finder(lambda kind, name, filename: "foo") + self.assertRaises(TypeError, self.prog.type, "int") + + def test_finder_different_program(self): + def finder(kind, name, filename): + if kind == TypeKind.TYPEDEF and name == "foo": + prog = Program() + return prog.typedef_type("foo", prog.void_type()) + else: + return None + + self.prog.add_type_finder(finder) + self.assertRaisesRegex( + ValueError, + "type find callback returned type from wrong program", + self.prog.type, + "foo", + ) def test_wrong_kind(self): - prog = mock_program() - prog.add_type_finder(lambda kind, name, filename: void_type()) - self.assertRaises(TypeError, prog.type, "int") + self.prog.add_type_finder(lambda kind, name, filename: self.prog.void_type()) + self.assertRaises(TypeError, self.prog.type, "int") def test_not_found(self): - prog = mock_program() - self.assertRaises(LookupError, prog.type, "struct foo") - prog.add_type_finder(lambda kind, name, filename: None) - self.assertRaises(LookupError, prog.type, "struct foo") + self.assertRaises(LookupError, self.prog.type, "struct foo") + self.prog.add_type_finder(lambda kind, name, filename: None) + self.assertRaises(LookupError, self.prog.type, "struct foo") def test_default_primitive_types(self): def spellings(tokens, num_optional=0): @@ -382,96 +369,110 @@ def spellings(tokens, num_optional=0): prog = mock_program( 
MOCK_PLATFORM if word_size == 8 else MOCK_32BIT_PLATFORM ) - self.assertEqual(prog.type("_Bool"), bool_type("_Bool", 1)) - self.assertEqual(prog.type("char"), int_type("char", 1, True)) + self.assertEqual(prog.type("_Bool"), prog.bool_type("_Bool", 1)) + self.assertEqual(prog.type("char"), prog.int_type("char", 1, True)) for spelling in spellings(["signed", "char"]): - self.assertEqual(prog.type(spelling), int_type("signed char", 1, True)) + self.assertEqual( + prog.type(spelling), prog.int_type("signed char", 1, True) + ) for spelling in spellings(["unsigned", "char"]): self.assertEqual( - prog.type(spelling), int_type("unsigned char", 1, False) + prog.type(spelling), prog.int_type("unsigned char", 1, False) ) for spelling in spellings(["short", "signed", "int"], 2): - self.assertEqual(prog.type(spelling), int_type("short", 2, True)) + self.assertEqual(prog.type(spelling), prog.int_type("short", 2, True)) for spelling in spellings(["short", "unsigned", "int"], 1): self.assertEqual( - prog.type(spelling), int_type("unsigned short", 2, False) + prog.type(spelling), prog.int_type("unsigned short", 2, False) ) for spelling in spellings(["int", "signed"], 1): - self.assertEqual(prog.type(spelling), int_type("int", 4, True)) + self.assertEqual(prog.type(spelling), prog.int_type("int", 4, True)) for spelling in spellings(["unsigned", "int"]): self.assertEqual( - prog.type(spelling), int_type("unsigned int", 4, False) + prog.type(spelling), prog.int_type("unsigned int", 4, False) ) for spelling in spellings(["long", "signed", "int"], 2): - self.assertEqual(prog.type(spelling), int_type("long", word_size, True)) + self.assertEqual( + prog.type(spelling), prog.int_type("long", word_size, True) + ) for spelling in spellings(["long", "unsigned", "int"], 1): self.assertEqual( - prog.type(spelling), int_type("unsigned long", word_size, False) + prog.type(spelling), + prog.int_type("unsigned long", word_size, False), ) for spelling in spellings(["long", "long", "signed", "int"], 
2): - self.assertEqual(prog.type(spelling), int_type("long long", 8, True)) + self.assertEqual( + prog.type(spelling), prog.int_type("long long", 8, True) + ) for spelling in spellings(["long", "long", "unsigned", "int"], 1): self.assertEqual( - prog.type(spelling), int_type("unsigned long long", 8, False) + prog.type(spelling), prog.int_type("unsigned long long", 8, False) ) - self.assertEqual(prog.type("float"), float_type("float", 4)) - self.assertEqual(prog.type("double"), float_type("double", 8)) + self.assertEqual(prog.type("float"), prog.float_type("float", 4)) + self.assertEqual(prog.type("double"), prog.float_type("double", 8)) for spelling in spellings(["long", "double"]): - self.assertEqual(prog.type(spelling), float_type("long double", 16)) + self.assertEqual( + prog.type(spelling), prog.float_type("long double", 16) + ) self.assertEqual( prog.type("size_t"), - typedef_type("size_t", int_type("unsigned long", word_size, False)), + prog.typedef_type( + "size_t", prog.int_type("unsigned long", word_size, False) + ), ) self.assertEqual( prog.type("ptrdiff_t"), - typedef_type("ptrdiff_t", int_type("long", word_size, True)), + prog.typedef_type("ptrdiff_t", prog.int_type("long", word_size, True)), ) def test_primitive_type(self): - prog = mock_program( - types=[int_type("long", 4, True), int_type("unsigned long", 4, True),] - ) - self.assertEqual(prog.type("long"), int_type("long", 4, True)) + self.types.append(self.prog.int_type("long", 4, True)) + self.assertEqual(self.prog.type("long"), self.prog.int_type("long", 4, True)) + + def test_primitive_type_invalid(self): # unsigned long with signed=True isn't valid, so it should be ignored. + self.types.append(self.prog.int_type("unsigned long", 4, True)) self.assertEqual( - prog.type("unsigned long"), int_type("unsigned long", 8, False) + self.prog.type("unsigned long"), + self.prog.int_type("unsigned long", 8, False), ) def test_size_t_and_ptrdiff_t(self): # 64-bit architecture with 4-byte long/unsigned long. 
- prog = mock_program( - types=[int_type("long", 4, True), int_type("unsigned long", 4, False),] - ) + types = [] + prog = mock_program(types=types) + types.append(prog.int_type("long", 4, True)) + types.append(prog.int_type("unsigned long", 4, False)) self.assertEqual( - prog.type("size_t"), typedef_type("size_t", prog.type("unsigned long long")) + prog.type("size_t"), + prog.typedef_type("size_t", prog.type("unsigned long long")), ) self.assertEqual( - prog.type("ptrdiff_t"), typedef_type("ptrdiff_t", prog.type("long long")) + prog.type("ptrdiff_t"), + prog.typedef_type("ptrdiff_t", prog.type("long long")), ) # 32-bit architecture with 8-byte long/unsigned long. - prog = mock_program( - MOCK_32BIT_PLATFORM, - types=[int_type("long", 8, True), int_type("unsigned long", 8, False),], - ) + types = [] + prog = mock_program(MOCK_32BIT_PLATFORM, types=types) + types.append(prog.int_type("long", 8, True)) + types.append(prog.int_type("unsigned long", 8, False)) self.assertEqual( - prog.type("size_t"), typedef_type("size_t", prog.type("unsigned int")) + prog.type("size_t"), prog.typedef_type("size_t", prog.type("unsigned int")) ) self.assertEqual( - prog.type("ptrdiff_t"), typedef_type("ptrdiff_t", prog.type("int")) + prog.type("ptrdiff_t"), prog.typedef_type("ptrdiff_t", prog.type("int")) ) # Nonsense sizes. 
- prog = mock_program( - types=[ - int_type("int", 1, True), - int_type("unsigned int", 1, False), - int_type("long", 1, True), - int_type("unsigned long", 1, False), - int_type("long long", 2, True), - int_type("unsigned long long", 2, False), - ] - ) + types = [] + prog = mock_program(types=types) + types.append(prog.int_type("int", 1, True)) + types.append(prog.int_type("unsigned int", 1, False)) + types.append(prog.int_type("long", 1, True)) + types.append(prog.int_type("unsigned long", 1, False)) + types.append(prog.int_type("long long", 2, True)) + types.append(prog.int_type("unsigned long long", 2, False)) self.assertRaisesRegex( ValueError, "no suitable integer type for size_t", prog.type, "size_t" ) @@ -480,159 +481,228 @@ def test_size_t_and_ptrdiff_t(self): ) def test_tagged_type(self): - prog = mock_program(types=[point_type, option_type, color_type]) - self.assertEqual(prog.type("struct point"), point_type) - self.assertEqual(prog.type("union option"), option_type) - self.assertEqual(prog.type("enum color"), color_type) + self.types.append(self.point_type) + self.types.append(self.option_type) + self.types.append(self.color_type) + self.assertEqual(self.prog.type("struct point"), self.point_type) + self.assertEqual(self.prog.type("union option"), self.option_type) + self.assertEqual(self.prog.type("enum color"), self.color_type) def test_typedef(self): - prog = mock_program(types=[pid_type]) - self.assertEqual(prog.type("pid_t"), pid_type) + self.types.append(self.pid_type) + self.assertEqual(self.prog.type("pid_t"), self.pid_type) def test_pointer(self): - prog = mock_program() - self.assertEqual(prog.type("int *"), pointer_type(8, int_type("int", 4, True))) self.assertEqual( - prog.type("const int *"), - pointer_type(8, int_type("int", 4, True, Qualifiers.CONST)), + self.prog.type("int *"), + self.prog.pointer_type(self.prog.int_type("int", 4, True)), ) + + def test_pointer_to_const(self): self.assertEqual( - prog.type("int * const"), - 
pointer_type(8, int_type("int", 4, True), Qualifiers.CONST), + self.prog.type("const int *"), + self.prog.pointer_type( + self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), ) + + def test_const_pointer(self): self.assertEqual( - prog.type("int **"), - pointer_type(8, pointer_type(8, int_type("int", 4, True))), + self.prog.type("int * const"), + self.prog.pointer_type( + self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ), ) + + def test_pointer_to_pointer(self): self.assertEqual( - prog.type("int *((*))"), - pointer_type(8, pointer_type(8, int_type("int", 4, True))), + self.prog.type("int **"), + self.prog.pointer_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)) + ), ) + self.assertEqual(self.prog.type("int *((*))"), self.prog.type("int **")) + + def test_pointer_to_const_pointer(self): self.assertEqual( - prog.type("int * const *"), - pointer_type( - 8, pointer_type(8, int_type("int", 4, True), Qualifiers.CONST) + self.prog.type("int * const *"), + self.prog.pointer_type( + self.prog.pointer_type( + self.prog.int_type("int", 4, True), qualifiers=Qualifiers.CONST + ) ), ) def test_array(self): - prog = mock_program() self.assertEqual( - prog.type("int []"), array_type(None, int_type("int", 4, True)) + self.prog.type("int [20]"), + self.prog.array_type(self.prog.int_type("int", 4, True), 20), ) + + def test_array_hexadecimal(self): self.assertEqual( - prog.type("int [20]"), array_type(20, int_type("int", 4, True)) + self.prog.type("int [0x20]"), + self.prog.array_type(self.prog.int_type("int", 4, True), 32), ) + + def test_array_octal(self): self.assertEqual( - prog.type("int [0x20]"), array_type(32, int_type("int", 4, True)) + self.prog.type("int [020]"), + self.prog.array_type(self.prog.int_type("int", 4, True), 16), ) + + def test_incomplete_array(self): self.assertEqual( - prog.type("int [020]"), array_type(16, int_type("int", 4, True)) + self.prog.type("int []"), + 
self.prog.array_type(self.prog.int_type("int", 4, True)), ) + + def test_array_two_dimensional(self): self.assertEqual( - prog.type("int [2][3]"), - array_type(2, array_type(3, int_type("int", 4, True))), + self.prog.type("int [2][3]"), + self.prog.array_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3), 2 + ), ) + + def test_array_three_dimensional(self): self.assertEqual( - prog.type("int [2][3][4]"), - array_type(2, array_type(3, array_type(4, int_type("int", 4, True)))), + self.prog.type("int [2][3][4]"), + self.prog.array_type( + self.prog.array_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 4), 3 + ), + 2, + ), ) def test_array_of_pointers(self): - prog = mock_program() self.assertEqual( - prog.type("int *[2][3]"), - array_type(2, array_type(3, pointer_type(8, int_type("int", 4, True)))), + self.prog.type("int *[2][3]"), + self.prog.array_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 3 + ), + 2, + ), ) def test_pointer_to_array(self): - prog = mock_program() self.assertEqual( - prog.type("int (*)[2]"), - pointer_type(8, array_type(2, int_type("int", 4, True))), + self.prog.type("int (*)[2]"), + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ), ) + + def test_pointer_to_two_dimensional_array(self): self.assertEqual( - prog.type("int (*)[2][3]"), - pointer_type(8, array_type(2, array_type(3, int_type("int", 4, True)))), + self.prog.type("int (*)[2][3]"), + self.prog.pointer_type( + self.prog.array_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3), 2 + ) + ), ) def test_pointer_to_pointer_to_array(self): - prog = mock_program() self.assertEqual( - prog.type("int (**)[2]"), - pointer_type(8, pointer_type(8, array_type(2, int_type("int", 4, True)))), + self.prog.type("int (**)[2]"), + self.prog.pointer_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 2) + ) + ), ) def 
test_pointer_to_array_of_pointers(self): - prog = mock_program() - self.assertEqual( - prog.type("int *(*)[2]"), - pointer_type(8, array_type(2, pointer_type(8, int_type("int", 4, True)))), - ) self.assertEqual( - prog.type("int *((*)[2])"), - pointer_type(8, array_type(2, pointer_type(8, int_type("int", 4, True)))), + self.prog.type("int *(*)[2]"), + self.prog.pointer_type( + self.prog.array_type( + self.prog.pointer_type(self.prog.int_type("int", 4, True)), 2 + ) + ), ) + self.assertEqual(self.prog.type("int *((*)[2])"), self.prog.type("int *(*)[2]")) def test_array_of_pointers_to_array(self): - prog = mock_program() self.assertEqual( - prog.type("int (*[2])[3]"), - array_type(2, pointer_type(8, array_type(3, int_type("int", 4, True)))), + self.prog.type("int (*[2])[3]"), + self.prog.array_type( + self.prog.pointer_type( + self.prog.array_type(self.prog.int_type("int", 4, True), 3) + ), + 2, + ), ) -class TestObjects(ObjectTestCase): +class TestObjects(MockProgramTestCase): def test_invalid_finder(self): - self.assertRaises(TypeError, mock_program().add_object_finder, "foo") + self.assertRaises(TypeError, self.prog.add_object_finder, "foo") - prog = mock_program() - prog.add_object_finder(lambda prog, name, flags, filename: "foo") - self.assertRaises(TypeError, prog.object, "foo") + self.prog.add_object_finder(lambda prog, name, flags, filename: "foo") + self.assertRaises(TypeError, self.prog.object, "foo") def test_not_found(self): - prog = mock_program() - self.assertRaises(LookupError, prog.object, "foo") - prog.add_object_finder(lambda prog, name, flags, filename: None) - self.assertRaises(LookupError, prog.object, "foo") - self.assertFalse("foo" in prog) + self.assertRaises(LookupError, self.prog.object, "foo") + self.prog.add_object_finder(lambda prog, name, flags, filename: None) + self.assertRaises(LookupError, self.prog.object, "foo") + self.assertFalse("foo" in self.prog) def test_constant(self): - mock_obj = MockObject("PAGE_SIZE", int_type("int", 4, 
True), value=4096) - prog = mock_program(objects=[mock_obj]) + self.objects.append( + MockObject("PAGE_SIZE", self.prog.int_type("int", 4, True), value=4096) + ) self.assertEqual( - prog["PAGE_SIZE"], Object(prog, int_type("int", 4, True), value=4096) + self.prog["PAGE_SIZE"], + Object(self.prog, self.prog.int_type("int", 4, True), value=4096), ) self.assertEqual( - prog.object("PAGE_SIZE", FindObjectFlags.CONSTANT), prog["PAGE_SIZE"] + self.prog.object("PAGE_SIZE", FindObjectFlags.CONSTANT), + self.prog["PAGE_SIZE"], ) - self.assertTrue("PAGE_SIZE" in prog) + self.assertTrue("PAGE_SIZE" in self.prog) def test_function(self): - mock_obj = MockObject( - "func", function_type(void_type(), (), False), address=0xFFFF0000 + self.objects.append( + MockObject( + "func", + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, + ) ) - prog = mock_program(objects=[mock_obj]) self.assertEqual( - prog["func"], - Object(prog, function_type(void_type(), (), False), address=0xFFFF0000), + self.prog["func"], + Object( + self.prog, + self.prog.function_type(self.prog.void_type(), (), False), + address=0xFFFF0000, + ), ) - self.assertEqual(prog.object("func", FindObjectFlags.FUNCTION), prog["func"]) - self.assertTrue("func" in prog) + self.assertEqual( + self.prog.object("func", FindObjectFlags.FUNCTION), self.prog["func"] + ) + self.assertTrue("func" in self.prog) def test_variable(self): - mock_obj = MockObject("counter", int_type("int", 4, True), address=0xFFFF0000) - prog = mock_program(objects=[mock_obj]) + self.objects.append( + MockObject( + "counter", self.prog.int_type("int", 4, True), address=0xFFFF0000 + ) + ) self.assertEqual( - prog["counter"], Object(prog, int_type("int", 4, True), address=0xFFFF0000) + self.prog["counter"], + Object(self.prog, self.prog.int_type("int", 4, True), address=0xFFFF0000), ) self.assertEqual( - prog.object("counter", FindObjectFlags.VARIABLE), prog["counter"] + self.prog.object("counter", FindObjectFlags.VARIABLE), 
self.prog["counter"] ) - self.assertTrue("counter" in prog) + self.assertTrue("counter" in self.prog) -class TestCoreDump(unittest.TestCase): +class TestCoreDump(TestCase): def test_not_core_dump(self): prog = Program() self.assertRaisesRegex( @@ -664,7 +734,7 @@ def test_simple(self): with tempfile.NamedTemporaryFile() as f: f.write( create_elf_file( - ET.CORE, [ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=data,),] + ET.CORE, [ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=data)] ) ) f.flush() @@ -681,7 +751,7 @@ def test_physical(self): ET.CORE, [ ElfSection( - p_type=PT.LOAD, vaddr=0xFFFF0000, paddr=0xA0, data=data, + p_type=PT.LOAD, vaddr=0xFFFF0000, paddr=0xA0, data=data ), ], ) diff --git a/tests/test_python.py b/tests/test_python.py index 44efd6af1..183495e6a 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,9 +1,10 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0+ +import unittest + import _drgn import drgn -import unittest class TestModule(unittest.TestCase): diff --git a/tests/test_serialize.py b/tests/test_serialize.py index 04ae95023..eb52e05c9 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -5,7 +5,6 @@ from tests.libdrgn import deserialize_bits, serialize_bits - VALUE = 12345678912345678989 diff --git a/tests/test_type.py b/tests/test_type.py index cdd81b666..735a395b7 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -1,47 +1,34 @@ # Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0+ -import unittest +import operator from drgn import ( Language, PrimitiveType, + Program, Qualifiers, TypeEnumerator, TypeKind, TypeMember, TypeParameter, - array_type, - bool_type, - class_type, - complex_type, - enum_type, - float_type, - function_type, - int_type, - pointer_type, sizeof, - struct_type, - typedef_type, - union_type, - void_type, ) +from tests import DEFAULT_LANGUAGE, MockProgramTestCase -from tests import DEFAULT_LANGUAGE - -class TestType(unittest.TestCase): +class TestType(MockProgramTestCase): def test_void(self): - t = void_type() + t = self.prog.void_type() self.assertEqual(t.kind, TypeKind.VOID) self.assertEqual(t.primitive, PrimitiveType.C_VOID) self.assertEqual(t.language, DEFAULT_LANGUAGE) - self.assertEqual(t, void_type()) + self.assertEqual(t, self.prog.void_type()) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "void_type()") + self.assertEqual(repr(t), "prog.void_type()") def test_int(self): - t = int_type("int", 4, True) + t = self.prog.int_type("int", 4, True) self.assertEqual(t.kind, TypeKind.INT) self.assertEqual(t.primitive, PrimitiveType.C_INT) self.assertEqual(t.language, DEFAULT_LANGUAGE) @@ -50,21 +37,21 @@ def test_int(self): self.assertTrue(t.is_signed) self.assertTrue(t.is_complete()) - self.assertEqual(t, int_type("int", 4, True)) - self.assertNotEqual(t, int_type("long", 4, True)) - self.assertNotEqual(t, int_type("int", 2, True)) - self.assertNotEqual(t, int_type("int", 4, False)) + self.assertEqual(t, self.prog.int_type("int", 4, True)) + self.assertNotEqual(t, self.prog.int_type("long", 4, True)) + self.assertNotEqual(t, self.prog.int_type("int", 2, True)) + self.assertNotEqual(t, self.prog.int_type("int", 4, False)) - self.assertEqual(repr(t), "int_type(name='int', size=4, is_signed=True)") + self.assertEqual(repr(t), "prog.int_type(name='int', size=4, is_signed=True)") self.assertEqual(sizeof(t), 4) - self.assertRaises(TypeError, int_type, None, 4, True) + 
self.assertRaises(TypeError, self.prog.int_type, None, 4, True) - self.assertIsNone(int_type("my_int", 4, True).primitive) - self.assertIsNone(int_type("int", 4, False).primitive) + self.assertIsNone(self.prog.int_type("my_int", 4, True).primitive) + self.assertIsNone(self.prog.int_type("int", 4, False).primitive) def test_bool(self): - t = bool_type("_Bool", 1) + t = self.prog.bool_type("_Bool", 1) self.assertEqual(t.kind, TypeKind.BOOL) self.assertEqual(t.primitive, PrimitiveType.C_BOOL) self.assertEqual(t.language, DEFAULT_LANGUAGE) @@ -72,87 +59,109 @@ def test_bool(self): self.assertEqual(t.size, 1) self.assertTrue(t.is_complete()) - self.assertEqual(t, bool_type("_Bool", 1)) - self.assertNotEqual(t, bool_type("bool", 1)) - self.assertNotEqual(t, bool_type("_Bool", 2)) + self.assertEqual(t, self.prog.bool_type("_Bool", 1)) + self.assertNotEqual(t, self.prog.bool_type("bool", 1)) + self.assertNotEqual(t, self.prog.bool_type("_Bool", 2)) - self.assertEqual(repr(t), "bool_type(name='_Bool', size=1)") + self.assertEqual(repr(t), "prog.bool_type(name='_Bool', size=1)") self.assertEqual(sizeof(t), 1) - self.assertRaises(TypeError, bool_type, None, 1) + self.assertRaises(TypeError, self.prog.bool_type, None, 1) def test_float(self): - t = float_type("float", 4) + t = self.prog.float_type("float", 4) self.assertEqual(t.primitive, PrimitiveType.C_FLOAT) self.assertEqual(t.kind, TypeKind.FLOAT) self.assertEqual(t.name, "float") self.assertEqual(t.size, 4) self.assertTrue(t.is_complete()) - self.assertEqual(t, float_type("float", 4)) - self.assertNotEqual(t, float_type("double", 4)) - self.assertNotEqual(t, float_type("float", 8)) + self.assertEqual(t, self.prog.float_type("float", 4)) + self.assertNotEqual(t, self.prog.float_type("double", 4)) + self.assertNotEqual(t, self.prog.float_type("float", 8)) - self.assertEqual(repr(t), "float_type(name='float', size=4)") + self.assertEqual(repr(t), "prog.float_type(name='float', size=4)") self.assertEqual(sizeof(t), 4) - 
self.assertRaises(TypeError, float_type, None, 4) + self.assertRaises(TypeError, self.prog.float_type, None, 4) def test_complex(self): - t = complex_type("double _Complex", 16, float_type("double", 8)) + t = self.prog.complex_type( + "double _Complex", 16, self.prog.float_type("double", 8) + ) self.assertEqual(t.kind, TypeKind.COMPLEX) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.name, "double _Complex") self.assertEqual(t.size, 16) - self.assertEqual(t.type, float_type("double", 8)) + self.assertEqual(t.type, self.prog.float_type("double", 8)) self.assertTrue(t.is_complete()) self.assertEqual( - t, complex_type("double _Complex", 16, float_type("double", 8)) + t, + self.prog.complex_type( + "double _Complex", 16, self.prog.float_type("double", 8) + ), ) self.assertNotEqual( - t, complex_type("float _Complex", 16, float_type("double", 8)) + t, + self.prog.complex_type( + "float _Complex", 16, self.prog.float_type("double", 8) + ), ) self.assertNotEqual( - t, complex_type("double _Complex", 32, float_type("double", 8)) + t, + self.prog.complex_type( + "double _Complex", 32, self.prog.float_type("double", 8) + ), ) self.assertNotEqual( - t, complex_type("double _Complex", 16, float_type("float", 4)) + t, + self.prog.complex_type( + "double _Complex", 16, self.prog.float_type("float", 4) + ), ) self.assertEqual( repr(t), - "complex_type(name='double _Complex', size=16, type=float_type(name='double', size=8))", + "prog.complex_type(name='double _Complex', size=16, type=prog.float_type(name='double', size=8))", ) self.assertEqual(sizeof(t), 16) - self.assertRaises(TypeError, complex_type, None, 16, float_type("double", 8)) - self.assertRaises(TypeError, complex_type, "double _Complex", 16, None) + self.assertRaises( + TypeError, + self.prog.complex_type, + None, + 16, + self.prog.float_type("double", 8), + ) + self.assertRaises( + TypeError, self.prog.complex_type, "double _Complex", 16, None + ) 
self.assertRaisesRegex( ValueError, "must be floating-point or integer type", - complex_type, + self.prog.complex_type, "double _Complex", 16, - void_type(), + self.prog.void_type(), ) self.assertRaisesRegex( ValueError, "must be unqualified", - complex_type, + self.prog.complex_type, "double _Complex", 16, - float_type("double", 8, Qualifiers.CONST), + self.prog.float_type("double", 8, qualifiers=Qualifiers.CONST), ) def test_struct(self): - t = struct_type( + t = self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ) self.assertEqual(t.kind, TypeKind.STRUCT) @@ -163,111 +172,111 @@ def test_struct(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), ), ) self.assertTrue(t.is_complete()) self.assertEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # Different tag. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "pt", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # Different size. 
self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 16, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # One is anonymous. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # Different members. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("long", 8, True), "x", 0), - TypeMember(int_type("long", 8, True), "y", 64), + TypeMember(self.prog.int_type("long", 8, True), "x", 0), + TypeMember(self.prog.int_type("long", 8, True), "y", 64), ), ), ) # Different number of members. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # One member is anonymous. self.assertNotEqual( t, - struct_type( + self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), None, 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), None, 32), ), ), ) # One is incomplete. 
- self.assertNotEqual(t, struct_type("point")) + self.assertNotEqual(t, self.prog.struct_type("point")) self.assertEqual( repr(t), - "struct_type(tag='point', size=8, members=(TypeMember(type=int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32)))", + "prog.struct_type(tag='point', size=8, members=(TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32)))", ) self.assertEqual(sizeof(t), 8) - t = struct_type( + t = self.prog.struct_type( None, 8, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ) self.assertEqual(t.kind, TypeKind.STRUCT) @@ -277,77 +286,79 @@ def test_struct(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), ), ) self.assertTrue(t.is_complete()) - t = struct_type("color", 0, ()) + t = self.prog.struct_type("color", 0, ()) self.assertEqual(t.kind, TypeKind.STRUCT) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertEqual(t.size, 0) self.assertEqual(t.members, ()) self.assertTrue(t.is_complete()) - self.assertEqual(repr(t), "struct_type(tag='color', size=0, members=())") + self.assertEqual(repr(t), "prog.struct_type(tag='color', size=0, members=())") - t = struct_type("color") + t = self.prog.struct_type("color") self.assertEqual(t.kind, TypeKind.STRUCT) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - 
self.assertEqual(repr(t), "struct_type(tag='color', size=None, members=None)") + self.assertEqual( + repr(t), "prog.struct_type(tag='color', size=None, members=None)" + ) - t = struct_type(None, None, None) + t = self.prog.struct_type(None, None, None) self.assertEqual(t.kind, TypeKind.STRUCT) self.assertIsNone(t.primitive) self.assertEqual(t.tag, None) self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "struct_type(tag=None, size=None, members=None)") + self.assertEqual(repr(t), "prog.struct_type(tag=None, size=None, members=None)") - self.assertRaises(TypeError, struct_type, 4) + self.assertRaises(TypeError, self.prog.struct_type, 4) self.assertRaisesRegex( - ValueError, "must not have size", struct_type, "point", 8, None + ValueError, "must not have size", self.prog.struct_type, "point", 8, None ) self.assertRaisesRegex( - ValueError, "must have size", struct_type, "point", None, () + ValueError, "must have size", self.prog.struct_type, "point", None, () ) self.assertRaisesRegex( - TypeError, "must be sequence or None", struct_type, "point", 8, 4 + TypeError, "must be sequence or None", self.prog.struct_type, "point", 8, 4 ) self.assertRaisesRegex( - TypeError, "must be TypeMember", struct_type, "point", 8, (4,) + TypeError, "must be TypeMember", self.prog.struct_type, "point", 8, (4,) ) # Bit size. 
- t = struct_type( + t = self.prog.struct_type( "point", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), ), ) self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), ), ) def test_union(self): - t = union_type( + t = self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ) self.assertEqual(t.kind, TypeKind.UNION) @@ -358,111 +369,111 @@ def test_union(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 0), ), ) self.assertTrue(t.is_complete()) self.assertEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # Different tag. self.assertNotEqual( t, - union_type( + self.prog.union_type( "pt", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # Different size. 
self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 8, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # One is anonymous. self.assertNotEqual( t, - union_type( + self.prog.union_type( None, 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ), ) # Different members. self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("long", 8, True), "x"), - TypeMember(int_type("unsigned long", 8, False), "y"), + TypeMember(self.prog.int_type("long", 8, True), "x"), + TypeMember(self.prog.int_type("unsigned long", 8, False), "y"), ), ), ) # Different number of members. self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), - TypeMember(float_type("float", 4), "z"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.float_type("float", 4), "z"), ), ), ) # One member is anonymous. self.assertNotEqual( t, - union_type( + self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False),), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False)), ), ), ) # One is incomplete. 
- self.assertNotEqual(t, union_type("option")) + self.assertNotEqual(t, self.prog.union_type("option")) self.assertEqual( repr(t), - "union_type(tag='option', size=4, members=(TypeMember(type=int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=int_type(name='unsigned int', size=4, is_signed=False), name='y', bit_offset=0)))", + "prog.union_type(tag='option', size=4, members=(TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=prog.int_type(name='unsigned int', size=4, is_signed=False), name='y', bit_offset=0)))", ) self.assertEqual(sizeof(t), 4) - t = union_type( + t = self.prog.union_type( None, 4, ( - TypeMember(int_type("int", 4, True), "x"), - TypeMember(int_type("unsigned int", 4, False), "y"), + TypeMember(self.prog.int_type("int", 4, True), "x"), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y"), ), ) self.assertEqual(t.kind, TypeKind.UNION) @@ -472,78 +483,80 @@ def test_union(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 0), ), ) self.assertTrue(t.is_complete()) - t = union_type("color", 0, ()) + t = self.prog.union_type("color", 0, ()) self.assertEqual(t.kind, TypeKind.UNION) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertEqual(t.size, 0) self.assertEqual(t.members, ()) self.assertTrue(t.is_complete()) - self.assertEqual(repr(t), "union_type(tag='color', size=0, members=())") + self.assertEqual(repr(t), "prog.union_type(tag='color', size=0, members=())") - t = union_type("color") + t = self.prog.union_type("color") self.assertEqual(t.kind, TypeKind.UNION) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertIsNone(t.size) self.assertIsNone(t.members) 
self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "union_type(tag='color', size=None, members=None)") + self.assertEqual( + repr(t), "prog.union_type(tag='color', size=None, members=None)" + ) - t = union_type(None, None, None) + t = self.prog.union_type(None, None, None) self.assertEqual(t.kind, TypeKind.UNION) self.assertIsNone(t.primitive) self.assertEqual(t.tag, None) self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "union_type(tag=None, size=None, members=None)") + self.assertEqual(repr(t), "prog.union_type(tag=None, size=None, members=None)") - self.assertRaises(TypeError, union_type, 4) + self.assertRaises(TypeError, self.prog.union_type, 4) self.assertRaisesRegex( - ValueError, "must not have size", union_type, "option", 8, None + ValueError, "must not have size", self.prog.union_type, "option", 8, None ) self.assertRaisesRegex( - ValueError, "must have size", union_type, "option", None, () + ValueError, "must have size", self.prog.union_type, "option", None, () ) self.assertRaisesRegex( - TypeError, "must be sequence or None", union_type, "option", 8, 4 + TypeError, "must be sequence or None", self.prog.union_type, "option", 8, 4 ) self.assertRaisesRegex( - TypeError, "must be TypeMember", union_type, "option", 8, (4,) + TypeError, "must be TypeMember", self.prog.union_type, "option", 8, (4,) ) # Bit size. 
- t = union_type( + t = self.prog.union_type( "option", 4, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 4), ), ) self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("unsigned int", 4, False), "y", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("unsigned int", 4, False), "y", 0, 4), ), ) def test_class(self): - t = class_type( + t = self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ) self.assertEqual(t.kind, TypeKind.CLASS) @@ -554,118 +567,118 @@ def test_class(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), - TypeMember(int_type("int", 4, True), "z", 64, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 0), ), ) self.assertTrue(t.is_complete()) self.assertEqual( t, - class_type( + self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # Different tag. 
self.assertNotEqual( t, - class_type( + self.prog.class_type( "crd", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # Different size. self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 16, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # One is anonymous. self.assertNotEqual( t, - class_type( + self.prog.class_type( None, 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ), ) # Different members. self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 12, ( - TypeMember(int_type("long", 8, True), "x", 0), - TypeMember(int_type("long", 8, True), "y", 64), - TypeMember(int_type("long", 8, True), "z", 128), + TypeMember(self.prog.int_type("long", 8, True), "x", 0), + TypeMember(self.prog.int_type("long", 8, True), "y", 64), + TypeMember(self.prog.int_type("long", 8, True), "z", 128), ), ), ) # Different number of members. 
self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), ), ), ) # One member is anonymous. self.assertNotEqual( t, - class_type( + self.prog.class_type( "coord", 8, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), None, 32, 0), - TypeMember(int_type("int", 4, True), "z", 64, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), None, 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 0), ), ), ) # One is incomplete. - self.assertNotEqual(t, class_type("coord")) + self.assertNotEqual(t, self.prog.class_type("coord")) self.assertEqual( repr(t), - "class_type(tag='coord', size=12, members=(TypeMember(type=int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32), TypeMember(type=int_type(name='int', size=4, is_signed=True), name='z', bit_offset=64)))", + "prog.class_type(tag='coord', size=12, members=(TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='x', bit_offset=0), TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='y', bit_offset=32), TypeMember(type=prog.int_type(name='int', size=4, is_signed=True), name='z', bit_offset=64)))", ) self.assertEqual(sizeof(t), 12) - t = class_type( + t = self.prog.class_type( None, 12, ( - TypeMember(int_type("int", 4, True), "x", 0), - TypeMember(int_type("int", 4, True), "y", 32), - TypeMember(int_type("int", 4, True), "z", 64), + TypeMember(self.prog.int_type("int", 4, True), "x", 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32), + TypeMember(self.prog.int_type("int", 4, True), "z", 64), ), ) self.assertEqual(t.kind, 
TypeKind.CLASS) @@ -675,77 +688,79 @@ def test_class(self): self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 0), - TypeMember(int_type("int", 4, True), "y", 32, 0), - TypeMember(int_type("int", 4, True), "z", 64, 0), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 0), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 0), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 0), ), ) self.assertTrue(t.is_complete()) - t = class_type("color", 0, ()) + t = self.prog.class_type("color", 0, ()) self.assertEqual(t.kind, TypeKind.CLASS) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertEqual(t.size, 0) self.assertEqual(t.members, ()) self.assertTrue(t.is_complete()) - self.assertEqual(repr(t), "class_type(tag='color', size=0, members=())") + self.assertEqual(repr(t), "prog.class_type(tag='color', size=0, members=())") - t = class_type("color") + t = self.prog.class_type("color") self.assertEqual(t.kind, TypeKind.CLASS) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "class_type(tag='color', size=None, members=None)") + self.assertEqual( + repr(t), "prog.class_type(tag='color', size=None, members=None)" + ) - t = class_type(None, None, None) + t = self.prog.class_type(None, None, None) self.assertEqual(t.kind, TypeKind.CLASS) self.assertIsNone(t.primitive) self.assertEqual(t.tag, None) self.assertIsNone(t.size) self.assertIsNone(t.members) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "class_type(tag=None, size=None, members=None)") + self.assertEqual(repr(t), "prog.class_type(tag=None, size=None, members=None)") - self.assertRaises(TypeError, class_type, 4) + self.assertRaises(TypeError, self.prog.class_type, 4) self.assertRaisesRegex( - ValueError, "must not have size", class_type, "coord", 12, None + ValueError, "must not have size", 
self.prog.class_type, "coord", 12, None ) self.assertRaisesRegex( - ValueError, "must have size", class_type, "coord", None, () + ValueError, "must have size", self.prog.class_type, "coord", None, () ) self.assertRaisesRegex( - TypeError, "must be sequence or None", class_type, "coord", 12, 4 + TypeError, "must be sequence or None", self.prog.class_type, "coord", 12, 4 ) self.assertRaisesRegex( - TypeError, "must be TypeMember", class_type, "coord", 12, (4,) + TypeError, "must be TypeMember", self.prog.class_type, "coord", 12, (4,) ) # Bit size. - t = class_type( + t = self.prog.class_type( "coord", 12, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), - TypeMember(int_type("int", 4, True), "z", 64, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 4), ), ) self.assertEqual( t.members, ( - TypeMember(int_type("int", 4, True), "x", 0, 4), - TypeMember(int_type("int", 4, True), "y", 32, 4), - TypeMember(int_type("int", 4, True), "z", 64, 4), + TypeMember(self.prog.int_type("int", 4, True), "x", 0, 4), + TypeMember(self.prog.int_type("int", 4, True), "y", 32, 4), + TypeMember(self.prog.int_type("int", 4, True), "z", 64, 4), ), ) def test_enum(self): - t = enum_type( + t = self.prog.enum_type( "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -756,7 +771,7 @@ def test_enum(self): self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.tag, "color") - self.assertEqual(t.type, int_type("unsigned int", 4, False)) + self.assertEqual(t.type, self.prog.int_type("unsigned int", 4, False)) self.assertEqual( t.enumerators, ( @@ -769,9 +784,9 @@ def test_enum(self): self.assertEqual( t, - enum_type( + self.prog.enum_type( "color", - 
int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -782,9 +797,9 @@ def test_enum(self): # Different tag. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "COLOR", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -795,9 +810,9 @@ def test_enum(self): # One is anonymous. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( None, - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -808,9 +823,9 @@ def test_enum(self): # Different compatible type. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "color", - int_type("int", 4, True), + self.prog.int_type("int", 4, True), ( TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1), @@ -821,9 +836,9 @@ def test_enum(self): # Different enumerators. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), ( TypeEnumerator("RED", 0), TypeEnumerator("YELLOW", 1), @@ -834,22 +849,22 @@ def test_enum(self): # Different number of enumerators. self.assertNotEqual( t, - enum_type( + self.prog.enum_type( "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), (TypeEnumerator("RED", 0), TypeEnumerator("GREEN", 1)), ), ) # One is incomplete. 
- self.assertNotEqual(t, enum_type("color")) + self.assertNotEqual(t, self.prog.enum_type("color")) self.assertEqual( repr(t), - "enum_type(tag='color', type=int_type(name='unsigned int', size=4, is_signed=False), enumerators=(TypeEnumerator('RED', 0), TypeEnumerator('GREEN', 1), TypeEnumerator('BLUE', 2)))", + "prog.enum_type(tag='color', type=prog.int_type(name='unsigned int', size=4, is_signed=False), enumerators=(TypeEnumerator('RED', 0), TypeEnumerator('GREEN', 1), TypeEnumerator('BLUE', 2)))", ) self.assertEqual(sizeof(t), 4) - t = enum_type("color", None, None) + t = self.prog.enum_type("color", None, None) self.assertEqual(t.kind, TypeKind.ENUM) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") @@ -857,245 +872,335 @@ def test_enum(self): self.assertIsNone(t.enumerators) self.assertFalse(t.is_complete()) - self.assertEqual(repr(t), "enum_type(tag='color', type=None, enumerators=None)") + self.assertEqual( + repr(t), "prog.enum_type(tag='color', type=None, enumerators=None)" + ) # A type with no enumerators isn't valid in C, but we allow it. 
- t = enum_type("color", int_type("unsigned int", 4, False), ()) + t = self.prog.enum_type( + "color", self.prog.int_type("unsigned int", 4, False), () + ) self.assertEqual(t.kind, TypeKind.ENUM) self.assertIsNone(t.primitive) self.assertEqual(t.tag, "color") - self.assertEqual(t.type, int_type("unsigned int", 4, False)) + self.assertEqual(t.type, self.prog.int_type("unsigned int", 4, False)) self.assertEqual(t.enumerators, ()) self.assertTrue(t.is_complete()) self.assertEqual( repr(t), - "enum_type(tag='color', type=int_type(name='unsigned int', size=4, is_signed=False), enumerators=())", + "prog.enum_type(tag='color', type=prog.int_type(name='unsigned int', size=4, is_signed=False), enumerators=())", ) - self.assertRaisesRegex(TypeError, "must be Type", enum_type, "color", 4, ()) self.assertRaisesRegex( - ValueError, "must be integer type", enum_type, "color", void_type(), () + TypeError, "must be Type", self.prog.enum_type, "color", 4, () + ) + self.assertRaisesRegex( + ValueError, + "must be integer type", + self.prog.enum_type, + "color", + self.prog.void_type(), + (), ) self.assertRaisesRegex( ValueError, "must be unqualified", - enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, True, Qualifiers.CONST), + self.prog.int_type("unsigned int", 4, True, qualifiers=Qualifiers.CONST), (), ) self.assertRaisesRegex( ValueError, "must not have compatible type", - enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), None, ) self.assertRaisesRegex( - ValueError, "must have compatible type", enum_type, "color", None, () + ValueError, + "must have compatible type", + self.prog.enum_type, + "color", + None, + (), ) self.assertRaisesRegex( TypeError, "must be sequence or None", - enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), 4, ) self.assertRaisesRegex( TypeError, "must be TypeEnumerator", - 
enum_type, + self.prog.enum_type, "color", - int_type("unsigned int", 4, False), + self.prog.int_type("unsigned int", 4, False), (4,), ) def test_typedef(self): - t = typedef_type("INT", int_type("int", 4, True)) + t = self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)) self.assertEqual(t.kind, TypeKind.TYPEDEF) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.name, "INT") - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - self.assertEqual(t, typedef_type("INT", int_type("int", 4, True))) - # Qualified type argument. - self.assertEqual(t, typedef_type("INT", int_type("int", 4, True))) + self.assertEqual( + t, self.prog.typedef_type("INT", self.prog.int_type("int", 4, True)) + ) # Different name. - self.assertNotEqual(t, typedef_type("integer", int_type("int", 4, True))) + self.assertNotEqual( + t, self.prog.typedef_type("integer", self.prog.int_type("int", 4, True)) + ) # Different type. 
self.assertNotEqual( - t, typedef_type("integer", int_type("unsigned int", 4, False)) + t, + self.prog.typedef_type( + "integer", self.prog.int_type("unsigned int", 4, False) + ), ) self.assertNotEqual( - t, typedef_type("INT", int_type("int", 4, True, Qualifiers.CONST)) + t, + self.prog.typedef_type( + "INT", self.prog.int_type("int", 4, True, qualifiers=Qualifiers.CONST) + ), ) self.assertEqual( repr(t), - "typedef_type(name='INT', type=int_type(name='int', size=4, is_signed=True))", + "prog.typedef_type(name='INT', type=prog.int_type(name='int', size=4, is_signed=True))", ) self.assertEqual(sizeof(t), 4) - t = typedef_type("VOID", void_type()) + t = self.prog.typedef_type("VOID", self.prog.void_type()) self.assertFalse(t.is_complete()) - self.assertRaises(TypeError, typedef_type, None, int_type("int", 4, True)) - self.assertRaises(TypeError, typedef_type, "INT", 4) + self.assertRaises( + TypeError, self.prog.typedef_type, None, self.prog.int_type("int", 4, True) + ) + self.assertRaises(TypeError, self.prog.typedef_type, "INT", 4) self.assertEqual( - typedef_type("size_t", int_type("unsigned long", 8, False)).primitive, + self.prog.typedef_type( + "size_t", self.prog.int_type("unsigned long", 8, False) + ).primitive, PrimitiveType.C_SIZE_T, ) self.assertEqual( - typedef_type("ptrdiff_t", int_type("long", 8, True)).primitive, + self.prog.typedef_type( + "ptrdiff_t", self.prog.int_type("long", 8, True) + ).primitive, PrimitiveType.C_PTRDIFF_T, ) def test_pointer(self): - t = pointer_type(8, int_type("int", 4, True)) + t = self.prog.pointer_type(self.prog.int_type("int", 4, True), 8) self.assertEqual(t.kind, TypeKind.POINTER) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.size, 8) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - self.assertEqual(t, pointer_type(8, int_type("int", 4, True))) - # Qualified type 
argument. - self.assertEqual(t, pointer_type(8, int_type("int", 4, True))) + self.assertEqual( + t, self.prog.pointer_type(self.prog.int_type("int", 4, True), 8) + ) + # Default size. + self.assertEqual(t, self.prog.pointer_type(self.prog.int_type("int", 4, True))) + self.assertEqual( + t, self.prog.pointer_type(self.prog.int_type("int", 4, True), None) + ) # Different size. - self.assertNotEqual(t, pointer_type(4, int_type("int", 4, True))) + self.assertNotEqual( + t, self.prog.pointer_type(self.prog.int_type("int", 4, True), 4) + ) # Different type. - self.assertNotEqual(t, pointer_type(8, void_type())) - self.assertNotEqual(t, pointer_type(8, void_type(Qualifiers.CONST))) + self.assertNotEqual(t, self.prog.pointer_type(self.prog.void_type(), 8)) + self.assertNotEqual( + t, + self.prog.pointer_type(self.prog.void_type(qualifiers=Qualifiers.CONST), 8), + ) self.assertEqual( repr(t), - "pointer_type(size=8, type=int_type(name='int', size=4, is_signed=True))", + "prog.pointer_type(type=prog.int_type(name='int', size=4, is_signed=True))", + ) + self.assertEqual( + repr(self.prog.pointer_type(self.prog.int_type("int", 4, True), 4)), + "prog.pointer_type(type=prog.int_type(name='int', size=4, is_signed=True), size=4)", ) + self.assertEqual(sizeof(t), 8) - self.assertRaises(TypeError, pointer_type, None, int_type("int", 4, True)) - self.assertRaises(TypeError, pointer_type, 8, 4) + self.assertRaises(TypeError, self.prog.pointer_type, 4) def test_array(self): - t = array_type(10, int_type("int", 4, True)) + t = self.prog.array_type(self.prog.int_type("int", 4, True), 10) self.assertEqual(t.kind, TypeKind.ARRAY) self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) self.assertEqual(t.length, 10) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - self.assertEqual(t, array_type(10, int_type("int", 4, True))) - # Qualified type argument. 
- self.assertEqual(t, array_type(10, int_type("int", 4, True))) + self.assertEqual( + t, self.prog.array_type(self.prog.int_type("int", 4, True), 10) + ) # Different length. - self.assertNotEqual(t, array_type(4, int_type("int", 4, True))) + self.assertNotEqual( + t, self.prog.array_type(self.prog.int_type("int", 4, True), 4) + ) # Different type. - self.assertNotEqual(t, array_type(10, void_type())) - self.assertNotEqual(t, array_type(10, void_type(Qualifiers.CONST))) + self.assertNotEqual(t, self.prog.array_type(self.prog.void_type(), 10)) + self.assertNotEqual( + t, + self.prog.array_type(self.prog.void_type(qualifiers=Qualifiers.CONST), 10), + ) self.assertEqual( repr(t), - "array_type(length=10, type=int_type(name='int', size=4, is_signed=True))", + "prog.array_type(type=prog.int_type(name='int', size=4, is_signed=True), length=10)", ) self.assertEqual(sizeof(t), 40) - t = array_type(0, int_type("int", 4, True)) + t = self.prog.array_type(self.prog.int_type("int", 4, True), 0) self.assertEqual(t.kind, TypeKind.ARRAY) self.assertIsNone(t.primitive) self.assertEqual(t.length, 0) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertTrue(t.is_complete()) - t = array_type(None, int_type("int", 4, True)) + t = self.prog.array_type(self.prog.int_type("int", 4, True)) self.assertEqual(t.kind, TypeKind.ARRAY) self.assertIsNone(t.primitive) self.assertIsNone(t.length) - self.assertEqual(t.type, int_type("int", 4, True)) + self.assertEqual(t.type, self.prog.int_type("int", 4, True)) self.assertFalse(t.is_complete()) - self.assertRaises(TypeError, array_type, 10, 4) + self.assertRaises(TypeError, self.prog.array_type, 10, 4) def test_function(self): - t = function_type(void_type(), (TypeParameter(int_type("int", 4, True), "n"),)) + t = self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), + ) self.assertEqual(t.kind, TypeKind.FUNCTION) 
self.assertIsNone(t.primitive) self.assertEqual(t.language, DEFAULT_LANGUAGE) - self.assertEqual(t.type, void_type()) - self.assertEqual(t.parameters, (TypeParameter(int_type("int", 4, True), "n"),)) + self.assertEqual(t.type, self.prog.void_type()) + self.assertEqual( + t.parameters, (TypeParameter(self.prog.int_type("int", 4, True), "n"),) + ) self.assertFalse(t.is_variadic) self.assertTrue(t.is_complete()) self.assertEqual( t, - function_type(void_type(), (TypeParameter(int_type("int", 4, True), "n"),)), + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), + ), ) # Different return type. self.assertNotEqual( t, - function_type( - int_type("int", 4, True), - (TypeParameter(int_type("int", 4, True), "n"),), + self.prog.function_type( + self.prog.int_type("int", 4, True), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), ), ) # Different parameter name. self.assertNotEqual( t, - function_type(void_type(), (TypeParameter(int_type("int", 4, True), "x"),)), + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "x"),), + ), ) # Unnamed parameter. self.assertNotEqual( - t, function_type(void_type(), (TypeParameter(int_type("int", 4, True),),)) + t, + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True)),), + ), ) # Different number of parameters. self.assertNotEqual( t, - function_type( - void_type(), + self.prog.function_type( + self.prog.void_type(), ( - TypeParameter(int_type("int", 4, True), "n"), - TypeParameter(pointer_type(8, void_type()), "p"), + TypeParameter(self.prog.int_type("int", 4, True), "n"), + TypeParameter( + self.prog.pointer_type(self.prog.void_type(), 8), "p" + ), ), ), ) # One is variadic. 
self.assertNotEqual( t, - function_type( - void_type(), (TypeParameter(int_type("int", 4, True), "n"),), True + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(self.prog.int_type("int", 4, True), "n"),), + True, ), ) self.assertEqual( repr(t), - "function_type(type=void_type(), parameters=(TypeParameter(type=int_type(name='int', size=4, is_signed=True), name='n'),), is_variadic=False)", + "prog.function_type(type=prog.void_type(), parameters=(TypeParameter(type=prog.int_type(name='int', size=4, is_signed=True), name='n'),), is_variadic=False)", ) self.assertRaises(TypeError, sizeof, t) - self.assertFalse(function_type(void_type(), (), False).is_variadic) - self.assertTrue(function_type(void_type(), (), True).is_variadic) + self.assertFalse( + self.prog.function_type(self.prog.void_type(), (), False).is_variadic + ) + self.assertTrue( + self.prog.function_type(self.prog.void_type(), (), True).is_variadic + ) - self.assertRaisesRegex(TypeError, "must be Type", function_type, None, ()) self.assertRaisesRegex( - TypeError, "must be sequence", function_type, void_type(), None + TypeError, r"must be _drgn\.Type", self.prog.function_type, None, () ) self.assertRaisesRegex( - TypeError, "must be TypeParameter", function_type, void_type(), (4,) + TypeError, + "must be sequence", + self.prog.function_type, + self.prog.void_type(), + None, + ) + self.assertRaisesRegex( + TypeError, + "must be TypeParameter", + self.prog.function_type, + self.prog.void_type(), + (4,), ) def test_cycle(self): - t1 = struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t1), "next"),)) - t2 = struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t2), "next"),)) + t1 = self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t1), "next"),) + ) + t2 = self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t2), "next"),) + ) t3, t4 = ( - struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t4), "next"),)), -
struct_type("foo", 8, (TypeMember(lambda: pointer_type(8, t3), "next"),)), + self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t4), "next"),) + ), + self.prog.struct_type( + "foo", 8, (TypeMember(lambda: self.prog.pointer_type(t3), "next"),) + ), ) self.assertEqual(t1, t2) self.assertEqual(t2, t3) @@ -1103,90 +1208,202 @@ def test_cycle(self): self.assertEqual( repr(t1), - "struct_type(tag='foo', size=8, members=(TypeMember(type=pointer_type(size=8, type=struct_type(tag='foo', ...)), name='next', bit_offset=0),))", + "prog.struct_type(tag='foo', size=8, members=(TypeMember(type=prog.pointer_type(type=prog.struct_type(tag='foo', ...)), name='next', bit_offset=0),))", ) def test_cycle2(self): - t1 = struct_type( + t1 = self.prog.struct_type( "list_head", 16, ( - TypeMember(lambda: pointer_type(8, t1), "next"), - TypeMember(lambda: pointer_type(8, t1), "prev", 8), + TypeMember(lambda: self.prog.pointer_type(t1), "next"), + TypeMember(lambda: self.prog.pointer_type(t1), "prev", 8), ), ) - t2 = struct_type( + t2 = self.prog.struct_type( "list_head", 16, ( - TypeMember(lambda: pointer_type(8, t2), "next"), - TypeMember(lambda: pointer_type(8, t2), "prev", 8), + TypeMember(lambda: self.prog.pointer_type(t2), "next"), + TypeMember(lambda: self.prog.pointer_type(t2), "prev", 8), ), ) self.assertEqual(t1, t2) self.assertEqual( repr(t1), - "struct_type(tag='list_head', size=16, members=(TypeMember(type=pointer_type(size=8, type=struct_type(tag='list_head', ...)), name='next', bit_offset=0), TypeMember(type=pointer_type(size=8, type=struct_type(tag='list_head', ...)), name='prev', bit_offset=8)))", + "prog.struct_type(tag='list_head', size=16, members=(TypeMember(type=prog.pointer_type(type=prog.struct_type(tag='list_head', ...)), name='next', bit_offset=0), TypeMember(type=prog.pointer_type(type=prog.struct_type(tag='list_head', ...)), name='prev', bit_offset=8)))", ) def test_infinite(self): - f = lambda: struct_type("foo", 0, (TypeMember(f, 
"next"),)) + f = lambda: self.prog.struct_type("foo", 0, (TypeMember(f, "next"),)) self.assertEqual( repr(f()), - "struct_type(tag='foo', size=0, members=(TypeMember(type=struct_type(tag='foo', ...), name='next', bit_offset=0),))", + "prog.struct_type(tag='foo', size=0, members=(TypeMember(type=prog.struct_type(tag='foo', ...), name='next', bit_offset=0),))", ) with self.assertRaisesRegex(RecursionError, "maximum.*depth"): f() == f() def test_bad_thunk(self): - t1 = struct_type( + t1 = self.prog.struct_type( "foo", 16, (TypeMember(lambda: exec('raise Exception("test")'), "bar"),) ) with self.assertRaisesRegex(Exception, "test"): t1.members[0].type - t1 = struct_type("foo", 16, (TypeMember(lambda: 0, "bar"),)) + t1 = self.prog.struct_type("foo", 16, (TypeMember(lambda: 0, "bar"),)) with self.assertRaisesRegex(TypeError, "type callable must return Type"): t1.members[0].type def test_qualifiers(self): - self.assertEqual(void_type().qualifiers, Qualifiers(0)) + self.assertEqual(self.prog.void_type().qualifiers, Qualifiers(0)) - t = void_type(Qualifiers.CONST | Qualifiers.VOLATILE) + t = self.prog.void_type(qualifiers=Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual(t.qualifiers, Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual( - repr(t), "void_type(qualifiers=)" + repr(t), "prog.void_type(qualifiers=)" ) - self.assertEqual(t.qualified(Qualifiers.ATOMIC), void_type(Qualifiers.ATOMIC)) - self.assertEqual(t.unqualified(), void_type()) + self.assertEqual( + t.qualified(Qualifiers.ATOMIC), + self.prog.void_type(qualifiers=Qualifiers.ATOMIC), + ) + self.assertEqual(t.unqualified(), self.prog.void_type()) self.assertEqual(t.qualified(Qualifiers(0)), t.unqualified()) - self.assertRaisesRegex(TypeError, "expected Qualifiers or None", void_type, 1.5) + self.assertRaisesRegex( + TypeError, "expected Qualifiers", self.prog.void_type, qualifiers=1.5 + ) def test_language(self): - self.assertEqual(void_type(language=None).language, DEFAULT_LANGUAGE) - 
self.assertEqual(void_type(language=Language.C).language, Language.C) + self.assertEqual(self.prog.void_type(language=None).language, DEFAULT_LANGUAGE) + self.assertEqual(self.prog.void_type(language=Language.C).language, Language.C) self.assertEqual( - int_type("int", 4, True, language=Language.CPP).language, Language.CPP + self.prog.int_type("int", 4, True, language=Language.CPP).language, + Language.CPP, ) self.assertNotEqual( - int_type("int", 4, True, language=Language.C), - int_type("int", 4, True, language=Language.CPP), + self.prog.int_type("int", 4, True, language=Language.C), + self.prog.int_type("int", 4, True, language=Language.CPP), + ) + + def test_language_repr(self): + self.assertEqual( + repr(self.prog.void_type(language=Language.CPP)), + "prog.void_type(language=Language.CPP)", ) def test_cmp(self): - self.assertEqual(void_type(), void_type()) - self.assertEqual(void_type(Qualifiers.CONST), void_type(Qualifiers.CONST)) - self.assertNotEqual(void_type(), void_type(Qualifiers.CONST)) - self.assertNotEqual(void_type(), int_type("int", 4, True)) - self.assertNotEqual(void_type(), 1) - self.assertNotEqual(1, void_type()) + self.assertEqual(self.prog.void_type(), self.prog.void_type()) + self.assertEqual( + self.prog.void_type(qualifiers=Qualifiers.CONST), + self.prog.void_type(qualifiers=Qualifiers.CONST), + ) + self.assertNotEqual( + self.prog.void_type(), self.prog.void_type(qualifiers=Qualifiers.CONST) + ) + self.assertNotEqual(self.prog.void_type(), self.prog.int_type("int", 4, True)) + self.assertNotEqual(self.prog.void_type(), 1) + self.assertNotEqual(1, self.prog.void_type()) + + def test_different_programs_compare(self): + self.assertRaisesRegex( + ValueError, + "types are from different programs", + operator.eq, + self.prog.void_type(), + Program().void_type(), + ) + + def test_different_programs_complex(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.complex_type, + "double _Complex", + 16, + 
Program().float_type("double", 8), + ) + + def test_different_programs_compound(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.struct_type, + None, + 4, + (TypeMember(Program().int_type("int", 4, True)),), + ) + + def test_different_programs_compound_callback(self): + with self.assertRaisesRegex(ValueError, "type is from different program"): + self.prog.struct_type( + None, 4, (TypeMember(lambda: Program().int_type("int", 4, True)),) + ).members[0].type + + def test_different_programs_enum(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.enum_type, + None, + Program().int_type("int", 4, True), + (), + ) + + def test_different_programs_typedef(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.typedef_type, + "INT", + Program().int_type("int", 4, True), + ) + def test_different_programs_pointer(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.pointer_type, + Program().int_type("int", 4, True), + ) -class TestTypeEnumerator(unittest.TestCase): + def test_different_programs_array(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.array_type, + Program().int_type("int", 4, True), + ) + + def test_different_programs_function_return(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.function_type, + Program().int_type("int", 4, True), + (), + ) + + def test_different_programs_function_parameter(self): + self.assertRaisesRegex( + ValueError, + "type is from different program", + self.prog.function_type, + self.prog.void_type(), + (TypeParameter(Program().int_type("int", 4, True)),), + ) + + def test_different_programs_function_parameter_callback(self): + with self.assertRaisesRegex(ValueError, "type is from different program"): + self.prog.function_type( + self.prog.void_type(), + (TypeParameter(lambda:
Program().int_type("int", 4, True)),), + ).parameters[0].type + + +class TestTypeEnumerator(MockProgramTestCase): def test_init(self): e = TypeEnumerator("a", 1) self.assertEqual(e.name, "a") @@ -1212,128 +1429,142 @@ def test_cmp(self): self.assertNotEqual(TypeEnumerator("b", 1), TypeEnumerator("a", 1)) -class TestTypeMember(unittest.TestCase): +class TestTypeMember(MockProgramTestCase): def test_init(self): - m = TypeMember(void_type()) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type()) + self.assertEqual(m.type, self.prog.void_type()) self.assertIsNone(m.name) self.assertEqual(m.bit_offset, 0) self.assertEqual(m.offset, 0) self.assertEqual(m.bit_field_size, 0) - m = TypeMember(void_type(), "foo") - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type(), "foo") + self.assertEqual(m.type, self.prog.void_type()) self.assertEqual(m.name, "foo") self.assertEqual(m.bit_offset, 0) self.assertEqual(m.offset, 0) self.assertEqual(m.bit_field_size, 0) - m = TypeMember(void_type(), "foo", 8) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type(), "foo", 8) + self.assertEqual(m.type, self.prog.void_type()) self.assertEqual(m.name, "foo") self.assertEqual(m.bit_offset, 8) self.assertEqual(m.offset, 1) self.assertEqual(m.bit_field_size, 0) - m = TypeMember(void_type(), "foo", 9, 7) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type(), "foo", 9, 7) + self.assertEqual(m.type, self.prog.void_type()) self.assertEqual(m.name, "foo") self.assertEqual(m.bit_offset, 9) self.assertRaises(ValueError, getattr, m, "offset") self.assertEqual(m.bit_field_size, 7) self.assertRaises(TypeError, TypeMember, None) - self.assertRaises(TypeError, TypeMember, void_type(), 1) - self.assertRaises(TypeError, TypeMember, void_type(), "foo", None) - self.assertRaises(TypeError, TypeMember, void_type(), "foo", 0, None) + self.assertRaises(TypeError, TypeMember, self.prog.void_type(), 1) + 
self.assertRaises(TypeError, TypeMember, self.prog.void_type(), "foo", None) + self.assertRaises(TypeError, TypeMember, self.prog.void_type(), "foo", 0, None) def test_callable(self): - m = TypeMember(void_type) - self.assertEqual(m.type, void_type()) + m = TypeMember(self.prog.void_type) + self.assertEqual(m.type, self.prog.void_type()) - m = TypeMember(lambda: int_type("int", 4, True)) - self.assertEqual(m.type, int_type("int", 4, True)) + m = TypeMember(lambda: self.prog.int_type("int", 4, True)) + self.assertEqual(m.type, self.prog.int_type("int", 4, True)) m = TypeMember(lambda: None) self.assertRaises(TypeError, getattr, m, "type") def test_repr(self): - m = TypeMember(type=void_type, name="foo") + m = TypeMember(type=self.prog.void_type, name="foo") self.assertEqual( - repr(m), "TypeMember(type=void_type(), name='foo', bit_offset=0)" + repr(m), "TypeMember(type=prog.void_type(), name='foo', bit_offset=0)" ) - m = TypeMember(type=void_type, bit_field_size=4) + m = TypeMember(type=self.prog.void_type, bit_field_size=4) self.assertEqual( repr(m), - "TypeMember(type=void_type(), name=None, bit_offset=0, bit_field_size=4)", + "TypeMember(type=prog.void_type(), name=None, bit_offset=0, bit_field_size=4)", ) m = TypeMember(lambda: None) self.assertRaises(TypeError, repr, m) def test_cmp(self): - self.assertEqual(TypeMember(void_type()), TypeMember(void_type(), None, 0, 0)) self.assertEqual( - TypeMember(bit_offset=9, bit_field_size=7, type=void_type, name="foo"), - TypeMember(void_type(), "foo", 9, 7), + TypeMember(self.prog.void_type()), + TypeMember(self.prog.void_type(), None, 0, 0), + ) + self.assertEqual( + TypeMember( + bit_offset=9, bit_field_size=7, type=self.prog.void_type, name="foo" + ), + TypeMember(self.prog.void_type(), "foo", 9, 7), ) self.assertNotEqual( - TypeMember(int_type("int", 4, True)), TypeMember(void_type(), None, 0, 0) + TypeMember(self.prog.int_type("int", 4, True)), + TypeMember(self.prog.void_type(), None, 0, 0), ) self.assertNotEqual( 
- TypeMember(void_type(), "foo"), TypeMember(void_type(), None, 0, 0) + TypeMember(self.prog.void_type(), "foo"), + TypeMember(self.prog.void_type(), None, 0, 0), ) self.assertNotEqual( - TypeMember(void_type(), bit_offset=8), TypeMember(void_type(), None, 0, 0) + TypeMember(self.prog.void_type(), bit_offset=8), + TypeMember(self.prog.void_type(), None, 0, 0), ) self.assertNotEqual( - TypeMember(void_type(), bit_field_size=8), - TypeMember(void_type(), None, 0, 0), + TypeMember(self.prog.void_type(), bit_field_size=8), + TypeMember(self.prog.void_type(), None, 0, 0), ) -class TestTypeParameter(unittest.TestCase): +class TestTypeParameter(MockProgramTestCase): def test_init(self): - p = TypeParameter(void_type()) - self.assertEqual(p.type, void_type()) + p = TypeParameter(self.prog.void_type()) + self.assertEqual(p.type, self.prog.void_type()) self.assertIsNone(p.name) - p = TypeParameter(void_type(), "foo") - self.assertEqual(p.type, void_type()) + p = TypeParameter(self.prog.void_type(), "foo") + self.assertEqual(p.type, self.prog.void_type()) self.assertEqual(p.name, "foo") self.assertRaises(TypeError, TypeParameter, None) - self.assertRaises(TypeError, TypeParameter, void_type(), 1) + self.assertRaises(TypeError, TypeParameter, self.prog.void_type(), 1) def test_callable(self): - p = TypeParameter(void_type) - self.assertEqual(p.type, void_type()) + p = TypeParameter(self.prog.void_type) + self.assertEqual(p.type, self.prog.void_type()) - p = TypeParameter(lambda: int_type("int", 4, True)) - self.assertEqual(p.type, int_type("int", 4, True)) + p = TypeParameter(lambda: self.prog.int_type("int", 4, True)) + self.assertEqual(p.type, self.prog.int_type("int", 4, True)) p = TypeParameter(lambda: None) self.assertRaises(TypeError, getattr, p, "type") def test_repr(self): - p = TypeParameter(type=void_type, name="foo") - self.assertEqual(repr(p), "TypeParameter(type=void_type(), name='foo')") + p = TypeParameter(type=self.prog.void_type, name="foo") + 
self.assertEqual(repr(p), "TypeParameter(type=prog.void_type(), name='foo')") - p = TypeParameter(type=void_type) - self.assertEqual(repr(p), "TypeParameter(type=void_type(), name=None)") + p = TypeParameter(type=self.prog.void_type) + self.assertEqual(repr(p), "TypeParameter(type=prog.void_type(), name=None)") p = TypeParameter(lambda: None) self.assertRaises(TypeError, repr, p) def test_cmp(self): - self.assertEqual(TypeParameter(void_type()), TypeParameter(void_type(), None)) self.assertEqual( - TypeParameter(name="foo", type=void_type), TypeParameter(void_type(), "foo") + TypeParameter(self.prog.void_type()), + TypeParameter(self.prog.void_type(), None), + ) + self.assertEqual( + TypeParameter(name="foo", type=self.prog.void_type), + TypeParameter(self.prog.void_type(), "foo"), ) self.assertNotEqual( - TypeParameter(int_type("int", 4, True)), TypeParameter(void_type(), None) + TypeParameter(self.prog.int_type("int", 4, True)), + TypeParameter(self.prog.void_type(), None), ) self.assertNotEqual( - TypeParameter(void_type(), "foo"), TypeParameter(void_type(), None) + TypeParameter(self.prog.void_type(), "foo"), + TypeParameter(self.prog.void_type(), None), ) diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py index 8fd5f986e..09d263370 100755 --- a/tools/bpf_inspect.py +++ b/tools/bpf_inspect.py @@ -11,12 +11,7 @@ import sys from drgn.helpers import enum_type_to_class -from drgn.helpers.linux import ( - bpf_map_for_each, - bpf_prog_for_each, - hlist_for_each_entry, -) - +from drgn.helpers.linux import bpf_map_for_each, bpf_prog_for_each, hlist_for_each_entry BpfMapType = enum_type_to_class(prog.type("enum bpf_map_type"), "BpfMapType") BpfProgType = enum_type_to_class(prog.type("enum bpf_prog_type"), "BpfProgType") diff --git a/vmtest/manage.py b/vmtest/manage.py index d74cbd6d7..3b069af23 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -1,7 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0+ -import aiohttp import argparse import asyncio import difflib @@ -30,11 +29,12 @@ Tuple, ) import urllib.parse + +import aiohttp from yarl import URL from util import nproc - logger = logging.getLogger("asyncio") @@ -296,7 +296,7 @@ async def _upload_file_obj(self, file: BinaryIO, commit: Dict[str, Any]) -> None params = {} else: params = { - "cursor": {"offset": offset, "session_id": session_id,}, + "cursor": {"offset": offset, "session_id": session_id}, } if last: endpoint = "upload_session/finish" diff --git a/vmtest/onoatimehack.c b/vmtest/onoatimehack.c index dd5c4fa82..b4390bf08 100644 --- a/vmtest/onoatimehack.c +++ b/vmtest/onoatimehack.c @@ -8,6 +8,9 @@ * Overlayfs uses O_NOATIME, so overlayfs on top of 9pfs doesn't work. We work * around this with this LD_PRELOAD hack to remove O_NOATIME from open() and * fcntl() calls. + * + * As of QEMU 5.1.0, the 9pfs server falls back to removing O_NOATIME, so this + * isn't necessary on newer versions. */ #include diff --git a/vmtest/resolver.py b/vmtest/resolver.py index 25ea539ec..23965d9af 100644 --- a/vmtest/resolver.py +++ b/vmtest/resolver.py @@ -16,7 +16,6 @@ from util import KernelVersion - # This URL contains a mapping from file names to URLs where those files can be # downloaded. This is needed because the files under a Dropbox shared folder # have randomly-generated links. 
@@ -54,10 +53,10 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: def _resolve_build(self, path: str) -> ResolvedKernel: release = subprocess.check_output( - ["make", "-s", "kernelrelease"], universal_newlines=True, cwd=path, + ["make", "-s", "kernelrelease"], universal_newlines=True, cwd=path ).strip() vmlinuz = subprocess.check_output( - ["make", "-s", "image_name"], universal_newlines=True, cwd=path, + ["make", "-s", "image_name"], universal_newlines=True, cwd=path ).strip() return ResolvedKernel( release=release, diff --git a/vmtest/vm.py b/vmtest/vm.py index de1d7e690..05d5dd806 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -4,6 +4,7 @@ import errno import os import os.path +import re import shlex import shutil import socket @@ -12,7 +13,6 @@ from util import nproc, out_of_date - # Script run as init in the virtual machine. This only depends on busybox. We # don't assume that any regular commands are built in (not even echo or test), # so we always explicitly run busybox. @@ -141,20 +141,24 @@ class LostVMError(Exception): def run_in_vm(command: str, *, vmlinuz: str, build_dir: str) -> int: + match = re.search( + r"QEMU emulator version ([0-9]+(?:\.[0-9]+)*)", + subprocess.check_output( + ["qemu-system-x86_64", "-version"], universal_newlines=True + ), + ) + if not match: + raise Exception("could not determine QEMU version") + qemu_version = tuple(int(x) for x in match.group(1).split(".")) + # multidevs was added in QEMU 4.2.0. - if ( - "multidevs" - in subprocess.run( - ["qemu-system-x86_64", "-help"], - stdout=subprocess.PIPE, - universal_newlines=True, - ).stdout - ): - multidevs = ",multidevs=remap" - else: - multidevs = "" - - onoatimehack = _build_onoatimehack(build_dir) + multidevs = ",multidevs=remap" if qemu_version >= (4, 2) else "" + # QEMU's 9pfs O_NOATIME handling was fixed in 5.1.0. The fix was backported + # to 5.0.1. 
+ env = os.environ.copy() + if qemu_version < (5, 0, 1): + onoatimehack_so = _build_onoatimehack(build_dir) + env["LD_PRELOAD"] = f"{onoatimehack_so}:{env.get('LD_PRELOAD', '')}" with tempfile.TemporaryDirectory(prefix="drgn-vmtest-") as temp_dir, socket.socket( socket.AF_UNIX @@ -200,10 +204,7 @@ def run_in_vm(command: str, *, vmlinuz: str, build_dir: str) -> int: f"rootfstype=9p rootflags=trans=virtio,cache=loose ro console=0,115200 panic=-1 init={init}", # fmt: on ], - env={ - **os.environ, - "LD_PRELOAD": f"{onoatimehack}:{os.getenv('LD_PRELOAD', '')}", - }, + env=env, ) as qemu: server_sock.settimeout(5) try: