From 33f1331b8885ec3c23a1bd9409e7f7078f34f391 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 18 Dec 2024 01:05:35 -0800 Subject: [PATCH 001/166] libdrgn: enable -Wformat-security Ubuntu enables -Wformat-security by default, but upstream GCC doesn't enable it even with -Wall. It caught some legitimate issues in the module API branch, so let's enable it explicitly. Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index c532429d4..67bd2b71b 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -120,6 +120,7 @@ AS_IF([test "x$enable_compiler_warnings" != xno], [AX_APPEND_COMPILE_FLAGS([ dnl -Wall dnl -Wformat-overflow=2 dnl + -Wformat-security dnl -Wformat-truncation=2 dnl -Wimplicit-fallthrough dnl -Wmissing-prototypes dnl From c39cc68229e4fc4289e15e2760e451616ba3ab1a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 13 Dec 2024 21:11:36 -0800 Subject: [PATCH 002/166] libdrgn: elf_notes: add helper function for looking up notes An upcoming commit needs to look up a few notes by name+type, so add a common helper function for that. 
Signed-off-by: Omar Sandoval --- libdrgn/elf_notes.c | 39 +++++++++++++++++++++++++++++++++++++++ libdrgn/elf_notes.h | 10 ++++++++++ 2 files changed, 49 insertions(+) diff --git a/libdrgn/elf_notes.c b/libdrgn/elf_notes.c index bcd36449d..d9679f3cc 100644 --- a/libdrgn/elf_notes.c +++ b/libdrgn/elf_notes.c @@ -55,6 +55,45 @@ bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, return true; } +int find_elf_note(Elf *elf, const char *name, uint32_t type, const void **ret, + size_t *size_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return -1; + size_t name_size = strlen(name) + 1; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return -1; + if (phdr->p_type != PT_NOTE) + continue; + Elf_Data *data = elf_getdata_rawchunk(elf, phdr->p_offset, + phdr->p_filesz, + note_header_type(phdr->p_align)); + if (!data) + return -1; + GElf_Nhdr nhdr; + size_t offset = 0, name_offset, desc_offset; + while (offset < data->d_size && + (offset = gelf_getnote(data, offset, &nhdr, + &name_offset, + &desc_offset))) { + const char *note_name = (char *)data->d_buf + name_offset; + if (nhdr.n_namesz == name_size + && memcmp(note_name, name, name_size) == 0 + && nhdr.n_type == type) { + *ret = (char *)data->d_buf + desc_offset; + *size_ret = nhdr.n_descsz; + return 0; + } + } + } + *ret = NULL; + *size_ret = 0; + return 0; +} + size_t parse_gnu_build_id_from_notes(const void *buf, size_t size, unsigned int align, bool bswap, const void **ret) diff --git a/libdrgn/elf_notes.h b/libdrgn/elf_notes.h index 3f198f6ee..dc8cb9936 100644 --- a/libdrgn/elf_notes.h +++ b/libdrgn/elf_notes.h @@ -70,6 +70,16 @@ bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, GElf_Nhdr *nhdr_ret, const char **name_ret, const void **desc_ret); +/** + * Find an ELF note matching the given name and type. + * + * Note that this currently only checks segments, not sections. 
+ * + * @return 0 on success, -1 on libelf error. + */ +int find_elf_note(Elf *elf, const char *name, uint32_t type, const void **ret, + size_t *size_ret); + /** * Parse a GNU build ID from a buffer containing note data. From c528375089cf39c55af1a09bb1ce0e7ee89279d2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:43:48 -0800 Subject: [PATCH 003/166] libdrgn: add CRC-32 implementation This will be used for checking the CRC in .gnu_debuglink. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 6 ++++ libdrgn/crc32.c | 65 ++++++++++++++++++++++++++++++++++++++++ libdrgn/crc32.h | 49 ++++++++++++++++++++++++++++++ libdrgn/tests/crc32.c.in | 40 +++++++++++++++++++++++++ 4 files changed, 160 insertions(+) create mode 100644 libdrgn/crc32.c create mode 100644 libdrgn/crc32.h create mode 100644 libdrgn/tests/crc32.c.in diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 5cffa80da..6eb451bc6 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -52,6 +52,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ cfi.h \ cityhash.h \ cleanup.h \ + crc32.c \ + crc32.h \ debug_info.c \ debug_info.h \ drgn_internal.h \ @@ -227,6 +229,7 @@ TESTS = $(check_PROGRAMS) check_PROGRAMS = tests/binary_search \ tests/cityhash \ + tests/crc32 \ tests/language_c \ tests/lexer \ tests/path \ @@ -252,6 +255,9 @@ tests_binary_search_LDADD = $(test_ldadd) tests_cityhash_CFLAGS = $(test_cflags) tests_cityhash_CPPFLAGS = $(test_cppflags) tests_cityhash_LDADD = $(test_ldadd) +tests_crc32_CFLAGS = $(test_cflags) +tests_crc32_CPPFLAGS = $(test_cppflags) +tests_crc32_LDADD = $(test_ldadd) tests_language_c_CFLAGS = $(test_cflags) tests_language_c_CPPFLAGS = $(test_cppflags) tests_language_c_LDADD = $(test_ldadd) diff --git a/libdrgn/crc32.c b/libdrgn/crc32.c new file mode 100644 index 000000000..a4c627330 --- /dev/null +++ b/libdrgn/crc32.c @@ -0,0 +1,65 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crc32.h" + +uint32_t crc32_update(uint32_t crc, const void *buf, size_t len) +{ + static const uint32_t table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 
0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d, + }; + for (const uint8_t *p = buf, *end = p + len; p < end; p++) + crc = (crc >> 8) ^ table[(crc ^ *p) & 0xff]; + return crc; +} diff --git a/libdrgn/crc32.h b/libdrgn/crc32.h new file mode 100644 index 000000000..e202ff202 --- /dev/null +++ b/libdrgn/crc32.h @@ -0,0 +1,49 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * CRC-32 checksums. + * + * See @ref CRC32. + */ + +#ifndef DRGN_CRC32_H +#define DRGN_CRC32_H + +#include +#include + +/** + * @ingroup Internals + * + * @defgroup CRC32 CRC-32 + * + * CRC-32 checksums. 
+ * + * @{ + */ + +/** + * Update a CRC-32 checksum with additional data. + * + * This uses the IEEE CRC-32 polynomial (x^32 + + * x^26 + x^23 + x^22 + + * x^16 + x^12 + x^11 + + * x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + + * x + 1). + * + * @param[in] crc Checksum to update. For the first call, this is the initial + * checksum value (often `0xffffffff`). + * @param[in] buf Data to checksum. + * @param[in] len Size of @p buf in bytes. + * @return Updated checksum. This is not bitwise negated as is often required + * for the final result. + */ +uint32_t crc32_update(uint32_t crc, const void *buf, size_t len); + +/** @} */ + +#endif /* DRGN_CRC32_H */ diff --git a/libdrgn/tests/crc32.c.in b/libdrgn/tests/crc32.c.in new file mode 100644 index 000000000..fb8fe662a --- /dev/null +++ b/libdrgn/tests/crc32.c.in @@ -0,0 +1,40 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include + +#include "test_util.h" +#include "../crc32.h" + +static uint32_t string_crc32(const char *s) +{ + return ~crc32_update(~0, s, strlen(s)); +} + +#suite crc32 + +#tcase crc32 + +#test empty +{ + ck_assert_uint_eq(string_crc32(""), 0); +} + +#test simple +{ + // https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32-iso-hdlc + ck_assert_uint_eq(string_crc32("123456789"), 0xcbf43926); + // http://www.febooti.com/products/filetweak/members/hash-and-crc/test-vectors/ + ck_assert_uint_eq(string_crc32("The quick brown fox jumps over the lazy dog"), + 0x414fa339); +} + +#test update +{ + uint32_t crc = ~0; + crc = crc32_update(crc, "12", 2); + crc = crc32_update(crc, "345", 3); + crc = crc32_update(crc, "6789", 4); + crc = ~crc; + ck_assert_uint_eq(crc, 0xcbf43926); +} From 75f5e88f04da97368a3c60c27c6c7735d1af06df Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:47:32 -0800 Subject: [PATCH 004/166] libdrgn: add hexadecimal encoding/decoding functions These will be used for GNU build IDs. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 6 ++++ libdrgn/hexlify.c | 58 +++++++++++++++++++++++++++++++ libdrgn/hexlify.h | 71 ++++++++++++++++++++++++++++++++++++++ libdrgn/tests/hexlify.c.in | 52 ++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+) create mode 100644 libdrgn/hexlify.c create mode 100644 libdrgn/hexlify.h create mode 100644 libdrgn/tests/hexlify.c.in diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 6eb451bc6..4f0464df0 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -75,6 +75,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ hash_table.c \ hash_table.h \ helpers.h \ + hexlify.c \ + hexlify.h \ io.c \ io.h \ kallsyms.c \ @@ -230,6 +232,7 @@ TESTS = $(check_PROGRAMS) check_PROGRAMS = tests/binary_search \ tests/cityhash \ tests/crc32 \ + tests/hexlify \ tests/language_c \ tests/lexer \ tests/path \ @@ -258,6 +261,9 @@ tests_cityhash_LDADD = $(test_ldadd) tests_crc32_CFLAGS = $(test_cflags) tests_crc32_CPPFLAGS = $(test_cppflags) tests_crc32_LDADD = $(test_ldadd) +tests_hexlify_CFLAGS = $(test_cflags) +tests_hexlify_CPPFLAGS = $(test_cppflags) +tests_hexlify_LDADD = $(test_ldadd) tests_language_c_CFLAGS = $(test_cflags) tests_language_c_CPPFLAGS = $(test_cppflags) tests_language_c_LDADD = $(test_ldadd) diff --git a/libdrgn/hexlify.c b/libdrgn/hexlify.c new file mode 100644 index 000000000..5a0d6e96e --- /dev/null +++ b/libdrgn/hexlify.c @@ -0,0 +1,58 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include + +#include "hexlify.h" + +void hexlify(const void *in, size_t in_len, char *out) +{ + static const char nibble_to_hex_digit[] = "0123456789abcdef"; + for (size_t i = 0; i < in_len; i++) { + uint8_t byte = ((uint8_t *)in)[i]; + out[2 * i] = nibble_to_hex_digit[byte >> 4]; + out[2 * i + 1] = nibble_to_hex_digit[byte & 0xf]; + } +} + +char *ahexlify(const void *in, size_t in_len) +{ + size_t out_size; + if (__builtin_mul_overflow(in_len, 2U, &out_size) || + __builtin_add_overflow(out_size, 1U, &out_size)) + return NULL; + char *out = malloc(out_size); + if (!out) + return NULL; + hexlify(in, in_len, out); + out[out_size - 1] = '\0'; + return out; +} + +static inline bool hex_digit_to_nibble(char c, uint8_t *ret) +{ + if ('0' <= c && c <= '9') + *ret = c - '0'; + else if ('a' <= c && c <= 'f') + *ret = c - 'a' + 10; + else if ('A' <= c && c <= 'F') + *ret = c - 'A' + 10; + else + return false; + return true; +} + +bool unhexlify(const char *in, size_t in_len, void *out) +{ + if (in_len % 2) + return false; + for (size_t i = 0; i < in_len; i += 2) { + uint8_t lo, hi; + if (!hex_digit_to_nibble(in[i], &hi) || + !hex_digit_to_nibble(in[i + 1], &lo)) + return false; + ((uint8_t *)out)[i / 2] = (hi << 4) | lo; + } + return true; +} diff --git a/libdrgn/hexlify.h b/libdrgn/hexlify.h new file mode 100644 index 000000000..51421aead --- /dev/null +++ b/libdrgn/hexlify.h @@ -0,0 +1,71 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * Hexadecimal encoding/decoding. + * + * See @ref Hexlify. + */ + +#ifndef DRGN_HEXLIFY_H +#define DRGN_HEXLIFY_H + +#include +#include + +/** + * @ingroup Internals + * + * @defgroup Hexlify Hexlify + * + * Hexadecimal encoding/decoding. + * + * @{ + */ + +/** + * Encode binary data to a hexadecimal string. 
+ * + * The output string is an even number of lowercase hexadecimal characters with + * no separators. It is not null-terminated. + * + * @param[in] in Input binary data. + * @param[in] in_len Size of @p in in bytes. + * @param[out] out Output hexadecimal string of size `2 * in_len` characters. + * Not null-terminated. + */ +void hexlify(const void *in, size_t in_len, char *out); + +/** + * Allocate and encode binary data to a hexadecimal string. + * + * This is like @ref hexlify(), but it allocates the output string, including a + * terminating null byte. + * + * @param[in] in Input binary data. + * @param[in] in_len Size of @p in in bytes. + * @return Output hexadecimal string, or `NULL` on failure to allocate memory. + * Unlike @ref hexlify(), this *is* null-terminated. On success, it must be + * freed with `free()`. + */ +char *ahexlify(const void *in, size_t in_len); + +/** + * Decode hexadecimal string to binary data. + * + * The input string must be an even number of hexadecimal characters (either + * lowercase or uppercase) with no separators. + * + * @param[in] in Input hexadecimal string. Does not need to be null-terminated. + * @param[in] in_len Number of characters in @p in. + * @param[out] out Returned binary data of size `in_len / 2` bytes. + * @return `true` if data was successfully decoded, `false` if not (either + * because @p in_len was odd or @p in contained non-hexadecimal characters). + */ +bool unhexlify(const char *in, size_t in_len, void *out); + +/** @} */ + +#endif /* DRGN_HEXLIFY_H */ diff --git a/libdrgn/tests/hexlify.c.in b/libdrgn/tests/hexlify.c.in new file mode 100644 index 000000000..82ab50eed --- /dev/null +++ b/libdrgn/tests/hexlify.c.in @@ -0,0 +1,52 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include + +#include "test_util.h" +#include "../cleanup.h" +#include "../hexlify.h" + +static const uint8_t binary[] = { + 0x19, 0x29, 0x1d, 0x9a, 0xc4, 0xf3, 0x4c, 0x42, 0x01, 0xee, + 0xdf, 0x9e, 0x8d, 0x1e, 0x59, 0x68, 0xf7, 0xd5, 0x48, 0x19, +}; +static const char str[] = "19291d9ac4f34c4201eedf9e8d1e5968f7d54819"; + +#suite hexlify + +#tcase hexlify + +#test hexlify_simple +{ + char out[2 * sizeof(binary) + 1]; + out[sizeof(out) - 1] = '~'; + hexlify(binary, sizeof(binary), out); + ck_assert_mem_eq(out, str, sizeof(out) - 1); + // Test that the string wasn't null-terminated. + ck_assert_int_eq(out[sizeof(out) - 1], '~'); +} + +#test ahexlify_simple +{ + _cleanup_free_ char *out = ahexlify(binary, sizeof(binary)); + ck_assert_ptr_nonnull(out); + ck_assert_str_eq(out, str); +} + +#test unhexlify_simple +{ + uint8_t out[(sizeof(str) - 1) / 2]; + ck_assert(unhexlify(str, sizeof(str) - 1, out)); + ck_assert_mem_eq(out, binary, sizeof(binary)); +} + +#test unhexlify_odd +{ + ck_assert(!unhexlify("abc", 3, (uint8_t [1]){})); +} + +#test unhexlify_non_hex +{ + ck_assert(!unhexlify("foobar", 6, (uint8_t [3]){})); +} From 8e12b67a80fee942135e8cccfad3a1e9597b1b49 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:49:27 -0800 Subject: [PATCH 005/166] libdrgn: add functions for deserializing 64- or 32-bit structures We are going to need to parse various structures that have different 64- and 32-bit formats, may be byte-swapped, and may be unaligned (e.g., various ELF and link map structures). Provide a couple of convenience macros for doing this. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 6 +- libdrgn/serialize.h | 177 ++++++++++++++++++++++++++++++++ libdrgn/tests/serialize.c.in | 192 +++++++++++++++++++++++++++++++++++ libdrgn/util.h | 4 + 4 files changed, 378 insertions(+), 1 deletion(-) create mode 100644 libdrgn/tests/serialize.c.in diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 4f0464df0..394ed668d 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -236,7 +236,8 @@ check_PROGRAMS = tests/binary_search \ tests/language_c \ tests/lexer \ tests/path \ - tests/recursion_guard + tests/recursion_guard \ + tests/serialize EXTRA_DIST += $(addsuffix .c.in,$(check_PROGRAMS)) @@ -276,6 +277,9 @@ tests_path_LDADD = $(test_ldadd) tests_recursion_guard_CFLAGS = $(test_cflags) tests_recursion_guard_CPPFLAGS = $(test_cppflags) tests_recursion_guard_LDADD = $(test_ldadd) +tests_serialize_CFLAGS = $(test_cflags) +tests_serialize_CPPFLAGS = $(test_cppflags) +tests_serialize_LDADD = $(test_ldadd) # Don't delete test-suite.log on failure. 
.PRECIOUS: $(TEST_SUITE_LOG) diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 5c995dcbd..e23f69df2 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -12,11 +12,13 @@ #ifndef DRGN_SERIALIZE_H #define DRGN_SERIALIZE_H +#include #include #include #include #include "minmax.h" +#include "util.h" /** * @ingroup Internals @@ -175,6 +177,181 @@ void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, uint64_t deserialize_bits(const void *buf, uint64_t bit_offset, uint8_t bit_size, bool little_endian); +#define struct64_assign_member(member) do { \ + typeof_member(_struct64_src_type, member) _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_dst->member = _struct64_tmp; \ +} while (0) + +#define struct64_bswap_member(member) do { \ + typeof_member(_struct64_src_type, member) _struct64_swapped; \ + _Static_assert(sizeof(_struct64_swapped) == 8 || \ + sizeof(_struct64_swapped) == 4 || \ + sizeof(_struct64_swapped) == 2 || \ + sizeof(_struct64_swapped) == 1, \ + "scalar member has invalid size"); \ + if (sizeof(_struct64_swapped) == 8) { \ + uint64_t _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_64(_struct64_tmp); \ + memcpy(&_struct64_swapped, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_swapped) == 4) { \ + uint32_t _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_32(_struct64_tmp); \ + memcpy(&_struct64_swapped, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_swapped) == 2) { \ + uint16_t _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_16(_struct64_tmp); \ + 
memcpy(&_struct64_swapped, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else { \ + memcpy(&_struct64_swapped, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_swapped)); \ + } \ + _struct64_dst->member = _struct64_swapped; \ +} while (0) + +#define struct64_bswap_member_inplace(member) do { \ + _Static_assert(sizeof(_struct64_dst->member) == 8 || \ + sizeof(_struct64_dst->member) == 4 || \ + sizeof(_struct64_dst->member) == 2 || \ + sizeof(_struct64_dst->member) == 1, \ + "scalar member has invalid size"); \ + if (sizeof(_struct64_dst->member) == 8) { \ + uint64_t _struct64_tmp; \ + memcpy(&_struct64_tmp, &_struct64_dst->member, \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_64(_struct64_tmp); \ + memcpy(&_struct64_dst->member, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_dst->member) == 4) { \ + uint32_t _struct64_tmp; \ + memcpy(&_struct64_tmp, &_struct64_dst->member, \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_32(_struct64_tmp); \ + memcpy(&_struct64_dst->member, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_dst->member) == 2) { \ + uint16_t _struct64_tmp; \ + memcpy(&_struct64_tmp, &_struct64_dst->member, \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_16(_struct64_tmp); \ + memcpy(&_struct64_dst->member, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } \ +} while (0) + +#define struct64_memcpy_member(member) do { \ + _Static_assert(sizeof(_struct64_dst->member) \ + == sizeof_member(_struct64_src_type, member), \ + "64-bit and 32-bit members have different sizes"); \ + memcpy(&_struct64_dst->member, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_dst->member)); \ +} while (0) + +#define struct64_ignore_member(member) + +#ifdef DOXYGEN +/** + * Deserialize a structure from a memory buffer, where the structure has + * different 64-bit and 32-bit formats, may have a different byte order, and may + * be unaligned. 
+ * + * @param[out] struct64p Returned 64-bit structure in host byte order. + * @param[in] T32 32-bit structure type. + * @param[in] visit_members Macro with signature + * `visit_members(visit_scalar_member, visit_raw_member)`. + * `visit_scalar_member()` is a macro that should be called with the name of + * each scalar member of the structure. `visit_raw_member()` is a macro that + * should be called with the name of each member that is identical regardless of + * 64-/32-bit format or byte order. + * @param[in] buf Source buffer. Must not overlap with @p struct64p. + * @param[in] is_64_bit Whether the source is in the 64-bit format or the 32-bit + * format. + * @param[in] bswap Whether the source has a different byte order than the host + * system. + */ +void deserialize_struct64(T64 * restrict struct64p, T32, visit_members, + const void * restrict buf, bool is_64_bit, + bool bswap); + +/** + * Like @ref deserialize_struct64(), but the source and destination are the + * same. + * + * @param[in,out] struct64p Initially the source buffer, then the returned + * 64-bit structure in host byte order. + */ +void deserialize_struct64_inplace(T64 *struct64p, T32, visit_members, + bool is_64_bit, bool bswap); + +#else +#define deserialize_struct64(struct64p, type32, visit_members, buf, is_64_bit, \ + bswap) \ +do { \ + __auto_type _struct64_dst = (struct64p); \ + /* \ + * We want to type check buf like a function parameter, so do two \ + * implicit conversions instead of an explicit cast. 
\ + */ \ + const void *_struct64_buf = (buf); \ + const char *_struct64_src = _struct64_buf; \ + if (is_64_bit) { \ + if (bswap) { \ + typedef typeof(*_struct64_dst) _struct64_src_type; \ + visit_members(struct64_bswap_member, \ + struct64_memcpy_member); \ + } else { \ + memcpy(_struct64_dst, _struct64_src, sizeof(*_struct64_dst)); \ + } \ + } else { \ + typedef typeof(type32) _struct64_src_type; \ + if (bswap) { \ + visit_members(struct64_bswap_member, \ + struct64_memcpy_member); \ + } else { \ + visit_members(struct64_assign_member, \ + struct64_memcpy_member); \ + } \ + } \ +} while (0) + +#define deserialize_struct64_inplace(struct64p, type32, visit_members, \ + is_64_bit, bswap) do { \ + __auto_type _struct64_dst = (struct64p); \ + if (!(is_64_bit)) { \ + typedef typeof(type32) _struct64_src_type; \ + _Alignas(_struct64_src_type) char \ + _struct64_src[sizeof(_struct64_src_type)]; \ + memcpy(_struct64_src, _struct64_dst, sizeof(_struct64_src)); \ + if (bswap) { \ + visit_members(struct64_bswap_member, \ + struct64_memcpy_member); \ + } else { \ + visit_members(struct64_assign_member, \ + struct64_memcpy_member); \ + } \ + } else if (bswap) { \ + visit_members(struct64_bswap_member_inplace, \ + struct64_ignore_member); \ + } \ +} while (0) +#endif + /** @} */ #endif /* DRGN_SERIALIZE_H */ diff --git a/libdrgn/tests/serialize.c.in b/libdrgn/tests/serialize.c.in new file mode 100644 index 000000000..1940a0371 --- /dev/null +++ b/libdrgn/tests/serialize.c.in @@ -0,0 +1,192 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "test_util.h" +#include "../serialize.h" +#include "../util.h" + +struct foo64 { + uint64_t big; + uint32_t medium; + uint16_t small; + uint8_t tiny; + uint8_t array[3]; +}; + +struct foo32 { + uint32_t big; + uint16_t medium; + uint8_t small; + uint8_t tiny; + uint8_t array[3]; +}; + +#define visit_foo_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(big); \ + visit_scalar_member(medium); \ + visit_scalar_member(small); \ + visit_scalar_member(tiny); \ + visit_raw_member(array); \ +} while (0) + +#suite serialize + +#tcase deserialize_struct64 + +#test deserialize_struct64_64le +{ + uint8_t buf[24] = { + 0x00, 0xf2, 0x05, 0x2a, 0x01, 0x00, 0x00, 0x00, + 0xa0, 0x86, 0x01, 0x00, + 0x20, 0x03, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, true, + !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_64be +{ + uint8_t buf[24] = { + 0x00, 0x00, 0x00, 0x01, 0x2a, 0x05, 0xf2, 0x00, + 0x00, 0x01, 0x86, 0xa0, + 0x03, 0x20, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, true, + HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_32le +{ + uint8_t buf[12] = { + 0x00, 0x5e, 0xd0, 0xb2, + 0x10, 0x27, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, false, + !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + 
ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_32be +{ + uint8_t buf[12] = { + 0xb2, 0xd0, 0x5e, 0x00, + 0x27, 0x10, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, false, + HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#tcase deserialize_struct64_inplace + +#test deserialize_struct64_inplace_64le +{ + uint8_t buf[24] = { + 0x00, 0xf2, 0x05, 0x2a, 0x01, 0x00, 0x00, 0x00, + 0xa0, 0x86, 0x01, 0x00, + 0x20, 0x03, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + true, !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_inplace_64be +{ + uint8_t buf[24] = { + 0x00, 0x00, 0x00, 0x01, 0x2a, 0x05, 0xf2, 0x00, + 0x00, 0x01, 0x86, 0xa0, + 0x03, 0x20, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + true, HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_inplace_32le +{ + uint8_t buf[12] = { + 0x00, 0x5e, 0xd0, 0xb2, + 0x10, 0x27, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + false, !HOST_LITTLE_ENDIAN); 
+ ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_inplace_32be +{ + uint8_t buf[12] = { + 0xb2, 0xd0, 0x5e, 0x00, + 0x27, 0x10, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + false, HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} diff --git a/libdrgn/util.h b/libdrgn/util.h index 243c7e842..333ff2f58 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -69,6 +69,10 @@ _Generic(sizeof(struct { _Static_assert(assert_expression, message); int _; }),\ default: (eval_expression)) +#define sizeof_member(type, member) sizeof(((type *)0)->member) + +#define typeof_member(type, member) typeof(((type *)0)->member) + #define container_of(ptr, type, member) \ static_assert_expression( \ types_compatible(*(ptr), ((type *)0)->member) \ From 59b23c88aca4e0a00e90335f5ccdb5bac081791d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:50:07 -0800 Subject: [PATCH 006/166] libdrgn: binary_search_tree: add delete_entry This is a trivial wrapper to delete by entry instead of by key or iterator. Signed-off-by: Omar Sandoval --- libdrgn/binary_search_tree.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index 6c1411d70..cbaa7b2e4 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -150,6 +150,16 @@ struct binary_search_tree_iterator binary_search_tree_delete_iterator(struct binary_search_tree *tree, struct binary_search_tree_iterator it); +/** + * Delete an entry in a @ref binary_search_tree. 
+ * + * @return An iterator pointing to the next entry in the tree. See @ref + * binary_search_tree_next(). + */ +struct binary_search_tree_iterator +binary_search_tree_delete_entry(struct binary_search_tree *tree, + entry_type *entry); + /** * Get an iterator pointing to the first (in-order) entry in a @ref * binary_search_tree. @@ -492,6 +502,13 @@ tree##_delete_iterator(struct tree *tree, struct tree##_iterator it) \ return it; \ } \ \ +__attribute__((__always_inline__, __unused__)) \ +static inline struct tree##_iterator \ +tree##_delete_entry(struct tree *tree, tree##_entry_type *entry) \ +{ \ + return tree##_delete_iterator(tree, (struct tree##_iterator){ entry }); \ +} \ + \ __attribute__((__unused__)) \ static struct tree##_iterator tree##_first(struct tree *tree) \ { \ From 7781e493845f28fd08d8cb8552f38475664d4f26 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:52:28 -0800 Subject: [PATCH 007/166] libdrgn: linux_kernel: parse build ID in VMCOREINFO This has been available since Linux kernel commit 0935288c6e00 ("kdump: append kernel build-id string to VMCOREINFO") (in v5.9). Save it so we can use it when loading debugging information. Unfortunately, the build ID in VMCOREINFO was briefly broken in several stable releases. 6.10 and 5.15 reached their end-of-life while broken and so will remain broken forever. It feels like overkill to drop support for those versions over this, so we work around it with a version check. 
Signed-off-by: Omar Sandoval --- ...rgn_program_parse_vmcoreinfo.inc.strswitch | 44 +++++++++++++++++++ libdrgn/linux_kernel.c | 1 + libdrgn/program.h | 4 ++ 3 files changed, 49 insertions(+) diff --git a/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch b/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch index 545950220..a9d8dff07 100644 --- a/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch +++ b/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch @@ -18,6 +18,34 @@ static struct drgn_error *parse_vmcoreinfo_u64(const char *value, return NULL; } +// Linux kernel commit 905415ff3ffb ("lib/buildid: harden build ID parsing +// logic") (in v6.12) contains a bug that results in a garbage build ID in +// VMCOREINFO. It was fixed in the same patch series in commits de3ec364c3c3 +// ("lib/buildid: add single folio-based file reader abstraction") and +// d4deb8242341 ("lib/buildid: take into account e_phoff when fetching program +// headers"). However, the broken commit was backported to several stable +// kernels. Some branches were fixed by "lib/buildid: Fix build ID parsing +// logic", but a couple reached their end-of-life while broken. See +// https://lore.kernel.org/all/20241104175256.2327164-1-jolsa@kernel.org/. +// +// The very sad workaround is to ignore the build ID based on a version check. 
+static void ignore_broken_vmcoreinfo_build_id(struct drgn_program *prog) +{ + char *p = (char *)prog->vmcoreinfo.osrelease; + long major = strtol(p, &p, 10), minor = 0, patch = 0; + if (*p == '.') { + minor = strtol(p + 1, &p, 10); + if (*p == '.') + patch = strtol(p + 1, NULL, 10); + } + if ((major == 6 && minor == 11 && patch >= 3 && patch < 10) + || (major == 6 && minor == 10 && patch >= 14) + || (major == 6 && minor == 6 && patch >= 55 && patch < 63) + || (major == 6 && minor == 1 && patch >= 113 && patch < 119) + || (major == 5 && minor == 15 && patch >= 168)) + prog->vmcoreinfo.build_id_len = 0; +} + struct drgn_error *drgn_program_parse_vmcoreinfo(struct drgn_program *prog, const char *desc, size_t descsz) @@ -37,6 +65,21 @@ struct drgn_error *drgn_program_parse_vmcoreinfo(struct drgn_program *prog, const char *value = equals + 1; @memswitch (line, equals - line)@ + @case "BUILD-ID"@ + { + size_t build_id_len = (newline - value) / 2; + if (build_id_len > sizeof(prog->vmcoreinfo.build_id)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "BUILD-ID in VMCOREINFO is too long"); + } + if (!unhexlify(value, newline - value, + &prog->vmcoreinfo.build_id)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "couldn't parse BUILD-ID in VMCOREINFO"); + } + prog->vmcoreinfo.build_id_len = build_id_len; + break; + } @case "CRASHTIME"@ prog->vmcoreinfo.have_crashtime = true; break; @@ -119,6 +162,7 @@ struct drgn_error *drgn_program_parse_vmcoreinfo(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "VMCOREINFO does not contain valid OSRELEASE"); } + ignore_broken_vmcoreinfo_build_id(prog); if (!is_power_of_two(prog->vmcoreinfo.page_size)) { return drgn_error_create(DRGN_ERROR_OTHER, "VMCOREINFO does not contain valid PAGESIZE"); diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 94bc2dc70..15979673e 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -25,6 +25,7 @@ #include "error.h" #include "hash_table.h" #include 
"helpers.h" +#include "hexlify.h" #include "io.h" #include "linux_kernel.h" #include "platform.h" diff --git a/libdrgn/program.h b/libdrgn/program.h index 5e5fee714..095fd55a9 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -160,6 +160,8 @@ struct drgn_program { struct { /** `uname -r` */ char osrelease[128]; + /** Build ID. */ + char build_id[128]; /** `PAGE_SIZE` of the kernel. */ uint64_t page_size; /** @@ -193,6 +195,8 @@ struct drgn_program { bool have_crashtime; /** Whether `phys_base` was in the VMCOREINFO. */ bool have_phys_base; + /** Length of build ID. */ + unsigned int build_id_len; /** * `PAGE_SHIFT` of the kernel (derived from * `PAGE_SIZE`). From eb40260beea9f9882686c0506d4704f8a6eb5988 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:52:52 -0800 Subject: [PATCH 008/166] libdrgn: linux_kernel: move depmod index code No code changes other than moving it in the file to make upcoming diffs cleaner. Signed-off-by: Omar Sandoval --- libdrgn/linux_kernel.c | 426 ++++++++++++++++++++--------------------- 1 file changed, 213 insertions(+), 213 deletions(-) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 15979673e..30f167d47 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -397,6 +397,219 @@ struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) return NULL; } +/* + * /lib/modules/$(uname -r)/modules.dep.bin maps all installed kernel modules to + * their filesystem path (and dependencies, which we don't care about). It is + * generated by depmod; the format is a fairly simple serialized radix tree. + * + * modules.dep(5) contains a warning: "These files are not intended for editing + * or use by any additional utilities as their format is subject to change in + * the future." But, the format hasn't changed since 2009, and pulling in + * libkmod is overkill since we only need a very small subset of its + * functionality (plus our minimal parser is more efficient). 
If the format + * changes in the future, we can reevaluate this. + */ + +struct depmod_index { + void *addr; + size_t len; + char path[256]; +}; + +static void depmod_index_deinit(struct depmod_index *depmod) +{ + munmap(depmod->addr, depmod->len); +} + +struct depmod_index_buffer { + struct binary_buffer bb; + struct depmod_index *depmod; +}; + +static struct drgn_error *depmod_index_buffer_error(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + struct depmod_index_buffer *buffer = + container_of(bb, struct depmod_index_buffer, bb); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %#tx: %s", + buffer->depmod->path, + pos - (const char *)buffer->depmod->addr, + message); +} + +static void depmod_index_buffer_init(struct depmod_index_buffer *buffer, + struct depmod_index *depmod) +{ + binary_buffer_init(&buffer->bb, depmod->addr, depmod->len, false, + depmod_index_buffer_error); + buffer->depmod = depmod; +} + +static struct drgn_error *depmod_index_validate(struct depmod_index *depmod) +{ + struct drgn_error *err; + struct depmod_index_buffer buffer; + depmod_index_buffer_init(&buffer, depmod); + uint32_t magic; + if ((err = binary_buffer_next_u32(&buffer.bb, &magic))) + return err; + if (magic != 0xb007f457) { + return binary_buffer_error(&buffer.bb, + "invalid magic 0x%" PRIx32, magic); + } + uint32_t version; + if ((err = binary_buffer_next_u32(&buffer.bb, &version))) + return err; + if (version != 0x00020001) { + return binary_buffer_error(&buffer.bb, + "unknown version 0x%" PRIx32, + version); + } + return NULL; +} + +static struct drgn_error *depmod_index_init(struct depmod_index *depmod, + const char *osrelease) +{ + struct drgn_error *err; + + snprintf(depmod->path, sizeof(depmod->path), + "/lib/modules/%s/modules.dep.bin", osrelease); + + int fd = open(depmod->path, O_RDONLY); + if (fd == -1) + return drgn_error_create_os("open", errno, depmod->path); + + struct stat st; + if (fstat(fd, &st) == -1) { + err = 
drgn_error_create_os("fstat", errno, depmod->path); + goto out; + } + + if (st.st_size < 0 || st.st_size > SIZE_MAX) { + err = &drgn_enomem; + goto out; + } + + void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) { + err = drgn_error_create_os("mmap", errno, depmod->path); + goto out; + } + + depmod->addr = addr; + depmod->len = st.st_size; + + err = depmod_index_validate(depmod); + if (err) + depmod_index_deinit(depmod); +out: + close(fd); + return err; +} + +/* + * Look up the path of the kernel module with the given name. + * + * @param[in] name Name of the kernel module. + * @param[out] path_ret Returned path of the kernel module, relative to + * /lib/modules/$(uname -r). This is @em not null-terminated. @c NULL if not + * found. + * @param[out] len_ret Returned length of @p path_ret. + */ +static struct drgn_error *depmod_index_find(struct depmod_index *depmod, + const char *name, + const char **path_ret, + size_t *len_ret) +{ + static const uint32_t INDEX_NODE_MASK = UINT32_C(0x0fffffff); + static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); + static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); + static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); + + struct drgn_error *err; + struct depmod_index_buffer buffer; + depmod_index_buffer_init(&buffer, depmod); + + /* depmod_index_validate() already checked that this is within bounds. 
*/ + buffer.bb.pos += 8; + uint32_t offset; + for (;;) { + if ((err = binary_buffer_next_u32(&buffer.bb, &offset))) + return err; + if ((offset & INDEX_NODE_MASK) > depmod->len) { + return binary_buffer_error(&buffer.bb, + "offset is out of bounds"); + } + buffer.bb.pos = (const char *)depmod->addr + (offset & INDEX_NODE_MASK); + + if (offset & INDEX_NODE_PREFIX) { + const char *prefix; + size_t prefix_len; + if ((err = binary_buffer_next_string(&buffer.bb, + &prefix, + &prefix_len))) + return err; + if (strncmp(name, prefix, prefix_len) != 0) + goto not_found; + name += prefix_len; + } + + if (offset & INDEX_NODE_CHILDS) { + uint8_t first, last; + if ((err = binary_buffer_next_u8(&buffer.bb, &first)) || + (err = binary_buffer_next_u8(&buffer.bb, &last))) + return err; + if (*name) { + uint8_t cur = *name; + if (cur < first || cur > last) + goto not_found; + if ((err = binary_buffer_skip(&buffer.bb, + 4 * (cur - first)))) + return err; + name++; + continue; + } else { + if ((err = binary_buffer_skip(&buffer.bb, + 4 * (last - first + 1)))) + return err; + break; + } + } else if (*name) { + goto not_found; + } else { + break; + } + } + if (!(offset & INDEX_NODE_VALUES)) + goto not_found; + + uint32_t value_count; + if ((err = binary_buffer_next_u32(&buffer.bb, &value_count))) + return err; + if (!value_count) + goto not_found; /* Or is this malformed? */ + + /* Skip over priority. 
*/ + if ((err = binary_buffer_skip(&buffer.bb, 4))) + return err; + + const char *colon = memchr(buffer.bb.pos, ':', + buffer.bb.end - buffer.bb.pos); + if (!colon) { + return binary_buffer_error(&buffer.bb, + "expected string containing ':'"); + } + *path_ret = buffer.bb.pos; + *len_ret = colon - buffer.bb.pos; + return NULL; + +not_found: + *path_ret = NULL; + return NULL; +} + struct kernel_module_iterator { char *name; uint64_t start, end; @@ -978,219 +1191,6 @@ kernel_module_section_iterator_next(struct kernel_module_section_iterator *it, return NULL; } -/* - * /lib/modules/$(uname -r)/modules.dep.bin maps all installed kernel modules to - * their filesystem path (and dependencies, which we don't care about). It is - * generated by depmod; the format is a fairly simple serialized radix tree. - * - * modules.dep(5) contains a warning: "These files are not intended for editing - * or use by any additional utilities as their format is subject to change in - * the future." But, the format hasn't changed since 2009, and pulling in - * libkmod is overkill since we only need a very small subset of its - * functionality (plus our minimal parser is more efficient). If the format - * changes in the future, we can reevaluate this. 
- */ - -struct depmod_index { - void *addr; - size_t len; - char path[256]; -}; - -static void depmod_index_deinit(struct depmod_index *depmod) -{ - munmap(depmod->addr, depmod->len); -} - -struct depmod_index_buffer { - struct binary_buffer bb; - struct depmod_index *depmod; -}; - -static struct drgn_error *depmod_index_buffer_error(struct binary_buffer *bb, - const char *pos, - const char *message) -{ - struct depmod_index_buffer *buffer = - container_of(bb, struct depmod_index_buffer, bb); - return drgn_error_format(DRGN_ERROR_OTHER, "%s: %#tx: %s", - buffer->depmod->path, - pos - (const char *)buffer->depmod->addr, - message); -} - -static void depmod_index_buffer_init(struct depmod_index_buffer *buffer, - struct depmod_index *depmod) -{ - binary_buffer_init(&buffer->bb, depmod->addr, depmod->len, false, - depmod_index_buffer_error); - buffer->depmod = depmod; -} - -static struct drgn_error *depmod_index_validate(struct depmod_index *depmod) -{ - struct drgn_error *err; - struct depmod_index_buffer buffer; - depmod_index_buffer_init(&buffer, depmod); - uint32_t magic; - if ((err = binary_buffer_next_u32(&buffer.bb, &magic))) - return err; - if (magic != 0xb007f457) { - return binary_buffer_error(&buffer.bb, - "invalid magic 0x%" PRIx32, magic); - } - uint32_t version; - if ((err = binary_buffer_next_u32(&buffer.bb, &version))) - return err; - if (version != 0x00020001) { - return binary_buffer_error(&buffer.bb, - "unknown version 0x%" PRIx32, - version); - } - return NULL; -} - -static struct drgn_error *depmod_index_init(struct depmod_index *depmod, - const char *osrelease) -{ - struct drgn_error *err; - - snprintf(depmod->path, sizeof(depmod->path), - "/lib/modules/%s/modules.dep.bin", osrelease); - - int fd = open(depmod->path, O_RDONLY); - if (fd == -1) - return drgn_error_create_os("open", errno, depmod->path); - - struct stat st; - if (fstat(fd, &st) == -1) { - err = drgn_error_create_os("fstat", errno, depmod->path); - goto out; - } - - if (st.st_size < 
0 || st.st_size > SIZE_MAX) { - err = &drgn_enomem; - goto out; - } - - void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) { - err = drgn_error_create_os("mmap", errno, depmod->path); - goto out; - } - - depmod->addr = addr; - depmod->len = st.st_size; - - err = depmod_index_validate(depmod); - if (err) - depmod_index_deinit(depmod); -out: - close(fd); - return err; -} - -/* - * Look up the path of the kernel module with the given name. - * - * @param[in] name Name of the kernel module. - * @param[out] path_ret Returned path of the kernel module, relative to - * /lib/modules/$(uname -r). This is @em not null-terminated. @c NULL if not - * found. - * @param[out] len_ret Returned length of @p path_ret. - */ -static struct drgn_error *depmod_index_find(struct depmod_index *depmod, - const char *name, - const char **path_ret, - size_t *len_ret) -{ - static const uint32_t INDEX_NODE_MASK = UINT32_C(0x0fffffff); - static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); - static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); - static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); - - struct drgn_error *err; - struct depmod_index_buffer buffer; - depmod_index_buffer_init(&buffer, depmod); - - /* depmod_index_validate() already checked that this is within bounds. 
*/ - buffer.bb.pos += 8; - uint32_t offset; - for (;;) { - if ((err = binary_buffer_next_u32(&buffer.bb, &offset))) - return err; - if ((offset & INDEX_NODE_MASK) > depmod->len) { - return binary_buffer_error(&buffer.bb, - "offset is out of bounds"); - } - buffer.bb.pos = (const char *)depmod->addr + (offset & INDEX_NODE_MASK); - - if (offset & INDEX_NODE_PREFIX) { - const char *prefix; - size_t prefix_len; - if ((err = binary_buffer_next_string(&buffer.bb, - &prefix, - &prefix_len))) - return err; - if (strncmp(name, prefix, prefix_len) != 0) - goto not_found; - name += prefix_len; - } - - if (offset & INDEX_NODE_CHILDS) { - uint8_t first, last; - if ((err = binary_buffer_next_u8(&buffer.bb, &first)) || - (err = binary_buffer_next_u8(&buffer.bb, &last))) - return err; - if (*name) { - uint8_t cur = *name; - if (cur < first || cur > last) - goto not_found; - if ((err = binary_buffer_skip(&buffer.bb, - 4 * (cur - first)))) - return err; - name++; - continue; - } else { - if ((err = binary_buffer_skip(&buffer.bb, - 4 * (last - first + 1)))) - return err; - break; - } - } else if (*name) { - goto not_found; - } else { - break; - } - } - if (!(offset & INDEX_NODE_VALUES)) - goto not_found; - - uint32_t value_count; - if ((err = binary_buffer_next_u32(&buffer.bb, &value_count))) - return err; - if (!value_count) - goto not_found; /* Or is this malformed? */ - - /* Skip over priority. */ - if ((err = binary_buffer_skip(&buffer.bb, 4))) - return err; - - const char *colon = memchr(buffer.bb.pos, ':', - buffer.bb.end - buffer.bb.pos); - if (!colon) { - return binary_buffer_error(&buffer.bb, - "expected string containing ':'"); - } - *path_ret = buffer.bb.pos; - *len_ret = colon - buffer.bb.pos; - return NULL; - -not_found: - *path_ret = NULL; - return NULL; -} - /* * Identify an ELF file as a kernel module, vmlinux, or neither. We classify a * file as a kernel module if it has a section named .gnu.linkonce.this_module. 
From e215bd6c88a23760206ad0d41920fa91c2673180 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:55:12 -0800 Subject: [PATCH 009/166] libdrgn: debug_info: add debuginfod client scaffolding We currently use debuginfod via libdwfl, but when we get rid of our libdwfl dependency, we'll need to do the debuginfod calls ourselves. So, let's add the scaffolding for using libdebuginfod. We provide three choices at build time: * No debuginfod (./configure --without-debuginfod). * Soft dependency: load libdebuginfod with dlopen at runtime if available (./configure --with-debuginfod --enable-dlopen-debuginfod). This is the default and probably what we want distros to use. * Hard dependency: link against libdebuginfod (./configure --with-debuginfod --disable-dlopen-debuginfod). This is intended for environments where dlopen can't be used (e.g., manylinux wheels). The client handle will be created lazily, so for now this just sets up some wrappers and doesn't do much with them. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 9 ++++++ libdrgn/configure.ac | 23 ++++++++++++++ libdrgn/debug_info.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ libdrgn/debug_info.h | 7 +++++ 4 files changed, 112 insertions(+) diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 394ed668d..e2899fd09 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -133,6 +133,15 @@ libdrgnimpl_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden $(OPENMP_CFLAGS) \ libdrgnimpl_la_CPPFLAGS = $(AM_CPPFLAGS) -iquote . 
libdrgnimpl_la_LIBADD = $(OPENMP_LIBS) $(elfutils_LIBS) -lm +if WITH_DEBUGINFOD +if ENABLE_DLOPEN_DEBUGINFOD +libdrgnimpl_la_LIBADD += -ldl +else +libdrgnimpl_la_CFLAGS += $(libdebuginfod_CFLAGS) +libdrgnimpl_la_LIBADD += $(libdebuginfod_LIBS) +endif +endif + if WITH_LIBKDUMPFILE libdrgnimpl_la_SOURCES += kdump.c libdrgnimpl_la_CFLAGS += $(libkdumpfile_CFLAGS) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 67bd2b71b..1a523a794 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -81,6 +81,29 @@ PKG_PROG_PKG_CONFIG PKG_CHECK_MODULES(elfutils, [libelf >= 0.165 libdw >= 0.165]) +AC_ARG_WITH([debuginfod], + [AS_HELP_STRING([--with-debuginfod], + [build with support for debuginfod + @<:@default=auto@:>@])], + [], [with_debuginfod=auto]) +AS_CASE(["x$with_debuginfod"], + [xyes], [PKG_CHECK_MODULES(libdebuginfod, [libdebuginfod])], + [xauto], [PKG_CHECK_MODULES(libdebuginfod, [libdebuginfod], + [with_debuginfod=yes], + [with_debuginfod=no])]) +AM_CONDITIONAL([WITH_DEBUGINFOD], [test "x$with_debuginfod" != xno]) +AM_COND_IF([WITH_DEBUGINFOD], [AC_DEFINE(WITH_DEBUGINFOD)]) + +AC_ARG_ENABLE([dlopen-debuginfod], + [AS_HELP_STRING([--disable-dlopen-debuginfod], + [if building with support for debuginfod, link + against libdebuginfod instead of loading it with + dlopen(3)])], + [], [enable_dlopen_debuginfod=yes]) +AM_CONDITIONAL([ENABLE_DLOPEN_DEBUGINFOD], + [test "x$enable_dlopen_debuginfod" != xno]) +AM_COND_IF([ENABLE_DLOPEN_DEBUGINFOD], [AC_DEFINE(ENABLE_DLOPEN_DEBUGINFOD)]) + AC_ARG_WITH([libkdumpfile], [AS_HELP_STRING([--with-libkdumpfile], [build with support for the makedumpfile kernel diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 85040eddd..7fee6286a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -28,6 +28,72 @@ #include "program.h" #include "util.h" +#if WITH_DEBUGINFOD +#if _ELFUTILS_PREREQ(0, 179) +#define DRGN_DEBUGINFOD_0_179_FUNCTIONS \ + X(debuginfod_set_user_data) \ + X(debuginfod_get_user_data) \ + 
X(debuginfod_get_url) +#else +#define DRGN_DEBUGINFOD_0_179_FUNCTIONS +#endif + +#define DRGN_DEBUGINFOD_FUNCTIONS \ + X(debuginfod_begin) \ + X(debuginfod_end) \ + X(debuginfod_find_debuginfo) \ + X(debuginfod_find_executable) \ + X(debuginfod_set_progressfn) \ + DRGN_DEBUGINFOD_0_179_FUNCTIONS + +#if ENABLE_DLOPEN_DEBUGINFOD +#include <dlfcn.h> + +#define X(name) static typeof(&name) drgn_##name; +DRGN_DEBUGINFOD_FUNCTIONS +#undef X + +__attribute__((__constructor__)) +static void drgn_dlopen_debuginfod(void) +{ + void *handle = dlopen(DEBUGINFOD_SONAME, RTLD_LAZY); + if (handle) { + #define X(name) drgn_##name = dlsym(handle, #name); + DRGN_DEBUGINFOD_FUNCTIONS + #undef X + + #define X(name) || !drgn_##name + if (0 DRGN_DEBUGINFOD_FUNCTIONS) { + #undef X + #define X(name) drgn_##name = NULL; + DRGN_DEBUGINFOD_FUNCTIONS + #undef X + dlclose(handle); + } + } +} + +static inline bool drgn_have_debuginfod(void) +{ + return drgn_debuginfod_begin != NULL; +} +#else +// GCC and Clang optimize out the function pointer.
+#define X(name) __attribute__((__unused__)) \ + static const typeof(&name) drgn_##name = name; +DRGN_DEBUGINFOD_FUNCTIONS +#undef X + +static inline bool drgn_have_debuginfod(void) +{ + return true; +} +#endif + +#undef DRGN_DEBUGINFOD_FUNCTIONS +#undef DRGN_DEBUGINFOD_0_179_FUNCTIONS +#endif + static inline Dwarf *drgn_elf_file_dwarf_key(struct drgn_elf_file * const *entry) { return (*entry)->dwarf; @@ -2149,6 +2215,9 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, drgn_program_register_symbol_finder_impl(prog, &dbinfo->symbol_finder, "elf", &symbol_finder_ops, prog, 0); +#if WITH_DEBUGINFOD + dbinfo->debuginfod_client = NULL; +#endif drgn_module_table_init(&dbinfo->modules); c_string_set_init(&dbinfo->module_names); drgn_dwarf_info_init(dbinfo); @@ -2161,6 +2230,10 @@ void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo) drgn_debug_info_free_modules(dbinfo, false, true); assert(drgn_module_table_empty(&dbinfo->modules)); drgn_module_table_deinit(&dbinfo->modules); +#if WITH_DEBUGINFOD + if (dbinfo->debuginfod_client) + drgn_debuginfod_end(dbinfo->debuginfod_client); +#endif dwfl_end(dbinfo->dwfl); } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 6d69a13dd..614b4233b 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -12,6 +12,9 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H +#if WITH_DEBUGINFOD +#include +#endif #include #include #include @@ -142,6 +145,10 @@ struct drgn_debug_info { /** DWARF frontend library handle. */ Dwfl *dwfl; +#if WITH_DEBUGINFOD + /** debuginfod-client session. */ + debuginfod_client *debuginfod_client; +#endif /** Modules keyed by build ID and address range. 
*/ struct drgn_module_table modules; /** From 541d5e71ff43725238aa3825286529d4a001e1a9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 12:56:21 -0800 Subject: [PATCH 010/166] libdrgn: add API for choosing file to write progress bars to We try to pick a good default, but it's not exactly the same as logging so it needs extra flexibility. This will be used by upcoming debuginfod integration. Signed-off-by: Omar Sandoval --- docs/api_reference.rst | 4 + libdrgn/drgn.h | 13 ++++ libdrgn/log.c | 36 +++++++++ libdrgn/log.h | 3 + libdrgn/program.c | 1 + libdrgn/program.h | 2 + libdrgn/python/drgnpy.h | 7 ++ libdrgn/python/program.c | 158 ++++++++++++++++++++++++++++++++------- 8 files changed, 196 insertions(+), 28 deletions(-) diff --git a/docs/api_reference.rst b/docs/api_reference.rst index b3c4d7b22..c67f82a4a 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -186,6 +186,10 @@ Logging drgn logs using the standard :mod:`logging` module to a logger named ``"drgn"``. +drgn will also display progress bars on standard error if standard error is a +terminal, the ``"drgn"`` logger has a :class:`~logging.StreamHandler` for +``stderr``, and its log level is less than or equal to ``WARNING``. + Thread Safety ------------- diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index f6438e913..ecd69bcfc 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1211,6 +1211,12 @@ struct drgn_error *drgn_program_element_info(struct drgn_program *prog, * By default, the log file is set to `stderr` and the log level is @ref * DRGN_LOG_NONE, so logging is disabled. * + * Additionally, drgn can display a progress bar for some operations, like + * downloading debugging information. By default, progress bars are displayed on + * standard error if standard error is a terminal, the log file is set to + * `stderr`, and the log level is less than or equal to @ref DRGN_LOG_WARNING, + * but this can be changed (@ref drgn_program_set_progress_file()). 
+ * * @{ */ @@ -1282,6 +1288,13 @@ void drgn_program_get_log_callback(struct drgn_program *prog, drgn_log_fn **callback_ret, void **callback_arg_ret); +/** + * Write progress bars to the given file. + * + * @param[in] file File, or @c NULL to disable progress bars. + */ +void drgn_program_set_progress_file(struct drgn_program *prog, FILE *file); + /** @} */ /** diff --git a/libdrgn/log.c b/libdrgn/log.c index e43fc5314..f86b7c035 100644 --- a/libdrgn/log.c +++ b/libdrgn/log.c @@ -3,6 +3,7 @@ #include #include +#include #include "log.h" #include "program.h" @@ -81,3 +82,38 @@ void drgn_error_log(enum drgn_log_level level, struct drgn_program *prog, prog->log_fn(prog, prog->log_arg, level, format, ap, err); va_end(ap); } + +LIBDRGN_PUBLIC void drgn_program_set_progress_file(struct drgn_program *prog, + FILE *file) +{ + prog->progress_file = file; + prog->default_progress_file = false; +} + +FILE *drgn_program_get_progress_file(struct drgn_program *prog, + int *columns_ret) +{ + *columns_ret = -1; + + if (!prog->default_progress_file) { + if (prog->progress_file) { + int fd = fileno(prog->progress_file); + struct winsize winsize; + if (fd >= 0 && ioctl(fd, TIOCGWINSZ, &winsize) == 0) + *columns_ret = winsize.ws_col; + } + return prog->progress_file; + } + + if (drgn_log_is_enabled(prog, DRGN_LOG_WARNING) + && prog->log_fn == drgn_file_log_fn && prog->log_arg == stderr) { + int fd = fileno(stderr); + struct winsize winsize; + if (fd >= 0 && ioctl(fd, TIOCGWINSZ, &winsize) == 0) { + *columns_ret = winsize.ws_col; + return stderr; + } + } + + return NULL; +} diff --git a/libdrgn/log.h b/libdrgn/log.h index 23eaa5063..77cd54c06 100644 --- a/libdrgn/log.h +++ b/libdrgn/log.h @@ -86,4 +86,7 @@ void drgn_error_log(enum drgn_log_level level, struct drgn_program *prog, * @} */ +FILE *drgn_program_get_progress_file(struct drgn_program *prog, + int *columns_ret); + #endif /* DRGN_LOG_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index ab7783196..0f3e4db1b 100644 --- 
a/libdrgn/program.c +++ b/libdrgn/program.c @@ -108,6 +108,7 @@ void drgn_program_init(struct drgn_program *prog, drgn_thread_set_init(&prog->thread_set); drgn_program_set_log_level(prog, DRGN_LOG_NONE); drgn_program_set_log_file(prog, stderr); + prog->default_progress_file = true; drgn_object_init(&prog->vmemmap, prog); } diff --git a/libdrgn/program.h b/libdrgn/program.h index 095fd55a9..7092db913 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -244,7 +244,9 @@ struct drgn_program { */ drgn_log_fn *log_fn; void *log_arg; + FILE *progress_file; enum drgn_log_level log_level; + bool default_progress_file; /* * Blocking callbacks. diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 24ac5347c..97ace8b76 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -40,6 +40,13 @@ } while (0) #endif +#if PY_VERSION_HEX < 0x030900a1 +static inline PyObject *PyObject_CallNoArgs(PyObject *func) +{ + return PyObject_CallFunctionObjArgs(func, NULL); +} +#endif + #if PY_VERSION_HEX < 0x030d00a1 #define PyThreadState_GetUnchecked _PyThreadState_UncheckedGet #endif diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 154b0a022..eea999cbf 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -16,6 +16,7 @@ DEFINE_HASH_SET_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq); static PyObject *percent_s; +static PyObject *logging_StreamHandler; static PyObject *logger; static PyObject *logger_log; @@ -40,7 +41,7 @@ static void drgnpy_log_fn(struct drgn_program *prog, void *arg, PyErr_WriteUnraisable(logger_log); } -static int get_log_level(void) +static int get_logging_status(int *log_level_ret, bool *enable_progress_bar_ret) { // We don't use getEffectiveLevel() because that doesn't take // logging.disable() into account. 
@@ -57,38 +58,128 @@ static int get_log_level(void) if (ret) break; } - return level; + + *log_level_ret = level; + + if (level > DRGN_LOG_WARNING || !isatty(STDERR_FILENO)) { + *enable_progress_bar_ret = false; + return 0; + } + + PyObject *current_logger = logger; + _cleanup_pydecref_ PyObject *logger_to_decref = NULL; + do { + _cleanup_pydecref_ PyObject *handlers = + PyObject_GetAttrString(current_logger, "handlers"); + if (!handlers) + return -1; + + Py_ssize_t size = PySequence_Size(handlers); + if (size < 0) + return -1; + + for (Py_ssize_t i = 0; i < size; i++) { + _cleanup_pydecref_ PyObject *handler = + PySequence_GetItem(handlers, i); + if (!handler) + return -1; + + int r = PyObject_IsInstance(handler, + logging_StreamHandler); + if (r < 0) + return -1; + if (!r) + continue; + + _cleanup_pydecref_ PyObject *stream = + PyObject_GetAttrString(handler, "stream"); + if (!stream) + return -1; + + _cleanup_pydecref_ PyObject *fd_obj = + PyObject_CallMethod(stream, "fileno", NULL); + if (!fd_obj) { + // Ignore AttributeError, + // io.UnsupportedOperation, etc. + if (PyErr_ExceptionMatches(PyExc_Exception)) { + PyErr_Clear(); + continue; + } else { + return -1; + } + } + + long fd = PyLong_AsLong(fd_obj); + if (fd == -1 && PyErr_Occurred()) + return -1; + + if (fd == STDERR_FILENO) { + *enable_progress_bar_ret = true; + return 0; + } + } + + _cleanup_pydecref_ PyObject *propagate = + PyObject_GetAttrString(current_logger, "propagate"); + if (!propagate) + return -1; + int ret = PyObject_IsTrue(propagate); + if (ret < 0) + return -1; + if (!ret) + break; + + Py_XDECREF(logger_to_decref); + logger_to_decref = PyObject_GetAttrString(current_logger, + "parent"); + if (!logger_to_decref) + return -1; + current_logger = logger_to_decref; + } while (current_logger != Py_None); + + *enable_progress_bar_ret = false; + return 0; } -// This is slightly heinous. 
We need to sync the Python log level with the -// libdrgn log level, but the Python log level can change at any time, and there -// is no API to be notified of this. So, we monkey patch logger._cache.clear() -// to update the log level on every live program. This only works since CPython -// commit 78c18a9b9a14 ("bpo-30962: Added caching to Logger.isEnabledFor() -// (GH-2752)") (in v3.7), though. Before that, the best we can do is sync the -// level at the time that the program is created. +// This is slightly heinous. We need to sync the Python logging configuration +// with libdrgn, but the Python log level and handlers can change at any time, +// and there are no APIs to be notified of this. +// +// To sync the log level, we monkey patch logger._cache.clear() to update the +// libdrgn log level on every live program. This only works since CPython commit +// 78c18a9b9a14 ("bpo-30962: Added caching to Logger.isEnabledFor() (GH-2752)") +// (in v3.7), though. Before that, the best we can do is sync the level at the +// time that the program is created. +// +// We also check handlers in that monkey patch, which isn't the right place to +// hook but should work in practice in most cases. 
#if PY_VERSION_HEX >= 0x030700a1 static int cached_log_level; +static bool cached_enable_progress_bar; static struct pyobjectp_set programs = HASH_TABLE_INIT; -static int cache_log_level(void) +static int cache_logging_status(void) { - int level = get_log_level(); - if (level < 0) - return level; - cached_log_level = level; - return 0; + return get_logging_status(&cached_log_level, + &cached_enable_progress_bar); } static PyObject *LoggerCacheWrapper_clear(PyObject *self) { PyDict_Clear(self); - if (cache_log_level()) - return NULL; - for (struct pyobjectp_set_iterator it = pyobjectp_set_first(&programs); - it.entry; it = pyobjectp_set_next(it)) { - Program *prog = (Program *)*it.entry; - drgn_program_set_log_level(&prog->prog, cached_log_level); + if (!pyobjectp_set_empty(&programs)) { + if (cache_logging_status()) + return NULL; + for (struct pyobjectp_set_iterator it = + pyobjectp_set_first(&programs); + it.entry; it = pyobjectp_set_next(it)) { + Program *prog = (Program *)*it.entry; + drgn_program_set_log_level(&prog->prog, + cached_log_level); + drgn_program_set_progress_file(&prog->prog, + cached_enable_progress_bar + ? stderr : NULL); + } } Py_RETURN_NONE; } @@ -114,19 +205,23 @@ static int init_logger_cache_wrapper(void) NULL); if (!cache_wrapper) return -1; - if (PyObject_SetAttrString(logger, "_cache", cache_wrapper)) - return -1; - - return cache_log_level(); + return PyObject_SetAttrString(logger, "_cache", cache_wrapper); } static int Program_init_logging(Program *prog) { + // The cache is only maintained while there are live programs, so if + // this is the only program, we need to update the cache. 
+ if (pyobjectp_set_empty(&programs) && cache_logging_status()) + return -1; + PyObject *obj = (PyObject *)prog; if (pyobjectp_set_insert(&programs, &obj, NULL) < 0) return -1; drgn_program_set_log_callback(&prog->prog, drgnpy_log_fn, NULL); drgn_program_set_log_level(&prog->prog, cached_log_level); + drgn_program_set_progress_file(&prog->prog, + cached_enable_progress_bar ? stderr : NULL); return 0; } @@ -140,11 +235,14 @@ static int init_logger_cache_wrapper(void) { return 0; } static int Program_init_logging(Program *prog) { - int level = get_log_level(); - if (level < 0) - return level; + int level; + bool enable_progress_bar; + if (get_logging_status(&level, &enable_progress_bar)) + return -1; drgn_program_set_log_callback(&prog->prog, drgnpy_log_fn, NULL); drgn_program_set_log_level(&prog->prog, level); + drgn_program_set_progress_file(&prog->prog, + enable_progress_bar ? stderr : NULL); return 0; } @@ -160,6 +258,10 @@ int init_logging(void) _cleanup_pydecref_ PyObject *logging = PyImport_ImportModule("logging"); if (!logging) return -1; + logging_StreamHandler = PyObject_GetAttrString(logging, + "StreamHandler"); + if (!logging_StreamHandler) + return -1; logger = PyObject_CallMethod(logging, "getLogger", "s", "drgn"); if (!logger) return -1; From 4e83130008e9c28c3d775a0ab40075f2f9123720 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Dec 2024 13:18:36 -0800 Subject: [PATCH 011/166] Introduce module and debug info finder APIs drgn currently provides limited control over how debugging information is found. drgn has hardcoded logic for where to search for debugging information. The most the user can do is provide a list of files for drgn to try in addition to the default locations (with the -s CLI option or the drgn.Program.load_debug_info() method). The implementation is also a mess. 
We use libdwfl, but its data model is slightly different from what we want, so we have to work around it or reimplement its functionality in several places: see commits e5874ad18a53 ("libdrgn: use libdwfl"), e6abfeac0329 ("libdrgn: debug_info: report userspace core dump debug info ourselves"), and 1d4854a5bce7 ("libdrgn: implement optimized x86-64 ELF relocations") for some examples. The mismatched combination of libdwfl and our own code is difficult to maintain, and the lack of control over the whole debug info pipeline has made it difficult to fix several longstanding issues. The solution is a major rework removing our libdwfl dependency and replacing it with our own model. This (huge) commit is that rework comprising the following components: - drgn.Module/struct drgn_module, a representation of a binary used by a program. - Automatic discovery of the modules loaded in a program. - Interfaces for manually creating and overriding modules. - Automatic discovery of debugging information from the standard locations and debuginfod. - Interfaces for custom debug info finders and for manually overriding debugging information. - Tons of test cases. A lot of care was taken to make these interfaces extremely flexible yet cohesive. The existing interfaces are also reimplemented on top of the new functionality to maintain backwards compatibility, with one exception: drgn.Program.load_debug_info()/-s would previously accept files that it didn't find loaded in the program. This turned out to be a big footgun for users, so now this must be done explicitly (with drgn.ExtraModule/--extra-symbols). The API and implementation both owe a lot to libdwfl: - The concepts of modules, module address ranges/section addresses, and file biases are heavily inspired by the libdwfl interfaces. - Ideas for determining modules in userspace processes and core dumps were taken from libdwfl. - Our implementation of ELF symbol table address lookups is based on dwfl_module_addrinfo(). 
drgn has taken these concepts and fine-tuned them based on lessons learned. Credit is also due to Stephen Brennan for early testing and feedback. Closes #16, closes #25, closes #332. Signed-off-by: Omar Sandoval --- _drgn.pyi | 735 +- docs/advanced_usage.rst | 139 +- docs/api_reference.rst | 40 +- docs/user_guide.rst | 41 + drgn/__init__.py | 18 + drgn/cli.py | 53 +- libdrgn/Makefile.am | 4 + libdrgn/build-aux/gen_constants.py | 6 + libdrgn/build-aux/gen_elf_sections.py | 8 +- libdrgn/cleanup.h | 10 + libdrgn/debug_info.c | 6970 ++++++++++++----- libdrgn/debug_info.h | 398 +- libdrgn/drgn.h | 553 +- libdrgn/dwarf_info.c | 313 +- libdrgn/dwarf_info.h | 34 +- libdrgn/elf_file.c | 708 +- libdrgn/elf_file.h | 110 +- libdrgn/elf_symtab.c | 450 ++ libdrgn/elf_symtab.h | 55 + libdrgn/error.c | 17 - libdrgn/error.h | 19 +- libdrgn/examples/load_debug_info.c | 3 + libdrgn/handler.h | 5 + libdrgn/linux_kernel.c | 1743 ++--- libdrgn/linux_kernel.h | 16 +- libdrgn/orc_info.c | 4 + libdrgn/program.c | 152 +- libdrgn/program.h | 16 + libdrgn/python/drgnpy.h | 49 +- libdrgn/python/main.c | 11 + libdrgn/python/module.c | 593 ++ libdrgn/python/module_section_addresses.c | 260 + libdrgn/python/program.c | 399 + libdrgn/python/util.c | 20 + libdrgn/register_state.c | 9 +- libdrgn/symbol.c | 71 +- libdrgn/symbol.h | 14 +- libdrgn/util.h | 2 + scripts/crashme/Makefile | 65 + scripts/crashme/common.c | 10 + scripts/crashme/crashme.c | 25 + scripts/crashme/crashme.h | 15 + scripts/crashme/main.c | 10 + tests/linux_kernel/test_debug_info.py | 116 +- tests/resources/crashme.alt.zst | Bin 0 -> 409 bytes tests/resources/crashme.core.zst | Bin 0 -> 18351 bytes tests/resources/crashme.dwz.zst | Bin 0 -> 2716 bytes tests/resources/crashme.so.dwz.zst | Bin 0 -> 2482 bytes tests/resources/crashme.so.zst | Bin 0 -> 2561 bytes tests/resources/crashme.zst | Bin 0 -> 2727 bytes tests/resources/crashme_pie.core.zst | Bin 0 -> 18430 bytes tests/resources/crashme_pie.zst | Bin 0 -> 2803 bytes 
.../resources/crashme_pie_no_headers.core.zst | Bin 0 -> 14787 bytes tests/resources/crashme_static.core.zst | Bin 0 -> 5234 bytes tests/resources/crashme_static.zst | Bin 0 -> 5299 bytes tests/resources/crashme_static_pie.core.zst | Bin 0 -> 5482 bytes tests/resources/crashme_static_pie.zst | Bin 0 -> 8036 bytes tests/test_debug_info.py | 2671 +++++++ tests/test_dwarf.py | 38 +- tests/test_module.py | 489 ++ tests/test_symbol.py | 228 +- 61 files changed, 14074 insertions(+), 3641 deletions(-) create mode 100644 libdrgn/elf_symtab.c create mode 100644 libdrgn/elf_symtab.h create mode 100644 libdrgn/python/module.c create mode 100644 libdrgn/python/module_section_addresses.c create mode 100644 scripts/crashme/Makefile create mode 100644 scripts/crashme/common.c create mode 100644 scripts/crashme/crashme.c create mode 100644 scripts/crashme/crashme.h create mode 100644 scripts/crashme/main.c create mode 100644 tests/resources/crashme.alt.zst create mode 100644 tests/resources/crashme.core.zst create mode 100755 tests/resources/crashme.dwz.zst create mode 100755 tests/resources/crashme.so.dwz.zst create mode 100755 tests/resources/crashme.so.zst create mode 100755 tests/resources/crashme.zst create mode 100644 tests/resources/crashme_pie.core.zst create mode 100755 tests/resources/crashme_pie.zst create mode 100644 tests/resources/crashme_pie_no_headers.core.zst create mode 100644 tests/resources/crashme_static.core.zst create mode 100755 tests/resources/crashme_static.zst create mode 100644 tests/resources/crashme_static_pie.core.zst create mode 100755 tests/resources/crashme_static_pie.zst create mode 100644 tests/test_debug_info.py create mode 100644 tests/test_module.py diff --git a/_drgn.pyi b/_drgn.pyi index 9fcd8e75d..d7bbb9e95 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -20,6 +20,8 @@ from typing import ( Iterator, List, Mapping, + MutableMapping, + NamedTuple, Optional, Sequence, Set, @@ -29,9 +31,9 @@ from typing import ( ) if sys.version_info < (3, 8): - 
from typing_extensions import Final, Protocol + from typing_extensions import Final, Literal, Protocol else: - from typing import Final, Protocol + from typing import Final, Literal, Protocol if sys.version_info < (3, 10): from typing_extensions import TypeAlias @@ -691,34 +693,343 @@ class Program: """ ... + def modules(self) -> Iterator[Module]: + """Get an iterator over all of the created modules in the program.""" + + def loaded_modules(self) -> Iterator[Tuple[Module, bool]]: + """ + Determine what executables, libraries, etc. are loaded in the program + and create modules to represent them. + + This may automatically load some debugging information necessary to + enumerate the modules. Other than that, it does not load debugging + information. + + See :meth:`load_debug_info()` for a higher-level interface that does + load debugging information. + + :return: Iterator of module and ``True`` if it was newly created + or ``False`` if it was previously found. + """ + ... + + @overload + def main_module( + self, name: Optional[Path] = None, *, create: Literal[False] = False + ) -> MainModule: + """ + Find the main module. + + :param name: :attr:`Module.name`, or ``None`` to match any name + :raises LookupError: if main module has not been created or its name + doesn't match + """ + ... + + @overload + def main_module( + self, name: Path, *, create: Literal[True] + ) -> Tuple[MainModule, bool]: + """ + Find or create the main module. + + :param name: :attr:`Module.name` + :return: Module and ``True`` if it was newly created or ``False`` if it + was found. + :raises LookupError: if main module was already created with a + different name + """ + ... + + @overload + def shared_library_module( + self, + name: Path, + dynamic_address: IntegerLike, + *, + create: Literal[False] = False, + ) -> SharedLibraryModule: + """ + Find a shared library module. 
+ + :param name: :attr:`Module.name` + :param dynamic_address: :attr:`SharedLibraryModule.dynamic_address` + :return: Shared library module with the given name and dynamic address. + :raises LookupError: if no matching module has been created + """ + ... + + @overload + def shared_library_module( + self, name: Path, dynamic_address: IntegerLike, *, create: Literal[True] + ) -> Tuple[SharedLibraryModule, bool]: + """ + Find or create a shared library module. + + :param name: :attr:`Module.name` + :param dynamic_address: :attr:`SharedLibraryModule.dynamic_address` + :return: Module and ``True`` if it was newly created or ``False`` if it + was found. + """ + ... + + @overload + def vdso_module( + self, + name: Path, + dynamic_address: IntegerLike, + *, + create: Literal[False] = False, + ) -> VdsoModule: + """ + Find a vDSO module. + + :param name: :attr:`Module.name` + :param dynamic_address: :attr:`VdsoModule.dynamic_address` + :return: vDSO module with the given name and dynamic address. + :raises LookupError: if no matching module has been created + """ + ... + + @overload + def vdso_module( + self, name: Path, dynamic_address: IntegerLike, *, create: Literal[True] + ) -> Tuple[VdsoModule, bool]: + """ + Find or create a vDSO module. + + :param name: :attr:`Module.name` + :param dynamic_address: :attr:`VdsoModule.dynamic_address` + :return: Module and ``True`` if it was newly created or ``False`` if it + was found. + """ + ... + + @overload + def relocatable_module( + self, name: Path, address: IntegerLike, *, create: Literal[False] = False + ) -> RelocatableModule: + """ + Find a relocatable module. + + :param name: :attr:`Module.name` + :param address: :attr:`RelocatableModule.address` + :return: Relocatable module with the given name and address. + :raises LookupError: if no matching module has been created + """ + ... 
+ + @overload + def relocatable_module( + self, name: Path, address: IntegerLike, *, create: Literal[True] + ) -> Tuple[RelocatableModule, bool]: + """ + Find or create a relocatable module. + + :param name: :attr:`Module.name` + :param address: :attr:`RelocatableModule.address` + :return: Module and ``True`` if it was newly created or ``False`` if it + was found. + """ + ... + + @overload + def linux_kernel_loadable_module( + self, module_obj: Object, *, create: Literal[False] = False + ) -> RelocatableModule: + """ + Find a Linux kernel loadable module from a ``struct module`` object. + + Note that kernel modules are represented as relocatable modules. + + :param module_obj: ``struct module`` or ``struct module *`` object for + the kernel module. + :return: Relocatable module with a name and address matching + *module_obj*. + :raises LookupError: if no matching module has been created + """ + ... + + @overload + def linux_kernel_loadable_module( + self, module_obj: Object, *, create: Literal[True] + ) -> Tuple[RelocatableModule, bool]: + """ + Find or create a Linux kernel loadable module from a ``struct module`` + object. + + If a new module is created, its :attr:`~Module.address_range` and + :attr:`~RelocatableModule.section_addresses` are set from *module_obj*. + + :param module_obj: ``struct module`` or ``struct module *`` object for + the kernel module. + :return: Module and ``True`` if it was newly created or ``False`` if it + was found. + """ + ... + + @overload + def extra_module( + self, name: Path, id: IntegerLike = 0, *, create: Literal[False] = False + ) -> ExtraModule: + """ + Find an extra module. + + :param name: :attr:`Module.name` + :param id: :attr:`ExtraModule.id` + :return: Extra module with the given name and ID number. + :raises LookupError: if no matching module has been created + """ + ... 
+ + @overload + def extra_module( + self, name: Path, id: IntegerLike = 0, *, create: Literal[True] + ) -> Tuple[ExtraModule, bool]: + """ + Find or create an extra module. + + :param name: :attr:`Module.name` + :param id: :attr:`ExtraModule.id` + :return: Module and ``True`` if it was newly created or ``False`` if it + was found. + """ + ... + + def module(self, __address: IntegerLike) -> Module: + """ + Find the module containing the given address. + + Addresses are matched based on :attr:`Module.address_range`. + + :param address: Address to search for. + :raises LookupError: if no module contains the given address + """ + ... + + def register_debug_info_finder( + self, + name: str, + fn: Callable[[Sequence[Module]], None], + *, + enable_index: Optional[int] = None, + ) -> None: + """ + Register a callback for finding debugging information. + + This does not enable the finder unless *enable_index* is given. + + :param name: Finder name. + :param fn: Callable taking a list of :class:`Module`\\ s that want + debugging information. + + This should check :meth:`Module.wants_loaded_file()` and + :meth:`Module.wants_debug_file()` and do one of the following for + each module: + + * Obtain and/or locate a file wanted by the module and call + :meth:`Module.try_file()`. + * Install files for a later finder to use. + * Set :attr:`Module.loaded_file_status` or + :attr:`Module.debug_file_status` to + :attr:`ModuleFileStatus.DONT_NEED` if the finder believes that + the file is not needed. + * Ignore it, for example if the finder doesn't know how to find the + wanted files for the module. + :param enable_index: Insert the finder into the list of enabled + debugging information finders at the given index. If -1 or greater than the number of + enabled finders, insert it at the end. If ``None`` or not given, + don't enable the finder. + :raises ValueError: if there is already a finder with the given name + """ + ... 
+ + def registered_debug_info_finders(self) -> Set[str]: + """Return the names of all registered debugging information finders.""" + ... + + def set_enabled_debug_info_finders(self, names: Sequence[str]) -> None: + """ + Set the list of enabled debugging information finders. + + Finders are called in the same order as the list until all wanted files + have been found. + + Finders that are not in the list are not called. + + :param names: Names of finders to enable, in order. + :raises ValueError: if no finder has a given name or the same name is + given more than once + """ + ... + + def enabled_debug_info_finders(self) -> List[str]: + """ + Return the names of enabled debugging information finders, in order. + """ + ... + debug_info_path: Optional[str] + """ + Directories to search for debugging information files. + + The standard debugging information finder supports searching for files by + *build ID* (a unique byte string present in both the :ref:`loaded file + <module-loaded-file>` and the :ref:`debug file <module-debug-file>`) and by + *debug link* (a name and checksum in the loaded file that refers to the + debug file). + + This setting controls what directories the standard debugging information + finder searches. It is a sequence of paths separated by colons (``:``). + + Searches by build ID ignore relative paths. They check under each absolute + path for a file named ``.build-id/xx/yyyy`` (for loaded files) or + ``.build-id/xx/yyyy.debug`` (for debug files), where ``xxyyyy`` is the + lowercase hexadecimal representation of the build ID. + + Searches by debug link check every path for a file with the name given by + the debug link. Relative paths are relative to the directory containing the + loaded file. An empty path means the directory containing the loaded file. + + The default is ``:.debug:/usr/lib/debug``, which should work out of the box + on most Linux distributions. + + If ``None``, then searches by build ID and debug link are disabled (unless + the debug link is an absolute path). 
+ """ + def load_debug_info( self, - paths: Optional[Iterable[Path]] = None, + paths: Optional[Iterable[Path]] = (), default: bool = False, main: bool = False, ) -> None: """ - Load debugging information for a list of executable or library files. + Load debugging information for the given set of files and/or modules. - Note that this is parallelized, so it is usually faster to load - multiple files at once rather than one by one. + This determines what executables, libraries, etc. are loaded in the + program (see :meth:`loaded_modules()`) and tries to load their + debugging information from the given *paths*. - :param paths: Paths of binary files. - :param default: Also load debugging information which can automatically - be determined from the program. + .. note:: + It is much more efficient to load multiple files at once rather + than one by one when possible. - For the Linux kernel, this tries to load ``vmlinux`` and any loaded - kernel modules from a few standard locations. + :param paths: Paths of binary files to try. - For userspace programs, this tries to load the executable and any - loaded libraries. + Files that don't correspond to any loaded modules are ignored. See + :class:`ExtraModule` for a way to provide arbitrary debugging + information. + :param default: Try to load all debugging information for all loaded + modules. - This implies ``main=True``. - :param main: Also load debugging information for the main executable. + The files in *paths* are tried first before falling back to the + enabled debugging information finders. - For the Linux kernel, this tries to load ``vmlinux``. + This implies ``main=True``. + :param main: Try to load all debugging information for the main module. - This is currently ignored for userspace programs. + The files in *paths* are tried first before falling back to the + enabled debugging information finders. 
:raises MissingDebugInfoError: if debugging information was not available for some files; other files with debugging information are still loaded @@ -727,10 +1038,20 @@ class Program: def load_default_debug_info(self) -> None: """ - Load debugging information which can automatically be determined from - the program. + Load all debugging information that can automatically be determined + from the program. - This is equivalent to ``load_debug_info(None, True)``. + This is equivalent to ``load_debug_info(default=True)``. + """ + ... + + def load_module_debug_info(self, *modules: Module) -> None: + """ + Load debugging information for the given modules using the enabled + debugging information finders. + + The files to search for are controlled by + :attr:`Module.loaded_file_status` and :attr:`Module.debug_file_status`. """ ... cache: Dict[Any, Any] @@ -1105,6 +1426,380 @@ class NoDefaultProgramError(Exception): ... +class Module: + """ + A ``Module`` represents an executable, library, or other binary file used + by a program. It has several subclasses representing specific types of + modules. + + Modules are uniquely identified by their type, name, and a type-specific + value. + + Modules have several attributes that are determined automatically whenever + possible but may be overridden manually if needed. + + Modules can be assigned files that provide debugging and runtime + information: + + * .. _module-loaded-file: + + The "loaded file" is the file containing the executable code, data, etc. + used by the program at runtime. + + + * .. _module-debug-file: + + The "debug file" is the file containing debugging information (e.g., + `DWARF `_). + + The loaded file and debug file may be the same file, for example, an + unstripped binary. They may be different files if the binary was stripped + and its debugging information was split into a separate file. + + + * .. 
_module-supplementary-debug-file: + + The debug file may depend on a "supplementary debug file" such as one + generated by `dwz(1) `_. If so, + then the supplementary debug file must be found before the debug file can + be used. + """ + + prog: Final[Program] + """Program that this module is from.""" + name: Final[str] + """ + Name of this module. + + Its exact meaning varies by module type. + """ + address_range: Optional[Tuple[int, int]] + """ + Address range where this module is loaded. + + This is a tuple of the start (inclusive) and end (exclusive) addresses. If + the module is not loaded in memory, then both are 0. If not known yet, then + this is ``None``. + + :meth:`Program.loaded_modules()` sets this automatically from the program + state/core dump when possible. Otherwise, for :class:`MainModule`, + :class:`SharedLibraryModule`, and :class:`VdsoModule`, it may be set + automatically when a file is assigned to the module. It is never set + automatically for :class:`ExtraModule`. It can also be set manually. + """ + build_id: Optional[bytes] + """ + Unique byte string (e.g., GNU build ID) identifying files used by this + module. + + If not known, then this is ``None``. + + :meth:`Program.loaded_modules()` sets this automatically from the program + state/core dump when possible. Otherwise, when a file is assigned to the + module, it is set to the file's build ID if it is not already set. It can + also be set manually. + """ + loaded_file_status: ModuleFileStatus + """Status of the module's :ref:`loaded file `.""" + loaded_file_path: Optional[str] + """ + Absolute path of the module's :ref:`loaded file `, or + ``None`` if not known. + """ + loaded_file_bias: Optional[int] + """ + Difference between the load address in the program and addresses in the + :ref:`loaded file ` itself. + + This is often non-zero due to address space layout randomization (ASLR). 
+ + It is set automatically based on the module type: + + * For :class:`MainModule`, it is set based on metadata from the process or + core dump (the `auxiliary vector + `_ for userspace + programs, the ``VMCOREINFO`` note for the Linux kernel). + * For :class:`SharedLibraryModule` and :class:`VdsoModule`, it is set based + on :attr:`~SharedLibraryModule.dynamic_address`. + * For :class:`RelocatableModule`, it is set to zero. Addresses are adjusted + according to :attr:`~RelocatableModule.section_addresses` instead. + * For :class:`ExtraModule`, it is set based on + :attr:`~Module.address_range`. + """ + debug_file_status: ModuleFileStatus + """Status of the module's :ref:`debug file `.""" + debug_file_path: Optional[str] + """ + Absolute path of the module's :ref:`debug file `, or + ``None`` if not known. + """ + debug_file_bias: Optional[int] + """ + Difference between the load address in the program and addresses in the + :ref:`debug file `. + + See :attr:`loaded_file_bias`. + """ + supplementary_debug_file_kind: Optional[SupplementaryFileKind] + """ + Kind of the module's :ref:`supplementary debug file + `, or ``None`` if not known or not needed. + """ + supplementary_debug_file_path: Optional[str] + """ + Absolute path of the module's :ref:`supplementary debug file + `, or ``None`` if not known or not needed. + """ + + def wants_loaded_file(self) -> bool: + """ + Return whether this module wants a :ref:`loaded file + `. + + This should be preferred over checking :attr:`loaded_file_status` + directly since this is future-proof against new status types being + added. It is currently equivalent to ``module.loaded_file_status == + ModuleFileStatus.WANT``. + """ + ... + + def wants_debug_file(self) -> bool: + """ + Return whether this module wants a :ref:`debug file + `. + + This should be preferred over checking :attr:`debug_file_status` + directly since this is future-proof against new status types being + added. 
It is currently equivalent to ``module.debug_file_status == + ModuleFileStatus.WANT or module.debug_file_status == + ModuleFileStatus.WANT_SUPPLEMENTARY``. + """ + ... + + def wanted_supplementary_debug_file(self) -> WantedSupplementaryFile: + """ + Return information about the :ref:`supplementary debug file + <module-supplementary-debug-file>` that this module currently wants. + + :raises ValueError: if the module doesn't currently want a + supplementary debug file (i.e., ``module.debug_file_status != + ModuleFileStatus.WANT_SUPPLEMENTARY``) + """ + ... + + def try_file( + self, + path: Path, + *, + fd: int = -1, + force: bool = False, + ) -> None: + """ + Try to use the given file for this module. + + If the file does not appear to belong to this module, then it is + ignored. This currently checks that the file and the module have the + same build ID. + + If :attr:`loaded_file_status` is :attr:`~ModuleFileStatus.WANT` and the + file is loadable, then it is used as the :ref:`loaded file + <module-loaded-file>` and :attr:`loaded_file_status` is set to + :attr:`~ModuleFileStatus.HAVE`. + + If :attr:`debug_file_status` is :attr:`~ModuleFileStatus.WANT` or + :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` and the file provides + debugging information, then it is used as the :ref:`debug file + <module-debug-file>` and :attr:`debug_file_status` is set to + :attr:`~ModuleFileStatus.HAVE`. However, if the file requires a + supplementary debug file, then it is not used as the debug file yet and + :attr:`debug_file_status` is set to + :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` instead. + + If :attr:`debug_file_status` is + :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` and the file matches + :meth:`wanted_supplementary_debug_file()`, then the previously found + file is used as the debug file, the given file is used as the + :ref:`supplementary debug file <module-supplementary-debug-file>`, and + :attr:`debug_file_status` is set to :attr:`~ModuleFileStatus.HAVE`. + + The file may be used as both the loaded file and debug file if + applicable. + + :param path: Path to file. 
+ :param fd: If nonnegative, an open file descriptor referring to the + file. This always takes ownership of the file descriptor even if + the file is not used or on error, so the caller must not close it. + :param force: If ``True``, then don't check whether the file matches + the module. + """ + ... + +class MainModule(Module): + """ + Main module. + + There is only one main module in a program. For userspace programs, it is + the executable, and its name is usually the absolute path of the + executable. For the Linux kernel, it is the kernel image, a.k.a. + ``vmlinux``, and its name is "kernel". + """ + +class SharedLibraryModule(Module): + """ + Shared library (a.k.a. dynamic library, dynamic shared object, or ``.so``) + module. + + Shared libraries are uniquely identified by their name (usually the + absolute path of the shared object file) and dynamic address. + """ + + dynamic_address: Final[int] + """Address of the shared object's dynamic section.""" + +class VdsoModule(Module): + """ + Virtual dynamic shared object (vDSO) module. + + The vDSO is a special shared library automatically loaded into a process by + the kernel; see :manpage:`vdso(7)`. It is uniquely identified by its name + (the ``SONAME`` field of the shared object file) and dynamic address. + """ + + dynamic_address: Final[int] + """Address of the shared object's dynamic section.""" + +class RelocatableModule(Module): + """ + Relocatable object module. + + A relocatable object is an object file requiring a linking step to assign + section addresses and adjust the file to reference those addresses. + + Linux kernel loadable modules (``.ko`` files) are a special kind of + relocatable object. + + For userspace programs, relocatable objects are usually intermediate + products of the compilation process (``.o`` files). They are not typically + loaded at runtime. However, drgn allows manually defining a relocatable + module and assigning its section addresses if needed. 
+ + Relocatable modules are uniquely identified by a name and address. + """ + + address: Final[int] + """ + Address identifying the module. + + For Linux kernel loadable modules, this is the module base address. + """ + + section_addresses: MutableMapping[str, int] + """ + Mapping from section names to assigned addresses. + + Once a file has been assigned to the module, this can no longer be + modified. + + :meth:`Program.linux_kernel_loadable_module()` and + :meth:`Program.loaded_modules()` prepopulate this for Linux kernel loadable + modules. + """ + +class ExtraModule(Module): + """ + Module with extra debugging information. + + For advanced use cases, it may be necessary to manually add debugging + information that does not fit into any of the categories above. + ``ExtraModule`` is intended for these use cases. For example, it can be + used to add debugging information from a standalone file that is not in use + by a particular program. + + Extra modules are uniquely identified by an arbitrary name and ID number. + """ + + id: Final[int] + """Arbitrary identification number.""" + +class ModuleFileStatus(enum.Enum): + """ + Status of a file in a :class:`Module`. + + This is usually used to communicate with debugging information finders; see + :meth:`Program.register_debug_info_finder()`. + """ + + WANT = ... + """File has not been found and should be searched for.""" + + HAVE = ... + """File has already been found and assigned.""" + + DONT_WANT = ... + """ + File has not been found, but it should not be searched for. + + :meth:`Module.try_file()` and debugging information finders are required to + honor this and will never change it. However, other operations may reset + this to :attr:`WANT` when they load debugging information automatically. + """ + + DONT_NEED = ... + """ + File has not been found and is not needed (e.g., because its debugging + information is not applicable or is provided through another mechanism). 
+ + In contrast to :attr:`DONT_WANT`, drgn itself will never change this to + :attr:`WANT`. + """ + + WANT_SUPPLEMENTARY = ... + """ + File has been found, but it requires a supplementary file before it can be + used. See :meth:`Module.wanted_supplementary_debug_file()`. + """ + +class WantedSupplementaryFile(NamedTuple): + """Information about a wanted supplementary file.""" + + kind: SupplementaryFileKind + """Kind of supplementary file.""" + path: str + """Path of main file that wants the supplementary file.""" + supplementary_path: str + """ + Path to the supplementary file. + + This may be absolute or relative to :attr:`path`. + """ + checksum: bytes + """ + Unique identifier of the supplementary file. + + The interpretation depends on :attr:`kind`. + """ + +class SupplementaryFileKind(enum.Enum): + """ + Kind of supplementary file. + + .. note:: + DWARF 5 supplementary files are not currently supported but may be in + the future. + + DWARF package files are not considered supplementary files. They are + considered part of the debug file and must have the same path as the + debug file plus a ".dwp" extension. + """ + + GNU_DEBUGALTLINK = ... + """ + GNU-style supplementary debug file referred to by a ``.gnu_debugaltlink`` + section. + + Its :attr:`~WantedSupplementaryFile.checksum` is the file's GNU build ID. + """ + class Thread: """A thread in a program.""" diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 84c44eb84..c717b0b0d 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -6,15 +6,128 @@ Advanced Usage The :doc:`user_guide` covers basic usage of drgn, but drgn also supports more advanced use cases which are covered here. -Loading Debugging Symbols -------------------------- +.. _advanced-modules: -drgn will automatically load debugging information based on the debugged -program (e.g., from loaded kernel modules or loaded shared libraries). 
-:meth:`drgn.Program.load_debug_info()` can be used to load additional debugging -information:: +Modules and Debugging Symbols +----------------------------- - >>> prog.load_debug_info(['./libfoo.so', '/usr/lib/libbar.so']) +drgn tries to determine what executable, libraries, etc. a program uses and +load debugging symbols automatically. As long as :doc:`debugging symbols are +installed <getting_debugging_symbols>`, this should work out of the box on +standard setups. + +For non-standard scenarios, drgn allows overriding the defaults with different +levels of control and complexity. + +Loading Debugging Symbols From Non-Standard Locations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +drgn searches standard locations for debugging symbols. If you have debugging +symbols available in a non-standard location, you can provide it to the CLI +with the ``-s``/``--symbols`` option: + +.. code-block:: console + + $ drgn -s ./libfoo.so -s /usr/lib/libbar.so.debug + +Or with the :meth:`drgn.Program.load_debug_info()` method:: + + >>> prog.load_debug_info(["./libfoo.so", "/usr/lib/libbar.so.debug"]) + +Loading Debugging Symbols For Specific Modules +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``-s`` and ``load_debug_info()`` try the given files against all of the modules +loaded in the program based on build IDs. You can also :ref:`look up +<api-module-constructors>` a specific module and try a given file for just that +module with :meth:`drgn.Module.try_file()`:: + + >>> prog.main_module().try_file("build/vmlinux") + +Loading Additional Debugging Symbols +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``-s`` and ``load_debug_info()`` ignore files that don't correspond to a loaded +module. To load debugging symbols from an arbitrary file, pass +``--extra-symbols`` to the CLI: + +..
code-block:: console + + $ drgn --extra-symbols ./my_extra_symbols.debug + +Or create a :class:`drgn.ExtraModule`:: + + >>> module = prog.extra_module("my_extra_symbols") + >>> module.try_file("./my_extra_symbols.debug") + +Listing Modules +^^^^^^^^^^^^^^^ + +By default, drgn creates a module for everything loaded in the program. You can +disable this in the CLI with ``--no-default-symbols``. + +You can find or create the loaded modules programmatically with +:meth:`drgn.Program.loaded_modules()`:: + + >>> for module, new in prog.loaded_modules(): + ... print("Created" if new else "Found", module) + +You can see all of the created modules with :meth:`drgn.Program.modules()`. + +Overriding Modules +^^^^^^^^^^^^^^^^^^ + +You can create modules with the :ref:`module factory functions +<api-module-constructors>`. You can also modify various attributes of the +:class:`drgn.Module` class. + +Debug Info Finders +^^^^^^^^^^^^^^^^^^ + +A callback for automatically finding debugging symbols for a set of modules can +be registered with :meth:`drgn.Program.register_debug_info_finder()`. Here is +an example for getting debugging symbols on Fedora Linux using DNF: + +.. code-block:: python3 + + import subprocess + + import drgn + + # Install debugging symbols using the DNF debuginfo-install plugin. Note that + # this is mainly for demonstration purposes; debuginfod, which drgn supports + # out of the box, is more reliable. + def dnf_debug_info_finder(modules: list[drgn.Module]) -> None: + packages = set() + for module in modules: + if not module.wants_debug_file(): + continue + + if not module.name.startswith("/"): + continue + + proc = subprocess.run( + ["rpm", "--query", "--file", module.name], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + ) + if proc.returncode == 0: + packages.add(proc.stdout.rstrip("\n")) + + # Try installing their debug info.
+ subprocess.call( + ["sudo", "dnf", "debuginfo-install", "--skip-broken", "--"] + + sorted(packages) + ) + + # Leave the rest to the standard debug info finder. + + + prog.register_debug_info_finder("dnf", dnf_debug_info_finder, enable_index=0) + +Currently, debug info finders must be configured explicitly by the user. In the +future, there will be a plugin system for doing so automatically. Library ------- @@ -92,9 +205,9 @@ Environment Variables Some of drgn's behavior can be modified through environment variables: ``DRGN_MAX_DEBUG_INFO_ERRORS`` - The maximum number of individual errors to report in a - :exc:`drgn.MissingDebugInfoError`. Any additional errors are truncated. The - default is 5; -1 is unlimited. + The maximum number of warnings about missing debugging information to log + on CLI startup or from :meth:`drgn.Program.load_debug_info()`. Any + additional errors are truncated. The default is 5; -1 is unlimited. ``DRGN_PREFER_ORC_UNWINDER`` Whether to prefer using `ORC @@ -104,12 +217,6 @@ Some of drgn's behavior can be modified through environment variables: vice versa. This environment variable is mainly intended for testing and may be ignored in the future. -``DRGN_USE_LIBDWFL_REPORT`` - Whether drgn should use libdwfl to find debugging information for core - dumps instead of its own implementation (0 or 1). The default is 0. This - environment variable is mainly intended as an escape hatch in case of bugs - in drgn's implementation and will be ignored in the future. - ``DRGN_USE_LIBKDUMPFILE_FOR_ELF`` Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. diff --git a/docs/api_reference.rst b/docs/api_reference.rst index c67f82a4a..eecd0138a 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -7,7 +7,7 @@ Programs -------- .. 
drgndoc:: Program - :exclude: (void|int|bool|float|struct|union|class|enum|typedef|pointer|array|function)_type + :exclude: (void|int|bool|float|struct|union|class|enum|typedef|pointer|array|function)_type|(main|shared_library|vdso|relocatable|linux_kernel_loadable|extra)_module .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags @@ -159,6 +159,44 @@ can be used just like types obtained from :meth:`Program.type()`. .. drgndoc:: Program.array_type .. drgndoc:: Program.function_type +Modules +------- + +.. drgndoc:: Module +.. drgndoc:: MainModule +.. drgndoc:: SharedLibraryModule +.. drgndoc:: VdsoModule +.. drgndoc:: RelocatableModule +.. drgndoc:: ExtraModule +.. drgndoc:: ModuleFileStatus +.. drgndoc:: WantedSupplementaryFile +.. drgndoc:: SupplementaryFileKind + +.. _api-module-constructors: + +Module Lookups/Constructors +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For each module type, there is a corresponding method to create a module of +that type or find one that was previously created:: + + >>> prog.extra_module("foo", 1234) + Traceback (most recent call last): + ... + LookupError: module not found + >>> prog.extra_module("foo", 1234, create=True) + (prog.extra_module(name='foo', id=0x4d2), True) + >>> prog.extra_module("foo", 1234) + >>> prog.extra_module("foo", 1234, create=True) + (prog.extra_module(name='foo', id=0x4d2), False) + +.. drgndoc:: Program.main_module +.. drgndoc:: Program.shared_library_module +.. drgndoc:: Program.vdso_module +.. drgndoc:: Program.relocatable_module +.. drgndoc:: Program.linux_kernel_loadable_module +.. drgndoc:: Program.extra_module + Miscellaneous ------------- diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 0ebf60b4d..1040ea8c7 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -319,6 +319,47 @@ functions like :meth:`drgn.Program.int_type()`:: You won't usually need to work with types directly, but see :ref:`api-reference-types` if you do. 
+Modules +^^^^^^^ + +drgn tracks executables, shared libraries, loadable kernel modules, and other +binary files used by a program with the :class:`drgn.Module` class. Modules +store their name, identifying information, load address, and debugging symbols. + +.. code-block:: pycon + :caption: Linux kernel example + + >>> for module in prog.modules(): + ... print(module) + ... + prog.main_module(name='kernel') + prog.relocatable_module(name='rng_core', address=0xffffffffc0400000) + prog.relocatable_module(name='virtio_rng', address=0xffffffffc0402000) + prog.relocatable_module(name='binfmt_misc', address=0xffffffffc0401000) + >>> prog.main_module().debug_file_path + '/usr/lib/modules/6.13.0-rc1-vmtest34.1default/build/vmlinux' + +.. code-block:: pycon + :caption: Userspace example + + >>> for module in prog.modules(): + ... print(module) + ... + prog.main_module(name='/usr/bin/grep') + prog.shared_library_module(name='/lib64/ld-linux-x86-64.so.2', dynamic_address=0x7f51772b6e68) + prog.shared_library_module(name='/lib64/libc.so.6', dynamic_address=0x7f51771af960) + prog.shared_library_module(name='/lib64/libpcre2-8.so.0', dynamic_address=0x7f5177258c68) + prog.vdso_module(name='linux-vdso.so.1', dynamic_address=0x7f51772803e0) + >>> prog.main_module().loaded_file_path + '/usr/bin/grep' + >>> prog.main_module().debug_file_path + '/usr/lib/debug/usr/bin/grep-3.11-7.fc40.x86_64.debug' + +drgn normally initializes the appropriate modules and loads their debugging +symbols automatically. Advanced use cases can create or modify modules and load +debugging symbols manually; see the :ref:`advanced usage guide +<advanced-modules>`.
+ Platforms ^^^^^^^^^ diff --git a/drgn/__init__.py b/drgn/__init__.py index 5a03f5a30..981bef3e7 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -49,11 +49,15 @@ from _drgn import ( NULL, Architecture, + ExtraModule, FaultError, FindObjectFlags, IntegerLike, Language, + MainModule, MissingDebugInfoError, + Module, + ModuleFileStatus, NoDefaultProgramError, Object, ObjectAbsentError, @@ -66,8 +70,11 @@ ProgramFlags, Qualifiers, Register, + RelocatableModule, + SharedLibraryModule, StackFrame, StackTrace, + SupplementaryFileKind, Symbol, SymbolBinding, SymbolIndex, @@ -80,6 +87,8 @@ TypeMember, TypeParameter, TypeTemplateParameter, + VdsoModule, + WantedSupplementaryFile, alignof, cast, container_of, @@ -106,11 +115,15 @@ __all__ = ( "Architecture", + "ExtraModule", "FaultError", "FindObjectFlags", "IntegerLike", "Language", + "MainModule", "MissingDebugInfoError", + "Module", + "ModuleFileStatus", "NULL", "NoDefaultProgramError", "Object", @@ -124,8 +137,11 @@ "ProgramFlags", "Qualifiers", "Register", + "RelocatableModule", + "SharedLibraryModule", "StackFrame", "StackTrace", + "SupplementaryFileKind", "Symbol", "SymbolBinding", "SymbolIndex", @@ -138,6 +154,8 @@ "TypeMember", "TypeParameter", "TypeTemplateParameter", + "VdsoModule", + "WantedSupplementaryFile", "alignof", "cast", "container_of", diff --git a/drgn/cli.py b/drgn/cli.py index 8d3497588..36d6d9f22 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -89,19 +89,6 @@ def version_header() -> str: return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {libkdumpfile})" -class _QuietAction(argparse.Action): - def __init__( - self, option_strings: Any, dest: Any, nargs: Any = 0, **kwds: Any - ) -> None: - super().__init__(option_strings, dest, nargs=nargs, **kwds) - - def __call__( - self, parser: Any, namespace: Any, values: Any, option_string: Any = None - ) -> None: - setattr(namespace, self.dest, True) - namespace.log_level = "none" - - def 
_identify_script(path: str) -> str: EI_NIDENT = 16 SIZEOF_E_TYPE = 2 @@ -161,9 +148,8 @@ def _displayhook(value: Any) -> None: def _main() -> None: handler = logging.StreamHandler() - handler.setFormatter( - _LogFormatter(hasattr(sys.stderr, "fileno") and os.isatty(sys.stderr.fileno())) - ) + color = hasattr(sys.stderr, "fileno") and os.isatty(sys.stderr.fileno()) + handler.setFormatter(_LogFormatter(color)) logging.getLogger().addHandler(handler) version = version_header() @@ -193,7 +179,9 @@ def _main() -> None: metavar="PATH", type=str, action="append", - help="load additional debugging symbols from the given file; this option may be given more than once", + help="load debugging symbols from the given file. " + "If the file does not correspond to a loaded executable, library, or module, " + "then it is ignored. This option may be given more than once", ) default_symbols_group = symbol_group.add_mutually_exclusive_group() default_symbols_group.add_argument( @@ -201,15 +189,25 @@ def _main() -> None: dest="default_symbols", action="store_const", const={"main": True}, - help="only load debugging symbols for the main executable and those added with -s; " - "for userspace programs, this is currently equivalent to --no-default-symbols", + help="only load debugging symbols for the main executable " + "and those added with -s or --extra-symbols", ) default_symbols_group.add_argument( "--no-default-symbols", dest="default_symbols", action="store_const", const={}, - help="don't load any debugging symbols that were not explicitly added with -s", + help="don't load any debugging symbols that were not explicitly added " + "with -s or --extra-symbols", + ) + symbol_group.add_argument( + "--extra-symbols", + metavar="PATH", + type=str, + action="append", + help="load additional debugging symbols from the given file, " + "which is assumed not to correspond to a loaded executable, library, or module. 
" + "This option may be given more than once", ) advanced_group = parser.add_argument_group("advanced") @@ -235,7 +233,9 @@ def _main() -> None: parser.add_argument( "-q", "--quiet", - action=_QuietAction, + dest="log_level", + action="store_const", + const="none", help="don't print any logs or download progress", ) parser.add_argument( @@ -268,8 +268,6 @@ def _main() -> None: else: print(version, file=sys.stderr, flush=True) - if not args.quiet: - os.environ["DEBUGINFOD_PROGRESS"] = "1" if args.log_level == "none": logger.setLevel(logging.CRITICAL + 1) else: @@ -316,7 +314,14 @@ def _main() -> None: try: prog.load_debug_info(args.symbols, **args.default_symbols) except drgn.MissingDebugInfoError as e: - logger.warning("%s", e) + logger.warning("\033[1m%s\033[m" if color else "%s", e) + + if args.extra_symbols: + for extra_symbol_path in args.extra_symbols: + extra_symbol_path = os.path.abspath(extra_symbol_path) + module, new = prog.extra_module(extra_symbol_path, create=True) + if new: + module.try_file(extra_symbol_path) if args.script: sys.argv = args.script diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index e2899fd09..6414d95ab 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -66,6 +66,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ elf_file.h \ elf_notes.c \ elf_notes.h \ + elf_symtab.c \ + elf_symtab.h \ elf_sections.h \ error.c \ error.h \ @@ -183,6 +185,8 @@ _drgn_la_SOURCES = python/constants.c \ python/helpers.c \ python/language.c \ python/main.c \ + python/module.c \ + python/module_section_addresses.c \ python/object.c \ python/platform.c \ python/program.c \ diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 8e2180b1d..c6999292b 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -16,6 +16,7 @@ class ConstantClass(NamedTuple): CONSTANTS = ( ConstantClass("Architecture", "Enum", r"DRGN_ARCH_([a-zA-Z0-9_]+)"), 
ConstantClass("FindObjectFlags", "Flag", r"DRGN_FIND_OBJECT_([a-zA-Z0-9_]+)"), + ConstantClass("ModuleFileStatus", "Enum", r"DRGN_MODULE_FILE_([a-zA-Z0-9_]+)"), ConstantClass( "PlatformFlags", "Flag", @@ -28,6 +29,11 @@ class ConstantClass(NamedTuple): ConstantClass( "Qualifiers", "Flag", r"DRGN_QUALIFIER_([a-zA-Z0-9_]+)", [("NONE", "0")] ), + ConstantClass( + "SupplementaryFileKind", + "Enum", + r"DRGN_SUPPLEMENTARY_FILE_([a-z-A-Z0-9_]+)(? None: out_file.write(f"\t{section_enumerator_name(section_name)},\n") out_file.write( """\ - /** Indices less than this are cached when the module is loaded. */ - DRGN_SECTION_INDEX_NUM_PRECACHE, + /** Indices less than this are used by the DWARF index. */ + DRGN_SECTION_INDEX_NUM_DWARF_INDEX, """ ) for i, section_name in enumerate(CACHED_SECTIONS): if i == 0: out_file.write( - f"\t{section_enumerator_name(section_name)} = DRGN_SECTION_INDEX_NUM_PRECACHE,\n" + f"\t{section_enumerator_name(section_name)} = DRGN_SECTION_INDEX_NUM_DWARF_INDEX,\n" ) else: out_file.write(f"\t{section_enumerator_name(section_name)},\n") diff --git a/libdrgn/cleanup.h b/libdrgn/cleanup.h index 9ca90b4ab..9b71fb3d1 100644 --- a/libdrgn/cleanup.h +++ b/libdrgn/cleanup.h @@ -10,8 +10,10 @@ #ifndef DRGN_CLEANUP_H #define DRGN_CLEANUP_H +#include #include #include +#include #include #define _cleanup_(x) __attribute__((__cleanup__(x))) @@ -39,6 +41,14 @@ static inline void closep(int *fd) close(*fd); } +/** Call @c closedir() when the variable goes out of scope. */ +#define _cleanup_closedir_ _cleanup_(closedirp) +static inline void closedirp(DIR **dirp) +{ + if (*dirp) + closedir(*dirp); +} + /** * Get the value of a pointer variable and reset it to @c NULL. 
* diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 7fee6286a..70b49cd4e 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -10,24 +11,70 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include +#include +#include "array.h" #include "binary_buffer.h" +#include "binary_search.h" #include "cleanup.h" +#include "crc32.h" #include "debug_info.h" #include "elf_file.h" #include "elf_notes.h" #include "error.h" +#include "hexlify.h" +#include "io.h" #include "linux_kernel.h" +#include "log.h" #include "openmp.h" #include "platform.h" +#include "pp.h" #include "program.h" +#include "serialize.h" #include "util.h" +#define _cleanup_elf_end_ _cleanup_(elf_endp) +static inline void elf_endp(Elf **elfp) +{ + elf_end(*elfp); +} + +#if !_ELFUTILS_PREREQ(0, 175) +// If we don't have dwelf_elf_begin(), this is equivalent except that it doesn't +// handle compressed files. +static inline Elf *dwelf_elf_begin(int fd) +{ + return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); +} +#endif + +DEFINE_HASH_MAP_FUNCTIONS(drgn_module_section_address_map, + c_string_key_hash_pair, c_string_key_eq); + +// This is currently always DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK. +struct drgn_module_wanted_supplementary_file { + struct drgn_elf_file *file; + // supplementary_path and checksum are owned by file. + const char *supplementary_path; + const void *checksum; + size_t checksum_len; + // checksum_str is a separate allocation. + char *checksum_str; + // Used to detect when the wanted supplementary file has changed in + // order to avoid redundant attempts. + uint64_t generation; +}; + #if WITH_DEBUGINFOD #if _ELFUTILS_PREREQ(0, 179) #define DRGN_DEBUGINFOD_0_179_FUNCTIONS \ @@ -79,8 +126,7 @@ static inline bool drgn_have_debuginfod(void) } #else // GCC and Clang optimize out the function pointer. 
-#define X(name) __attribute__((__unused__)) \ - static const typeof(&name) drgn_##name = name; +#define X(name) static const typeof(&name) drgn_##name = name; DRGN_DEBUGINFOD_FUNCTIONS #undef X @@ -96,2107 +142,5218 @@ static inline bool drgn_have_debuginfod(void) static inline Dwarf *drgn_elf_file_dwarf_key(struct drgn_elf_file * const *entry) { - return (*entry)->dwarf; + return (*entry)->_dwarf; } DEFINE_HASH_TABLE_FUNCTIONS(drgn_elf_file_dwarf_table, drgn_elf_file_dwarf_key, ptr_key_hash_pair, scalar_key_eq); -DEFINE_VECTOR_FUNCTIONS(drgn_module_vector); - -struct drgn_module_key { - const void *build_id; - size_t build_id_len; - uint64_t start, end; -}; +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); -static inline struct drgn_module_key -drgn_module_key(struct drgn_module * const *entry) +static inline +struct drgn_module_key drgn_module_entry_key(struct drgn_module * const *entry) { - return (struct drgn_module_key){ - .build_id = (*entry)->build_id, - .build_id_len = (*entry)->build_id_len, - .start = (*entry)->start, - .end = (*entry)->end, - }; + struct drgn_module_key key; + key.kind = (*entry)->kind; + SWITCH_ENUM(key.kind) { + case DRGN_MODULE_SHARED_LIBRARY: + key.shared_library.name = (*entry)->name; + key.shared_library.dynamic_address = + (*entry)->shared_library.dynamic_address; + break; + case DRGN_MODULE_VDSO: + key.vdso.name = (*entry)->name; + key.vdso.dynamic_address = (*entry)->vdso.dynamic_address; + break; + case DRGN_MODULE_RELOCATABLE: + key.relocatable.name = (*entry)->name; + key.relocatable.address = (*entry)->relocatable.address; + break; + case DRGN_MODULE_EXTRA: + key.extra.name = (*entry)->name; + key.extra.id = (*entry)->extra.id; + break; + case DRGN_MODULE_MAIN: + default: + UNREACHABLE(); + } + return key; } static inline struct hash_pair drgn_module_key_hash_pair(const struct drgn_module_key *key) { - size_t hash = hash_bytes(key->build_id, key->build_id_len); - hash = hash_combine(hash, key->start); - hash = 
hash_combine(hash, key->end); + size_t hash = key->kind; + SWITCH_ENUM(key->kind) { + case DRGN_MODULE_SHARED_LIBRARY: + hash = hash_combine(hash, + hash_c_string(key->shared_library.name)); + hash = hash_combine(hash, key->shared_library.dynamic_address); + break; + case DRGN_MODULE_VDSO: + hash = hash_combine(hash, hash_c_string(key->vdso.name)); + hash = hash_combine(hash, key->vdso.dynamic_address); + break; + case DRGN_MODULE_RELOCATABLE: + hash = hash_combine(hash, hash_c_string(key->relocatable.name)); + hash = hash_combine(hash, key->relocatable.address); + break; + case DRGN_MODULE_EXTRA: + hash = hash_combine(hash, hash_c_string(key->extra.name)); + hash = hash_combine(hash, key->extra.id); + break; + case DRGN_MODULE_MAIN: + default: + UNREACHABLE(); + } return hash_pair_from_avalanching_hash(hash); } + static inline bool drgn_module_key_eq(const struct drgn_module_key *a, const struct drgn_module_key *b) { - return (a->build_id_len == b->build_id_len && - memcmp(a->build_id, b->build_id, a->build_id_len) == 0 && - a->start == b->start && a->end == b->end); -} -DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_key, - drgn_module_key_hash_pair, drgn_module_key_eq); - -DEFINE_HASH_SET_FUNCTIONS(c_string_set, c_string_key_hash_pair, - c_string_key_eq); - -/** - * @c Dwfl_Callbacks::find_elf() implementation. - * - * If the ELF file was reported directly, this returns it. Otherwise, it falls - * back to an appropriate callback. - * - * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c - * Elf handle, which we need for a couple of reasons: - * - * - We usually already have the @c Elf handle open in order to identify the - * file. - * - For kernel modules, we set the section addresses in the @c Elf handle - * ourselves instead of using @c Dwfl_Callbacks::section_address(). - * - * Additionally, there's a special case for vmlinux. 
It is usually an @c ET_EXEC - * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN - * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but - * @ref dwfl_report_elf() bypasses this hack. - * - * So, we're stuck using @c dwfl_report_module() and this dummy callback. - */ -static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap, - const char *name, Dwarf_Addr base, - char **file_name, Elf **elfp) -{ - struct drgn_module *module = *userdatap; - if (module->elf) { - *file_name = module->path; - int fd = module->fd; - *elfp = module->elf; - // libdwfl consumes the returned path, file descriptor, and ELF - // handle, so clear the fields. - module->path = NULL; - module->fd = -1; - module->elf = NULL; - return fd; - } - if (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - *elfp = NULL; - return -1; - } else if (module->prog->flags & DRGN_PROGRAM_IS_LIVE) { - return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, - base, file_name, elfp); - } else { - return dwfl_build_id_find_elf(dwfl_module, userdatap, name, - base, file_name, elfp); + if (a->kind != b->kind) + return false; + SWITCH_ENUM(a->kind) { + case DRGN_MODULE_SHARED_LIBRARY: + return (strcmp(a->shared_library.name, + b->shared_library.name) == 0 + && a->shared_library.dynamic_address + == b->shared_library.dynamic_address); + break; + case DRGN_MODULE_VDSO: + return (strcmp(a->vdso.name, b->vdso.name) == 0 + && a->vdso.dynamic_address == b->vdso.dynamic_address); + break; + case DRGN_MODULE_RELOCATABLE: + return (strcmp(a->relocatable.name, b->relocatable.name) == 0 + && a->relocatable.address == b->relocatable.address); + break; + case DRGN_MODULE_EXTRA: + return (strcmp(a->extra.name, b->extra.name) == 0 + && a->extra.id == b->extra.id); + break; + case DRGN_MODULE_MAIN: + default: + UNREACHABLE(); } } -/** - * @c Dwfl_Callbacks::section_address() implementation. 
- * - * We set the section header @c sh_addr in memory instead of using this, but - * libdwfl requires the callback pointer to be non-@c NULL. It will be called - * for any sections that still have a zero @c sh_addr, meaning they are not - * present in memory. - */ -static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap, - const char *name, Dwarf_Addr base, - const char *secname, Elf32_Word shndx, - const GElf_Shdr *shdr, Dwarf_Addr *addr) +DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_entry_key, + drgn_module_key_hash_pair, drgn_module_key_eq); + +static inline uint64_t drgn_module_address_key(const struct drgn_module *entry) { - *addr = -1; - return DWARF_CB_OK; + return entry->start; } -static const Dwfl_Callbacks drgn_dwfl_callbacks = { - .find_elf = drgn_dwfl_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; +DEFINE_BINARY_SEARCH_TREE_FUNCTIONS(drgn_module_address_tree, node, + drgn_module_address_key, + binary_search_tree_scalar_cmp, splay); -static void drgn_module_destroy(struct drgn_module *module) +static void drgn_module_free_section_addresses(struct drgn_module *module) { - if (module) { - drgn_error_destroy(module->err); - drgn_module_orc_info_deinit(module); - drgn_module_dwarf_info_deinit(module); - elf_end(module->elf); - if (module->fd != -1) - close(module->fd); - free(module->path); - for (struct drgn_elf_file_dwarf_table_iterator it = - drgn_elf_file_dwarf_table_first(&module->split_dwarf_files); - it.entry; - it = drgn_elf_file_dwarf_table_next(it)) - drgn_elf_file_destroy(*it.entry); - drgn_elf_file_dwarf_table_deinit(&module->split_dwarf_files); - if (module->debug_file != module->loaded_file) - drgn_elf_file_destroy(module->debug_file); - drgn_elf_file_destroy(module->loaded_file); - free(module->name); - free(module); - } -} - -static void drgn_module_finish_indexing(struct drgn_debug_info *dbinfo, - struct drgn_module *module) -{ - module->state 
= DRGN_DEBUG_INFO_MODULE_INDEXED; - if (module->name) { - int ret = c_string_set_insert(&dbinfo->module_names, - (const char **)&module->name, - NULL); - /* drgn_debug_info_update_index() should've reserved enough. */ - assert(ret != -1); - } -} - -/* - * Wrapper around dwfl_report_end() that works around a libdwfl bug which causes - * it to close stdin when it frees some modules that were reported by - * dwfl_core_file_report(). This was fixed in elfutils 0.177 by commit - * d37f6ea7e3e5 ("libdwfl: Fix fd leak/closing wrong fd after - * dwfl_core_file_report()"), but we support older versions. - */ -static int my_dwfl_report_end(struct drgn_debug_info *dbinfo, - int (*removed)(Dwfl_Module *, void *, - const char *, Dwarf_Addr, void *), - void *arg) -{ - int fd = -1; - if ((dbinfo->prog->flags - & (DRGN_PROGRAM_IS_LINUX_KERNEL | DRGN_PROGRAM_IS_LIVE)) == 0) - fd = dup(0); - int ret = dwfl_report_end(dbinfo->dwfl, removed, arg); - if (fd != -1) { - dup2(fd, 0); - close(fd); - } - return ret; -} - -struct drgn_dwfl_module_removed_arg { - struct drgn_debug_info *dbinfo; - bool finish_indexing; - bool free_all; -}; - -static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap, - const char *name, Dwarf_Addr base, - void *_arg) -{ - struct drgn_dwfl_module_removed_arg *arg = _arg; - /* - * userdatap is actually a void ** like for the other libdwfl callbacks, - * but dwfl_report_end() has the wrong signature for the removed - * callback. - */ - struct drgn_module *module = *(void **)userdatap; - if (arg->finish_indexing && module && - module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) - drgn_module_finish_indexing(arg->dbinfo, module); - if (arg->free_all || !module || - module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { - drgn_module_destroy(module); - } else { - /* - * The module was already indexed. Report it again so libdwfl - * doesn't remove it. 
- */ - Dwarf_Addr end; - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, - NULL, NULL); - dwfl_report_module(arg->dbinfo->dwfl, name, base, end); - } - return DWARF_CB_OK; + for (auto it = + drgn_module_section_address_map_first(&module->section_addresses); + it.entry; + it = drgn_module_section_address_map_next(it)) + free(it.entry->key); } -static void drgn_debug_info_free_modules(struct drgn_debug_info *dbinfo, - bool finish_indexing, bool free_all) +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find(struct drgn_program *prog, + const struct drgn_module_key *key) { - for (struct drgn_module_table_iterator it = - drgn_module_table_first(&dbinfo->modules); it.entry; ) { - struct drgn_module *module = *it.entry; - struct drgn_module **nextp = it.entry; - do { - struct drgn_module *next = module->next; - if (finish_indexing && - module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) - drgn_module_finish_indexing(dbinfo, module); - if (free_all || - module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { - if (module == *nextp) { - if (nextp == it.entry && !next) { - it = drgn_module_table_delete_iterator(&dbinfo->modules, - it); - } else { - if (!next) - it = drgn_module_table_next(it); - *nextp = next; - } - } - void **userdatap; - dwfl_module_info(module->dwfl_module, - &userdatap, NULL, NULL, NULL, - NULL, NULL, NULL); - *userdatap = NULL; - drgn_module_destroy(module); - } else { - if (!next) - it = drgn_module_table_next(it); - nextp = &module->next; - } - module = next; - } while (module); + if (key->kind == DRGN_MODULE_MAIN) { + return prog->dbinfo.main_module; + } else { + struct drgn_module_table_iterator it = + drgn_module_table_search(&prog->dbinfo.modules, key); + return it.entry ? 
*it.entry : NULL; } - - dwfl_report_begin(dbinfo->dwfl); - struct drgn_dwfl_module_removed_arg arg = { - .dbinfo = dbinfo, - .finish_indexing = finish_indexing, - .free_all = free_all, - }; - my_dwfl_report_end(dbinfo, drgn_dwfl_module_removed, &arg); } -struct drgn_error * -drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, - const char *name, const char *message, - struct drgn_error *err) -{ - if (err && err->code == DRGN_ERROR_NO_MEMORY) { - /* Always fail hard if we're out of memory. */ - goto err; - } - if (load->num_errors == 0 && - !string_builder_append(&load->errors, - "missing some debugging symbols (see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html):")) - goto err; - if (load->num_errors < load->max_errors) { - if (!string_builder_line_break(&load->errors)) - goto err; - if (!string_builder_append(&load->errors, " ")) - goto err; - if (name && !string_builder_append(&load->errors, name)) - goto err; - if (name && (message || err) && - !string_builder_append(&load->errors, " (")) - goto err; - if (message && !string_builder_append(&load->errors, message)) - goto err; - if (message && err && - !string_builder_append(&load->errors, ": ")) - goto err; - if (err && !string_builder_append_error(&load->errors, err)) - goto err; - if (name && (message || err) && - !string_builder_appendc(&load->errors, ')')) - goto err; - } - load->num_errors++; - drgn_error_destroy(err); - return NULL; - -err: - drgn_error_destroy(err); - return &drgn_enomem; +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, + uint64_t address) +{ + struct drgn_module_address_tree_iterator it = + drgn_module_address_tree_search_le(&prog->dbinfo.modules_by_address, + &address); + if (!it.entry || address >= it.entry->end) + return NULL; + return it.entry; } -static struct drgn_error * -drgn_debug_info_report_module(struct drgn_debug_info_load_state *load, - const void *build_id, size_t build_id_len, - uint64_t 
start, uint64_t end, const char *name, - Dwfl_Module *dwfl_module, const char *path, - int fd, Elf *elf, bool *new_ret) +struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, + const struct drgn_module_key *key, + const char *name, + struct drgn_module **ret, + bool *new_ret) { - struct drgn_debug_info *dbinfo = load->dbinfo; struct drgn_error *err; - char *path_key = NULL; - - if (new_ret) - *new_ret = false; struct hash_pair hp; - // Silence -Wmaybe-uninitialized false positive last seen with GCC 12 on - // i386 and Arm. - struct drgn_module_table_iterator it = {}; - if (build_id_len) { - struct drgn_module_key key = { - .build_id = build_id, - .build_id_len = build_id_len, - .start = start, - .end = end, - }; - hp = drgn_module_table_hash(&key); - it = drgn_module_table_search_hashed(&dbinfo->modules, &key, - hp); - if (it.entry && - (*it.entry)->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { - /* We've already indexed this module. */ - err = NULL; - goto free; - } - } - - if (!dwfl_module) { - path_key = realpath(path, NULL); - if (!path_key) { - path_key = strdup(path); - if (!path_key) { - err = &drgn_enomem; - goto free; + if (key->kind == DRGN_MODULE_MAIN) { + if (prog->dbinfo.main_module) { + if (strcmp(prog->dbinfo.main_module->name, name) != 0) { + return drgn_error_create(DRGN_ERROR_LOOKUP, + "main module already exists with different name"); } + *ret = prog->dbinfo.main_module; + if (new_ret) + *new_ret = false; + return NULL; } - - dwfl_module = dwfl_report_module(dbinfo->dwfl, path_key, start, - end); - if (!dwfl_module) { - err = drgn_error_libdwfl(); - goto free; + } else { + hp = drgn_module_table_hash(key); + struct drgn_module_table_iterator it = + drgn_module_table_search_hashed(&prog->dbinfo.modules, + key, hp); + if (it.entry) { + *ret = *it.entry; + if (new_ret) + *new_ret = false; + return NULL; } } - void **userdatap; - dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL, - NULL); - if (*userdatap) { - /* 
We've already reported this file at this offset. */ - err = NULL; - goto free; + struct drgn_module *module = calloc(1, sizeof(*module)); + if (!module) + return &drgn_enomem; + module->start = module->end = UINT64_MAX; + + module->prog = prog; + module->kind = key->kind; + // Linux userspace core dumps usually filter out file-backed mappings + // (see coredump_filter in core(5)), so we need the loaded file to read + // the text. Additionally, .eh_frame is in the loaded file and not the + // debug file. + // + // Linux kernel core dumps preserve the main kernel and kernel module + // text, and the kernel doesn't use .eh_frame, so we don't need the + // loaded file for the kernel. + module->loaded_file_status = DRGN_MODULE_FILE_WANT; + module->debug_file_status = DRGN_MODULE_FILE_WANT; + SWITCH_ENUM(key->kind) { + case DRGN_MODULE_MAIN: + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + module->loaded_file_status = DRGN_MODULE_FILE_DONT_NEED; + break; + case DRGN_MODULE_SHARED_LIBRARY: + module->shared_library.dynamic_address = + key->shared_library.dynamic_address; + break; + case DRGN_MODULE_VDSO: + module->vdso.dynamic_address = key->vdso.dynamic_address; + break; + case DRGN_MODULE_RELOCATABLE: + module->relocatable.address = key->relocatable.address; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + module->loaded_file_status = DRGN_MODULE_FILE_DONT_NEED; + break; + case DRGN_MODULE_EXTRA: + module->extra.id = key->extra.id; + break; + default: + UNREACHABLE(); } - if (new_ret) - *new_ret = true; - struct drgn_module *module = calloc(1, sizeof(*module)); - if (!module) { + module->name = strdup(name); + if (!module->name) { err = &drgn_enomem; - goto free; + goto err_module; } - module->prog = load->dbinfo->prog; - module->state = DRGN_DEBUG_INFO_MODULE_NEW; - module->build_id = build_id; - module->build_id_len = build_id_len; - module->start = start; - module->end = end; - if (name) { - module->name = strdup(name); - if (!module->name) { + + if (key->kind 
== DRGN_MODULE_MAIN) { + prog->dbinfo.main_module = module; + } else { + if (drgn_module_table_insert_searched(&prog->dbinfo.modules, + &module, hp, NULL) < 0) { err = &drgn_enomem; - free(module); - goto free; + goto err_name; } + prog->dbinfo.modules_generation++; } - module->dwfl_module = dwfl_module; - module->path = path_key; - module->fd = fd; - module->elf = elf; - drgn_elf_file_dwarf_table_init(&module->split_dwarf_files); - - /* path_key, fd and elf are owned by the module now. */ - if (!drgn_module_vector_append(&load->new_modules, &module)) { - drgn_module_destroy(module); - return &drgn_enomem; - } - if (build_id_len) { - if (it.entry) { - /* - * The first module with this build ID is in - * new_modules, so insert it after in the list, not - * before. - */ - module->next = (*it.entry)->next; - (*it.entry)->next = module; - } else if (drgn_module_table_insert_searched(&dbinfo->modules, - &module, hp, - NULL) < 0) { - drgn_module_vector_pop(&load->new_modules); - drgn_module_destroy(module); - return &drgn_enomem; - } + drgn_elf_file_dwarf_table_init(&module->split_dwarf_files); + drgn_module_section_address_map_init(&module->section_addresses); + + SWITCH_ENUM(module->kind) { + case DRGN_MODULE_MAIN: + drgn_log_debug(prog, "created main module %s", module->name); + break; + case DRGN_MODULE_SHARED_LIBRARY: + drgn_log_debug(prog, + "created shared library module %s@0x%" PRIx64, + module->name, + module->shared_library.dynamic_address); + break; + case DRGN_MODULE_VDSO: + drgn_log_debug(prog, + "created vDSO module %s@0x%" PRIx64, + module->name, module->vdso.dynamic_address); + break; + case DRGN_MODULE_RELOCATABLE: + drgn_log_debug(prog, + "created relocatable module %s@0x%" PRIx64, + module->name, module->relocatable.address); + break; + case DRGN_MODULE_EXTRA: + drgn_log_debug(prog, + "created extra module %s 0x%" PRIx64, + module->name, module->extra.id); + break; + default: + UNREACHABLE(); } - *userdatap = module; + + *ret = module; + if (new_ret) + 
*new_ret = true; return NULL; -free: - elf_end(elf); - if (fd != -1) - close(fd); - free(path_key); +err_name: + free(module->name); +err_module: + free(module); return err; } -struct drgn_error * -drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, - const char *path, int fd, Elf *elf, uint64_t start, - uint64_t end, const char *name, bool *new_ret) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, + const char *name, + struct drgn_module **ret, + bool *new_ret) { + struct drgn_module_key key = { .kind = DRGN_MODULE_MAIN }; + return drgn_module_find_or_create(prog, &key, name, ret, new_ret); +} - struct drgn_error *err; - const void *build_id; - ssize_t build_id_len = drgn_elf_gnu_build_id(elf, &build_id); - if (build_id_len < 0) { - err = drgn_debug_info_report_error(load, path, NULL, - drgn_error_libelf()); - elf_end(elf); - close(fd); - return err; - } else if (build_id_len == 0) { - build_id = NULL; - } - return drgn_debug_info_report_module(load, build_id, build_id_len, - start, end, name, NULL, path, fd, - elf, new_ret); +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_or_create_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_SHARED_LIBRARY, + .shared_library.name = name, + .shared_library.dynamic_address = dynamic_address, + }; + return drgn_module_find_or_create(prog, &key, name, ret, new_ret); } -static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module, - void **userdatap, - const char *name, Dwarf_Addr base, - void *arg) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret) { - struct drgn_debug_info_load_state *load = arg; - struct drgn_error *err; + const struct drgn_module_key key = { + .kind 
= DRGN_MODULE_VDSO, + .vdso.name = name, + .vdso.dynamic_address = dynamic_address, + }; + return drgn_module_find_or_create(prog, &key, name, ret, new_ret); +} - if (*userdatap) { - /* - * This was either reported from drgn_debug_info_report_elf() or - * already indexed. - */ - return DWARF_CB_OK; - } - - const unsigned char *build_id; - GElf_Addr build_id_vaddr; - int build_id_len = dwfl_module_build_id(dwfl_module, &build_id, - &build_id_vaddr); - if (build_id_len < 0) { - err = drgn_debug_info_report_error(load, name, NULL, - drgn_error_libdwfl()); - if (err) - goto err; - } else if (build_id_len == 0) { - build_id = NULL; - } - Dwarf_Addr end; - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL); - err = drgn_debug_info_report_module(load, build_id, build_id_len, base, - end, NULL, dwfl_module, name, -1, - NULL, NULL); - if (err) - goto err; - return DWARF_CB_OK; +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_or_create_relocatable(struct drgn_program *prog, + const char *name, uint64_t address, + struct drgn_module **ret, bool *new_ret) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_RELOCATABLE, + .relocatable.name = name, + .relocatable.address = address, + }; + return drgn_module_find_or_create(prog, &key, name, ret, new_ret); +} -err: - drgn_error_destroy(err); - return DWARF_CB_ABORT; +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, + const char *name, + uint64_t id, + struct drgn_module **ret, + bool *new_ret) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_EXTRA, + .extra.name = name, + .extra.id = id, + }; + return drgn_module_find_or_create(prog, &key, name, ret, new_ret); } -static struct drgn_error *drgn_get_nt_file(Elf *elf, const char **ret, - size_t *len_ret) +static void +drgn_module_clear_wanted_supplementary_debug_file(struct drgn_module *module) { - size_t phnum; - if (elf_getphdrnum(elf, &phnum) != 0) - return drgn_error_libelf(); - for 
(size_t i = 0; i < phnum; i++) { - GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type == PT_NOTE) { - Elf_Data *data = elf_getdata_rawchunk(elf, - phdr->p_offset, - phdr->p_filesz, - note_header_type(phdr->p_align)); - if (!data) - return drgn_error_libelf(); - GElf_Nhdr nhdr; - size_t offset = 0, name_offset, desc_offset; - while (offset < data->d_size && - (offset = gelf_getnote(data, offset, &nhdr, - &name_offset, - &desc_offset))) { - const char *name = - (char *)data->d_buf + name_offset; - if (nhdr.n_namesz == sizeof("CORE") && - memcmp(name, "CORE", sizeof("CORE")) == 0 && - nhdr.n_type == NT_FILE) { - *ret = (char *)data->d_buf + desc_offset; - *len_ret = nhdr.n_descsz; - return NULL; - } - } - } + struct drgn_module_wanted_supplementary_file *wanted = + module->wanted_supplementary_debug_file; + if (wanted) { + free(wanted->checksum_str); + if (wanted->file != module->loaded_file + && wanted->file != module->debug_file) + drgn_elf_file_destroy(wanted->file); + free(wanted); + module->wanted_supplementary_debug_file = NULL; } - *ret = NULL; - *len_ret = 0; - return NULL; } -struct drgn_mapped_file_segment { - uint64_t start; - uint64_t end; - uint64_t file_offset; -}; - -DEFINE_VECTOR(drgn_mapped_file_segment_vector, struct drgn_mapped_file_segment); - -DEFINE_HASH_MAP(drgn_mapped_files, const char *, - struct drgn_mapped_file_segment_vector, c_string_key_hash_pair, - c_string_key_eq); +// Note: this doesn't remove the module from the module tables. 
+static void drgn_module_destroy(struct drgn_module *module) +{ + drgn_module_free_section_addresses(module); + drgn_module_section_address_map_deinit(&module->section_addresses); + drgn_module_orc_info_deinit(module); + drgn_module_dwarf_info_deinit(module); + drgn_module_clear_wanted_supplementary_debug_file(module); + drgn_elf_file_destroy(module->supplementary_debug_file); + if (module->debug_file != module->loaded_file) + drgn_elf_file_destroy(module->debug_file); + drgn_elf_file_destroy(module->loaded_file); + free(module->build_id); + free(module->name); + free(module); +} -struct userspace_core_report_state { - struct drgn_mapped_files files; - void *phdr_buf; - size_t phdr_buf_capacity; - void *segment_buf; - size_t segment_buf_capacity; -}; +void drgn_module_delete(struct drgn_module *module) +{ + assert(!module->loaded_file); + assert(!module->debug_file); + if (module->start < module->end) { + drgn_module_address_tree_delete_entry(&module->prog->dbinfo.modules_by_address, + module); + } + if (module->kind == DRGN_MODULE_MAIN) { + module->prog->dbinfo.main_module = NULL; + } else { + struct drgn_module_key key = + drgn_module_entry_key((struct drgn_module * const *)&module); + drgn_module_table_delete(&module->prog->dbinfo.modules, &key); + module->prog->dbinfo.modules_generation++; + } + drgn_module_destroy(module); +} -static struct drgn_error *parse_nt_file_error(struct binary_buffer *bb, - const char *pos, - const char *message) +LIBDRGN_PUBLIC +struct drgn_program *drgn_module_program(const struct drgn_module *module) { - return drgn_error_create(DRGN_ERROR_OTHER, "couldn't parse NT_FILE"); + return module->prog; } -static bool -drgn_mapped_file_segments_contiguous(const struct drgn_mapped_file_segment *segment1, - const struct drgn_mapped_file_segment *segment2) +LIBDRGN_PUBLIC +struct drgn_module_key drgn_module_key(const struct drgn_module *module) { - if (segment1->end != segment2->start) - return false; - uint64_t size = segment1->end - 
segment1->start; - return segment1->file_offset + size == segment2->file_offset; + if (module->kind == DRGN_MODULE_MAIN) { + struct drgn_module_key key; + key.kind = DRGN_MODULE_MAIN; + return key; + } + return drgn_module_entry_key((struct drgn_module * const *)&module); } -static struct drgn_error * -userspace_core_get_mapped_files(struct drgn_debug_info_load_state *load, - struct userspace_core_report_state *core, - const char *nt_file, size_t nt_file_len) +LIBDRGN_PUBLIC +enum drgn_module_kind drgn_module_kind(const struct drgn_module *module) { - struct drgn_error *err; + return module->kind; +} - GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(load->dbinfo->prog->core, - &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - bool is_64_bit = ehdr->e_ident[EI_CLASS] == ELFCLASS64; - bool little_endian = ehdr->e_ident[EI_DATA] == ELFDATA2LSB; +LIBDRGN_PUBLIC const char *drgn_module_name(const struct drgn_module *module) +{ + return module->name; +} - struct binary_buffer bb; - binary_buffer_init(&bb, nt_file, nt_file_len, little_endian, - parse_nt_file_error); +LIBDRGN_PUBLIC bool drgn_module_address_range(const struct drgn_module *module, + uint64_t *start_ret, + uint64_t *end_ret) +{ + if (module->start == UINT64_MAX) + return false; + *start_ret = module->start; + *end_ret = module->end; + return true; +} - /* - * fs/binfmt_elf.c in the Linux kernel source code documents the format - * of NT_FILE as: - * - * long count -- how many files are mapped - * long page_size -- units for file_ofs - * array of [COUNT] elements of - * long start - * long end - * long file_ofs - * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
- */ - uint64_t count, page_size; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&bb, &count))) - return err; - if (count > UINT64_MAX / 24) - return binary_buffer_error(&bb, "count is too large"); - if ((err = binary_buffer_next_u64(&bb, &page_size)) || - (err = binary_buffer_skip(&bb, count * 24))) - return err; - } else { - if ((err = binary_buffer_next_u32_into_u64(&bb, &count))) - return err; - if (count > UINT64_MAX / 12) - return binary_buffer_error(&bb, "count is too large"); - if ((err = binary_buffer_next_u32_into_u64(&bb, &page_size)) || - (err = binary_buffer_skip(&bb, count * 12))) - return err; +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_set_address_range(struct drgn_module *module, uint64_t start, + uint64_t end) +{ + if (start >= end && start != 0 && end != UINT64_MAX) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "invalid module address range"); } - for (uint64_t i = 0; i < count; i++) { - struct drgn_mapped_file_segment segment; - if (is_64_bit) { - memcpy(&segment, nt_file + 16 + i * 24, 24); - if (bb.bswap) { - segment.start = bswap_64(segment.start); - segment.end = bswap_64(segment.end); - segment.file_offset = bswap_64(segment.file_offset); - } - } else { - struct { - uint32_t start; - uint32_t end; - uint32_t file_offset; - } segment32; - memcpy(&segment32, nt_file + 8 + i * 12, 12); - if (bb.bswap) { - segment.start = bswap_32(segment32.start); - segment.end = bswap_32(segment32.end); - segment.file_offset = bswap_32(segment32.file_offset); - } else { - segment.start = segment32.start; - segment.end = segment32.end; - segment.file_offset = segment32.file_offset; - } - } - segment.file_offset *= page_size; + if (module->start < module->end) { + drgn_module_address_tree_delete_entry(&module->prog->dbinfo.modules_by_address, + module); + } - struct drgn_mapped_files_entry entry = { - .key = bb.pos, - }; - if ((err = binary_buffer_skip_string(&bb))) - return err; - struct drgn_mapped_files_iterator it; - int r = 
drgn_mapped_files_insert(&core->files, &entry, &it); - if (r < 0) - return &drgn_enomem; - if (r == 1) - drgn_mapped_file_segment_vector_init(&it.entry->value); - - /* - * The Linux kernel creates separate entries for contiguous - * mappings with different memory protections even though the - * protection is not included in NT_FILE. Merge them if we can. - */ - if (!drgn_mapped_file_segment_vector_empty(&it.entry->value) - && drgn_mapped_file_segments_contiguous(drgn_mapped_file_segment_vector_last(&it.entry->value), - &segment)) - drgn_mapped_file_segment_vector_last(&it.entry->value)->end = segment.end; - else if (!drgn_mapped_file_segment_vector_append(&it.entry->value, - &segment)) - return &drgn_enomem; + module->start = start; + module->end = end; + if (start < end) { + // We don't bother checking for overlapping address ranges, + // which shouldn't happen with well-formed programs and at worst + // causes spurious failed lookups. We may need to revisit this + // if it's a problem in practice. 
+ drgn_module_address_tree_insert(&module->prog->dbinfo.modules_by_address, + module, NULL); } return NULL; } -static bool build_id_matches(Elf *elf, const void *build_id, - size_t build_id_len) +LIBDRGN_PUBLIC +const char *drgn_module_build_id(const struct drgn_module *module, + const void **raw_ret, size_t *raw_len_ret) { - const void *elf_build_id; - ssize_t elf_build_id_len = drgn_elf_gnu_build_id(elf, &elf_build_id); - if (elf_build_id_len < 0) - return false; - return (elf_build_id_len == build_id_len && - memcmp(elf_build_id, build_id, build_id_len) == 0); + if (raw_ret) + *raw_ret = module->build_id; + if (raw_len_ret) + *raw_len_ret = module->build_id_len; + return module->build_id_str; } -static struct drgn_error * -userspace_core_elf_address_range(uint16_t e_type, size_t phnum, - struct drgn_error *(*get_phdr)(void *, size_t, GElf_Phdr *), - void *arg, - const struct drgn_mapped_file_segment *segments, - size_t num_segments, - const struct drgn_mapped_file_segment *ehdr_segment, - uint64_t *bias_ret, uint64_t *start_ret, - uint64_t *end_ret) +static void *drgn_module_alloc_build_id(size_t build_id_len) { - struct drgn_error *err; - - /* - * First, find the virtual address of the ELF header so that we can - * calculate the bias. - */ - uint64_t ehdr_vaddr; - size_t i; - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr; - err = get_phdr(arg, i, &phdr); - if (err) - return err; - if (phdr.p_type == PT_LOAD) { - uint64_t align = phdr.p_align ? phdr.p_align : 1; - if ((phdr.p_offset & -align) == 0) { - ehdr_vaddr = phdr.p_vaddr & -align; - break; - } - } - } - if (i >= phnum) { - /* - * No loadable segments contain the ELF header. This can't be - * our file. 
- */ - *bias_ret = 0; -not_loaded: - *start_ret = *end_ret = 0; + size_t alloc_size; + if (__builtin_mul_overflow(build_id_len, 3U, &alloc_size) || + __builtin_add_overflow(alloc_size, 1U, &alloc_size)) return NULL; - } - *bias_ret = ehdr_segment->start - ehdr_vaddr; - if (*bias_ret != 0 && e_type == ET_EXEC) { - /* The executable is not loaded at the correct address. */ - goto not_loaded; - } - - /* - * Now check all of the program headers to (1) get the module address - * range and (2) make sure that they are mapped as expected. If we're - * lucky, this can detect a file that was mmap'd and not actually loaded - * by the kernel or dynamic loader. This could also be the wrong file. - */ - const struct drgn_mapped_file_segment *segment = segments; - const struct drgn_mapped_file_segment *end_segment = - segments + num_segments; - uint64_t start = 0, end = 0; - bool first = true; - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr; - err = get_phdr(arg, i, &phdr); - if (err) - return err; - if (phdr.p_type != PT_LOAD) - continue; - uint64_t vaddr = phdr.p_vaddr + *bias_ret; - if (phdr.p_filesz != 0) { - /* - * Advance to the mapped segment containing the start - * address. - */ - while (vaddr >= segment->end) { - if (++segment == end_segment) - goto not_loaded; - if (vaddr < segment->start) - goto not_loaded; - } - if (segment->file_offset + (vaddr - segment->start) != - phdr.p_offset) { - /* - * The address in the core dump does not map to - * the segment's file offset. - */ - goto not_loaded; - } - if (phdr.p_filesz > segment->end - vaddr) { - /* Part of the segment is not mapped. */ - goto not_loaded; - } - } - if (first) { - uint64_t align = phdr.p_align ? phdr.p_align : 1; - start = vaddr & -align; - first = false; - } - end = vaddr + phdr.p_memsz; - } - if (start >= end) - goto not_loaded; - *start_ret = start; - *end_ret = end; - return NULL; + return malloc(alloc_size); } -/* ehdr_buf must be aligned as Elf64_Ehdr. 
*/ -static void read_ehdr(const void *ehdr_buf, GElf_Ehdr *ret, bool *is_64_bit_ret, - bool *bswap_ret) -{ - *is_64_bit_ret = ((unsigned char *)ehdr_buf)[EI_CLASS] == ELFCLASS64; - bool little_endian = - ((unsigned char *)ehdr_buf)[EI_DATA] == ELFDATA2LSB; - *bswap_ret = little_endian != HOST_LITTLE_ENDIAN; - if (*is_64_bit_ret) { - const Elf64_Ehdr *ehdr64 = ehdr_buf; - if (*bswap_ret) { - memcpy(ret->e_ident, ehdr64->e_ident, EI_NIDENT); - ret->e_type = bswap_16(ehdr64->e_type); - ret->e_machine = bswap_16(ehdr64->e_machine); - ret->e_version = bswap_32(ehdr64->e_version); - ret->e_entry = bswap_64(ehdr64->e_entry); - ret->e_phoff = bswap_64(ehdr64->e_phoff); - ret->e_shoff = bswap_64(ehdr64->e_shoff); - ret->e_flags = bswap_32(ehdr64->e_flags); - ret->e_ehsize = bswap_16(ehdr64->e_ehsize); - ret->e_phentsize = bswap_16(ehdr64->e_phentsize); - ret->e_phnum = bswap_16(ehdr64->e_phnum); - ret->e_shentsize = bswap_16(ehdr64->e_shentsize); - ret->e_shnum = bswap_16(ehdr64->e_shnum); - ret->e_shstrndx = bswap_16(ehdr64->e_shstrndx); - } else { - *ret = *ehdr64; - } - } else { - const Elf32_Ehdr *ehdr32 = ehdr_buf; - memcpy(ret->e_ident, ehdr32->e_ident, EI_NIDENT); - if (*bswap_ret) { - ret->e_type = bswap_16(ehdr32->e_type); - ret->e_machine = bswap_16(ehdr32->e_machine); - ret->e_version = bswap_32(ehdr32->e_version); - ret->e_entry = bswap_32(ehdr32->e_entry); - ret->e_phoff = bswap_32(ehdr32->e_phoff); - ret->e_shoff = bswap_32(ehdr32->e_shoff); - ret->e_flags = bswap_32(ehdr32->e_flags); - ret->e_ehsize = bswap_16(ehdr32->e_ehsize); - ret->e_phentsize = bswap_16(ehdr32->e_phentsize); - ret->e_phnum = bswap_16(ehdr32->e_phnum); - ret->e_shentsize = bswap_16(ehdr32->e_shentsize); - ret->e_shnum = bswap_16(ehdr32->e_shnum); - ret->e_shstrndx = bswap_16(ehdr32->e_shstrndx); - } else { - ret->e_type = ehdr32->e_type; - ret->e_machine = ehdr32->e_machine; - ret->e_version = ehdr32->e_version; - ret->e_entry = ehdr32->e_entry; - ret->e_phoff = ehdr32->e_phoff; - 
ret->e_shoff = ehdr32->e_shoff; - ret->e_flags = ehdr32->e_flags; - ret->e_ehsize = ehdr32->e_ehsize; - ret->e_phentsize = ehdr32->e_phentsize; - ret->e_phnum = ehdr32->e_phnum; - ret->e_shentsize = ehdr32->e_shentsize; - ret->e_shnum = ehdr32->e_shnum; - ret->e_shstrndx = ehdr32->e_shstrndx; - } - } +static void drgn_module_set_build_id_impl(struct drgn_module *module, + const void *build_id, + size_t build_id_len, + void *build_id_buf) +{ + module->build_id = build_id_buf; + memcpy(module->build_id, build_id, build_id_len); + + module->build_id_len = build_id_len; + + module->build_id_str = (char *)build_id_buf + build_id_len; + hexlify(build_id, build_id_len, module->build_id_str); + module->build_id_str[2 * build_id_len] = '\0'; } -/* phdr_buf must be aligned as Elf64_Phdr. */ -static void read_phdr(const void *phdr_buf, size_t i, bool is_64_bit, - bool bswap, GElf_Phdr *ret) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_set_build_id(struct drgn_module *module, + const void *build_id, + size_t build_id_len) { - if (is_64_bit) { - const Elf64_Phdr *phdr64 = (Elf64_Phdr *)phdr_buf + i; - if (bswap) { - ret->p_type = bswap_32(phdr64->p_type); - ret->p_flags = bswap_32(phdr64->p_flags); - ret->p_offset = bswap_64(phdr64->p_offset); - ret->p_vaddr = bswap_64(phdr64->p_vaddr); - ret->p_paddr = bswap_64(phdr64->p_paddr); - ret->p_filesz = bswap_64(phdr64->p_filesz); - ret->p_memsz = bswap_64(phdr64->p_memsz); - ret->p_align = bswap_64(phdr64->p_align); - } else { - *ret = *phdr64; - } - } else { - const Elf32_Phdr *phdr32 = (Elf32_Phdr *)phdr_buf + i; - if (bswap) { - ret->p_type = bswap_32(phdr32->p_type); - ret->p_offset = bswap_32(phdr32->p_offset); - ret->p_vaddr = bswap_32(phdr32->p_vaddr); - ret->p_paddr = bswap_32(phdr32->p_paddr); - ret->p_filesz = bswap_32(phdr32->p_filesz); - ret->p_memsz = bswap_32(phdr32->p_memsz); - ret->p_flags = bswap_32(phdr32->p_flags); - ret->p_align = bswap_32(phdr32->p_align); - } else { - ret->p_type = phdr32->p_type; - 
ret->p_offset = phdr32->p_offset; - ret->p_vaddr = phdr32->p_vaddr; - ret->p_paddr = phdr32->p_paddr; - ret->p_filesz = phdr32->p_filesz; - ret->p_memsz = phdr32->p_memsz; - ret->p_flags = phdr32->p_flags; - ret->p_align = phdr32->p_align; - } + if (build_id_len == 0) { + free(module->build_id); + module->build_id = NULL; + module->build_id_len = 0; + module->build_id_str = NULL; + return NULL; } -} -struct core_get_phdr_arg { - const void *phdr_buf; - bool is_64_bit; - bool bswap; -}; + char *build_id_buf = drgn_module_alloc_build_id(build_id_len); + if (!build_id_buf) + return &drgn_enomem; + free(module->build_id); + drgn_module_set_build_id_impl(module, build_id, build_id_len, + build_id_buf); + return NULL; +} static struct drgn_error * -core_get_phdr(void *arg_, size_t i, GElf_Phdr *ret) +drgn_module_section_addresses_allowed(struct drgn_module *module, bool modify) { - struct core_get_phdr_arg *arg = arg_; - read_phdr(arg->phdr_buf, i, arg->is_64_bit, arg->bswap, ret); + if (module->kind != DRGN_MODULE_RELOCATABLE) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "section addresses are only supported for relocatable modules"); + } + if (modify && (module->loaded_file || module->debug_file)) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "section addresses cannot be modified after file is set"); + } return NULL; } -struct userspace_core_identified_file { - const void *build_id; - size_t build_id_len; - uint64_t start, end; - bool ignore; - bool have_address_range; -}; - -static struct drgn_error * -userspace_core_identify_file(struct drgn_program *prog, - struct userspace_core_report_state *core, - const struct drgn_mapped_file_segment *segments, - size_t num_segments, - const struct drgn_mapped_file_segment *ehdr_segment, - struct userspace_core_identified_file *ret) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_get_section_address(struct drgn_module *module, + const char *name, + uint64_t *ret) { - struct drgn_error *err; + struct 
drgn_error *err = + drgn_module_section_addresses_allowed(module, false); + if (err) + return err; + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search(&module->section_addresses, + (char **)&name); + if (!it.entry) + return &drgn_not_found; + *ret = it.entry->value; + return NULL; +} - Elf64_Ehdr ehdr_buf; - err = drgn_program_read_memory(prog, &ehdr_buf, ehdr_segment->start, - sizeof(ehdr_buf), false); - if (err) { - if (err->code == DRGN_ERROR_FAULT) { - drgn_error_destroy(err); - err = NULL; - } +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_set_section_address(struct drgn_module *module, + const char *name, + uint64_t address) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, true); + if (err) return err; - } - if (memcmp(&ehdr_buf, ELFMAG, SELFMAG) != 0) { - ret->ignore = true; + + struct hash_pair hp = + drgn_module_section_address_map_hash((char **)&name); + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search_hashed(&module->section_addresses, + (char **)&name, + hp); + if (it.entry) { + it.entry->value = address; return NULL; } + struct drgn_module_section_address_map_entry entry = { + .key = strdup(name), + .value = address, + }; + if (!entry.key) + return &drgn_enomem; + if (drgn_module_section_address_map_insert_searched(&module->section_addresses, + &entry, hp, + NULL) < 0) { + free(entry.key); + return &drgn_enomem; + } + module->section_addresses_generation++; + return NULL; +} + +struct drgn_error *drgn_module_delete_section_address(struct drgn_module *module, + const char *name) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, true); + if (err) + return err; + + struct hash_pair hp = + drgn_module_section_address_map_hash((char **)&name); + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search_hashed(&module->section_addresses, + (char **)&name, + hp); + if (!it.entry) 
+ return &drgn_not_found; + + _cleanup_free_ _unused_ char *key_to_free = it.entry->key; + drgn_module_section_address_map_delete_iterator_hashed(&module->section_addresses, + it, hp); + module->section_addresses_generation++; + return NULL; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_num_section_addresses(struct drgn_module *module, + size_t *ret) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, false); + if (err) + return err; + *ret = drgn_module_section_address_map_size(&module->section_addresses); + return NULL; +} + +struct drgn_module_section_address_iterator { + struct drgn_module *module; + struct drgn_module_section_address_map_iterator map_it; + uint64_t generation; +}; + +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_section_address_iterator_create(struct drgn_module *module, + struct drgn_module_section_address_iterator **ret) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, false); + if (err) + return err; + + struct drgn_module_section_address_iterator *it = malloc(sizeof(*it)); + if (!it) + return &drgn_enomem; + it->module = module; + it->map_it = drgn_module_section_address_map_first(&module->section_addresses); + it->generation = module->section_addresses_generation; + *ret = it; + return NULL; +} + +LIBDRGN_PUBLIC void +drgn_module_section_address_iterator_destroy(struct drgn_module_section_address_iterator *it) +{ + free(it); +} + +LIBDRGN_PUBLIC struct drgn_module * +drgn_module_section_address_iterator_module(struct drgn_module_section_address_iterator *it) +{ + return it->module; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_section_address_iterator_next(struct drgn_module_section_address_iterator *it, + const char **name_ret, + uint64_t *address_ret) +{ + if (it->map_it.entry) { + if (it->generation != it->module->section_addresses_generation) { + return drgn_error_create(DRGN_ERROR_OTHER, + "section addresses changed during iteration"); + } + *name_ret = 
it->map_it.entry->key; + if (address_ret) + *address_ret = it->map_it.entry->value; + it->map_it = drgn_module_section_address_map_next(it->map_it); + } else { + *name_ret = NULL; + } + return NULL; +} + +LIBDRGN_PUBLIC enum drgn_module_file_status +drgn_module_loaded_file_status(const struct drgn_module *module) +{ + return module->loaded_file_status; +} + +static bool +drgn_can_change_module_file_status(enum drgn_module_file_status old_status, + enum drgn_module_file_status new_status) +{ + SWITCH_ENUM(old_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + SWITCH_ENUM(new_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + return true; + case DRGN_MODULE_FILE_HAVE: + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + default: + return false; + } + case DRGN_MODULE_FILE_HAVE: + return new_status == DRGN_MODULE_FILE_HAVE; + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + SWITCH_ENUM(new_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + return true; + case DRGN_MODULE_FILE_HAVE: + default: + return false; + } + default: + UNREACHABLE(); + } +} + +LIBDRGN_PUBLIC +bool drgn_module_set_loaded_file_status(struct drgn_module *module, + enum drgn_module_file_status status) +{ + if (!drgn_can_change_module_file_status(module->loaded_file_status, + status)) + return false; + module->loaded_file_status = status; + return true; +} + +LIBDRGN_PUBLIC +bool drgn_module_wants_loaded_file(const struct drgn_module *module) +{ + SWITCH_ENUM(module->loaded_file_status) { + case DRGN_MODULE_FILE_WANT: + return true; + case DRGN_MODULE_FILE_HAVE: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + return false; + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + default: + UNREACHABLE(); + } +} + +LIBDRGN_PUBLIC enum drgn_module_file_status 
+drgn_module_debug_file_status(const struct drgn_module *module) +{ + return module->debug_file_status; +} + +LIBDRGN_PUBLIC +bool drgn_module_set_debug_file_status(struct drgn_module *module, + enum drgn_module_file_status status) +{ + if (!drgn_can_change_module_file_status(module->debug_file_status, + status)) + return false; + if (module->debug_file_status == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY + && status != DRGN_MODULE_FILE_WANT_SUPPLEMENTARY) + drgn_module_clear_wanted_supplementary_debug_file(module); + module->debug_file_status = status; + return true; +} + +LIBDRGN_PUBLIC +bool drgn_module_wants_debug_file(const struct drgn_module *module) +{ + SWITCH_ENUM(module->debug_file_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + return true; + case DRGN_MODULE_FILE_HAVE: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + return false; + default: + UNREACHABLE(); + } +} + +LIBDRGN_PUBLIC +const char *drgn_module_loaded_file_path(const struct drgn_module *module) +{ + return module->loaded_file ? module->loaded_file->path : NULL; +} + +LIBDRGN_PUBLIC +uint64_t drgn_module_loaded_file_bias(const struct drgn_module *module) +{ + return module->loaded_file_bias; +} + +LIBDRGN_PUBLIC +const char *drgn_module_debug_file_path(const struct drgn_module *module) +{ + return module->debug_file ? module->debug_file->path : NULL; +} + +LIBDRGN_PUBLIC +uint64_t drgn_module_debug_file_bias(const struct drgn_module *module) +{ + return module->debug_file_bias; +} + +LIBDRGN_PUBLIC enum drgn_supplementary_file_kind +drgn_module_supplementary_debug_file_kind(const struct drgn_module *module) +{ + return module->supplementary_debug_file + ? DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK + : DRGN_SUPPLEMENTARY_FILE_NONE; +} + +LIBDRGN_PUBLIC const char * +drgn_module_supplementary_debug_file_path(const struct drgn_module *module) +{ + return module->supplementary_debug_file + ? 
module->supplementary_debug_file->path : NULL; +} + +LIBDRGN_PUBLIC enum drgn_supplementary_file_kind +drgn_module_wanted_supplementary_debug_file(struct drgn_module *module, + const char **debug_file_path_ret, + const char **supplementary_path_ret, + const void **checksum_ret, + size_t *checksum_len_ret) +{ + struct drgn_module_wanted_supplementary_file *wanted = + module->wanted_supplementary_debug_file; + if (debug_file_path_ret) + *debug_file_path_ret = wanted ? wanted->file->path : NULL; + if (supplementary_path_ret) + *supplementary_path_ret = wanted ? wanted->supplementary_path : NULL; + if (checksum_ret) + *checksum_ret = wanted ? wanted->checksum : NULL; + if (checksum_len_ret) + *checksum_len_ret = wanted ? wanted->checksum_len : 0; + return wanted + ? DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK + : DRGN_SUPPLEMENTARY_FILE_NONE; +} + +static struct drgn_error * +drgn_program_register_debug_info_finder_impl(struct drgn_program *prog, + struct drgn_debug_info_finder *finder, + const char *name, + const struct drgn_debug_info_finder_ops *ops, + void *arg, size_t enable_index) +{ + struct drgn_error *err; + bool should_free = !finder; + if (finder) { + finder->handler.name = name; + } else { + finder = malloc(sizeof(*finder)); + if (!finder) + return &drgn_enomem; + finder->handler.name = strdup(name); + if (!finder->handler.name) { + free(finder); + return &drgn_enomem; + } + } + finder->handler.free = should_free; + finder->ops = *ops; + finder->arg = arg; + err = drgn_handler_list_register(&prog->dbinfo.debug_info_finders, + &finder->handler, enable_index, + "module debug info finder"); + if (err && should_free) { + free((char *)finder->handler.name); + free(finder); + } + return err; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_register_debug_info_finder(struct drgn_program *prog, + const char *name, + const struct drgn_debug_info_finder_ops *ops, + void *arg, size_t enable_index) +{ + return drgn_program_register_debug_info_finder_impl(prog, NULL, 
name, + ops, arg, + enable_index); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_registered_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret) +{ + return drgn_handler_list_registered(&prog->dbinfo.debug_info_finders, + names_ret, count_ret); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_set_enabled_debug_info_finders(struct drgn_program *prog, + const char * const *names, + size_t count) +{ + return drgn_handler_list_set_enabled(&prog->dbinfo.debug_info_finders, + names, count, + "module debug info finder"); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_enabled_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret) +{ + return drgn_handler_list_enabled(&prog->dbinfo.debug_info_finders, + names_ret, count_ret); +} + +static const char *drgn_default_debug_info_path = ":.debug:/usr/lib/debug"; + +LIBDRGN_PUBLIC +const char *drgn_program_debug_info_path(struct drgn_program *prog) +{ + return prog->dbinfo.debug_info_path; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_program_set_debug_info_path(struct drgn_program *prog, + const char *path) +{ + char *new_path; + if (path) { + new_path = strdup(path); + if (!new_path) + return &drgn_enomem; + } else { + new_path = NULL; + } + if (prog->dbinfo.debug_info_path != drgn_default_debug_info_path) + free((char *)prog->dbinfo.debug_info_path); + prog->dbinfo.debug_info_path = new_path; + return NULL; +} + +static struct drgn_error * +drgn_module_set_wanted_gnu_debugaltlink(struct drgn_module *module, + struct drgn_elf_file *file) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; + + // We don't cache .gnu_debugaltlink, and it doesn't need relocation, so + // don't use drgn_elf_file_read_section(). 
+ Elf_Data *data; + err = read_elf_section(file->scns[DRGN_SCN_GNU_DEBUGALTLINK], &data); + if (err) { + if (!drgn_error_is_fatal(err)) { + drgn_error_log_debug(prog, err, + "%s: couldn't read .gnu_debugaltlink; ignoring debug info: ", + file->path); + drgn_error_destroy(err); + err = NULL; + } + return err; + } + + const char *debugaltlink = data->d_buf; + const char *nul = memchr(debugaltlink, 0, data->d_size); + if (!nul || nul + 1 == debugaltlink + data->d_size) { + drgn_log_debug(prog, + "%s: couldn't parse .gnu_debugaltlink; ignoring debug info", + file->path); + return NULL; + } + const void *build_id = nul + 1; + size_t build_id_len = debugaltlink + data->d_size - (nul + 1); + _cleanup_free_ char *build_id_str = ahexlify(build_id, build_id_len); + if (!build_id_str) + return &drgn_enomem; + drgn_log_debug(prog, "%s has gnu_debugaltlink %s build ID %s", + file->path, debugaltlink, build_id_str); + + struct drgn_module_wanted_supplementary_file *wanted = + malloc(sizeof(*wanted)); + if (!wanted) + return &drgn_enomem; + *wanted = (struct drgn_module_wanted_supplementary_file){ + .file = file, + .supplementary_path = debugaltlink, + .checksum = build_id, + .checksum_len = build_id_len, + .checksum_str = no_cleanup_ptr(build_id_str), + .generation = ++prog->dbinfo.supplementary_file_generation, + }; + drgn_module_clear_wanted_supplementary_debug_file(module); + module->wanted_supplementary_debug_file = wanted; + module->debug_file_status = DRGN_MODULE_FILE_WANT_SUPPLEMENTARY; + return NULL; +} + +static bool +drgn_module_copy_section_addresses(struct drgn_module *module, Elf *elf) +{ + if (drgn_module_section_address_map_empty(&module->section_addresses)) + return true; + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return false; + + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return false; + + char *scnname = elf_strptr(elf, shstrndx, 
shdr->sh_name); + if (!scnname) + return false; + + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search(&module->section_addresses, + &scnname); + if (!it.entry) + continue; + + shdr->sh_addr = it.entry->value; + if (!gelf_update_shdr(scn, shdr)) + return false; + } + return true; +} + +static bool elf_main_bias(struct drgn_program *prog, Elf *elf, uint64_t *ret) +{ + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) { + drgn_log_debug(prog, "gelf_getehdr: %s", elf_errmsg(-1)); + return false; + } + + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) { + drgn_log_debug(prog, "elf_getphdrnum: %s", elf_errmsg(-1)); + return false; + } + + uint64_t phdr_vaddr; + bool have_phdr_vaddr = false; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) { + drgn_log_debug(prog, "gelf_getphdr: %s", + elf_errmsg(-1)); + return false; + } + if (phdr->p_type == PT_LOAD && + phdr->p_offset <= ehdr->e_phoff && + ehdr->e_phoff < phdr->p_offset + phdr->p_filesz) { + phdr_vaddr = ehdr->e_phoff - phdr->p_offset + phdr->p_vaddr; + have_phdr_vaddr = true; + } + } + if (!have_phdr_vaddr) { + drgn_log_debug(prog, + "file does not have loadable segment containing e_phoff"); + return false; + } + *ret = prog->auxv.at_phdr - phdr_vaddr; + return true; +} + +static bool elf_dso_bias(struct drgn_program *prog, Elf *elf, + uint64_t dynamic_address, uint64_t *ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) { + drgn_log_debug(prog, "elf_getphdrnum: %s", elf_errmsg(-1)); + return false; + } + + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) { + drgn_log_debug(prog, "gelf_getphdr: %s", + elf_errmsg(-1)); + return false; + } + if (phdr->p_type == PT_DYNAMIC) { + *ret = dynamic_address - phdr->p_vaddr; + drgn_log_debug(prog, + "got bias 0x%" PRIx64 " from PT_DYNAMIC program header", + *ret); + 
return true; + } + } + drgn_log_debug(prog, "file does not have PT_DYNAMIC program header"); + return false; +} + +static bool drgn_module_elf_file_bias(struct drgn_module *module, + struct drgn_elf_file *file, uint64_t *ret) +{ + struct drgn_program *prog = module->prog; + SWITCH_ENUM(module->kind) { + case DRGN_MODULE_MAIN: + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { + *ret = prog->vmcoreinfo.kaslr_offset; + drgn_log_debug(prog, + "got bias 0x%" PRIx64 " from VMCOREINFO", + *ret); + return true; + } else { + return elf_main_bias(prog, file->elf, ret); + } + case DRGN_MODULE_SHARED_LIBRARY: + return elf_dso_bias(prog, file->elf, + module->shared_library.dynamic_address, + ret); + case DRGN_MODULE_VDSO: + return elf_dso_bias(prog, file->elf, + module->vdso.dynamic_address, ret); + case DRGN_MODULE_EXTRA: + if (module->start != UINT64_MAX) { + uint64_t elf_start, elf_end; + if (!drgn_elf_file_address_range(file, &elf_start, + &elf_end)) + return false; + if (elf_start < elf_end) { + *ret = module->start - elf_start; + drgn_log_debug(prog, + "got bias 0x%" PRIx64 " from ELF start address", + *ret); + return true; + } + } + fallthrough; + case DRGN_MODULE_RELOCATABLE: + default: + *ret = 0; + return true; + } +} + +static bool +drgn_module_should_set_address_range_from_elf_file(struct drgn_module *module) +{ + if (module->start != UINT64_MAX) + return false; + + SWITCH_ENUM(module->kind) { + case DRGN_MODULE_MAIN: + case DRGN_MODULE_SHARED_LIBRARY: + case DRGN_MODULE_VDSO: + return true; + case DRGN_MODULE_RELOCATABLE: + case DRGN_MODULE_EXTRA: + default: + return false; + } +} + +// Takes ownership of file unless it is already owned by module. 
+//
+// Decide whether file can fill module's loaded-file slot, its debug-file slot,
+// or both, and install it if so.
+//
+// If is_gnu_debugaltlink_file is true, file is the supplementary
+// (gnu_debugaltlink) file; the debug file it completes is taken from
+// module->wanted_supplementary_debug_file->file.
+//
+// Returns NULL both when the file ends up being used and when it is rejected.
+// A non-NULL error is only returned if
+// drgn_module_set_wanted_gnu_debugaltlink() fails or if the build ID buffer
+// cannot be allocated.
+static struct drgn_error *
+drgn_module_maybe_use_elf_file(struct drgn_module *module,
+			       struct drgn_elf_file *file,
+			       bool is_gnu_debugaltlink_file)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+	struct drgn_elf_file *gnu_debugaltlink_file = NULL;
+
+	// use_loaded/use_debug: whether file would be installed as the loaded
+	// file/debug file. has_dwarf also selects the log message below when
+	// the file is ignored.
+	bool use_loaded, has_dwarf, use_debug;
+	if (is_gnu_debugaltlink_file) {
+		assert(module->debug_file_status
+		       == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY);
+		// From here on, file refers to the debug file that asked for
+		// the supplementary file, not to the supplementary file itself.
+		gnu_debugaltlink_file = file;
+		file = module->wanted_supplementary_debug_file->file;
+		use_loaded = false;
+		has_dwarf = use_debug = true;
+	} else {
+		// We should only be here if we want a file.
+		assert(drgn_module_wants_file(module));
+		use_loaded = module->loaded_file_status == DRGN_MODULE_FILE_WANT
+			     && file->is_loadable;
+		has_dwarf = drgn_elf_file_has_dwarf(file);
+		use_debug = drgn_module_wants_debug_file(module) && has_dwarf;
+	}
+
+	_cleanup_free_ void *build_id_buf = NULL;
+
+	if (!is_gnu_debugaltlink_file
+	    && use_debug && file->scns[DRGN_SCN_GNU_DEBUGALTLINK]) {
+		// If we're trying to reuse a debug file that wants a
+		// supplementary file, then don't reset it, otherwise we'll free
+		// the file that we're trying to reuse.
+		if (!module->wanted_supplementary_debug_file
+		    || module->wanted_supplementary_debug_file->file != file) {
+			err = drgn_module_set_wanted_gnu_debugaltlink(module, file);
+			if (err)
+				goto unused;
+		}
+		// If the file is now recorded as waiting for its supplementary
+		// file and we don't also want it as the loaded file, there is
+		// nothing more to do with it for now.
+		if (!use_loaded && module->wanted_supplementary_debug_file
+		    && module->wanted_supplementary_debug_file->file == file)
+			return NULL;
+		// The debug info is not used on this pass; the file may still
+		// be used as the loaded file.
+		use_debug = false;
+	}
+
+	if (!use_loaded && !use_debug) {
+		if (file->is_loadable) {
+			drgn_log_debug(prog,
+				       "%s is loadable, but don't want loaded file; ignoring",
+				       file->path);
+		} else if (has_dwarf) {
+			drgn_log_debug(prog,
+				       "%s has debug info, but don't want debug info; ignoring",
+				       file->path);
+		} else {
+			drgn_log_debug(prog,
+				       "%s is not loadable and no debug info; ignoring",
+				       file->path);
+		}
+		err = NULL;
+		goto unused;
+	}
+
+	// Get everything that might fail before we commit to using the file.
+	const void *elf_build_id;
+	// Stays 0 unless the module has no build ID yet and the file provides
+	// one; checked again below before installing it.
+	ssize_t elf_build_id_len = 0;
+	if (module->build_id_len == 0) {
+		elf_build_id_len = drgn_elf_gnu_build_id(file->elf,
+							 &elf_build_id);
+		if (elf_build_id_len < 0) {
+			drgn_log_debug(prog, "%s: %s", file->path,
+				       elf_errmsg(-1));
+			err = NULL;
+			goto unused;
+		}
+		if (elf_build_id_len > 0) {
+			build_id_buf =
+				drgn_module_alloc_build_id(elf_build_id_len);
+			if (!build_id_buf) {
+				err = &drgn_enomem;
+				goto unused;
+			}
+		}
+	}
+
+	// Section addresses only need to be copied into files that the module
+	// isn't already using.
+	if (file != module->loaded_file && file != module->debug_file
+	    && !drgn_module_copy_section_addresses(module, file->elf)) {
+		drgn_log_debug(prog, "%s: %s", file->path, elf_errmsg(-1));
+		err = NULL;
+		goto unused;
+	}
+
+	uint64_t bias;
+	if (!drgn_module_elf_file_bias(module, file, &bias)) {
+		err = NULL;
+		goto unused;
+	}
+	// Both stay 0 (an empty range) unless the range should be taken from
+	// the file; elf_start < elf_end is what triggers installing it below.
+	uint64_t elf_start = 0, elf_end = 0;
+	if (drgn_module_should_set_address_range_from_elf_file(module)) {
+		if (!drgn_elf_file_address_range(file, &elf_start, &elf_end)) {
+			drgn_log_debug(prog, "%s: %s", file->path,
+				       elf_errmsg(-1));
+			err = NULL;
+			goto unused;
+		}
+		elf_start += bias;
+		elf_end += bias;
+		// An invalid range is only logged; the file is still used, and
+		// the range is simply not installed below.
+		if (elf_start >= elf_end) {
+			drgn_log_debug(prog, "%s: address range is invalid",
+				       file->path);
+		}
+	}
+
+	// At this point, we've committed to using the file. Nothing after this
+	// is allowed to fail.
+
+	if (use_loaded && use_debug) {
+		drgn_log_info(prog,
+			      "%s: using loadable file with debug info %s",
+			      module->name, file->path);
+	} else if (use_loaded) {
+		drgn_log_info(prog, "%s: using loadable file %s", module->name,
+			      file->path);
+	} else if (is_gnu_debugaltlink_file) {
+		drgn_log_info(prog,
+			      "%s: using debug info file %s with supplementary file %s",
+			      module->name, file->path, gnu_debugaltlink_file->path);
+	} else {
+		drgn_log_info(prog, "%s: using debug info file %s",
+			      module->name, file->path);
+	}
+
+	// If we got a build ID or address range earlier, install them.
+	if (elf_build_id_len > 0) {
+		// no_cleanup_ptr() hands build_id_buf over so that it is not
+		// freed when this function returns.
+		drgn_module_set_build_id_impl(module, elf_build_id,
+					      elf_build_id_len,
+					      no_cleanup_ptr(build_id_buf));
+		drgn_log_debug(prog, "%s: set build ID %s from file",
+			       module->name, module->build_id_str);
+	}
+	if (elf_start < elf_end) {
+		drgn_log_debug(prog,
+			       "%s: set address range 0x%" PRIx64
+			       "-0x%" PRIx64 " from file", module->name,
+			       elf_start, elf_end);
+		err = drgn_module_set_address_range(module, elf_start, elf_end);
+		// This can only fail if the address range is invalid, which we
+		// just checked for.
+		assert(!err);
+	}
+
+	if (use_loaded) {
+		module->loaded_file = file;
+		module->loaded_file_bias = bias;
+		module->loaded_file_status = DRGN_MODULE_FILE_HAVE;
+		module->elf_symtab_pending_files |=
+			DRGN_MODULE_FILE_MASK_LOADED;
+	}
+	if (use_debug) {
+		module->debug_file = file;
+		module->debug_file_bias = bias;
+		module->supplementary_debug_file = gnu_debugaltlink_file;
+		drgn_module_clear_wanted_supplementary_debug_file(module);
+		module->debug_file_status = DRGN_MODULE_FILE_HAVE;
+		// Push the module onto the head of the program's list of
+		// modules pending indexing.
+		module->pending_indexing_next =
+			prog->dbinfo.modules_pending_indexing;
+		prog->dbinfo.modules_pending_indexing = module;
+		// NOTE(review): presumably reset so that the main language is
+		// looked up again now that more debug info exists - confirm.
+		prog->tried_main_language = false;
+		module->elf_symtab_pending_files |=
+			DRGN_MODULE_FILE_MASK_DEBUG;
+	}
+	if (!prog->has_platform) {
+		drgn_log_debug(prog, "setting program platform from %s",
+			       file->path);
+		drgn_program_set_platform(prog, &file->platform);
+	}
+	return NULL;
+
+unused:
+	// The file is not going to be used. Destroy the supplementary candidate
+	// (NULL unless is_gnu_debugaltlink_file was set).
+	drgn_elf_file_destroy(gnu_debugaltlink_file);
+	// If file is the debug file recorded as wanting a supplementary file,
+	// detach it from that record before clearing the record (presumably so
+	// that clearing doesn't also destroy it - confirm), and go back to
+	// wanting a plain debug file.
+	if (module->wanted_supplementary_debug_file
+	    && file == module->wanted_supplementary_debug_file->file) {
+		module->wanted_supplementary_debug_file->file = NULL;
+		drgn_module_clear_wanted_supplementary_debug_file(module);
+		module->debug_file_status = DRGN_MODULE_FILE_WANT;
+	}
+	// Files that the module is already using stay owned by the module.
+	if (file != module->loaded_file && file != module->debug_file)
+		drgn_elf_file_destroy(file);
+	return err;
+}
+
+// Always takes ownership of fd_. Attempts to resolve the real path of path.
+//
+// Try to use the file given by path and/or fd_ for module.
+//
+// If fd_ is negative, path is opened read-only; otherwise fd_ is used and path
+// (which may be NULL) is only a hint for the file's name. The file must be an
+// ELF file.
+//
+// If module currently wants a supplementary file and this file's GNU build ID
+// equals the wanted checksum, the file is treated as the gnu_debugaltlink
+// file. Otherwise, if check_build_id is true and module has a build ID, the
+// file is rejected unless its build ID matches; if check_build_id is false, a
+// mismatch is only logged. If expected_crc is not NULL, the file is rejected
+// unless the CRC computed over its raw contents equals *expected_crc.
+//
+// Returns NULL if the file was rejected, or the result of
+// drgn_module_maybe_use_elf_file() if it got that far. Allocation failures and
+// fatal errors from drgn_elf_file_create() are returned as errors.
+static struct drgn_error *
+drgn_module_try_file_internal(struct drgn_module *module, const char *path,
+			      int fd_, bool check_build_id,
+			      const uint32_t *expected_crc)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	_cleanup_close_ int fd = fd_;
+	if (fd >= 0) {
+		if (path) {
+			drgn_log_debug(prog, "%s: trying %s with fd %d",
+				       module->name, path, fd);
+		} else {
+			drgn_log_debug(prog, "%s: trying fd %d", module->name,
+				       fd);
+		}
+	} else {
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			drgn_log_debug(prog, "%s: %m", path);
+			return NULL;
+		}
+		drgn_log_debug(prog, "%s: trying %s", module->name, path);
+	}
+
+	// Try to canonicalize the path, first via
+	// readlink("/proc/self/fd/$fd"), then via realpath().
+	// The buffer is sized for the format string with "%d" replaced by the
+	// longest possible decimal int.
+#define FORMAT "/proc/self/fd/%d"
+	char fd_path[sizeof(FORMAT)
+		     - (sizeof("%d") - 1)
+		     + max_decimal_length(int)];
+	snprintf(fd_path, sizeof(fd_path), FORMAT, fd);
+#undef FORMAT
+
+	size_t link_buf_size = PATH_MAX;
+	_cleanup_free_ char *link_buf = malloc(link_buf_size);
+	if (!link_buf)
+		return &drgn_enomem;
+
+	// After this loop, path is the best name we have for the file: the
+	// readlink() result, the realpath() result, the original path, or
+	// fd_path as a last resort.
+	for (;;) {
+		ssize_t r = readlink(fd_path, link_buf, link_buf_size);
+		if (r < 0) {
+			drgn_log_debug(prog, "readlink: %s: %m", fd_path);
+			if (path) {
+				// realpath() allocates its result, which
+				// replaces link_buf so that it is freed the
+				// same way.
+				free(link_buf);
+				link_buf = realpath(path, NULL);
+				if (link_buf) {
+					drgn_log_debug(prog,
+						       "canonical path is %s",
+						       link_buf);
+					path = link_buf;
+				} else {
+					drgn_log_debug(prog, "realpath: %s: %m",
+						       path);
+				}
+			} else {
+				path = fd_path;
+			}
+			break;
+		}
+
+		// readlink() doesn't null-terminate. If it filled the whole
+		// buffer, the result may have been truncated, so fall through
+		// and retry with a bigger buffer.
+		if (r < link_buf_size) {
+			link_buf[r] = '\0';
+			if (drgn_log_is_enabled(prog, DRGN_LOG_DEBUG)
+			    && (!path || strcmp(path, link_buf) != 0)) {
+				drgn_log_debug(prog, "canonical path is %s",
+					       link_buf);
+			}
+			path = link_buf;
+			break;
+		}
+
+		// Double the buffer size, failing if the size overflows.
+		if (__builtin_mul_overflow(link_buf_size, 2U, &link_buf_size))
+			return &drgn_enomem;
+		free(link_buf);
+		link_buf = malloc(link_buf_size);
+		if (!link_buf)
+			return &drgn_enomem;
+	}
+
+	_cleanup_elf_end_ Elf *elf = dwelf_elf_begin(fd);
+	if (!elf) {
+		drgn_log_debug(prog, "%s: %s", path, elf_errmsg(-1));
+		return NULL;
+	}
+	if (elf_kind(elf) != ELF_K_ELF) {
+		drgn_log_debug(prog, "%s: not an ELF file", path);
+		return NULL;
+	}
+
+	// This code assumes that DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK is
+	// the only kind of supplementary file, which is currently true.
+	// The build ID is compared even when it isn't enforced if debug logging
+	// is enabled, so that the result can be logged.
+	bool log_build_id = check_build_id
+			    || drgn_log_is_enabled(prog, DRGN_LOG_DEBUG);
+	const void *elf_build_id;
+	// Only assigned if the condition below holds. Both reads of it further
+	// down are guarded by one of the same two conditions.
+	ssize_t elf_build_id_len;
+	if (module->debug_file_status == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY
+	    || (log_build_id && module->build_id_len > 0)) {
+		elf_build_id_len = drgn_elf_gnu_build_id(elf, &elf_build_id);
+		if (elf_build_id_len < 0) {
+			drgn_log_debug(prog, "%s: %s%s", path, elf_errmsg(-1),
+				       check_build_id ? "" : "; ignoring build ID");
+		}
+	}
+
+	bool is_gnu_debugaltlink_file = false;
+	if (module->debug_file_status == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY
+	    && elf_build_id_len >= 0
+	    && elf_build_id_len
+	       == module->wanted_supplementary_debug_file->checksum_len
+	    && memcmp(elf_build_id,
+		      module->wanted_supplementary_debug_file->checksum,
+		      elf_build_id_len) == 0) {
+		drgn_log_debug(prog, "%s: %s build ID matches gnu_debugaltlink",
+			       module->name, path);
+		is_gnu_debugaltlink_file = true;
+	} else if (log_build_id && module->build_id_len > 0) {
+		if (elf_build_id_len < 0) {
+			// Couldn't get the build ID (already logged above).
+			if (check_build_id)
+				return NULL;
+		} else if (elf_build_id_len == module->build_id_len
+			   && memcmp(elf_build_id, module->build_id,
+				     elf_build_id_len) == 0) {
+			drgn_log_debug(prog, "%s: %s build ID matches",
+				       module->name, path);
+		} else {
+			if (elf_build_id_len == 0) {
+				drgn_log_debug(prog,
+					       "%s: %s is missing build ID%s",
+					       module->name, path,
+					       check_build_id ? "" : "; forcing");
+			} else {
+				drgn_log_debug(prog,
+					       "%s: %s build ID does not match%s",
+					       module->name, path,
+					       check_build_id ? "" : "; forcing");
+			}
+			if (check_build_id)
+				return NULL;
+		}
+	}
+	if (expected_crc) {
+		size_t size;
+		const void *rawfile = elf_rawfile(elf, &size);
+		if (!rawfile) {
+			drgn_log_debug(prog, "%s: %s", path, elf_errmsg(-1));
+			return NULL;
+		}
+		// NOTE(review): initial value -1 and final complement look like
+		// the conventional CRC-32 - confirm against crc32_update().
+		uint32_t crc = ~crc32_update(-1, rawfile, size);
+		if (crc != *expected_crc) {
+			drgn_log_debug(prog,
+				       "%s: %s CRC 0x%08" PRIx32 " does not match",
+				       module->name, path, crc);
+			return NULL;
+		}
+		drgn_log_debug(prog, "%s: %s CRC matches", module->name, path);
+	}
+
+	struct drgn_elf_file *file;
+	err = drgn_elf_file_create(module, path, fd, NULL, elf, &file);
+	if (err) {
+		// Non-fatal errors are logged and swallowed; the file is simply
+		// not used.
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+	// fd and elf are owned by the drgn_elf_file now.
+	fd = -1;
+	elf = NULL;
+	return drgn_module_maybe_use_elf_file(module, file,
+					      is_gnu_debugaltlink_file);
+}
+
+// Arbitrary limit on the number of bytes we'll allocate and read from the
+// program's memory at once when finding modules/debug info.
+// (1048576 bytes is 1 MiB.)
+static const uint64_t MAX_MEMORY_READ_FOR_DEBUG_INFO = UINT64_C(1048576);
+
+// Log, at debug level, which kinds of files are about to be looked for for
+// module and how.
+//
+// how_format must be a string literal (it is concatenated into the format
+// string); any arguments it consumes follow it. The message has the form
+// "<name> (build ID <id>): <how> loaded and debug files", with the kinds and
+// the plural "s" chosen from the module's loaded and debug file statuses.
+#define drgn_module_try_files_log(module, how_format, ...)			\
+({										\
+	struct drgn_module *_module = (module);					\
+	bool _want_loaded = _module->loaded_file_status == DRGN_MODULE_FILE_WANT;\
+	bool _want_debug = _module->debug_file_status == DRGN_MODULE_FILE_WANT;	\
+	bool _want_supplementary_debug = _module->debug_file_status		\
+					 == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY;\
+	drgn_log_debug(_module->prog,						\
+		       "%s (%s%s): " how_format " %s%s%s file%s", _module->name,\
+		       _module->build_id_str ? "build ID " : "no build ID",	\
+		       _module->build_id_str ?: "",				\
+		       ## __VA_ARGS__,						\
+		       _want_loaded ? "loaded" : "",				\
+		       _want_loaded && (_want_debug || _want_supplementary_debug)\
+		       ? " and " : "",						\
+		       _want_debug ? "debug"					\
+		       : _want_supplementary_debug ? "supplementary debug" : "",\
+		       _want_loaded && (_want_debug || _want_supplementary_debug)\
+		       ? "s" : "");						\
+})
+
+// Try to read the vDSO image out of the program's memory and use it as
+// module's loaded file.
+//
+// Does nothing unless module wants a loaded file and has a known, non-empty
+// address range no larger than MAX_MEMORY_READ_FOR_DEBUG_INFO. Returns NULL if
+// the image was used or could not be used; returns an error on allocation
+// failure or on a fatal error from reading memory or creating the file.
+static struct drgn_error *
+drgn_module_try_vdso_in_core(struct drgn_module *module)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	// The Linux kernel has included the entire vDSO in core dumps since
+	// Linux kernel commit f47aef55d9a1 ("[PATCH] i386 vDSO: use
+	// VM_ALWAYSDUMP") (in v2.6.20). Try to read it from program memory.
+
+	// The vDSO in memory is always stripped.
+	if (module->loaded_file_status != DRGN_MODULE_FILE_WANT)
+		return NULL;
+
+	uint64_t start, end;
+	if (!drgn_module_address_range(module, &start, &end)) {
+		drgn_log_debug(prog,
+			       "vDSO address range is not known; "
+			       "can't read from program");
+		return NULL;
+	}
+	if (start >= end) {
+		drgn_log_debug(prog,
+			       "vDSO address range is empty; "
+			       "can't read from program");
+		return NULL;
+	}
+	uint64_t size = end - start;
+	if (size > MAX_MEMORY_READ_FOR_DEBUG_INFO) {
+		drgn_log_debug(prog,
+			       "vDSO is unreasonably large (%" PRIu64 " bytes); "
+			       "not reading from program",
+			       size);
+		return NULL;
+	}
+
+	_cleanup_free_ char *image = malloc(size);
+	if (!image)
+		return &drgn_enomem;
+	err = drgn_program_read_memory(prog, image, start, size, false);
+	if (err) {
+		// Non-fatal read errors are logged and swallowed.
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "couldn't read vDSO: ");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+
+	_cleanup_elf_end_ Elf *elf = elf_memory(image, size);
+	if (!elf) {
+		drgn_log_debug(prog, "couldn't read vDSO: %s", elf_errmsg(-1));
+		return NULL;
+	}
+	struct drgn_elf_file *file;
+	err = drgn_elf_file_create(module, "[vdso]", -1, image, elf, &file);
+	if (err) {
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+	// image and elf are owned by the drgn_elf_file now.
+	image = NULL;
+	elf = NULL;
+
+	drgn_log_debug(prog, "trying vDSO in %s",
+		       (module->prog->flags & DRGN_PROGRAM_IS_LIVE)
+		       ? "memory" : "core");
+	return drgn_module_maybe_use_elf_file(module, file, false);
+}
+
+// Log, at debug level, the gnu_debugaltlink path and build ID that module
+// wants and the debug file that asked for them, prefixed by how. Does nothing
+// if module doesn't currently want a gnu_debugaltlink file.
+static void
+drgn_module_try_supplementary_debug_file_log(struct drgn_module *module,
+					     const char *how)
+{
+	const char *debug_file_path;
+	const char *debugaltlink_path;
+	if (drgn_module_wanted_supplementary_debug_file(module,
+							&debug_file_path,
+							&debugaltlink_path,
+							NULL, NULL)
+	    != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK)
+		return;
+	const char *debugaltlink_build_id_str =
+		module->wanted_supplementary_debug_file->checksum_str;
+	drgn_log_debug(module->prog,
+		       "%s: %s gnu_debugaltlink %s build ID %s in file %s",
+		       module->name, how, debugaltlink_path,
+		       debugaltlink_build_id_str, debug_file_path);
+}
+
+// Look for the gnu_debugaltlink file that module wants in the standard
+// locations: the gnu_debugaltlink path itself (relative to the debug file's
+// directory if it is relative), then its "/.dwz/" suffix under each absolute
+// configured debug directory.
+//
+// Stops as soon as an attempt returns an error or module no longer wants a
+// supplementary file. Does nothing if module doesn't want a gnu_debugaltlink
+// file.
+static struct drgn_error *
+drgn_module_try_standard_supplementary_files(struct drgn_module *module)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	const char *debug_file_path;
+	const char *debugaltlink_path;
+	if (drgn_module_wanted_supplementary_debug_file(module,
+							&debug_file_path,
+							&debugaltlink_path,
+							NULL, NULL)
+	    != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK)
+		return NULL;
+
+	drgn_module_try_supplementary_debug_file_log(module,
+						     "trying standard paths for");
+
+	STRING_BUILDER(sb);
+	const char *slash;
+	if (debugaltlink_path[0] == '/'
+	    || !(slash = strrchr(debug_file_path, '/'))) {
+		// debugaltlink is absolute, or the debug file doesn't have a
+		// directory component and is therefore in the current working
+		// directory. Try debugaltlink directly.
+		err = drgn_module_try_file_internal(module, debugaltlink_path,
+						    -1, true, NULL);
+	} else {
+		// Try $(dirname $path)/$debugaltlink.
+		// slash + 1 - debug_file_path is the length of the directory
+		// part including the trailing slash.
+		if (!string_builder_appendn(&sb, debug_file_path,
+					    slash + 1 - debug_file_path)
+		    || !string_builder_append(&sb, debugaltlink_path)
+		    || !string_builder_null_terminate(&sb))
+			return &drgn_enomem;
+		err = drgn_module_try_file_internal(module, sb.str, -1, true,
+						    NULL);
+	}
+	// Stop on error or once the supplementary file is no longer wanted.
+	if (err
+	    || module->debug_file_status != DRGN_MODULE_FILE_WANT_SUPPLEMENTARY)
+		return err;
+
+	// All of the Linux distributions that use gnu_debugaltlink that I'm
+	// aware of (Debian, Fedora, SUSE, and their derivatives) put
+	// gnu_debugaltlink files in a ".dwz" subdirectory under the debug
+	// directory (e.g., "/usr/lib/debug/.dwz"). Try the path starting with
+	// the ".dwz" directory under all of the configured debug directories.
+	// This can help in a couple of cases:
+	//
+	// 1. When the gnu_debugaltlink path is absolute (which is the case on
+	//    Debian and its derivatives as of Debian 12/Ubuntu 23.10) and the
+	//    debug directory has been copied to a different path. See
+	//    https://bugs.launchpad.net/ubuntu/+source/gdb/+bug/1818918.
+	// 2. When the gnu_debugaltlink path is relative (which is the case on
+	//    Fedora, SUSE, and their derivatives) and the debug file was found
+	//    outside of the debug directory.
+	const char *dwz = strstr(debugaltlink_path, "/.dwz/");
+	if (dwz) {
+		const char *debug_dir;
+		size_t debug_dir_len;
+		drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) {
+			// Only absolute debug directories are considered.
+			if (debug_dir_len == 0 || debug_dir[0] != '/')
+				continue;
+
+			// Reuse the builder: <debug_dir> followed by the
+			// "/.dwz/..." suffix of the gnu_debugaltlink path.
+			sb.len = 0;
+			if (!string_builder_appendn(&sb, debug_dir,
+						    debug_dir_len)
+			    || !string_builder_append(&sb, dwz)
+			    || !string_builder_null_terminate(&sb))
+				return &drgn_enomem;
+
+			// Don't bother trying debugaltlink directly again.
+			if (strcmp(sb.str, debugaltlink_path) == 0)
+				continue;
+
+			err = drgn_module_try_file_internal(module, sb.str, -1,
+							    true, NULL);
+			if (err
+			    || module->debug_file_status
+			       != DRGN_MODULE_FILE_WANT_SUPPLEMENTARY)
+				return err;
+		}
+	}
+	return NULL;
+}
+
+// Return whether module wants a supplementary file whose generation is greater
+// than orig_supplementary_file_generation, i.e., one that was recorded after
+// the caller sampled the program's generation counter.
+static bool
+drgn_module_wanted_supplementary_debug_file_is_new(struct drgn_module *module,
+						   uint64_t orig_supplementary_file_generation)
+{
+	return module->wanted_supplementary_debug_file
+	       && module->wanted_supplementary_debug_file->generation
+		  > orig_supplementary_file_generation;
+}
+
+// Try a file as drgn_module_try_file_internal() does (same parameters), and
+// then, if doing so made module want a new supplementary file, also look for
+// that supplementary file in the standard locations.
+struct drgn_error *
+drgn_module_try_standard_file(struct drgn_module *module, const char *path,
+			      int fd, bool check_build_id,
+			      const uint32_t *expected_crc)
+{
+	struct drgn_error *err;
+	// Sampled before trying the file so that a newly wanted supplementary
+	// file can be told apart from one that was already wanted.
+	uint64_t orig_supplementary_file_generation =
+		module->prog->dbinfo.supplementary_file_generation;
+	err = drgn_module_try_file_internal(module, path, fd, check_build_id,
+					    expected_crc);
+	if (err)
+		return err;
+	// If the wanted supplementary debug file changed, try finding it again.
+	if (drgn_module_wanted_supplementary_debug_file_is_new(module,
+							       orig_supplementary_file_generation)) {
+		err = drgn_module_try_standard_supplementary_files(module);
+		if (err)
+			return err;
+	}
+	return NULL;
+}
+
+// An entry in /proc/$pid/map_files.
+struct drgn_map_files_segment { + uint64_t start; + uint64_t end; +}; + +DEFINE_VECTOR(drgn_map_files_segment_vector, struct drgn_map_files_segment); + +static inline int drgn_map_files_segment_compare(const void *_a, const void *_b) +{ + const struct drgn_map_files_segment *a = _a; + const struct drgn_map_files_segment *b = _b; + return (a->start > b->start) - (a->start < b->start); +} + +static void +drgn_debug_info_set_map_files_segments(struct drgn_debug_info *dbinfo, + struct drgn_map_files_segment_vector *segments, + bool sorted) +{ + free(dbinfo->map_files_segments); + drgn_map_files_segment_vector_shrink_to_fit(segments); + drgn_map_files_segment_vector_steal(segments, + &dbinfo->map_files_segments, + &dbinfo->num_map_files_segments); + // The Linux kernel always returns these entries in order, but sort it + // just in case. + if (!sorted) { + qsort(dbinfo->map_files_segments, + dbinfo->num_map_files_segments, + sizeof(dbinfo->map_files_segments[0]), + drgn_map_files_segment_compare); + } +} + +static struct drgn_error * +drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, + bool *tried) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; + const uint64_t address = module->shared_library.dynamic_address; + +#define DIR_FORMAT "/proc/%ld/map_files" +#define ENTRY_FORMAT "/%" PRIx64 "-%" PRIx64 + char path[sizeof(DIR_FORMAT ENTRY_FORMAT) + - (sizeof("%ld") - 1) + + max_decimal_length(long) + - 2 * (sizeof("%" PRIx64) - 1) + + 2 * 16]; + int dir_len = sprintf(path, DIR_FORMAT, (long)prog->pid); + + // Check the cache first. 
+ #define less_than_start(a, b) (*(a) < (b)->start) + size_t cache_index = binary_search_gt(prog->dbinfo.map_files_segments, + prog->dbinfo.num_map_files_segments, + &address, less_than_start); + #undef less_than_start + if (cache_index > 0 + && address < prog->dbinfo.map_files_segments[cache_index - 1].end) { + struct drgn_map_files_segment *cache = + &prog->dbinfo.map_files_segments[cache_index - 1]; + sprintf(path + dir_len, ENTRY_FORMAT, cache->start, cache->end); + drgn_log_debug(prog, + "found %s containing dynamic section 0x%" PRIx64 " in map_files cache", + path, address); + int fd = open(path, O_RDONLY); + if (fd >= 0) { + *tried = true; + return drgn_module_try_standard_file(module, path, fd, + false, NULL); + } else { + // We found a match in the cache, but we couldn't open + // it. If it doesn't exist anymore, then we need to + // rebuild the cache. If it failed for any other reason, + // ignore it like we do in the cache miss case. + bool rebuild_cache = errno == ENOENT; + drgn_log_debug(prog, "%s: %m", path); + if (!rebuild_cache) + return NULL; + } + drgn_log_debug(prog, "rebuilding map_files cache"); + path[dir_len] = '\0'; + } +#undef ENTRY_FORMAT +#undef DIR_FORMAT + + // Walk /proc/$pid/map_files, caching it while looking for a match. 
+ _cleanup_closedir_ DIR *dir = opendir(path); + if (!dir) { + if (errno != ENOENT) + return drgn_error_create_os("opendir", errno, path); + drgn_log_debug(prog, "%s: %m", path); + return NULL; + } + _cleanup_(drgn_map_files_segment_vector_deinit) + struct drgn_map_files_segment_vector segments = VECTOR_INIT; + bool sorted = true; + bool found = false; + struct dirent *ent; + while ((errno = 0, ent = readdir(dir))) { + struct drgn_map_files_segment segment; + if (sscanf(ent->d_name, "%" SCNx64 "-%" SCNx64, &segment.start, + &segment.end) != 2) + continue; + + if (!drgn_map_files_segment_vector_empty(&segments) + && segment.start + < drgn_map_files_segment_vector_last(&segments)->start) + sorted = false; + if (!drgn_map_files_segment_vector_append(&segments, &segment)) + return &drgn_enomem; + + if (segment.start <= address && address < segment.end + && !found + && strlen(ent->d_name) + 1 < sizeof(path) - dir_len) { + found = true; + path[dir_len] = '/'; + memcpy(path + dir_len + 1, ent->d_name, + strlen(ent->d_name) + 1); + drgn_log_debug(prog, + "found %s containing dynamic section 0x%" PRIx64, + path, address); + int fd = openat(dirfd(dir), ent->d_name, O_RDONLY); + if (fd >= 0) { + *tried = true; + err = drgn_module_try_standard_file(module, + path, fd, + false, + NULL); + if (err) + return err; + } else { + drgn_log_debug(prog, "%s: %m", path); + } + path[dir_len] = '\0'; + } + } + if (errno) + return drgn_error_create_os("readdir", errno, path); + + drgn_debug_info_set_map_files_segments(&prog->dbinfo, &segments, + sorted); + + if (!found) { + drgn_log_debug(prog, + "didn't find entry in %s containing dynamic section 0x%" PRIx64, + path, address); + } + return NULL; +} + +static struct drgn_error *drgn_module_try_proc_files(struct drgn_module *module, + bool *tried) +{ + struct drgn_program *prog = module->prog; + + *tried = false; + if (module->kind == DRGN_MODULE_MAIN) { +#define FORMAT "/proc/%ld/exe" + char path[sizeof(FORMAT) + - (sizeof("%ld") - 1) + + 
max_decimal_length(long)]; + snprintf(path, sizeof(path), FORMAT, (long)prog->pid); +#undef FORMAT + int fd = open(path, O_RDONLY); + if (fd < 0) { + drgn_log_debug(prog, "%s: %m", path); + return NULL; + } + *tried = true; + return drgn_module_try_standard_file(module, path, fd, false, + NULL); + } else if (module->kind == DRGN_MODULE_SHARED_LIBRARY) { + return drgn_module_try_proc_files_for_shared_library(module, + tried); + } else { + return NULL; + } +} + +static struct drgn_error * +drgn_module_try_files_by_build_id(struct drgn_module *module) +{ + struct drgn_error *err; + + size_t build_id_len; + const char *build_id_str = + drgn_module_build_id(module, NULL, &build_id_len); + // We need at least 2 bytes (4 hex characters) to build the paths. + if (build_id_len < 2) + return NULL; + + STRING_BUILDER(sb); + const char *debug_dir; + size_t debug_dir_len; + drgn_program_for_each_debug_dir(module->prog, debug_dir, debug_dir_len) { + if (debug_dir_len == 0 || debug_dir[0] != '/') + continue; + if (!string_builder_appendn(&sb, debug_dir, debug_dir_len) + || !string_builder_appendf(&sb, "/.build-id/%c%c/%s.debug", + build_id_str[0], build_id_str[1], + &build_id_str[2]) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + // We trust the build ID encoded in the path and don't check it + // again. + if (module->debug_file_status == DRGN_MODULE_FILE_WANT) { + err = drgn_module_try_standard_file(module, sb.str, -1, + false, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + } + if (module->loaded_file_status == DRGN_MODULE_FILE_WANT) { + // Remove the ".debug" extension. 
+ sb.str[sb.len - sizeof(".debug") + 1] = '\0'; + err = drgn_module_try_standard_file(module, sb.str, -1, + false, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + } + sb.len = 0; + } + return NULL; +} + +static struct drgn_error * +drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; + + struct drgn_elf_file *file = module->loaded_file; + if (!file || !file->scns[DRGN_SCN_GNU_DEBUGLINK]) + return NULL; + // We don't cache .gnu_debuglink, and it doesn't need relocation, so + // don't use drgn_elf_file_read_section(). + Elf_Data *data; + err = read_elf_section(file->scns[DRGN_SCN_GNU_DEBUGLINK], &data); + if (err) { + if (!drgn_error_is_fatal(err)) { + drgn_error_log_debug(prog, err, + "%s: couldn't read .gnu_debuglink: ", + file->path); + drgn_error_destroy(err); + err = NULL; + } + return err; + } + + struct drgn_elf_file_section_buffer buffer; + drgn_elf_file_section_buffer_init(&buffer, file, + file->scns[DRGN_SCN_GNU_DEBUGLINK], + data); + const char *debuglink; + size_t debuglink_len; + uint32_t crc; + if ((err = binary_buffer_next_string(&buffer.bb, &debuglink, + &debuglink_len)) + // Align up to 4-byte boundary. + || (err = binary_buffer_skip(&buffer.bb, -(debuglink_len + 1) & 3)) + || (err = binary_buffer_next_u32(&buffer.bb, &crc))) { + if (!drgn_error_is_fatal(err)) { + drgn_error_log_debug(prog, err, ""); + drgn_error_destroy(err); + err = NULL; + } + return err; + } + drgn_log_debug(prog, "%s has debuglink %s CRC 0x%08" PRIx32, file->path, + debuglink, crc); + + STRING_BUILDER(sb); + if (debuglink[0] == '/') { + // debuglink is absolute. Try it directly. + err = drgn_module_try_standard_file(module, debuglink, -1, + false, &crc); + if (err || !drgn_module_wants_file(module)) + return err; + } else if (file->path[0] && debuglink[0]) { + // debuglink is relative. Try it in the debug directories. 
+ const char *slash = strrchr(file->path, '/'); + size_t dirslash_len = slash ? slash - file->path + 1 : 0; + const char *debug_dir; + size_t debug_dir_len; + drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { + // If debug_dir is empty, then try: + // $(dirname $path)/$debuglink + // If debug_dir is relative, then try: + // $(dirname $path)/$debug_dir/$debuglink + // If debug_dir is absolute, then try: + // $debug_dir/$(dirname $path)/$debuglink + if (debug_dir_len > 0 && debug_dir[0] == '/') { + if (file->path[0] != '/') + continue; + if (!string_builder_appendn(&sb, debug_dir, + debug_dir_len)) + return &drgn_enomem; + } + if (!string_builder_appendn(&sb, file->path, + dirslash_len) + || (debug_dir_len > 0 && debug_dir[0] != '/' + && (!string_builder_appendn(&sb, debug_dir, + debug_dir_len) + || !string_builder_appendc(&sb, '/'))) + || !string_builder_appendn(&sb, debuglink, + debuglink_len) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, sb.str, -1, + false, &crc); + if (err || !drgn_module_wants_file(module)) + return err; + sb.len = 0; + } + } + return NULL; +} + +static struct drgn_error * +drgn_module_try_standard_files(struct drgn_module *module, + struct drgn_module_standard_files_state *state) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; + + if (prog->dbinfo.debug_info_path) { + drgn_module_try_files_log(module, + "trying standard paths in \"%s\" for", + prog->dbinfo.debug_info_path); + } else { + drgn_module_try_files_log(module, "trying standard paths for"); + } + + // If we need a supplementary file, try that first. + err = drgn_module_try_standard_supplementary_files(module); + if (err || !drgn_module_wants_file(module)) + return err; + + // If a previous attempt used a loadable file with debug info but didn't + // want both, we might be able to reuse it. 
+ if (module->loaded_file_status == DRGN_MODULE_FILE_WANT) { + struct drgn_elf_file *reuse_file = NULL; + if (module->debug_file && module->debug_file->is_loadable) + reuse_file = module->debug_file; + else if (module->wanted_supplementary_debug_file + && module->wanted_supplementary_debug_file->file->is_loadable) + reuse_file = module->wanted_supplementary_debug_file->file; + if (reuse_file) { + drgn_log_debug(prog, + "reusing loadable debug file %s as loaded file", + reuse_file->path); + err = drgn_module_maybe_use_elf_file(module, reuse_file, + false); + if (err || !drgn_module_wants_file(module)) + return err; + } + } + if (module->debug_file_status == DRGN_MODULE_FILE_WANT + && module->loaded_file + && drgn_elf_file_has_dwarf(module->loaded_file)) { + drgn_log_debug(prog, + "reusing loaded file with debug info %s as debug file", + module->loaded_file->path); + err = drgn_module_maybe_use_elf_file(module, + module->loaded_file, + false); + if (err || !drgn_module_wants_file(module)) + return err; + } + + // First, try methods that are guaranteed to find the right file: + // reading a vDSO from the core dump and opening a file via a magic + // symlink in /proc. + bool tried_proc_symlink = false; + if (module->kind == DRGN_MODULE_VDSO) { + err = drgn_module_try_vdso_in_core(module); + if (err || !drgn_module_wants_file(module)) + return err; + } else if (drgn_program_is_userspace_process(prog)) { + err = drgn_module_try_proc_files(module, &tried_proc_symlink); + if (err || !drgn_module_wants_file(module)) + return err; + } + + // If we already have the build ID, try it now before wasting time with + // the expected paths. If this is a Linux kernel loadable module, this + // can save us from needing the depmod index. If not, it can still save + // us from trying a file with the wrong build ID. 
+ const bool had_build_id = module->build_id_len > 0; + if (had_build_id) { + err = drgn_module_try_files_by_build_id(module); + if (err || !drgn_module_wants_file(module)) + return err; + } + + // Next, try opening things at their expected paths. If this is the + // Linux kernel or a Linux kernel loadable module, try some well-known + // paths. + if (module->kind == DRGN_MODULE_MAIN + && (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) { + err = drgn_module_try_vmlinux_files(module, state); + if (err || !drgn_module_wants_file(module)) + return err; + } else if (module->kind == DRGN_MODULE_RELOCATABLE + && (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) { + err = drgn_module_try_linux_kmod_files(module, state); + if (err || !drgn_module_wants_file(module)) + return err; + // Otherwise, if the module name looks like a path (i.e., it contains a + // slash), try it. The vDSO is embedded in the kernel and isn't on disk, + // so there's no point in trying it. Additionally, if we already tried a + // /proc symlink, then we already tried the file that the path is + // supposed to refer to, so don't try again. + } else if (module->kind != DRGN_MODULE_VDSO + && !tried_proc_symlink + && strchr(module->name, '/')) { + err = drgn_module_try_standard_file(module, module->name, -1, + true, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + } + + // If we didn't have the build ID before, we might have found the loaded + // file and gotten a build ID from it. Try to find the debug file by + // build ID now. + if (!had_build_id) { + err = drgn_module_try_files_by_build_id(module); + if (err || !drgn_module_wants_file(module)) + return err; + } + + // We might have a loaded file with a .gnu_debuglink. Try to find the + // corresponding debug file. 
+ return drgn_module_try_files_by_gnu_debuglink(module); +} + +static void +drgn_module_standard_files_state_deinit(struct drgn_module_standard_files_state *state) +{ + depmod_index_deinit(&state->modules_dep); +} + +static struct drgn_error * +drgn_standard_module_file_find(struct drgn_module * const *modules, + size_t num_modules, void *arg) +{ + struct drgn_error *err; + + _cleanup_(drgn_module_standard_files_state_deinit) + struct drgn_module_standard_files_state state = {}; + for (size_t i = 0; i < num_modules; i++) { + err = drgn_module_try_standard_files(modules[i], &state); + if (err) + return err; + } + return NULL; +} + +#if WITH_DEBUGINFOD +static int count_columns(const char *s, size_t n) +{ + int columns = 0; + while (n > 0) { + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + do { + wchar_t wc; + size_t r = mbrtowc(&wc, s, n, &ps); + if (r == (size_t)-1) // Invalid multibyte sequence. + return -1; + if (r == (size_t)-2) // Incomplete multibyte character. + return -2; + if (r == 0) // Null wide character. + r = 1; + + int w = wcwidth(wc); + if (w < 0) // Nonprintable wide character. + return -3; + s += r; + n -= r; + columns += w; + } while (!mbsinit(&ps)); + } + return columns; +} + +static int truncate_columns(struct string_builder *sb, size_t start, size_t end, + int max_columns) +{ + int columns = 0; + + size_t truncate_len = start; + int truncate_column = 0; + mbstate_t truncate_ps; + memset(&truncate_ps, 0, sizeof(truncate_ps)); + + while (start < end) { + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + do { + wchar_t wc; + size_t r = mbrtowc(&wc, &sb->str[start], end - start, + &ps); + if (r == (size_t)-1) // Invalid multibyte sequence. + return -1; + if (r == (size_t)-2) // Incomplete multibyte character. + return -2; + if (r == 0) // Null wide character. + r = 1; + + int w = wcwidth(wc); + if (w < 0) // Nonprintable wide character. 
+ return -3; + + if (w > max_columns - columns) { + int dots = min(max_columns, 3); + char reset[MB_LEN_MAX]; + size_t reset_len = 0; + if (!mbsinit(&truncate_ps)) { + reset_len = wcrtomb(reset, L'\0', + &truncate_ps) - 1; + } + size_t new_len = (truncate_len + + reset_len + + dots + + (sb->len - end)); + if (!string_builder_reserve(sb, new_len)) + return INT_MIN; + memmove(&sb->str[truncate_len + reset_len + dots], + &sb->str[end], sb->len - end); + memset(&sb->str[truncate_len + reset_len], '.', + dots); + memcpy(&sb->str[truncate_len], reset, + reset_len); + sb->len = new_len; + return truncate_column + dots; + } + + start += r; + columns += w; + if (columns <= max_columns - 3) { + truncate_len = start; + truncate_column = columns; + memcpy(&truncate_ps, &ps, sizeof(ps)); + } + } while (!mbsinit(&ps)); + } + return columns; +} + +static void reset_shift_state(struct string_builder *sb, mbstate_t *ps) +{ + if (!mbsinit(ps)) + sb->len += wcrtomb(&sb->str[sb->len], L'\0', ps) - 1; +} + +static bool write_unicode_progress_bar(struct string_builder *sb, int columns, + double ratio) +{ + size_t orig_len = sb->len; + + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + + // "Right one eighth block" character. + size_t r = wcrtomb(&sb->str[sb->len], L'\u2595', &ps); + if (r == (size_t)-1) + return false; + sb->len += r; + + // + 0.25 so that we round up if the piece would be at least 75% full. + int eighths = columns * ratio * 8.0 + 0.25; + int blocks = eighths / 8; + int i; + for (i = 0; i < blocks; i++) { + // "Full block" character. + r = wcrtomb(&sb->str[sb->len], L'\u2588', &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + } + // "Left one eighth block" through "left seven eighths block" + // characters. 
+ static const wchar_t eighths_blocks[7] = + L"\u258f\u258e\u258d\u258c\u258b\u258a\u2589"; + if (eighths % 8 != 0) { + r = wcrtomb(&sb->str[sb->len], eighths_blocks[eighths % 8 - 1], + &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + i++; + } + + for (; i < columns; i++) { + r = wcrtomb(&sb->str[sb->len], L' ', &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + } + + // "Left one eighth block" character. + r = wcrtomb(&sb->str[sb->len], L'\u258f', &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + + reset_shift_state(sb, &ps); + return true; + +undo: + sb->len = orig_len; + return false; +} + +static void write_ascii_progress_bar(struct string_builder *sb, int columns, + double ratio) +{ + sb->str[sb->len++] = '['; + // + 0.25 so that we round up if the block would be at least 75% full. + int blocks = columns * ratio + 0.25; + memset(&sb->str[sb->len], '#', blocks); + sb->len += blocks; + memset(&sb->str[sb->len], ' ', columns - blocks); + sb->len += columns - blocks; + sb->str[sb->len++] = ']'; +} + +static bool write_unicode_spinner(struct string_builder *sb, int pos) +{ + static const wchar_t spinner[] = { + L'\u2596', // Quadrant lower left + L'\u2598', // Quadrant upper left + L'\u259d', // Quadrant upper right + L'\u2597', // Quadrant lower right + }; + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + size_t r = wcrtomb(&sb->str[sb->len], + spinner[pos % array_size(spinner)], &ps); + if (r == (size_t)-1) + return false; + sb->len += r; + reset_shift_state(sb, &ps); + return true; +} + +static void write_ascii_spinner(struct string_builder *sb, int pos) +{ + static const char spinner[] = { '|', '/', '-', '\\' }; + sb->str[sb->len++] = spinner[pos % array_size(spinner)]; +} + +// debuginfod_set_user_data() and debuginfod_get_user_data() were added in +// elfutils 0.179. Before that, we emulate them with a thread-local variable. 
+#if !_ELFUTILS_PREREQ(0, 179) +static _Thread_local void *drgn_debuginfod_user_data; +#endif + +// This is called with: +// - a >= 0 && b == 0 while cleaning the debuginfod cache, where a is the number +// of files in the cache that have been checked. +// - a >= 0 && b == 0 while waiting to read the first chunk of data from a +// debuginfod server, where a is an increasing counter. Note that this cannot +// be distinguished from the previous case. +// - a >= 0 && b > 0 while downloading, where a is the number of bytes +// downloaded and b is the total size to download in bytes. +// - a >= 0 && b <= 0 while downloading, where a is the number of bytes +// downloaded and the total size is not known. This can be distinguished from +// the first two cases because debuginfod_get_url() will return non-NULL. +// - a < 0 && b >= 0 when the download has finished successfully. b is the +// downloaded file descriptor. +// - a < 0 && b < 0 when the download failed. b is a negative errno. +static void drgn_log_debuginfod_progress(debuginfod_client *client, long a, + long b) +{ +#if _ELFUTILS_PREREQ(0, 179) + struct drgn_program *prog = drgn_debuginfod_get_user_data(client); +#else + struct drgn_program *prog = drgn_debuginfod_user_data; +#endif + + const bool done = a < 0; + + // If we already started logging progress for this download when it + // failed, we log the error like progress below. Otherwise, the download + // failed very early, so we only log a debug message. 
+ if (done && b < 0 && !prog->dbinfo.logged_debuginfod_progress) { + if (b != -ENOSYS) { + errno = -b; + drgn_log_debug(prog, + "%s: couldn't download%s from debuginfod: %m", + prog->dbinfo.debuginfod_current_name, + prog->dbinfo.debuginfod_current_type); + } else if (!prog->dbinfo.logged_no_debuginfod) { + drgn_log_debug(prog, + "no debuginfod servers configured; " + "try setting the DEBUGINFOD_URLS environment variable"); + prog->dbinfo.logged_no_debuginfod = true; + } + return; + } + prog->dbinfo.logged_debuginfod_progress = true; + + int columns; + FILE *file = drgn_program_get_progress_file(prog, &columns); + + // ANSI escape sequence to clear the current line and return the cursor + // to the beginning of the line. + static const char ansi_erase_line[] = "\33[2K\r"; + + // Once we know what URL we are downloading from, log it. + if (!prog->dbinfo.debuginfod_have_url) { + // debuginfod_get_url() was added in elfutils 0.179. Before + // that, we have to assume that we have a URL. +#if _ELFUTILS_PREREQ(0, 179) + const char *url = drgn_debuginfod_get_url(client); + if (url) { + prog->dbinfo.debuginfod_have_url = true; + // Erase the current line since we may have logged + // progress. + if (columns >= 0) { + fwrite(ansi_erase_line, 1, + sizeof(ansi_erase_line) - 1, file); + fflush(file); + } + drgn_log_debug(prog, "downloading from debuginfod at %s", url); + } +#else + prog->dbinfo.debuginfod_have_url = true; +#endif + } + + // If we succeeded without ever getting a URL, it must have been cached. + if (done && b >= 0 && !prog->dbinfo.debuginfod_have_url) { + // We may have logged download progress when we were actually + // cleaning the cache. Clear it to avoid confusion. 
+ if (columns >= 0) { + fwrite(ansi_erase_line, 1, sizeof(ansi_erase_line) - 1, + file); + fflush(file); + } + drgn_log_debug(prog, "%s: found%s in debuginfod cache", + prog->dbinfo.debuginfod_current_name, + prog->dbinfo.debuginfod_current_type); + return; + } + + if (!file) + return; + + // We only do the progress animation if we would have at least one + // column for a progress bar. Using the calculation for bar_columns + // below: + // + // columns - (floor(columns / 2) - 10) - 2 - 4 >= 1 + // => columns - floor(columns / 2) >= 17 + // => ceil(columns / 2) >= 17 + // => columns >= 33 + bool animate = columns >= 33; + const bool orig_animate = animate; + + STRING_BUILDER(sb); + + if (animate && !string_builder_appendc(&sb, '\r')) + return; + + int fill_columns = 0; + int bar_columns = 0; + if (animate) { + if (done) { + // We need to erase anything left in the line with + // spaces. + fill_columns = columns; + } else if (b > 0) { + // Use half of the line plus a bit for the name and + // download size so that it doesn't get too short in + // small terminals. + fill_columns = columns / 2 + 10; + // Use the rest for the progress bar. + bar_columns = (columns - fill_columns + - 2 // Ends of progress bar + - 4 // " XX%" + ); + } else { + // Use the whole line, minus the spinner, for the name + // and download size + fill_columns = columns - 1; + } + } + + if (!string_builder_append(&sb, + done && b >= 0 + ? 
"Downloaded " : "Downloading ") + || !string_builder_append(&sb, + prog->dbinfo.debuginfod_current_name) + || !string_builder_append(&sb, + prog->dbinfo.debuginfod_current_type)) + return; + + size_t download_size_start = sb.len; + if (done && b < 0) { + errno = -b; + if (!string_builder_appendf(&sb, " failed: %m")) + return; + } else if (prog->dbinfo.debuginfod_have_url) { + intmax_t download_size; + if (done) { + struct stat st; + if (fstat(b, &st) < 0) { + drgn_log_warning(prog, "fstat: %m"); + return; + } + download_size = st.st_size; + } else { + download_size = a; + } + if (download_size < 2048) { + if (!string_builder_appendf(&sb, " (%" PRIdMAX " B)", + download_size)) + return; + } else { + static const char prefixes[] = "KMGTPEZY"; + int i = 1; + while (i < sizeof(prefixes) - 1 + && (download_size >> (10 * i)) >= 2048) + i++; + double unit = INTMAX_C(1) << (10 * i); + if (!string_builder_appendf(&sb, " (%.1f %ciB)", + download_size / unit, + prefixes[i - 1])) + return; + } + } + + if (animate) { + int current_column; + if (done) { + // Start at byte 1 to skip the "\r". + current_column = count_columns(&sb.str[1], sb.len - 1); + } else { + int download_size_len = sb.len - download_size_start; + // Leave room for the download size and an extra space. + int max_columns = + max(fill_columns - download_size_len - 1, 0); + // Start at byte 1 to skip the "\r". + current_column = truncate_columns(&sb, 1, + download_size_start, + max_columns); + if (current_column == INT_MIN) + return; // Memory allocation failed. + if (current_column >= 0) + current_column += download_size_len; + } + if (current_column < 0) { + // We either couldn't decode the string or the string + // contained a nonprintable character. Give up on the + // animation. 
+ animate = false; + } else if (current_column < fill_columns) { + if (!string_builder_reserve_for_append(&sb, + fill_columns + - current_column)) + return; + memset(&sb.str[sb.len], ' ', + fill_columns - current_column); + sb.len += fill_columns - current_column; + } + } + + // If we can't encode any of the following Unicode characters in the + // current locale, we fall back to ASCII. + if (!done && b > 0) { + // Clamp the ratio in case we get bogus sizes. + double ratio = a < b ? (double)a / (double)b : 1.0; + if (animate) { + // One multibyte character for each bar column, one for + // each end, and one to reset the shift state. + if (!string_builder_reserve_for_append(&sb, + (bar_columns + 3) + * MB_CUR_MAX)) + return; + if (!write_unicode_progress_bar(&sb, bar_columns, + ratio)) { + write_ascii_progress_bar(&sb, bar_columns, + ratio); + } + } + unsigned int percent = 100.0 * ratio; + // We're not 100% done until we're called with done = true. + if (percent > 99) + percent = 99; + if (!string_builder_appendf(&sb, " %*u%%", animate ? 2 : 0, + percent)) + return; + } else if (!done && animate) { + // One multibyte character for the spinner, one to reset the + // shift state. + if (!string_builder_reserve_for_append(&sb, 2 * MB_CUR_MAX)) + return; + unsigned int pos = prog->dbinfo.debuginfod_spinner_position++; + if (!write_unicode_spinner(&sb, pos)) + write_ascii_spinner(&sb, pos); + } + + if ((done || !animate) && !string_builder_appendc(&sb, '\n')) + return; + + // If we were originally animating but gave up, we need to skip the + // "\r". + fwrite(sb.str + (orig_animate && !animate ? 1 : 0), 1, + sb.len - (orig_animate && !animate ? 
1 : 0), file); +} + +static struct sigaction drgn_cancel_debuginfod_oldact; +static volatile sig_atomic_t drgn_cancel_debuginfod; +static void drgn_cancel_debuginfod_handler(int sig) +{ + drgn_cancel_debuginfod = 1; + drgn_cancel_debuginfod_oldact.sa_handler(sig); +} +static void drgn_cancel_debuginfod_sigaction(int sig, siginfo_t *info, + void *ucontext) +{ + drgn_cancel_debuginfod = 1; + drgn_cancel_debuginfod_oldact.sa_sigaction(sig, info, ucontext); +} +static bool drgn_prepare_debuginfod_find(struct drgn_program *prog) +{ +#if !_ELFUTILS_PREREQ(0, 179) + drgn_debuginfod_user_data = prog; +#endif + // If the application has a signal handler for SIGINT, temporarily wrap + // it with our own signal handler that sets a flag for the debuginfod + // progressfn. This allows Ctrl+C to interrupt a download in + // applications that handle SIGINT (like the Python interpreter). + drgn_cancel_debuginfod = 0; + if (sigaction(SIGINT, NULL, &drgn_cancel_debuginfod_oldact) != 0) + return false; + struct sigaction act = drgn_cancel_debuginfod_oldact; + if ((act.sa_flags & SA_SIGINFO) + // SIG_DFL and SIG_IGN are meant to be assigned to sa_handler, but + // the Linux kernel treats them the same for sa_sigaction. 
+ && act.sa_sigaction != (void *)SIG_DFL + && act.sa_sigaction != (void *)SIG_IGN) + act.sa_sigaction = drgn_cancel_debuginfod_sigaction; + else if (!(act.sa_flags & SA_SIGINFO) + && act.sa_handler != SIG_DFL && act.sa_handler != SIG_IGN) + act.sa_handler = drgn_cancel_debuginfod_handler; + else + return false; + return sigaction(SIGINT, &act, NULL) == 0; +} +static void drgn_finish_debuginfod_find(bool restore_sigaction) +{ + if (restore_sigaction) + sigaction(SIGINT, &drgn_cancel_debuginfod_oldact, NULL); +} + +static int drgn_debuginfod_progressfn(debuginfod_client *client, long a, long b) +{ + if (drgn_cancel_debuginfod) + return 1; + if (a >= 0) + drgn_log_debuginfod_progress(client, a, b); + return 0; +} + +static struct drgn_error * +drgn_module_try_file_from_debuginfod(struct drgn_module *module, + const char *build_id_str, + bool debug, bool supplementary, + struct string_builder *cache_sb) +{ + struct drgn_program *prog = module->prog; + + if (!string_builder_appendf(cache_sb, "/%s/%s", build_id_str, + debug ? "debuginfo" : "executable") + || !string_builder_null_terminate(cache_sb)) + return &drgn_enomem; + + prog->dbinfo.debuginfod_current_name = module->name; + if (supplementary) + prog->dbinfo.debuginfod_current_type = " supplementary debug info"; + else if (debug) + prog->dbinfo.debuginfod_current_type = " debug info"; + else + prog->dbinfo.debuginfod_current_type = ""; + prog->dbinfo.debuginfod_have_url = false; + prog->dbinfo.logged_debuginfod_progress = false; + bool restore_sigaction = drgn_prepare_debuginfod_find(prog); + char *path; + auto find = debug + ? 
drgn_debuginfod_find_debuginfo + : drgn_debuginfod_find_executable; + int fd = find(prog->dbinfo.debuginfod_client, + (const unsigned char *)build_id_str, 0, &path); + drgn_finish_debuginfod_find(restore_sigaction); + if (fd == -ENOENT && drgn_cancel_debuginfod) { + // Before elfutils commit 5527216460c6 ("debuginfod-client.c: + // Skip empty file creation for cancelled queries") (in elfutils + // 0.190), libdebuginfod has a nasty bug that causes it to cache + // a cancelled download as a negative hit. Work around it by + // deleting the cache file. + unlink(cache_sb->str); + return drgn_error_create_os("download cancelled", EINTR, NULL); + } + drgn_log_debuginfod_progress(prog->dbinfo.debuginfod_client, -1, fd); + if (fd >= 0) { + struct drgn_error *err = + drgn_module_try_file(module, path, fd, true); + free(path); + if (err) + return err; + } + return NULL; +} + +static struct drgn_error * +drgn_module_try_supplementary_file_from_debuginfod(struct drgn_module *module, + struct string_builder *cache_sb) +{ + if (drgn_module_wanted_supplementary_debug_file(module, NULL, NULL, + NULL, NULL) + != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK) + return NULL; + const char *gnu_debugaltlink_build_id_str = + module->wanted_supplementary_debug_file->checksum_str; + return drgn_module_try_file_from_debuginfod(module, + gnu_debugaltlink_build_id_str, + true, true, cache_sb); +} + +static struct drgn_error * +drgn_debuginfod_find(struct drgn_module * const *modules, size_t num_modules, + void *arg) +{ + struct drgn_error *err; + struct drgn_program *prog = arg; + + if (!prog->dbinfo.debuginfod_client) { + prog->dbinfo.debuginfod_client = drgn_debuginfod_begin(); + if (!prog->dbinfo.debuginfod_client) { + return drgn_error_create(DRGN_ERROR_OTHER, + "couldn't create debuginfod client session"); + } + drgn_debuginfod_set_progressfn(prog->dbinfo.debuginfod_client, + drgn_debuginfod_progressfn); +#if _ELFUTILS_PREREQ(0, 179) + 
drgn_debuginfod_set_user_data(prog->dbinfo.debuginfod_client, + prog); +#endif + } + + STRING_BUILDER(sb); + const char *env; + if ((env = getenv("DEBUGINFOD_CACHE_PATH"))) { + if (!string_builder_append(&sb, env)) + return &drgn_enomem; + } else { + env = getenv("HOME") ?: "/"; + if (!string_builder_append(&sb, env) + || !string_builder_append(&sb, "/.debuginfod_client_cache") + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + struct stat st; + if (stat(sb.str, &st) < 0) { + sb.len = 0; + if ((env = getenv("XDG_CACHE_HOME"))) { + if (!string_builder_append(&sb, env) + || !string_builder_append(&sb, + "/debuginfod_client")) + return &drgn_enomem; + } else if (!string_builder_append(&sb, + getenv("HOME") ?: "/") + || !string_builder_append(&sb, + "/.cache/debuginfod_client")) { + return &drgn_enomem; + } + } + } + + size_t cache_dir_len = sb.len; + for (size_t i = 0; i < num_modules; i++) { + struct drgn_module *module = modules[i]; + const char *build_id_str = + drgn_module_build_id(module, NULL, NULL); + if (!build_id_str) { + drgn_module_try_files_log(module, "can't query debuginfod for"); + continue; + } + + drgn_module_try_files_log(module, "querying debuginfod for"); + + // If we need a supplementary file, try that first. + err = drgn_module_try_supplementary_file_from_debuginfod(module, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + + // If we need the debug file (including if we needed a + // gnu_debugaltlink file and didn't find it), try that next. + if (drgn_module_wants_debug_file(module)) { + uint64_t orig_supplementary_file_generation = + prog->dbinfo.supplementary_file_generation; + err = drgn_module_try_file_from_debuginfod(module, + build_id_str, + true, false, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + // If the wanted supplementary debug file changed, try + // finding it again. 
+ if (drgn_module_wanted_supplementary_debug_file_is_new(module, + orig_supplementary_file_generation)) { + err = drgn_module_try_supplementary_file_from_debuginfod(module, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + } + } + + if (drgn_module_wants_loaded_file(module)) { + err = drgn_module_try_file_from_debuginfod(module, + build_id_str, + false, false, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + } + } + return NULL; +} +#endif // WITH_DEBUGINFOD + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_try_file(struct drgn_module *module, + const char *path, int fd, bool force) +{ + if (!drgn_module_wants_file(module)) { + drgn_log_debug(module->prog, "%s: ignoring unwanted file %s", + module->name, path); + if (fd >= 0) + close(fd); + return NULL; + } + drgn_module_try_files_log(module, "trying provided file as"); + return drgn_module_try_file_internal(module, path, fd, !force, NULL); +} + +LIBDRGN_PUBLIC +void drgn_module_iterator_destroy(struct drgn_module_iterator *it) +{ + if (it) { + if (it->destroy) + it->destroy(it); + else + free(it); + } +} + +LIBDRGN_PUBLIC struct drgn_program * +drgn_module_iterator_program(const struct drgn_module_iterator *it) +{ + return it->prog; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_iterator_next(struct drgn_module_iterator *it, + struct drgn_module **ret, + bool *new_ret) +{ + if (!it->next) { + *ret = NULL; + return NULL; + } + struct drgn_error *err = it->next(it, ret, new_ret); + if (err || !*ret) + it->next = NULL; + return err; +} + +struct drgn_created_module_iterator { + struct drgn_module_iterator it; + struct drgn_module_table_iterator table_it; + uint64_t generation; + bool yielded_main; +}; + +static struct drgn_error * +drgn_created_module_iterator_next(struct drgn_module_iterator *_it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_created_module_iterator *it = + container_of(_it, struct drgn_created_module_iterator, it); + struct drgn_debug_info *dbinfo = 
&it->it.prog->dbinfo; + if (!it->yielded_main) { + it->yielded_main = true; + it->table_it = drgn_module_table_first(&dbinfo->modules); + it->generation = dbinfo->modules_generation; + if (dbinfo->main_module) { + *ret = dbinfo->main_module; + if (new_ret) + *new_ret = false; + return NULL; + } + } + if (it->generation != dbinfo->modules_generation) { + return drgn_error_create(DRGN_ERROR_OTHER, + "modules changed during iteration"); + } + if (it->table_it.entry) { + *ret = *it->table_it.entry; + if (new_ret) + *new_ret = false; + it->table_it = drgn_module_table_next(it->table_it); + } else { + *ret = NULL; + } + return NULL; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_created_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_created_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->it, prog, NULL, + drgn_created_module_iterator_next); + *ret = &it->it; + return NULL; +} - GElf_Ehdr ehdr; - struct core_get_phdr_arg arg; - read_ehdr(&ehdr_buf, &ehdr, &arg.is_64_bit, &arg.bswap); - if (ehdr.e_type == ET_CORE || - ehdr.e_phnum == 0 || - ehdr.e_phentsize != - (arg.is_64_bit ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr))) { - ret->ignore = true; +struct drgn_mapped_file { + const char *path; + // Mapped address range containing file offset 0. This is used to find + // the file header. 
+ uint64_t offset0_vaddr, offset0_size; +}; + +static struct drgn_mapped_file *drgn_mapped_file_create(const char *path) +{ + struct drgn_mapped_file *file = calloc(1, sizeof(*file)); + if (file) + file->path = path; + return file; +} + +static void drgn_mapped_file_destroy(struct drgn_mapped_file *file) +{ + free(file); +} + +struct drgn_mapped_file_segment { + uint64_t start; + uint64_t end; + uint64_t file_offset; + struct drgn_mapped_file *file; +}; + +DEFINE_VECTOR(drgn_mapped_file_segment_vector, struct drgn_mapped_file_segment); + +struct drgn_mapped_file_segments { + struct drgn_mapped_file_segment_vector vector; + // Whether the segments are already sorted by start address. This should + // always be true for both /proc/$pid/maps and NT_FILE, but we check and + // sort afterwards if not just in case. + bool sorted; +}; + +#define DRGN_MAPPED_FILE_SEGMENTS_INIT { VECTOR_INIT, true } + +static void drgn_mapped_file_segments_abort(struct drgn_mapped_file_segments *segments) +{ + drgn_mapped_file_segment_vector_deinit(&segments->vector); +} + +static struct drgn_error * +drgn_add_mapped_file_segment(struct drgn_mapped_file_segments *segments, + uint64_t start, uint64_t end, uint64_t file_offset, + struct drgn_mapped_file *file) +{ + assert(start < end); + if (file_offset == 0 && file->offset0_size == 0) { + file->offset0_vaddr = start; + file->offset0_size = end - start; + } + if (!drgn_mapped_file_segment_vector_empty(&segments->vector)) { + struct drgn_mapped_file_segment *last = + drgn_mapped_file_segment_vector_last(&segments->vector); + // If the last segment is from the same file and contiguous with + // this one, merge into that one. 
+ if (file == last->file && start == last->end + && file_offset == last->file_offset + (last->end - last->start)) { + last->end = end; + return NULL; + } + if (start < last->start) + segments->sorted = false; + } + struct drgn_mapped_file_segment *entry = + drgn_mapped_file_segment_vector_append_entry(&segments->vector); + if (!entry) + return &drgn_enomem; + entry->start = start; + entry->end = end; + entry->file_offset = file_offset; + entry->file = file; + return NULL; +} + +enum { + // Yield main module next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_MAIN, + // Yield vDSO module next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_VDSO, + // Get first link_map from r_debug next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_R_DEBUG, + // Yield module from link_map list next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP, + // States after this are the same as + // USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP but also count how + // many link_map entries we've iterated. +}; + +// Arbitrary limit on the number iterations to make through the link_map list in +// order to avoid getting stuck in a cycle. +static const int MAX_LINK_MAP_LIST_ITERATIONS = 10000; + +struct userspace_loaded_module_iterator { + struct drgn_module_iterator it; + int state; + bool read_main_phdrs; + bool have_main_dyn; + bool have_vdso_dyn; + + struct drgn_mapped_file_segment *file_segments; + size_t num_file_segments; + + uint64_t main_phoff; + uint64_t main_bias; + uint64_t main_dyn_vaddr; + uint64_t main_dyn_memsz; + uint64_t vdso_dyn_vaddr; + uint64_t link_map; + + // Temporary buffer for reading program headers. + void *phdrs_buf; + size_t phdrs_buf_capacity; + + // Temporary buffer for reading segment contents. 
+ void *segment_buf; + size_t segment_buf_capacity; +}; + +static void +userspace_loaded_module_iterator_deinit(struct userspace_loaded_module_iterator *it) +{ + free(it->segment_buf); + free(it->phdrs_buf); + free(it->file_segments); +} + +static inline int drgn_mapped_file_segment_compare(const void *_a, + const void *_b) +{ + const struct drgn_mapped_file_segment *a = _a; + const struct drgn_mapped_file_segment *b = _b; + return (a->start > b->start) - (a->start < b->start); +} + +static void +userspace_loaded_module_iterator_set_file_segments(struct userspace_loaded_module_iterator *it, + struct drgn_mapped_file_segments *segments) +{ + // Don't bother shrinking to fit since this is short-lived. + drgn_mapped_file_segment_vector_steal(&segments->vector, + &it->file_segments, + &it->num_file_segments); + if (!segments->sorted) { + qsort(it->file_segments, it->num_file_segments, + sizeof(it->file_segments[0]), + drgn_mapped_file_segment_compare); + } +} + +static struct drgn_mapped_file_segment * +find_mapped_file_segment(struct userspace_loaded_module_iterator *it, + uint64_t address) +{ + #define less_than_start(a, b) (*(a) < (b)->start) + size_t i = binary_search_gt(it->file_segments, it->num_file_segments, + &address, less_than_start); + #undef less_than_start + if (i == 0 || address >= it->file_segments[i - 1].end) return NULL; + return &it->file_segments[i - 1]; +} + +static struct drgn_error * +userspace_loaded_module_iterator_read_ehdr(struct userspace_loaded_module_iterator *it, + uint64_t address, GElf_Ehdr *ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + err = drgn_program_read_memory(prog, ret, address, sizeof(*ret), false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read ELF header at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_not_found; + } else if (err) { + return err; + } + if (memcmp(ret->e_ident, ELFMAG, SELFMAG) != 0) { + 
drgn_log_debug(prog, "invalid ELF header magic"); + return &drgn_not_found; + } + if (ret->e_ident[EI_CLASS] != + (drgn_platform_is_64_bit(&prog->platform) + ? ELFCLASS64 : ELFCLASS32)) { + drgn_log_debug(prog, + "ELF header class (%u) does not match program", + ret->e_ident[EI_CLASS]); + return &drgn_not_found; } + if (ret->e_ident[EI_DATA] != + (drgn_platform_is_little_endian(&prog->platform) + ? ELFDATA2LSB : ELFDATA2MSB)) { + drgn_log_debug(prog, + "ELF header data encoding (%u) does not match program", + ret->e_ident[EI_DATA]); + return &drgn_not_found; + } +#define visit_elf_ehdr_members(visit_scalar_member, visit_raw_member) do { \ + visit_raw_member(e_ident); \ + visit_scalar_member(e_type); \ + visit_scalar_member(e_machine); \ + visit_scalar_member(e_version); \ + visit_scalar_member(e_entry); \ + visit_scalar_member(e_phoff); \ + visit_scalar_member(e_shoff); \ + visit_scalar_member(e_flags); \ + visit_scalar_member(e_ehsize); \ + visit_scalar_member(e_phentsize); \ + visit_scalar_member(e_phnum); \ + visit_scalar_member(e_shentsize); \ + visit_scalar_member(e_shnum); \ + visit_scalar_member(e_shstrndx); \ +} while (0) + deserialize_struct64_inplace(ret, Elf32_Ehdr, visit_elf_ehdr_members, + drgn_platform_is_64_bit(&prog->platform), + drgn_platform_bswap(&prog->platform)); +#undef visit_elf_ehdr_members + if (ret->e_phentsize != + (drgn_platform_is_64_bit(&prog->platform) + ? 
sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr))) { + drgn_log_debug(prog, + "ELF program header entry size (%u) does not match class", + ret->e_phentsize); + return &drgn_not_found; + } + return NULL; +} - if (ehdr.e_phnum > SIZE_MAX / ehdr.e_phentsize || - !alloc_or_reuse(&core->phdr_buf, &core->phdr_buf_capacity, - ehdr.e_phnum * ehdr.e_phentsize)) +static struct drgn_error * +userspace_loaded_module_iterator_read_phdrs(struct userspace_loaded_module_iterator *it, + uint64_t address, uint16_t phnum) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + uint32_t phentsize = + (drgn_platform_is_64_bit(&prog->platform) + ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr)); + uint32_t phdrs_size = (uint32_t)phnum * phentsize; + if (phdrs_size > MAX_MEMORY_READ_FOR_DEBUG_INFO) { + drgn_log_debug(prog, + "program header table is unreasonably large (%" PRIu32 " bytes); ignoring", + phdrs_size); + return &drgn_not_found; + } + if (!alloc_or_reuse(&it->phdrs_buf, &it->phdrs_buf_capacity, + phdrs_size)) return &drgn_enomem; + err = drgn_program_read_memory(prog, it->phdrs_buf, address, phdrs_size, + false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read program header table at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_not_found; + } + return err; +} + +static void +userspace_loaded_module_iterator_phdr(struct userspace_loaded_module_iterator *it, + size_t i, GElf_Phdr *ret) +{ + struct drgn_program *prog = it->it.prog; + size_t phentsize = + (drgn_platform_is_64_bit(&prog->platform) + ? 
sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr)); +#define visit_phdr_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(p_type); \ + visit_scalar_member(p_flags); \ + visit_scalar_member(p_offset); \ + visit_scalar_member(p_vaddr); \ + visit_scalar_member(p_paddr); \ + visit_scalar_member(p_filesz); \ + visit_scalar_member(p_memsz); \ + visit_scalar_member(p_align); \ +} while (0) + deserialize_struct64(ret, Elf32_Phdr, visit_phdr_members, + (char *)it->phdrs_buf + i * phentsize, + drgn_platform_is_64_bit(&prog->platform), + drgn_platform_bswap(&prog->platform)); +#undef visit_phdr_members +} - /* - * Check whether the mapped segment containing the file header also - * contains the program headers. This seems to be the case in practice. - */ - uint64_t ehdr_segment_file_end = - (ehdr_segment->file_offset + - (ehdr_segment->end - ehdr_segment->start)); - if (ehdr_segment_file_end < ehdr.e_phoff || - ehdr_segment_file_end - ehdr.e_phoff < - ehdr.e_phnum * ehdr.e_phentsize) +static struct drgn_error * +userspace_loaded_module_iterator_read_dynamic(struct userspace_loaded_module_iterator *it, + uint64_t address, uint64_t size, + size_t *num_dyn_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (size > MAX_MEMORY_READ_FOR_DEBUG_INFO) { + drgn_log_debug(prog, + "dynamic section is unreasonably large (%" PRIu64 " bytes); ignoring", + size); + return &drgn_not_found; + } + size_t dyn_size = + (drgn_platform_is_64_bit(&prog->platform) + ? 
sizeof(Elf64_Dyn) : sizeof(Elf32_Dyn)); + uint64_t num_dyn = size / dyn_size; + *num_dyn_ret = num_dyn; + if (num_dyn == 0) return NULL; - err = drgn_program_read_memory(prog, core->phdr_buf, - ehdr_segment->start + ehdr.e_phoff, - ehdr.e_phnum * ehdr.e_phentsize, false); - if (err) { - if (err->code == DRGN_ERROR_FAULT) { - drgn_error_destroy(err); - err = NULL; - } - return err; + if (!alloc_or_reuse(&it->segment_buf, &it->segment_buf_capacity, + num_dyn * dyn_size)) + return &drgn_enomem; + err = drgn_program_read_memory(prog, it->segment_buf, address, + num_dyn * dyn_size, false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read dynamic section at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_not_found; } - arg.phdr_buf = core->phdr_buf; - - /* - * In theory, if the program has a huge number of program headers, they - * may not all be dumped. However, the largest binary I was able to find - * still had all program headers within 1k. - * - * It'd be more reliable to determine the bias based on the headers that - * were saved, use that to read the build ID, use that to find the ELF - * file, and then determine the address range directly from the ELF - * file. However, we need the address range to report the build ID to - * libdwfl, so we do it this way. - */ - uint64_t bias; - err = userspace_core_elf_address_range(ehdr.e_type, ehdr.e_phnum, - core_get_phdr, &arg, segments, - num_segments, ehdr_segment, - &bias, &ret->start, &ret->end); - if (err) + return err; +} + +static void +userspace_loaded_module_iterator_dyn(struct userspace_loaded_module_iterator *it, + size_t i, GElf_Dyn *ret) +{ + struct drgn_program *prog = it->it.prog; + size_t dyn_size = + (drgn_platform_is_64_bit(&prog->platform) + ? 
sizeof(Elf64_Dyn) : sizeof(Elf32_Dyn)); +#define visit_elf_dyn_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(d_tag); \ + visit_scalar_member(d_un.d_val); \ +} while (0) + deserialize_struct64(ret, Elf32_Dyn, visit_elf_dyn_members, + (char *)it->segment_buf + i * dyn_size, + drgn_platform_is_64_bit(&prog->platform), + drgn_platform_bswap(&prog->platform)); +#undef visit_elf_dyn_members +} + +static struct drgn_error * +userspace_loaded_module_iterator_read_main_phdrs(struct userspace_loaded_module_iterator *it) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + // The main bias is the difference between AT_PHDR and the virtual + // address of the program headers in the ELF file. We determine the + // latter by finding the PT_LOAD segment containing e_phoff. We would + // use PT_PHDR instead, but static binaries usually don't have it, and + // we can't assume a bias of 0 for static PIE binaries. + // + // If we couldn't find the file offset of the program headers, we can't + // find anything else. + if (it->main_phoff == 0) + return NULL; + + drgn_log_debug(prog, "reading program header table from AT_PHDR"); + + err = userspace_loaded_module_iterator_read_phdrs(it, + prog->auxv.at_phdr, + prog->auxv.at_phnum); + if (err == &drgn_not_found) + return NULL; + else if (err) return err; - if (ret->start >= ret->end) { - ret->ignore = true; + + // Silence -Wmaybe-uninitialized false positives on dyn_vaddr and + // dyn_memsz last seen with GCC 9. 
+ uint64_t phdr_vaddr, dyn_vaddr = 0, dyn_memsz = 0; + bool have_phdr_vaddr = false, have_dyn = false; + for (uint16_t i = 0; i < prog->auxv.at_phnum; i++) { + GElf_Phdr phdr; + userspace_loaded_module_iterator_phdr(it, i, &phdr); + if (phdr.p_type == PT_LOAD && phdr.p_offset <= it->main_phoff + && it->main_phoff < phdr.p_offset + phdr.p_filesz) { + drgn_log_debug(prog, + "found PT_LOAD containing program headers with p_vaddr 0x%" PRIx64 + " and p_offset 0x%" PRIx64, + phdr.p_vaddr, phdr.p_offset); + phdr_vaddr = it->main_phoff - phdr.p_offset + phdr.p_vaddr; + have_phdr_vaddr = true; + } else if (phdr.p_type == PT_DYNAMIC) { + drgn_log_debug(prog, + "found PT_DYNAMIC with p_vaddr 0x%" PRIx64 + " and p_memsz 0x%" PRIx64, + phdr.p_vaddr, phdr.p_memsz); + have_dyn = true; + dyn_vaddr = phdr.p_vaddr; + dyn_memsz = phdr.p_memsz; + } + } + if (have_phdr_vaddr) { + it->main_bias = prog->auxv.at_phdr - phdr_vaddr; + drgn_log_debug(prog, "main bias is 0x%" PRIx64, it->main_bias); + } else { + drgn_log_debug(prog, + "didn't find PT_LOAD containing program headers"); return NULL; } - ret->have_address_range = true; + if (have_dyn) { + it->have_main_dyn = true; + it->main_dyn_vaddr = dyn_vaddr + it->main_bias; + it->main_dyn_memsz = dyn_memsz; + drgn_log_debug(prog, "main dynamic section is at 0x%" PRIx64, + it->main_dyn_vaddr); + } else { + drgn_log_debug(prog, + "didn't find PT_DYNAMIC program header; probably statically linked"); + } + it->read_main_phdrs = true; + return NULL; +} + +static struct drgn_error * +identify_module_from_phdrs(struct userspace_loaded_module_iterator *it, + struct drgn_module *module, size_t phnum, + uint64_t bias) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; - for (uint16_t i = 0; i < ehdr.e_phnum; i++) { + uint64_t start = UINT64_MAX, end = 0; + for (size_t i = 0; i < phnum; i++) { GElf_Phdr phdr; - core_get_phdr(&arg, i, &phdr); - if (phdr.p_type == PT_NOTE) { - if (phdr.p_filesz > SIZE_MAX || - 
!alloc_or_reuse(&core->segment_buf, - &core->segment_buf_capacity, - phdr.p_filesz)) + userspace_loaded_module_iterator_phdr(it, i, &phdr); + if (phdr.p_type == PT_LOAD) { + // Like elf_address_range_from_min_and_max_phdr(). + start = min(start, phdr.p_vaddr + bias); + end = max(end, phdr.p_vaddr + phdr.p_memsz + bias); + } else if (phdr.p_type == PT_NOTE + && module->build_id_len == 0) { + uint64_t note_size = min(phdr.p_filesz, phdr.p_memsz); + if (!note_size) + continue; + if (note_size > MAX_MEMORY_READ_FOR_DEBUG_INFO) { + drgn_log_debug(prog, + "note is unreasonably large (%" PRIu64 " bytes); ignoring", + note_size); + continue; + } + if (!alloc_or_reuse(&it->segment_buf, + &it->segment_buf_capacity, + note_size)) return &drgn_enomem; - err = drgn_program_read_memory(prog, core->segment_buf, + err = drgn_program_read_memory(prog, it->segment_buf, phdr.p_vaddr + bias, - phdr.p_filesz, false); - if (err) { - if (err->code == DRGN_ERROR_FAULT) { - drgn_error_destroy(err); - continue; - } else { + note_size, false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read note at 0x%" PRIx64 ": %s" + "; ignoring", + err->address, err->message); + drgn_error_destroy(err); + continue; + } else if (err) { + return err; + } + const void *build_id; + size_t build_id_len = + parse_gnu_build_id_from_notes(it->segment_buf, + note_size, + phdr.p_align == 8 ? + 8 : 4, + drgn_platform_bswap(&prog->platform), + &build_id); + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (err) return err; - } + drgn_log_debug(prog, + "found build ID %s in note at 0x%" PRIx64, + module->build_id_str, + phdr.p_vaddr + bias + + ((char *)build_id + - (char *)it->segment_buf)); } - ret->build_id_len = - parse_gnu_build_id_from_notes(core->segment_buf, - phdr.p_filesz, - phdr.p_align == 8 - ? 
8 : 4, - arg.bswap, - &ret->build_id); - if (ret->build_id_len) - break; } } - return NULL; -} + if (module->build_id_len == 0) { + drgn_log_debug(prog, + "couldn't find build ID from mapped program headers"); + } + if (start < end) { + err = drgn_module_set_address_range(module, start, end); + if (err) + return err; + drgn_log_debug(prog, + "got address range 0x%" PRIx64 "-0x%" PRIx64 " from mapped program headers", + start, end); + } else { + drgn_log_debug(prog, + "couldn't find address range from mapped program headers"); + } + return NULL; +} + +static struct drgn_error * +userspace_loaded_module_iterator_yield_main(struct userspace_loaded_module_iterator *it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + struct drgn_mapped_file_segment *segment = + find_mapped_file_segment(it, prog->auxv.at_phdr); + if (segment) { + // We don't need to read the file header to get e_phoff. Instead, + // determine it from the file mapping. + it->main_phoff = + segment->file_offset + (prog->auxv.at_phdr - segment->start); + drgn_log_debug(prog, + "AT_PHDR is mapped from file %s at offset 0x%" PRIx64, + segment->file->path, it->main_phoff); + } else { + drgn_log_debug(prog, + "couldn't find mapped file segment containing AT_PHDR"); + } + + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_main(prog, + segment ? 
segment->file->path : "", + &module, &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } + err = userspace_loaded_module_iterator_read_main_phdrs(it); + if (err) + return err; + if (it->read_main_phdrs) { + err = identify_module_from_phdrs(it, module, + prog->auxv.at_phnum, + it->main_bias); + if (err) + return err; + } + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; +} + +static struct drgn_error * +userspace_loaded_module_iterator_yield_vdso(struct userspace_loaded_module_iterator *it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (!prog->auxv.at_sysinfo_ehdr) { + drgn_log_debug(prog, "no vDSO"); +no_vdso: + *ret = NULL; + return NULL; + } + + drgn_log_debug(prog, "reading vDSO ELF header from AT_SYSINFO_EHDR"); + GElf_Ehdr ehdr; + err = userspace_loaded_module_iterator_read_ehdr(it, + prog->auxv.at_sysinfo_ehdr, + &ehdr); + if (err == &drgn_not_found) + goto no_vdso; + else if (err) + return err; + + drgn_log_debug(prog, + "reading %" PRIu16 " program headers at 0x%" PRIx64, + ehdr.e_phnum, prog->auxv.at_sysinfo_ehdr + ehdr.e_phoff); + + // It is effectively part of the ABI that the vDSO program headers are + // mapped at AT_SYSINFO_EHDR + e_phoff (see the Linux kernel's reference + // vDSO parser: vdso_init_from_sysinfo_ehdr() in + // tools/testing/selftests/vDSO/parse_vdso.c, glibc: setup_vdso() in + // elf/setup-vdso.h, and musl: __vdsosym() in src/internal/vdso.c). + err = userspace_loaded_module_iterator_read_phdrs(it, + prog->auxv.at_sysinfo_ehdr + ehdr.e_phoff, + ehdr.e_phnum); + if (err == &drgn_not_found) + goto no_vdso; + else if (err) + return err; + + // This is based on the Linux kernel's reference vDSO parser. + uint64_t bias = prog->auxv.at_sysinfo_ehdr; + // Silence -Wmaybe-uninitialized false positives on dyn_vaddr and + // dyn_memsz last seen with GCC 12. 
+ uint64_t dyn_vaddr = 0, dyn_memsz = 0; + bool have_load = false, have_dyn = false; + for (size_t i = 0; i < ehdr.e_phnum; i++) { + GElf_Phdr phdr; + userspace_loaded_module_iterator_phdr(it, i, &phdr); + if (phdr.p_type == PT_LOAD && !have_load) { + drgn_log_debug(prog, + "found PT_LOAD with p_offset 0x%" PRIx64 + " and p_vaddr 0x%" PRIx64, + phdr.p_offset, phdr.p_vaddr); + have_load = true; + bias = prog->auxv.at_sysinfo_ehdr + phdr.p_offset - phdr.p_vaddr; + } else if (phdr.p_type == PT_DYNAMIC) { + drgn_log_debug(prog, + "found PT_DYNAMIC with p_offset 0x%" PRIx64 + " and p_memsz 0x%" PRIx64, + phdr.p_offset, phdr.p_memsz); + dyn_vaddr = prog->auxv.at_sysinfo_ehdr + phdr.p_offset; + dyn_memsz = phdr.p_memsz; + have_dyn = true; + } + } + if (!have_load) { + drgn_log_warning(prog, + "can't find vDSO: " + "no PT_LOAD header in vDSO program headers"); + goto no_vdso; + } + drgn_log_debug(prog, "vDSO bias is 0x%" PRIx64, bias); + if (!have_dyn) { + drgn_log_warning(prog, + "can't find vDSO: " + "no PT_DYNAMIC header in vDSO program headers"); + goto no_vdso; + } + it->vdso_dyn_vaddr = dyn_vaddr; + it->have_vdso_dyn = true; + + drgn_log_debug(prog, "reading vDSO dynamic section at 0x%" PRIx64, + dyn_vaddr); + size_t num_dyn; + err = userspace_loaded_module_iterator_read_dynamic(it, dyn_vaddr, + dyn_memsz, + &num_dyn); + if (err == &drgn_not_found) + goto no_vdso; + else if (err) + return err; + + // Silence -Wmaybe-uninitialized false positives on dt_strtab and + // dt_soname last seen with GCC 12. 
+ uint64_t dt_strtab = 0, dt_soname = 0; + bool have_dt_strtab = false, have_dt_soname = false; + for (size_t i = 0; i < num_dyn; i++) { + GElf_Dyn dyn; + userspace_loaded_module_iterator_dyn(it, i, &dyn); + if (dyn.d_tag == DT_STRTAB) { + dt_strtab = dyn.d_un.d_ptr; + have_dt_strtab = true; + drgn_log_debug(prog, "found DT_STRTAB 0x%" PRIx64, + dt_strtab); + } else if (dyn.d_tag == DT_SONAME) { + dt_soname = dyn.d_un.d_val; + have_dt_soname = true; + drgn_log_debug(prog, "found DT_SONAME 0x%" PRIx64, + dt_soname); + } else if (dyn.d_tag == DT_NULL) { + break; + } + } + if (!have_dt_strtab || !have_dt_soname) { + drgn_log_warning(prog, + "can't find vDSO: " + "no %s%s%s entr%s in vDSO dynamic section", + have_dt_strtab ? "" : "DT_STRTAB", + have_dt_strtab || have_dt_soname ? "" : " or ", + have_dt_soname ? "" : "DT_SONAME", + have_dt_strtab || have_dt_soname ? "y" : "ies"); + goto no_vdso; + } + + _cleanup_free_ char *name = NULL; + err = drgn_program_read_c_string(prog, dt_strtab + bias + dt_soname, + false, SIZE_MAX, &name); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_warning(prog, + "can't find vDSO: " + "couldn't read soname at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + goto no_vdso; + } else if (err) { + return err; + } + drgn_log_debug(prog, "read vDSO soname \"%s\"", name); + + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_vdso(prog, name, dyn_vaddr, &module, + &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } + + err = identify_module_from_phdrs(it, module, ehdr.e_phnum, bias); + if (err) + return err; + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; +} + +#define read_struct64(prog, struct64p, address, type32, visit_members) \ + read_struct64_impl(prog, struct64p, address, type32, visit_members, \ + PP_UNIQUE(prog), 
PP_UNIQUE(struct64p), \ + PP_UNIQUE(is_64_bit), PP_UNIQUE(err)) +#define read_struct64_impl(prog, struct64p, address, type32, visit_members, \ + unique_prog, unique_struct64, unique_is_64_bit, \ + unique_err) ({ \ + struct drgn_program *unique_prog = (prog); \ + __auto_type unique_struct64p = (struct64p); \ + static_assert(sizeof(*unique_struct64p) >= sizeof(type32), \ + "64-bit type is smaller than 32-bit type"); \ + const bool unique_is_64_bit = \ + drgn_platform_is_64_bit(&unique_prog->platform); \ + struct drgn_error *unique_err = \ + drgn_program_read_memory(unique_prog, unique_struct64p, \ + (address), \ + unique_is_64_bit \ + ? sizeof(*unique_struct64p) \ + : sizeof(type32), false); \ + if (!unique_err) { \ + deserialize_struct64_inplace(unique_struct64p, type32, \ + visit_members, unique_is_64_bit, \ + drgn_platform_bswap(&unique_prog->platform));\ + } \ + unique_err; \ +}) + +static struct drgn_error * +userspace_get_link_map(struct userspace_loaded_module_iterator *it) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (!it->read_main_phdrs) { + err = userspace_loaded_module_iterator_read_main_phdrs(it); + if (err) + return err; + } + if (!it->have_main_dyn) + return NULL; + + drgn_log_debug(prog, "reading main dynamic section"); + size_t num_dyn; + err = userspace_loaded_module_iterator_read_dynamic(it, + it->main_dyn_vaddr, + it->main_dyn_memsz, + &num_dyn); + if (err == &drgn_not_found) { + drgn_log_warning(prog, + "can't find shared libraries: " + "couldn't read main dynamic section"); + return NULL; + } else if (err) { + return err; + } + + GElf_Dyn dyn; + size_t i; + for (i = 0; i < num_dyn; i++) { + userspace_loaded_module_iterator_dyn(it, i, &dyn); + if (dyn.d_tag == DT_NULL) { + i = num_dyn; + break; + } + if (dyn.d_tag == DT_DEBUG) { + drgn_log_debug(prog, "found DT_DEBUG 0x%" PRIx64, + dyn.d_un.d_ptr); + break; + } + } + if (i >= num_dyn) { + drgn_log_warning(prog, + "can't find shared libraries: " + "no DT_DEBUG 
entry in main dynamic section"); + return NULL; + } + + struct drgn_r_debug { + int32_t r_version; + alignas(8) uint64_t r_map; + } r_debug; + struct drgn_r_debug32 { + int32_t r_version; + uint32_t r_map; + }; +#define visit_r_debug_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(r_version); \ + visit_scalar_member(r_map); \ +} while (0) + err = read_struct64(prog, &r_debug, dyn.d_un.d_ptr, + struct drgn_r_debug32, visit_r_debug_members); +#undef visit_r_debug_members + if (err && err->code == DRGN_ERROR_FAULT) { + // Note: musl doesn't update DT_DEBUG for static PIE binaries + // compiled with GCC (as of musl v1.2.3 and GCC 13), so that + // case is known to fail here. + drgn_log_warning(prog, + "can't find shared libraries: " + "couldn't read r_debug at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return NULL; + } else if (err) { + return err; + } + drgn_log_debug(prog, + "read r_debug = { .r_version = %" PRId32 ", .r_map = 0x%" PRIx64 " }", + r_debug.r_version, r_debug.r_map); + + if (r_debug.r_version < 1) { + drgn_log_warning(prog, + "can't find shared libraries: " + "invalid r_debug.r_version %" PRId32, + r_debug.r_version); + return NULL; + } + it->link_map = r_debug.r_map; + return NULL; +} + +static struct drgn_error * +identify_module_from_link_map(struct userspace_loaded_module_iterator *it, + struct drgn_module *module, + struct drgn_mapped_file *file, uint64_t l_addr) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + // Even if it is a 32-bit file, segments should be at least a page, so + // we should be able to read the 64-bit size. 
+ if (file->offset0_size < sizeof(Elf64_Ehdr)) { + drgn_log_debug(prog, "didn't find mapped ELF header"); + return NULL; + } + + drgn_log_debug(prog, "reading ELF header at 0x%" PRIx64, + file->offset0_vaddr); + GElf_Ehdr ehdr; + err = userspace_loaded_module_iterator_read_ehdr(it, + file->offset0_vaddr, + &ehdr); + if (err == &drgn_not_found) + return NULL; + else if (err) + return err; + + drgn_log_debug(prog, + "reading %" PRIu16 " program headers from 0x%" PRIx64, + ehdr.e_phnum, file->offset0_vaddr + ehdr.e_phoff); + // e_phnum and e_phentsize are uint16_t, so this can't overflow. + uint32_t phdrs_size = + (uint32_t)ehdr.e_phnum * (uint32_t)ehdr.e_phentsize; + if (ehdr.e_phoff > file->offset0_size || + phdrs_size > file->offset0_size - ehdr.e_phoff) { + drgn_log_debug(prog, + "program header table is not mapped with ELF header"); + return NULL; + } + err = userspace_loaded_module_iterator_read_phdrs(it, + file->offset0_vaddr + ehdr.e_phoff, + ehdr.e_phnum); + if (err == &drgn_not_found) + return NULL; + else if (err) + return err; + + return identify_module_from_phdrs(it, module, ehdr.e_phnum, l_addr); +} + +// This is the public definition of struct link_map from glibc's link.h: +// +// struct link_map +// { +// /* These first few members are part of the protocol with the debugger. +// This is the same format used in SVR4. */ +// +// ElfW(Addr) l_addr; /* Difference between the address in the ELF +// file and the addresses in memory. */ +// char *l_name; /* Absolute file name object was found in. */ +// ElfW(Dyn) *l_ld; /* Dynamic section of the shared object. */ +// struct link_map *l_next, *l_prev; /* Chain of loaded objects. */ +// }; +// +// We don't need l_prev, so we exclude it from our definition. 
+struct drgn_link_map { + uint64_t l_addr; + uint64_t l_name; + uint64_t l_ld; + uint64_t l_next; +}; +struct drgn_link_map32 { + uint32_t l_addr; + uint32_t l_name; + uint32_t l_ld; + uint32_t l_next; +}; + +static struct drgn_error * +userspace_next_link_map(struct userspace_loaded_module_iterator *it, + struct drgn_link_map *ret, char **name_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (!it->link_map) { + drgn_log_debug(prog, "found end of link_map list"); + return &drgn_stop; + } -static struct drgn_error *elf_file_get_phdr(void *arg, size_t i, - GElf_Phdr *phdr) -{ - if (!gelf_getphdr(arg, i, phdr)) - return drgn_error_libelf(); + if (it->state + >= USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP + + MAX_LINK_MAP_LIST_ITERATIONS) { + drgn_log_warning(prog, + "can't find remaining shared libraries: " + "too many entries or cycle in link_map list"); + return &drgn_stop; + } + it->state++; + +#define visit_link_map_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(l_addr); \ + visit_scalar_member(l_name); \ + visit_scalar_member(l_ld); \ + visit_scalar_member(l_next); \ +} while (0) + err = read_struct64(prog, ret, it->link_map, struct drgn_link_map32, + visit_link_map_members); +#undef visit_link_map_members + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_warning(prog, + "can't find remaining shared libraries: " + "couldn't read next link_map at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_stop; + } else if (err) { + return err; + } + + it->link_map = ret->l_next; + + err = drgn_program_read_c_string(prog, ret->l_name, false, SIZE_MAX, + name_ret); + if (err && err->code == DRGN_ERROR_FAULT) + *name_ret = NULL; + else if (err) + return err; + drgn_log_debug(prog, + "read link_map = { .l_addr = 0x%" PRIx64 ", .l_name = 0x%" PRIx64 "%s%s%s, .l_ld = 0x%" PRIx64 ", .l_next = 0x%" PRIx64 " }", + ret->l_addr, ret->l_name, *name_ret ? 
" = \"" : "", + *name_ret ? *name_ret : "", *name_ret ? "\"" : "", + ret->l_ld, ret->l_next); + if (err) { + drgn_log_debug(prog, + "couldn't read l_name at 0x%" PRIx64 ": %s" + "; skipping", + err->address, err->message); + drgn_error_destroy(err); + } return NULL; } static struct drgn_error * -userspace_core_maybe_report_file(struct drgn_debug_info_load_state *load, - struct userspace_core_report_state *core, - const char *path, - const struct drgn_mapped_file_segment *segments, - size_t num_segments) +yield_from_link_map(struct userspace_loaded_module_iterator *it, + struct drgn_module **ret, bool *new_ret) { struct drgn_error *err; - struct drgn_program *prog = load->dbinfo->prog; - for (size_t ehdr_idx = 0; ehdr_idx < num_segments; ehdr_idx++) { - const struct drgn_mapped_file_segment *ehdr_segment = - &segments[ehdr_idx]; - /* - * There should always be a full page mapped, so even if it's a - * 32-bit file, we can read the 64-bit size. - */ - if (ehdr_segment->file_offset != 0 || - ehdr_segment->end - ehdr_segment->start < sizeof(Elf64_Ehdr)) + struct drgn_program *prog = it->it.prog; + + for (;;) { + struct drgn_link_map link_map; + _cleanup_free_ char *name = NULL; + err = userspace_next_link_map(it, &link_map, &name); + if (err == &drgn_stop) { + *ret = NULL; + return NULL; + } else if (err) { + return err; + } + + if (link_map.l_ld == it->main_dyn_vaddr) { + drgn_log_debug(prog, + "l_ld matches main dynamic section; skipping"); + continue; + } + if (it->have_vdso_dyn && link_map.l_ld == it->vdso_dyn_vaddr) { + drgn_log_debug(prog, + "l_ld matches vDSO dynamic section; skipping"); + continue; + } + if (!name) continue; - /* - * This logic is complicated because we're dealing with two data - * sources that we can't completely trust: the memory in the - * core dump and the file at the path found in the core dump. - * - * First, we try to identify the mapped file contents in the - * core dump. Ideally, this will find a build ID. 
However, this - * can fail for a few reasons: - * - * 1. The file is not an ELF file. - * 2. The ELF file is not an executable or library. - * 3. The ELF file does not have a build ID. - * 4. The file header was not dumped to the core dump, in which - * case we can't tell whether this is an ELF file. Dumping - * the first page of an executable file has been the default - * behavior since Linux kernel commit 895021552d6f - * ("coredump: default - * CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y") (in v2.6.37), but - * it can be disabled at kernel build time or toggled at - * runtime. - * 5. The build ID or the necessary ELF metadata were not dumped - * in the core dump. This can happen if the necessary program - * headers or note segment were not in the first page of the - * file. - * 6. The file is mapped but not actually loaded into the - * program (e.g., if the program is a tool like a profiler or - * a debugger that mmaps binaries [like drgn itself!]). - * - * In cases 1 and 2, we can simply ignore the file. In cases - * 3-5, we blindly trust the path in the core dump. We can - * sometimes detect case 6 in - * userspace_core_elf_address_range(). - * - * There is also the possibility that the program modified or - * corrupted the ELF metadata in memory (more likely if the file - * was explicitly mmap'd, since the metadata will usually be - * read-only if it was loaded properly). We don't deal with that - * yet. 
- */ - struct userspace_core_identified_file identity = {}; - err = userspace_core_identify_file(prog, core, segments, - num_segments, ehdr_segment, - &identity); + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_shared_library(prog, name, + link_map.l_ld, + &module, &new); if (err) return err; - if (identity.ignore) - continue; - -#define CLEAR_ELF() do { \ - elf = NULL; \ - fd = -1; \ -} while (0) -#define CLOSE_ELF() do { \ - elf_end(elf); \ - close(fd); \ - CLEAR_ELF(); \ -} while (0) - int fd; - Elf *elf; - /* - * There are a few things that can go wrong here: - * - * 1. The path no longer exists. - * 2. The path refers to a different ELF file than was in the - * core dump. - * 3. The path refers to something which isn't a valid ELF file. - */ - err = open_elf_file(path, &fd, &elf); - if (err) { - drgn_error_destroy(err); - CLEAR_ELF(); - } else if (identity.build_id_len > 0) { - if (!build_id_matches(elf, identity.build_id, - identity.build_id_len)) - CLOSE_ELF(); - } - - if (elf && !identity.have_address_range) { - GElf_Ehdr ehdr_mem, *ehdr; - size_t phnum; - if ((ehdr = gelf_getehdr(elf, &ehdr_mem)) && - (elf_getphdrnum(elf, &phnum) == 0)) { - uint64_t bias; - err = userspace_core_elf_address_range(ehdr->e_type, - phnum, - elf_file_get_phdr, - elf, - segments, - num_segments, - ehdr_segment, - &bias, - &identity.start, - &identity.end); - if (err || identity.start >= identity.end) { - drgn_error_destroy(err); - CLOSE_ELF(); - } else { - identity.have_address_range = true; - } - } else { - CLOSE_ELF(); - } + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; } - if (elf) { - err = drgn_debug_info_report_elf(load, path, fd, elf, - identity.start, - identity.end, NULL, - NULL); + struct drgn_mapped_file_segment *segment = + find_mapped_file_segment(it, link_map.l_ld); + if (segment) { + err = identify_module_from_link_map(it, module, + segment->file, + 
link_map.l_addr); if (err) return err; } else { - if (!identity.have_address_range) - identity.start = identity.end = 0; - Dwfl_Module *dwfl_module = - dwfl_report_module(load->dbinfo->dwfl, path, - identity.start, - identity.end); - if (!dwfl_module) - return drgn_error_libdwfl(); - if (identity.build_id_len > 0 && - dwfl_module_report_build_id(dwfl_module, - identity.build_id, - identity.build_id_len, - 0)) - return drgn_error_libdwfl(); - } -#undef CLOSE_ELF -#undef CLEAR_ELF + drgn_log_debug(prog, + "couldn't find mapped file segment containing l_ld"); + } + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; } - return NULL; } static struct drgn_error * -userspace_core_report_mapped_files(struct drgn_debug_info_load_state *load, - struct userspace_core_report_state *core) +userspace_loaded_module_iterator_next(struct drgn_module_iterator *_it, + struct drgn_module **ret, + bool *new_ret) { - struct drgn_error *err; - for (struct drgn_mapped_files_iterator it = - drgn_mapped_files_first(&core->files); - it.entry; it = drgn_mapped_files_next(it)) { - err = userspace_core_maybe_report_file(load, core, - it.entry->key, - drgn_mapped_file_segment_vector_begin(&it.entry->value), - drgn_mapped_file_segment_vector_size(&it.entry->value)); + struct userspace_loaded_module_iterator *it = + container_of(_it, struct userspace_loaded_module_iterator, it); + switch (it->state) { + case USERSPACE_LOADED_MODULE_ITERATOR_STATE_MAIN: + err = drgn_program_cache_auxv(it->it.prog); + if (err) + return err; + it->state = USERSPACE_LOADED_MODULE_ITERATOR_STATE_VDSO; + return userspace_loaded_module_iterator_yield_main(it, ret, + new_ret); + case USERSPACE_LOADED_MODULE_ITERATOR_STATE_VDSO: + it->state = USERSPACE_LOADED_MODULE_ITERATOR_STATE_R_DEBUG; + err = userspace_loaded_module_iterator_yield_vdso(it, ret, + new_ret); + if (err || *ret) + return err; + fallthrough; + case USERSPACE_LOADED_MODULE_ITERATOR_STATE_R_DEBUG: + it->state = 
USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP; + err = userspace_get_link_map(it); if (err) return err; + fallthrough; + default: + return yield_from_link_map(it, ret, new_ret); } - return NULL; } +struct process_mapped_file_entry { + dev_t dev; + ino_t ino; + struct drgn_mapped_file *file; +}; + +struct process_mapped_file_key { + dev_t dev; + ino_t ino; + const char *path; +}; + +static struct process_mapped_file_key +process_mapped_file_entry_to_key(const struct process_mapped_file_entry *entry) +{ + return (struct process_mapped_file_key){ + .dev = entry->dev, + .ino = entry->ino, + .path = entry->file->path, + }; +} + +static struct hash_pair +process_mapped_file_key_hash_pair(const struct process_mapped_file_key *key) +{ + size_t hash = hash_combine(key->dev, key->ino); + hash = hash_combine(hash, hash_c_string(key->path)); + return hash_pair_from_avalanching_hash(hash); +} + +static bool process_mapped_file_key_eq(const struct process_mapped_file_key *a, + const struct process_mapped_file_key *b) +{ + return (a->dev == b->dev + && a->ino == b->ino + && strcmp(a->path, b->path) == 0); +} + +DEFINE_HASH_TABLE(process_mapped_files, struct process_mapped_file_entry, + process_mapped_file_entry_to_key, + process_mapped_file_key_hash_pair, + process_mapped_file_key_eq); + +struct process_loaded_module_iterator { + struct userspace_loaded_module_iterator u; + struct process_mapped_files files; +}; + static struct drgn_error * -userspace_core_report_debug_info(struct drgn_debug_info_load_state *load, - const char *nt_file, size_t nt_file_len) +process_add_mapping(struct process_loaded_module_iterator *it, + const char *maps_path, const char *map_files_path, + int map_files_fd, bool *logged_readlink_eperm, + bool *logged_stat_eperm, + struct drgn_map_files_segment_vector *map_files_segments, + struct drgn_mapped_file_segments *segments, + char *line, size_t line_len) { - struct drgn_error *err; + struct drgn_program *prog = it->u.it.prog; + + struct 
drgn_map_files_segment segment; + uint64_t segment_file_offset; + unsigned int dev_major, dev_minor; + uint64_t ino; + int map_name_len, path_index; + if (sscanf(line, + "%" SCNx64 "-%" SCNx64 "%n %*s %" SCNx64 " %x:%x %" SCNu64 " %n", + &segment.start, &segment.end, &map_name_len, + &segment_file_offset, &dev_major, &dev_minor, &ino, + &path_index) != 6) { + return drgn_error_format(DRGN_ERROR_OTHER, "couldn't parse %s", + maps_path); + } + // Skip anonymous mappings. + if (ino == 0) + return NULL; + + if (!drgn_map_files_segment_vector_append(map_files_segments, &segment)) + return &drgn_enomem; - struct userspace_core_report_state core = { - .files = HASH_TABLE_INIT, + struct process_mapped_file_key key = { + .dev = makedev(dev_major, dev_minor), + .ino = ino, + .path = line + path_index, }; - err = userspace_core_get_mapped_files(load, &core, nt_file, - nt_file_len); - if (err) - goto out; - err = userspace_core_report_mapped_files(load, &core); -out: - free(core.segment_buf); - free(core.phdr_buf); - for (struct drgn_mapped_files_iterator it = - drgn_mapped_files_first(&core.files); - it.entry; it = drgn_mapped_files_next(it)) - drgn_mapped_file_segment_vector_deinit(&it.entry->value); - drgn_mapped_files_deinit(&core.files); - return err; + _cleanup_free_ char *real_path = NULL; + + // /proc/$pid/maps has a couple of ambiguities that + // /proc/$pid/map_files/
can help with: + // + // 1. Newlines in the file path from /proc/$pid/maps are escaped as + // \012. However, \ is not escaped, so it is ambiguous whether \012 + // is a newline or appeared literally in the path. We can read the + // map_files link to get the unescaped path. + // 2. The device number in /proc/$pid/maps is incorrect for some + // filesystems. Specifically, for Btrfs as of Linux 6.5, it refers to + // a filesystem-wide device number rather than the subvolume-specific + // device numbers returned by stat. We can stat the map_files link to + // get the correct device number. + if (map_files_fd >= 0) { + char map_files_name[34]; + snprintf(map_files_name, sizeof(map_files_name), + "%" PRIx64 "-%" PRIx64, segment.start, segment.end); + + // The escaped path must be at least as long as the original + // path, so use that as the readlink buffer size. + size_t bufsiz = line_len - path_index + 1; + real_path = malloc(bufsiz); + if (!real_path) + return &drgn_enomem; + // Before Linux kernel commit bdb4d100afe9 ("procfs: always + // expose /proc/<pid>/map_files/ and make it readable") (in + // v4.3), reading these links required CAP_SYS_ADMIN. Since that + // commit, it only requires PTRACE_MODE_READ, which we must have + // since we opened /proc/$pid/maps. + // + // If we can't read this link, we have to fall back to the + // escaped path. Newlines and the literal sequence \012 are + // unlikely to appear in a path, so it's not a big deal. + ssize_t r = readlinkat(map_files_fd, map_files_name, real_path, + bufsiz); + if (r < 0) { + if (errno == EPERM) { + free(real_path); + real_path = NULL; + if (!*logged_readlink_eperm) { + drgn_log_debug(prog, + "don't have permission to read symlinks in %s", + map_files_path); + } + *logged_readlink_eperm = true; + } else if (errno == ENOENT) { + // We raced with a change to the mapping.
+ drgn_log_debug(prog, "mapping %s disappeared", + map_files_name); + return NULL; + } else { + return drgn_error_format_os("readlink", errno, + "%s/%s", + map_files_path, + map_files_name); + } + } else if (r >= bufsiz) { + // We didn't allocate enough for the link contents. The + // only way this is possible is if we raced with the + // mapping being replaced by a different path. + drgn_log_debug(prog, + "mapping %s path changed; skipping", + map_files_name); + return NULL; + } else { + real_path[r] = '\0'; + key.path = real_path; + } + + // Following these links requires CAP_SYS_ADMIN. If we can't, we + // have to fall back to using the device number from + // /proc/$pid/maps. Mapping files with the same path and inode + // number in different Btrfs subvolumes is unlikely, so this is + // also not a big deal. + struct stat st; + if (fstatat(map_files_fd, map_files_name, &st, 0) < 0) { + if (errno == EPERM) { + if (!*logged_stat_eperm) { + drgn_log_debug(prog, + "don't have permission to follow symlinks in %s", + map_files_path); + } + *logged_stat_eperm = true; + } else if (errno == ENOENT) { + // We raced with a change to the mapping. 
+ drgn_log_debug(prog, "mapping %s disappeared", + map_files_name); + return NULL; + } else { + return drgn_error_format_os("stat", errno, + "%s/%s", + map_files_path, + map_files_name); + } + } else { + key.dev = st.st_dev; + } + } + + struct hash_pair hp = process_mapped_files_hash(&key); + struct process_mapped_files_iterator files_it = + process_mapped_files_search_hashed(&it->files, &key, hp); + if (!files_it.entry) { + if (!real_path) { + real_path = strdup(key.path); + if (!real_path) + return &drgn_enomem; + } + struct drgn_mapped_file *file = + drgn_mapped_file_create(real_path); + if (!file) + return &drgn_enomem; + struct process_mapped_file_entry entry = { + .dev = key.dev, + .ino = key.ino, + .file = file, + }; + if (process_mapped_files_insert_searched(&it->files, &entry, hp, + &files_it) < 0) { + drgn_mapped_file_destroy(file); + return &drgn_enomem; + } + // real_path is owned by the iterator now. + real_path = NULL; + } + return drgn_add_mapped_file_segment(segments, segment.start, segment.end, + segment_file_offset, + files_it.entry->file); } static struct drgn_error * -userspace_report_elf_file(struct drgn_debug_info_load_state *load, - const char *path) +process_get_mapped_files(struct process_loaded_module_iterator *it) { struct drgn_error *err; + struct drgn_program *prog = it->u.it.prog; + +#define FORMAT "/proc/%ld/maps" + char maps_path[sizeof(FORMAT) + - sizeof("%ld") + + max_decimal_length(long) + + 1]; + snprintf(maps_path, sizeof(maps_path), FORMAT, (long)prog->pid); +#undef FORMAT + _cleanup_fclose_ FILE *maps_file = fopen(maps_path, "r"); + if (!maps_file) + return drgn_error_create_os("fopen", errno, maps_path); + drgn_log_debug(prog, "parsing %s", maps_path); + +#define FORMAT "/proc/%ld/map_files" + char map_files_path[sizeof(FORMAT) + - sizeof("%ld") + + max_decimal_length(long) + + 1]; + snprintf(map_files_path, sizeof(map_files_path), FORMAT, + (long)prog->pid); +#undef FORMAT + // Since Linux kernel commit bdb4d100afe9 
("procfs: always expose + // /proc/<pid>/map_files/ and make it readable") (in v4.3), + // /proc/$pid/map_files always exists. Before that, it only exists if + // CONFIG_CHECKPOINT_RESTORE is enabled. + // + // If it exists, we should always have permission to open it since we + // were able to open /proc/$pid/maps. + _cleanup_close_ int map_files_fd = + open(map_files_path, O_RDONLY | O_DIRECTORY); + if (map_files_fd < 0) { + if (errno != ENOENT) { + return drgn_error_create_os("open", errno, + map_files_path); + } + drgn_log_debug(prog, "%s: %m", map_files_path); + } - int fd; - Elf *elf; - err = open_elf_file(path, &fd, &elf); - if (err) - goto err; - - GElf_Ehdr ehdr_mem, *ehdr; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) { - err = drgn_error_libelf(); - goto err_close; - } - /* - * We haven't implemented a way to get the load address for dynamically - * loaded or relocatable files, so for now we report those as unloaded. - */ - uint64_t start = 0, end = 0; - if (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_CORE) { - err = elf_address_range(elf, 0, &start, &end); + _cleanup_free_ char *line = NULL; + size_t n = 0; + bool logged_readlink_eperm = false, logged_stat_eperm = false; + // While we're reading /proc/$pid/maps, we might as well cache the + // segments for drgn_module_try_proc_files_for_shared_library(). + _cleanup_(drgn_map_files_segment_vector_deinit) + struct drgn_map_files_segment_vector map_files_segments = VECTOR_INIT; + struct drgn_mapped_file_segments segments = DRGN_MAPPED_FILE_SEGMENTS_INIT; + for (;;) { + errno = 0; + ssize_t len; + if ((len = getline(&line, &n, maps_file)) == -1) { + if (errno) { + err = drgn_error_create_os("getline", errno, + maps_path); + } else { + err = NULL; + } + break; + } + // Remove the newline.
+ if (len > 0 && line[len - 1] == '\n') + line[--len] = '\0'; + + drgn_log_debug(prog, "read %s", line); + err = process_add_mapping(it, maps_path, map_files_path, + map_files_fd, &logged_readlink_eperm, + &logged_stat_eperm, + &map_files_segments, &segments, line, + len); if (err) - goto err_close; + break; } + if (err) { + drgn_mapped_file_segments_abort(&segments); + } else { + drgn_debug_info_set_map_files_segments(&prog->dbinfo, + &map_files_segments, + segments.sorted); + userspace_loaded_module_iterator_set_file_segments(&it->u, + &segments); + } + return err; +} - return drgn_debug_info_report_elf(load, path, fd, elf, start, end, NULL, - NULL); +static void +process_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) +{ + struct process_loaded_module_iterator *it = + container_of(_it, struct process_loaded_module_iterator, u.it); + for (struct process_mapped_files_iterator files_it = + process_mapped_files_first(&it->files); + files_it.entry; files_it = process_mapped_files_next(files_it)) { + free((char *)files_it.entry->file->path); + drgn_mapped_file_destroy(files_it.entry->file); + } + process_mapped_files_deinit(&it->files); + userspace_loaded_module_iterator_deinit(&it->u); + free(it); +} -err_close: - elf_end(elf); - close(fd); -err: - return drgn_debug_info_report_error(load, path, NULL, err); +static struct drgn_error * +process_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_error *err; + struct process_loaded_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->u.it, prog, + process_loaded_module_iterator_destroy, + userspace_loaded_module_iterator_next); + process_mapped_files_init(&it->files); + err = process_get_mapped_files(it); + if (err) { + process_loaded_module_iterator_destroy(&it->u.it); + return err; + } + *ret = &it->u.it; + return NULL; +} + +static const char * +core_mapped_file_entry_to_key(struct 
drgn_mapped_file * const *entry) +{ + return (*entry)->path; +} + +DEFINE_HASH_TABLE(core_mapped_files, struct drgn_mapped_file *, + core_mapped_file_entry_to_key, c_string_key_hash_pair, + c_string_key_eq); + +struct core_loaded_module_iterator { + struct userspace_loaded_module_iterator u; + struct core_mapped_files files; +}; + +static struct drgn_error *parse_nt_file_error(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + return drgn_error_create(DRGN_ERROR_OTHER, "couldn't parse NT_FILE"); } static struct drgn_error * -userspace_report_debug_info(struct drgn_debug_info_load_state *load) +core_get_mapped_files(struct core_loaded_module_iterator *it) { struct drgn_error *err; + struct drgn_program *prog = it->u.it.prog; - for (size_t i = 0; i < load->num_paths; i++) { - err = userspace_report_elf_file(load, load->paths[i]); - if (err) + const void *note; + size_t note_size; + if (find_elf_note(prog->core, "CORE", NT_FILE, &note, &note_size)) + return drgn_error_libelf(); + if (!note) { + drgn_log_debug(prog, "core doesn't have NT_FILE note"); + return NULL; + } + + drgn_log_debug(prog, "parsing NT_FILE"); + + bool is_64_bit = drgn_platform_is_64_bit(&prog->platform); + bool little_endian = drgn_platform_is_little_endian(&prog->platform); + + struct binary_buffer bb; + binary_buffer_init(&bb, note, note_size, little_endian, + parse_nt_file_error); + + // fs/binfmt_elf.c in the Linux kernel source code documents the format + // of NT_FILE as: + // + // long count -- how many files are mapped + // long page_size -- units for file_ofs + // array of [COUNT] elements of + // long start + // long end + // long file_ofs + // followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
+ struct nt_file_segment64 { + uint64_t start; + uint64_t end; + uint64_t file_offset; + }; + struct nt_file_segment32 { + uint32_t start; + uint32_t end; + uint32_t file_offset; + }; + uint64_t count, page_size; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / sizeof(struct nt_file_segment64)) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, + count * sizeof(struct nt_file_segment64)))) + return err; + } else { + if ((err = binary_buffer_next_u32_into_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / sizeof(struct nt_file_segment32)) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u32_into_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, + count * sizeof(struct nt_file_segment32)))) return err; } - if (load->load_default) { - Dwfl *dwfl = load->dbinfo->dwfl; - struct drgn_program *prog = load->dbinfo->prog; - if (prog->flags & DRGN_PROGRAM_IS_LIVE) { - int ret = dwfl_linux_proc_report(dwfl, prog->pid); - if (ret == -1) { - return drgn_error_libdwfl(); - } else if (ret) { - return drgn_error_create_os("dwfl_linux_proc_report", - ret, NULL); - } + struct drgn_mapped_file_segments segments = + DRGN_MAPPED_FILE_SEGMENTS_INIT; + for (uint64_t i = 0; i < count; i++) { + struct nt_file_segment64 segment; +#define visit_nt_file_segment_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(start); \ + visit_scalar_member(end); \ + visit_scalar_member(file_offset); \ +} while (0) + deserialize_struct64(&segment, struct nt_file_segment32, + visit_nt_file_segment_members, + (char *)note + + (is_64_bit + ? 
16 + i * sizeof(struct nt_file_segment64) + : 8 + i * sizeof(struct nt_file_segment32)), + is_64_bit, bb.bswap); +#undef visit_nt_file_segment_members + segment.file_offset *= page_size; + const char *path = bb.pos; + if ((err = binary_buffer_skip_string(&bb))) + goto err; + drgn_log_debug(prog, + "found 0x%" PRIx64 "-0x%" PRIx64 " 0x%" PRIx64 " %s", + segment.start, segment.end, segment.file_offset, + path); + if (segment.start >= segment.end) + continue; + + struct hash_pair hp = core_mapped_files_hash(&path); + struct core_mapped_files_iterator files_it = + core_mapped_files_search_hashed(&it->files, &path, hp); + struct drgn_mapped_file *file; + if (files_it.entry) { + file = *files_it.entry; } else { - const char *nt_file; - size_t nt_file_len; - char *env = getenv("DRGN_USE_LIBDWFL_REPORT"); - if (env && atoi(env)) { - nt_file = NULL; - nt_file_len = 0; - } else { - err = drgn_get_nt_file(prog->core, &nt_file, - &nt_file_len); - if (err) - return err; + file = drgn_mapped_file_create(path); + if (!file) { + err = &drgn_enomem; + goto err; } - if (nt_file) { - err = userspace_core_report_debug_info(load, - nt_file, - nt_file_len); - if (err) - return err; - } else if (dwfl_core_file_report(dwfl, prog->core, - NULL) == -1) { - return drgn_error_libdwfl(); + if (core_mapped_files_insert_searched(&it->files, &file, + hp, NULL) < 0) { + drgn_mapped_file_destroy(file); + err = &drgn_enomem; + goto err; } } + err = drgn_add_mapped_file_segment(&segments, segment.start, + segment.end, + segment.file_offset, file); + if (err) + goto err; } + userspace_loaded_module_iterator_set_file_segments(&it->u, &segments); return NULL; + +err: + drgn_mapped_file_segments_abort(&segments); + return err; } -static int should_apply_relocation_section(Elf *elf, size_t shstrndx, - const GElf_Shdr *shdr) +static void +core_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) { - if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) - return 0; + struct 
core_loaded_module_iterator *it = + container_of(_it, struct core_loaded_module_iterator, u.it); + for (struct core_mapped_files_iterator files_it = + core_mapped_files_first(&it->files); + files_it.entry; + files_it = core_mapped_files_next(files_it)) + drgn_mapped_file_destroy(*files_it.entry); + core_mapped_files_deinit(&it->files); + userspace_loaded_module_iterator_deinit(&it->u); + free(it); +} - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - return -1; - if (shdr->sh_type == SHT_RELA) { - if (!strstartswith(scnname, ".rela.")) - return 0; - scnname += sizeof(".rela.") - 1; - } else { - if (!strstartswith(scnname, ".rel.")) - return 0; - scnname += sizeof(".rel.") - 1; +static struct drgn_error * +core_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_error *err; + struct core_loaded_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->u.it, prog, + core_loaded_module_iterator_destroy, + userspace_loaded_module_iterator_next); + core_mapped_files_init(&it->files); + err = core_get_mapped_files(it); + if (err) { + core_loaded_module_iterator_destroy(&it->u.it); + return err; } - return (strstartswith(scnname, "debug_") || - strstartswith(scnname, "orc_")); + *ret = &it->u.it; + return NULL; +} + +static struct drgn_error * +null_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(it, prog, NULL, NULL); + *ret = it; + return NULL; } -static inline struct drgn_error *get_reloc_sym_value(const void *syms, - size_t num_syms, - const uint64_t *sh_addrs, - size_t shdrnum, - bool is_64_bit, - bool bswap, - uint32_t r_sym, - uint64_t *ret) +LIBDRGN_PUBLIC struct drgn_error * +drgn_loaded_module_iterator_create(struct drgn_program *prog, + struct 
drgn_module_iterator **ret) { - if (r_sym >= num_syms) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid ELF relocation symbol"); + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + return linux_kernel_loaded_module_iterator_create(prog, ret); + else if (drgn_program_is_userspace_process(prog)) + return process_loaded_module_iterator_create(prog, ret); + else if (drgn_program_is_userspace_core(prog)) + return core_loaded_module_iterator_create(prog, ret); + else + return null_module_iterator_create(prog, ret); +} + +struct load_debug_info_file { + const char *path; + // We only keep this to keep load_debug_info_provided::build_id alive + // without needing to copy it. If we add a drgn_module_try_file API that + // allows providing an Elf handle, we could pass it down. + Elf *elf; + // This may be consumed and set to -1. + int fd; +}; + +DEFINE_VECTOR(load_debug_info_file_vector, struct load_debug_info_file); + +struct load_debug_info_provided { + const void *build_id; + size_t build_id_len; + struct load_debug_info_file_vector files; + bool matched; +}; + +static struct nstring +load_debug_info_provided_key(const struct load_debug_info_provided *provided) +{ + return (struct nstring){ provided->build_id, provided->build_id_len }; +} + +DEFINE_HASH_TABLE(load_debug_info_provided_table, + struct load_debug_info_provided, + load_debug_info_provided_key, nstring_hash_pair, nstring_eq); + +struct load_debug_info_state { + // Provided files grouped by build ID. + struct load_debug_info_provided_table provided; + // Number of entries in the provided table that haven't matched any + // modules. 
+ size_t unmatched_provided; +}; + +static struct drgn_error * +load_debug_info_add_provided_file(struct drgn_program *prog, + struct load_debug_info_state *state, + const char *path) +{ + _cleanup_close_ int fd = open(path, O_RDONLY); + if (fd < 0) { + drgn_log_warning(prog, "%s: %m; ignoring", path); + return NULL; } - uint16_t st_shndx; - uint64_t st_value; - if (is_64_bit) { - const Elf64_Sym *sym = (Elf64_Sym *)syms + r_sym; - memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); - memcpy(&st_value, &sym->st_value, sizeof(st_value)); - if (bswap) { - st_shndx = bswap_16(st_shndx); - st_value = bswap_64(st_value); - } - } else { - const Elf32_Sym *sym = (Elf32_Sym *)syms + r_sym; - memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); - uint32_t st_value32; - memcpy(&st_value32, &sym->st_value, sizeof(st_value32)); - if (bswap) { - st_shndx = bswap_16(st_shndx); - st_value32 = bswap_32(st_value32); + _cleanup_elf_end_ Elf *elf = dwelf_elf_begin(fd); + if (!elf) { + drgn_log_warning(prog, "%s: %s; ignoring", path, + elf_errmsg(-1)); + return NULL; + } + if (elf_kind(elf) != ELF_K_ELF) { + drgn_log_warning(prog, "%s: not an ELF file; ignoring", path); + return NULL; + } + const void *build_id; + ssize_t build_id_len = drgn_elf_gnu_build_id(elf, &build_id); + if (build_id_len <= 0) { + if (build_id_len < 0) { + drgn_log_warning(prog, "%s: %s; ignoring", path, + elf_errmsg(-1)); + } else { + drgn_log_warning(prog, "%s: no build ID; ignoring", + path); } - st_value = st_value32; + return NULL; } - if (st_shndx >= shdrnum) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid ELF symbol section index"); + + if (drgn_log_is_enabled(prog, DRGN_LOG_DEBUG)) { + _cleanup_free_ char *build_id_str = + ahexlify(build_id, build_id_len); + if (!build_id_str) + return &drgn_enomem; + drgn_log_debug(prog, "provided file %s build ID %s", + path, build_id_str); + } + + struct load_debug_info_provided provided = { + .build_id = build_id, + .build_id_len = build_id_len, + }; + 
struct load_debug_info_provided_table_iterator it; + int r = load_debug_info_provided_table_insert(&state->provided, + &provided, &it); + if (r < 0) + return &drgn_enomem; + if (r > 0) { + load_debug_info_file_vector_init(&it.entry->files); + state->unmatched_provided++; + } + + struct load_debug_info_file file = { + .path = path, + .fd = fd, + .elf = elf, + }; + if (!load_debug_info_file_vector_append(&it.entry->files, &file)) { + if (load_debug_info_file_vector_empty(&it.entry->files)) { + // The key will no longer be valid once we free the Elf + // handle, so we need to delete the entry. + load_debug_info_provided_table_delete_iterator(&state->provided, + it); + } + return &drgn_enomem; } - *ret = sh_addrs[st_shndx] + st_value; + // fd and elf are owned by state now. + fd = -1; + elf = NULL; return NULL; } +static void load_debug_info_state_deinit(struct load_debug_info_state *state) +{ + for (struct load_debug_info_provided_table_iterator it = + load_debug_info_provided_table_first(&state->provided); + it.entry; + it = load_debug_info_provided_table_next(it)) { + vector_for_each(load_debug_info_file_vector, file, + &it.entry->files) { + elf_end(file->elf); + if (file->fd >= 0) + close(file->fd); + } + load_debug_info_file_vector_deinit(&it.entry->files); + } + load_debug_info_provided_table_deinit(&state->provided); +} + +static struct load_debug_info_provided * +load_debug_info_find_provided(struct load_debug_info_state *state, + const void *build_id, size_t build_id_len) +{ + struct nstring key = { build_id, build_id_len }; + struct load_debug_info_provided *provided = + load_debug_info_provided_table_search(&state->provided, + &key).entry; + if (provided && !provided->matched) { + state->unmatched_provided--; + provided->matched = true; + } + return provided; +} + static struct drgn_error * -apply_elf_relas(const struct drgn_relocating_section *relocating, - Elf_Data *reloc_data, Elf_Data *symtab_data, - const uint64_t *sh_addrs, size_t shdrnum, - const 
struct drgn_platform *platform) +load_debug_info_try_provided(struct drgn_module *module, + struct load_debug_info_provided *provided, + enum drgn_module_file_status not_status) { struct drgn_error *err; - - bool is_64_bit = drgn_platform_is_64_bit(platform); - bool bswap = drgn_platform_bswap(platform); - apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; - - const void *relocs = reloc_data->d_buf; - size_t reloc_size = is_64_bit ? sizeof(Elf64_Rela) : sizeof(Elf32_Rela); - size_t num_relocs = reloc_data->d_size / reloc_size; - - const void *syms = symtab_data->d_buf; - size_t sym_size = is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); - size_t num_syms = symtab_data->d_size / sym_size; - - for (size_t i = 0; i < num_relocs; i++) { - uint64_t r_offset; - uint32_t r_sym; - uint32_t r_type; - int64_t r_addend; - if (is_64_bit) { - const Elf64_Rela *rela = (Elf64_Rela *)relocs + i; - uint64_t r_info; - memcpy(&r_offset, &rela->r_offset, sizeof(r_offset)); - memcpy(&r_info, &rela->r_info, sizeof(r_info)); - memcpy(&r_addend, &rela->r_addend, sizeof(r_addend)); - if (bswap) { - r_offset = bswap_64(r_offset); - r_info = bswap_64(r_info); - r_addend = bswap_64(r_addend); - } - r_sym = ELF64_R_SYM(r_info); - r_type = ELF64_R_TYPE(r_info); - } else { - const Elf32_Rela *rela32 = (Elf32_Rela *)relocs + i; - uint32_t r_offset32; - uint32_t r_info32; - int32_t r_addend32; - memcpy(&r_offset32, &rela32->r_offset, sizeof(r_offset32)); - memcpy(&r_info32, &rela32->r_info, sizeof(r_info32)); - memcpy(&r_addend32, &rela32->r_addend, sizeof(r_addend32)); - if (bswap) { - r_offset32 = bswap_32(r_offset32); - r_info32 = bswap_32(r_info32); - r_addend32 = bswap_32(r_addend32); - } - r_offset = r_offset32; - r_sym = ELF32_R_SYM(r_info32); - r_type = ELF32_R_TYPE(r_info32); - r_addend = r_addend32; - } - uint64_t sym_value; - err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, - is_64_bit, bswap, r_sym, &sym_value); + 
vector_for_each(load_debug_info_file_vector, file, &provided->files) { + // No need to check build ID again. + err = drgn_module_try_file_internal(module, file->path, + file->fd, false, NULL); + // drgn_module_try_file_internal took ownership of file->fd. In + // the unlikely scenario that another module has the same build + // ID, we'll just have to reopen it by path. + file->fd = -1; if (err) return err; - err = apply_elf_reloc(relocating, r_offset, r_type, &r_addend, - sym_value); - if (err) - return err; + if (module->loaded_file_status != not_status + && module->debug_file_status != not_status) + break; } return NULL; } static struct drgn_error * -apply_elf_rels(const struct drgn_relocating_section *relocating, - Elf_Data *reloc_data, Elf_Data *symtab_data, - const uint64_t *sh_addrs, size_t shdrnum, - const struct drgn_platform *platform) +load_debug_info_try_provided_supplementary_files(struct drgn_module *module, + struct load_debug_info_state *state) +{ + const void *checksum; + size_t checksum_len; + if (drgn_module_wanted_supplementary_debug_file(module, NULL, NULL, + &checksum, + &checksum_len) + != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK) + return NULL; + struct load_debug_info_provided *provided = + load_debug_info_find_provided(state, checksum, checksum_len); + if (!provided) + return NULL; + drgn_module_try_supplementary_debug_file_log(module, + "trying provided files for"); + return load_debug_info_try_provided(module, provided, + DRGN_MODULE_FILE_WANT_SUPPLEMENTARY); +} + +static struct drgn_error * +load_debug_info_try_provided_files(struct drgn_module *module, + struct load_debug_info_state *state) { struct drgn_error *err; - bool is_64_bit = drgn_platform_is_64_bit(platform); - bool bswap = drgn_platform_bswap(platform); - apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; - - const void *relocs = reloc_data->d_buf; - size_t reloc_size = is_64_bit ? 
sizeof(Elf64_Rel) : sizeof(Elf32_Rel); - size_t num_relocs = reloc_data->d_size / reloc_size; - - const void *syms = symtab_data->d_buf; - size_t sym_size = is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); - size_t num_syms = symtab_data->d_size / sym_size; - - for (size_t i = 0; i < num_relocs; i++) { - uint64_t r_offset; - uint32_t r_sym; - uint32_t r_type; - if (is_64_bit) { - const Elf64_Rel *rel = (Elf64_Rel *)relocs + i; - uint64_t r_info; - memcpy(&r_offset, &rel->r_offset, sizeof(r_offset)); - memcpy(&r_info, &rel->r_info, sizeof(r_info)); - if (bswap) { - r_offset = bswap_64(r_offset); - r_info = bswap_64(r_info); - } - r_sym = ELF64_R_SYM(r_info); - r_type = ELF64_R_TYPE(r_info); - } else { - const Elf32_Rel *rel32 = (Elf32_Rel *)relocs + i; - uint32_t r_offset32; - uint32_t r_info32; - memcpy(&r_offset32, &rel32->r_offset, sizeof(r_offset32)); - memcpy(&r_info32, &rel32->r_info, sizeof(r_info32)); - if (bswap) { - r_offset32 = bswap_32(r_offset32); - r_info32 = bswap_32(r_info32); + err = load_debug_info_try_provided_supplementary_files(module, state); + if (err) + return err; + + const void *build_id; + size_t build_id_len; + drgn_module_build_id(module, &build_id, &build_id_len); + if (build_id_len != 0) { + // Look up the provided file even if we don't need it so that it + // counts as matched. + struct load_debug_info_provided *provided = + load_debug_info_find_provided(state, build_id, + build_id_len); + if (provided && drgn_module_wants_file(module)) { + uint64_t orig_supplementary_file_generation = + module->prog->dbinfo.supplementary_file_generation; + drgn_module_try_files_log(module, + "trying provided files for"); + err = load_debug_info_try_provided(module, provided, + DRGN_MODULE_FILE_WANT); + if (err) + return err; + // If the wanted supplementary debug file changed, try + // finding it again. 
+ if (drgn_module_wanted_supplementary_debug_file_is_new(module, + orig_supplementary_file_generation)) { + err = load_debug_info_try_provided_supplementary_files(module, + state); + if (err) + return err; } - r_offset = r_offset32; - r_sym = ELF32_R_SYM(r_info32); - r_type = ELF32_R_TYPE(r_info32); } - uint64_t sym_value; - err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, - is_64_bit, bswap, r_sym, &sym_value); - if (err) - return err; - - err = apply_elf_reloc(relocating, r_offset, r_type, NULL, - sym_value); - if (err) - return err; } return NULL; } -/* - * Before the debugging information in a relocatable ELF file (e.g., Linux - * kernel module) can be used, it must have ELF relocations applied. This is - * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a - * much faster implementation. - */ -static struct drgn_error *relocate_elf_file(Elf *elf) +static void load_debug_info_log_missing(struct drgn_module *module, + unsigned int max_warnings, + unsigned int *num_warnings) { - struct drgn_error *err; - - GElf_Ehdr ehdr_mem, *ehdr; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - - if (ehdr->e_type != ET_REL) { - /* Not a relocatable file. */ - return NULL; + if (++(*num_warnings) > max_warnings) + return; + const char *missing_loaded = ""; + if (drgn_module_loaded_file_status(module) == DRGN_MODULE_FILE_WANT) { + switch (drgn_module_kind(module)) { + case DRGN_MODULE_MAIN: + missing_loaded = "executable file"; + break; + case DRGN_MODULE_SHARED_LIBRARY: + case DRGN_MODULE_VDSO: + missing_loaded = "shared object file"; + break; + default: + missing_loaded = "loaded file"; + break; + } } - - struct drgn_platform platform; - drgn_platform_from_elf(ehdr, &platform); - if (!platform.arch->apply_elf_reloc) { - /* Unsupported; fall back to libdwfl. 
*/ - return NULL; + const char *missing_debug; + switch (drgn_module_debug_file_status(module)) { + case DRGN_MODULE_FILE_WANT: + missing_debug = "debugging symbols"; + break; + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + missing_debug = "supplementary debugging symbols"; + break; + default: + missing_debug = ""; + break; } + drgn_log_warning(module->prog, "missing %s%s%s for %s", missing_loaded, + missing_loaded[0] && missing_debug[0] ? " and ": "", + missing_debug, module->name); +} - size_t shdrnum; - if (elf_getshdrnum(elf, &shdrnum)) - return drgn_error_libelf(); - _cleanup_free_ uint64_t *sh_addrs = - calloc(shdrnum, sizeof(sh_addrs[0])); - if (!sh_addrs && shdrnum > 0) - return &drgn_enomem; - - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - sh_addrs[elf_ndxscn(scn)] = shdr->sh_addr; - } +static inline void drgn_module_iterator_destroyp(struct drgn_module_iterator **itp) +{ + drgn_module_iterator_destroy(*itp); +} - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, + size_t n, bool load_default, bool load_main) +{ + struct drgn_error *err; - Elf_Scn *reloc_scn = NULL; - while ((reloc_scn = elf_nextscn(elf, reloc_scn))) { - GElf_Shdr *reloc_shdr, reloc_shdr_mem; - reloc_shdr = gelf_getshdr(reloc_scn, &reloc_shdr_mem); - if (!reloc_shdr) - return drgn_error_libelf(); - - int r = should_apply_relocation_section(elf, shstrndx, - reloc_shdr); - if (r < 0) - return drgn_error_libelf(); - if (r) { - scn = elf_getscn(elf, reloc_shdr->sh_info); - if (!scn) - return drgn_error_libelf(); - GElf_Shdr *shdr, shdr_mem; - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - if (shdr->sh_type == SHT_NOBITS) - continue; + if (n == 0 && !load_default && !load_main) { + // 
We don't have any files to try. Don't create any modules. + return NULL; + } - Elf_Scn *symtab_scn = elf_getscn(elf, - reloc_shdr->sh_link); - if (!symtab_scn) - return drgn_error_libelf(); + drgn_blocking_guard(prog); - Elf_Data *data, *reloc_data, *symtab_data; - if ((err = read_elf_section(scn, &data)) || - (err = read_elf_section(reloc_scn, &reloc_data)) || - (err = read_elf_section(symtab_scn, &symtab_data))) - return err; + const char *env = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); + unsigned int max_warnings = env ? atoi(env) : 5; + unsigned int num_warnings = 0; - struct drgn_relocating_section relocating = { - .buf = data->d_buf, - .buf_size = data->d_size, - .addr = sh_addrs[elf_ndxscn(scn)], - .bswap = drgn_platform_bswap(&platform), - }; + drgn_log_debug(prog, "loading %sdebugging symbols", + load_default ? "default " : load_main ? "main " : ""); - if (reloc_shdr->sh_type == SHT_RELA) { - err = apply_elf_relas(&relocating, reloc_data, - symtab_data, sh_addrs, - shdrnum, &platform); - } else { - err = apply_elf_rels(&relocating, reloc_data, - symtab_data, sh_addrs, - shdrnum, &platform); - } - if (err) - return err; + _cleanup_(load_debug_info_state_deinit) + struct load_debug_info_state state = { + .provided = HASH_TABLE_INIT, + }; + for (size_t i = 0; i < n; i++) { + err = load_debug_info_add_provided_file(prog, &state, paths[i]); + if (err) + return err; + } - /* - * Mark the relocation section as empty so that libdwfl - * doesn't try to apply it again. 
- */ - reloc_shdr->sh_size = 0; - if (!gelf_update_shdr(reloc_scn, reloc_shdr)) - return drgn_error_libelf(); - reloc_data->d_size = 0; - } + if (load_debug_info_provided_table_empty(&state.provided) + && !load_default && !load_main) { + drgn_log_debug(prog, "no usable provided files"); + return NULL; } - return NULL; -} -static struct drgn_error * -drgn_module_find_files(struct drgn_debug_info_load_state *load, - struct drgn_module *module) -{ - struct drgn_error *err; + uint64_t old_generation = prog->dbinfo.load_debug_info_generation; - if (module->elf) { - err = relocate_elf_file(module->elf); + _cleanup_(drgn_module_iterator_destroyp) + struct drgn_module_iterator *it = NULL; + err = drgn_loaded_module_iterator_create(prog, &it); + if (err) + return err; + _cleanup_(drgn_module_vector_deinit) + struct drgn_module_vector modules = VECTOR_INIT; + struct drgn_module *module; + while (!(err = drgn_module_iterator_next(it, &module, NULL)) && module) { + // Reset DONT_WANT to WANT. + if (module->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->loaded_file_status = DRGN_MODULE_FILE_WANT; + if (module->debug_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->debug_file_status = DRGN_MODULE_FILE_WANT; + + err = load_debug_info_try_provided_files(module, &state); if (err) return err; - } - GElf_Addr loaded_file_bias; - Elf *loaded_elf = NULL; - Dwarf_Addr debug_file_bias; - Dwarf *dwarf; - err = NULL; - #pragma omp critical(drgn_module_find_files) - { - // We don't need the loaded file for the Linux kernel, and we - // always report the debug file as the main file to libdwfl. 
- if (!(load->dbinfo->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) { - loaded_elf = dwfl_module_getelf(module->dwfl_module, - &loaded_file_bias); - if (!loaded_elf) - err = drgn_error_libdwfl(); - } - if (!err) { - dwarf = dwfl_module_getdwarf(module->dwfl_module, - &debug_file_bias); - if (!dwarf) - err = drgn_error_libdwfl(); + if (drgn_module_wants_file(module) + && (load_default + || (load_main + && drgn_module_kind(module) == DRGN_MODULE_MAIN)) + && !drgn_module_vector_append(&modules, &module)) + return &drgn_enomem; + + // If we are only trying files for the main module (i.e., if + // we're not loading all default debug info and any provided + // files were all for the main module), then we only want to + // create the main module. + if (!load_default + && drgn_module_kind(module) == DRGN_MODULE_MAIN + && state.unmatched_provided == 0) { + err = NULL; + break; } } if (err) return err; - const char *loaded_file_path; - const char *debug_file_path; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, NULL, NULL, - &loaded_file_path, &debug_file_path); - // If the loaded file also has debugging information, debug_file_path is - // NULL. (debug_file_path is also NULL if libdwfl got the debug file - // from debuginfod, so this isn't 100% correct, but it'll at least - // identify the module.) 
- if (!debug_file_path) - debug_file_path = loaded_file_path; - - module->debug_file_bias = debug_file_bias; - err = drgn_elf_file_create(module, debug_file_path, dwarf_getelf(dwarf), - &module->debug_file); - if (err) { - module->debug_file = NULL; - return err; - } - module->debug_file->dwarf = dwarf; - if (!module->debug_file->scns[DRGN_SCN_DEBUG_INFO] || - !module->debug_file->scns[DRGN_SCN_DEBUG_ABBREV]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "missing debugging information sections"); - } - - Dwarf *altdwarf = dwarf_getalt(dwarf); - if (altdwarf) { - Elf *altelf = dwarf_getelf(altdwarf); - if (!altelf) - return drgn_error_libdw(); - size_t shstrndx; - if (elf_getshdrstrndx(altelf, &shstrndx)) - return drgn_error_libelf(); - - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(altelf, scn))) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - - if (shdr->sh_type != SHT_PROGBITS) - continue; - const char *scnname = elf_strptr(altelf, shstrndx, - shdr->sh_name); - if (!scnname) - return drgn_error_libelf(); - - /* - * TODO: save more sections and support imported units. - */ - if (strcmp(scnname, ".debug_info") == 0 && - !module->debug_file->alt_debug_info_data) { - err = read_elf_section(scn, - &module->debug_file->alt_debug_info_data); - if (err) - return err; - } else if (strcmp(scnname, ".debug_str") == 0 && - !module->debug_file->alt_debug_str_data) { - err = read_elf_section(scn, - &module->debug_file->alt_debug_str_data); - if (err) - return err; + struct drgn_module **wanted_modules = + drgn_module_vector_begin(&modules); + size_t num_wanted_modules = drgn_module_vector_size(&modules); + bool iterator_tried_missing = false; + + // The module iterator may have tried to load debug info, so we need to + // check each module again. 
+ if (num_wanted_modules > 0) { + uint64_t new_generation = + ++prog->dbinfo.load_debug_info_generation; + size_t new_num_wanted_modules = 0; + for (size_t i = 0; i < num_wanted_modules; i++) { + module = wanted_modules[i]; + if (module->load_debug_info_generation <= old_generation) { + // Reset DONT_WANT to WANT. + if (module->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->loaded_file_status = DRGN_MODULE_FILE_WANT; + if (module->debug_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->debug_file_status = DRGN_MODULE_FILE_WANT; + if (drgn_module_wants_file(module)) { + wanted_modules[new_num_wanted_modules++] = module; + module->load_debug_info_generation = new_generation; + } + } else if (drgn_module_wants_file(module)) { + load_debug_info_log_missing(module, + max_warnings, + &num_warnings); + iterator_tried_missing = true; } } + num_wanted_modules = new_num_wanted_modules; } - err = drgn_elf_file_precache_sections(module->debug_file); - if (err) - return err; - if (loaded_elf) { - module->loaded_file_bias = loaded_file_bias; - if (loaded_elf == module->debug_file->elf) { - module->loaded_file = module->debug_file; - } else { - err = drgn_elf_file_create(module, loaded_file_path, - loaded_elf, - &module->loaded_file); - if (err) { - module->loaded_file = NULL; + if (num_wanted_modules > 0) { + uint64_t orig_supplementary_file_generation = + prog->dbinfo.supplementary_file_generation; + drgn_handler_list_for_each_enabled(struct drgn_debug_info_finder, + finder, + &prog->dbinfo.debug_info_finders) { + err = finder->ops.find(wanted_modules, + num_wanted_modules, finder->arg); + if (err) return err; + size_t new_num_wanted_modules = 0; + for (size_t i = 0; i < num_wanted_modules; i++) { + module = wanted_modules[i]; + // If there are no more finders to try after + // this and a finder changed the wanted + // supplementary debug file, try to find a + // provided file for it one last time. 
+ if (drgn_handler_is_last_enabled(&finder->handler) + && drgn_module_wanted_supplementary_debug_file_is_new(module, + orig_supplementary_file_generation)) { + err = load_debug_info_try_provided_supplementary_files(module, + &state); + if (err) + return err; + } + if (drgn_module_wants_file(module)) { + wanted_modules[new_num_wanted_modules++] = + module; + } } + num_wanted_modules = new_num_wanted_modules; + if (num_wanted_modules == 0) + break; } } - return NULL; -} -static struct drgn_error * -drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, - struct drgn_dwarf_index_state *index, - struct drgn_module *head) -{ - struct drgn_error *err; - struct drgn_module *module; - for (module = head; module; module = module->next) { - err = drgn_module_find_files(load, module); - if (err) { - module->err = err; - continue; - } - module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; - return drgn_dwarf_index_read_file(index, module->debug_file); - } - /* - * We checked all of the files and didn't find debugging information. - * Report why for each one. - * - * (If we did find debugging information, we discard errors on the - * unused files.) 
- */ - err = NULL; - #pragma omp critical(drgn_debug_info_read_module_error) - for (module = head; module; module = module->next) { - const char *name = - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, - NULL, NULL, NULL, NULL); - if (module->err) { - err = drgn_debug_info_report_error(load, name, NULL, - module->err); - module->err = NULL; - } else { - err = drgn_debug_info_report_error(load, name, - "no debugging information", - NULL); + if (state.unmatched_provided != 0) { + for (struct load_debug_info_provided_table_iterator pit = + load_debug_info_provided_table_first(&state.provided); + pit.entry; + pit = load_debug_info_provided_table_next(pit)) { + if (!pit.entry->matched) { + vector_for_each(load_debug_info_file_vector, + file, &pit.entry->files) { + drgn_log_warning(prog, + "provided file %s did not match any loaded modules; ignoring", + file->path); + } + } } - if (err) - break; } - return err; -} - -static struct drgn_error * -drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) -{ - if (drgn_module_vector_empty(&load->new_modules)) - return NULL; - struct drgn_debug_info *dbinfo = load->dbinfo; - if (!c_string_set_reserve(&dbinfo->module_names, - c_string_set_size(&dbinfo->module_names) - + drgn_module_vector_size(&load->new_modules))) - return &drgn_enomem; - struct drgn_dwarf_index_state index; - if (!drgn_dwarf_index_state_init(&index, dbinfo)) - return &drgn_enomem; - struct drgn_error *err = NULL; - #pragma omp parallel for schedule(dynamic) num_threads(drgn_num_threads) - for (size_t i = 0; i < drgn_module_vector_size(&load->new_modules); i++) { - if (err) - continue; - struct drgn_module *module = - *drgn_module_vector_at(&load->new_modules, i); - struct drgn_error *module_err = - drgn_debug_info_read_module(load, &index, module); - if (module_err) { - #pragma omp critical(drgn_debug_info_update_index_error) - if (err) - drgn_error_destroy(module_err); - else - err = module_err; - } + for (size_t i = 0; i < 
num_wanted_modules; i++) { + load_debug_info_log_missing(wanted_modules[i], max_warnings, + &num_warnings); } - if (!err) { - drgn_debug_info_free_modules(dbinfo, true, false); - err = drgn_dwarf_info_update_index(&index); + if (num_warnings > max_warnings) { + drgn_log_warning(prog, "... missing %u more", + num_warnings - max_warnings); } - drgn_dwarf_index_state_deinit(&index); - return err; -} -struct drgn_error * -drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load) -{ - struct drgn_debug_info *dbinfo = load->dbinfo; - my_dwfl_report_end(dbinfo, NULL, NULL); - struct drgn_error *err = drgn_debug_info_update_index(load); - dwfl_report_begin_add(dbinfo->dwfl); + // Update the DWARF index eagerly, mostly because that's what we did + // back when we used libdwfl. We may want to remove this in the future. + err = drgn_dwarf_info_update_index(&prog->dbinfo); if (err) return err; - drgn_module_vector_clear(&load->new_modules); - return NULL; -} - -static struct drgn_error * -drgn_debug_info_report_finalize_errors(struct drgn_debug_info_load_state *load) -{ - if (load->num_errors > load->max_errors && - (!string_builder_line_break(&load->errors) || - !string_builder_appendf(&load->errors, "... 
%u more", - load->num_errors - load->max_errors))) { - string_builder_deinit(&load->errors); - return &drgn_enomem; - } - if (load->num_errors) { - return drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, - &load->errors); - } else { - return NULL; - } -} -struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, - const char **paths, size_t n, - bool load_default, bool load_main) -{ - struct drgn_program *prog = dbinfo->prog; - struct drgn_error *err; - - if (load_default) - load_main = true; - - const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); - struct drgn_debug_info_load_state load = { - .dbinfo = dbinfo, - .paths = paths, - .num_paths = n, - .load_default = load_default, - .load_main = load_main, - .new_modules = VECTOR_INIT, - .errors = STRING_BUILDER_INIT, - .max_errors = max_errors ? atoi(max_errors) : 5, - }; - dwfl_report_begin_add(dbinfo->dwfl); - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - err = linux_kernel_report_debug_info(&load); - else - err = userspace_report_debug_info(&load); - my_dwfl_report_end(dbinfo, NULL, NULL); - if (err) - goto err; - - /* - * userspace_report_debug_info() reports the main debugging information - * directly with libdwfl, so we need to report it to dbinfo. - */ - if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && load_main && - dwfl_getmodules(dbinfo->dwfl, drgn_debug_info_report_dwfl_module, - &load, 0)) { - err = &drgn_enomem; - goto err; + if (num_wanted_modules > 0 || iterator_tried_missing) { + return drgn_error_create(DRGN_ERROR_MISSING_DEBUG_INFO, + "missing some debugging symbols; see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html"); } - err = drgn_debug_info_update_index(&load); - if (err) - goto err; - - /* - * TODO: for core dumps, we need to add memory reader segments for - * read-only segments of the loaded binaries since those aren't saved in - * the core dump. 
- */ - - err = drgn_debug_info_report_finalize_errors(&load); -out: - drgn_module_vector_deinit(&load.new_modules); - return err; - -err: - drgn_debug_info_free_modules(dbinfo, false, false); - string_builder_deinit(&load.errors); - goto out; + return NULL; } -struct elf_symbols_search_arg { - const char *name; - uint64_t address; - enum drgn_find_symbol_flags flags; +LIBDRGN_PUBLIC struct drgn_error * +drgn_load_module_debug_info(struct drgn_module **modules, size_t *num_modulesp) +{ struct drgn_error *err; - struct drgn_symbol_result_builder *builder; -}; -static bool elf_symbol_match(struct elf_symbols_search_arg *arg, GElf_Addr addr, - const GElf_Sym *sym, const char *name) -{ - if ((arg->flags & DRGN_FIND_SYMBOL_NAME) && strcmp(name, arg->name) != 0) - return false; - if ((arg->flags & DRGN_FIND_SYMBOL_ADDR) && - (arg->address < addr || arg->address >= addr + sym->st_size)) - return false; - return true; -} + const size_t orig_num_modules = *num_modulesp; + if (orig_num_modules == 0) + return NULL; -static bool elf_symbol_store_match(struct elf_symbols_search_arg *arg, - GElf_Sym *elf_sym, GElf_Addr addr, - const char *name) -{ - struct drgn_symbol *sym; - if (arg->flags == (DRGN_FIND_SYMBOL_ONE | DRGN_FIND_SYMBOL_NAME)) { - int binding = GELF_ST_BIND(elf_sym->st_info); - /* - * The order of precedence is - * GLOBAL = UNIQUE > WEAK > LOCAL = everything else - * - * If we found a global or unique symbol, return it - * immediately. If we found a weak symbol, then save it, - * which may overwrite a previously found weak or local - * symbol. Otherwise, save the symbol only if we haven't - * found another symbol. 
- */ - if (binding != STB_GLOBAL - && binding != STB_GNU_UNIQUE - && binding != STB_WEAK - && drgn_symbol_result_builder_count(arg->builder) > 0) - return false; - sym = malloc(sizeof(*sym)); - if (!sym) { - arg->err = &drgn_enomem; - return true; - } - drgn_symbol_from_elf(name, addr, elf_sym, sym); - if (!drgn_symbol_result_builder_add(arg->builder, sym)) { - arg->err = &drgn_enomem; - drgn_symbol_destroy(sym); - } + struct drgn_program *prog = modules[0]->prog; + drgn_log_debug(prog, "loading debugging symbols for %zu modules", + orig_num_modules); - /* Abort on error, or short-circuit if we found a global or - * unique symbol */ - return (arg->err || sym->binding == DRGN_SYMBOL_BINDING_GLOBAL - || sym->binding == DRGN_SYMBOL_BINDING_UNIQUE); - } else { - sym = malloc(sizeof(*sym)); - if (!sym) { - arg->err = &drgn_enomem; - return true; + size_t num_wanted_modules = 0; + for (size_t i = 0; i < orig_num_modules; i++) { + if (modules[i]->prog != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "modules are from different programs"); } - drgn_symbol_from_elf(name, addr, elf_sym, sym); - if (!drgn_symbol_result_builder_add(arg->builder, sym)) { - arg->err = &drgn_enomem; - drgn_symbol_destroy(sym); + if (drgn_module_wants_file(modules[i])) { + modules[num_wanted_modules++] = modules[i]; + } else if (modules[i]->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT + || modules[i]->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT) { + drgn_log_debug(prog, + "debugging symbols not wanted for %s", + modules[i]->name); + } else { + drgn_log_debug(prog, + "debugging symbols already loaded for %s", + modules[i]->name); } - /* Abort on error, or short-circuit for single lookup */ - return (arg->err || (arg->flags & DRGN_FIND_SYMBOL_ONE)); } -} + if (num_wanted_modules == 0) { + *num_modulesp = 0; + return NULL; + } -static int elf_symbols_search_cb(Dwfl_Module *dwfl_module, void **userdatap, - const char *module_name, Dwarf_Addr base, - void *cb_arg) -{ - 
struct elf_symbols_search_arg *arg = cb_arg; + uint64_t generation = ++prog->dbinfo.load_debug_info_generation; + for (size_t i = 0; i < num_wanted_modules; i++) + modules[i]->load_debug_info_generation = generation; - int symtab_len = dwfl_module_getsymtab(dwfl_module); - if (symtab_len == -1) - return DWARF_CB_OK; + drgn_blocking_guard(prog); - /* Ignore the zeroth null symbol */ - for (int i = 1; i < symtab_len; i++) { - GElf_Sym elf_sym; - GElf_Addr elf_addr; - const char *name = dwfl_module_getsym_info(dwfl_module, i, - &elf_sym, &elf_addr, - NULL, NULL, NULL); - if (!name || !elf_symbol_match(arg, elf_addr, &elf_sym, name)) - continue; - if (elf_symbol_store_match(arg, &elf_sym, elf_addr, name)) - return DWARF_CB_ABORT; + const size_t orig_num_wanted_modules = num_wanted_modules; + drgn_handler_list_for_each_enabled(struct drgn_debug_info_finder, + finder, + &prog->dbinfo.debug_info_finders) { + err = finder->ops.find(modules, num_wanted_modules, + finder->arg); + if (err) + return err; + size_t new_num_wanted_modules = 0; + for (size_t i = 0; i < num_wanted_modules; i++) { + if (drgn_module_wants_file(modules[i])) + modules[new_num_wanted_modules++] = modules[i]; + } + num_wanted_modules = new_num_wanted_modules; + if (num_wanted_modules == 0) + break; } - return DWARF_CB_OK; + drgn_log_debug(prog, "debugging symbols loaded for %zu/%zu modules", + orig_num_wanted_modules - num_wanted_modules, + orig_num_wanted_modules); + *num_modulesp = num_wanted_modules; + return NULL; } static struct drgn_error * -elf_symbols_search(const char *name, uint64_t addr, enum drgn_find_symbol_flags flags, - void *data, struct drgn_symbol_result_builder *builder) +elf_symbols_search(const char *name, uint64_t addr, + enum drgn_find_symbol_flags flags, void *data, + struct drgn_symbol_result_builder *builder) { - Dwfl_Module *dwfl_module = NULL; + struct drgn_error *err; struct drgn_program *prog = data; - struct elf_symbols_search_arg arg = { - .name = name, - .address = addr, 
- .flags = flags, - .err = NULL, - .builder = builder, - }; - - if (arg.flags & DRGN_FIND_SYMBOL_ADDR) { - dwfl_module = dwfl_addrmodule(prog->dbinfo.dwfl, arg.address); - if (!dwfl_module) - return NULL; - } - if ((arg.flags & (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) - == (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) { - GElf_Off offset; - GElf_Sym elf_sym; - const char *sym_name = dwfl_module_addrinfo(dwfl_module, addr, - &offset, &elf_sym, - NULL, NULL, NULL); - if (!sym_name) + if (flags & DRGN_FIND_SYMBOL_ADDR) { + struct drgn_module *module = + drgn_module_find_by_address(prog, addr); + if (!module) return NULL; - struct drgn_symbol *sym = malloc(sizeof(*sym)); - if (!sym) - return &drgn_enomem; - drgn_symbol_from_elf(sym_name, addr - offset, &elf_sym, sym); - if (!drgn_symbol_result_builder_add(builder, sym)) { - arg.err = &drgn_enomem; - drgn_symbol_destroy(sym); - } - } else if (dwfl_module) { - elf_symbols_search_cb(dwfl_module, NULL, NULL, 0, &arg); + return drgn_module_elf_symbols_search(module, name, addr, flags, + builder); } else { - dwfl_getmodules(prog->dbinfo.dwfl, elf_symbols_search_cb, &arg, 0); + if (prog->dbinfo.main_module) { + err = drgn_module_elf_symbols_search(prog->dbinfo.main_module, + name, addr, flags, + builder); + if (err == &drgn_stop) + return NULL; + if (err) + return err; + } + for (auto it = drgn_module_table_first(&prog->dbinfo.modules); + it.entry; it = drgn_module_table_next(it)) { + err = drgn_module_elf_symbols_search(*it.entry, name, + addr, flags, + builder); + if (err == &drgn_stop) + break; + if (err) + return err; + } + return NULL; } - return arg.err; -} - -bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, - const char *name) -{ - return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } void drgn_debug_info_init(struct drgn_debug_info *dbinfo, struct drgn_program *prog) { + elf_version(EV_CURRENT); dbinfo->prog = prog; - dbinfo->dwfl = dwfl_begin(&drgn_dwfl_callbacks); - // 
This is temporary until we stop using libdwfl, and is extremely - // unlikely to fail anwyays, so don't bother propagating an error up. - if (!dbinfo->dwfl) - abort(); + drgn_module_table_init(&dbinfo->modules); + drgn_module_address_tree_init(&dbinfo->modules_by_address); const struct drgn_type_finder_ops type_finder_ops = { .find = drgn_debug_info_find_type, }; @@ -2215,26 +5372,53 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, drgn_program_register_symbol_finder_impl(prog, &dbinfo->symbol_finder, "elf", &symbol_finder_ops, prog, 0); + const struct drgn_debug_info_finder_ops + standard_debug_info_finder_ops = { + .find = drgn_standard_module_file_find, + }; + drgn_program_register_debug_info_finder_impl(prog, + &dbinfo->standard_debug_info_finder, + "standard", + &standard_debug_info_finder_ops, + prog, 0); + dbinfo->debug_info_path = drgn_default_debug_info_path; #if WITH_DEBUGINFOD dbinfo->debuginfod_client = NULL; + if (drgn_have_debuginfod()) { + const struct drgn_debug_info_finder_ops + debuginfod_debug_info_finder_ops = { + .find = drgn_debuginfod_find, + }; + drgn_program_register_debug_info_finder_impl(prog, + &dbinfo->debuginfod_debug_info_finder, + "debuginfod", + &debuginfod_debug_info_finder_ops, + prog, + DRGN_HANDLER_REGISTER_ENABLE_LAST); + } #endif - drgn_module_table_init(&dbinfo->modules); - c_string_set_init(&dbinfo->module_names); drgn_dwarf_info_init(dbinfo); } void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo) { - drgn_dwarf_info_deinit(dbinfo); - c_string_set_deinit(&dbinfo->module_names); - drgn_debug_info_free_modules(dbinfo, false, true); - assert(drgn_module_table_empty(&dbinfo->modules)); - drgn_module_table_deinit(&dbinfo->modules); + free(dbinfo->map_files_segments); + if (dbinfo->debug_info_path != drgn_default_debug_info_path) + free((char *)dbinfo->debug_info_path); #if WITH_DEBUGINFOD if (dbinfo->debuginfod_client) drgn_debuginfod_end(dbinfo->debuginfod_client); #endif - dwfl_end(dbinfo->dwfl); + 
drgn_handler_list_deinit(struct drgn_debug_info_finder, finder, + &dbinfo->debug_info_finders, + if (finder->ops.destroy) + finder->ops.destroy(finder->arg); + ); + drgn_dwarf_info_deinit(dbinfo); + for (auto it = drgn_module_table_first(&dbinfo->modules); it.entry; + it = drgn_module_table_next(it)) + drgn_module_destroy(*it.entry); + drgn_module_table_deinit(&dbinfo->modules); } struct drgn_elf_file *drgn_module_find_dwarf_file(struct drgn_module *module, @@ -2242,7 +5426,7 @@ struct drgn_elf_file *drgn_module_find_dwarf_file(struct drgn_module *module, { if (!module->debug_file) return NULL; - if (dwarf == module->debug_file->dwarf) + if (dwarf == module->debug_file->_dwarf) return module->debug_file; struct drgn_elf_file_dwarf_table_iterator it = drgn_elf_file_dwarf_table_search(&module->split_dwarf_files, @@ -2256,15 +5440,11 @@ drgn_module_create_split_dwarf_file(struct drgn_module *module, struct drgn_elf_file **ret) { struct drgn_error *err; - err = drgn_elf_file_create(module, name, dwarf_getelf(dwarf), ret); + err = drgn_elf_file_create(module, name, -1, NULL, dwarf_getelf(dwarf), + ret); if (err) return err; - err = drgn_elf_file_precache_sections(*ret); - if (err) { - drgn_elf_file_destroy(*ret); - return err; - } - (*ret)->dwarf = dwarf; + (*ret)->_dwarf = dwarf; int r = drgn_elf_file_dwarf_table_insert(&module->split_dwarf_files, ret, NULL); if (r < 0) { @@ -2350,135 +5530,3 @@ drgn_module_find_cfi(struct drgn_program *prog, struct drgn_module *module, } return &drgn_not_found; } - -#if !_ELFUTILS_PREREQ(0, 175) -static Elf *dwelf_elf_begin(int fd) -{ - return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); -} -#endif - -struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) -{ - struct drgn_error *err; - - *fd_ret = open(path, O_RDONLY); - if (*fd_ret == -1) - return drgn_error_create_os("open", errno, path); - *elf_ret = dwelf_elf_begin(*fd_ret); - if (!*elf_ret) { - err = drgn_error_libelf(); - goto err_fd; - } - if 
(elf_kind(*elf_ret) != ELF_K_ELF) { - err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); - goto err_elf; - } - return NULL; - -err_elf: - elf_end(*elf_ret); -err_fd: - close(*fd_ret); - return err; -} - -struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, - const char * const *path_formats, ...) -{ - struct drgn_error *err; - size_t i; - - for (i = 0; path_formats[i]; i++) { - va_list ap; - int ret; - char *path; - int fd; - Elf *elf; - - va_start(ap, path_formats); - ret = vasprintf(&path, path_formats[i], ap); - va_end(ap); - if (ret == -1) - return &drgn_enomem; - fd = open(path, O_RDONLY); - if (fd == -1) { - free(path); - continue; - } - elf = dwelf_elf_begin(fd); - if (!elf) { - close(fd); - free(path); - continue; - } - if (elf_kind(elf) != ELF_K_ELF) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s: not an ELF file", path); - elf_end(elf); - close(fd); - free(path); - return err; - } - *path_ret = path; - *fd_ret = fd; - *elf_ret = elf; - return NULL; - } - *path_ret = NULL; - *fd_ret = -1; - *elf_ret = NULL; - return NULL; -} - -/* - * Get the start address from the first loadable segment and the end address - * from the last loadable segment. - * - * The ELF specification states that loadable segments are sorted on p_vaddr. - * However, vmlinux on x86-64 has an out of order segment for .data..percpu, and - * Arm has a couple for .vector and .stubs. Thankfully, those are placed in the - * middle by the vmlinux linker script, so we can still rely on the first and - * last loadable segments. - */ -struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, - uint64_t *start_ret, uint64_t *end_ret) -{ - size_t phnum; - if (elf_getphdrnum(elf, &phnum) != 0) - return drgn_error_libelf(); - - GElf_Phdr phdr_mem, *phdr; - size_t i; - for (i = 0; i < phnum; i++) { - phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type == PT_LOAD) { - uint64_t align = phdr->p_align ? 
phdr->p_align : 1; - *start_ret = (phdr->p_vaddr & -align) + bias; - break; - } - } - if (i >= phnum) { - /* There were no loadable segments. */ - *start_ret = *end_ret = 0; - return NULL; - } - - for (i = phnum; i-- > 0;) { - phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type == PT_LOAD) { - *end_ret = (phdr->p_vaddr + phdr->p_memsz) + bias; - if (*start_ret >= *end_ret) - *start_ret = *end_ret = 0; - return NULL; - } - } - /* We found a loadable segment earlier, so this shouldn't happen. */ - assert(!"PT_LOAD segment disappeared"); - *end_ret = 0; - return NULL; -} diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 614b4233b..2241ef3a8 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -16,12 +16,13 @@ #include #endif #include -#include #include +#include "binary_search_tree.h" #include "cfi.h" #include "drgn_internal.h" #include "dwarf_info.h" +#include "elf_symtab.h" #include "hash_table.h" #include "object.h" #include "orc_info.h" @@ -45,121 +46,84 @@ struct drgn_elf_file; * @{ */ -/** State of a @ref drgn_module. */ -enum drgn_module_state { - /** Reported but not indexed. */ - DRGN_DEBUG_INFO_MODULE_NEW, - /** Reported and will be indexed on success. */ - DRGN_DEBUG_INFO_MODULE_INDEXING, - /** Indexed. Must not be freed until @ref drgn_debug_info_destroy(). */ - DRGN_DEBUG_INFO_MODULE_INDEXED, -} __attribute__((__packed__)); - DEFINE_HASH_TABLE_TYPE(drgn_elf_file_dwarf_table, struct drgn_elf_file *); +DEFINE_HASH_TABLE_TYPE(drgn_module_table, struct drgn_module *); +DEFINE_BINARY_SEARCH_TREE_TYPE(drgn_module_address_tree, struct drgn_module); -/** - * A module reported to a @ref drgn_debug_info. - * - * Conceptually, a module is an ELF file loaded at a specific address range (or - * not loaded). - * - * Files are identified by canonical path and, if present, build ID. Each (path, - * address range) is uniquely represented by a @ref drgn_module. 
- */ -struct drgn_module { +struct drgn_debug_info_finder { + struct drgn_handler handler; + struct drgn_debug_info_finder_ops ops; + void *arg; +}; + +/** Cache of debugging information. */ +struct drgn_debug_info { + /** Program owning this cache. */ struct drgn_program *prog; - /** @c NULL if the module does not have a build ID. */ - const void *build_id; - /** Zero if the module does not have a build ID. */ - size_t build_id_len; - /** Load address range, or both 0 if not loaded. */ - uint64_t start, end; - /** Optional module name allocated with @c malloc(). */ - char *name; + struct drgn_type_finder type_finder; + struct drgn_object_finder object_finder; + struct drgn_symbol_finder symbol_finder; - Dwfl_Module *dwfl_module; - /** File that is loaded into the program. */ - struct drgn_elf_file *loaded_file; - /** File containing debugging information. */ - struct drgn_elf_file *debug_file; + /** Main module. @c NULL if not created yet. */ + struct drgn_module *main_module; + /** Table of non-main modules indexed on @ref drgn_module_key. */ + struct drgn_module_table modules; /** - * Difference between addresses in program and addresses in @ref - * drgn_module::loaded_file. + * Counter used to detect when @ref modules is modified during iteration + * of a @ref drgn_created_module_iterator. */ - uint64_t loaded_file_bias; + uint64_t modules_generation; + /** Tree of modules sorted by start address. */ + struct drgn_module_address_tree modules_by_address; /** - * Difference between addresses in program and addresses in @ref - * drgn_module::debug_file. + * Singly-linked list of modules that need to have their DWARF + * information indexed. */ - uint64_t debug_file_bias; - - struct drgn_elf_file_dwarf_table split_dwarf_files; - + struct drgn_module *modules_pending_indexing; /** DWARF debugging information. */ - struct drgn_module_dwarf_info dwarf; - /** ORC unwinder information. 
*/ - struct drgn_module_orc_info orc; - - /** Whether DWARF CFI from .debug_frame has been parsed. */ - bool parsed_debug_frame; - /** Whether EH CFI from .eh_frame has been parsed. */ - bool parsed_eh_frame; - /** Whether ORC unwinder data has been parsed. */ - bool parsed_orc; + struct drgn_dwarf_info dwarf; - /* - * path, elf, and fd are used when an ELF file was reported with - * drgn_debug_info_report_elf() so we can report the file to libdwfl - * later. They are not valid after loading. + struct drgn_handler_list debug_info_finders; + struct drgn_debug_info_finder standard_debug_info_finder; + /** See @ref drgn_program_debug_info_path(). */ + const char *debug_info_path; + /** + * Counter used to detect when loading debugging information is + * attempted. + * + * @sa drgn_module::load_debug_info_generation */ - char *path; - Elf *elf; - int fd; - enum drgn_module_state state; - /** Error while loading. */ - struct drgn_error *err; + uint64_t load_debug_info_generation; /** - * Next module with same build ID and address range. + * Counter used to detect when the wanted supplementary file for a + * module has changed. * - * There may be multiple files with the same build ID (e.g., a stripped - * binary and its corresponding separate debug info file). While - * loading, all files with the same build ID and address range are - * linked in a list. Only one is indexed; the rest are destroyed. + * @sa drgn_module_wanted_supplementary_file::generation */ - struct drgn_module *next; -}; - -DEFINE_HASH_TABLE_TYPE(drgn_module_table, struct drgn_module *); - -DEFINE_HASH_SET_TYPE(c_string_set, const char *); - -/** Cache of debugging information. */ -struct drgn_debug_info { - /** Program owning this cache. */ - struct drgn_program *prog; + uint64_t supplementary_file_generation; - struct drgn_type_finder type_finder; - struct drgn_object_finder object_finder; - struct drgn_symbol_finder symbol_finder; - - /** DWARF frontend library handle. 
*/ - Dwfl *dwfl; #if WITH_DEBUGINFOD + struct drgn_debug_info_finder debuginfod_debug_info_finder; /** debuginfod-client session. */ debuginfod_client *debuginfod_client; + const char *debuginfod_current_name; + const char *debuginfod_current_type; + unsigned int debuginfod_spinner_position; + bool debuginfod_have_url; + bool logged_debuginfod_progress; #endif - /** Modules keyed by build ID and address range. */ - struct drgn_module_table modules; + bool logged_no_debuginfod; + /** - * Names of indexed modules. - * - * The entries in this set are @ref drgn_module::name, so they should - * not be freed. + * Cache of entries in /proc/$pid/map_files used for finding loaded + * files. Populated the first time we need it or opportunistically when + * we parse /proc/$pid/maps. Rebuilt whenever we try to open an entry + * that no longer exists. */ - struct c_string_set module_names; - /** DWARF debugging information. */ - struct drgn_dwarf_info dwarf; + struct drgn_map_files_segment *map_files_segments; + /** Number of segments in @ref map_files_segments. */ + size_t num_map_files_segments; }; /** Initialize a @ref drgn_debug_info. */ @@ -169,96 +133,188 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, /** Deinitialize a @ref drgn_debug_info. */ void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo); -DEFINE_VECTOR_TYPE(drgn_module_vector, struct drgn_module *); - -/** State tracked while loading debugging information. */ -struct drgn_debug_info_load_state { - struct drgn_debug_info * const dbinfo; - const char ** const paths; - const size_t num_paths; - const bool load_default; - const bool load_main; - /** Newly added modules to be indexed. */ - struct drgn_module_vector new_modules; - /** Formatted errors reported by @ref drgn_debug_info_report_error(). */ - struct string_builder errors; - /** Number of errors reported by @ref drgn_debug_info_report_error(). */ - unsigned int num_errors; - /** Maximum number of errors to report before truncating. 
*/ - unsigned int max_errors; +typedef void drgn_module_iterator_destroy_fn(struct drgn_module_iterator *); +typedef struct drgn_error * +drgn_module_iterator_next_fn(struct drgn_module_iterator *, + struct drgn_module **, bool *); + +struct drgn_module_iterator { + struct drgn_program *prog; + drgn_module_iterator_destroy_fn *destroy; + drgn_module_iterator_next_fn *next; }; -/** - * Report a non-fatal error while loading debugging information. - * - * The error will be included in a @ref DRGN_ERROR_MISSING_DEBUG_INFO error - * returned by @ref drgn_debug_info_load(). - * - * @param[name] name An optional module name to prefix to the error message. - * @param[message] message An optional message with additional context to prefix - * to the error message. - * @param[err] err The error to report. This may be @c NULL if @p name and @p - * message provide sufficient information. This is destroyed on either success - * or failure. - * @return @c NULL on success, @ref drgn_enomem if the error could not be - * reported. - */ -struct drgn_error * -drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, - const char *name, const char *message, - struct drgn_error *err); +static inline void +drgn_module_iterator_init(struct drgn_module_iterator *it, + struct drgn_program *prog, + drgn_module_iterator_destroy_fn *destroy, + drgn_module_iterator_next_fn *next) +{ + it->prog = prog; + it->destroy = destroy; + it->next = next; +} + +/** Bitmask of files in a @ref drgn_module. */ +enum drgn_module_file_mask { + DRGN_MODULE_FILE_MASK_LOADED = 1 << 0, + DRGN_MODULE_FILE_MASK_DEBUG = 1 << 1, +} __attribute__((__packed__)); -/** - * Report a module to a @ref drgn_debug_info from an ELF file. - * - * This takes ownership of @p fd and @p elf on either success or failure. They - * should not be used (including closed or freed) after this returns. - * - * @param[in] path The path to the file. - * @param[in] fd A file descriptor referring to the file. 
- * @param[in] elf The Elf handle of the file. - * @param[in] start The (inclusive) start address of the loaded file, or 0 if - * the file is not loaded. - * @param[in] end The (exclusive) end address of the loaded file, or 0 if the - * file is not loaded. - * @param[in] name An optional name for the module. This is only used for @ref - * drgn_debug_info_is_indexed(). - * @param[out] new_ret Whether the module was newly created and reported. This - * is @c false if a module with the same build ID and address range was already - * loaded or a file with the same path and address range was already reported. - */ -struct drgn_error * -drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, - const char *path, int fd, Elf *elf, uint64_t start, - uint64_t end, const char *name, bool *new_ret); +DEFINE_HASH_MAP_TYPE(drgn_module_section_address_map, char *, uint64_t); -/** Index new debugging information and continue reporting. */ -struct drgn_error * -drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load); +struct drgn_module { + struct drgn_program *prog; + enum drgn_module_kind kind; -/** - * Load debugging information. - * - * @sa drgn_program_load_debug_info - */ -struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, - const char **paths, size_t n, - bool load_default, bool load_main); + /** Module name. */ + char *name; + /** Kind-specific information. */ + union { + struct { + uint64_t dynamic_address; + } shared_library; + struct { + uint64_t dynamic_address; + } vdso; + struct { + uint64_t address; + } relocatable; + struct { + uint64_t id; + } extra; + }; + /** + * Raw binary build ID. @c NULL if the module does not have a build ID. + */ + void *build_id; + /** + * Length of @ref drgn_module::build_id in bytes. Zero if the module + * does not have a build ID. + */ + size_t build_id_len; + /** + * Build ID as a null-terminated hexadecimal string. @c NULL if the + * module does not have a build ID. 
+ * + * Used for logging and finding debugging information. + * + * This is allocated together with @ref drgn_module::build_id. + */ + char *build_id_str; + /** Node in @ref drgn_debug_info::modules_by_address. */ + struct binary_tree_node node; + /** + * Load address range. Both 0 if not loaded. Both @c UINT64_MAX if not + * known yet. + */ + uint64_t start, end; + + struct drgn_elf_file *loaded_file; + struct drgn_elf_file *debug_file; + struct drgn_elf_file *supplementary_debug_file; + /** Table mapping libdw handle to corresponding @ref drgn_elf_file. */ + struct drgn_elf_file_dwarf_table split_dwarf_files; + uint64_t loaded_file_bias; + uint64_t debug_file_bias; + enum drgn_module_file_status loaded_file_status; + enum drgn_module_file_status debug_file_status; + enum drgn_supplementary_file_kind supplementary_debug_file_kind; + + /** DWARF debugging information. */ + struct drgn_module_dwarf_info dwarf; + /** ORC unwinder information. */ + struct drgn_module_orc_info orc; + /** ELF symbol table. */ + struct drgn_elf_symbol_table elf_symtab; + + /** Whether .debug_frame has been parsed. */ + bool parsed_debug_frame; + /** Whether .eh_frame has been parsed. */ + bool parsed_eh_frame; + /** Whether ORC unwinder data has been parsed. */ + bool parsed_orc; + /** Which files need to be checked for an ELF symbol table. */ + enum drgn_module_file_mask elf_symtab_pending_files; + /** + * Whether a full symbol table has been found (as opposed to a dynamic + * symbol table, which only contains a subset of symbols). + */ + bool have_full_symtab; + + /** Mapping from section name to address. */ + struct drgn_module_section_address_map section_addresses; + /** + * Counter used to detect when @ref section_addresses is modified during + * iteration of a @ref drgn_module_section_address_iterator. + */ + uint64_t section_addresses_generation; + + /** + * Counter used to detect when loading debugging information is + * attempted. 
+ * + * @sa drgn_debug_info::load_debug_info_generation + */ + uint64_t load_debug_info_generation; + struct drgn_module_wanted_supplementary_file *wanted_supplementary_debug_file; + /** Node in @ref drgn_debug_info::modules_pending_indexing. */ + struct drgn_module *pending_indexing_next; +}; + +struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, + const struct drgn_module_key *key, + const char *name, + struct drgn_module **ret, + bool *new_ret); /** - * Return whether a @ref drgn_debug_info has indexed a module with the given - * name. + * Delete a partially-initialized module. This can only be called before the + * module is returned from public API. */ -bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, - const char *name); +void drgn_module_delete(struct drgn_module *module); + +static inline void drgn_module_deletep(struct drgn_module **modulep) +{ + if (*modulep) + drgn_module_delete(*modulep); +} + +struct depmod_index { + char *path; + void *addr; + size_t len; +}; + +struct drgn_module_standard_files_state { + struct depmod_index modules_dep; +}; + +// Always takes ownership of fd. Attempts to resolve the real path of path. +struct drgn_error * +drgn_module_try_standard_file(struct drgn_module *module, const char *path, + int fd, bool check_build_id, + const uint32_t *expected_crc); + +#define drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) \ + for (debug_dir = (prog)->dbinfo.debug_info_path; \ + debug_dir \ + && (debug_dir_len = strchrnul(debug_dir, ':') - debug_dir, 1); \ + debug_dir = debug_dir[debug_dir_len] == '\0' \ + ? NULL : debug_dir + debug_dir_len + 1) + +static inline bool drgn_module_wants_file(struct drgn_module *module) +{ + return drgn_module_wants_loaded_file(module) + || drgn_module_wants_debug_file(module); +} /** * Get the language of the program's `main` function or `NULL` if it could not * be found. 
*/ -struct drgn_error * -drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, - const struct drgn_language **ret); +const struct drgn_language * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo); /** @ref drgn_type_finder_ops::find() that uses debugging information. */ struct drgn_error *drgn_debug_info_find_type(uint64_t kinds, const char *name, diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index ecd69bcfc..e680dadd5 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -819,19 +819,6 @@ struct drgn_error *drgn_program_set_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_set_pid(struct drgn_program *prog, pid_t pid); -/** - * Load debugging information for a list of executable or library files. - * - * @param[in] load_default Whether to also load debugging information which can - * automatically be determined from the program. This implies @p load_main. - * @param[in] load_main Whether to also load information for the main - * executable. - */ -struct drgn_error *drgn_program_load_debug_info(struct drgn_program *prog, - const char **paths, size_t n, - bool load_default, - bool load_main); - /** * Create a @ref drgn_program from a core dump file. * @@ -1199,6 +1186,546 @@ struct drgn_error *drgn_program_element_info(struct drgn_program *prog, /** @} */ +/** + * @defgroup Modules Modules + * + * Modules in a program and debugging information. + * + * @{ + */ + +/** An executable, library, or other binary file used by a program. */ +struct drgn_module; + +/** Kinds of modules. */ +enum drgn_module_kind { + /** + * Main module. For userspace programs, this is the executable. For the + * Linux kernel, this is `vmlinux`. + */ + DRGN_MODULE_MAIN, + /** Shared library (a.k.a. dynamic library or dynamic shared object). */ + DRGN_MODULE_SHARED_LIBRARY, + /** Virtual dynamic shared object (vDSO). */ + DRGN_MODULE_VDSO, + /** Relocatable object (e.g., Linux kernel loadable module). 
*/ + DRGN_MODULE_RELOCATABLE, + /** Extra debugging information. */ + DRGN_MODULE_EXTRA, +} __attribute__((__packed__)); + +/** Unique key for a @ref drgn_module. */ +struct drgn_module_key { + /** Kind of module. */ + enum drgn_module_kind kind; + /** Kind-specific key. */ + union { + struct { + /** Name of module. */ + const char *name; + /** Address of dynamic section. */ + uint64_t dynamic_address; + } shared_library; + struct { + /** Name of module. */ + const char *name; + /** Address of dynamic section. */ + uint64_t dynamic_address; + } vdso; + struct { + /** Name of module. */ + const char *name; + /** + * Address identifying the module (e.g., for Linux + * kernel loadable modules, the base address). + */ + uint64_t address; + } relocatable; + struct { + /** Name of module. */ + const char *name; + /** Arbitrary identification number. */ + uint64_t id; + } extra; + }; +}; + +/** + * Find the created @ref drgn_module matching the given @p key. + * + * @return Module, or @c NULL if not found. + */ +struct drgn_module *drgn_module_find(struct drgn_program *prog, + const struct drgn_module_key *key); + +/** + * Find the created @ref drgn_module containing the given @p address. + * + * @return Module, or @c NULL if not found. + */ +struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, + uint64_t address); + +/** + * Find the main module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, + const char *name, + struct drgn_module **ret, + bool *new_ret); + +/** + * Find a shared library module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. 
+ */ +struct drgn_error * +drgn_module_find_or_create_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret); + +/** + * Find a vDSO module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret); + +/** + * Find a relocatable module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error * +drgn_module_find_or_create_relocatable(struct drgn_program *prog, + const char *name, uint64_t address, + struct drgn_module **ret, bool *new_ret); + +/** + * Find a created Linux kernel loadable module from a ``struct module`` object. + * + * @param[in] module_obj ``struct module`` object identifying the module. + * @param[out] ret Returned module. + */ +struct drgn_error * +drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_obj, + struct drgn_module **ret); + +/** + * Find a Linux kernel loadable module from a ``struct module`` object, creating + * it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error * +drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *module_obj, + struct drgn_module **ret, + bool *new_ret); + +/** + * Find an extra module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, + const char *name, + uint64_t id, + struct drgn_module **ret, + bool *new_ret); + +/** Get the program that a module is from. 
*/ +struct drgn_program *drgn_module_program(const struct drgn_module *module); + +/** Get the unique key for a module. */ +struct drgn_module_key drgn_module_key(const struct drgn_module *module); + +/** Get the kind of a module. */ +enum drgn_module_kind drgn_module_kind(const struct drgn_module *module); + +/** Get the name of a module. */ +const char *drgn_module_name(const struct drgn_module *module); + +/** + * Get the address range where a module is loaded. + * + * If the module is not loaded in memory, then the start and end are both 0. + * + * @param[out] start_ret Minimum address (inclusive). + * @param[out] end_ret Maximum address (exclusive). + * @return @c true on success, @c false if the address range is not known yet. + */ +bool drgn_module_address_range(const struct drgn_module *module, + uint64_t *start_ret, uint64_t *end_ret); + +/** + * Set the address range of a module. + * + * @p start and @p end may both be 0 to indicate that the module is not loaded + * in memory. They may both be @c UINT64_MAX to unset the range. Otherwise, @p + * start must be less than @p end. + */ +struct drgn_error *drgn_module_set_address_range(struct drgn_module *module, + uint64_t start, uint64_t end); + +/** + * Get the unique byte string (e.g., GNU build ID) identifying files used by + * a module. + * + * @param[out] raw_ret Returned raw build ID. @c NULL if not known. Valid until + * the build ID is changed. + * @param[out] raw_len_ret Size of returned build ID, in bytes. 0 if not known. + * @return Lowercase hexadecimal representation of build ID. @c NULL if not + * known. Valid until the build ID is changed. + */ +const char *drgn_module_build_id(const struct drgn_module *module, + const void **raw_ret, size_t *raw_len_ret); + +/** + * Set the unique byte string (e.g., GNU build ID) identifying files used by a + * module. + * + * @param[in] build_id New build ID. + * @param[in] build_id_len New size of build ID, in bytes. May be 0 to unset the + * build ID. 
+ */ +struct drgn_error *drgn_module_set_build_id(struct drgn_module *module, + const void *build_id, + size_t build_id_len); + +/** Get the address of a section with the given name in a relocatable module. */ +struct drgn_error *drgn_module_get_section_address(struct drgn_module *module, + const char *name, + uint64_t *ret); + +/** + * Set the address of a section with the given name in a relocatable module. + * + * This is not allowed after a file has been assigned to the module. + */ +struct drgn_error *drgn_module_set_section_address(struct drgn_module *module, + const char *name, + uint64_t address); + +/** + * Unset the address of a section with the given name in a relocatable module. + * + * This is not allowed after a file has been assigned to the module. + */ +struct drgn_error *drgn_module_delete_section_address(struct drgn_module *module, + const char *name); + +/** + * Get the number of section addresses currently set in a relocatable module. + */ +struct drgn_error *drgn_module_num_section_addresses(struct drgn_module *module, + size_t *ret); + +/** Iterator over set section addresses in a relocatable module. */ +struct drgn_module_section_address_iterator; + +/** Create a @ref drgn_module_section_address_iterator. */ +struct drgn_error * +drgn_module_section_address_iterator_create(struct drgn_module *module, + struct drgn_module_section_address_iterator **ret); + +/** Destroy a @ref drgn_module_section_address_iterator. */ +void +drgn_module_section_address_iterator_destroy(struct drgn_module_section_address_iterator *it); + +/** Get the module that a @ref drgn_module_section_address_iterator is for. */ +struct drgn_module * +drgn_module_section_address_iterator_module(struct drgn_module_section_address_iterator *it); + +/** + * Get the next section name and address from a @ref + * drgn_module_section_address_iterator. + * + * @param[out] name_ret Returned name. 
Valid until the next call to @ref + * drgn_module_section_address_iterator_next() or @ref + * drgn_module_section_address_iterator_destroy() on @p it. + * @param[out] address_ret Returned address. + */ +struct drgn_error * +drgn_module_section_address_iterator_next(struct drgn_module_section_address_iterator *it, + const char **name_ret, + uint64_t *address_ret); + +/** Status of a file in a @ref drgn_module. */ +enum drgn_module_file_status { + /** File has not been found and should be searched for. */ + DRGN_MODULE_FILE_WANT, + /** File has already been found and assigned. */ + DRGN_MODULE_FILE_HAVE, + /** File has not been found, but it should not be searched for. */ + DRGN_MODULE_FILE_DONT_WANT, + /** File has not been found and is not needed. */ + DRGN_MODULE_FILE_DONT_NEED, + /** + * File has been found, but it requires a supplementary file before it + * can be used. + */ + DRGN_MODULE_FILE_WANT_SUPPLEMENTARY, +}; + +/** Kind of supplementary file. */ +enum drgn_supplementary_file_kind { + /** Not known or not needed. */ + DRGN_SUPPLEMENTARY_FILE_NONE, + /** + * GNU-style supplementary debug file referred to by a + * ``.gnu_debugaltlink`` section. + */ + DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK, +}; + +/** Get the status of a module's loaded file. */ +enum drgn_module_file_status +drgn_module_loaded_file_status(const struct drgn_module *module); + +/** Set the status of a module's loaded file. */ +bool drgn_module_set_loaded_file_status(struct drgn_module *module, + enum drgn_module_file_status status); + +/** + * Get whether a module wants a loaded file. + * + * For future-proofness, debug info finders should prefer this over comparing + * @ref drgn_module_loaded_file_status() directly. + */ +bool drgn_module_wants_loaded_file(const struct drgn_module *module); + +/** Get the absolute path of a module's loaded file, or @c NULL if not known.
*/ +const char *drgn_module_loaded_file_path(const struct drgn_module *module); + +/** + * Get the difference between the load address in the program and addresses in a + * module's loaded file. + */ +uint64_t drgn_module_loaded_file_bias(const struct drgn_module *module); + +enum drgn_module_file_status +drgn_module_debug_file_status(const struct drgn_module *module); + +bool drgn_module_set_debug_file_status(struct drgn_module *module, + enum drgn_module_file_status status); + +/** + * Get whether a module wants a debug file. + * + * For future-proofness, debug info finders should prefer this over comparing + * @ref drgn_module_debug_file_status() directly. + */ +bool drgn_module_wants_debug_file(const struct drgn_module *module); + +/** Get the absolute path of a module's debug file, or @c NULL if not known. */ +const char *drgn_module_debug_file_path(const struct drgn_module *module); + +/** + * Get the difference between the load address in the program and addresses in a + * module's debug file. + */ +uint64_t drgn_module_debug_file_bias(const struct drgn_module *module); + +/** Get the kind of a module's supplementary debug file. */ +enum drgn_supplementary_file_kind +drgn_module_supplementary_debug_file_kind(const struct drgn_module *module); + +/** + * Get the absolute path of a module's supplementary debug file, or @c NULL if + * not known or not needed. + */ +const char * +drgn_module_supplementary_debug_file_path(const struct drgn_module *module); + +/** + * Get information about the supplementary debug file that a module currently + * wants. + * + * @param[out] debug_file_path_ret Path of main file that wants the + * supplementary file. + * @param[out] supplementary_path_ret Path to supplementary file. This may be + * absolute or relative to @p debug_file_path_ret. + * @param[out] checksum_ret Unique identifier of the supplementary file. + * @param[out] checksum_len_ret Size of unique identifier, in bytes. + * @return Kind of supplementary file. 
+ */ +enum drgn_supplementary_file_kind +drgn_module_wanted_supplementary_debug_file(struct drgn_module *module, + const char **debug_file_path_ret, + const char **supplementary_path_ret, + const void **checksum_ret, + size_t *checksum_len_ret); + +/** Debugging information finder callback table. */ +struct drgn_debug_info_finder_ops { + /** + * Callback to destroy the debug info finder. + * + * This may be @c NULL. + * + * @param[in] arg Argument passed to @ref + * drgn_program_register_debug_info_finder(). + */ + void (*destroy)(void *arg); + /** + * Callback for finding debug info. + * + * @param[in] modules Array of modules that want debugging information. + * @param[in] num_modules Number of modules in @p modules. + * @param[in] arg Argument passed to @ref + * drgn_program_register_debug_info_finder(). + * @return @c NULL on success, non-@c NULL on error. It is not an error + * for some debugging information to not be found. + */ + struct drgn_error *(*find)(struct drgn_module * const *modules, + size_t num_modules, void *arg); +}; + +/** + * Register a debugging information finding callback. + * + * @param[in] name Finder name. This is copied. + * @param[in] ops Callback table. This is copied. + * @param[in] arg Argument to pass to callbacks. + * @param[in] enable_index Insert the finder into the list of enabled finders at + * the given index. If @ref DRGN_HANDLER_REGISTER_ENABLE_LAST or greater than + * the number of enabled finders, insert it at the end. If @ref + * DRGN_HANDLER_REGISTER_DONT_ENABLE, don’t enable the finder. + */ +struct drgn_error * +drgn_program_register_debug_info_finder(struct drgn_program *prog, + const char *name, + const struct drgn_debug_info_finder_ops *ops, + void *arg, size_t enable_index); + +/** + * Get the names of all registered debugging information finders. + * + * The order of the names is arbitrary. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. 
+ */ +struct drgn_error * +drgn_program_registered_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); + +/** + * Set the list of enabled debugging information finders. + * + * Finders are called in the same order as the list until all wanted files have + * been found. + * + * @param[in] names Names of finders to enable, in order. + * @param[in] count Number of names in @p names. + */ +struct drgn_error * +drgn_program_set_enabled_debug_info_finders(struct drgn_program *prog, + const char * const *names, + size_t count); + +/** + * Get the names of enabled debugging information finders, in order. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. + */ +struct drgn_error * +drgn_program_enabled_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); + +/** Colon-separated directories to search for debugging information files. */ +const char *drgn_program_debug_info_path(struct drgn_program *prog); + +/** Set the directories to search for debugging information files. */ +struct drgn_error *drgn_program_set_debug_info_path(struct drgn_program *prog, + const char *path); + +/** + * Try to use the given file for a module. + * + * @param[in] path Path to file. + * @param[in] fd If nonnegative, an open file descriptor referring to the file. + * This always takes ownership of the file descriptor even if the file is not + * used or on error. + * @param[in] force If @c true, don't check whether the file matches the module. + */ +struct drgn_error * +drgn_module_try_file(struct drgn_module *module, const char *path, int fd, + bool force); + +/** Iterator over a set of modules. */ +struct drgn_module_iterator; + +/** Destroy a @ref drgn_module_iterator. */ +void +drgn_module_iterator_destroy(struct drgn_module_iterator *it); + +/** Get the program that a module iterator is from. 
*/ +struct drgn_program * +drgn_module_iterator_program(const struct drgn_module_iterator *it); + +/** + * Get the next module in a module iterator. + * + * @param[out] ret Returned module. + * @param[out] new_ret Whether the module was newly created. May be @c NULL. + */ +struct drgn_error *drgn_module_iterator_next(struct drgn_module_iterator *it, + struct drgn_module **ret, + bool *new_ret); + +/** Create an iterator over created modules. */ +struct drgn_error * +drgn_created_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret); + +/** + * Create an iterator that determines what executables, libraries, etc. are + * loaded in the program and creates modules to represent them. + */ +struct drgn_error * +drgn_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret); + +/** + * Load debugging information for the given set of files and/or modules. + * + * @param[in] load_default Whether to load all debugging information for all + * loaded modules. This implies @p load_main. + * @param[in] load_main Whether to load all debugging information for the main + * module. + */ +struct drgn_error *drgn_program_load_debug_info(struct drgn_program *prog, + const char **paths, size_t n, + bool load_default, + bool load_main); + +/** + * Load debugging information for the given modules using the enabled debugging + * information finders. 
+ */ +struct drgn_error *drgn_load_module_debug_info(struct drgn_module **modules, + size_t *num_modules); + +/** @} */ + /** * @defgroup Logging Logging * diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 7725e781c..939a1b663 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -138,6 +138,7 @@ struct drgn_dwarf_index_cu { }; DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector); +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq); @@ -372,26 +373,6 @@ drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, buffer->cu = cu; } -bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, - struct drgn_debug_info *dbinfo) -{ - state->dbinfo = dbinfo; - drgn_init_num_threads(); - state->cus = malloc_array(drgn_num_threads, sizeof(*state->cus)); - if (!state->cus) - return false; - for (int i = 0; i < drgn_num_threads; i++) - drgn_dwarf_index_cu_vector_init(&state->cus[i]); - return true; -} - -void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state) -{ - for (int i = 0; i < drgn_num_threads; i++) - drgn_dwarf_index_cu_vector_deinit(&state->cus[i]); - free(state->cus); -} - static const char *drgn_dwarf_dwo_name(Dwarf_Die *die) { Dwarf_Attribute attr_mem, *attr; @@ -402,14 +383,20 @@ static const char *drgn_dwarf_dwo_name(Dwarf_Die *die) } static struct drgn_error * -drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, - struct drgn_elf_file *file, - enum drgn_section_index scn) +drgn_dwarf_index_read_file(struct drgn_elf_file *file, + struct drgn_dwarf_index_cu_vector *cus); + +static struct drgn_error * +drgn_dwarf_index_read_cus(struct drgn_elf_file *file, + enum drgn_section_index scn, + struct drgn_dwarf_index_cu_vector *cus) { struct drgn_error *err; - struct drgn_dwarf_index_cu_vector *cus = - &state->cus[omp_get_thread_num()]; + Dwarf *dwarf; + err = drgn_elf_file_get_dwarf(file, &dwarf); + if 
(err) + return err; Dwarf_Off off, next_off; size_t header_size; Dwarf_Half version; @@ -421,19 +408,18 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, scn == DRGN_SCN_DEBUG_TYPES ? &v4_type_signature : NULL; int ret; for (off = 0; - (ret = dwarf_next_unit(file->dwarf, off, &next_off, &header_size, + (ret = dwarf_next_unit(dwarf, off, &next_off, &header_size, &version, &abbrev_offset, &address_size, &offset_size, v4_type_signaturep, NULL)) == 0; off = next_off) { Dwarf_Die cudie; if (scn == DRGN_SCN_DEBUG_TYPES) { - if (!dwarf_offdie_types(file->dwarf, off + header_size, + if (!dwarf_offdie_types(dwarf, off + header_size, &cudie)) return drgn_error_libdw(); } else { - if (!dwarf_offdie(file->dwarf, off + header_size, - &cudie)) + if (!dwarf_offdie(dwarf, off + header_size, &cudie)) return drgn_error_libdw(); } uint8_t unit_type; @@ -459,18 +445,18 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, &split_file); if (err) return err; - err = drgn_dwarf_index_read_file(state, - split_file); + err = drgn_dwarf_index_read_file(split_file, + cus); if (err) return err; } continue; } else if (unit_type == DW_UT_skeleton) { - if (drgn_log_is_enabled(state->dbinfo->prog, + if (drgn_log_is_enabled(file->module->prog, DRGN_LOG_WARNING)) { const char *dwo_name = drgn_dwarf_dwo_name(&cudie); - drgn_log_warning(state->dbinfo->prog, + drgn_log_warning(file->module->prog, "%s: split DWARF file%s%s not found", file->path ?: "", dwo_name ? 
" " : "", @@ -592,15 +578,41 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, return NULL; } -struct drgn_error * -drgn_dwarf_index_read_file(struct drgn_dwarf_index_state *state, - struct drgn_elf_file *file) +static struct drgn_error * +drgn_dwarf_index_read_file(struct drgn_elf_file *file, + struct drgn_dwarf_index_cu_vector *cus) { struct drgn_error *err; - err = drgn_dwarf_index_read_cus(state, file, DRGN_SCN_DEBUG_INFO); - if (!err && file->scn_data[DRGN_SCN_DEBUG_TYPES]) { - err = drgn_dwarf_index_read_cus(state, file, - DRGN_SCN_DEBUG_TYPES); + + for (int scn = 0; scn < DRGN_SECTION_INDEX_NUM_DWARF_INDEX; scn++) { + if (file->scns[scn]) { + Elf_Data *data; + err = drgn_elf_file_read_section(file, scn, &data); + if (err) + return err; + } + } + struct drgn_elf_file *supplementary_file = + file->module->supplementary_debug_file; + if (supplementary_file) { + err = drgn_elf_file_read_section(supplementary_file, + DRGN_SCN_DEBUG_INFO, + &file->alt_debug_info_data); + if (err) + return err; + if (supplementary_file->scns[DRGN_SCN_DEBUG_STR]) { + err = drgn_elf_file_read_section(supplementary_file, + DRGN_SCN_DEBUG_STR, + &file->alt_debug_str_data); + if (err) + return err; + } + } + + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_INFO, cus); + if (!err && file->scns[DRGN_SCN_DEBUG_TYPES]) { + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_TYPES, + cus); } return err; } @@ -1798,24 +1810,33 @@ drgn_dwarf_base_type_map_merge(struct drgn_dwarf_base_type_map *dst, return err; } -struct drgn_error * -drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) +static struct drgn_error * +drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) { - struct drgn_debug_info *dbinfo = state->dbinfo; - struct drgn_dwarf_index_cu_vector *cus = &dbinfo->dwarf.index_cus; + if (!dbinfo->modules_pending_indexing) + return NULL; if (dbinfo->dwarf.global.saved_err) return drgn_error_copy(dbinfo->dwarf.global.saved_err); - size_t new_cus_size = 
drgn_dwarf_index_cu_vector_size(cus); - for (int i = 0; i < drgn_num_threads; i++) - new_cus_size += drgn_dwarf_index_cu_vector_size(&state->cus[i]); - if (new_cus_size == drgn_dwarf_index_cu_vector_size(cus)) - return NULL; + drgn_init_num_threads(); - // Per-thread array of maps to populate. Thread 0 uses the maps in the - // dbinfo directly. These are merged into the dbinfo and freed. + _cleanup_(drgn_module_vector_deinit) + struct drgn_module_vector modules = VECTOR_INIT; + { + struct drgn_module *module = dbinfo->modules_pending_indexing; + do { + if (!drgn_module_vector_append(&modules, &module)) + return &drgn_enomem; + module = module->pending_indexing_next; + } while (module); + } + + // Per-thread structures to populate. Thread 0 uses the structures in + // the dbinfo directly. These are merged into the dbinfo and freed. _cleanup_free_ union { + // For reading modules. + struct drgn_dwarf_index_cu_vector cus; // For first pass. struct drgn_dwarf_specification_map specifications; // For second pass. 
@@ -1823,19 +1844,69 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) struct drgn_dwarf_index_die_map map[DRGN_DWARF_INDEX_MAP_SIZE]; struct drgn_dwarf_base_type_map base_types; }; - } *maps = NULL; + } *threads = NULL; if (drgn_num_threads > 1) { - maps = malloc_array(drgn_num_threads - 1, sizeof(maps[0])); - if (!maps) + threads = malloc_array(drgn_num_threads - 1, sizeof(threads[0])); + if (!threads) return &drgn_enomem; } - if (!drgn_dwarf_index_cu_vector_reserve(cus, new_cus_size)) - return &drgn_enomem; - for (int i = 0; i < drgn_num_threads; i++) - drgn_dwarf_index_cu_vector_extend(cus, &state->cus[i]); + size_t old_cus_size = + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); struct drgn_error *err = NULL; + #pragma omp parallel num_threads(drgn_num_threads) + { + struct drgn_dwarf_index_cu_vector *cus; + int thread_num = omp_get_thread_num(); + if (thread_num == 0) { + cus = &dbinfo->dwarf.index_cus; + } else { + cus = &threads[thread_num - 1].cus; + drgn_dwarf_index_cu_vector_init(cus); + } + + #pragma omp for schedule(dynamic) + for (size_t i = 0; i < drgn_module_vector_size(&modules); i++) { + struct drgn_module *module = + *drgn_module_vector_at(&modules, i); + if (err) + continue; + struct drgn_error *module_err = + drgn_dwarf_index_read_file(module->debug_file, + cus); + if (module_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(module_err); + else + err = module_err; + } + } + } + if (err) + goto err; + + struct drgn_dwarf_index_cu_vector *cus = &dbinfo->dwarf.index_cus; + + size_t new_cus_size = drgn_dwarf_index_cu_vector_size(cus); + for (int i = 0; i < drgn_num_threads - 1; i++) + new_cus_size += drgn_dwarf_index_cu_vector_size(&threads[i].cus); + if (new_cus_size == old_cus_size) + return NULL; + + if (!drgn_dwarf_index_cu_vector_reserve(cus, new_cus_size)) { + for (int i = 0; i < drgn_num_threads - 1; i++) + drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + err 
= &drgn_enomem; + goto err; + } + + for (int i = 0; i < drgn_num_threads - 1; i++) { + drgn_dwarf_index_cu_vector_extend(cus, &threads[i].cus); + drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + } + #pragma omp parallel num_threads(drgn_num_threads) { struct drgn_dwarf_specification_map *specifications; @@ -1843,7 +1914,7 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) if (thread_num == 0) { specifications = &dbinfo->dwarf.specifications; } else { - specifications = &maps[thread_num - 1].specifications; + specifications = &threads[thread_num - 1].specifications; drgn_dwarf_specification_map_init(specifications); } @@ -1873,7 +1944,7 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) } for (int i = 0; i < drgn_num_threads - 1; i++) { err = drgn_dwarf_specification_map_merge(&dbinfo->dwarf.specifications, - &maps[i].specifications, + &threads[i].specifications, err); } if (err) @@ -1890,10 +1961,10 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) map = dbinfo->dwarf.global.map; base_types = &dbinfo->dwarf.base_types; } else { - array_for_each(tag_map, maps[thread_num - 1].map) + array_for_each(tag_map, threads[thread_num - 1].map) drgn_dwarf_index_die_map_init(tag_map); - map = maps[thread_num - 1].map; - base_types = &maps[thread_num - 1].base_types; + map = threads[thread_num - 1].map; + base_types = &threads[thread_num - 1].base_types; drgn_dwarf_base_type_map_init(base_types); } @@ -1926,14 +1997,14 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) for (int j = 0; j < drgn_num_threads - 1; j++) { thread_err = drgn_dwarf_index_die_map_merge(&dbinfo->dwarf.global.map[i], - &maps[j].map[i], + &threads[j].map[i], thread_err); } } else { for (int j = 0; j < drgn_num_threads - 1; j++) { thread_err = drgn_dwarf_base_type_map_merge(&dbinfo->dwarf.base_types, - &maps[j].base_types, + &threads[j].base_types, thread_err); } } @@ -1955,13 +2026,16 @@ drgn_dwarf_info_update_index(struct 
drgn_dwarf_index_state *state) qsort(drgn_dwarf_index_cu_vector_begin(cus), drgn_dwarf_index_cu_vector_size(cus), sizeof(struct drgn_dwarf_index_cu), drgn_dwarf_index_cu_cmp); + dbinfo->modules_pending_indexing = NULL; dbinfo->dwarf.global.cus_indexed = drgn_dwarf_index_cu_vector_size(cus); return NULL; } -static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) +static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index *ns) { + struct drgn_error *err; + size_t num_index_cus = drgn_dwarf_index_cu_vector_size(&ns->dbinfo->dwarf.index_cus); if (ns->cus_indexed >= num_index_cus) @@ -1972,12 +2046,10 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) // The parent namespace must be indexed first so that the DIEs for this // namespace are populated. - struct drgn_error *err = index_namespace(ns->parent); + err = index_namespace_impl(ns->parent); if (err) return err; - drgn_blocking_guard(ns->dbinfo->prog); - struct drgn_dwarf_index_die_vector *die_vectors_to_index[DRGN_DWARF_INDEX_NUM_NAMESPACE_TAGS]; int tags_to_index[DRGN_DWARF_INDEX_NUM_NAMESPACE_TAGS]; @@ -2086,6 +2158,26 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) return NULL; } +static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) +{ + if (!ns->dbinfo->modules_pending_indexing + && (ns->cus_indexed + >= drgn_dwarf_index_cu_vector_size(&ns->dbinfo->dwarf.index_cus))) + return NULL; + + drgn_blocking_guard(ns->dbinfo->prog); + + struct drgn_error *err = drgn_dwarf_index_update(ns->dbinfo); + if (err) + return err; + return index_namespace_impl(ns); +} + +struct drgn_error *drgn_dwarf_info_update_index(struct drgn_debug_info *dbinfo) +{ + return index_namespace(&dbinfo->dwarf.global); +} + /** * Iterator over DWARF debugging information. 
* @@ -2285,28 +2377,29 @@ static struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, return NULL; } -struct drgn_error * -drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, - const struct drgn_language **ret) +const struct drgn_language * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo) { struct drgn_error *err; struct drgn_dwarf_index_iterator it; const enum drgn_dwarf_index_tag tag = DRGN_DWARF_INDEX_subprogram; err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, "main", strlen("main"), &tag, 1); - if (err) - return err; + if (err) { + drgn_error_destroy(err); + return NULL; + } Dwarf_Die die; while (drgn_dwarf_index_iterator_next(&it, &die, NULL)) { - err = drgn_language_from_die(&die, false, ret); + const struct drgn_language *lang; + err = drgn_language_from_die(&die, false, &lang); if (err) { drgn_error_destroy(err); continue; } - if (*ret) - return NULL; + if (lang) + return lang; } - *ret = NULL; return NULL; } @@ -2531,7 +2624,10 @@ struct drgn_error *drgn_module_find_dwarf_scopes(struct drgn_module *module, *length_ret = 0; return NULL; } - Dwarf *dwarf = module->debug_file->dwarf; + Dwarf *dwarf; + err = drgn_elf_file_get_dwarf(module->debug_file, &dwarf); + if (err) + return err; *bias_ret = module->debug_file_bias; pc -= module->debug_file_bias; @@ -2772,16 +2868,17 @@ static struct drgn_error *drgn_dwarf_next_addrx(struct binary_buffer *bb, return drgn_error_create(DRGN_ERROR_OTHER, "indirect address without .debug_addr section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_ADDR); + Elf_Data *data; + err = drgn_elf_file_read_section(file, DRGN_SCN_DEBUG_ADDR, &data); if (err) return err; - if (base > file->scn_data[DRGN_SCN_DEBUG_ADDR]->d_size) { + if (base > data->d_size) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_addr_base is out of bounds"); } - *addr_base = (char *)file->scn_data[DRGN_SCN_DEBUG_ADDR]->d_buf + base; + *addr_base = (char *)data->d_buf + base; // In 
DWARF 5, there is a header immediately before addr_base, // which ends with a segment selector size. We don't support a // segment selector yet. In GNU Debug Fission, .debug_addr @@ -2804,6 +2901,7 @@ static struct drgn_error *drgn_dwarf_next_addrx(struct binary_buffer *bb, if ((err = binary_buffer_next_uleb128(bb, &index))) return err; + // The data must have been cached when we cached addr_base. Elf_Data *data = file->scn_data[DRGN_SCN_DEBUG_ADDR]; if (index >= ((char *)data->d_buf + data->d_size - *addr_base) / address_size) { @@ -2849,10 +2947,10 @@ static struct drgn_error *drgn_dwarf_read_loclistx(struct drgn_elf_file *file, return drgn_error_create(DRGN_ERROR_OTHER, "DW_FORM_loclistx without .debug_loclists section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOCLISTS); + Elf_Data *data; + err = drgn_elf_file_read_section(file, DRGN_SCN_DEBUG_LOCLISTS, &data); if (err) return err; - Elf_Data *data = file->scn_data[DRGN_SCN_DEBUG_LOCLISTS]; if (base > data->d_size) { return drgn_error_create(DRGN_ERROR_OTHER, @@ -2893,12 +2991,11 @@ static struct drgn_error *drgn_dwarf5_location_list(struct drgn_elf_file *file, return drgn_error_create(DRGN_ERROR_OTHER, "loclist without .debug_loclists section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOCLISTS); - if (err) - return err; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, + err = drgn_elf_file_section_buffer_read(&buffer, file, DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; if (offset > buffer.bb.end - buffer.bb.pos) { return drgn_error_create(DRGN_ERROR_OTHER, "loclist is out of bounds"); @@ -3031,17 +3128,16 @@ drgn_dwarf4_split_location_list(struct drgn_elf_file *file, Dwarf_Word offset, return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr without .debug_loc section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOC); - if (err) - return err; Dwarf_Off dwp_offset; if (dwarf_cu_dwp_section_info(cu_die->cu,
DW_SECT_LOCLISTS, &dwp_offset, NULL)) return drgn_error_libdw(); offset += dwp_offset; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, + err = drgn_elf_file_section_buffer_read(&buffer, file, DRGN_SCN_DEBUG_LOC); + if (err) + return err; if (offset > buffer.bb.end - buffer.bb.pos) { return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr is out of bounds"); @@ -3146,12 +3242,11 @@ static struct drgn_error *drgn_dwarf4_location_list(struct drgn_elf_file *file, return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr without .debug_loc section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOC); - if (err) - return err; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, + err = drgn_elf_file_section_buffer_read(&buffer, file, DRGN_SCN_DEBUG_LOC); + if (err) + return err; if (offset > buffer.bb.end - buffer.bb.pos) { return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr is out of bounds"); @@ -4539,17 +4634,17 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); err = NULL; } else if (bit_offset >= 0) { - Dwarf_Addr start, end, bias; - dwfl_module_info(file->module->dwfl_module, NULL, &start, &end, - &bias, NULL, NULL, NULL); + uint64_t biased_address = + address + file->module->debug_file_bias; /* * If the address is not in the module's address range, then * it's probably something special like a Linux per-CPU variable * (which isn't actually a variable address but an offset). * Don't apply the bias in that case. 
*/ - if (start <= address + bias && address + bias < end) - address += bias; + if (file->module->start <= biased_address + && biased_address < file->module->end) + address = biased_address; err = drgn_object_set_reference_internal(ret, &type, address, bit_offset); } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { @@ -6623,10 +6718,6 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, &file->module->dwarf.datarel_base); } - err = drgn_elf_file_cache_section(file, scn); - if (err) - return err; - _cleanup_(drgn_dwarf_cie_vector_deinit) struct drgn_dwarf_cie_vector cies = VECTOR_INIT; _cleanup_(drgn_dwarf_fde_vector_deinit) @@ -6634,9 +6725,10 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, _cleanup_(drgn_dwarf_cie_map_deinit) struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; - Elf_Data *data = file->scn_data[scn]; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, scn); + err = drgn_elf_file_section_buffer_read(&buffer, file, scn); + if (err) + return err; while (binary_buffer_has_next(&buffer.bb)) { uint32_t tmp; if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) @@ -6688,13 +6780,13 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, size_t pointer_offset = (buffer.bb.pos - (is_64_bit ? 
8 : 4) - - (char *)data->d_buf); + - (char *)buffer.data->d_buf); if (cie_pointer > pointer_offset) { return binary_buffer_error(&buffer.bb, "CIE pointer is out of bounds"); } cie_pointer = pointer_offset - cie_pointer; - } else if (cie_pointer > data->d_size) { + } else if (cie_pointer > buffer.data->d_size) { return binary_buffer_error(&buffer.bb, "CIE pointer is out of bounds"); } @@ -6753,7 +6845,8 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, } buffer.bb.pos = buffer.bb.end; - buffer.bb.end = (const char *)data->d_buf + data->d_size; + buffer.bb.end = (const char *)buffer.data->d_buf + + buffer.data->d_size; } drgn_dwarf_cie_vector_shrink_to_fit(&cies); diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h index 81761efcd..50f51dfc6 100644 --- a/libdrgn/dwarf_info.h +++ b/libdrgn/dwarf_info.h @@ -216,39 +216,7 @@ struct drgn_dwarf_info { void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo); void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo); -/** - * State tracked while indexing new DWARF information in a @ref drgn_dwarf_info. - */ -struct drgn_dwarf_index_state { - struct drgn_debug_info *dbinfo; - /** Per-thread arrays of CUs to be indexed. */ - struct drgn_dwarf_index_cu_vector *cus; -}; - -/** - * Initialize state for indexing new DWARF information. - * - * @return @c true on success, @c false on failure to allocate memory. - */ -bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, - struct drgn_debug_info *dbinfo); - -/** Deinitialize state for indexing new DWARF information. */ -void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state); - -/** Read a @ref drgn_elf_file to index its DWARF information. */ -struct drgn_error * -drgn_dwarf_index_read_file(struct drgn_dwarf_index_state *state, - struct drgn_elf_file *file); - -/** - * Index new DWARF information. 
- * - * This should be called once all files have been read with @ref - * drgn_dwarf_index_read_file() to finish indexing those files. - */ -struct drgn_error * -drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state); +struct drgn_error *drgn_dwarf_info_update_index(struct drgn_debug_info *dbinfo); /** * Find the DWARF DIEs in a @ref drgn_module for the scope containing a given diff --git a/libdrgn/elf_file.c b/libdrgn/elf_file.c index bf5952b6b..4b3af027d 100644 --- a/libdrgn/elf_file.c +++ b/libdrgn/elf_file.c @@ -3,13 +3,16 @@ #include #include +#include #include #include #include #include #include +#include #include "array.h" +#include "debug_info.h" #include "drgn_internal.h" #include "elf_file.h" #include "error.h" @@ -35,6 +38,16 @@ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) return NULL; } +void truncate_elf_string_data(Elf_Data *data) +{ + const char *buf = data->d_buf; + const char *nul = memrchr(buf, '\0', data->d_size); + if (nul) + data->d_size = nul - buf + 1; + else + data->d_size = 0; +} + #include "drgn_section_name_to_index.inc" enum drgn_dwarf_file_type { @@ -45,163 +58,519 @@ enum drgn_dwarf_file_type { }; struct drgn_error *drgn_elf_file_create(struct drgn_module *module, - const char *path, Elf *elf, - struct drgn_elf_file **ret) + const char *path, int fd, char *image, + Elf *elf, struct drgn_elf_file **ret) { - struct drgn_error *err; + if (elf_kind(elf) != ELF_K_ELF) + return drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); if (!ehdr) return drgn_error_libelf(); - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - struct drgn_elf_file *file = calloc(1, sizeof(*file)); + _cleanup_free_ struct drgn_elf_file *file = calloc(1, sizeof(*file)); if (!file) return &drgn_enomem; - file->module = module; - file->path = path; - file->elf = elf; - drgn_platform_from_elf(ehdr, &file->platform); - // We mimic libdw's 
logic for choosing debug sections: we either use all - // .debug_* or .zdebug_* sections (DRGN_DWARF_FILE_PLAIN), all - // .debug_*.dwo or .zdebug_*.dwo sections (DRGN_DWARF_FILE_DWO), or all - // .gnu.debuglto_.debug_* sections (DRGN_DWARF_FILE_GNU_LTO), in that - // order of preference. - enum drgn_dwarf_file_type dwarf_file_type = DRGN_DWARF_FILE_NONE; - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto err; - } - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) { - err = drgn_error_libelf(); - goto err; - } + if (ehdr->e_type == ET_EXEC || + ehdr->e_type == ET_DYN || + ehdr->e_type == ET_REL) { + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); - enum drgn_dwarf_file_type dwarf_section_type; - if (strcmp(scnname, ".debug_cu_index") == 0 || - strcmp(scnname, ".debug_tu_index") == 0) { - dwarf_section_type = DRGN_DWARF_FILE_DWO; - } else if (strstartswith(scnname, ".debug_") || - strstartswith(scnname, ".zdebug_")) { - if (strcmp(scnname + strlen(scnname) - 4, ".dwo") == 0) + bool has_sections = false; + bool has_alloc_section = false; + // We mimic libdw's logic for choosing debug sections: we either + // use all .debug_* or .zdebug_* sections + // (DRGN_DWARF_FILE_PLAIN), all .debug_*.dwo or .zdebug_*.dwo + // sections (DRGN_DWARF_FILE_DWO), or all .gnu.debuglto_.debug_* + // sections (DRGN_DWARF_FILE_GNU_LTO), in that order of + // preference. 
+ enum drgn_dwarf_file_type dwarf_file_type = DRGN_DWARF_FILE_NONE; + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + has_sections = true; + if (shdr->sh_type != SHT_NOBITS && + shdr->sh_type != SHT_NOTE && + (shdr->sh_flags & SHF_ALLOC)) + has_alloc_section = true; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); + + enum drgn_dwarf_file_type dwarf_section_type; + if (strcmp(scnname, ".debug_cu_index") == 0 || + strcmp(scnname, ".debug_tu_index") == 0) { dwarf_section_type = DRGN_DWARF_FILE_DWO; - else - dwarf_section_type = DRGN_DWARF_FILE_PLAIN; - } else if (strstartswith(scnname, ".gnu.debuglto_.debug")) { - dwarf_section_type = DRGN_DWARF_FILE_GNU_LTO; - } else { - dwarf_section_type = DRGN_DWARF_FILE_NONE; + } else if (strstartswith(scnname, ".debug_") || + strstartswith(scnname, ".zdebug_")) { + if (strcmp(scnname + strlen(scnname) - 4, ".dwo") == 0) + dwarf_section_type = DRGN_DWARF_FILE_DWO; + else + dwarf_section_type = DRGN_DWARF_FILE_PLAIN; + } else if (strstartswith(scnname, ".gnu.debuglto_.debug")) { + dwarf_section_type = DRGN_DWARF_FILE_GNU_LTO; + } else { + dwarf_section_type = DRGN_DWARF_FILE_NONE; + } + dwarf_file_type = max(dwarf_file_type, dwarf_section_type); } - dwarf_file_type = max(dwarf_file_type, dwarf_section_type); - } - scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto err; - } + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); - if (shdr->sh_type != SHT_PROGBITS) - continue; + if (shdr->sh_type != SHT_PROGBITS) + continue; - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) { - err = 
drgn_error_libelf(); - goto err; - } + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); - enum drgn_section_index index; - if (strstartswith(scnname, ".debug_") || - strstartswith(scnname, ".zdebug_")) { - const char *subname; - if (strstartswith(scnname, ".zdebug_")) - subname = scnname + sizeof(".zdebug_") - 1; - else - subname = scnname + sizeof(".debug_") - 1; - size_t len = strlen(subname); - if (len >= 4 - && strcmp(subname + len - 4, ".dwo") == 0) { - if (dwarf_file_type != DRGN_DWARF_FILE_DWO) + enum drgn_section_index index; + if (strstartswith(scnname, ".debug_") || + strstartswith(scnname, ".zdebug_")) { + const char *subname; + if (strstartswith(scnname, ".zdebug_")) + subname = scnname + sizeof(".zdebug_") - 1; + else + subname = scnname + sizeof(".debug_") - 1; + size_t len = strlen(subname); + if (len >= 4 + && strcmp(subname + len - 4, ".dwo") == 0) { + if (dwarf_file_type != DRGN_DWARF_FILE_DWO) + continue; + len -= 4; + } else if (dwarf_file_type != DRGN_DWARF_FILE_PLAIN) { continue; - len -= 4; - } else if (dwarf_file_type != DRGN_DWARF_FILE_PLAIN) { - continue; + } + index = drgn_debug_section_name_to_index(subname, len); + } else if (strstartswith(scnname, ".gnu.debuglto_.debug_")) { + if (dwarf_file_type != DRGN_DWARF_FILE_GNU_LTO) + continue; + const char *subname = + scnname + sizeof(".gnu.debuglto_.debug_") - 1; + index = drgn_debug_section_name_to_index(subname, + strlen(subname)); + } else if (strcmp(scnname, ".init.text") == 0) { + // We consider a file to be vmlinux if it has an + // .init.text section and is not relocatable + // (which excludes kernel modules). 
+ file->is_vmlinux = ehdr->e_type != ET_REL; + index = DRGN_SECTION_INDEX_NUM; + } else { + index = drgn_non_debug_section_name_to_index(scnname); } - index = drgn_debug_section_name_to_index(subname, len); - } else if (strstartswith(scnname, ".gnu.debuglto_.debug_")) { - if (dwarf_file_type != DRGN_DWARF_FILE_GNU_LTO) - continue; - const char *subname = - scnname + sizeof(".gnu.debuglto_.debug_") - 1; - index = drgn_debug_section_name_to_index(subname, - strlen(subname)); + if (index < DRGN_SECTION_INDEX_NUM && !file->scns[index]) + file->scns[index] = scn; + } + + if (ehdr->e_type == ET_REL) { + // We consider a relocatable file "loadable" if it has + // any allocated sections. + file->is_loadable = has_alloc_section; + file->is_relocatable = file->needs_relocation = true; } else { - index = drgn_non_debug_section_name_to_index(scnname); + // We consider executable and shared object files + // loadable if they have any loadable segments, and + // either no sections or at least one allocated section. 
+ bool has_loadable_segment = false; + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) + return drgn_error_libelf(); + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = + gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type == PT_LOAD) { + has_loadable_segment = true; + break; + } + } + file->is_loadable = + has_loadable_segment && + (!has_sections || has_alloc_section); } - if (index < DRGN_SECTION_INDEX_NUM && !file->scns[index]) - file->scns[index] = scn; } - *ret = file; - return NULL; -err: - free(file); - return err; + file->module = module; + file->path = strdup(path); + if (!file->path) + return &drgn_enomem; + file->image = image; + file->fd = fd; + file->elf = elf; + drgn_platform_from_elf(ehdr, &file->platform); + *ret = no_cleanup_ptr(file); + return NULL; } void drgn_elf_file_destroy(struct drgn_elf_file *file) { - free(file); + if (file) { + dwarf_end(file->_dwarf); + elf_end(file->elf); + if (file->fd >= 0) + close(file->fd); + free(file->image); + free(file->path); + free(file); + } } -static void truncate_null_terminated_section(Elf_Data *data) +static int should_apply_relocation_section(Elf *elf, size_t shstrndx, + const GElf_Shdr *shdr) { - if (data) { - const char *buf = data->d_buf; - const char *nul = memrchr(buf, '\0', data->d_size); - if (nul) - data->d_size = nul - buf + 1; - else - data->d_size = 0; + if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) + return 0; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return -1; + if (shdr->sh_type == SHT_RELA) { + if (!strstartswith(scnname, ".rela.")) + return 0; + scnname += sizeof(".rela.") - 1; + } else { + if (!strstartswith(scnname, ".rel.")) + return 0; + scnname += sizeof(".rel.") - 1; } + return (strstartswith(scnname, "debug_") + || strstartswith(scnname, "orc_")); } -struct drgn_error *drgn_elf_file_precache_sections(struct drgn_elf_file *file) +static inline struct drgn_error 
*get_reloc_sym_value(const void *syms, + size_t num_syms, + const uint64_t *sh_addrs, + size_t shdrnum, + bool is_64_bit, + bool bswap, + uint32_t r_sym, + uint64_t *ret) +{ + if (r_sym >= num_syms) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid ELF relocation symbol"); + } + uint16_t st_shndx; + uint64_t st_value; + if (is_64_bit) { + const Elf64_Sym *sym = (Elf64_Sym *)syms + r_sym; + memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); + memcpy(&st_value, &sym->st_value, sizeof(st_value)); + if (bswap) { + st_shndx = bswap_16(st_shndx); + st_value = bswap_64(st_value); + } + } else { + const Elf32_Sym *sym = (Elf32_Sym *)syms + r_sym; + memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); + uint32_t st_value32; + memcpy(&st_value32, &sym->st_value, sizeof(st_value32)); + if (bswap) { + st_shndx = bswap_16(st_shndx); + st_value32 = bswap_32(st_value32); + } + st_value = st_value32; + } + if (st_shndx >= shdrnum) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid ELF symbol section index"); + } + *ret = sh_addrs[st_shndx] + st_value; + return NULL; +} + +static struct drgn_error * +apply_elf_relas(const struct drgn_relocating_section *relocating, + Elf_Data *reloc_data, Elf_Data *symtab_data, + const uint64_t *sh_addrs, size_t shdrnum, + const struct drgn_platform *platform) { struct drgn_error *err; - for (size_t i = 0; i < DRGN_SECTION_INDEX_NUM_PRECACHE; i++) { - if (file->scns[i]) { - err = read_elf_section(file->scns[i], - &file->scn_data[i]); - if (err) - return err; + bool is_64_bit = drgn_platform_is_64_bit(platform); + bool bswap = drgn_platform_bswap(platform); + apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; + + const void *relocs = reloc_data->d_buf; + size_t reloc_size = is_64_bit ? sizeof(Elf64_Rela) : sizeof(Elf32_Rela); + size_t num_relocs = reloc_data->d_size / reloc_size; + + const void *syms = symtab_data->d_buf; + size_t sym_size = is_64_bit ? 
sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + size_t num_syms = symtab_data->d_size / sym_size; + + for (size_t i = 0; i < num_relocs; i++) { + uint64_t r_offset; + uint32_t r_sym; + uint32_t r_type; + int64_t r_addend; + if (is_64_bit) { + const Elf64_Rela *rela = (Elf64_Rela *)relocs + i; + uint64_t r_info; + memcpy(&r_offset, &rela->r_offset, sizeof(r_offset)); + memcpy(&r_info, &rela->r_info, sizeof(r_info)); + memcpy(&r_addend, &rela->r_addend, sizeof(r_addend)); + if (bswap) { + r_offset = bswap_64(r_offset); + r_info = bswap_64(r_info); + r_addend = bswap_64(r_addend); + } + r_sym = ELF64_R_SYM(r_info); + r_type = ELF64_R_TYPE(r_info); + } else { + const Elf32_Rela *rela32 = (Elf32_Rela *)relocs + i; + uint32_t r_offset32; + uint32_t r_info32; + int32_t r_addend32; + memcpy(&r_offset32, &rela32->r_offset, sizeof(r_offset32)); + memcpy(&r_info32, &rela32->r_info, sizeof(r_info32)); + memcpy(&r_addend32, &rela32->r_addend, sizeof(r_addend32)); + if (bswap) { + r_offset32 = bswap_32(r_offset32); + r_info32 = bswap_32(r_info32); + r_addend32 = bswap_32(r_addend32); + } + r_offset = r_offset32; + r_sym = ELF32_R_SYM(r_info32); + r_type = ELF32_R_TYPE(r_info32); + r_addend = r_addend32; } + uint64_t sym_value; + err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, + is_64_bit, bswap, r_sym, &sym_value); + if (err) + return err; + + err = apply_elf_reloc(relocating, r_offset, r_type, &r_addend, + sym_value); + if (err) + return err; } + return NULL; +} + +static struct drgn_error * +apply_elf_rels(const struct drgn_relocating_section *relocating, + Elf_Data *reloc_data, Elf_Data *symtab_data, + const uint64_t *sh_addrs, size_t shdrnum, + const struct drgn_platform *platform) +{ + struct drgn_error *err; + + bool is_64_bit = drgn_platform_is_64_bit(platform); + bool bswap = drgn_platform_bswap(platform); + apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; + + const void *relocs = reloc_data->d_buf; + size_t reloc_size = is_64_bit ? 
sizeof(Elf64_Rel) : sizeof(Elf32_Rel); + size_t num_relocs = reloc_data->d_size / reloc_size; + + const void *syms = symtab_data->d_buf; + size_t sym_size = is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + size_t num_syms = symtab_data->d_size / sym_size; + + for (size_t i = 0; i < num_relocs; i++) { + uint64_t r_offset; + uint32_t r_sym; + uint32_t r_type; + if (is_64_bit) { + const Elf64_Rel *rel = (Elf64_Rel *)relocs + i; + uint64_t r_info; + memcpy(&r_offset, &rel->r_offset, sizeof(r_offset)); + memcpy(&r_info, &rel->r_info, sizeof(r_info)); + if (bswap) { + r_offset = bswap_64(r_offset); + r_info = bswap_64(r_info); + } + r_sym = ELF64_R_SYM(r_info); + r_type = ELF64_R_TYPE(r_info); + } else { + const Elf32_Rel *rel32 = (Elf32_Rel *)relocs + i; + uint32_t r_offset32; + uint32_t r_info32; + memcpy(&r_offset32, &rel32->r_offset, sizeof(r_offset32)); + memcpy(&r_info32, &rel32->r_info, sizeof(r_info32)); + if (bswap) { + r_offset32 = bswap_32(r_offset32); + r_info32 = bswap_32(r_info32); + } + r_offset = r_offset32; + r_sym = ELF32_R_SYM(r_info32); + r_type = ELF32_R_TYPE(r_info32); + } + uint64_t sym_value; + err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, + is_64_bit, bswap, r_sym, &sym_value); + if (err) + return err; - /* - * Truncate any extraneous bytes so that we can assume that a pointer - * within .debug_{,line_}str is always null-terminated. 
- */ - truncate_null_terminated_section(file->scn_data[DRGN_SCN_DEBUG_STR]); - truncate_null_terminated_section(file->alt_debug_str_data); + err = apply_elf_reloc(relocating, r_offset, r_type, NULL, + sym_value); + if (err) + return err; + } return NULL; } struct drgn_error * -drgn_elf_file_cache_section(struct drgn_elf_file *file, enum drgn_section_index scn) +drgn_elf_file_apply_relocations(struct drgn_elf_file *file) { - if (file->scn_data[scn]) + struct drgn_error *err; + + if (!file->needs_relocation) return NULL; - return read_elf_section(file->scns[scn], &file->scn_data[scn]); + + if (!file->platform.arch->apply_elf_reloc) { + return drgn_error_format(DRGN_ERROR_NOT_IMPLEMENTED, + "relocation support is not implemented for %s architecture", + file->platform.arch->name); + } + + Elf *elf = file->elf; + size_t shdrnum; + if (elf_getshdrnum(elf, &shdrnum)) + return drgn_error_libelf(); + _cleanup_free_ uint64_t *sh_addrs = + calloc(shdrnum, sizeof(sh_addrs[0])); + if (!sh_addrs && shdrnum > 0) + return &drgn_enomem; + + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + sh_addrs[elf_ndxscn(scn)] = shdr->sh_addr; + } + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); + + Elf_Scn *reloc_scn = NULL; + while ((reloc_scn = elf_nextscn(elf, reloc_scn))) { + GElf_Shdr *reloc_shdr, reloc_shdr_mem; + reloc_shdr = gelf_getshdr(reloc_scn, &reloc_shdr_mem); + if (!reloc_shdr) + return drgn_error_libelf(); + + int r = should_apply_relocation_section(elf, shstrndx, + reloc_shdr); + if (r < 0) + return drgn_error_libelf(); + if (r) { + scn = elf_getscn(elf, reloc_shdr->sh_info); + if (!scn) + return drgn_error_libelf(); + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + if (shdr->sh_type == SHT_NOBITS) + continue; + + Elf_Scn *symtab_scn = 
elf_getscn(elf, + reloc_shdr->sh_link); + if (!symtab_scn) + return drgn_error_libelf(); + shdr = gelf_getshdr(symtab_scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + if (shdr->sh_type == SHT_NOBITS) { + return drgn_error_create(DRGN_ERROR_OTHER, + "relocation symbol table has no data"); + } + + Elf_Data *data, *reloc_data, *symtab_data; + if ((err = read_elf_section(scn, &data)) + || (err = read_elf_section(reloc_scn, &reloc_data)) + || (err = read_elf_section(symtab_scn, &symtab_data))) + return err; + + struct drgn_relocating_section relocating = { + .buf = data->d_buf, + .buf_size = data->d_size, + .addr = sh_addrs[elf_ndxscn(scn)], + .bswap = drgn_platform_bswap(&file->platform), + }; + + if (reloc_shdr->sh_type == SHT_RELA) { + err = apply_elf_relas(&relocating, reloc_data, + symtab_data, sh_addrs, + shdrnum, &file->platform); + } else { + err = apply_elf_rels(&relocating, reloc_data, + symtab_data, sh_addrs, + shdrnum, &file->platform); + } + if (err) + return err; + } + } + file->needs_relocation = false; + return NULL; +} + +struct drgn_error *drgn_elf_file_read_section(struct drgn_elf_file *file, + enum drgn_section_index scn, + Elf_Data **ret) +{ + struct drgn_error *err; + if (!file->scn_data[scn]) { + err = drgn_elf_file_apply_relocations(file); + if (err) + return err; + err = read_elf_section(file->scns[scn], &file->scn_data[scn]); + if (err) + return err; + if (scn == DRGN_SCN_DEBUG_STR) + truncate_elf_string_data(file->scn_data[scn]); + } + *ret = file->scn_data[scn]; + return NULL; +} + +struct drgn_error *drgn_elf_file_get_dwarf(struct drgn_elf_file *file, + Dwarf **ret) +{ + struct drgn_error *err; + if (!file->_dwarf) { + struct drgn_elf_file *supplementary_file = + file->module->supplementary_debug_file; + if (supplementary_file) { + supplementary_file->_dwarf = + dwarf_begin_elf(supplementary_file->elf, + DWARF_C_READ, NULL); + if (!supplementary_file->_dwarf) + return drgn_error_libdw(); + } + + err = 
drgn_elf_file_apply_relocations(file); + if (err) + return err; + + file->_dwarf = dwarf_begin_elf(file->elf, DWARF_C_READ, NULL); + if (!file->_dwarf) + return drgn_error_libdw(); + + if (supplementary_file) + dwarf_setalt(file->_dwarf, supplementary_file->_dwarf); + } + *ret = file->_dwarf; + return NULL; } struct drgn_error * @@ -281,3 +650,114 @@ struct drgn_error *drgn_elf_file_section_buffer_error(struct binary_buffer *bb, return drgn_elf_file_section_error(buffer->file, buffer->scn, buffer->data, ptr, message); } + +static bool elf_address_range_from_first_and_last_segment(Elf *elf, + uint64_t *start_ret, + uint64_t *end_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return false; + + uint64_t start; + GElf_Phdr phdr_mem, *phdr; + size_t i; + for (i = 0; i < phnum; i++) { + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return false; + if (phdr->p_type == PT_LOAD) { + start = phdr->p_vaddr; + break; + } + } + if (i >= phnum) { + *start_ret = *end_ret = 0; + return true; + } + + for (i = phnum; i-- > 0;) { + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return false; + + if (phdr->p_type == PT_LOAD) { + uint64_t end = phdr->p_vaddr + phdr->p_memsz; + if (start < end) { + *start_ret = start; + *end_ret = end; + return true; + } + break; + } + } + *start_ret = *end_ret = 0; + return true; +} + +static bool elf_address_range_from_min_and_max_segment(Elf *elf, + uint64_t *start_ret, + uint64_t *end_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return false; + + uint64_t start = UINT64_MAX, end = 0; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return false; + if (phdr->p_type == PT_LOAD) { + start = min(start, phdr->p_vaddr); + end = max(end, phdr->p_vaddr + phdr->p_memsz); + } + } + if (start < end) { + *start_ret = start; + *end_ret = end; + } else { + *start_ret = *end_ret = 0; + } + return true; +} + +bool drgn_elf_file_address_range(struct 
drgn_elf_file *file, + uint64_t *start_ret, uint64_t *end_ret) +{ + // The ELF specification says that "loadable segment entries in the + // program header table appear in ascending order, sorted on the p_vaddr + // member." However, this is not the case in practice. + // + // vmlinux on some architectures contains special segments whose + // addresses are not meaningful and break the sorted order (e.g., + // segments corresponding to the .data..percpu section on x86-64 and the + // .vectors and .stubs sections on Arm). It appears that segments in + // vmlinux are sorted other than those special segments, and the special + // segments are never the first or last segment. + // + // Userspace ELF loaders disagree about whether to assume sorted order: + // + // - As of Linux kernel commit 10b19249192a ("ELF: fix overflow in total + // mapping size calculation") (in v5.18), the Linux kernel DOES NOT + // assume sorting. Before that, it DOES. + // - glibc as of v2.40 DOES assume sorting; see _dl_map_object_from_fd() + // in elf/dl-load.c and _dl_map_segments() in elf/dl-map-segments.h. + // - musl as of v1.2.5 DOES NOT assume sorting; see map_library() in + // ldso/dynlink.c. + // + // So, we use a heuristic: if the file has an .init.text section, then + // it is probably a vmlinux file, so we assume the sorted order, which + // allows us to ignore the special segments in the middle. + // + // Otherwise, we don't assume the sorted order. 
+ if (file->is_vmlinux) { + return elf_address_range_from_first_and_last_segment(file->elf, + start_ret, + end_ret); + } else { + return elf_address_range_from_min_and_max_segment(file->elf, + start_ret, + end_ret); + } +} diff --git a/libdrgn/elf_file.h b/libdrgn/elf_file.h index ca2b6f3eb..386d6409c 100644 --- a/libdrgn/elf_file.h +++ b/libdrgn/elf_file.h @@ -43,6 +43,14 @@ struct drgn_module; */ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); +/** + * Truncate any bytes beyond the last null character in an ELF string table. + * + * This sets `data->d_size` so that any string table index less than + * `data->d_size` is guaranteed to be valid. + */ +void truncate_elf_string_data(Elf_Data *data); + static inline bool elf_data_contains_ptr(Elf_Data *data, const void *ptr) { uintptr_t bufi = (uintptr_t)data->d_buf; @@ -55,11 +63,38 @@ struct drgn_elf_file { /** Module using this file. */ struct drgn_module *module; /** Filesystem path to this file. */ - const char *path; + char *path; + /** + * Memory image backing @ref elf. + * + * @c NULL if not backed by a memory image. + */ + char *image; + /** + * File descriptor backing @ref elf. + * + * -1 if not backed by a file. + */ + int fd; + /** Whether the file is loadable. */ + bool is_loadable; + /** Whether the file is relocatable. */ + bool is_relocatable; + /** Whether the file still need to have relocations applied. */ + bool needs_relocation; + /** Whether the file is a Linux kernel image (`vmlinux`). */ + bool is_vmlinux; /** libelf handle. */ Elf *elf; - /** libdw handle if we're using DWARF information from this file. */ - Dwarf *dwarf; + /** + * libdw handle. + * + * @c NULL if not yet created. + * + * Don't access this directly. Get it with @ref + * drgn_elf_file_get_dwarf() instead. + */ + Dwarf *_dwarf; /** * Platform of this file. * @@ -86,16 +121,33 @@ struct drgn_elf_file { Elf_Data *alt_debug_str_data; }; +/** + * Create a @ref drgn_elf_file. 
+ * + * On success, this takes ownership of @p fd, @p image, and @p elf. @p path is + * copied. + */ struct drgn_error *drgn_elf_file_create(struct drgn_module *module, - const char *path, Elf *elf, - struct drgn_elf_file **ret); + const char *path, int fd, char *image, + Elf *elf, struct drgn_elf_file **ret); void drgn_elf_file_destroy(struct drgn_elf_file *file); -struct drgn_error *drgn_elf_file_precache_sections(struct drgn_elf_file *file); - +/** Apply ELF relocations to the file if needed. */ struct drgn_error * -drgn_elf_file_cache_section(struct drgn_elf_file *file, enum drgn_section_index scn); +drgn_elf_file_apply_relocations(struct drgn_elf_file *file); + +/** + * Read an indexed ELF section. + * + * This applies ELF relocations to the file first if needed. + */ +struct drgn_error *drgn_elf_file_read_section(struct drgn_elf_file *file, + enum drgn_section_index scn, + Elf_Data **ret); + +struct drgn_error *drgn_elf_file_get_dwarf(struct drgn_elf_file *file, + Dwarf **ret); static inline bool drgn_elf_file_is_little_endian(const struct drgn_elf_file *file) @@ -108,6 +160,12 @@ static inline bool drgn_elf_file_bswap(const struct drgn_elf_file *file) return drgn_platform_bswap(&file->platform); } +static inline bool +drgn_elf_file_is_64_bit(const struct drgn_elf_file *file) +{ + return drgn_platform_is_64_bit(&file->platform); +} + static inline uint8_t drgn_elf_file_address_size(const struct drgn_elf_file *file) { @@ -120,6 +178,12 @@ drgn_elf_file_address_mask(const struct drgn_elf_file *file) return drgn_platform_address_mask(&file->platform); } +static inline bool drgn_elf_file_has_dwarf(const struct drgn_elf_file *file) +{ + return (file->scns[DRGN_SCN_DEBUG_INFO] + && file->scns[DRGN_SCN_DEBUG_ABBREV]); +} + struct drgn_error * drgn_elf_file_section_error(struct drgn_elf_file *file, Elf_Scn *scn, Elf_Data *data, const char *ptr, @@ -156,6 +220,10 @@ drgn_elf_file_section_buffer_init(struct drgn_elf_file_section_buffer *buffer, buffer->data = data; } 
+/** + * Initialize a @ref binary_buffer for an indexed ELF section that has already + * been read. + */ static inline void drgn_elf_file_section_buffer_init_index(struct drgn_elf_file_section_buffer *buffer, struct drgn_elf_file *file, @@ -165,6 +233,32 @@ drgn_elf_file_section_buffer_init_index(struct drgn_elf_file_section_buffer *buf file->scn_data[scn]); } +/** + * Read an indexed ELF section (applying ELF relocations if needed) and + * initialize a @ref binary_buffer for it. + */ +static inline struct drgn_error * +drgn_elf_file_section_buffer_read(struct drgn_elf_file_section_buffer *buffer, + struct drgn_elf_file *file, + enum drgn_section_index scn) +{ + Elf_Data *data; + struct drgn_error *err = drgn_elf_file_read_section(file, scn, &data); + if (err) + return err; + drgn_elf_file_section_buffer_init(buffer, file, file->scns[scn], data); + return NULL; +} + +/** + * Return the virtual address range of an ELF file. + * + * @param[out] start_ret Minimum virtual address (inclusive). + * @param[out] end_ret Maximum virtual address (exclusive). + */ +bool drgn_elf_file_address_range(struct drgn_elf_file *file, + uint64_t *start_ret, uint64_t *end_ret); + /** @} */ #endif /* DRGN_ELF_FILE_H */ diff --git a/libdrgn/elf_symtab.c b/libdrgn/elf_symtab.c new file mode 100644 index 000000000..0c0dd3ae9 --- /dev/null +++ b/libdrgn/elf_symtab.c @@ -0,0 +1,450 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include +#include + +#include "debug_info.h" +#include "elf_file.h" +#include "elf_symtab.h" +#include "error.h" +#include "log.h" +#include "minmax.h" +#include "serialize.h" +#include "util.h" + +static struct drgn_error *find_elf_file_symtab(struct drgn_elf_file *file, + uint64_t bias, + struct drgn_elf_file **file_ret, + uint64_t *bias_ret, + Elf_Scn **scn_ret, + GElf_Word *strtab_idx_ret, + GElf_Word *num_local_symbols_ret, + bool *full_symtab_ret) +{ + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(file->elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + if (shdr->sh_type == SHT_SYMTAB + || shdr->sh_type == SHT_DYNSYM) { + *file_ret = file; + *bias_ret = bias; + *scn_ret = scn; + *strtab_idx_ret = shdr->sh_link; + *num_local_symbols_ret = shdr->sh_info; + if (shdr->sh_type == SHT_SYMTAB) { + *full_symtab_ret = true; + return NULL; + } + } + } + return NULL; +} + +static struct drgn_error * +find_module_elf_symtab(struct drgn_module *module) +{ + struct drgn_error *err; + + if (!module->elf_symtab_pending_files) + return NULL; + + if (module->elf_symtab.num_symbols > 0 && !module->have_full_symtab) { + module->elf_symtab_pending_files = 0; + return NULL; + } + + struct drgn_elf_file *file = NULL; + uint64_t bias; + Elf_Scn *symtab_scn; + GElf_Word strtab_idx, num_local_symbols; + bool full_symtab = false; + + if (module->elf_symtab_pending_files & DRGN_MODULE_FILE_MASK_DEBUG) { + err = find_elf_file_symtab(module->debug_file, + module->debug_file_bias, &file, + &bias, &symtab_scn, &strtab_idx, + &num_local_symbols, &full_symtab); + if (err) + return err; + } + + if (!full_symtab && + (module->elf_symtab_pending_files & DRGN_MODULE_FILE_MASK_LOADED)) { + err = find_elf_file_symtab(module->loaded_file, + module->loaded_file_bias, &file, + &bias, &symtab_scn, &strtab_idx, + &num_local_symbols, &full_symtab); + if (err) + return err; 
+ } + + if (!file) { + drgn_log_debug(module->prog, "%s: no ELF symbol table", + module->name); + module->elf_symtab_pending_files = 0; + return NULL; + } + + Elf_Scn *strtab_scn = elf_getscn(file->elf, strtab_idx); + if (!strtab_scn) + return drgn_error_libelf(); + + Elf_Data *data, *strtab_data; + if ((err = read_elf_section(symtab_scn, &data)) + || (err = read_elf_section(strtab_scn, &strtab_data))) + if (err) + return err; + + truncate_elf_string_data(strtab_data); + + Elf_Data *shndx_data = NULL; + int shndx_idx = elf_scnshndx(symtab_scn); + if (shndx_idx > 0) { + Elf_Scn *shndx_scn = elf_getscn(file->elf, shndx_idx); + if (!shndx_scn) + return drgn_error_libelf(); + err = read_elf_section(shndx_scn, &shndx_data); + if (err) + return err; + } + + module->elf_symtab.file = file; + module->elf_symtab.bias = bias; + module->elf_symtab.data = data->d_buf; + module->elf_symtab.num_symbols = + data->d_size + / (drgn_elf_file_is_64_bit(file) + ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym)); + if (num_local_symbols < 1) + num_local_symbols = 1; + if (num_local_symbols > module->elf_symtab.num_symbols) + num_local_symbols = module->elf_symtab.num_symbols; + module->elf_symtab.num_local_symbols = num_local_symbols; + module->elf_symtab.strtab = strtab_data; + module->elf_symtab.shndx = shndx_data; + module->elf_symtab_pending_files = 0; + module->have_full_symtab = full_symtab; + + drgn_log_debug(module->prog, + "%s: found ELF %ssymbol table with %zu symbols", + module->name, full_symtab ? 
"" : "dynamic ", + module->elf_symtab.num_symbols); + + return NULL; +} + +static size_t elf_symbol_shndx(struct drgn_module *module, size_t sym_idx, + const GElf_Sym *sym) +{ + if (sym->st_shndx < SHN_LORESERVE) + return sym->st_shndx; + if (sym->st_shndx == SHN_XINDEX + && module->elf_symtab.shndx + && sym_idx < + module->elf_symtab.shndx->d_size / sizeof(uint32_t)) { + uint32_t tmp; + memcpy(&tmp, + (const char *)module->elf_symtab.shndx->d_buf + + sym_idx * sizeof(uint32_t), + sizeof(uint32_t)); + if (drgn_elf_file_bswap(module->elf_symtab.file)) + tmp = bswap_32(tmp); + return tmp; + } + return SHN_UNDEF; +} + +static bool elf_symbol_address(struct drgn_module *module, size_t sym_idx, + const GElf_Sym *sym, uint64_t *ret) +{ + uint64_t addr = sym->st_value; + + // On 32-bit Arm, the least significant bit of st_value in an STT_FUNC + // symbol indicates whether it addresses a Thumb instruction. Clear it. + // + // P.S. If we need any more architecture-specific hacks, then we should + // add a callback to drgn_architecture_info. Note that we don't + // currently support V1 of the 64-bit PowerPC ELF ABI where st_value is + // the address of a "function descriptor" instead of the function entry + // point. + if (module->elf_symtab.file->platform.arch->arch == DRGN_ARCH_ARM + && GELF_ST_TYPE(sym->st_info) == STT_FUNC) + addr &= ~1; + + addr += module->elf_symtab.bias; + if (module->elf_symtab.file->is_relocatable) { + size_t shndx = elf_symbol_shndx(module, sym_idx, sym); + if (shndx == SHN_UNDEF) + return false; + Elf_Scn *scn = elf_getscn(module->elf_symtab.file->elf, shndx); + if (!scn) + return false; + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return false; + addr += shdr->sh_addr; + } + *ret = addr; + return true; +} + +// When searching for one symbol, if there are multiple matches, we break ties +// based on the symbol binding. 
The order of precedence is: +// GLOBAL = UNIQUE > WEAK > LOCAL = everything else +static int drgn_symbol_binding_precedence(const struct drgn_symbol *sym) +{ + SWITCH_ENUM(sym->binding) { + case DRGN_SYMBOL_BINDING_GLOBAL: + case DRGN_SYMBOL_BINDING_UNIQUE: + return 3; + case DRGN_SYMBOL_BINDING_WEAK: + return 2; + case DRGN_SYMBOL_BINDING_LOCAL: + case DRGN_SYMBOL_BINDING_UNKNOWN: + return 1; + default: + UNREACHABLE(); + } +} + +static int elf_symbol_binding_precedence(const GElf_Sym *sym) +{ + switch (GELF_ST_BIND(sym->st_info)) { + case STB_GLOBAL: + case STB_GNU_UNIQUE: + return 3; + case STB_WEAK: + return 2; + default: + return 1; + } +} + +// This assumes that both symbols contain the search address. +static bool better_addr_match(const GElf_Sym *a, uint64_t a_addr, + const struct drgn_symbol *b) +{ + // Prefer the symbol that starts closer to the search address. + if (a_addr > b->address) + return true; + if (a_addr < b->address) + return false; + + // If the symbols have the same start address, prefer the one that ends + // closer to the search address. + if (a->st_size < b->size) + return true; + if (a->st_size > b->size) + return false; + + // If the symbols have the same start and end addresses, prefer the one + // with the higher binding precedence. + return elf_symbol_binding_precedence(a) + > drgn_symbol_binding_precedence(b); +} + +// This assumes that both symbols start before the search address and have size +// 0. +static bool better_sizeless_addr_match(const GElf_Sym *a, uint64_t a_addr, + const GElf_Sym *b, uint64_t b_addr) +{ + // Prefer the symbol that starts closer to the search address. + if (a_addr > b_addr) + return true; + if (a_addr < b_addr) + return false; + + // If the symbols have the same start address, prefer the one with the + // higher binding precedence. 
+ return elf_symbol_binding_precedence(a) + > elf_symbol_binding_precedence(b); +} + +static bool addr_in_sym_section(struct drgn_module *module, size_t sym_idx, + const GElf_Sym *sym, uint64_t unbiased_addr) +{ + size_t shndx = elf_symbol_shndx(module, sym_idx, sym); + if (shndx == SHN_UNDEF) + return false; + Elf_Scn *scn = elf_getscn(module->elf_symtab.file->elf, shndx); + if (!scn) + return false; + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return false; + return unbiased_addr >= shdr->sh_addr + && (unbiased_addr - shdr->sh_addr < shdr->sh_size); +} + +struct drgn_error * +drgn_module_elf_symbols_search(struct drgn_module *module, const char *name, + uint64_t addr, enum drgn_find_symbol_flags flags, + struct drgn_symbol_result_builder *builder) +{ + struct drgn_error *err; + + err = find_module_elf_symtab(module); + if (err) + return err; + if (module->elf_symtab.num_symbols == 0) + return NULL; + + const bool is_64_bit = drgn_elf_file_is_64_bit(module->elf_symtab.file); + const bool bswap = drgn_elf_file_bswap(module->elf_symtab.file); + const size_t sym_size = + is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + + // Handwritten assembly functions may have a symbol size of 0 even + // though logically they have a size. The best we can do is assume that + // such a symbol extends until the next symbol. If we're searching by + // address and we don't find any symbols containing the address, then we + // will return a symbol with size 0 that could contain it based on this + // assumption. + const char *sizeless_name = NULL; + // Silence -Wmaybe-uninitialized false positives on sizeless_addr and + // sizeless_sym_idx last seen with GCC 12. + uint64_t sizeless_addr = 0; + size_t sizeless_sym_idx = 0; + Elf64_Sym sizeless_sym; + // The maximum end address of any symbol starting before the given + // address. 
Any symbol with size 0 starting before this is either + // contained within another symbol or is assumed to end before this, so + // it should be ignored. + uint64_t max_end_addr = 0; + + // If we're searching for one symbol, then we may already have a match, + // but we still need to search for a better match. This is only possible + // if we're not searching by address, because address searches only + // search one module. + struct drgn_symbol *best_sym = NULL; + if (flags & DRGN_FIND_SYMBOL_ONE) + best_sym = drgn_symbol_result_builder_single(builder); + + // If we already have a match, then we will never prefer a local symbol + // over that match, so we can skip local symbols. + // + // Otherwise, skip the undefined symbol at index 0. + for (size_t i = best_sym ? module->elf_symtab.num_local_symbols : 1; + i < module->elf_symtab.num_symbols; i++) { + Elf64_Sym elf_sym; +#define visit_elf_sym_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(st_name); \ + visit_scalar_member(st_info); \ + visit_scalar_member(st_other); \ + visit_scalar_member(st_shndx); \ + visit_scalar_member(st_value); \ + visit_scalar_member(st_size); \ +} while (0) + deserialize_struct64(&elf_sym, Elf32_Sym, visit_elf_sym_members, + module->elf_symtab.data + i * sym_size, + is_64_bit, bswap); +#undef visit_elf_sym_members + + // Ignore undefined symbols. + if (elf_sym.st_shndx == SHN_UNDEF) + continue; + + // Ignore symbols with an out-of-bounds name. + if (elf_sym.st_name >= module->elf_symtab.strtab->d_size) + continue; + const char *elf_sym_name = + (const char *)module->elf_symtab.strtab->d_buf + + elf_sym.st_name; + + if ((flags & DRGN_FIND_SYMBOL_NAME) + && strcmp(elf_sym_name, name) != 0) + continue; + + if (flags & DRGN_FIND_SYMBOL_ADDR) { + // Ignore these special symbol types for address + // searches (before we bother computing the address). 
+ switch (GELF_ST_TYPE(elf_sym.st_info)) { + case STT_SECTION: + case STT_FILE: + case STT_TLS: + continue; + default: + break; + } + } else if (best_sym + // This is a non-address search for one symbol. + // Prefer the symbol with the higher binding + // precedence. + && elf_symbol_binding_precedence(&elf_sym) + <= drgn_symbol_binding_precedence(best_sym)) { + continue; + } + + uint64_t elf_sym_addr; + if (!elf_symbol_address(module, i, &elf_sym, &elf_sym_addr)) + continue; + + if (flags & DRGN_FIND_SYMBOL_ADDR) { + if (elf_sym_addr > addr) + continue; + + max_end_addr = max(max_end_addr, + elf_sym_addr + elf_sym.st_size); + + if (elf_sym.st_size == 0) { + if (!sizeless_name + || better_sizeless_addr_match(&elf_sym, + elf_sym_addr, + &sizeless_sym, + sizeless_addr)) { + sizeless_name = elf_sym_name; + sizeless_addr = elf_sym_addr; + sizeless_sym_idx = i; + sizeless_sym = elf_sym; + } + continue; + } else if (addr - elf_sym_addr >= elf_sym.st_size + || (best_sym + && !better_addr_match(&elf_sym, + elf_sym_addr, + best_sym))) { + continue; + } + } + + if (!drgn_symbol_result_builder_add_from_elf(builder, + elf_sym_name, + elf_sym_addr, + &elf_sym)) + return &drgn_enomem; + + if (flags & DRGN_FIND_SYMBOL_ONE) { + best_sym = drgn_symbol_result_builder_single(builder); + if (!(flags & DRGN_FIND_SYMBOL_ADDR)) { + // If we're not searching by address and we find + // a matching global symbol, then we don't need + // to search anymore. + if (best_sym->binding == DRGN_SYMBOL_BINDING_GLOBAL + || best_sym->binding == DRGN_SYMBOL_BINDING_UNIQUE) + return &drgn_stop; + // Otherwise, if we're not searching by address + // and we find a matching local symbol, then we + // can skip past the remaining local symbols.
+ if (i < module->elf_symtab.num_local_symbols) + i = module->elf_symtab.num_local_symbols - 1; + } + } + } + + if (sizeless_name + && drgn_symbol_result_builder_count(builder) == 0 + && sizeless_addr >= max_end_addr + && addr_in_sym_section(module, sizeless_sym_idx, &sizeless_sym, + addr - module->elf_symtab.bias) + && !drgn_symbol_result_builder_add_from_elf(builder, sizeless_name, + sizeless_addr, + &sizeless_sym)) + return &drgn_enomem; + + return NULL; +} diff --git a/libdrgn/elf_symtab.h b/libdrgn/elf_symtab.h new file mode 100644 index 000000000..298f93a84 --- /dev/null +++ b/libdrgn/elf_symtab.h @@ -0,0 +1,55 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * ELF symbol tables. + * + * See @ref ElfSymtab. + */ + +#ifndef DRGN_ELF_SYMBOL_H +#define DRGN_ELF_SYMBOL_H + +#include "drgn_internal.h" + +struct drgn_elf_file; + +/** + * @ingroup Internals + * + * @defgroup ElfSymtab ELF symbol tables + * + * ELF symbol table lookups. + * + * @{ + */ + +/** Symbol table from an ELF file. */ +struct drgn_elf_symbol_table { + /** File containing symbol table. @c NULL if not found yet. */ + struct drgn_elf_file *file; + /** Bias to apply to addresses from the file. */ + uint64_t bias; + /** Symbol table section data. */ + const char *data; + /** Number of symbols in table. */ + size_t num_symbols; + /** Number of local symbols in table. */ + size_t num_local_symbols; + /** String table section used by symbol table. */ + Elf_Data *strtab; + /** Optional `SHT_SYMTAB_SHNDX` section used by symbol table. */ + Elf_Data *shndx; +}; + +/** Find matching ELF symbols in a specific module. 
*/ +struct drgn_error * +drgn_module_elf_symbols_search(struct drgn_module *module, const char *name, + uint64_t addr, enum drgn_find_symbol_flags flags, + struct drgn_symbol_result_builder *builder); + +/** @} */ + +#endif /* DRGN_ELF_SYMBOL_H */ diff --git a/libdrgn/error.c b/libdrgn/error.c index 95174abbe..27d9a2dc5 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -2,7 +2,6 @@ // SPDX-License-Identifier: LGPL-2.1-or-later #include -#include #include #include #include @@ -158,16 +157,6 @@ drgn_error_format_fault(uint64_t address, const char *format, ...) return err; } -struct drgn_error *drgn_error_from_string_builder(enum drgn_error_code code, - struct string_builder *sb) -{ - if (!string_builder_null_terminate(sb)) { - string_builder_deinit(sb); - return &drgn_enomem; - } - return drgn_error_create_nodup(code, sb->str); -} - LIBDRGN_PUBLIC struct drgn_error *drgn_error_copy(struct drgn_error *src) { if (!src->needs_destroy) @@ -274,9 +263,3 @@ struct drgn_error *drgn_error_libdw(void) return drgn_error_format(DRGN_ERROR_OTHER, "libdw error: %s", dwarf_errmsg(-1)); } - -struct drgn_error *drgn_error_libdwfl(void) -{ - return drgn_error_format(DRGN_ERROR_OTHER, "libdwfl error: %s", - dwfl_errmsg(-1)); -} diff --git a/libdrgn/error.h b/libdrgn/error.h index d9ee35368..40f57ca96 100644 --- a/libdrgn/error.h +++ b/libdrgn/error.h @@ -33,15 +33,16 @@ extern struct drgn_error drgn_stop; /** Global @ref DRGN_ERROR_OBJECT_ABSENT error. */ extern struct drgn_error drgn_error_object_absent; -struct string_builder; - /** - * Create a @ref drgn_error with a message from a @ref string_builder. - * - * This deinitializes the string builder. + * Return whether an error is fatal, meaning that it should usually be returned + * to the caller instead of being handled or logged. 
*/ -struct drgn_error *drgn_error_from_string_builder(enum drgn_error_code code, - struct string_builder *sb); +static inline bool drgn_error_is_fatal(struct drgn_error *err) +{ + return err == &drgn_enomem; +} + +struct string_builder; /** * Append a formatted @ref drgn_error to a @ref string_builder. @@ -60,10 +61,6 @@ struct drgn_error *drgn_error_libelf(void) struct drgn_error *drgn_error_libdw(void) __attribute__((__returns_nonnull__)); -/** Create a @ref drgn_error from the libdwfl error indicator. */ -struct drgn_error *drgn_error_libdwfl(void) - __attribute__((__returns_nonnull__)); - /** * Create a @ref drgn_error with a type name. * diff --git a/libdrgn/examples/load_debug_info.c b/libdrgn/examples/load_debug_info.c index 953771df1..284c8b67e 100644 --- a/libdrgn/examples/load_debug_info.c +++ b/libdrgn/examples/load_debug_info.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -91,6 +92,8 @@ noreturn static void usage(bool error) int main(int argc, char **argv) { + setlocale(LC_ALL, ""); + struct option long_options[] = { {"kernel", no_argument, NULL, 'k'}, {"core", required_argument, NULL, 'c'}, diff --git a/libdrgn/handler.h b/libdrgn/handler.h index 3b7d8b7cb..c73b9f2cf 100644 --- a/libdrgn/handler.h +++ b/libdrgn/handler.h @@ -51,6 +51,11 @@ struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, const char ***names_ret, size_t *count_ret); +static inline bool drgn_handler_is_last_enabled(struct drgn_handler *handler) +{ + return handler->enabled && (!handler->next || !handler->next->enabled); +} + // Helper to simplify the casting and naming in drgn_handler_list_deinit(). 
static inline struct drgn_handler * drgn_handler_free_and_next(struct drgn_handler *handler) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 30f167d47..61d5e5e52 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include +#include "array.h" #include "binary_buffer.h" #include "cleanup.h" #include "debug_info.h" @@ -28,6 +30,7 @@ #include "hexlify.h" #include "io.h" #include "linux_kernel.h" +#include "log.h" #include "platform.h" #include "program.h" #include "type.h" @@ -410,15 +413,11 @@ struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) * changes in the future, we can reevaluate this. */ -struct depmod_index { - void *addr; - size_t len; - char path[256]; -}; - -static void depmod_index_deinit(struct depmod_index *depmod) +void depmod_index_deinit(struct depmod_index *depmod) { - munmap(depmod->addr, depmod->len); + if (depmod->len > 0) + munmap(depmod->addr, depmod->len); + free(depmod->path); } struct depmod_index_buffer { @@ -469,33 +468,41 @@ static struct drgn_error *depmod_index_validate(struct depmod_index *depmod) return NULL; } +__attribute__((__format__(__printf__, 2, 3))) static struct drgn_error *depmod_index_init(struct depmod_index *depmod, - const char *osrelease) + const char *path_format, + ...) 
{ struct drgn_error *err; - snprintf(depmod->path, sizeof(depmod->path), - "/lib/modules/%s/modules.dep.bin", osrelease); + va_list ap; + va_start(ap, path_format); + int r = vasprintf(&depmod->path, path_format, ap); + va_end(ap); + if (r < 0) + return &drgn_enomem; int fd = open(depmod->path, O_RDONLY); - if (fd == -1) - return drgn_error_create_os("open", errno, depmod->path); + if (fd == -1) { + err = drgn_error_create_os("open", errno, depmod->path); + goto out_path; + } struct stat st; if (fstat(fd, &st) == -1) { err = drgn_error_create_os("fstat", errno, depmod->path); - goto out; + goto out_fd; } - if (st.st_size < 0 || st.st_size > SIZE_MAX) { + if (st.st_size > SIZE_MAX) { err = &drgn_enomem; - goto out; + goto out_fd; } void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) { err = drgn_error_create_os("mmap", errno, depmod->path); - goto out; + goto out_fd; } depmod->addr = addr; @@ -504,8 +511,11 @@ static struct drgn_error *depmod_index_init(struct depmod_index *depmod, err = depmod_index_validate(depmod); if (err) depmod_index_deinit(depmod); -out: +out_fd: close(fd); +out_path: + if (err) + free(depmod->path); return err; } @@ -610,312 +620,393 @@ static struct drgn_error *depmod_index_find(struct depmod_index *depmod, return NULL; } -struct kernel_module_iterator { - char *name; - uint64_t start, end; - void *build_id_buf; - size_t build_id_buf_capacity; - /* `struct module` type. */ - struct drgn_qualified_type module_type; - /* Current `struct module` (not a pointer). */ - struct drgn_object mod; - /* `struct list_head *` in next module to return. */ - struct drgn_object node; - /* Temporary objects reused for various purposes. */ - struct drgn_object tmp1, tmp2, tmp3; - /* Address of `struct list_head modules`. 
*/ - uint64_t head; - bool use_sys_module; - bool use_sys_module_sections; -}; - -static void kernel_module_iterator_deinit(struct kernel_module_iterator *it) +struct drgn_error * +drgn_module_try_vmlinux_files(struct drgn_module *module, + struct drgn_module_standard_files_state *state) { - drgn_object_deinit(&it->tmp3); - drgn_object_deinit(&it->tmp2); - drgn_object_deinit(&it->tmp1); - drgn_object_deinit(&it->node); - drgn_object_deinit(&it->mod); - free(it->build_id_buf); - free(it->name); + struct drgn_error *err; + struct drgn_program *prog = module->prog; + const char *osrelease = prog->vmcoreinfo.osrelease; + + // Paths relative to the debug directory where vmlinux might be + // installed. + static const char * const debug_dir_paths[] = { + // Debian, Ubuntu: + "/boot/vmlinux-%s", + // Fedora, CentOS: + "/lib/modules/%s/vmlinux", + // SUSE: + "/lib/modules/%s/vmlinux.debug", + }; + STRING_BUILDER(sb); + const char *debug_dir; + size_t debug_dir_len; + drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { + if (debug_dir_len == 0 || debug_dir[0] != '/') + continue; + array_for_each(format, debug_dir_paths) { + if (!string_builder_appendn(&sb, debug_dir, + debug_dir_len) + || !string_builder_appendf(&sb, *format, osrelease) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, sb.str, -1, + true, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + sb.len = 0; + } + } + + // Absolute paths where vmlinux might be installed. 
+ static const char * const paths[] = { + "/boot/vmlinux-%s", + "/lib/modules/%s/build/vmlinux", + "/lib/modules/%s/vmlinux", + }; + array_for_each(format, paths) { + if (!string_builder_appendf(&sb, *format, osrelease) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, sb.str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; + sb.len = 0; + } + return NULL; } -static struct drgn_error * -kernel_module_iterator_init(struct kernel_module_iterator *it, - struct drgn_program *prog, bool use_sys_module) +struct drgn_error * +drgn_module_try_linux_kmod_files(struct drgn_module *module, + struct drgn_module_standard_files_state *state) { struct drgn_error *err; + struct drgn_program *prog = module->prog; + struct depmod_index *modules_dep = &state->modules_dep; - it->name = NULL; - it->build_id_buf = NULL; - it->build_id_buf_capacity = 0; - it->use_sys_module = use_sys_module; - it->use_sys_module_sections = use_sys_module; - err = drgn_program_find_type(prog, "struct module", NULL, - &it->module_type); - if (err) - return err; + if (!modules_dep->addr) { + err = depmod_index_init(modules_dep, + "/lib/modules/%s/modules.dep.bin", + prog->vmcoreinfo.osrelease); + if (err) { + if (drgn_error_is_fatal(err)) + return err; + drgn_error_log_debug(prog, err, + "couldn't open depmod index: "); + drgn_error_destroy(err); + modules_dep->path = NULL; + modules_dep->addr = MAP_FAILED; + modules_dep->len = 0; + } else { + drgn_log_debug(prog, "opened depmod index %s", + modules_dep->path); + } + } + if (modules_dep->len == 0) + return NULL; - drgn_object_init(&it->mod, prog); - drgn_object_init(&it->node, prog); - drgn_object_init(&it->tmp1, prog); - drgn_object_init(&it->tmp2, prog); - drgn_object_init(&it->tmp3, prog); + const char *depmod_path; + size_t depmod_path_len; + err = depmod_index_find(modules_dep, module->name, &depmod_path, + &depmod_path_len); + if (err) { + 
drgn_error_log_debug(prog, err, + "couldn't parse depmod index: "); + drgn_error_destroy(err); + return NULL; + } + if (!depmod_path) { + drgn_log_debug(prog, "couldn't find %s in depmod index", + module->name); + return NULL; + } + drgn_log_debug(prog, "found %.*s in depmod index", + depmod_path_len > INT_MAX + ? INT_MAX : (int)depmod_path_len, + depmod_path); + + // Get the length of the path with one extension after ".ko" removed if + // present (e.g., ".gz", ".xz", or ".zst"). + const char *name = memrchr(depmod_path, '/', depmod_path_len); + if (name) + name = name + 1; + else + name = depmod_path; + const char *name_end = depmod_path + depmod_path_len; + size_t ko_len = depmod_path_len; + for (int j = 0; j < 2; j++) { + char *dot = memrchr(name, '.', name_end - name); + if (!dot) + break; + if (name_end - dot == 3 + && dot[1] == 'k' && dot[2] == 'o') { + ko_len = name_end - depmod_path; + break; + } + name_end = dot; + } - err = drgn_program_find_object(prog, "modules", NULL, - DRGN_FIND_OBJECT_VARIABLE, &it->node); - if (err) - goto err; - if (it->node.kind != DRGN_OBJECT_REFERENCE) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "can't get address of modules list"); - goto err; + const char *osrelease = prog->vmcoreinfo.osrelease; + STRING_BUILDER(sb); + const char *debug_dir; + size_t debug_dir_len; + drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { + if (debug_dir_len == 0 || debug_dir[0] != '/') + continue; + // Debian, Ubuntu: + // $debug_dir/lib/modules/$(uname -r)/$ko_name + if (!string_builder_appendn(&sb, debug_dir, debug_dir_len) + || !string_builder_appendn(&sb, depmod_path, ko_len) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, sb.str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; + + // Fedora, CentOS, SUSE: + // $debug_dir/lib/modules/$(uname -r)/$ko_name.debug + if (!string_builder_append(&sb, ".debug") + || 
!string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, sb.str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; } - it->head = it->node.address; - err = drgn_object_member(&it->node, &it->node, "next"); - if (err) - goto err; - err = drgn_object_read(&it->node, &it->node); - if (err) - goto err; + sb.len = 0; + if (!string_builder_appendf(&sb, "/lib/modules/%s/", osrelease) || + !string_builder_appendn(&sb, depmod_path, depmod_path_len) || + !string_builder_null_terminate(&sb)) + return &drgn_enomem; + return drgn_module_try_standard_file(module, sb.str, -1, true, NULL); +} + +// This has a weird calling convention so that the caller can call +// drgn_error_format_os() itself. +static const char *get_gnu_build_id_from_note_file(int fd, + void **bufp, + size_t *buf_capacityp, + const void **build_id_ret, + size_t *build_id_len_ret) +{ + struct stat st; + if (fstat(fd, &st) < 0) + return "fstat"; + + if (st.st_size > SSIZE_MAX + || !alloc_or_reuse(bufp, buf_capacityp, st.st_size)) + return ""; + + ssize_t r = read_all(fd, *bufp, st.st_size); + if (r < 0) + return "read"; + *build_id_len_ret = parse_gnu_build_id_from_notes(*bufp, r, 4, false, + build_id_ret); return NULL; +} -err: - kernel_module_iterator_deinit(it); - return err; +static struct drgn_error * +get_build_id_from_sys_kernel_notes(void **buf_ret, + const void **build_id_ret, + size_t *build_id_len_ret) +{ + static const char path[] = "/sys/kernel/notes"; + _cleanup_close_ int fd = open(path, O_RDONLY); + if (fd == -1) + return drgn_error_create_os("open", errno, path); + + _cleanup_free_ void *buf = NULL; + size_t buf_capacity = 0; + const char *message = get_gnu_build_id_from_note_file(fd, &buf, + &buf_capacity, + build_id_ret, + build_id_len_ret); + if (message && message[0]) + return drgn_error_create_os(message, errno, path); + else if (message) + return &drgn_enomem; + *buf_ret = no_cleanup_ptr(buf); + return NULL; +} + 
+// Arbitrary limit on the number of iterations to make through the modules list +// in order to avoid getting stuck in a cycle. +static const int MAX_MODULE_LIST_ITERATIONS = 10000; + +struct linux_kernel_loaded_module_iterator { + struct drgn_module_iterator it; + bool yielded_vmlinux; + int module_list_iterations_remaining; + // `struct module` type. + struct drgn_qualified_type module_type; + // `struct list_head *` in next module to yield. + struct drgn_object node; + // Address of `struct list_head modules`. + uint64_t modules_head; +}; + +static void +linux_kernel_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) +{ + struct linux_kernel_loaded_module_iterator *it = + container_of(_it, struct linux_kernel_loaded_module_iterator, it); + drgn_object_deinit(&it->node); + free(it); } -/** - * Get the the next loaded kernel module. - * - * After this is called, @c it->name is set to the name of the kernel module, - * and @c it->start and @c it->end are set to the address range of the kernel - * module. These are valid until the next time this is called or the iterator is - * destroyed. - * - * @return @c NULL on success, non-@c NULL on error. In particular, when there - * are no more modules, returns &@ref drgn_stop.
- */ static struct drgn_error * -kernel_module_iterator_next(struct kernel_module_iterator *it) +yield_vmlinux(struct linux_kernel_loaded_module_iterator *it, + struct drgn_module **ret, bool *new_ret) { struct drgn_error *err; - struct drgn_program *prog = drgn_object_program(&it->mod); - - uint64_t addr; - err = drgn_object_read_unsigned(&it->node, &addr); - if (err) - return err; - if (addr == it->head) - return &drgn_stop; + struct drgn_program *prog = it->it.prog; - err = drgn_object_container_of(&it->mod, &it->node, it->module_type, - "list"); - if (err) - return err; - err = drgn_object_dereference(&it->mod, &it->mod); + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_main(prog, "kernel", &module, &new); if (err) return err; - // We need several fields from the `struct module`. Especially for - // /proc/kcore, it is faster to read the entire structure (which is <1kB - // as of Linux 6.0) from the core dump all at once than it is to read - // each field individually. - err = drgn_object_read(&it->mod, &it->mod); - if (err) - return err; - err = drgn_object_member(&it->node, &it->mod, "list"); - if (err) - return err; - err = drgn_object_member(&it->node, &it->node, "next"); - if (err) - return err; - - // Set tmp1 to the module base address and tmp2 to the size. - err = drgn_object_member(&it->tmp1, &it->mod, "mem"); - if (!err) { - // Since Linux kernel commit ac3b43283923 ("module: replace - // module_layout with module_memory") (in v6.4), the base and - // size are in the `struct module_memory mem[MOD_TEXT]` member - // of `struct module`. 
- if (!prog->mod_text_cached) { - err = drgn_program_find_object(drgn_object_program(&it->mod), - "MOD_TEXT", NULL, - DRGN_FIND_OBJECT_CONSTANT, - &it->tmp2); - if (err) - return err; - union drgn_value mod_text_value; - err = drgn_object_read_integer(&it->tmp2, - &mod_text_value); - if (err) - return err; - prog->mod_text = mod_text_value.uvalue; - prog->mod_text_cached = true; - } + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } - err = drgn_object_subscript(&it->tmp1, &it->tmp1, - prog->mod_text); + if (prog->vmcoreinfo.build_id_len > 0) { + // Since Linux kernel commit 0935288c6e00 ("kdump: append kernel + // build-id string to VMCOREINFO") (in v5.9), we can get the + // build ID from VMCOREINFO. + err = drgn_module_set_build_id(module, prog->vmcoreinfo.build_id, + prog->vmcoreinfo.build_id_len); if (err) return err; - err = drgn_object_member(&it->tmp2, &it->tmp1, "size"); + drgn_log_debug(prog, + "found kernel build ID %s in VMCOREINFO", + module->build_id_str); + } else if (prog->flags & DRGN_PROGRAM_IS_LIVE) { + // Before that, on the live kernel, we can get the build ID from + // /sys/kernel/notes. + _cleanup_free_ void *build_id_buf = NULL; + const void *build_id; + size_t build_id_len; + err = get_build_id_from_sys_kernel_notes(&build_id_buf, + &build_id, + &build_id_len); if (err) return err; - err = drgn_object_member(&it->tmp1, &it->tmp1, "base"); - if (err) - return err; - } else if (err->code == DRGN_ERROR_LOOKUP) { - // Since Linux kernel commit 7523e4dc5057 ("module: use a - // structure to encapsulate layout.") (in v4.5), the base and - // size are in the `struct module_layout core_layout` member of - // `struct module`. 
- drgn_error_destroy(err); - - err = drgn_object_member(&it->tmp1, &it->mod, "core_layout"); - if (!err) { - err = drgn_object_member(&it->tmp2, &it->tmp1, "size"); - if (err) - return err; - err = drgn_object_member(&it->tmp1, &it->tmp1, "base"); - if (err) - return err; - } else if (err->code == DRGN_ERROR_LOOKUP) { - // Before that, they are directly in the `struct - // module`. - drgn_error_destroy(err); - - err = drgn_object_member(&it->tmp2, &it->mod, - "core_size"); - if (err) - return err; - err = drgn_object_member(&it->tmp1, &it->mod, - "module_core"); + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); if (err) return err; + drgn_log_debug(prog, + "found kernel build ID %s in /sys/kernel/notes", + module->build_id_str); } else { - return err; + drgn_log_debug(prog, + "couldn't find kernel build ID in /sys/kernel/notes"); } } else { - return err; + // Otherwise, we can't get the build ID. + drgn_log_debug(prog, "couldn't find kernel build ID"); } - err = drgn_object_read_unsigned(&it->tmp1, &it->start); - if (err) - return err; - err = drgn_object_read_unsigned(&it->tmp2, &it->end); - if (err) - return err; - it->end += it->start; - - err = drgn_object_member(&it->tmp2, &it->mod, "name"); - if (err) - return err; - char *name; - err = drgn_object_read_c_string(&it->tmp2, &name); - if (err) - return err; - free(it->name); - it->name = name; + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; return NULL; } static struct drgn_error * -kernel_module_iterator_gnu_build_id_live(struct kernel_module_iterator *it, - const void **build_id_ret, - size_t *build_id_len_ret) +kernel_module_set_build_id_live(struct drgn_module *module) { struct drgn_error *err; + struct drgn_program *prog = module->prog; - char *path; - if (asprintf(&path, "/sys/module/%s/notes", it->name) == -1) + _cleanup_free_ char *path; + if (asprintf(&path, "/sys/module/%s/notes", module->name) < 0) { + path = NULL; return &drgn_enomem; - 
DIR *dir = opendir(path); + } + _cleanup_closedir_ DIR *dir = opendir(path); if (!dir) { - err = drgn_error_create_os("opendir", errno, path); - goto out_path; + if (errno == ENOENT) { + drgn_log_debug(prog, "opendir: %s: %m", path); + return NULL; + } else { + return drgn_error_create_os("opendir", errno, path); + } } + _cleanup_free_ void *buf = NULL; + size_t capacity = 0; + struct dirent *ent; while ((errno = 0, ent = readdir(dir))) { if (ent->d_type == DT_DIR) continue; - int fd = openat(dirfd(dir), ent->d_name, O_RDONLY); - if (fd == -1) { - err = drgn_error_format_os("openat", errno, "%s/%s", - path, ent->d_name); - goto out; - } - - struct stat st; - if (fstat(fd, &st) < 0) { - err = drgn_error_format_os("fstat", errno, "%s/%s", - path, ent->d_name); - close(fd); - goto out; - } - - if (st.st_size > SIZE_MAX || - !alloc_or_reuse(&it->build_id_buf, - &it->build_id_buf_capacity, st.st_size)) { - err = &drgn_enomem; - close(fd); - goto out; + _cleanup_close_ int fd = openat(dirfd(dir), ent->d_name, + O_RDONLY); + if (fd < 0) { + return drgn_error_format_os("openat", errno, "%s/%s", + path, ent->d_name); } - ssize_t r = read_all(fd, it->build_id_buf, st.st_size); - if (r < 0) { - err = drgn_error_format_os("read", errno, "%s/%s", path, - ent->d_name); - close(fd); - goto out; + const void *build_id; + size_t build_id_len; + const char *message = + get_gnu_build_id_from_note_file(fd, &buf, &capacity, + &build_id, + &build_id_len); + if (message && message[0]) { + return drgn_error_format_os(message, errno, "%s/%s", + path, ent->d_name); + } else if (message) { + return &drgn_enomem; } - close(fd); - - *build_id_len_ret = - parse_gnu_build_id_from_notes(it->build_id_buf, r, 4, - false, build_id_ret); - if (*build_id_len_ret) { - err = NULL; - goto out; + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (!err) { + drgn_log_debug(prog, + "found build ID %s in %s/%s", + module->build_id_str, path, + ent->d_name); + } + 
return err; } } - if (errno) { - err = drgn_error_create_os("readdir", errno, path); - } else { - *build_id_ret = NULL; - *build_id_len_ret = 0; - err = NULL; - } - -out: - closedir(dir); -out_path: - free(path); - return err; + if (errno) + return drgn_error_create_os("readdir", errno, path); + drgn_log_debug(prog, "couldn't find build ID in %s", path); + return NULL; } static struct drgn_error * -kernel_module_iterator_gnu_build_id(struct kernel_module_iterator *it, - const void **build_id_ret, - size_t *build_id_len_ret) +kernel_module_set_build_id(struct drgn_module *module, + const struct drgn_object *module_obj, + bool use_sys_module) { - if (it->use_sys_module) { - return kernel_module_iterator_gnu_build_id_live(it, - build_id_ret, - build_id_len_ret); - } + if (use_sys_module) + return kernel_module_set_build_id_live(module); struct drgn_error *err; - struct drgn_program *prog = drgn_object_program(&it->mod); + struct drgn_program *prog = module->prog; const bool bswap = drgn_platform_bswap(&prog->platform); DRGN_OBJECT(attrs, prog); DRGN_OBJECT(attr, prog); DRGN_OBJECT(tmp, prog); + _cleanup_free_ void *buf = NULL; + size_t capacity = 0; // n = mod->notes_attrs->notes uint64_t n; - err = drgn_object_member(&attrs, &it->mod, "notes_attrs"); + err = drgn_object_member(&attrs, module_obj, "notes_attrs"); if (err) return err; err = drgn_object_member_dereference(&tmp, &attrs, "notes"); @@ -954,158 +1045,91 @@ kernel_module_iterator_gnu_build_id(struct kernel_module_iterator *it, if (err) return err; - if (size > SIZE_MAX || - !alloc_or_reuse(&it->build_id_buf, - &it->build_id_buf_capacity, size)) + if (size > SIZE_MAX || !alloc_or_reuse(&buf, &capacity, size)) return &drgn_enomem; - err = drgn_program_read_memory(prog, it->build_id_buf, address, - size, false); + err = drgn_program_read_memory(prog, buf, address, size, false); if (err) return err; - *build_id_len_ret = - parse_gnu_build_id_from_notes(it->build_id_buf, size, 4, - bswap, build_id_ret); - if 
(*build_id_len_ret) - return NULL; + const void *build_id; + size_t build_id_len = + parse_gnu_build_id_from_notes(buf, size, 4, bswap, + &build_id); + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (!err) { + drgn_log_debug(prog, + "found build ID %s in notes_attrs", + module->build_id_str); + } + return err; + } } - *build_id_ret = NULL; - *build_id_len_ret = 0; + drgn_log_debug(prog, + "couldn't find build ID in notes_attrs"); return NULL; } -struct kernel_module_section_iterator { - struct kernel_module_iterator *kmod_it; - bool yielded_percpu; - /* /sys/module/$module/sections directory or NULL. */ - DIR *sections_dir; - /* If not using /sys/module/$module/sections. */ - uint64_t i; - uint64_t nsections; - char *name; -}; - static struct drgn_error * -kernel_module_section_iterator_init_no_sys_module(struct kernel_module_section_iterator *it, - struct kernel_module_iterator *kmod_it) +kernel_module_set_section_addresses_live(struct drgn_module *module) { struct drgn_error *err; + struct drgn_program *prog = module->prog; - it->sections_dir = NULL; - it->i = 0; - it->name = NULL; - /* it->nsections = mod->sect_attrs->nsections */ - err = drgn_object_member(&kmod_it->tmp1, &kmod_it->mod, "sect_attrs"); - if (err) - return err; - err = drgn_object_member_dereference(&kmod_it->tmp2, &kmod_it->tmp1, - "nsections"); - if (err) - return err; - err = drgn_object_read_unsigned(&kmod_it->tmp2, &it->nsections); - if (err) - return err; - /* kmod_it->tmp1 = mod->sect_attrs->attrs */ - return drgn_object_member_dereference(&kmod_it->tmp1, &kmod_it->tmp1, - "attrs"); -} - -static struct drgn_error * -kernel_module_section_iterator_init(struct kernel_module_section_iterator *it, - struct kernel_module_iterator *kmod_it) -{ - it->kmod_it = kmod_it; - it->yielded_percpu = false; - if (kmod_it->use_sys_module_sections) { - char *path; - if (asprintf(&path, "/sys/module/%s/sections", - kmod_it->name) == -1) - return &drgn_enomem; 
- it->sections_dir = opendir(path); - free(path); - if (!it->sections_dir) { - return drgn_error_format_os("opendir", errno, - "/sys/module/%s/sections", - kmod_it->name); - } - return NULL; - } else { - return kernel_module_section_iterator_init_no_sys_module(it, kmod_it); + _cleanup_free_ char *path; + if (asprintf(&path, "/sys/module/%s/sections", module->name) < 0) { + path = NULL; + return &drgn_enomem; } -} - -static void -kernel_module_section_iterator_deinit(struct kernel_module_section_iterator *it) -{ - if (it->sections_dir) - closedir(it->sections_dir); - else - free(it->name); -} + _cleanup_closedir_ DIR *dir = opendir(path); + if (!dir) + return drgn_error_create_os("opendir", errno, path); -static struct drgn_error * -kernel_module_section_iterator_next_live(struct kernel_module_section_iterator *it, - const char **name_ret, - uint64_t *address_ret) -{ struct dirent *ent; - while ((errno = 0, ent = readdir(it->sections_dir))) { + while ((errno = 0, ent = readdir(dir))) { if (ent->d_type == DT_DIR) continue; - if (ent->d_type == DT_UNKNOWN) { - struct stat st; - - if (fstatat(dirfd(it->sections_dir), ent->d_name, &st, - 0) == -1) { - return drgn_error_format_os("fstatat", errno, - "/sys/module/%s/sections/%s", - it->kmod_it->name, - ent->d_name); - } - if (S_ISDIR(st.st_mode)) - continue; - } - int fd = openat(dirfd(it->sections_dir), ent->d_name, O_RDONLY); - if (fd == -1) { - return drgn_error_format_os("openat", errno, - "/sys/module/%s/sections/%s", - it->kmod_it->name, - ent->d_name); + _cleanup_close_ int fd = openat(dirfd(dir), ent->d_name, + O_RDONLY); + if (fd < 0) { + return drgn_error_format_os("openat", errno, "%s/%s", + path, ent->d_name); } - FILE *file = fdopen(fd, "r"); - if (!file) { - close(fd); + + _cleanup_fclose_ FILE *file = fdopen(fd, "r"); + if (!file) return drgn_error_create_os("fdopen", errno, NULL); - } - int ret = fscanf(file, "%" SCNx64, address_ret); - fclose(file); - if (ret != 1) { + uint64_t address; + if (fscanf(file, 
"%" SCNx64, &address) != 1) { return drgn_error_format(DRGN_ERROR_OTHER, - "could not parse /sys/module/%s/sections/%s", - it->kmod_it->name, - ent->d_name); + "could not parse %s/%s", + path, ent->d_name); } - *name_ret = ent->d_name; - return NULL; - } - if (errno) { - return drgn_error_format_os("readdir", errno, - "/sys/module/%s/sections", - it->kmod_it->name); - } else { - return &drgn_stop; + + drgn_log_debug(prog, "found section %s@0x%" PRIx64 " in %s", + ent->d_name, address, path); + err = drgn_module_set_section_address(module, ent->d_name, + address); + if (err) + return err; } + if (errno) + return drgn_error_create_os("readdir", errno, path); + return NULL; } static struct drgn_error * -kernel_module_section_iterator_next(struct kernel_module_section_iterator *it, - const char **name_ret, - uint64_t *address_ret) +kernel_module_set_section_addresses(struct drgn_module *module, + const struct drgn_object *module_obj, + bool use_sys_module) { struct drgn_error *err; - struct kernel_module_iterator *kmod_it = it->kmod_it; + struct drgn_program *prog = module->prog; + + DRGN_OBJECT(tmp, prog); // As of Linux 6.0, the .data..percpu section is not included in the // section attributes. (kernel/module/sysfs.c:add_sect_attrs() only @@ -1114,656 +1138,479 @@ kernel_module_section_iterator_next(struct kernel_module_section_iterator *it, // for the .data..percpu section.) However, we need this address so that // global per-CPU variables will be relocated correctly. Get it from // `struct module`. 
- if (!it->yielded_percpu) { - it->yielded_percpu = true; - err = drgn_object_member(&kmod_it->tmp2, &kmod_it->mod, - "percpu"); - if (!err) { - err = drgn_object_read_unsigned(&kmod_it->tmp2, address_ret); + err = drgn_object_member(&tmp, module_obj, "percpu"); + if (!err) { + uint64_t address; + err = drgn_object_read_unsigned(&tmp, &address); + if (err) + return err; + drgn_log_debug(prog, "module percpu is 0x%" PRIx64, address); + // struct module::percpu is NULL if the module doesn't have any + // per-CPU data. + if (address) { + err = drgn_module_set_section_address(module, + ".data..percpu", + address); if (err) return err; - // struct module::percpu is NULL if the module doesn't - // have any per-CPU data. - if (*address_ret) { - *name_ret = ".data..percpu"; - return NULL; - } - } else if (err->code == DRGN_ERROR_LOOKUP) { - // struct module::percpu doesn't exist if !SMP. - drgn_error_destroy(err); - } else { - return err; } + } else if (err->code == DRGN_ERROR_LOOKUP) { + // struct module::percpu doesn't exist if !SMP. + drgn_error_destroy(err); + } else { + return err; } - if (it->sections_dir) { - err = kernel_module_section_iterator_next_live(it, name_ret, - address_ret); - if (err && err->code == DRGN_ERROR_OS && err->errnum == EACCES) { - closedir(it->sections_dir); - drgn_error_destroy(err); - it->kmod_it->use_sys_module_sections = false; - err = kernel_module_section_iterator_init_no_sys_module(it, it->kmod_it); - if (err) - return err; - } else { + if (use_sys_module) { + err = kernel_module_set_section_addresses_live(module); + // We could be debugging /proc/kcore without root privileges via + // an fd that we were passed. If we didn't have permission to + // access the files in /sys/module/$module/sections, fall back + // to the non-live path. 
+ if (!err || err->code != DRGN_ERROR_OS || err->errnum != EACCES) return err; - } + drgn_error_log_debug(prog, err, "falling back to sect_attrs: "); + drgn_error_destroy(err); } - if (it->i >= it->nsections) - return &drgn_stop; - err = drgn_object_subscript(&kmod_it->tmp2, &kmod_it->tmp1, it->i++); + DRGN_OBJECT(attrs, prog); + DRGN_OBJECT(attr, prog); + + err = drgn_object_member(&attrs, module_obj, "sect_attrs"); if (err) return err; - err = drgn_object_member(&kmod_it->tmp3, &kmod_it->tmp2, "address"); + + // i = mod->sect_attrs->nsections + err = drgn_object_member_dereference(&tmp, &attrs, "nsections"); if (err) return err; - err = drgn_object_read_unsigned(&kmod_it->tmp3, address_ret); + uint64_t i; + err = drgn_object_read_unsigned(&tmp, &i); if (err) return err; - /* - * Since Linux kernel commit ed66f991bb19 ("module: Refactor section - * attr into bin attribute") (in v5.8), the section name is - * module_sect_attr.battr.attr.name. Before that, it is simply - * module_sect_attr.name. 
- */ - err = drgn_object_member(&kmod_it->tmp2, &kmod_it->tmp2, "battr"); - if (!err) { - err = drgn_object_member(&kmod_it->tmp2, &kmod_it->tmp2, - "attr"); + + // attrs = mod->sect_attrs->attrs + err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); + if (err) + return err; + + while (i-- > 0) { + // attr = attrs[i] + err = drgn_object_subscript(&attr, &attrs, i); if (err) return err; - } else { - if (err->code != DRGN_ERROR_LOOKUP) + + // address = attr.address + err = drgn_object_member(&tmp, &attr, "address"); + if (err) + return err; + uint64_t address; + err = drgn_object_read_unsigned(&tmp, &address); + if (err) return err; - drgn_error_destroy(err); - } - err = drgn_object_member(&kmod_it->tmp3, &kmod_it->tmp2, "name"); - if (err) - return err; - char *name; - err = drgn_object_read_c_string(&kmod_it->tmp3, &name); - if (err) - return err; - free(it->name); - *name_ret = it->name = name; - return NULL; -} -/* - * Identify an ELF file as a kernel module, vmlinux, or neither. We classify a - * file as a kernel module if it has a section named .gnu.linkonce.this_module. - * If it doesn't, but it does have a section named .init.text, we classify it as - * vmlinux. - */ -static struct drgn_error *identify_kernel_elf(Elf *elf, - bool *is_vmlinux_ret, - bool *is_module_ret) -{ - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - Elf_Scn *scn = NULL; - bool have_init_text = false; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - const char *scnname; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - continue; + // Since Linux kernel commit ed66f991bb19 ("module: Refactor + // section attr into bin attribute") (in v5.8), the section name + // is module_sect_attr.battr.attr.name. Before that, it is + // simply module_sect_attr.name. 
- scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - return drgn_error_libelf(); - if (strcmp(scnname, ".gnu.linkonce.this_module") == 0) { - *is_vmlinux_ret = false; - *is_module_ret = true; - return NULL; - } else if (strcmp(scnname, ".init.text") == 0) { - have_init_text = true; + // attr = attr.battr.attr + err = drgn_object_member(&attr, &attr, "battr"); + if (!err) { + err = drgn_object_member(&attr, &attr, "attr"); + if (err) + return err; + } else { + if (err->code != DRGN_ERROR_LOOKUP) + return err; + drgn_error_destroy(err); } + err = drgn_object_member(&tmp, &attr, "name"); + if (err) + return err; + _cleanup_free_ char *name = NULL; + err = drgn_object_read_c_string(&tmp, &name); + if (err) + return err; + + drgn_log_debug(prog, + "found section %s@0x%" PRIx64 " in sect_attrs", + name, address); + err = drgn_module_set_section_address(module, name, address); + if (err) + return err; } - *is_vmlinux_ret = have_init_text; - *is_module_ret = false; return NULL; } -DEFINE_HASH_MAP(elf_scn_name_map, const char *, Elf_Scn *, - c_string_key_hash_pair, c_string_key_eq); - static struct drgn_error * -cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf) +kernel_module_find_or_create_internal(const struct drgn_object *module_obj, + struct drgn_module **ret, bool *new_ret, + bool create, bool log) { struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(module_obj); - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - struct elf_scn_name_map scn_map = HASH_TABLE_INIT; - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out_scn_map; - } - - if (!(shdr->sh_flags & SHF_ALLOC)) - continue; + struct drgn_module_key key; + key.kind = DRGN_MODULE_RELOCATABLE; + uint64_t name_offset; + err = drgn_type_offsetof(module_obj->type, "name", 
&name_offset); + if (err) + return err; + if (name_offset >= drgn_object_size(module_obj) + || !memchr(drgn_object_buffer(module_obj) + name_offset, '\0', + drgn_object_size(module_obj) - name_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "couldn't read module name"); + } + key.relocatable.name = drgn_object_buffer(module_obj) + name_offset; - struct elf_scn_name_map_entry entry = { - .key = elf_strptr(elf, shstrndx, shdr->sh_name), - .value = scn, - }; - if (!entry.key) { - err = drgn_error_libelf(); - goto out_scn_map; + DRGN_OBJECT(mem, prog); + DRGN_OBJECT(val, prog); + bool layout_in_module = false; + err = drgn_object_member(&mem, module_obj, "mem"); + if (!err) { + // Since Linux kernel commit ac3b43283923 ("module: replace + // module_layout with module_memory") (in v6.4), the base and + // size are in the `struct module_memory mem[MOD_TEXT]` member + // of `struct module`. + if (!prog->mod_text_cached) { + err = drgn_program_find_object(prog, "MOD_TEXT", NULL, + DRGN_FIND_OBJECT_CONSTANT, + &val); + if (err) + return err; + union drgn_value mod_text_value; + err = drgn_object_read_integer(&val, &mod_text_value); + if (err) + return err; + prog->mod_text = mod_text_value.uvalue; + prog->mod_text_cached = true; } - - if (elf_scn_name_map_insert(&scn_map, &entry, NULL) == -1) { - err = &drgn_enomem; - goto out_scn_map; + err = drgn_object_subscript(&mem, &mem, prog->mod_text); + if (err) + return err; + } else { + if (err->code != DRGN_ERROR_LOOKUP) + return err; + drgn_error_destroy(err); + // Between that and Linux kernel commit 7523e4dc5057 ("module: + // use a structure to encapsulate layout.") (in v4.5), the base + // and size are in the `struct module_layout core_layout` member + // of `struct module`. + err = drgn_object_member(&mem, module_obj, "core_layout"); + if (err) { + if (err->code != DRGN_ERROR_LOOKUP) + return err; + drgn_error_destroy(err); + // Before that, they are directly in the `struct + // module`. 
+ layout_in_module = true; + } } - - struct kernel_module_section_iterator section_it; - err = kernel_module_section_iterator_init(&section_it, kmod_it); + if (layout_in_module) + err = drgn_object_member(&val, module_obj, "module_core"); + else + err = drgn_object_member(&val, &mem, "base"); if (err) - goto out_scn_map; - const char *name; - uint64_t address; - while (!(err = kernel_module_section_iterator_next(&section_it, &name, - &address))) { - struct elf_scn_name_map_iterator it = - elf_scn_name_map_search(&scn_map, &name); - if (it.entry) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(it.entry->value, - &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - break; - } - shdr->sh_addr = address; - if (!gelf_update_shdr(it.entry->value, shdr)) { - err = drgn_error_libelf(); - break; - } - } + return err; + err = drgn_object_read_unsigned(&val, &key.relocatable.address); + if (err) + return err; + + if (log) { + drgn_log_debug(prog, "found loaded kernel module %s@0x%" PRIx64, + key.relocatable.name, key.relocatable.address); } - if (err && err != &drgn_stop) - goto out_section_it; - err = NULL; -out_section_it: - kernel_module_section_iterator_deinit(&section_it); -out_scn_map: - elf_scn_name_map_deinit(&scn_map); - return err; -} -struct kernel_module_file { - const char *path; - int fd; - Elf *elf; - /* - * Kernel module build ID. This is owned by the Elf handle. Because we - * use this as the key in the kernel_module_table, the file must always - * be removed from the table before it is reported to the DWARF index - * (which takes ownership of the Elf handle). - */ - const void *gnu_build_id; - size_t gnu_build_id_len; - /* Next file with the same build ID.
*/ - struct kernel_module_file *next; -}; + if (!create) { + *ret = drgn_module_find(prog, &key); + if (new_ret) + *new_ret = false; + return NULL; + } -static struct nstring -kernel_module_table_key(struct kernel_module_file * const *entry) -{ - return (struct nstring){ - (*entry)->gnu_build_id, (*entry)->gnu_build_id_len - }; -} + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create(prog, &key, key.relocatable.name, + &module, &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } -DEFINE_HASH_TABLE(kernel_module_table, struct kernel_module_file *, - kernel_module_table_key, nstring_hash_pair, nstring_eq); + if (layout_in_module) + err = drgn_object_member(&val, module_obj, "core_size"); + else + err = drgn_object_member(&val, &mem, "size"); + if (err) + return err; + uint64_t size; + err = drgn_object_read_unsigned(&val, &size); + if (err) + return err; -static struct drgn_error * -report_loaded_kernel_module(struct drgn_debug_info_load_state *load, - struct kernel_module_iterator *kmod_it, - struct kernel_module_table *kmod_table) -{ - struct drgn_error *err; + drgn_log_debug(prog, "module size is %" PRIu64, size); + err = drgn_module_set_address_range(module, key.relocatable.address, + key.relocatable.address + size); + if (err) + return err; - struct nstring key; - err = kernel_module_iterator_gnu_build_id(kmod_it, - (const void **)&key.str, - &key.len); - if (err || key.len == 0) { - return drgn_debug_info_report_error(load, kmod_it->name, - "could not find GNU build ID", - err); + // If we're debugging the running kernel, we can use + // /sys/module/$module/notes and /sys/module/$module/sections instead of + // getting the equivalent information from the core dump. This fast path + // can be disabled via an environment variable for testing. 
It may also + // be disabled if we encounter permission issues using + // /sys/module/$module/sections. + bool use_sys_module = false; + if (prog->flags & DRGN_PROGRAM_IS_LOCAL) { + char *env = getenv("DRGN_USE_SYS_MODULE"); + use_sys_module = !env || atoi(env); } + err = kernel_module_set_build_id(module, module_obj, use_sys_module); + if (err) + return err; + err = kernel_module_set_section_addresses(module, module_obj, + use_sys_module); + if (err) + return err; - struct hash_pair hp = kernel_module_table_hash(&key); - struct kernel_module_table_iterator it = - kernel_module_table_search_hashed(kmod_table, &key, hp); - if (!it.entry) - return &drgn_not_found; - - struct kernel_module_file *kmod = *it.entry; - kernel_module_table_delete_iterator_hashed(kmod_table, it, hp); - do { - err = cache_kernel_module_sections(kmod_it, kmod->elf); - if (err) { - err = drgn_debug_info_report_error(load, kmod->path, - "could not get section addresses", - err); - if (err) - return err; - goto next; - } - - err = drgn_debug_info_report_elf(load, kmod->path, kmod->fd, - kmod->elf, kmod_it->start, - kmod_it->end, kmod_it->name, - NULL); - kmod->elf = NULL; - kmod->fd = -1; - if (err) - return err; -next: - kmod = kmod->next; - } while (kmod); + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; return NULL; } static struct drgn_error * -report_default_kernel_module(struct drgn_debug_info_load_state *load, - struct kernel_module_iterator *kmod_it, - struct depmod_index *depmod) +drgn_module_find_or_create_linux_kernel_loadable_internal(const struct drgn_object *module_obj, + struct drgn_module **ret, + bool *new_ret, + bool create) { - static const char * const module_paths[] = { - "/usr/lib/debug/lib/modules/%s/%.*s", - "/usr/lib/debug/lib/modules/%s/%.*s.debug", - "/lib/modules/%s/%.*s%.*s", - NULL, - }; struct drgn_error *err; - const char *depmod_path; - size_t depmod_path_len; - err = depmod_index_find(depmod, kmod_it->name, &depmod_path, - &depmod_path_len); - 
if (err) { - return drgn_debug_info_report_error(load, - "kernel modules", - "could not parse depmod", - err); - } else if (!depmod_path) { - return drgn_debug_info_report_error(load, kmod_it->name, - "could not find module in depmod", - NULL); + // kernel_module_find_or_create_internal() expects a `struct module` + // value. + struct drgn_object mod; + if (drgn_type_kind(drgn_underlying_type(module_obj->type)) + == DRGN_TYPE_POINTER) { + drgn_object_init(&mod, drgn_object_program(module_obj)); + err = drgn_object_dereference(&mod, module_obj); + if (!err) + err = drgn_object_read(&mod, &mod); + module_obj = &mod; + if (err) + goto out; + } else if (module_obj->kind != DRGN_OBJECT_VALUE) { + drgn_object_init(&mod, drgn_object_program(module_obj)); + err = drgn_object_read(&mod, module_obj); + module_obj = &mod; + if (err) + goto out; } - size_t extension_len; - if (depmod_path_len >= 3 && - (memcmp(depmod_path + depmod_path_len - 3, ".gz", 3) == 0 || - memcmp(depmod_path + depmod_path_len - 3, ".xz", 3) == 0)) - extension_len = 3; - else - extension_len = 0; - char *path; - int fd; - Elf *elf; - err = find_elf_file(&path, &fd, &elf, module_paths, - load->dbinfo->prog->vmcoreinfo.osrelease, - depmod_path_len - extension_len, depmod_path, - extension_len, - depmod_path + depmod_path_len - extension_len); - if (err) - return drgn_debug_info_report_error(load, NULL, NULL, err); - if (!elf) { - return drgn_debug_info_report_error(load, kmod_it->name, - "could not find .ko", - NULL); - } + err = kernel_module_find_or_create_internal(module_obj, ret, new_ret, + create, false); +out: + if (module_obj == &mod) + drgn_object_deinit(&mod); + return err; +} - err = cache_kernel_module_sections(kmod_it, elf); - if (err) { - err = drgn_debug_info_report_error(load, path, - "could not get section addresses", - err); - elf_end(elf); - close(fd); - free(path); - return err; - } +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_linux_kernel_loadable(const struct drgn_object 
*module_obj, + struct drgn_module **ret) +{ + return drgn_module_find_or_create_linux_kernel_loadable_internal(module_obj, + ret, + NULL, + false); +} - err = drgn_debug_info_report_elf(load, path, fd, elf, kmod_it->start, - kmod_it->end, kmod_it->name, NULL); - free(path); - return err; +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *module_obj, + struct drgn_module **ret, + bool *new_ret) +{ + return drgn_module_find_or_create_linux_kernel_loadable_internal(module_obj, + ret, + new_ret, + true); } static struct drgn_error * -report_loaded_kernel_modules(struct drgn_debug_info_load_state *load, - struct kernel_module_table *kmod_table, - struct depmod_index *depmod, bool use_sys_module) +yield_kernel_module(struct linux_kernel_loaded_module_iterator *it, + struct drgn_module **ret, bool *new_ret) { - struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; + struct drgn_program *prog = it->it.prog; - struct kernel_module_iterator kmod_it; - err = kernel_module_iterator_init(&kmod_it, prog, use_sys_module); - if (err) { -kernel_module_iterator_error: - return drgn_debug_info_report_error(load, "kernel modules", - "could not find loaded kernel modules", - err); - } + DRGN_OBJECT(mod, prog); for (;;) { - err = kernel_module_iterator_next(&kmod_it); - if (err == &drgn_stop) { - err = NULL; - break; - } else if (err) { - kernel_module_iterator_deinit(&kmod_it); - goto kernel_module_iterator_error; + uint64_t addr; + err = drgn_object_read_unsigned(&it->node, &addr); + if (err) { +list_walk_err: + if (!drgn_error_is_fatal(err)) { + drgn_error_log_warning(prog, err, + "can't find remaining kernel modules: " + "couldn't read next module: "); + drgn_error_destroy(err); + *ret = NULL; + err = NULL; + } + return err; } - - /* Look for an explicitly-reported file first. 
*/ - if (kmod_table) { - err = report_loaded_kernel_module(load, &kmod_it, - kmod_table); - if (!err) - continue; - else if (err != &drgn_not_found) - break; + if (addr == it->modules_head) { + drgn_log_debug(prog, + "found end of loaded kernel module list"); + *ret = NULL; + return NULL; } - /* - * If it was not reported explicitly and we're also reporting the - * defaults, look for the module at the standard locations unless we've - * already indexed that module. - */ - if (depmod && - !drgn_debug_info_is_indexed(load->dbinfo, kmod_it.name)) { - if (!depmod->addr) { - err = depmod_index_init(depmod, - prog->vmcoreinfo.osrelease); - if (err) { - depmod->addr = NULL; - err = drgn_debug_info_report_error(load, - "kernel modules", - "could not read depmod", - err); - if (err) - break; - depmod = NULL; - continue; - } - } - err = report_default_kernel_module(load, &kmod_it, - depmod); - if (err) - break; + if (it->module_list_iterations_remaining == 0) { + drgn_log_warning(prog, + "can't find remaining kernel modules: " + "too many entries or cycle in modules list"); + *ret = NULL; + return NULL; } - } - kernel_module_iterator_deinit(&kmod_it); - return err; -} + it->module_list_iterations_remaining--; -static struct drgn_error * -report_kernel_modules(struct drgn_debug_info_load_state *load, - struct kernel_module_file *kmods, size_t num_kmods, - bool vmlinux_is_pending) -{ - struct drgn_program *prog = load->dbinfo->prog; - struct drgn_error *err; + err = drgn_object_container_of(&mod, &it->node, it->module_type, + "list"); + if (err) + goto list_walk_err; - if (!num_kmods && !load->load_default) - return NULL; + err = drgn_object_dereference(&mod, &mod); + if (err) + goto list_walk_err; + // We need several fields from the `struct module`. Especially + // for /proc/kcore, it is faster to read the entire structure + // (which is <2kB as of Linux 6.5) from the core dump all at + // once than it is to read each field individually. 
+ err = drgn_object_read(&mod, &mod); + if (err) + goto list_walk_err; - /* - * If we're debugging the running kernel, we can use - * /sys/module/$module/notes and /sys/module/$module/sections instead of - * getting the equivalent information from the core dump. This fast path - * can be disabled via an environment variable for testing. It may also - * be disabled if we encounter permission issues using - * /sys/module/$module/sections. - */ - bool use_sys_module = false; - if (prog->flags & DRGN_PROGRAM_IS_LOCAL) { - char *env = getenv("DRGN_USE_SYS_MODULE"); - use_sys_module = !env || atoi(env); - } - /* - * We need to index vmlinux now so that we can walk the list of modules - * in the kernel. - */ - if (vmlinux_is_pending) { - err = drgn_debug_info_report_flush(load); + err = drgn_object_member(&it->node, &mod, "list"); if (err) - return err; - } + goto list_walk_err; + err = drgn_object_member(&it->node, &it->node, "next"); + if (err) + goto list_walk_err; - struct kernel_module_table kmod_table = HASH_TABLE_INIT; - struct depmod_index depmod; - depmod.addr = NULL; - struct kernel_module_table_iterator it; - for (size_t i = 0; i < num_kmods; i++) { - struct kernel_module_file *kmod = &kmods[i]; - - ssize_t build_id_len = - drgn_elf_gnu_build_id(kmod->elf, &kmod->gnu_build_id); - if (build_id_len < 0) { - err = drgn_debug_info_report_error(load, kmod->path, - NULL, - drgn_error_libelf()); - if (err) - goto out; + err = kernel_module_find_or_create_internal(&mod, ret, new_ret, + true, true); + if (err && !drgn_error_is_fatal(err)) { + drgn_error_log_warning(prog, err, "ignoring module: "); + drgn_error_destroy(err); continue; } - kmod->gnu_build_id_len = build_id_len; - - struct nstring key = kernel_module_table_key(&kmod); - struct hash_pair hp = kernel_module_table_hash(&key); - it = kernel_module_table_search_hashed(&kmod_table, &key, hp); - if (it.entry) { - kmod->next = *it.entry; - *it.entry = kmod; - } else { - if 
(kernel_module_table_insert_searched(&kmod_table, - &kmod, hp, - NULL) == -1) { - err = &drgn_enomem; - goto out; - } - kmod->next = NULL; - } - } - - err = report_loaded_kernel_modules(load, num_kmods ? &kmod_table : NULL, - load->load_default ? &depmod : NULL, - use_sys_module); - if (err) - goto out; - - /* Anything left over was not loaded. */ - for (it = kernel_module_table_first(&kmod_table); it.entry; ) { - struct kernel_module_file *kmod = *it.entry; - it = kernel_module_table_delete_iterator(&kmod_table, it); - do { - err = drgn_debug_info_report_elf(load, kmod->path, - kmod->fd, kmod->elf, 0, - 0, kmod->path, NULL); - kmod->elf = NULL; - kmod->fd = -1; - if (err) - goto out; - kmod = kmod->next; - } while (kmod); - } - err = NULL; -out: - if (depmod.addr) - depmod_index_deinit(&depmod); - kernel_module_table_deinit(&kmod_table); - return err; -} - -static struct drgn_error * -report_vmlinux(struct drgn_debug_info_load_state *load, - bool *vmlinux_is_pending) -{ - static const char * const vmlinux_paths[] = { - /* - * The files under /usr/lib/debug should always have debug - * information, so check for those first. 
- */ - "/usr/lib/debug/boot/vmlinux-%s", - "/usr/lib/debug/lib/modules/%s/vmlinux", - "/boot/vmlinux-%s", - "/lib/modules/%s/build/vmlinux", - "/lib/modules/%s/vmlinux", - NULL, - }; - struct drgn_program *prog = load->dbinfo->prog; - struct drgn_error *err; - - char *path; - int fd; - Elf *elf; - err = find_elf_file(&path, &fd, &elf, vmlinux_paths, - prog->vmcoreinfo.osrelease); - if (err) - return drgn_debug_info_report_error(load, NULL, NULL, err); - if (!elf) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "could not find vmlinux for %s", - prog->vmcoreinfo.osrelease); - return drgn_debug_info_report_error(load, "kernel", NULL, err); - } - - uint64_t start, end; - err = elf_address_range(elf, prog->vmcoreinfo.kaslr_offset, &start, - &end); - if (err) { - err = drgn_debug_info_report_error(load, path, NULL, err); - elf_end(elf); - close(fd); - free(path); return err; } - - err = drgn_debug_info_report_elf(load, path, fd, elf, start, end, - "kernel", vmlinux_is_pending); - free(path); - return err; } -struct drgn_error * -linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load) +static struct drgn_error * +linux_kernel_loaded_module_iterator_next(struct drgn_module_iterator *_it, + struct drgn_module **ret, + bool *new_ret) { - struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; + struct linux_kernel_loaded_module_iterator *it = + container_of(_it, struct linux_kernel_loaded_module_iterator, it); + struct drgn_program *prog = it->it.prog; - struct kernel_module_file *kmods; - if (load->num_paths) { - kmods = malloc_array(load->num_paths, sizeof(*kmods)); - if (!kmods) - return &drgn_enomem; - } else { - kmods = NULL; + if (!it->yielded_vmlinux) { + it->yielded_vmlinux = true; + return yield_vmlinux(it, ret, new_ret); } - /* - * We may need to index vmlinux before we can properly report kernel - * modules. So, this sets aside kernel modules and reports everything - * else. 
- */ - size_t num_kmods = 0; - bool vmlinux_is_pending = false; - for (size_t i = 0; i < load->num_paths; i++) { - const char *path = load->paths[i]; - int fd; - Elf *elf; - err = open_elf_file(path, &fd, &elf); - if (err) { - err = drgn_debug_info_report_error(load, path, NULL, - err); - if (err) - goto out; - continue; + // Start the module list walk if we haven't yet. + if (!it->module_type.type) { + for (int attempt = 1; attempt <= 2; attempt++) { + err = drgn_program_find_type(prog, "struct module", + NULL, &it->module_type); + if (!err) { + err = drgn_program_find_object(prog, "modules", + NULL, + DRGN_FIND_OBJECT_VARIABLE, + &it->node); + } + if (err && err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + if (attempt == 1 && prog->dbinfo.main_module) { + struct drgn_module *module = + prog->dbinfo.main_module; + if (module->debug_file_status + == DRGN_MODULE_FILE_DONT_WANT) { + module->debug_file_status = + DRGN_MODULE_FILE_WANT; + } + if (drgn_module_wants_debug_file(module)) { + err = drgn_load_module_debug_info(&module, + &(size_t){1}); + if (err) + return err; + continue; + } + } + if (!prog->dbinfo.main_module + || drgn_module_wants_debug_file(prog->dbinfo.main_module)) { + drgn_log_warning(prog, + "can't find loaded modules without kernel debug info"); + } else { + drgn_log_debug(prog, + "kernel does not have loadable module support"); + } + *ret = NULL; + return NULL; + } else if (err) { + return err; + } } - - bool is_vmlinux, is_module; - err = identify_kernel_elf(elf, &is_vmlinux, &is_module); - if (err) { - err = drgn_debug_info_report_error(load, path, NULL, - err); - elf_end(elf); - close(fd); - if (err) - goto out; - continue; + if (it->node.kind != DRGN_OBJECT_REFERENCE) { + drgn_log_warning(prog, + "can't find kernel modules: " + "can't get address of modules list"); + *ret = NULL; + return NULL; } - if (is_module) { - struct kernel_module_file *kmod = &kmods[num_kmods++]; - kmod->path = path; - kmod->fd = fd; - kmod->elf = elf; - } 
else if (is_vmlinux) { - uint64_t start, end; - err = elf_address_range(elf, - prog->vmcoreinfo.kaslr_offset, - &start, &end); - if (err) { - elf_end(elf); - close(fd); - err = drgn_debug_info_report_error(load, path, - NULL, err); - if (err) - goto out; - continue; - } - - bool is_new; - err = drgn_debug_info_report_elf(load, path, fd, elf, - start, end, "kernel", - &is_new); - if (err) - goto out; - if (is_new) - vmlinux_is_pending = true; - } else { - err = drgn_debug_info_report_elf(load, path, fd, elf, 0, - 0, NULL, NULL); - if (err) - goto out; + it->modules_head = it->node.address; + err = drgn_object_member(&it->node, &it->node, "next"); + if (!err) + err = drgn_object_read(&it->node, &it->node); + if (err) { + if (drgn_error_is_fatal(err)) + return err; + drgn_error_log_warning(prog, err, + "can't find kernel modules: " + "couldn't read modules list: "); + drgn_error_destroy(err); + *ret = NULL; + return NULL; } } - if (load->load_main && !vmlinux_is_pending && - !drgn_debug_info_is_indexed(load->dbinfo, "kernel")) { - err = report_vmlinux(load, &vmlinux_is_pending); - if (err) - goto out; - } + return yield_kernel_module(it, ret, new_ret); +} - err = report_kernel_modules(load, kmods, num_kmods, vmlinux_is_pending); -out: - for (size_t i = 0; i < num_kmods; i++) { - elf_end(kmods[i].elf); - if (kmods[i].fd != -1) - close(kmods[i].fd); - } - free(kmods); - return err; +struct drgn_error * +linux_kernel_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct linux_kernel_loaded_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->it, prog, + linux_kernel_loaded_module_iterator_destroy, + linux_kernel_loaded_module_iterator_next); + it->module_list_iterations_remaining = MAX_MODULE_LIST_ITERATIONS; + drgn_object_init(&it->node, prog); + *ret = &it->it; + return NULL; } diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 
78002f5aa..16e6f2866 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -6,7 +6,8 @@ #include "drgn_internal.h" -struct drgn_debug_info_load_state; +struct depmod_index; +struct drgn_module_standard_files_state; struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog); @@ -23,8 +24,19 @@ struct drgn_error *proc_kallsyms_symbol_addr(const char *name, struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog); +void depmod_index_deinit(struct depmod_index *depmod); + +struct drgn_error * +linux_kernel_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret); + +struct drgn_error * +drgn_module_try_vmlinux_files(struct drgn_module *module, + struct drgn_module_standard_files_state *state); + struct drgn_error * -linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load); +drgn_module_try_linux_kmod_files(struct drgn_module *module, + struct drgn_module_standard_files_state *state); #define KDUMP_SIGNATURE "KDUMP " #define KDUMP_SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 809fd6b50..d30b4f6da 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -302,6 +302,10 @@ static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) return NULL; } + err = drgn_elf_file_apply_relocations(module->debug_file); + if (err) + return err; + // Since Linux kernel b9f174c811e3 ("x86/unwind/orc: Add ELF section // with ORC version identifier") (in v6.4), which was also backported to // Linux 6.3.10, vmlinux and kernel modules have a .orc_header ELF diff --git a/libdrgn/program.c b/libdrgn/program.c index 0f3e4db1b..c06c55419 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -28,10 +27,12 @@ #include "language.h" #include "log.h" #include "linux_kernel.h" +#include "log.h" #include "memory_reader.h" #include "minmax.h" #include 
"object.h" #include "program.h" +#include "serialize.h" #include "symbol.h" #include "util.h" #include "vector.h" @@ -77,7 +78,27 @@ drgn_program_platform(struct drgn_program *prog) LIBDRGN_PUBLIC const struct drgn_language * drgn_program_language(struct drgn_program *prog) { - return prog->lang ? prog->lang : &drgn_default_language; + if (prog->lang) + return prog->lang; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { + prog->lang = &drgn_language_c; + return prog->lang; + } + if (!prog->tried_main_language) { + prog->tried_main_language = true; + prog->lang = drgn_debug_info_main_language(&prog->dbinfo); + if (prog->lang) { + drgn_log_debug(prog, + "set default language to %s from main()", + prog->lang->name); + return prog->lang; + } else { + drgn_log_debug(prog, + "couldn't find language of main(); defaulting to %s", + drgn_default_language.name); + } + } + return &drgn_default_language; } LIBDRGN_PUBLIC void drgn_program_set_language(struct drgn_program *prog, @@ -760,63 +781,90 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) return err; } -/* Set the default language from the language of "main". 
 */ -static void drgn_program_set_language_from_main(struct drgn_program *prog) +struct drgn_error *drgn_program_cache_auxv(struct drgn_program *prog) { - struct drgn_error *err; + if (prog->auxv_cached) + return NULL; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - return; - const struct drgn_language *lang; - err = drgn_debug_info_main_language(&prog->dbinfo, &lang); - if (err) { - drgn_error_destroy(err); - return; + _cleanup_close_ int fd = -1; + const void *note; + size_t note_size; +#define FORMAT "/proc/%ld/auxv" + char path[sizeof(FORMAT) + - sizeof("%ld") + + max_decimal_length(long) + + 1]; + if (drgn_program_is_userspace_process(prog)) { + snprintf(path, sizeof(path), FORMAT, (long)prog->pid); +#undef FORMAT + fd = open(path, O_RDONLY); + if (fd < 0) + return drgn_error_create_os("open", errno, path); + drgn_log_debug(prog, "parsing %s", path); + } else { + assert(drgn_program_is_userspace_core(prog)); + if (find_elf_note(prog->core, "CORE", NT_AUXV, &note, + &note_size)) + return drgn_error_libelf(); + if (!note) { + return drgn_error_create(DRGN_ERROR_OTHER, + "core file is missing NT_AUXV"); + } + drgn_log_debug(prog, "parsing NT_AUXV"); } - if (lang) - prog->lang = lang; -} -static int drgn_set_platform_from_dwarf(Dwfl_Module *module, void **userdatap, - const char *name, Dwarf_Addr base, - Dwarf *dwarf, Dwarf_Addr bias, - void *arg) -{ - Elf *elf; - GElf_Ehdr ehdr_mem, *ehdr; - struct drgn_platform platform; + memset(&prog->auxv, 0, sizeof(prog->auxv)); - elf = dwarf_getelf(dwarf); - if (!elf) - return DWARF_CB_OK; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return DWARF_CB_OK; - drgn_platform_from_elf(ehdr, &platform); - drgn_program_set_platform(arg, &platform); - return DWARF_CB_ABORT; -} - -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, - size_t n, bool load_default, bool load_main) -{ - struct drgn_error *err; - - if (!n && !load_default && !load_main) - return NULL; - - 
drgn_blocking_guard(prog); - err = drgn_debug_info_load(&prog->dbinfo, paths, n, load_default, load_main); - if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { - if (!prog->lang) - drgn_program_set_language_from_main(prog); - if (!prog->has_platform) { - dwfl_getdwarf(prog->dbinfo.dwfl, - drgn_set_platform_from_dwarf, prog, 0); + bool is_64_bit = drgn_platform_is_64_bit(&prog->platform); + bool bswap = drgn_platform_bswap(&prog->platform); + size_t aux_size = is_64_bit ? 16 : 8; +#define visit_aux_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(a_type); \ + visit_scalar_member(a_un.a_val); \ +} while (0) + for (;;) { + Elf64_auxv_t auxv; + if (fd >= 0) { + ssize_t r = read_all(fd, &auxv, aux_size); + if (r < 0) + return drgn_error_create_os("read", errno, path); + if (r < aux_size) + break; + deserialize_struct64_inplace(&auxv, Elf32_auxv_t, + visit_aux_members, + is_64_bit, bswap); + } else { + if (note_size < aux_size) + break; + deserialize_struct64(&auxv, Elf32_auxv_t, + visit_aux_members, note, is_64_bit, + bswap); + note = (char *)note + aux_size; + note_size -= aux_size; + } + if (auxv.a_type == 0 && auxv.a_un.a_val == 0) + break; + switch (auxv.a_type) { + case AT_PHDR: + drgn_log_debug(prog, "found AT_PHDR 0x%" PRIx64, + auxv.a_un.a_val); + prog->auxv.at_phdr = auxv.a_un.a_val; + break; + case AT_PHNUM: + drgn_log_debug(prog, "found AT_PHNUM %" PRIu64, + auxv.a_un.a_val); + prog->auxv.at_phnum = auxv.a_un.a_val; + break; + case AT_SYSINFO_EHDR: + drgn_log_debug(prog, "found AT_SYSINFO_EHDR 0x%" PRIx64, + auxv.a_un.a_val); + prog->auxv.at_sysinfo_ehdr = auxv.a_un.a_val; + break; } } - return err; +#undef visit_aux_members + prog->auxv_cached = true; + return NULL; } static struct drgn_error *get_prstatus_pid(struct drgn_program *prog, const char *data, diff --git a/libdrgn/program.h b/libdrgn/program.h index 7092db913..1dfe47fee 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -30,7 +30,9 @@ #include "vector.h" 
struct drgn_object_finder; +struct drgn_symbol; struct drgn_symbol_finder; +struct drgn_type_finder; /** * @defgroup Internals Internals @@ -118,6 +120,11 @@ struct drgn_program { /* Default language of the program. */ const struct drgn_language *lang; struct drgn_platform platform; + /** + * Whether we have tried determining the default language from "main" + * since the last time that debug info was added. + */ + bool tried_main_language; bool has_platform; enum drgn_program_flags flags; @@ -147,6 +154,13 @@ struct drgn_program { struct { /** Cached `pr_fname` from `NT_PRPSINFO` note. */ const char *core_dump_fname_cached; + /** Cache of important parts of auxiliary vector. */ + struct { + uint64_t at_phdr; + uint64_t at_phnum; + uint64_t at_sysinfo_ehdr; + } auxv; + bool auxv_cached; }; /* @@ -295,6 +309,8 @@ struct drgn_error *drgn_program_init_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid); +struct drgn_error *drgn_program_cache_auxv(struct drgn_program *prog); + /** * Return whether a @ref drgn_program is a userspace process running on the * local machine. 
diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 97ace8b76..41a2ba6d3 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -45,6 +45,10 @@ static inline PyObject *PyObject_CallNoArgs(PyObject *func) { return PyObject_CallFunctionObjArgs(func, NULL); } +static inline PyObject *PyObject_CallOneArg(PyObject *callable, PyObject *arg) +{ + return PyObject_CallFunctionObjArgs(callable, arg, NULL); +} #endif #if PY_VERSION_HEX < 0x030d00a1 @@ -136,6 +140,26 @@ typedef struct { const struct drgn_language *language; } Language; +typedef struct { + PyObject_HEAD + struct drgn_module *module; +} Module; + +typedef struct { + PyObject_HEAD + struct drgn_module_iterator *it; +} ModuleIterator; + +typedef struct { + PyObject_HEAD + struct drgn_module *module; +} ModuleSectionAddresses; + +typedef struct { + PyObject_HEAD + struct drgn_module_section_address_iterator *it; +} ModuleSectionAddressesIterator; + typedef struct { PyObject_HEAD DrgnObject *obj; @@ -241,33 +265,45 @@ typedef struct { extern PyObject *Architecture_class; extern PyObject *FindObjectFlags_class; +extern PyObject *ModuleFileStatus_class; +extern PyObject *ModuleSectionAddresses_class; extern PyObject *PlatformFlags_class; extern PyObject *PrimitiveType_class; extern PyObject *ProgramFlags_class; extern PyObject *Qualifiers_class; +extern PyObject *SupplementaryFileKind_class; extern PyObject *SymbolBinding_class; extern PyObject *SymbolKind_class; extern PyObject *TypeKind_class; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; +extern PyTypeObject ExtraModule_type; extern PyTypeObject FaultError_type; extern PyTypeObject Language_type; +extern PyTypeObject MainModule_type; +extern PyTypeObject ModuleIteratorWithNew_type; +extern PyTypeObject ModuleIterator_type; +extern PyTypeObject ModuleSectionAddressesIterator_type; +extern PyTypeObject Module_type; extern PyTypeObject ObjectIterator_type; extern PyTypeObject Platform_type; extern PyTypeObject 
Program_type; extern PyTypeObject Register_type; +extern PyTypeObject RelocatableModule_type; +extern PyTypeObject SharedLibraryModule_type; extern PyTypeObject StackFrame_type; extern PyTypeObject StackTrace_type; -extern PyTypeObject Symbol_type; extern PyTypeObject SymbolIndex_type; -extern PyTypeObject Thread_type; +extern PyTypeObject Symbol_type; extern PyTypeObject ThreadIterator_type; +extern PyTypeObject Thread_type; extern PyTypeObject TypeEnumerator_type; -extern PyTypeObject TypeKindSet_type; extern PyTypeObject TypeKindSetIterator_type; +extern PyTypeObject TypeKindSet_type; extern PyTypeObject TypeMember_type; extern PyTypeObject TypeParameter_type; extern PyTypeObject TypeTemplateParameter_type; +extern PyTypeObject VdsoModule_type; extern PyObject *MissingDebugInfoError; extern PyObject *ObjectAbsentError; extern PyObject *OutOfBoundsError; @@ -284,6 +320,11 @@ void *set_error_type_name(const char *format, #define call_tp_alloc(type) ((type *)type##_type.tp_alloc(&type##_type, 0)) +PyObject *Module_wrap(struct drgn_module *module); +PyObject *Module_and_bool_wrap(struct drgn_module *module, bool b); +int add_WantedSupplementaryFile(PyObject *m); +int init_module_section_addresses(void); + PyObject *Language_wrap(const struct drgn_language *language); int language_converter(PyObject *o, void *p); int add_languages(void); @@ -348,7 +389,9 @@ DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds); DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds); int append_string(PyObject *parts, const char *s); +int append_u64_hex(PyObject *parts, uint64_t value); int append_format(PyObject *parts, const char *format, ...); +int append_attr_repr(PyObject *parts, PyObject *obj, const char *attr_name); PyObject *join_strings(PyObject *parts); // Implementation of _repr_pretty_() for IPython/Jupyter that just calls str(). 
PyObject *repr_pretty_from_str(PyObject *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index c103f0dec..34a0aa037 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -293,6 +293,17 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) if (add_module_constants(m) || add_type(m, &Language_type) || add_languages() || add_type(m, &DrgnObject_type) || + add_type(m, &Module_type) || + add_type(m, &MainModule_type) || + add_type(m, &SharedLibraryModule_type) || + add_type(m, &VdsoModule_type) || + add_type(m, &RelocatableModule_type) || + add_type(m, &ExtraModule_type) || + PyType_Ready(&ModuleIterator_type) || + PyType_Ready(&ModuleIteratorWithNew_type) || + add_WantedSupplementaryFile(m) || + init_module_section_addresses() || + PyType_Ready(&ModuleSectionAddressesIterator_type) || PyType_Ready(&ObjectIterator_type) || add_type(m, &Platform_type) || add_type(m, &Program_type) || diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c new file mode 100644 index 000000000..06067f845 --- /dev/null +++ b/libdrgn/python/module.c @@ -0,0 +1,593 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" +#include "../util.h" + +static PyObject *WantedSupplementaryFile_class; + +int add_WantedSupplementaryFile(PyObject *m) +{ + _cleanup_pydecref_ PyObject *collections = + PyImport_ImportModule("collections"); + _cleanup_pydecref_ PyObject *namedtuple = + PyObject_GetAttrString(collections, "namedtuple"); + if (!namedtuple) + return -1; + WantedSupplementaryFile_class = + PyObject_CallFunction(namedtuple, "s[ssss]", + "WantedSupplementaryFile", "kind", "path", + "supplementary_path", "checksum"); + if (!WantedSupplementaryFile_class) + return -1; + Py_INCREF(WantedSupplementaryFile_class); + if (PyModule_AddObject(m, "WantedSupplementaryFile", + WantedSupplementaryFile_class) == -1) { + Py_DECREF(WantedSupplementaryFile_class); + Py_DECREF(WantedSupplementaryFile_class); + return -1; + } + return 0; +} + +PyObject *Module_wrap(struct drgn_module *module) +{ + PyTypeObject *type; + SWITCH_ENUM(drgn_module_kind(module)) { + case DRGN_MODULE_MAIN: + type = &MainModule_type; + break; + case DRGN_MODULE_SHARED_LIBRARY: + type = &SharedLibraryModule_type; + break; + case DRGN_MODULE_VDSO: + type = &VdsoModule_type; + break; + case DRGN_MODULE_RELOCATABLE: + type = &RelocatableModule_type; + break; + case DRGN_MODULE_EXTRA: + type = &ExtraModule_type; + break; + default: + UNREACHABLE(); + } + Module *ret = (Module *)type->tp_alloc(type, 0); + if (ret) { + struct drgn_program *prog = drgn_module_program(module); + Py_INCREF(container_of(prog, Program, prog)); + ret->module = module; + } + return (PyObject *)ret; +} + +PyObject *Module_and_bool_wrap(struct drgn_module *module, bool b) +{ + return Py_BuildValue("NO", Module_wrap(module), b ? 
Py_True : Py_False); +} + +static void Module_dealloc(Module *self) +{ + if (self->module) { + struct drgn_program *prog = drgn_module_program(self->module); + Py_DECREF(container_of(prog, Program, prog)); + } + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int append_module_repr_common(PyObject *parts, Module *self, + const char *method_name) +{ + if (append_format(parts, "prog.%s_module(name=", method_name) < 0 || + append_attr_repr(parts, (PyObject *)self, "name") < 0) + return -1; + return 0; +} + +static PyObject *Module_repr(Module *self) +{ + struct drgn_module_key key = drgn_module_key(self->module); + + _cleanup_pydecref_ PyObject *parts = PyList_New(0); + if (!parts) + return NULL; + + SWITCH_ENUM(key.kind) { + case DRGN_MODULE_MAIN: + if (append_module_repr_common(parts, self, "main") < 0) + return NULL; + break; + case DRGN_MODULE_SHARED_LIBRARY: + if (append_module_repr_common(parts, self, + "shared_library") + || append_string(parts, ", dynamic_address=") + || append_u64_hex(parts, + key.shared_library.dynamic_address)) + return NULL; + break; + case DRGN_MODULE_VDSO: + if (append_module_repr_common(parts, self, "vdso") + || append_string(parts, ", dynamic_address=") + || append_u64_hex(parts, key.vdso.dynamic_address)) + return NULL; + break; + case DRGN_MODULE_RELOCATABLE: + if (append_module_repr_common(parts, self, "relocatable") + || append_string(parts, ", address=") + || append_u64_hex(parts, key.relocatable.address)) + return NULL; + break; + case DRGN_MODULE_EXTRA: + if (append_module_repr_common(parts, self, "extra") + || append_string(parts, ", id=") + || append_u64_hex(parts, key.extra.id)) + return NULL; + break; + default: + UNREACHABLE(); + } + if (append_string(parts, ")")) + return NULL; + return join_strings(parts); +} + +static PyObject *Module_richcompare(Module *self, PyObject *other, int op) +{ + if ((op != Py_EQ && op != Py_NE) || + !PyObject_TypeCheck(other, &Module_type)) + Py_RETURN_NOTIMPLEMENTED; + int ret = 
self->module == ((Module *)other)->module; + if (op == Py_NE) + ret = !ret; + Py_RETURN_BOOL(ret); +} + +static Py_hash_t Module_hash(Module *self) +{ + return _Py_HashPointer(self->module); +} + +static PyObject *Module_wanted_supplementary_debug_file(Module *self) +{ + const char *debug_file_path, *supplementary_path; + const void *checksum; + size_t checksum_len; + enum drgn_supplementary_file_kind kind = + drgn_module_wanted_supplementary_debug_file(self->module, + &debug_file_path, + &supplementary_path, + &checksum, + &checksum_len); + if (kind == DRGN_SUPPLEMENTARY_FILE_NONE) { + return PyErr_Format(PyExc_ValueError, + "module does not want supplementary debug file"); + } + return PyObject_CallFunction(WantedSupplementaryFile_class, + "NO&O&y#", + PyObject_CallFunction(SupplementaryFileKind_class, + "k", + (unsigned long)kind), + PyUnicode_DecodeFSDefault, debug_file_path, + PyUnicode_DecodeFSDefault, + supplementary_path, checksum, + (Py_ssize_t)checksum_len); +} + +static PyObject *Module_try_file(Module *self, PyObject *args, PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = { "path", "fd", "force", NULL }; + struct path_arg path = {}; + int fd = -1; + int force = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|$ip:try_file", keywords, + path_converter, &path, &fd, &force)) + return NULL; + err = drgn_module_try_file(self->module, path.path, fd, force); + path_cleanup(&path); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + +static Program *Module_get_prog(Module *self, void *arg) +{ + Program *prog = + container_of(drgn_module_program(self->module), Program, prog); + Py_INCREF(prog); + return prog; +} + +static PyObject *Module_get_name(Module *self, void *arg) +{ + return PyUnicode_DecodeFSDefault(drgn_module_name(self->module)); +} + +static PyObject *Module_get_address_range(Module *self, void *arg) +{ + uint64_t start, end; + if (!drgn_module_address_range(self->module, &start, &end)) + Py_RETURN_NONE; + 
return Py_BuildValue("KK", (unsigned long long)start, + (unsigned long long)end); +} + +static int Module_set_address_range(Module *self, PyObject *value, void *arg) +{ + struct drgn_error *err; + if (value == Py_None) { + err = drgn_module_set_address_range(self->module, -1, -1); + } else { + if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) { + PyErr_SetString(PyExc_TypeError, + "address_range must be (int, int) or None"); + return -1; + } + _cleanup_pydecref_ PyObject *start_obj = + PyNumber_Index(PyTuple_GET_ITEM(value, 0)); + if (!start_obj) + return -1; + _cleanup_pydecref_ PyObject *end_obj = + PyNumber_Index(PyTuple_GET_ITEM(value, 1)); + if (!end_obj) + return -1; + uint64_t start = PyLong_AsUint64(start_obj); + uint64_t end = PyLong_AsUint64(end_obj); + if (start == UINT64_MAX && end == UINT64_MAX) { + PyErr_SetString(PyExc_ValueError, + "invalid module address range"); + return -1; + } + err = drgn_module_set_address_range(self->module, start, end); + } + if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + +static PyObject *Module_get_build_id(Module *self, void *arg) +{ + const void *build_id; + size_t build_id_len; + if (!drgn_module_build_id(self->module, &build_id, &build_id_len)) + Py_RETURN_NONE; + return PyBytes_FromStringAndSize(build_id, build_id_len); +} + +static int Module_set_build_id(Module *self, PyObject *value, void *arg) +{ + struct drgn_error *err; + if (value == Py_None) { + err = drgn_module_set_build_id(self->module, NULL, 0); + } else { + Py_buffer buffer; + int ret = PyObject_GetBuffer(value, &buffer, PyBUF_SIMPLE); + if (ret) + return ret; + + if (buffer.len == 0) { + PyErr_SetString(PyExc_ValueError, + "build ID cannot be empty"); + PyBuffer_Release(&buffer); + return -1; + } + + err = drgn_module_set_build_id(self->module, buffer.buf, + buffer.len); + PyBuffer_Release(&buffer); + } + if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + +#define MODULE_FILE_STATUS_GETSET(which) \ +static 
PyObject *Module_wants_##which##_file(Module *self) \ +{ \ + Py_RETURN_BOOL(drgn_module_wants_##which##_file(self->module)); \ +} \ + \ +static PyObject *Module_get_##which##_file_status(Module *self, void *arg) \ +{ \ + return PyObject_CallFunction(ModuleFileStatus_class, "i", \ + (int)drgn_module_##which##_file_status(self->module));\ +} \ + \ +static int Module_set_##which##_file_status(Module *self, PyObject *value, \ + void *arg) \ +{ \ + if (!PyObject_TypeCheck(value, \ + (PyTypeObject *)ModuleFileStatus_class)) { \ + PyErr_SetString(PyExc_TypeError, \ + #which "_file_status must be ModuleFileStatus");\ + return -1; \ + } \ + _cleanup_pydecref_ PyObject *value_obj = \ + PyObject_GetAttrString(value, "value"); \ + if (!value_obj) \ + return -1; \ + long status = PyLong_AsLong(value_obj); \ + if (status == -1 && PyErr_Occurred()) \ + return -1; \ + \ + if (drgn_module_set_##which##_file_status(self->module, status)) \ + return 0; \ + \ + _cleanup_pydecref_ PyObject *old_status = \ + Module_get_##which##_file_status(self, NULL); \ + if (!old_status) \ + return -1; \ + PyErr_Format(PyExc_ValueError, \ + "cannot change " #which "_file_status from %S to %S", \ + old_status, value); \ + return -1; \ +} +MODULE_FILE_STATUS_GETSET(loaded) +MODULE_FILE_STATUS_GETSET(debug) + +static PyObject *Module_get_loaded_file_path(Module *self, void *arg) +{ + const char *path = drgn_module_loaded_file_path(self->module); + if (!path) + Py_RETURN_NONE; + return PyUnicode_DecodeFSDefault(path); +} + +static PyObject *Module_get_loaded_file_bias(Module *self, void *arg) +{ + if (!drgn_module_loaded_file_path(self->module)) + Py_RETURN_NONE; + return PyLong_FromUint64(drgn_module_loaded_file_bias(self->module)); +} + +static PyObject *Module_get_debug_file_path(Module *self, void *arg) +{ + const char *path = drgn_module_debug_file_path(self->module); + if (!path) + Py_RETURN_NONE; + return PyUnicode_DecodeFSDefault(path); +} + +static PyObject *Module_get_debug_file_bias(Module 
*self, void *arg) +{ + if (!drgn_module_debug_file_path(self->module)) + Py_RETURN_NONE; + return PyLong_FromUint64(drgn_module_debug_file_bias(self->module)); +} + +static PyObject *Module_get_supplementary_debug_file_kind(Module *self, + void *arg) +{ + enum drgn_supplementary_file_kind kind = + drgn_module_supplementary_debug_file_kind(self->module); + if (kind == DRGN_SUPPLEMENTARY_FILE_NONE) + Py_RETURN_NONE; + return PyObject_CallFunction(SupplementaryFileKind_class, "k", + (unsigned long)kind); +} + +static PyObject *Module_get_supplementary_debug_file_path(Module *self, + void *arg) +{ + const char *path = + drgn_module_supplementary_debug_file_path(self->module); + if (!path) + Py_RETURN_NONE; + return PyUnicode_DecodeFSDefault(path); +} + +static PyMethodDef Module_methods[] = { + {"wants_loaded_file", (PyCFunction)Module_wants_loaded_file, + METH_NOARGS, drgn_Module_wants_loaded_file_DOC}, + {"wants_debug_file", (PyCFunction)Module_wants_debug_file, METH_NOARGS, + drgn_Module_wants_debug_file_DOC}, + {"wanted_supplementary_debug_file", + (PyCFunction)Module_wanted_supplementary_debug_file, METH_NOARGS, + drgn_Module_wanted_supplementary_debug_file_DOC}, + {"try_file", (PyCFunction)Module_try_file, + METH_VARARGS | METH_KEYWORDS, drgn_Module_try_file_DOC}, + {}, +}; + +static PyGetSetDef Module_getset[] = { + {"prog", (getter)Module_get_prog, NULL, drgn_Module_prog_DOC}, + {"name", (getter)Module_get_name, NULL, drgn_Module_name_DOC}, + {"address_range", (getter)Module_get_address_range, + (setter)Module_set_address_range, drgn_Module_address_range_DOC}, + {"build_id", (getter)Module_get_build_id, (setter)Module_set_build_id, + drgn_Module_build_id_DOC}, + {"loaded_file_status", (getter)Module_get_loaded_file_status, + (setter)Module_set_loaded_file_status, + drgn_Module_loaded_file_status_DOC}, + {"loaded_file_path", (getter)Module_get_loaded_file_path, NULL, + drgn_Module_loaded_file_path_DOC}, + {"loaded_file_bias", (getter)Module_get_loaded_file_bias, 
NULL, + drgn_Module_loaded_file_bias_DOC}, + {"debug_file_status", (getter)Module_get_debug_file_status, + (setter)Module_set_debug_file_status, + drgn_Module_debug_file_status_DOC}, + {"debug_file_path", (getter)Module_get_debug_file_path, NULL, + drgn_Module_debug_file_path_DOC}, + {"debug_file_bias", (getter)Module_get_debug_file_bias, NULL, + drgn_Module_debug_file_bias_DOC}, + {"supplementary_debug_file_kind", + (getter)Module_get_supplementary_debug_file_kind, NULL, + drgn_Module_supplementary_debug_file_kind_DOC}, + {"supplementary_debug_file_path", + (getter)Module_get_supplementary_debug_file_path, NULL, + drgn_Module_supplementary_debug_file_path_DOC}, + {}, +}; + +PyTypeObject Module_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.Module", + .tp_basicsize = sizeof(Module), + .tp_dealloc = (destructor)Module_dealloc, + .tp_repr = (reprfunc)Module_repr, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_doc = drgn_Module_DOC, + .tp_richcompare = (richcmpfunc)Module_richcompare, + .tp_hash = (hashfunc)Module_hash, + .tp_methods = Module_methods, + .tp_getset = Module_getset, +}; + +PyTypeObject MainModule_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.MainModule", + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_MainModule_DOC, + .tp_base = &Module_type, +}; + +static PyObject *SharedLibraryModule_get_dynamic_address(Module *self, void *arg) +{ + struct drgn_module_key key = drgn_module_key(self->module); + return PyLong_FromUint64(key.shared_library.dynamic_address); +} + +static PyGetSetDef SharedLibraryModule_getset[] = { + {"dynamic_address", (getter)SharedLibraryModule_get_dynamic_address, + NULL, drgn_SharedLibraryModule_dynamic_address_DOC}, + {}, +}; + +PyTypeObject SharedLibraryModule_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.SharedLibraryModule", + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_SharedLibraryModule_DOC, + .tp_getset = SharedLibraryModule_getset, + .tp_base = 
&Module_type, +}; + +static PyObject *VdsoModule_get_dynamic_address(Module *self, void *arg) +{ + struct drgn_module_key key = drgn_module_key(self->module); + return PyLong_FromUint64(key.vdso.dynamic_address); +} + +static PyGetSetDef VdsoModule_getset[] = { + {"dynamic_address", (getter)VdsoModule_get_dynamic_address, NULL, + drgn_VdsoModule_dynamic_address_DOC}, + {}, +}; + +PyTypeObject VdsoModule_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.VdsoModule", + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_VdsoModule_DOC, + .tp_getset = VdsoModule_getset, + .tp_base = &Module_type, +}; + +static PyObject *RelocatableModule_get_address(Module *self, void *arg) +{ + struct drgn_module_key key = drgn_module_key(self->module); + return PyLong_FromUint64(key.relocatable.address); +} + +static PyObject *RelocatableModule_get_section_addresses(PyObject *self, + void *arg) +{ + return PyObject_CallOneArg(ModuleSectionAddresses_class, self); +} + +static PyGetSetDef RelocatableModule_getset[] = { + {"address", (getter)RelocatableModule_get_address, NULL, + drgn_RelocatableModule_address_DOC}, + {"section_addresses", RelocatableModule_get_section_addresses, + NULL, drgn_RelocatableModule_section_addresses_DOC}, + {}, +}; + +PyTypeObject RelocatableModule_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.RelocatableModule", + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_RelocatableModule_DOC, + .tp_getset = RelocatableModule_getset, + .tp_base = &Module_type, +}; + +static PyObject *ExtraModule_get_id(Module *self, void *arg) +{ + struct drgn_module_key key = drgn_module_key(self->module); + return PyLong_FromUint64(key.extra.id); +} + +static PyGetSetDef ExtraModule_getset[] = { + {"id", (getter)ExtraModule_get_id, NULL, drgn_ExtraModule_id_DOC}, + {}, +}; + +PyTypeObject ExtraModule_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.ExtraModule", + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_ExtraModule_DOC, + .tp_getset 
= ExtraModule_getset, + .tp_base = &Module_type, +}; + +static void ModuleIterator_dealloc(ModuleIterator *self) +{ + if (self->it) { + struct drgn_program *prog = + drgn_module_iterator_program(self->it); + Py_DECREF(container_of(prog, Program, prog)); + drgn_module_iterator_destroy(self->it); + } + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject *ModuleIterator_next(ModuleIterator *self) +{ + struct drgn_error *err; + struct drgn_module *module; + err = drgn_module_iterator_next(self->it, &module, NULL); + if (err) + return set_drgn_error(err); + if (!module) + return NULL; + return Module_wrap(module); +} + +static PyObject *ModuleIteratorWithNew_next(ModuleIterator *self) +{ + struct drgn_error *err; + struct drgn_module *module; + bool new; + err = drgn_module_iterator_next(self->it, &module, &new); + if (err) + return set_drgn_error(err); + if (!module) + return NULL; + return Module_and_bool_wrap(module, new); +} + +PyTypeObject ModuleIterator_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn._ModuleIterator", + .tp_basicsize = sizeof(ModuleIterator), + .tp_dealloc = (destructor)ModuleIterator_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, + .tp_iternext = (iternextfunc)ModuleIterator_next, +}; + +PyTypeObject ModuleIteratorWithNew_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn._ModuleIteratorWithNew", + .tp_basicsize = sizeof(ModuleIterator), + .tp_dealloc = (destructor)ModuleIterator_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, + .tp_iternext = (iternextfunc)ModuleIteratorWithNew_next, +}; diff --git a/libdrgn/python/module_section_addresses.c b/libdrgn/python/module_section_addresses.c new file mode 100644 index 000000000..76d75fc6d --- /dev/null +++ b/libdrgn/python/module_section_addresses.c @@ -0,0 +1,260 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" +#include "../cleanup.h" +#include "../util.h" + +PyObject *ModuleSectionAddresses_class; + +static ModuleSectionAddresses *ModuleSectionAddresses_new(PyTypeObject *subtype, + PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"module", NULL}; + Module *module; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O!:_ModuleSectionAddresses", keywords, + &Module_type, &module)) + return NULL; + ModuleSectionAddresses *ret = + (ModuleSectionAddresses *)subtype->tp_alloc(subtype, 0); + if (ret) { + struct drgn_program *prog = drgn_module_program(module->module); + Py_INCREF(container_of(prog, Program, prog)); + ret->module = module->module; + } + return ret; +} + +static void ModuleSectionAddresses_dealloc(ModuleSectionAddresses *self) +{ + if (self->module) { + struct drgn_program *prog = drgn_module_program(self->module); + Py_DECREF(container_of(prog, Program, prog)); + } + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static inline void +drgn_module_section_address_iterator_destroyp(struct drgn_module_section_address_iterator **itp) +{ + drgn_module_section_address_iterator_destroy(*itp); +} + +static PyObject *ModuleSectionAddresses_repr(ModuleSectionAddresses *self) +{ + struct drgn_error *err; + + _cleanup_(drgn_module_section_address_iterator_destroyp) + struct drgn_module_section_address_iterator *it = NULL; + err = drgn_module_section_address_iterator_create(self->module, &it); + if (err) + return set_drgn_error(err); + + _cleanup_pydecref_ PyObject *parts = PyList_New(0); + if (!parts) + return NULL; + if (append_string(parts, "ModuleSectionAddresses(")) + return NULL; + bool first = true; + for (;;) { + const char *name; + uint64_t address; + err = drgn_module_section_address_iterator_next(it, &name, + &address); + if (err) + return set_drgn_error(err); + if (!name) + break; + + _cleanup_pydecref_ PyObject *name_obj = + PyUnicode_FromString(name); + if (!name_obj) + return 
NULL; + if (append_format(parts, "%s%R: ", first ? "{" : ", ", name_obj) + || append_u64_hex(parts, address)) + return NULL; + first = false; + } + if (append_string(parts, first ? ")" : "})")) + return NULL; + return join_strings(parts); +} + +static Py_ssize_t ModuleSectionAddresses_length(ModuleSectionAddresses *self) +{ + size_t ret; + struct drgn_error *err = + drgn_module_num_section_addresses(self->module, &ret); + if (err) { + set_drgn_error(err); + return -1; + } + return ret; +} + +static PyObject *ModuleSectionAddresses_subscript(ModuleSectionAddresses *self, + PyObject *key) +{ + if (!PyUnicode_Check(key)) { + PyErr_SetObject(PyExc_KeyError, key); + return NULL; + } + const char *name = PyUnicode_AsUTF8(key); + if (!name) + return NULL; + uint64_t address; + struct drgn_error *err = drgn_module_get_section_address(self->module, + name, + &address); + if (err && err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + PyErr_SetObject(PyExc_KeyError, key); + return NULL; + } else if (err) { + return set_drgn_error(err); + } + return PyLong_FromUint64(address); +} + +static int ModuleSectionAddresses_ass_subscript(ModuleSectionAddresses *self, + PyObject *key, + PyObject *value) +{ + struct drgn_error *err; + if (value) { + if (!PyUnicode_Check(key)) { + PyErr_SetString(PyExc_TypeError, + "section_addresses key must be str"); + return -1; + } + const char *name = PyUnicode_AsUTF8(key); + if (!name) + return -1; + uint64_t address = PyLong_AsUint64(value); + if (address == (uint64_t)-1 && PyErr_Occurred()) + return -1; + err = drgn_module_set_section_address(self->module, name, + address); + } else { + if (!PyUnicode_Check(key)) { + PyErr_SetObject(PyExc_KeyError, key); + return -1; + } + const char *name = PyUnicode_AsUTF8(key); + if (!name) + return -1; + err = drgn_module_delete_section_address(self->module, name); + if (err && err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + PyErr_SetObject(PyExc_KeyError, key); + return -1; + } + } 
+ if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + +static ModuleSectionAddressesIterator * +ModuleSectionAddresses_iter(ModuleSectionAddresses *self) +{ + struct drgn_error *err; + _cleanup_pydecref_ ModuleSectionAddressesIterator *it = + call_tp_alloc(ModuleSectionAddressesIterator); + if (!it) + return NULL; + err = drgn_module_section_address_iterator_create(self->module, + &it->it); + if (err) + return set_drgn_error(err); + struct drgn_program *prog = drgn_module_program(self->module); + Py_INCREF(container_of(prog, Program, prog)); + return_ptr(it); +} + +// We only define the bare minimum for collections.abc.MutableMapping, +// which gives us naive implementations of the remaining methods. We can +// define performance-sensitive ones as needed. +static PyMappingMethods ModuleSectionAddressesMixin_as_mapping = { + .mp_length = (lenfunc)ModuleSectionAddresses_length, + .mp_subscript = (binaryfunc)ModuleSectionAddresses_subscript, + .mp_ass_subscript = (objobjargproc)ModuleSectionAddresses_ass_subscript, +}; + +static PyTypeObject ModuleSectionAddressesMixin_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.ModuleSectionAddressesMixin", + .tp_dealloc = (destructor)ModuleSectionAddresses_dealloc, + .tp_basicsize = sizeof(ModuleSectionAddresses), + .tp_repr = (reprfunc)ModuleSectionAddresses_repr, + .tp_as_mapping = &ModuleSectionAddressesMixin_as_mapping, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_iter = (getiterfunc)ModuleSectionAddresses_iter, + .tp_new = (newfunc)ModuleSectionAddresses_new, +}; + +static void +ModuleSectionAddressesIterator_dealloc(ModuleSectionAddressesIterator *self) +{ + if (self->it) { + struct drgn_module *module = + drgn_module_section_address_iterator_module(self->it); + struct drgn_program *prog = drgn_module_program(module); + Py_DECREF(container_of(prog, Program, prog)); + drgn_module_section_address_iterator_destroy(self->it); + } + Py_TYPE(self)->tp_free((PyObject *)self); +} + 
+static PyObject * +ModuleSectionAddressesIterator_next(ModuleSectionAddressesIterator *self) +{ + struct drgn_error *err; + const char *name; + err = drgn_module_section_address_iterator_next(self->it, &name, NULL); + if (err) + return set_drgn_error(err); + if (!name) + return NULL; + return PyUnicode_FromString(name); +} + +PyTypeObject ModuleSectionAddressesIterator_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn._ModuleSectionAddressesIterator", + .tp_basicsize = sizeof(ModuleSectionAddressesIterator), + .tp_dealloc = (destructor)ModuleSectionAddressesIterator_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, + .tp_iternext = (iternextfunc)ModuleSectionAddressesIterator_next, +}; + +int init_module_section_addresses(void) +{ + if (PyType_Ready(&ModuleSectionAddressesMixin_type)) + return -1; + _cleanup_pydecref_ PyObject *collections_abc = + PyImport_ImportModule("collections.abc"); + if (!collections_abc) + return -1; + _cleanup_pydecref_ PyObject *MutableMapping = + PyObject_GetAttrString(collections_abc, "MutableMapping"); + if (!MutableMapping) + return -1; + // We can't create a direct subclass of MutableMapping from C (see + // https://github.com/python/cpython/issues/103968). Use this multiple + // inheritance trick taken from cpython/Modules/_decimal/_decimal.c + // instead. 
+ ModuleSectionAddresses_class = + PyObject_CallFunction((PyObject *)&PyType_Type, "s(OO){}", + "ModuleSectionAddresses", + &ModuleSectionAddressesMixin_type, + MutableMapping); + if (!ModuleSectionAddresses_class) + return -1; + return 0; +} diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index eea999cbf..90b972fe7 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -492,6 +492,28 @@ static PyObject *Program_add_memory_segment(Program *self, PyObject *args, Py_RETURN_NONE; } +static struct drgn_error * +py_debug_info_find_fn(struct drgn_module * const *modules, size_t num_modules, + void *arg) +{ + PyGILState_guard(); + + _cleanup_pydecref_ PyObject *modules_list = PyList_New(num_modules); + if (!modules_list) + return drgn_error_from_python(); + for (size_t i = 0; i < num_modules; i++) { + PyObject *module_obj = Module_wrap(modules[i]); + if (!module_obj) + return drgn_error_from_python(); + PyList_SET_ITEM(modules_list, i, module_obj); + } + _cleanup_pydecref_ PyObject *obj = + PyObject_CallOneArg(arg, modules_list); + if (!obj) + return drgn_error_from_python(); + return NULL; +} + static inline struct drgn_error * py_type_find_fn_common(PyObject *type_obj, void *arg, struct drgn_qualified_type *ret) @@ -682,6 +704,7 @@ py_symbol_find_fn(const char *name, uint64_t addr, return NULL; } +#define debug_info_finder_arg(self, fn) PyObject *arg = fn; #define type_finder_arg(self, fn) \ _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); \ if (!arg) \ @@ -829,6 +852,7 @@ static PyObject *Program_enabled_##which##_finders(Program *self) \ return_ptr(res); \ } +DEFINE_PROGRAM_FINDER_METHODS(debug_info) DEFINE_PROGRAM_FINDER_METHODS(type) DEFINE_PROGRAM_FINDER_METHODS(object) DEFINE_PROGRAM_FINDER_METHODS(symbol) @@ -968,6 +992,322 @@ static PyObject *Program_set_pid(Program *self, PyObject *args, PyObject *kwds) Py_RETURN_NONE; } +static ModuleIterator *Program_modules(Program *self) +{ + struct drgn_error *err; + 
ModuleIterator *it = call_tp_alloc(ModuleIterator); + if (!it) + return NULL; + err = drgn_created_module_iterator_create(&self->prog, &it->it); + if (err) { + it->it = NULL; + Py_DECREF(it); + return set_drgn_error(err); + } + Py_INCREF(self); + return it; +} + +static ModuleIterator *Program_loaded_modules(Program *self) +{ + struct drgn_error *err; + ModuleIterator *it = + (ModuleIterator *)ModuleIteratorWithNew_type.tp_alloc( + &ModuleIteratorWithNew_type, 0); + if (!it) + return NULL; + err = drgn_loaded_module_iterator_create(&self->prog, &it->it); + if (err) { + it->it = NULL; + Py_DECREF(it); + return set_drgn_error(err); + } + Py_INCREF(self); + return it; +} + +static PyObject *Program_main_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"name", "create", NULL}; + PATH_ARG(name); + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&$p:main_module", + keywords, path_converter, &name, + &create)) + return NULL; + + if (create) { + if (!name.path) { + PyErr_SetString(PyExc_TypeError, + "name must be given if create=True"); + return NULL; + } + struct drgn_module *module; + bool new; + err = drgn_module_find_or_create_main(&self->prog, name.path, + &module, &new); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_and_bool_wrap(module, new); + } else { + struct drgn_module_key key = { .kind = DRGN_MODULE_MAIN }; + struct drgn_module *module = drgn_module_find(&self->prog, &key); + if (!module + || (name.path + && strcmp(drgn_module_name(module), name.path) != 0)) { + PyErr_SetString(PyExc_LookupError, "module not found"); + return NULL; + } + return Module_wrap(module); + } +} + +static PyObject *Program_find_module(Program *self, const struct drgn_module_key *key) +{ + struct drgn_module *module = drgn_module_find(&self->prog, key); + if (!module) { + PyErr_SetString(PyExc_LookupError, "module not found"); + return NULL; + } + return Module_wrap(module); +} 
+ +static PyObject *Program_shared_library_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"name", "dynamic_address", "create", NULL}; + PATH_ARG(name); + struct index_arg dynamic_address = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O&|$p:shared_library_module", + keywords, path_converter, &name, + index_converter, &dynamic_address, + &create)) + return NULL; + + if (create) { + struct drgn_module *module; + bool new; + err = drgn_module_find_or_create_shared_library(&self->prog, + name.path, + dynamic_address.uvalue, + &module, &new); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_and_bool_wrap(module, new); + } else { + struct drgn_module_key key = { + .kind = DRGN_MODULE_SHARED_LIBRARY, + .shared_library.name = name.path, + .shared_library.dynamic_address = + dynamic_address.uvalue, + }; + return Program_find_module(self, &key); + } +} + +static PyObject *Program_vdso_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"name", "dynamic_address", "create", NULL}; + PATH_ARG(name); + struct index_arg dynamic_address = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|$p:vdso_module", + keywords, path_converter, &name, + index_converter, &dynamic_address, + &create)) + return NULL; + + if (create) { + struct drgn_module *module; + bool new; + err = drgn_module_find_or_create_vdso(&self->prog, name.path, + dynamic_address.uvalue, + &module, &new); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_and_bool_wrap(module, new); + } else { + struct drgn_module_key key = { + .kind = DRGN_MODULE_VDSO, + .vdso.name = name.path, + .vdso.dynamic_address = dynamic_address.uvalue, + }; + return Program_find_module(self, &key); + } +} + +static PyObject *Program_relocatable_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error 
*err; + static char *keywords[] = {"name", "address", "create", NULL}; + PATH_ARG(name); + struct index_arg address = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O&|$p:relocatable_module", keywords, + path_converter, &name, index_converter, + &address, &create)) + return NULL; + + if (create) { + struct drgn_module *module; + bool new; + err = drgn_module_find_or_create_relocatable(&self->prog, + name.path, + address.uvalue, + &module, &new); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_and_bool_wrap(module, new); + } else { + struct drgn_module_key key = { + .kind = DRGN_MODULE_RELOCATABLE, + .relocatable.name = name.path, + .relocatable.address = address.uvalue, + }; + return Program_find_module(self, &key); + } +} + +static PyObject *Program_linux_kernel_loadable_module(Program *self, + PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"module_obj", "create", NULL}; + DrgnObject *module_obj; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O!|$p:linux_kernel_loadable_module", + keywords, &DrgnObject_type, + &module_obj, &create)) + return NULL; + + if (DrgnObject_prog(module_obj) != self) { + PyErr_SetString(PyExc_ValueError, + "object is from different program"); + return NULL; + } + + struct drgn_module *module; + if (create) { + bool new; + err = drgn_module_find_or_create_linux_kernel_loadable(&module_obj->obj, + &module, + &new); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_and_bool_wrap(module, new); + } else { + err = drgn_module_find_linux_kernel_loadable(&module_obj->obj, + &module); + if (err) { + set_drgn_error(err); + return NULL; + } + if (!module) { + PyErr_SetString(PyExc_LookupError, "module not found"); + return NULL; + } + return Module_wrap(module); + } +} + +static PyObject *Program_extra_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = 
{"name", "id", "create", NULL}; + PATH_ARG(name); + struct index_arg id = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|O&$p:extra_module", + keywords, path_converter, &name, + index_converter, &id, &create)) + return NULL; + + if (create) { + struct drgn_module *module; + bool new; + err = drgn_module_find_or_create_extra(&self->prog, name.path, + id.uvalue, &module, + &new); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_and_bool_wrap(module, new); + } else { + struct drgn_module_key key = { + .kind = DRGN_MODULE_EXTRA, + .extra.name = name.path, + .extra.id = id.uvalue, + }; + return Program_find_module(self, &key); + } +} + +/* Program.module(address): wrap the module returned by drgn_module_find_by_address(), or raise LookupError if there is none. */ +static PyObject *Program_module(Program *self, PyObject *arg) +{ + struct index_arg address = {}; + if (!index_converter(arg, &address)) + return NULL; + struct drgn_module *module = + drgn_module_find_by_address(&self->prog, address.uvalue); + if (!module) { + PyErr_SetString(PyExc_LookupError, "module not found"); + return NULL; + } + return Module_wrap(module); +} + +/* Getter for Program.debug_info_path. The setter hands NULL to drgn_program_set_debug_info_path() when assigned None, so the path read back is presumably NULL in that case (NOTE(review): confirm against libdrgn). PyUnicode_FromString() must not be given NULL, so a NULL path is reported as None. */ +static PyObject *Program_get_debug_info_path(Program *self, void *arg) +{ + const char *path = drgn_program_debug_info_path(&self->prog); + if (!path) + Py_RETURN_NONE; + return PyUnicode_FromString(path); +} + +/* Setter for Program.debug_info_path. Accepts a str, or None (forwarded as a NULL path); anything else raises TypeError. CPython invokes getset setters with value == NULL for attribute deletion, so that case is rejected with AttributeError before value is inspected. Returns 0 on success and -1 with an exception set on failure. */ +static int Program_set_debug_info_path(Program *self, PyObject *value, void *arg) +{ + if (!value) { + PyErr_SetString(PyExc_AttributeError, + "can't delete 'debug_info_path' attribute"); + return -1; + } + const char *path; + if (value == Py_None) { + path = NULL; + } else { + if (!PyUnicode_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "debug_info_path must be str or None"); + return -1; + } + path = PyUnicode_AsUTF8(value); + if (!path) + return -1; + } + struct drgn_error *err = + drgn_program_set_debug_info_path(&self->prog, path); + if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + DEFINE_VECTOR(path_arg_vector, struct path_arg); static void path_arg_vector_cleanup(struct path_arg_vector *path_args) @@ -1055,6 +1395,40 @@ static PyObject *Program_load_default_debug_info(Program *self) Py_RETURN_NONE; } +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); +
+/* Program.load_module_debug_info(*modules): gather the struct drgn_module pointers from the positional arguments into a temporary array and pass them to drgn_load_module_debug_info(). Raises MemoryError if the array cannot be allocated, TypeError if an argument is not a Module, and ValueError if a module's prog is not this Program. Returns None on success. */ static PyObject *Program_load_module_debug_info(Program *self, PyObject *args) +{ + size_t num_modules = PyTuple_GET_SIZE(args); + _cleanup_free_ struct drgn_module **modules = + malloc_array(num_modules, sizeof(*modules)); + if (!modules) { + PyErr_NoMemory(); + return NULL; + } + + for (size_t i = 0; i < num_modules; i++) { + PyObject *item = PyTuple_GET_ITEM(args, i); + if (!PyObject_TypeCheck(item, &Module_type)) { + return PyErr_Format(PyExc_TypeError, + "expected Module, not %s", + Py_TYPE(item)->tp_name); + } + modules[i] = ((Module *)item)->module; + if (modules[i]->prog != &self->prog) { + PyErr_SetString(PyExc_ValueError, + "module from wrong program"); + return NULL; + } + } + + struct drgn_error *err = + drgn_load_module_debug_info(modules, &num_modules); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + static PyObject *Program_read(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = {"address", "size", "physical", NULL}; @@ -1562,6 +1936,7 @@ static int Program_set_language(Program *self, PyObject *value, void *arg) static PyMethodDef Program_methods[] = { {"add_memory_segment", (PyCFunction)Program_add_memory_segment, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_memory_segment_DOC}, + PROGRAM_FINDER_METHOD_DEFS(debug_info), PROGRAM_FINDER_METHOD_DEFS(type), PROGRAM_FINDER_METHOD_DEFS(object), PROGRAM_FINDER_METHOD_DEFS(symbol), @@ -1575,11 +1950,33 @@ static PyMethodDef Program_methods[] = { drgn_Program_set_kernel_DOC}, {"set_pid", (PyCFunction)Program_set_pid, METH_VARARGS | METH_KEYWORDS, drgn_Program_set_pid_DOC}, + {"modules", (PyCFunction)Program_modules, METH_NOARGS, + drgn_Program_modules_DOC}, + {"loaded_modules", (PyCFunction)Program_loaded_modules, METH_NOARGS, + drgn_Program_loaded_modules_DOC}, + {"main_module", (PyCFunction)Program_main_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_main_module_DOC}, + {"shared_library_module", (PyCFunction)Program_shared_library_module, + METH_VARARGS |
METH_KEYWORDS, drgn_Program_shared_library_module_DOC}, + {"vdso_module", (PyCFunction)Program_vdso_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_vdso_module_DOC}, + {"relocatable_module", (PyCFunction)Program_relocatable_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_relocatable_module_DOC}, + {"linux_kernel_loadable_module", + (PyCFunction)Program_linux_kernel_loadable_module, + METH_VARARGS | METH_KEYWORDS, + drgn_Program_linux_kernel_loadable_module_DOC}, + {"extra_module", (PyCFunction)Program_extra_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_extra_module_DOC}, + {"module", (PyCFunction)Program_module, METH_O, + drgn_Program_module_DOC}, {"load_debug_info", (PyCFunction)Program_load_debug_info, METH_VARARGS | METH_KEYWORDS, drgn_Program_load_debug_info_DOC}, {"load_default_debug_info", (PyCFunction)Program_load_default_debug_info, METH_NOARGS, drgn_Program_load_default_debug_info_DOC}, + {"load_module_debug_info", (PyCFunction)Program_load_module_debug_info, + METH_VARARGS, drgn_Program_load_module_debug_info_DOC}, {"__getitem__", (PyCFunction)Program_subscript, METH_O | METH_COEXIST, drgn_Program___getitem___DOC}, {"__contains__", (PyCFunction)Program_contains, METH_O | METH_COEXIST, @@ -1661,6 +2058,8 @@ static PyGetSetDef Program_getset[] = { drgn_Program_platform_DOC}, {"language", (getter)Program_get_language, (setter)Program_set_language, drgn_Program_language_DOC}, + {"debug_info_path", (getter)Program_get_debug_info_path, + (setter)Program_set_debug_info_path, drgn_Program_debug_info_path_DOC}, {}, }; diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index 40b09f36f..c2270b02b 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -1,6 +1,7 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: LGPL-2.1-or-later +#include #include "drgnpy.h" @@ -13,6 +14,13 @@ int append_string(PyObject *parts, const char *s) return PyList_Append(parts, str); } +/* Format value as "0x" followed by its lowercase hexadecimal digits and append it to parts through append_string(), returning that call's result. buf has room for "0x", at most 16 hex digits, and the terminating NUL. */ int append_u64_hex(PyObject *parts, uint64_t value) +{ + char buf[19]; + snprintf(buf, sizeof(buf), "0x%" PRIx64, value); + return append_string(parts, buf); +} + static int append_formatv(PyObject *parts, const char *format, va_list ap) { _cleanup_pydecref_ PyObject *str = PyUnicode_FromFormatV(format, ap); @@ -32,6 +40,18 @@ int append_format(PyObject *parts, const char *format, ...) return ret; } +/* Look up attr_name on obj, take PyObject_Repr() of the attribute, and append that string to the list parts. Returns -1 if the lookup or the repr fails; otherwise returns the result of PyList_Append(). Both temporaries are released by _cleanup_pydecref_. */ int append_attr_repr(PyObject *parts, PyObject *obj, const char *attr_name) +{ + _cleanup_pydecref_ PyObject *attr = + PyObject_GetAttrString(obj, attr_name); + if (!attr) + return -1; + _cleanup_pydecref_ PyObject *str = PyObject_Repr(attr); + if (!str) + return -1; + return PyList_Append(parts, str); +} + PyObject *join_strings(PyObject *parts) { _cleanup_pydecref_ PyObject *sep = PyUnicode_New(0, 0); diff --git a/libdrgn/register_state.c b/libdrgn/register_state.c index d6c6d3c55..281157b86 100644 --- a/libdrgn/register_state.c +++ b/libdrgn/register_state.c @@ -1,7 +1,6 @@ // Copyright (c) Meta Platforms, Inc. and affiliates.
// SPDX-License-Identifier: LGPL-2.1-or-later -#include #include #include "debug_info.h" @@ -105,14 +104,8 @@ void drgn_register_state_set_pc(struct drgn_program *prog, pc &= drgn_platform_address_mask(&prog->platform); regs->_pc = pc; drgn_register_state_set_known(regs, 0); - Dwfl_Module *dwfl_module = dwfl_addrmodule(prog->dbinfo.dwfl, + regs->module = drgn_module_find_by_address(prog, pc - !regs->interrupted); - if (dwfl_module) { - void **userdatap; - dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, - NULL, NULL, NULL, NULL); - regs->module = *userdatap; - } } struct optional_uint64 diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 51177deb0..5b7b3779f 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -34,26 +34,6 @@ LIBDRGN_PUBLIC void drgn_symbols_destroy(struct drgn_symbol **syms, free(syms); } -void drgn_symbol_from_elf(const char *name, uint64_t address, - const GElf_Sym *elf_sym, struct drgn_symbol *ret) -{ - ret->name = name; - ret->name_lifetime = DRGN_LIFETIME_STATIC; - ret->lifetime = DRGN_LIFETIME_OWNED; - ret->address = address; - ret->size = elf_sym->st_size; - int binding = GELF_ST_BIND(elf_sym->st_info); - if (binding <= STB_WEAK || binding == STB_GNU_UNIQUE) - ret->binding = binding + 1; - else - ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; - int type = GELF_ST_TYPE(elf_sym->st_info); - if (type <= STT_TLS || type == STT_GNU_IFUNC) - ret->kind = type; - else - ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; -} - struct drgn_error * drgn_symbol_copy(struct drgn_symbol *dst, struct drgn_symbol *src) { @@ -142,6 +122,57 @@ drgn_symbol_result_builder_add(struct drgn_symbol_result_builder *builder, return true; } +static void drgn_symbol_from_elf(const char *name, uint64_t address, + const GElf_Sym *elf_sym, + struct drgn_symbol *ret) +{ + ret->name = name; + ret->name_lifetime = DRGN_LIFETIME_STATIC; + ret->lifetime = DRGN_LIFETIME_OWNED; + ret->address = address; + ret->size = elf_sym->st_size; + int binding = GELF_ST_BIND(elf_sym->st_info); + 
if (binding <= STB_WEAK || binding == STB_GNU_UNIQUE) + ret->binding = binding + 1; + else + ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; + int type = GELF_ST_TYPE(elf_sym->st_info); + if (type <= STT_TLS || type == STT_GNU_IFUNC) + ret->kind = type; + else + ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; +} + +bool +drgn_symbol_result_builder_add_from_elf(struct drgn_symbol_result_builder *builder, + const char *name, uint64_t address, + const GElf_Sym *elf_sym) +{ + if (builder->one) { + // As an optimization, reuse the existing symbol allocation if + // we can. + if (!builder->single + || builder->single->lifetime == DRGN_LIFETIME_STATIC) { + builder->single = malloc(sizeof(*builder->single)); + if (!builder->single) + return false; + } else if (builder->single->name_lifetime == DRGN_LIFETIME_OWNED) { + free((char *)builder->single->name); + } + drgn_symbol_from_elf(name, address, elf_sym, builder->single); + } else { + struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return false; + drgn_symbol_from_elf(name, address, elf_sym, sym); + if (!symbolp_vector_append(&builder->vector, &sym)) { + free(sym); + return false; + } + } + return true; +} + LIBDRGN_PUBLIC size_t drgn_symbol_result_builder_count(const struct drgn_symbol_result_builder *builder) { diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index c3dd75ca7..3bd0c508c 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -46,10 +46,6 @@ static inline void drgn_symbol_cleanup(struct drgn_symbol **p) drgn_symbol_destroy(*p); } -/** Initialize a @ref drgn_symbol from an ELF symbol. 
*/ -void drgn_symbol_from_elf(const char *name, uint64_t address, - const GElf_Sym *elf_sym, struct drgn_symbol *ret); - /** Destroy the contents of the result builder */ void drgn_symbol_result_builder_abort(struct drgn_symbol_result_builder *builder); @@ -57,6 +53,16 @@ void drgn_symbol_result_builder_abort(struct drgn_symbol_result_builder *builder void drgn_symbol_result_builder_init(struct drgn_symbol_result_builder *builder, bool one); +/** + * Convert an ELF symbol to a @ref drgn_symbol and add it to a result builder. + * + * @return @c true on success, @c false on failure to allocate memory. + */ +bool +drgn_symbol_result_builder_add_from_elf(struct drgn_symbol_result_builder *builder, + const char *name, uint64_t address, + const GElf_Sym *elf_sym); + /** Return single result */ struct drgn_symbol * drgn_symbol_result_builder_single(struct drgn_symbol_result_builder *builder); diff --git a/libdrgn/util.h b/libdrgn/util.h index 333ff2f58..7cd890598 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -18,6 +18,8 @@ #include #include +#define _unused_ __attribute__((__unused__)) + #ifndef LIBDRGN_PUBLIC #define LIBDRGN_PUBLIC __attribute__((__visibility__("default"))) #endif diff --git a/scripts/crashme/Makefile b/scripts/crashme/Makefile new file mode 100644 index 000000000..b9b5c5a8c --- /dev/null +++ b/scripts/crashme/Makefile @@ -0,0 +1,65 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + +# Makefile used to generate tests/resources/crashme* + +.PHONY: all cores clean + +.DELETE_ON_ERROR: + +EXECUTABLES := crashme crashme_pie crashme_static crashme_static_pie +CORES := $(addsuffix .core, $(EXECUTABLES)) $(addsuffix _no_headers.core, $(EXECUTABLES)) +BINARIES := crashme.so $(EXECUTABLES) crashme.dwz crashme.so.dwz crashme.alt +ZSTD_BINARIES := $(addsuffix .zst, $(BINARIES)) +ZSTD_CORES := $(addsuffix .zst, $(CORES)) + +all: $(BINARIES) cores $(ZSTD_BINARIES) $(ZSTD_CORES) + +clean: + rm -f $(BINARIES) $(CORES) $(ZSTD_BINARIES) $(ZSTD_CORES) + +crashme.so: crashme.c common.c + gcc -g -Os -fpic -shared $^ -o $@ + +crashme: main.c common.c crashme.so + gcc -g -Os -fno-pie -no-pie $(filter-out crashme.so,$^) -o $@ -L . -l:crashme.so -Wl,-rpath,$(CURDIR) + +crashme_pie: main.c common.c crashme.so + gcc -g -Os -fpie -pie $(filter-out crashme.so,$^) -o $@ -L . -l:crashme.so -Wl,-rpath,$(CURDIR) + +crashme_static: main.c common.c crashme.c + musl-gcc -g -Os -fno-pie -static $^ -o $@ + +crashme_static_pie: main.c common.c crashme.c + musl-gcc -g -Os -fpie -static-pie $^ -o $@ + +crashme.dwz crashme.so.dwz crashme.alt &: crashme crashme.so + cp crashme crashme.dwz + cp crashme.so crashme.so.dwz + dwz -m crashme.alt -r crashme.dwz crashme.so.dwz + +cores: $(CORES) + +.NOTPARALLEL: cores + +define CORE_COMMAND +flock /proc/sys/kernel/core_pattern sh -e -c '\ +ulimit -c unlimited; \ +echo "$$COREDUMP_FILTER" > /proc/$$$$/coredump_filter; \ +old_pattern=$$(cat /proc/sys/kernel/core_pattern); \ +restore_core_pattern() { \ + echo "$$old_pattern" > /proc/sys/kernel/core_pattern; \ +}; \ +trap restore_core_pattern EXIT; \ +echo "$$PWD/core.%p" > /proc/sys/kernel/core_pattern; \ +su "$$SUDO_USER" -c "env -i sh -l -c \"exec ./$<\" & wait; mv core.\$$! 
$@"' +endef + +%.core: % + sudo env COREDUMP_FILTER=0x33 $(CORE_COMMAND) + +%_no_headers.core: % + sudo env COREDUMP_FILTER=0x23 $(CORE_COMMAND) + +%.zst: % + zstd -19 $< -o $@ diff --git a/scripts/crashme/common.c b/scripts/crashme/common.c new file mode 100644 index 000000000..98b13c615 --- /dev/null +++ b/scripts/crashme/common.c @@ -0,0 +1,10 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crashme.h" + +__attribute__((__visibility__("hidden"))) +int *crashme_ptr(void) +{ + return (int *)0xabc; +} diff --git a/scripts/crashme/crashme.c b/scripts/crashme/crashme.c new file mode 100644 index 000000000..8edf2f5d9 --- /dev/null +++ b/scripts/crashme/crashme.c @@ -0,0 +1,25 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crashme.h" + +__attribute__((__noipa__)) static int c(struct crashme *cm) +{ + *cm->ptr = 0xdeadbeef; + return 3; +} + +__attribute__((__noipa__)) static int b(struct crashme *cm) +{ + return c(cm) - 1; +} + +__attribute__((__noipa__)) static int a(struct crashme *cm) +{ + return b(cm) - 1; +} + +int crashme(struct crashme *cm) +{ + return cm->ptr == crashme_ptr() ? a(cm) - 1 : 1; +} diff --git a/scripts/crashme/crashme.h b/scripts/crashme/crashme.h new file mode 100644 index 000000000..75ab6e1cb --- /dev/null +++ b/scripts/crashme/crashme.h @@ -0,0 +1,15 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef CRASHME_H +#define CRASHME_H + +int *crashme_ptr(void); + +struct crashme { + int *ptr; +}; + +int crashme(struct crashme *cm); + +#endif /* CRASHME_H */ diff --git a/scripts/crashme/main.c b/scripts/crashme/main.c new file mode 100644 index 000000000..06c65a758 --- /dev/null +++ b/scripts/crashme/main.c @@ -0,0 +1,10 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crashme.h" + +int main(void) +{ + struct crashme cm = { crashme_ptr() }; + return !!crashme(&cm); +} diff --git a/tests/linux_kernel/test_debug_info.py b/tests/linux_kernel/test_debug_info.py index 75ccabf50..f50afbdfb 100644 --- a/tests/linux_kernel/test_debug_info.py +++ b/tests/linux_kernel/test_debug_info.py @@ -2,49 +2,89 @@ # SPDX-License-Identifier: LGPL-2.1-or-later import os -from pathlib import Path -import unittest -from drgn import Program +from drgn import Program, RelocatableModule +from drgn.helpers.linux.module import find_module from tests import modifyenv from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod -KALLSYMS_PATH = Path("/proc/kallsyms") - - -@unittest.skipUnless( - KALLSYMS_PATH.exists(), "kernel does not have kallsyms (CONFIG_KALLSYMS)" -) -@skip_unless_have_test_kmod -class TestModuleDebugInfo(LinuxKernelTestCase): - # Arbitrary symbol that we can use to check that the module debug info was - # loaded. 
- SYMBOL = "drgn_test_function" - - def setUp(self): - super().setUp() - with KALLSYMS_PATH.open() as f: - for line in f: - tokens = line.split() - if tokens[2] == self.SYMBOL: - self.symbol_address = int(tokens[0], 16) - break - else: - self.fail(f"{self.SYMBOL!r} symbol not found") - def _test_module_debug_info(self, use_sys_module): - old_use_sys_module = int(os.environ.get("DRGN_USE_SYS_MODULE", "1")) != 0 - with modifyenv({"DRGN_USE_SYS_MODULE": "1" if use_sys_module else "0"}): - if old_use_sys_module == use_sys_module: - prog = self.prog +def iter_proc_modules(): + try: + f = open("/proc/modules", "r") + except FileNotFoundError: + return + with f: + for line in f: + tokens = line.split() + yield tokens[0], int(tokens[5], 16) + + +class TestModule(LinuxKernelTestCase): + def test_loaded_modules(self): + expected = [("kernel", None), *iter_proc_modules()] + + loaded_modules = [] + for module, _ in self.prog.loaded_modules(): + if isinstance(module, RelocatableModule): + loaded_modules.append((module.name, module.address)) else: - prog = Program() - prog.set_kernel() - self._load_debug_info(prog) - self.assertEqual(prog.symbol(self.SYMBOL).address, self.symbol_address) + loaded_modules.append((module.name, None)) + + self.assertCountEqual(loaded_modules, expected) + + @skip_unless_have_test_kmod + def test_find(self): + self.assertEqual(self.prog.main_module().name, "kernel") + for name, address in iter_proc_modules(): + if name == "drgn_test": + self.assertEqual( + self.prog.relocatable_module(name, address).name, "drgn_test" + ) + break + else: + self.fail("test module not found") + + @skip_unless_have_test_kmod + def test_find_by_obj(self): + for module in self.prog.modules(): + if module.name == "drgn_test": + break + else: + self.fail("test module not found") - def test_module_debug_info_use_proc_and_sys(self): - self._test_module_debug_info(True) + module_obj = find_module(self.prog, "drgn_test") + 
self.assertEqual(self.prog.linux_kernel_loadable_module(module_obj), module) + self.assertEqual( + self.prog.linux_kernel_loadable_module(module_obj, create=True), + (module, False), + ) + + def test_no_sys_module(self): + # Test that we get the same modules with and without using /sys/module. + + def module_dict(prog): + return { + (module.name, module.address): ( + module.address_range, + module.build_id, + dict(module.section_addresses), + ) + for module, _ in prog.loaded_modules() + if isinstance(module, RelocatableModule) + } + + use_sys_module = int(os.environ.get("DRGN_USE_SYS_MODULE", "1")) != 0 + + with modifyenv({"DRGN_USE_SYS_MODULE": str(int(not use_sys_module))}): + prog = Program() + prog.set_kernel() + + if use_sys_module: + with_sys_module = module_dict(self.prog) + without_sys_module = module_dict(prog) + else: + with_sys_module = module_dict(prog) + without_sys_module = module_dict(self.prog) - def test_module_debug_info_use_core_dump(self): - self._test_module_debug_info(False) + self.assertEqual(with_sys_module, without_sys_module) diff --git a/tests/resources/crashme.alt.zst b/tests/resources/crashme.alt.zst new file mode 100644 index 0000000000000000000000000000000000000000..1aab40796933e144f210591d017fc3e0480ac349 GIT binary patch literal 409 zcmV;K0cQRvwJ-f-j{;o`0LIivI52Uw2Ef3;paq364ig0tN+b*rJAMK7`dgl~hLBoW z%>BctC9U@t8^0g^Ty#5ertHVMlI?CHQ#AlM05<@k4#0MA|MtJb)_#Z)nwaf?C4z*| z;!=h$sshVQ4a_1Q8$b4Y{_Jh;(WY`cxZT=Iqa$M_CGYfE*kB)vzwEtJmvHA`e@EWw zQrwD5>aYc_H|W@s{q;1pjPnj{oc+6O0Q)(#4{-tZbTzpG<2Pg=)7*bi+slt+m1>x# z=3Jkk;~A=`h-9IqXix=ZEM1+ocH$1a;Jt&m_w2nJ%oc3l(f$*oX)0t*p62R=Ohqw7 zOH?h99KTb@@ibB5sQMmFhjC@L#IJR#K5|9RZ!uicRV~$T6~mbIggks)5MYqVNLY|B zph28f5>${R2}{~xA)1~JyV37#o9AxE$jE D{pHCc literal 0 HcmV?d00001 diff --git a/tests/resources/crashme.core.zst b/tests/resources/crashme.core.zst new file mode 100644 index 0000000000000000000000000000000000000000..215fe38e8c62ae0e771e793770c2d2f3199003f7 GIT binary patch literal 18351 
zcmV(tKB_u-j-dm;u1yTh724cTnEz`oWW%#${v*(Sue$_w;Z;2Qg2@aL0DheWh z1-%jOi(=2deX&*Z%_9w8l~BRRN)PgltSls5*3JNm^)1rw*HSOG;)w`;EQBpbNuYnU z@oL-`jm##I4!mAve?LdQR=~RT#6hpJb@^Y_*faFSz-L<=XAwRJ*N_lqE6ZgCk@6``a zT=ngX-^Pozc;k>?(hd{qj3OhRh~~u#p^7qykl>M?Ue)rq)3e1lGI$k~qc*IrXP>{u z`Qs}Id(gNiPIzRl6wf;O70Rn&Hawrr%hln<8abeQ^;^xWF=+nV?Bo}>9%^%P&jS;+2^qbJnNbl4+ z9VXUhgchX*{!BGo8LbUKL((0iM0VM((EX;vb|HuwFdjH<2ie$)1RU;!t6ZAR5v@=$ zmhn)Q!9BpjcLOyI6G9j$anqOt319_U8H!|?ayqpo2&dMnwf)It5pD=Z1b_0cps_f1 z;3fm_GM3!4!RhoC3OvC%J7?P;IGsw9?Y|+!df=+xTU@SDVu?$vKCuj#Ky$dO-~SC7 z*fP|eFd^3>#rN&q5&3al9pK*`ppm*hKO#Zu>Q-K~1AJG%$t^an4=-Lu?CAp$$k(C; zQWq^`&_T1Fy?SS%H`QyrNYVCPTU$o*i7_^8NG6kW;{mps!9Lu9fxq0%jr}X#&`{lN z!*Ya$LSl5ORQ}ytp8VGMy}w-1);F#eE@yw@5RO3hh{0E1Ez;F^A=lzn_17iw3!`0O z3KM8NeL?dT%4(ZoLk9~T+yX5G9_icTvw-&<*!+yJ{n*gK#~$B>KO;x-koe*C-5K$_ zkVnK%?*a$H*YFbl{_BLV{{45wd{tjEzxKb*y?piWCzGFzLas&JudLb;94G|9jj9Ez zf##jG3zU(K86uft8!wWzZtQZ!+wA`VYymc%DAh27iS0AwQpmL{FR=Z2MD&c@{r&jW za7mQ0VrlPq2rVhBT@{6fU{fc7|7`vS0p1fkM?`lyBD#=8({Fib2=>i{u%;#Jt!JU|-YICNY@|WtZrJ+h?)JZ7#&?mdId9sq=Wj32ksZu#d zxLi&t$wnWwO@MHZouJx0+I%Q?0q{KMmc%wCd$R5JCY4{Zz=?J&g56&J-$~5=y()?p zaZmz(rJcUNUa<%xjI{Jue;sDkOsrKqZIBOPuAeoOquVe)?wQv!->zR?;k2r$Gz3;d z?fT?Yf^Esr2pB7TRhJm4$>OYHb)FSmTP2XC$~pv~4tgY>R(0NA16F^rDwzWV)=tqQ zt>S90)T~kf6_Aus0Mtlu)WKG@V+DEntXpn9wd!+#YN*M=ZL@miOzUG+%O$nirp2T1 zi^B@N&Fe1~Nm5n27OsR|RoyFhS4$l9Epf`kLzS00DnNqu5vkW$>xHf!N@@L)VOljB z=AHpueRQXH%Ye1H>NO2}=~!4T-Rrfmy>HCgb<(%0qgPHN00*dp58?t_<XO>nqJ$%BdP#$hLl8q0+&S<={P?wWS(NWk^?{RwA zB#KB`N-a)Nk>YKAgK~KZicc{;R3gZSV8#6=X%kW5!t^lk>tCH5Xn%y?PYohC^+j^TfeYg+Ac&cs+HoDBykMT5~i%4SU6N7B64ZF zuwK?LE{>VlSJmu1VlqAa6VXJbFc!y-m(^OF0!QQ$*LI0xy~M3wOc2RVs8*6?aqAhv z7HuNNxSdg3^YxV!n;!N#wcOZ5>W|W-;IHbNAJwyUekm6ssOgZzgccA2zS6X+zJ2~c za@o~-cJA-1ziOP&jD7Wfg#*IDkFj{7^IO9=sv$o6H|0-7p+GJ!15C3178(yt{3@y} z2ZRi{gR>X?R_%B2=}wF&l`pPEJ`XR*9QmU>o8*gdUM=v(K>yv3BSfGSxEqHa2ms*c zRXV>CIkukYq0*~HF)u7$9Ux}nk3{}{`L)RxH@$J~#V&n&<=5r6wSK+D#ZH`^FAjTF z(Id}1tNtHOy;0B1jR4(GERy!GmVg4%=n|$7%ik%!m@;hi9i!;FA||Bn&45#Zg1+^l 
zs|cxQr2=|sLO=)W**PzEyzq%E{vP;Z5kAkpdLktff@}U>WssH(6Mh)AIa+vC0j@OP zTIqYEh#Yl5BndUD4wXN`ebLsdsdl_E!ASo;3GopLqyWEWSb`Zugis7Y{(uAYw@fK4 zvUE{6rR1+ei-6G$7B*kh@M6iG5TTQmQxA7f43s1Pi`7jylJ{Iaf2)~wQP zvEqr+H#%Qz(4Ut45kUZwGm0ER%! zUqXM+JWIEM&#Q|xWm1X$bemQ!~%CLm-fY+7Car(R3b4<;`KTYQz(vy-jku=70LK{jLOy~>2x+M>#kLbxe z;n1VD>~1lWwTetyOF}|GvY13^8%Rw0uY@-3l>ZE(4VDNVK-CgB-xD;mQ}Q`3O6=Yz zY?E~@F-NVctgvVyRh^gf4)Hb(39}IIlaeZV1ii6HFs@LN!t4VoJg}W|nVgGbDg{r# z5kB)bWi+{b+KrfdN|z*)iV|v)LNh&|t0Xu{(sIF>i}8 z*rPIyVA#0Q=!~k)z6@L%DwexVA-^e~kzyA&qBgEl@JAw+$^{&W!rVh51>#5(o@7rU z=@K#}1UugYrR{ImR(fX7G`KA>0Kr*Vp%r>Ob(C`1Brz+@0xPNupnjv` zAkGl1yi_k&R8hh)P=LFN4blR!`2yM~yPRsMfhuNI$TdUQI_w(NQ-A*oGnl~H*z>8D zhVc6T42%8HR0YDiurd?33b`62HHc6Uq(FuM_4y;m51OZ!EK{%qULj(|N0l2?T1z+gBC%kKI2)#02R+S&m8 zo&I`@jTwu2x%QqGwd&Pp?S7Bk*#-0gM;myBB^~3)dnwTU}U1up*fnemzK)UtbO~vb}pmRGk zd<|{T&KjB~;~6YZD*U-gt)NDx;Q#6^qw#|*lcwdbBzrSo0m~(rS1OfO4ouuK;BPk( z1~9ouXhG2+AT#~zV8zdyD+z&pIU+5W@R)5~KE z%mZX&8<3BA$Cvk4*XOrqdVhdE z?f{7>F~YX|y0{i5-e}a(h?{`|1&q2DZzf>0NKq#PPKAoNx)X9P-d^BcUBUfUhO8RY)s?T|#^01c0*>fUh=Xq$iU|hx}%6cWIo!*X`MW3E<@Nu~o94-kQCq zot*)2!#Yr)AWaX9N@D|C<8E-kE`o^PkM(~1VEe}&6Nm++5f~PfK`bao=>7P+`?2?z z*OwP$L|)$!Kq3~D9x$9xUbld>Z=gS+Uqm3@e~`(Qv1J&OGd{MD_Qts@nuy@j*eB|N zS!>)iQJh+XZEP?Bfrwjp;oy25Bv;Y?|1@#ayf=O4Oi_gMN)zW3xIbjlz4eE)|44Ad}!N{p)G?F z!v+jW2{m?h_(}93`_)&4SvNaBhVIF%$#6CD+Ji6V4^K0rL6&@s$7lK}<)P=dWSJ0U z8Jubb!yFc652o?$$VUgDND`{Hp^HZd^aL1VZ2F>85=9i3v(dFsC7f7d(v$dknB4e5 zR$pw-z!4tm{pY}z;q#X4@!j?90jawS{85+3=i`n>UY{S|2^Tc}@&JwO1e;a zL#}QNW(XcTK#c|{D%_Kc>QY0jP2SrHfYRWr?Jy4y2BjuolN`OhN`CpoWHy<65&voz z-lp35LMLHlGb+&@A(h^H_buA@20K75qWObpXw8#pwS?WxGQ_22@AoOs|8GQ-_o-|n zy82fBNKCisXVkmehJ9UvnmEq^eL43ypr1tf6xbdm785F*zu;fh=Gv5gLz+^$87qT{ z5(_4a(xWI9Li2afWC@Jms05>G)#t22=70bRgaIG`gO)-Vsd894M#SlK$Kj$x7ywW( z92P(n01QCDpeO+V003kF0Kk9&G($rZFaVtwV2@c_jqB`qXFSQlsnG~d)vl-+1oyi1 z4!o`k-xQEqB+j>zA1{sfD0-jME-IHxooW&(=iR^SbsmYg_X#y9;Aw(q}8mzB%vx%wi3y1p3 
zSR*ce+O=i1!~Q-C#de^1G@eXENYT@DyKAFiSk9be&Ub%-&Z9YeCER0+rHrSGR*`M9IT1!J`eD1;2Nkm0-2TCadD{KTYL^M0uRH?&k1v) zHtn3{UpyD7s8z{qqS0oDwCbMs=Hq>2h2r02v42?wFgGI9I^cJfM9#@@6cb-2Z#B5u z9qVTrbS-kuaq`_oUre=lbDCBWaz?SBXBGNX*~i4fwHFO=Lu2R|R^RZ8KD!B=BPqMv z!(ex&IV$W}@g@&11FvKo@#%mY+Lkc|Z?$#>{+CPgqK7xzj3M{dTzt-SYE_Hc%nCOL?zu0$Jj2Unq6gfXe!E7Uz8O)#I7nJ8*$LP3-DG7o@?CP;4kCc!%K7 zK2LqH;87-Ug9ZnAQ+RbaXtR%-a`IMxXjkCRXt1&b0P{-(#7i;wL39_;P~6n?%03|9 z;^!4d`nccTzP1i)12;0sz8?31c09T)?O#Bfcq@+Wqot6@y9I5QrO+jx2NlMhlCXU^ zI#$Zf7%<*jbDV9WtAjQ9rM-bCjV_QQCCgOfz0()s<-k@gIO}t zEKLruqCsI(YNub_J_kWwfX2mwRLqm8a>l$oNH$k zKfsnKgB~PTr*jUs^twrf>pZ&r!wAKyX_kNYx83Uyt3%8M1EP4?cqBSm+UH4XD&=?V zu~!GWY`bRp*)TPW4){hlfR|+rVFvGPiJ4g{AxwO095z!7Wair(a4MQJEdXS z#}ds$#~|_A{F8bnlMnwm*m--}V0ZU%VLJe&+v`cJn{g7&I?5+O-7g=d*8>ODsgB_&E}sjOv8(>)VB`&FU;#-LL^+R zvk|H1UFe=8Tj|_>kDB~4)JLB#s7#+)r1k;un`ZAR*}Z&Y6nxzyhmAN<2zwb}M%25< zS@VrOb^X?xYJmeW?nT$ZWIqzKcOOr9cVW;=O~md9cC=CtNS&zE;G6IG4tfI(bd43UVy|u- z$^it7C)Wcc@G#i{+`Pqmkj|H1y2+ORw<%;7Tx0$5>=A2R;hZ{AZj7x|CR`R^yg0C9 zfN_cHDf{0t7%p!|p))=1_t+tB7O3VZ8p9GS0e1Ly3N=o-Z!k{YNOnJTQh<9Ie?@(( zi3mL(2K3&ZM@E~nOop(SE*EiMUC)e5xX_5Uxx>=uFPP*j+AbxlQ@cCmD*gsGx~43< zxcNsK&Uuwhc1~ZXw|}NJX5)XZxpU621|c;y$|$oAA|DmZOO2KXFLZ5-eHyUa?!GeCl->4P*=>3ZBdahU92^-f`BQCY0zvLG7sBq@kjfBKVqBC zIOt=Eg%oYM8lQE?kPU$LNF*>cOHGteaBE_lezMITxp$i}1B`!i_E3V#jIm<7NuQ9| zA^dLA6K8cPyp_+(F8K}B#1-?CeiUDfohYj)5ZlOGrCJBy`>XEQW#XE5OnuYZ0<6wh zY2Un*g7`*deZxx89mm7*f*)Yrc6r##VbrwGC-VxyUg=E%N)l^aIU9zCO2Jah0NYo~Bq7DUY6wO;q}5mAV&lJ1UX z`(WBOsQ;l{`K5ao$HsIm;2QF9SiKPf&=5p-*1_%||3ukh!Y5`KG-tVOn$qGNJRIhM zrMrLfCtK~8!OU5EpQaaK2{@{CfRGk^g`K${y<3HbzTY%_>P%qigX5to`!o)lr5+XL z%t6*Tlrgu&-jV?&u%VRfLKsE}m6K1MK|Hd2d<Xb02zStrHQeFu(OY^jj^lO3OIuq04$709;-crojld+Q9wy$%Md5D4Zyp?HVa0P{gmjTY?|!&jq=R8) zx#p%Umddwq#=Hnr#+{DIMC#iR%==udi4e5o`58jej)bBOkg7H_K^B{tfU$-{Cn1Bd z?t?hjr)u7CHJ3H}S)Xt~vNdeB+DKfP{cVHX`@stM0ly{f>UIvI2P+zNSO#E=JWq9y z9o#T*|M0URfY$pGia=mEGW!^^n5c4*r^8Z9J0NP&ZjPR0tN;pwEv%W*6WTed?AZlH z{*`5L!&_Sh7uAcxQeIw5@fs^NUsVwk6#L$x#>GHWoA$q|Mu@8v@9(l2$wOTwj?E=n 
z*(oM#XC-JB>!L6LIJ>lSWOXPb1GB^ou+lr4b-f`*I>uGVYzZ-61I>tfrp^=FV4A^_ zYx9A!ev2YATJ9adij9@QYAM!8_RN#;PDh}X%!48fQpgm-@oOe^*ZncMYWUDZ8E zN3!fEaz7;$%`16}@9WypZvM7dcnCREeU!NAzL(d+GN9PD?+HQr-Vw_%YWUf`7kcBH^H0ND+Db>*s6_C`OuIQwa4uRX_4i$>{86~r~ zb#fJaDi_UH;>C|VnS7>zgNL{<;rpN$&3|4)ZLFa}eL_(yILG3cI`B-d4=@Jp2ucnE zOFd-0atbV^3TYO*&FH0119z_Pq3uI`)E%*U0{Nw1kWq4E3}IrzpZ_a7D_DY;TKZ3{ zpP}-ExB)(1`PKLfi((G@1lT3;1?Q;_dge?l)Al( zId)Uay^HPsGKJ2B3-ufBSAh^RK8<#}!H!u5IWl=Gjbn1=@yEih>8MMcvW8SoIoDLR z4o|udO}YjKk58i$B}t)%-TM zoJGH-Ni(1Q8W%s2SrJ$W1$eIfs{KK}8UhBxP}G|H2Ogj(YiD{vl&=G&uu%yfE&y4%gn{od&rTdR~s_`Zeqhes$OY z3qhno#^?Gy2hvHpzn8!t9r=U-Kk-y{mV9%FQ%56q?a*i2!pQ&8jL`o6v*6b*-HEiz z5TEX zvqarty=DTtJqFvu{a<-T&-H1J*Q{SsXWRXHplyEShmqaQUJdfxa(Hx;`wRbQ#9&a% zRST*I$2?!lEFE!_quiIj^*Q%XYn1=wu96|+C-^R|BM&Iof!#cI|8>78PR4_87e4nt zdi$kj_&~om?&-*b_KdU>?Ad<}W z32%{u<9oK)KJA@PKY`i3UA&!bjA?kqVgILY zY8MA^c(3g;;t`RjGrGWGA8lT~iQ(Ude|aN3{k^q&(EHVund5aTz1^4X6WkH^pFEE) z_nAg_cu#jiK{hwP_}|+E&EM=8XI{4s#{SLri@Z}WWM<_o5Ww{vZJdsb$Q6#qB~SiB z9HigJE`gnypWyj2Hy6JYxMHh4*~gu{XViRvmT|C$_G_|*&54lfH`hBP7&XfpJtfr& z*pau+2I%}S%`H-CTn4Q>VBhjjb41xo*{!;JU*+TF-SV0hgkZD?6h7t~$^uoG#DK?Tr90bQ%>=3b1nEO48V9nZuVOI zFlZ9yEY_G-oxZ>ar-Q!koQwqCDxKn6xNugr0L)A@jh^USoHiQd$~e;ry!xKdt&PKb zxj)Q9H6kdYG3gltN7K};a=rc?!iagi6$(4m0iuN3@apI|J*)<-^1HIw@N?vtI4Nt8 z#j%RUVzVF7n5r9N$2=2{-ZXo=ZAzKQf!YHfJqC7>mrAaQ4m%sRm-*cj zJWQ^zm+WsxVDz3fbL&piafO$gfvmjgno){q0CbbF)&o+yg}#6ThFcQlrj9p;?s&{R zi<8ww&O=H$x?vtnr@vYVhp`{GNP+lMB_aVc;AcZ`XPgt{&J72Cgqq(wpJ|% zUWM}g>^VuktN1%rw}Z>vLlEzW&GQAa*~}YQ&M+#Z!$NvRmrQ=fVv_F|haC^e?I$$h zLS~=8dzywP9cLQy(7=$i!#WfygA(5iOb9V5G{WqqNr+~!{uB$7dBk{$R=I51Ik41V zGBd!y(6>jL1;-H3>@$OD*R)Jb^q@A51MprEt_#_fap3c1j;~MagxPWCIZ8$W*WJou z_MHb1$Vi0q0!2R5%=>TCl{4=(Gxqqys3fndV zClU%~xY*mmgW;_$`#$N+#t^kocqUDMIHUGaIRwlqVc@w_6V&XqWC&o9@^zHD^&8{*jx*XwlROndyylaKyO z_EjoOdjJ}ytR^`iWKsuf8`Ducb9_`8<(}$v55uA2II?bSe9IZ!h%|(zxVl!GB41i) zMw#ppz>m47CXs*uv3u_nSOQrBPXcngZb^H&K+{bqPtt_IKR_z{&S8@D{|{m}j5s59 zuur4I{=duIuCaVEaposbNb9-{pM)%xOnRvENPX(qr(})>cBDVB|NnpPz}`|*+k^R+ 
zt01(BcK(&w{E!mY^3A15Ygy795NjC7`}A($6cD98UE8O2NeFqGb#@>toS0;ML^KQb zz`Dj0s7b>qI$7mpY>0jq~PTYC8<$l~+tH%vp z8L=7YZQTRiG@V)_ypuF_#ge98Db}?hi>@qxCKG_Mzw*D7^bdFV|E&L|%*Shw7YqbM z2si)#4+#BCo&76?X#ZbqzrY}oSlH@XAId9Fs>jREi~DpJCugLRF*!SpwIXm)@>3H3 zbpKmPlxzJz!BbNPb?>I^&9Ut{p^%s8XfP+n)|Vfkyh>pLnw&f3!4X!Q6TZCAFl9%*ej&i;Xzp{eai%VEQ6O{3vM zhKvLy9vF&A)jfg)SZ)G1S8S;&v#~|jeC1>KGby&N`0zx-BOnqd< zTG|Xd1`P5ZB+>EJCzzlp~ zDpUDJPr2~4d*wpG4$w?7da4wot2eH&mQmV7gEdWM2@CPW-1(1}s{enTk*y^{o<6_i zX&$%=<$4GO3&82=7v^4lCfw+0jLcS%h9<;lDfdjRZshACgV54{`0p@E6VPFQVY^n! zWv6v*C`j{}<@yJeR&BLj+NgJ{C85PJw_zkn#=kCoJe=LMMVj=UA`DFQY{r)nCgTgx0xrP z(`N{pS_o5-iMCvM0>U&)1TC#}_I5>n(b6x6v-GfsDd?M1&WCY$-7wHefh-kJ|9|-I z|FuWjpx_Mpn6fR>H7UG`;S?UR|awZeyyojT&xVlc9 zlw@P6nS}vE`aEz_7u&Yl z9Yc<`THoVVd$Kd!v88H-f_iprwLKl13dck?SR^J9=ElHubd{jB=>}0Vz)fDeAv8^$ zy25OR%mchyE?K_g|F5O|(j&to$=+>rJ2yJu-$WX|^Z>mjdY9e`-%f4*kcB|f>LK#h zDRX`)b*i&Y4zC`}{~QR)LIw=%;m)yxHR@E(DIpgIUw{~w8y2YaSG zCK4()TWJNv^2f2R#>e%@yILH_Z}ycB*m1T5LeCQKNJV)wM@G?BRwN#yRM`ZUohq5E zS>ALwe*rr+@#+ZuM)mktOp8bJy{IQ7JC(as$4>pW$X5ShG$Xsoah?`}xuRZ`A&nt@ z-kG^!|A!Z`8xQ(UxUP&`Kj)!!3*qR&Q?235KOCB+dt{yZ9~@17fRwLhR}xvpmbbj1JcZrJszbHrhakX6k|4e!u%F2l4zrCV&2&`pd3S9q{ zPR1Ap{+)T$71vm(5zUwQvFrJhU#AIL+9`OyLA;CByi8p=)*1cq#&H3SaXinB)VTa7 zS)voyNE^2mg{B?mped+|BCp`R_&``XZ5pYF-#`f&*SY}ZsX6!!d9k&%qSWVwsg8@8 z*OxJ5TO4l;Vku%d{2hTJCnO1AqppE%B8n@si^&c~g<~kxT*>-@P=^s)pE?y6_K3x) zCt})&Ww4QD5DpWe*EKwTc(Jg9!;F9v>6P_2s0nfzH^R@y z((+DEVn{-w(QY&f2lw(=jR_LG$Q8?@EPLfs2~5NdUW9|A!Nz9dpIKHWi!qs`F;c_-B?_{Emj^Ad^3ejw z8V?gTWweLcppZ(L#K-&7&dk=>dRWh{HmqxA|2t&R(9zar=EKL zg+t`OGZPCf%m3=`nQaxNxg`=LHsZ_?yP?hp6`3bGkLuCY0K9cI-1}~ z+D*>+J>YQnjz&0l;Em;=>@PzWWqd*4C-(8WGtRb*;Y;_g#DG>s&K$ko5vbF&Fx`Oy zORQVl_s_D;J!9Bc@<05@zu9+J!Z`!px%K%ZY>$E#u``==w`bZM!jZKMjUg-mCekmw z$PnHo>>2H|yR|W)R6z1~4h7)>820r~5C=ey(SQb>w2me$Bk|@S39#UAN2XJIDUhW8 z7GdVu=f+R*$GpQTxf|FMaaiHU_xzyU5q|yjkAxDMC>g9j+{_by9YS~|=wbbeVxTqvTCR!R0|Ad+-V_=D|#&KO((z!Tk~A^U(Y z7~SSSD4?WQ+)imx!ym|C_kAMNiQfhFUarX|TLZ8gEwq2!EQBDrRg3`=K0HxD2GR0P 
z&1#yskE-DXj2!76mfF0`G?IG|PKv89WMXxvr=lL z50+PNGt7321M16t3WgogQ;xqHU-}4pes5b0xX@sFhVsD5{1g`t*f_KHe@Lx?F;&hV z$ycd83uBy)w4oO8!!|Lly!9R{p?jil66g%5UUFa+$`G`f375Sfj1O3l=5N!yZLA6$ zadK&3NLaS2ey&^wzC-ARS07|O@5@_#9O2szZkTKQuAt`qnCSJ!4u{Ks%rTK)B=*}+ z40sRA+qawM*;^2i38H8`$F#R+QLBtW^Wr9xbb!dQ^fh(MjFbK$XGZI0cMsw@N7?lw zbH1pSVug-KMy~ME$N4hTSc8|LCRD+B$T)4aF2MZ2w&62Y(Ee0z4i&Q@@ZYu1@0Mq$ zA~1Z+oh{hZwYbp8On|*?` zvmV5%V#c^%Zl^uD-N{Vj^&rY^rY=`IEqc5lj{#moh$ABOe0Qq9;k9;7^Z@WT`2&dV zZm-v!^HSH)A5K0qcd69jatC((y1`eTnvLS0p?~QoS$2PJ30zccx3PVoFZ)fQ2Rg=q zrkCyg*qb<5r|CbK+!)qHNzlf|#+)5^N#{CoKEqkHB#JSn=W9g63Ez|~F53_65|V!G zFt*Eh?@IHrXue%u0RNUz@GEiHYr2aL;Nh+JxRV$?k0z-A-*mdu$OyOz3Z#x%S0+tl zH%=MuEaG=nOxasK*&%X;4RU{44lv5Dx@0V_;OsLAFPf@#1m3A!CeV^HZa0y9eqcC! zkR45fPr%RVqx_|-*}91kVBZ2m%!*BQWfGg&$O99du#>NdnzFO9K+9Uirl1~xplQq; zQUKaXa1cq5T6M;-lK3>d61NZs(lBnQ;+OKL7xhHZXBhOmP-&)p0iYP-r|ATmn;S3N z?=22+q&HCgk!Y5eAIj#o%Rj&DdVfIs_4k%3^#Jey@BoK@L160RX_r-9?Q?g#!tKx9 zY7e+-{iGvz@9pwZ7Isw?-7k*^)He3(y1{h6`zotnHc9`2F#0=U-?sYy5sfM z6h>1ODTUGPrf|Dc;ZApr9Zk%abx+k-d$6nj1jWeQeB3f%s)KwL-7j|E*WsZKoo%uL9o z5`8N0lRHatkoY|^m>V(f{FzF=-9v#VzhTx@f_p6wy=}+{=VM)q5cTQ3rf3T5Afh6n6*VIvoW9X)HVsrnL`1W_ z+g)MZqQ0MeQJ&Cu^j~!$mI&_U^*W8S^Jhg>F+Mj4GLRZ!v<-1|m$3c`6$37)CsA5X zP{$$SWPoKxAYU$>v~gFC4{46WigC)nMY{giaI_XfMz;+be*U697wEc48~FDK?ePMz zEjc$XJhL;CtqNn)i6! 
zX8t^{$A3wD(B?ic^M{>gcA!&M@bLw%C3i9LICT_b zE5ebainh81!m%H#Q2^D1fXehdW2uY!j*v77BNjNL+U?)DyERQ9{O)?e6}O^>)3g*d zBO3goKNjmG|ND_-{PH^%#R?!7LakOtdQ+t}w{RK{z;hRu<9Pu(e^adIC+R}Zg$7<+ z2#y&>&I9$td~%D9RWkS1zrDxy;U4pRHx4sERTrLndJ^{+ohiKvw?4 ze6C=LXRIMF8u4%E|{UN#$bHloO{OMKqFU%4p);%0JoR`;2vFDd9*A@EMbk z%;XW~RwuAiO|dCcR5BFCP1fsNJDlhID@B}}5?&C75j?yxUDs*KF@3R%)Kayjs@S$l zy;Y?r%f^5wF#-@Gb>vA^OS6L>sN%s{iX=&r!mtLSf{Ma0kaG+&E7=ADF%Sk6LJU!a z3J4Jrg9L;SA`lS~5gC~oiBf1Y50Irfrc4RcsuSJjd9EyluSwV*)eJoM%2!5M3{^iK zZkj6)QIQw>ZZHk_b6)vxyp-Lza7G`KVHk{S!Q!faU3_0<(>+GR*~)B2LHSsv3{-nq zO)-zH!gw&o%rt0xqH(jQmYb0XW3cjWP{+=9ce73M0&XoeO2CFw>@?kkZ7eh&_6BP~ zh?wnoW0m=(em7S0J{e4692bdHqpU-&9jIvIhEW<2)5*mnJw!Y!KQ*e&9(pgzw3~JN zy=hIXn+6aIyZ!xWXB{b8W>+bMO}`nJfUEX-#u_-c8PxVV4A}W9@l_ocf1!E zZP~V+hGiyr_+M?WBACb7D4EMQ(_BMq>^WC!H#fq2wo-CwiZSyz`q!J_7dPs{6Em?i zdzo*wvo;2!Cv`}BV$f$b8JGz~FY{Oq)NqMo2l(X)jhh6Il)jCpymbd~^YdXR1$vO< zG*Kci-0q(wby`rPk3@mgT03=9S zl#n$ogruEmFEZUi2_F$=7%E$|`N)GibiIR$^S`B#WOkoY3XXry^T_nX9UFx9^Y zOWvQNvG*FsyQ0g3=qv||wWG(|xnce=1!Sb{QKfcKAsQm|{?eONPdlxFpTp`5`()l3 zWz@MZYvfNT@_hQ(Y9d6fi!vH!)l@@mjvXTh=09d)_SmJX;PTf~?4w1$#ncK{-#CJ}vGWfAtYnyi=Djh4@i&o}!-Dr%C zlG4zY!ERw2Q#hLwln)MVLT0+Y7$DC4Ax>0dGQJfr8D&hU5F6oT*iKKqG+n21ZS9P; zB3ga9mS=a!IM|FG26r8Ffwm<4Fudz!{#O<)beOa%mla(kvt@GJVyJ}J3wM9yB_W5P z2EF8V%6g4wgk8+}(O@S-9xg-$#k_>=Oc~r~^Gue7(t~E1fA(hEHlFP0&E#t>$DKIh8WlD)5J8N>mc2V?kdEM6~gs;GdtijG{&yslIT z%U~#>Qo@q5d(5^I24J|}7du`GUcRl-K0|YzJw5$~83 zBLJ=oGAV`nHJMgyGB8tsM|6U?{p2^S78ZjuGY_9k!l14m?z0QwiA_n?H4${`>{+>e zB!1$RYcrk&2FBeKJLON?W)p&vu+zj3nanfcg59rWUQ`+lS2Qh1YPJU!x1&3akb$&l9-?x^# z_m=G2`(62cx9pzsN2>CU;XTj7&kcX~+XuSHB$9p|@y1WBp>x7Ch;Uq=Xk@kco+;Ml z!XBA#U-}_CKg9?-hEVzY0(559C|Pj^Z~+8tNIPbzSBCfl8n*DDFx)1&@VDJ2prYz@ za3ZsNxX6uCb0omd3sLSH+Y>Bs-HP(-`q^IqZXNW~j!-SZ5gcUhpcXC0{4xq>pD_Gw z4%)`p9pGkh4llDDimU-F*7s|=#wzOh^^Tiu@I~}M?2(ET?8+LMv`_OaMm;v(@b>w= zubJcZB}>C(QknNIRRBfYFGtJ%32hEQQTayq$vPD=VK^o#T7l-@JupDHY_A?cKI00t(GJ-}S&P(9X2D3uMZ z;blvp_00lBxWfGssC1jY{q1uf(xgf@qM*EM!t{=-EdW}$M=M-lB?zz1r6&5%P62-a 
zcL0C@Ru@NS2mjf*ku0+$RwwFr|Nk`=HN+n(_mk4D^X31aTiYjU)jXmPWzm6+kor&f zk8BfEg*x)WM6LGNiqK$=Tv`gBa3ON#No$w=KmYG9Q`PnSA0a!2CUmd^gLsDDvc3d{ z&YdXeK3PN>621R``GNS~{y#vKjQU(-I_v@t?Ex(iblq=BKm?9@o}7vd^#Af(Yd2UP ztFA^8RQFpQ;(wRxu0%0-7>aNH{7C09p_vqyPR_o|Ee5P(oKh0n#J~ud)%E zx1fOcwZ5gU>d*i2Uk=hHQ#tcCpU;lNv+#%V&{s;|5d7wTWt2VozD?c>*F|BB{Mo-4 z35<^)qMTvA`1F=}TcY<12kjpbX&2=Z@e6l}^X|^z^Max7)Bq>NqA;ST& zBx4j~iXjpS3bO-}C=MqU7(|RiSR@h%QIH@7DJiRrWElenkjt!5_YE0*GkP6pWllFd zVLTF_4s0fr5$2bwWs2Zd+Sn=S;%A@!0usBs6lf2GoqZFx|ifCfq`J;rdT7$Kno$ zl=hsz-r5grHuU=ob}U6d{oUdnQ^n11C4}0m94X00ZlD#Rmc06VB?%Q1OvOU{$l0dz zRQH%N&yb}z_%bUCQYYO8Q}$K&&k|~*fXYR`!9IJGqBJmG&65<59S0a|^MI2{b|-_! z>=3-%5mZo2Wl z+`+PaC0;)BqcVigZU6gr1zi|T033}k_!t2v%pc~vt#`wI7m>Mk3~f!18%)ENvxCgb zjQS`ZZ-Cy2*HTLN>AQ?czyiK*Zew`wh1J=pXJ$OZnp{kX<`ik7A72xGQ{Kih+=gMl z4Q1-qhyr9&Qe@A}`OI3uf(!%;1P;!xf15 z?Tq`#dx(-1I9X_AtU3DG8s*>W-*#qoI7AKL886I~nsm!0r;-iu4&7V}M&7W)q2>P9 zi3)ZA6hg1}_t5y6l9M}!&RlTIRLz!Sb(~qkk0sdb#R4DkNjH0({#LjpwsRNDMXYE- KTG$qhn+!R3E42~; literal 0 HcmV?d00001 diff --git a/tests/resources/crashme.dwz.zst b/tests/resources/crashme.dwz.zst new file mode 100755 index 0000000000000000000000000000000000000000..79dd123fd667fa9e2adab09a8e8daafe08fd3d58 GIT binary patch literal 2716 zcmV;N3S;#swJ-f-NJe#3019kK7*=3ZU1tmgvZ@U3lpw8<ei`s1jgg;Pz)DFMC-$$IT3#!xNMn^J#gK2%4c|6D~jX58cMx6T0U1B(Ra?JlM{B ztUr+{Zvk}yV*xxi5M>TdR7yOpIZ%lI(<5?zxMuYdBx??PU0Gpd)p^3mTJ`iq1vh-@ zNYs+?Sj6Nff>P(KSJjiq6uXpD)j@}K2V4>7gM+9l2_P#HPiq56o_J2+qW=FExX99_ zBgGkHv?2(yA_hsQ%$J_0)qIhlgMC6pJ06e5C4fArF0vkAY&_5$kC-?BS%Wx|brFO0 zw;?HmtY#BH)++>%N%8GcSH_mkb>0fWgBVVleX@t_#w{(|UZD)MJvWoeusa@J5YTJ> z7;c5WR|urr*>{$$=eDNVSQh?R_UNX$IKRp7Gsw~IK|$pViFGk3DdJKk6jYSL2&)%A zxmm^kAfMv~&`leUtGQvSF%e@pSaXB6Tul!S+ICD#(fO7FxtxlgL~gBw5$o?! 
znSNEhVcq<$qN1WAf0I*lYDrqZ5k>zEpJ#dgrKz5^{}y#cTiraFL_MqZ0v>bno19hs z)pNLCL`L_eZ!O0)xYycj^ze1* zCfb&!s7wvbt;V%sy6JQP#c)rK<5qXXbD!^r{$esg=r%MmxM5Crv~w9VGBP+C zGBRvxb-SYOTNH!hZZ@}^%uNQxrMNVQLuJIEy4?=wHB>^X_HpXC#N=icT~#Ws7o-l= z>M67rQVU~?rIxFrd|X0BixGy&@~`YTIm|`?G<(KI)frJMgA2?U4>Z;Tqe1Wkyv)qN zfuZvsCIl~;4O;ByjG@OP6g@BH%k4Z}Ju%Fd^WRaX3&yffru`zP{esBx7{!kpMWi%! zV>%wI&N7FyV%g;Pk-tR^L^fa|>PR))Boq=|_7%4C(wpWwe%YpWo?CW_y`}D3zri6B zv1RF5u3a|R_T=D?m0kLc<)Duh*6jS&^!&P)e{Zk;wKdDKms4E*AJ)QYC%~vBz&?l( zyG?mcIh|i2J_fTsTL3kGk~1N+#)*@W2Efi2RW(mLI zC{RQe`~?GBh3%mkO2Zfx%I*G8^6DtHzwA?8ft^vp?kkYqO7>MkTZfd znX^k?(k_uq%H4XENiDL}v03D2WzwrmfL6|0yF?*N8fo%fK4m2%UAD{KCowGc-nN@g zPk9P^y)JvT*eeLsFu}wJjXcl%>iTJww^U}pu zD>^%6{VM04spN2B33~_*2(v8%4dYEvVyu*Yph+aC@C1peff2#xL1V*h_yZJfF=sm( zVrGX8C7CR~an9<&5m7qtg4sjB6##q3ZJ)qkrpdags`*W#Hl3}+mgIYi)3vLps9H=I zjWQ$5+E=k5gskfhx$~sqRMC?#oP5Z4i>j({q9`iG@@;|&t*VMg1lp}wWOZL_GIkod#dkGW*Dv94)_7k zr&zuwIu#WS!=Y$ed?V)^iJSiVdBY<<4ie)NXL6;Jpd=xmwKlb}MBbqHgLN~A-ZCpF zL8O4-@w^V4iNENIm&1P-p_22_kr?|MAXB;HM2{Mnl|cr+$p9W}dLnjVvsO9&q3hp@ z^muU>kXihs@_F@j+wHVngj|eV)|w33STzosqdy~@a6H)ry400x`y00`=~B_+J3qLc zMmT&lEsU?reOB`}BrV~>nfw_Mu3QHa=!#r{+;y8Hg#loL-w%^(MPK{;(vG@CiP7YZ zn^)no;k3m)7sK)U|KX{#`Drf}$K>fqmdv5dC8@&vy|?#{R@I=2uH|;$KiX&UABcL9 z7mBd%;I^+jG#x`*aj%{Wds8|BU)G5$*GIUJg?rd%lk!WKEqMpxI00Q!OWt-bqb2qf zvTc)`q51|wE&Y+kuwJ$B1(LTq8Ws6i6<2OUjvX=^IT=ro&&J~@Tb`?=pK37F1uzVo zIAZ#Fq;>IuHWf?tMwge7X@gxWKwQFd5APMxMVBoFpjNQbpgWT3q$RTyUuC&T0rqi# zHF;@oT=Ue0Bds6CJ4hk6cC!*MGpBAadYLS+q=%?{fn%_Ti(H?5YVWJ1$zd}D+-~5C znW{<3G;GmA4EUs4Vp~F)0sR#J#>0z32aNPaJzpf52`!?CA!0D6UtrW0m}mkv=k&-! zx3M26bzHPZ)idD?S-xIN;u1f`1J?AbhFBiN8|j`1#btK>g04MI~4P>h`Lc(CECOBH=~|re&yXxugb^{uXaGsfO{qsELIV-{w$QjlO{`NTT8G}O(7d5!)<52f%|9CbvgQ6GnwZrz z*S92Ge*kRPE)28dpcSAQ9OGY*=OO2+O0U!d-t0(N*ew>>aby63Bs4+@<`%rnGzxJW z8=`$zuk06vX&1Dft+bPdi`yC~d9@YRxMYa_xNLb)k W!?iC_Z_GSa!jO^#zrY?c{UdGl&p0*! 
literal 0 HcmV?d00001 diff --git a/tests/resources/crashme.so.dwz.zst b/tests/resources/crashme.so.dwz.zst new file mode 100755 index 0000000000000000000000000000000000000000..df14e0fd997e0b449692919f359e45df744ef655 GIT binary patch literal 2482 zcmV;j2~GAWwJ-f-&_g9n0GeZn7D!;+bq@X7oaF<=az7pjc>4XmR|4oS@!(R;xzB#v z-5_Ws!X9zG3HJz0uwVKR*$BAoU@LFO1I=kj*{;z*P2BT2xYQ&hM5a~&Tme%7Qw!@^ z86;8QypC-ojannV61Ug=VMyc2aE_?;4Lbm-SsM z=PhH_{PT?8G#;Iy_H#MUzw|45-uE1XS4sVSF*J_4|9|_l9|Wf9ZRl1PS%SCJSyPiz z^V;}QV*Pm)ht`i2u;Bi_T<^CRB1Cu#N<2*$?vKL!51$C`-Ht|#T@_Y=P(rr$AI5e1 z+5SEYd|dwmf3*822waNapZM$du3l+br3jtCv+I9=*NDhy-TCwk^q&w3VA0ppf5abI zLI-vZb`W+C+_lI3&3aOsb7gmj_Q9w6w)2>EpM0S0pX_bDnCW?KF7J`YE~GaBvPH0i zUvFa8d3xmE)9ruDEL)FVSHg-F#x75RfS`XS*Aiy^UuJ_kjz^W_5%6w*_SbH8Amgh^ zG*Q>va|rq`{u15PkEF0(1WiJmmSJVhg# z*S93Lozv1B0z+#&2;DITYnFGSTFJZee+VJPZVxFdEx3Bcaf+k^8Q4qDOCf4|}8ctrgb z>(51^|4g^06dmcok>_o$iZ?H~jR0XK>T3TyP|no8-nCrIG$;M?{Y;7snapVF9+{9K zE0M|Xk!>y4z$P~it>-cNzUjF+jFmNBJHy1J^c=lyTsb>wr0rbJwsM|7nw+2b{C3<0 zZ7yphekVUD-JsQn9qD~F7&YYq^`0B|eI8A_6^8iVgXL#bsXT0_J3%s}C&&-qTaO%U z|Bhe-@&BvpMfZcAlmx?kJOH1!eg7{nOWL6tP7mN6W=#4>G*~0;Ey?3fTIlD;=L1(@ zzyKj;nQC;x9lKKj+(&@g&!?SyrU+XJrK7e|{j`}%pNx~uFfbt;tssP#LfDCckinA} z6f=3#$>v={>m^hTmEun!++c|c_7MsO)dX6-f2%6>BbJ5N59<}PEPmJwy8^9JXN@{e zeIJ{lvjlYPui5&voh@rswzoUhM`eMk?mG+gsYacr8}$yLSEu)`^e&VK`k42>dmnJe z^AAJ;R6k4ivinc`0eq<3A0W`C=qI`i)QLb=p{O3?rkn&&10BW#`FG>#Y}E+`xlGlE z)1<`}wE_&sGZ|KppwF}tU8!OUo0&aaf&*w<}$N#V9h#my)r01wJX@3oEIz`ks5@~}$ zn@cFYlp^bBjZ2Zu8^`JRcCz`T?K`G%S{eVXWHRemtj6OwjCR)d47HolyB6`# zF=hPBS2l%8XH$wC!|yuGzQbZEbQ#yU4Q1XqlE$B7(YW~kvz6AAlyNp%bzNOmn6;#) zDrjL*Z5g?$WZ7z-=cmfrlA@5IYW^D!n`^Tt&fZw%U^7x>7=G`jPlN_Iyf} zbN!@mupbM8gQ(JgN~dTbbx8#HLbCg3E=}y9wZ8f@~M_St~d_Q1>nHsWB^I3a!#GI(VAM5v@ffh5s7#e4) zD}W5)c7*0}>xMWpu}0@~b^v&Ug9=;@c=*IV035OkZ{q3u zm}1;1iHe8?2dKn8>hZ2jCJ$?!Lzr8=m-HMQ>zHRH8pnCQJy5Y~y7lIL0{Qh`oJ2hD zmFZ)mn8*96c>j0Ne5;gkK~$SxVikl>eaO9}>2l(MKfd1;mdjY2VEKIstyGWC6kjm{ 
z=&t{qUu>WpSin%&k`3C5uY*>6LG0Ozj6Kdy_JAYyn?8ew&VrJq9R|RcNJSrw>Wh3H?d-=Iw`epAjy&t&5w$l{IcLAO9v!DAxbD;4P`e-*z;5$LDSgm0 z=F;&ZR@*c-&v;ae1X@|+%1Smm7aH-#>4>u(K=GrD@n~eTb*nGrjg_f}tu$@)hRafc zk01b%D#eM7xp|r5^;(W;w3CrjMz9p0ZY|HsYtSb|!94~x-_tHibZWQi?qG_I@MN}2?RjGRJVsjnA#BU^WY{$0P#lk%bOE>OiEURjAC3rGd*QFYOryljzr z0ciC2)J8c*rM_kNOnE7(-+;*lMd0MxD02u0OUWS^ z4E_g~UjceCvY{}nHL#(t(rFLv6DE%oR6C&*5aU@eZD}o|vEDh8=1{4l*UTlINxsR>rq)f*EB&S;lr3K!EzN=D?2Ar* zesqNGjAqS;$EU=ot%6=&l;(qb<9~>wnMAsxm=5CVD%X?jY)YvfU+$&DB0eo&L$b~k z!-BQgMPs=DQTishYkObnq^?y5L_$$t3VslQ$7r#A4qV(3a53b=YoGom94 z!JjYfRh%R7V=2LJqe%KFcHrf&2q=%TtC5g5hhUNwXJ#OC=)NW~>HKikfcfD&1t@y# z{pvipqAf~d@dNxWZmBrNEP69Z>WVTS^`GBb_)!FK(+%6nE1nq=qwQVqiFWE=SpU6M zi)}n^Was-*X9SYocpLVqIhs7mvKHT{+{>4s#Rk-x&yKC4U}{y6m$@t|;1As0d3Q}T wc{m=57bJ;)z&P?cn^=;Ixpy1$qEsWRI;@dYA`EZ}%74U~R6z>^dmnkF#8$uNm;e9( literal 0 HcmV?d00001 diff --git a/tests/resources/crashme.so.zst b/tests/resources/crashme.so.zst new file mode 100755 index 0000000000000000000000000000000000000000..a31a002adf7fa8170b3fafb2ac02bebd6c5ff6ed GIT binary patch literal 2561 zcmV+c3jXydwJ-f-Kt!ES09tO47gu0b9TyI@2Pw_pr+M}(RlYLZy|VZm9K059@|quf zSmP~UXmcYpvN2?agKZP=O}cS3YYj|8;ii#^I&hiHWx!ztKer!>mJzmTuIpHJU;KCJ zX=xLg0&M|X0ayVM_+VwE*fi;C$c%sy#T!=tNh%;xfI*4Zz>;Eh6tUDUOd5Jc9L3+Z z!sH=*uwSvmgG~~E_@bStKd422mGcR*qeH*b3obQ``PC zGnT`ClJqhZqtpBFBdK+>Z;nstp+?Cq$!o%lssj{5tbB+_HQ)h)lPLdKR`vTyMm4VX z)Bot{PXlzW^WW&nLm<68jFI2|ATN)06%V?iAJwt8b7i|aTP@qY+3C&N%a^x!9p^ex z$Gy*U@_+X@L?lE(ubdZ+cA0^aH~Z_!>XNiU$EyFOnfL*Hc1&jU;_UNGo_lPifjx&B z!M-z_geG4bS=5|aldhlqH=5lw|F7kL`liUFCSSPFnDiBPDgaU*-xV->Cd^EZ6v0c6ttNjVQ}?08q}p>2%JDTE6ZdW86(w(i9{mS74?~`Ej3d zYZ}I<52MB=yR6!Kk787snVg#q&5cH#S!q_A85Ks1Is*u(MtZ}8p1hLB_|^M!i8C*0 zJxHl!sz4cEJo(yRAKvwtlT78DkOTSMVYe0P;#CIA{MdLD)9c*zK;#fH+@)p=H;iUsCxASesA(&*yDyyY*$V{ZjBJ%L2iopjf#r|(8qA&q+dw4|> z{+iyM9P|D{On~Tp>zLp5@ujDDSc<;-$9^Qx(}ehq@cbw58)mwje4OEG6wN{kCHy1& zBK#n5aP?Hj)g71xy>aMM-Z(1!#pyEGN0ZM;UbYGfiY$b(1q&7`zNt}XQzo@di}E@` 
zI2eR^_vL_lLU#zuyg}F}bKc7PIWgJPp$(hbWK(T!Y&SShJ3e0Q@_z9ve4BP&AyE$6 z)B$FI)VDO1S<%bO7{H0Te~vJw1xhHU(pS?06cfj*#VONfOPAwiCXO~;TITF`(#|%g z&e81iWzt5oHn!g8YbRx6OOO9u-O~U2CNU_w`@hW5oBv&SN&aJhG-JQXSy->HUqvTI zs`Pwy{V4h3dA$zP`d>IN6SQA5UauJ0G*;TO(n7vyF&8~MTV+QL$2#d5EvNG0g-fqy z4t055R{pgKYU6X{PotnEXmEaT$v|$avh{SAd(Fq@N5T zw-b=syudR+O^;jWa4HxEW|ZD3`ZpW~Qr6t-%?b2&9Ezm)Vo{ZO&yWt{so7OWkDfpL z>v`TplE{Rhwv(|Hs*XG1<3BK&={oqEhMP)LwZ}?&wp5Cm`Tx9v*MgKvyB0UImoo;dyq#=l}YDWUYJ%l*8AT3*?Pbq zaR(*GuM-?Qtwdqd`pU4-7t1?W4nhrA+}{cj{rKowoe5QU zMLtG2f?A-jg@=FJ1{n#pLPb7DLp|Z^ekZq3!632c#!QEbW#QXGxpC~azuVe-49mS1=xo1dFZ#- ziL>$e2{^LB4%kWTyodS}8(O=bv5<3PxMy671GF`aB~Y&`O6GRq;Z|ZK?(3tk6a|Wc zetkx(^6XzKh->vp;CN}8i#@fPjhqx(Rw!TZoRr9@UPSYJBJ^uA4(Z1tTfQ(rT6g21vpR;a;Y#iqFsyVFDQsA!s`3uB8iAjw2v{O6;)E**XMkMf~zeh5@5bxe~?5;)sr-rhZnU zosNJ)6(S7$`(7(fXV6Msi}rwrcBVTVBa~|UgRQlp7lVf z?o`5t;6RQ*pHHReY(2%)K!|^tWiQbdWk0HlY&g(LVzBSNL@oxjD z1KDc8%ga?`N04S6FbK(>Bw{$JNkh(la&rVHbo$La(DiZ!Y7$IQ??P(?0wqTTBNzCN zB-Ge#o3a{tGr=~iVM{e;SryuLk-u}q+YKsjsSJKYfC>@tEl-S)7gQ4WS)Yb(Ah}Vo zw>Fu)#R-IP&p8kK$P_y-lo<^d;3;tS1<(nT(PyEe4Huz@%Ir>ZAUHWd)?3E8=p^4H XALf$k|3Z^UE*W?f`WGbwW^tL08S)41 literal 0 HcmV?d00001 diff --git a/tests/resources/crashme.zst b/tests/resources/crashme.zst new file mode 100755 index 0000000000000000000000000000000000000000..670858840ea50fda9422ad9f59d43cfbfbd5cc2f GIT binary patch literal 2727 zcmV;Y3Rv|hwJ-f-ct*`s0E%vS7*}AHRc8$Kf(4Ws0-uLChktOoSG@YU^wVsvex#CH z{)bm{fN<#bski*#nWQ2K00?9fL6xTOn`GTG=%|XyG3uJ4s=jZ$QRo`Lo7T!cr|_P| z$Mz;N1$O~?0bc<~ze)^88ytZI$$`y-X#)^r5k=S>E#ZGvh#Mg%RvqODYr|&CC}v=B zl$e1{Dx!o1GQ0>0Nm23GB;`M9)i>D}A5rrkQ&%JYxcwokW<<(vFQjSy|!ks~)uVJ$G?KTmtv66&roWh`|Bg}AM1S-A_x@R|ca`T!DC{l}dtfTP`TaXLkGPvAOl^ zoOat8y38$T=sOjzXE_$@T$wBggx{c1P^PMBjb>&>^VSS!Hyu85tea)oy%Fhrx8tQe zIhJK1eIpWj%}k93wB3ku4nw9^qsgejsEO4so4&Bni&ZOO8=eW+(BKARd4feTfD`6_>7jb6MvIvp zW2Y-C$dmyF=gNU}#S^1}?*zbPV$28t=R(d1P%AZDs~&Wf9*;}#z)aW-MNd2~P8R6D zk({R#%tnX%LO^!Q(ecPc&x<6;$CBCbcuYb~+l&%sliz1_EyJymOVe%}%W+XVQ(w4- 
zeZZm6ZG-e&zpc;J%|2c`zd=}@eY^How=`S_Y54ujgLUV*b=$U|Ra*T&rNUh%#H=Kw zJa7SIsWH(dK+wMkzlux$X5c(ZvTUR@PFfF^15zk#UC+Co%Iwq8#R&AQAHG6|uPG3_ z$qP)Nymch#g10oV*VG1DS*eVoi`7Zk3rnhGpFcA3(_8cJH}Bq4zf<8D7@OsySQ-S1 zrMy^r)7#PRE~a4_=H0-uu>Qr;8rn|!_9n9R^7@O6@w3UBZRU7vb4QA~O(>Ixi8OWW z5_y)%17#}UGFhf=wn)=OrhJ=U@m0A(zBc*SR;{Smd;gdH{DJ&xsXU6@58@)9Q*Ac& zw<_2aoJIXE(xw8$wVqfuE9Jj>-aNe|(8|3Suqt10wrcv%5=8@rI?Vi0Hm%%QURByC z8K@bk8w#~5cdPte(9Lqm@q&4ks8)M~1X{3Mxb@1ER!r%m<|OZegpn6CNU$NG8JYpV z!wix4v{68`2axGe!$+-(?-?)g(2jhdKgDdIUJ1bN@qeaDrOvZ7JJ<7@OpQhIw81b zL}H19r%RSZ2|}Jpk{lt}@wK#;v1^}v6pJX=#DYkKLPx4ZNv;GN4r%`XRKWj}8|FbD zPo{rZj(tC1TBn8 z4$|y?at#j+2QBeJNY~Tn6buqAQ;o5C| zZ~OLzgThvhU!a_?*UrJW?eEs^3%4uoM<)kIivu^*(*xB0q3ZFyd)LD_B#f=gXC3Kj9zR`r8 z4L8n!{a5xWh&R8c+(B1np25R*kfgyS#4}`NSZ?IG@ziD*2Kv@W3O-2rfYQ+E-w$Nq za{KC<-;gcBn2_1$ZRJA={eu8Q&|^@Z_O43j<-JoHY(oA?@ z0~y61qQOpkcw(v|6G|2`b42fJNaq*r0mI(YGSL+9(a*+Pn^~+UjWWzBTrvpVGBi*& zNYNwIb6ewH-$&h^gP%0QQs-8Y5_>j;-@!`)t2)hAA(7wGfcKuIdHXW6Dwc;q`txj- zDC@@A@Ml^0uO_7j5w*q7#hB%7I+wy z1E8M!B3^KtctYd0ACP*@$kP%!=pbCShUM;RAQ@-0TL>XRMYvF8e_e7=zZOgP3v*Ws zc*7EQlPzb*o}KX!g*r9wQt^e{e}lA|YKtS6B(-(tQMMg%#=YhI*c%MY_&e{$TS9sA zwaME~4AZKskH>HdEe7Uz73xbfmx4y{@ilcwh-nw)vNqTPU^5%ZviS}eoRqMr>os+;$iOyZAOt3XEqc}6N-@P_Sh4Hch_PZ9qB#oBE- zgbSHlsum8#el`se#HgrLIA~ED5fjdpq}fzqAqWR#rB>k~KG4;KiJZp(TsrUT6;xHW z3NxZ3FnJCeAlaV@q`6ZX3yQ5=n$QQKt46ie3CzHrko@xuAD{0JegoTHfTyNQ-*PDS z9PqgSLwrH1F&2+Id^fj|Y?QJt+BZR`E;SaTj{|m6`-fm{iS~g>Eq?j9VFzG;BS`n` zp3IqMwDsU15OcSK;|8Hbn8~+88-tqh7(f@3en#ZuWLQmp68020eQ_=|NFlK2c|mQl zLdQmB<3?6t+r$}aiT_?&)CuE$q{;7xpcoB%{{z(RIv5fcu*7?@mWF0Nsq!4uFEqJj zeszcwTVUCtP*E{Kg-Xw7eoNvIktPM zbhd%v2u#!KnfQM&16#?c_VTX8za}|qO3{V#?)=lg5CRKH(fzk^o;3aKwj-0^#3gIs zcjD<^jb5fT%GV_Fc^IqgSYTx2E^mQP9hehZ4B9khD$A6TTS+rvOEnDkXNu>eTAZgzte(rB;e>|$jbx3b hCTnhT!r!P0{wBK2V)$L#qiK|+8h74j_I7|jmEJz^YVud6Uz7Ot zOqW;DJlaDVJVR*o_+FOhY557!Xrlfp^}H}Yk_LZ?e^+?kpWh97ViG|h7=Q0oH-BF6 z=IN~zN}u9+^aL6DT=l*ke*;FCvHJgB^DPg#3Ai8_K^esFygVwzt10|C_Lu6_J)bW5 
zp*%0iuT=gW5-8%t%KL=8n!)qn{Hf=A%O~2^c=0|ve<^r7#O_xae~EaXocDF`Df&G@ zk0SpJ;d@QI&&I1}ze{4hI>7q^9eMPDKfC-rBZYjFERc`q-9W?ZSNr#Q8;1zto?z0y z8aU4@^HvNKf7Y*hei!v<*QZ+^Ws~#4>wO#i`eo?p5A^o;XO`bF@G2bIjeo}bs-7o+ z+Y8?cM1Dv76@d$w_!GwWet!prA)&(w3d8$uJnH1v;dcl8dF552Pwo7U&aXedC&GWi zuSed}@VqFGvha63y27JJybp@M!dHd7?fr?v_hLMe6G9)Ykb>LO^aBY+$1-(T8j}L* zvO@z#E0E8Bso>vsb(~+!?~Zc~1ey_}5b3xfkpuF>xUqZ`kK0RJxA?)3@w=uR_%pdV zo+r@ASHAZLKwJ>nxHjN3LZbA`0{;Gm&1MvEQceSR zR|_C)+PbYfA|?xLvo~?TCaq~Ur69Z70w&3yK(ong`qBRh$jPLxb~jX)X&kCN=cq)I z!v&E?#Mj-vuck560+`R|nYp;)z-hlr@#qn?BWlN&ks@13j!fo&*%=s`nUpWEHZe3a zuqt9-Uu22KIyuE$z&Rk|%0pY-?sy$kfQjzLY%K`g#Tt>P65l zS&{YjZqYXzeb`_Cg`};B5qP{g0D>X%m{0@~KWEb4?Pzl2y=%!4;nvIRCcsrDL;XRN z>oK-IPSCGz`{7gQCzHv0@tz53gQLyE9en%1uM)TRrazU=^~R0<>NS2U60!su&%Zmb zt|ePZ{@y=_>N#F%sBaAL~hl`Qa*6_m}8!0JSbxr? zH!;C6iXu;p>q1$=(gW}-B9ikNd9S~_==3Rc`1#n;Z#1T^sWN#iLrr(Gb!00&m?MhJ ztoTJ%rECpMP3%v(q1xnAOlnu!VR+-o-Yg!4nn1LD()gx{Er>R~INKs*0asLZF5Hl) zek#9p_P(*26kue_m41^rYVS29Lv2}1a((<5WBaKA){~Y%}RpRB}P|=LGWF;^tL&S&$>CsoZ3k*lEsyq4i4MFSAw@R>`Q^g z^X(uVyFu6EOy0pK9f}2*o^dA(pG#N?EETEx!l&w&)ww(99#v994u4t}iDn%W5(y_XO^dH)U3w&=b%G%pYtj)i z6aaPpXO)t9`V~Pd2>yzkWt4tJ zt_8qf)^eI_k}S}qD*_J8{{j>`OY-3*nsZ!Gt>By#PzNYe75YgqPS68CEYFJK+Fo;R z%Q8+7iM#uvb;RnlN|O1Z9LV}3&ABXYXH6rq+XjR|ax~}KlD2hEKkzYki#<~odh%Tz z`Ge*hmaDjZSWB?5?jqO1AB5twLSkgfk<9P(5t=sO153ud&Z6X%H0Q2JO%3n)#Y!zg z@=WHobV+Y9x#*XZOaW?9mepmNb8K!<{7$C@XqmJEmCOyZ`vO#nSlC9B4;LJB!Q-f8 z(hATrrv*xSL7;z~+Tft2Ij0uH=sU9Rh1|C*zQ69UiMLCYU0EeL6lF7YYy zmlB8w=uxh>O}urglL67T>`xG?Xgm=CuO2^@`*j2LQME5>T5wuW@>>}|e9zC@xIfod z=YCh@=l#|5z6qEZpaS;u%pP5S>h#mN^Su(D$K=)J59W90?$xvR{rKLJPceAkl^=oz zeg^^oEr9(|o(H7O%lobjkRbphqc?v?__MHy;?*BL{9Sov2>8o_SLAzD-dgbImfs2S z>(ldokl@eZr-1&Hf=nB%HorUcC=Ks>d-VQPd44TBnrh)?W$FJCNwWR!Nd;%(-X})v z3!!2YC}=$h1WCYS;OhexdD>hk^E*LS@I1;ys)EiFHNDU4drmwm!t;8^5x}(cJu7Y= zZQ@lPhPbVHI}h$plOk_y=-_?5M{)dxF@*-7hWQokSEV2akWke?5zjk<1Tf#b`dt)% zl}Ny<_@|k-%(q;C{5>r1%i+%zfA08|b~e=f8YhL}(X8K5`G23-^R8ZH;cWAtJbR#)V;6I-zUG2 
zx~2QmEN_MQbC~hIra$BSs^xir9?kPSGQZyW6^AEU@*n!062B|@6z)|3o(IGG zzJ68y9Q;Z5J{QmP`aAb3p!e?DE^jC0v^rIj^kbL6Xlg5uCGjB`L*=h=f~{Fh*b=v%C#a z=75y2;SXBHvZa>TQx!zyac-^^OA^YmC75hWFXC;&B#L&7abZ_OEN$)#ABqVQL)&ud zD22;y&FewNIVKWwlFByCUR52FhK?nu-c+e!!VQd3D(C}^VG2bganAFWIg(#xf^Z39 z4@x3tA!8FzNd-akj*b_EGJ<-jyl^S`BSlJ<42V)zC7Jw_XKK&E8PhgpTe;`mi z#YFiCc98o!8i;Sj&qGB z2?FvEfH;_cl9&`kV$R~{0{kt=UkD-C%W=6e31HzuoR1%(+n)q;Pw1eM*zd2aAM|oLY@S=6$c^bh$D$Y;2$nP$6qe>Fq3(Z&vk^GTU-&t z-7eB8j$|rR0}#YI%aL43rVVn`Xs|@48LkPwxoNN(BKoAg8$1lqZcsFy>tu=%Fe`VnamS zK%_^JlQ&TqN-&1ed?Ra!6@0MaKfIDp!!gb>2BQo(<%QlTcmNH`utp&xx;{ei0O#fd z3@G4oSr?J17%b|Fs@Rjq4iG%(c*r3cg9~m-U~}9cjTtf$c+r9dY2|E6m;zC{xCmGv ztH_YwQhItpXQ4pC1~+TSv`uLuMM$anX9ySAr~#A4OuiMnf@lQ;m}9Zlhm^}XZqyi& zX-(XW2#HTeRwBg^t5_!1v&*C5yT5eG+wAkVI94$b}&t00TH1C z3TT#s%mVJS!U`dH^M@W8;jHTBE2R*GDx0RjT7TGj!+y*kdM$xVZtqCK6&&e-a6Jz{RaMBMn^GtP5A~7B0^y&Y zeMVvQZN@1P7EMV%IA^k#r|0G09A20r0H>ZDMSSu}Etnf93XtlIeRCGEoYOO6_U-fS)6;@a5Yn5(8nifLNKfQ-h@fdv21rD4e+BXITol5| zBjHiEv(F1ZUq5q3_l5VFklv%Eqhs-+qc-walX)R$LbLazyrrTE;Ds zh_>9P(lY4(-@y~36M>I&L%$+Yz+nOo&kITya%dq27F1Y)&6^C6WhVF{rcHt>VZtIX z08ElER$hF(-B1%u!vcdIJv7Y=1f6lhOrG6_SmHsc1^`@WaRQ4ElGLz`S?jW-wpL_j zhGd3CfoGCpGK*vU>E_BHCw2ab?yvO!zkNfue`86N<>{9sP%2&bjiF->jt~*~=l3T^ zETPcoZSzklqKtGh{xO{)4v{NA7LCXx0xy}nxfzebgT5B{<~xPwKK7_$;Lva78aH! zWeOVDlq+FmYh-L!!pzjz*ubKWdTlK%P0XwcSejZC7#rBw*HNpDm6ef2wUMz!Ma_0r z1Un;B`x-6HY)y>}%V=4~qAq*P%I&MFC^EJ*v@$ieFfg_%S=OQ+ z{$E&0ida4QdV=L7E31I7BvwTVNOI(>$(5EC)|ZhjC00wKTz#FDeWk7a*Vok&tS>Aj zR!y?T(8vZmjO{B+l`2=pz|^FiMU4%O>?;eHni|-bnJ{XMjE(H9c-s}QuQjzawz01> zu&}hVDPL)0Uth0@eN|!mIxWfxS{U1mD+na9Li_Z+EfG1Q;W~@$|NB4dNPLbP@+-DJ z6n%3zSjfVV0-F^ed^94|K^o@32AEVny0XLd7TYQ0jH1y`^ewt6#i)HLDaptKlm-kK zOJ!#66DhRIO3^?MQYB67GBL5J#mtIQa0J&1z!98+BPgUyObtxTOpOeUY>E&vv@2{? 
za0GwoFcSQ0q@|y62S}n`r__sAQtEY7>Ln?S;-jJFuxSj7-i^{IHkH9kkIYmg)uv|D zn_AVEBHa{7=<>Gde;VBw_V#QHYcKlMS$mGxIHTuZ`%Qmev=)(#^8Hef-?>EOsNS!q zZ-~#g6YqUfzKhmF+0j#Ky~$@-K|`^s{TVH( zq1wfYwth$LAlh0dIxMvz*i-2?1*bZ)XK%&7wK z&740lc(Sg9aKW89l{+!9a1`iHo6|^<0+=wkoBR^ zsY7j^jn|8?(p`_-x%Sg=AHs9YCklL?kxT_p*b4Y#;D*t`B__B)akaX4=4E8%3K-n! z<&@yigu)6mtC5gbPz_>H^t^V+$T{Zvloh!4|JQNUG|?-%W@JIxfg4dl7cT|c(_0Y1 zgOG5J%mf`lI(2j8STzB(;EtA2An=3yzteFGZyRfqL)NG@s5Fyl2Rl{TK4TibAjW?< zB_-p)IV@<^c>w9d2Kn`8jaGbdi- z;&sy}H#Q)$Zr=f10$Y1q2s~lSt@}9O>?W)6gcWWO>TSI9TSCEr3)a2G7$VU9C{AZ>@RCj?@qulnC2*rif9a;2bDUrsFELH+M)Ei3u2a9|8E62 zj+c|VC!qQqR?%6!CEq|@+)887BHi%ef7YBfjrwEaK>SjWA&mp>iWkIbi-&6=!+>4T zuzp5on_@%a>J|#8Y*;?W{6@c9j;{)AZuuIFVMk$KA>dQf`5UP5j8Xq8-M)}JlS^JV zM1kSe-Begffnf_JPt$pdg}-hXb1kI%Ly4Y#TcpTsLT#?HT~a32?Gg3d)&I-1SG*h_ zHEnNV(Ld-7xx3_3mTnLh#+F35I>EbXu>HCACrlSJgBv#`Zx&1;Vs}oS!w;<$n`boc zG*E!Y5}|jCJoo{%6rf14)8n=E5O=HW6`Aw6FWvSHIove8h@9=b{s6I}=z=s%>6*ya z>Y0z0MGv-H+`+Oe!DSZFx?LNL(&~PuF5vyvtO3h5Q(Pcr5F8Lj)PnfK({%(Nk3!7wvPmLJY}G&wp`?b%XNIL zpLr6dP*gYzRUmQgaB{D>U+%0Uk1a1dXaNid_Q5D^IG{1XV4n+#{5~qO@VJI0k^<3h z_QnVzZWp1TiTS?_+7r7KVKj_`=P)fhP#eTE<=EsRPl6!75Z9QqU@G>LHZyw|o~s)k z?~1Q_=y3e-!K*(37EN{u*m-*zc-+HI3I71_R1|o7Y`vpFW!6PcphfIFGN^Y$CmF=| z(22!XX`V^N8+1(xc?nLrH3#o9cZTAI>MlSUkT7S@<$(YtpjSc2#>r~hcbR_`_S_1)h^i$&W?F7^vF^&P{>Hg_cxNMOB zr|T)aFj#MQ#DX_YJ1HN01!*4}k%j4`TfXn4AGKtH+2ZNqXFPQ+&OGUO6neTeE}p*8 zo+jTSvRS+v8W({=qkBfkyvGwa8+*F=t@q!O;Zc)C`-Fx2~ld{ja!c4>h@N=KL`|M@djzOnQEDJ@TW&+DgdcpFiHTCKdi8zQeZl6POKc zfFv0r_SAOYByc5I{#v-c+!2>2=^}m~qwE@5ySxQ;!MoAcD82W$l|fs5i)Z+Z+ctoM zX!r9v=ZTGR8Mx2qosfu?C1E;kMA@3{0^CulFqu3Ea_0)?cUK9^=A3}Hxw(8t+hscA zTGDTrHdwyMOtxar4K*I-q!Y>E*-F@eCKS*7sMzW*nW6R0iF@1<=XyiPu#GNg|GQ=r z)(NE>IEhE+Q+eLj0IP)9Y z;~?1{S*Z1=;rMd*sJTJ==Gw6Gb!a)=nKx}s0QA4D!M=O!qpi^KLs~YE=^ToCkVzW! 
zn>0hnXXm>KL7!DM+zvkj34)(PHN=kj9h&)(p+x@Lm0JZySB3xK{(YU{uk9!f!@x1c zQd_I9u9-;O|3k#R=bDL?%{$J}{2V5jn^Tip@s#FB7p+^du|gJ+o{aAhjRsGcAOnI2 z!ZfcH(Xjs1%<}J$hJ5%~QpL^)CHOvp4K|ELI(pJuJZV>$OHN?R7v-d_{0w=u5wRSs zAmU2ZKB8<;nu8ffexxn>Fe=gPPs5ghxC;L3#-oP2MN4RmegUmNv6=aAKHTsK1eiGX zA*CR?1do*OMt(m1`b~9xsWoMVp^JqmC#ao=b6V?pEoG>2+NFHuZ7Ypc-yxMov~3_d z!EdDtgNoVLRpuWc2R2GpT-L0x&FBD*o@mfy?8s?nD&HK6IdAauY;8PsTOw+k*6B~p zUNC7hQ><$Pm1!nz4OwTp2I>>eKI?&0de^{cBV$r`D{?X3-Z{SPbCt@$)Xl)rnfB#J z=z!4%v0Pf5EG^0MxX_bU{%vei$UqDkCcdaS8jmUinZ*Ry!U*{|eU{K*xp~yIZMhp_ z9s^cR%p`KZ%D5BM;XlDLvbz{^P1y}TnPZ()?2!<*e9;SPG~p#(*=Nxm;C45 z44l+ZY_>aWHnt6U%)}^!&M&l`TFdFx5ZQ1qfI zhL=k*UQ5K-S2fF|a4m3^5}Y%8)Bo3Y0(~DSZCi^8+vpXC5en=pyjVVaq;G={k=$5g zVPaIT-m)>UnJ`1eU$1atYO2?x_ML54*zNLJZ9;cF-sqyTB@HAEsx@XtCe)32z+&I_ zJ{?sj#O$m$n>_+sEq)7dqkiR<7hZJQ8?QTowkVv&5tR#}n+o`UFz@bOPX6+eZOrWh zNN@N{?e5ty3?Qm@?9^9PU8YM%=bAACFiaxd9~!BTq{wl`$2nollSg=s0{dF*!HPkP zbQPg+Y17aQ2zwU>Ns8-`SL9)qf&|p4P5?6|$56VIIf5@<&WM}pD}bzey+@kGCpOQ~ zMkHPPYbKO;WObQ358ERSA%?3FH{8u{tM~V5WA?Z}4(VoV(2IkvpzS7aX<|2vOC5<1 z%!`Z|s4Ag&ghUbH?gK2QGp$zFcqROqo$rS`2peItP^0$c|M~mQH*26bssNLl;P&?E zT=ivWD7Z5S4C9c%@TcT9vCx~poQt|FcRIs%Uzw^jfiuD2*dRG2UUH`G+$j zPM5z~VsyNX7YS#VMwt+0tCyNS7@a?W&q_MLCKPF(sZj~fMYw&F7>9@kkHny!;XV>y z0>guF_*=|%9Z?VsBJ^KEpKEV9kiD%FAZKYXtPi#d7g$H06fF4N!1S@K`BQMUXL5nF z_8yv-FGLpND>Z-&GWGDI%PVJvjJ-$OxRZa`U-V{XR9$b)4sbjn!@j!Cp4nq%#F_E} ze*(+i@@b!B>`brqlH|BQdNHtOhPx~e{>~9BfjMJyv}r0FhT=ARtJx9qgTj7zg~$8K zc3=MD>R2w!60nCnof8`d9&>u=o9?s5?cfGmk?hL@l{!oNj1xR>qiWXh)a2K;S+k6PWjCcGboj+NR%FBPi3>e58o9C2-kI&jMMJ>f z?8b<$ICuu7-ZD-=M{VX0cX7&+0RXy^T|bslwC@Mmc;xXrnXle+en?|6BhCNYxkNnA z`drbg=-Y2`M8O%(7x-CjJYKs7`YC~CHhAV9tM^L=$hPv$hQ-!dX>(7&uI(FdxnSOo zK6BilWE!2Tk4(Xz!t|Z#pAciRYD4^CHm@@)dfx2#8T-=Y=-FJMrP;Z5=-){mKHj+F z?yO>4`y)x6Y1rfCIc?KE&@1xSf297nhqDm&ndP%(IQQ)=JlYH|AFuKhK12?^(LkD- z*Z-pCc-hT)Lij5_V} zm#%=s5TC>{=_fnh5AnijpgE`e6PV7jt-meV zo0@Y2Ly{eP`a1IN<2Vqp;QD)D&9XfCu~S4_L>&MLsLBn-4}uMtW%O?ei&8un=*jae 
zivNT-rZlQrSq5{#680#a%@XVkaI5s(+=9)!8KB(oa5p1-t_Pr1&ani7hcsNbtXj0QZ1?0hV2*CgE=6S+I$ZZt5~Nd{01)o()m1s$G%YP)a^a z42B2zBEmm@?nHOhJGB}r%*?mM$v^idVo6)37BOgNRywqsr*CaN;&r-9-_NkOy_z)Z zi~zx8MMie%(;l5muR$P26WW~tfNVYW9`?XF=nDZ5?nBbP-8ec6HzAm|ye5}iF4tk} zTc49kAx(C|hKBY7N)JGVUWo<3{J2puusS0!eD=|vX<{|)L8y=SDgL!|r|wlNe9yu& z*SiGYK>(rQ5-6cH4~y|EQ0usdDwqQEQU*VPncM61&>0rX#x3fiBteCeLylHe}5saZR}j>#Qp*VYV@appxaN zuHr!23_ON1xMl`u*WR93_`&2^22^_m$zW<)X0u<%&3(1~(SWh-fT6YzP z5##-0TNrD4j+Pp#h3SofTC>m`msqd8eK%^D+RZlW?aZ1qDw|ikZvK;g)fPJjIExnJ zT4(nW>7X}I#FUzpYJa@sca|WLvDxZA-n05AY@VpYa6adWvn@v8qyr1$2kbMkiy8iwRIED;6|h&k|fB-9Nw36R$54s)UWP$O}%Nj?S0%^o7W3n8L=5`Zq@VT z_FT0Q=16<0LTOK}67M^3MOU`}FP8v}y_J6@q<@&h|6~0tff9{_r zf%dF_Cs;~~pst-1J-M}iSg+{>mrMYbmE>q32Zq*H-ygh5VFH+(^+DuTvVb9mmeayn zRm4cR>KHJ^+8qOspu1r}1sw2SnI|h*r zd4k_ytT-$=|C`f)Mb%+#W7uhKVuFSj!k?vcZ_7vV(+oVlM5+J5~QbT@juYLY6we zWGNn)`s8Z(1Pjn@=@;%^d?ng!X*_4Cb72WFJIXz)-i>?R8HgSIhu;okG=UxF7sl^o zTvmGDg@7=fSgwB0DAiW$rGiIa@=+#(oJGnJ2XSsVKcP#6oFUEp6fh` z#&InRg8|MkyTL_~{QnW{2-G`u}0O z|JNGfd=kA>L=&FPG1XnRW4 z${ouU&=X6li}J7x(muiV<`Bngp_LUba)?pKEQ|DzE}^3r8%@Yel_M+d_u ziAYYUo(k5}|1YzfTIDZ|W{vOIJGc1%hvax)7u+^+{!;X23#g<2-<`AX!gwdrs0;4C z5hEejP-13ZyokDpw&?l55{H`c(S|)9zist_b(LGrY&)#c?{N-Q_||RY^aoKLB@tf(jG+EEU;! 
z)lQ*Cyvp~;t4?)=I(W277??)~uj=LCQ#c;N!4{W8mK?*f(N#hg7c|Y_g_*o{LTq~~ zb%oamp$B-iTeAHBb9VjzvyxwVWBQ`(-9oo?ql5lMq~R;?j~k*_>8FRcGzMJ&I2ev_>$E zyxV@))2X+fI(vAiL`(h1vb~H)cIv|VPq6$COYH-KQw^WgM-HThYN}R<4d;)jffK#a zT>b5C7iRZ5AvIVS9QD-D2Try7|9DMUo}nid#v^hoz8p9a`~NllC&w3Isj?i`$|XUx z;T-#OUpVQN5zche(I`gv|MI(j4%rv{#|7m0YlYDL=_>vI_Y2GG|1l6bzEzh&E2QpD zJawcfil|^6O)KfQ0p13Z%mgrbSICdh9 zm24jfbQZ1gp-XXri%^tu5{8Xfh8kD|-waB_;q3+n9aJD-#R+`ElJ#Z7iw6@<9=f$3 zPraG(i*oQzT+paFxWtE|_9f$FMehYzn6b#9mu zXwF_(eu9|1GL9Qz=U-*{Z*mep3Ic_8q0qOte|e0?1PNW_3S}>4o{Qrjk(72y!Mcb2 zpx@W^@@u`5%K;E@sSNyA1y^Z8nKfluf|4vtI+MUd+F-@Ew;5||CH|LXWwO|EDH_ij z{wqh%KgNt4Y;0yddT6~4 zHncGiSxU7FV-|26DyFu#Ax{749d@S zCkzt#AIIrwW5fGq$baeO6MoeLG*Ndb9(5IfTU||l z)sYg}^>OzST))a~UcRuqAk6>&mDz4(cSYBK_$R^jB!%l+aiBkoo7b0AyMy_V+4O1? zT80c>=4&-2ucd&f#?$A8CZ~O4Ds&hsZFMMpqh~k9H6j3~oNAWyPSg!Z@Y|;dA zEYS0B%_@T&s1mtNs+5uu5fMp}B*Ou+YKQ|t5K_xVsvTl#6hjkZ3=tSeL`GyJ5=l}< zlB6(<0k~M-+uhS1FigTOTsp+7*E6kQulhr8j^{!H@J-e4>gjIZgb$thGK6}j7j$6a z{!Q=%`;pA@wU!xjoU_gs6z0|u!}rR6*?|I0Y}4&+=YU_H(X+P}9p1)Hz~cQ%%XR;) zPh|iW1(S>ITb?qTjlu53s5!rk4iI#5Pqg0y0s5ST*mC!0aUB)&_OUi$-R1T+1bI6pLy>#psAh> zmMt+w%lY2#dZP*P3VbIofA7S{?*p0xPdCDz9)AO8_|+ijJDbM#G3V@F@7(r?;Il`1 zK)^?|p!jGrQXvs4sYOp$nY~g_hBi9Z()Lo-qMqDxQlx{gmX58rP4&Kr3P1JpsZ_+k zltW+01taxvp=2+RRSNl^3*5gY%--fMgAUq?*E6CK?*;=pR5m=Pag3kpTt(f5A*V_? 
zp$H}ugDxk1r@oh{G==yp(HPI@et87#WoQL2Q*bV?0y`ySJAeYB+F>b#JAZKLJlCSh zgayC>twnuG92RH^V-|d}r~?LZ(t^vWH8~=xX`N)^xJc?Zb+t&Dz>M(75Y^eC7%$l^ z(Dbukvw&aDGAr4v%6TK~t~j+lf;2=*rTyIWX{#^M2vD~s#B+Sn8Spg?pX4i2*6^QssslJ+%JzA;`tKG=^j zT&dp6t&dC_&@DWGxZ6A1C4wDb$-m8*u=_cye*aVIoQ{5By!)njUhO>jkK#Plh12_h zN2vK7E}mhpjfMUp3mX^HO6C#cD%E0Pl+(#dgc}x&2631W(0=~zarrM1$LllfN5|JC%G40*6G%I7Izqr&SW6nO7 z^#g1{1k&ZmnTy;MZ3CY3;=`)QtQRFytkMyskxO6tGG9nc*Kk*?3B}?(B5jbGeegc8 z#rP#Dh)~wYK;pLN87#Q;AQoz`aAxZoR%(FGvIc6?PfF zrqCzVF8T}a+p|PMQ^Ilz4H2Ry)?F$@8bc)2EsEa;3Uql1z%8~!IjxYlBv$W3*Od~_ zh;X11H*MOE3B(|Jl#l#_7{5DQZz}4T#I{>*cb?p?MbmgH5*ci!ZoW@K@bCAGysID# z6R>)|J5~3FSM!)X1K2%$r+mHcjF-A*|8QDMyVQ*f?0(@n=MQy3t~aqw8MfP{U+t-d z=a#_S23UMks7;P>K^QMKcS@)?q0f@`+e>Z?>l91SKkIge9k8Sm!{GCAM_t-0nt$f& zO~cDYn_h|L1IC7m8S5v7Ra$kW1`{)XMBIVwaTHuj^x!r1NE3K?UP8>-%$$ymK^|P4(>)YQ_rt$#q0Pg_3zrmp}1@?5i zs;xe--K_6rgY8y-LZ8-mVruu^Z7*HlR#nmM_Ig77V!LfSOt-tQuG(FHsFMy3Uo^Yu8w{@zxfNs${^ zM{-BY&<`yez*IO55rq&9uQd_KK2MkfONC{A1rQ!Y&dAzU&XMGqFdPNStr#yufF&Rq zv?SmopAJ(&HWjP@D#aE&JU-o%=s+cp3xM)!n<0w(70e>cdWJmEf0{e#wpTFO=eR_Xv4}H=RE(cs&qMPofb4MOd z-0j1y2nnbAU3oL0p4Qoto^!sddw6JYh0(4}uI%b=QJ?#wjtQvM@(%^>H&~YL?pc5P zU4iO$gF`ICJl9DG)I&B^)hiubSyvB%Ju1qchywNYrD*!!Lj(km-s{Re7L{EXhI^!D8oP#foh~;+~;}sSe1bsMhWgd}k7+ zz^e!oCoUjG-r@nI0Lz4sU*dRv@s|t@Y+9^htNdO0_5YT$nK;6o8Gta{MS-obZJoC8 zE))6-L8guzhft3iZ3w}sf#-zb%%VtR3Mm#3SLP5f~obgtyf!G~R5;hwIb4QZjo`2Ub{@MJgRe1XU3$O`;0+#&CGub7z7_xSj~ zF0!5aRLbS7qO4|x?<}}#5kxm5qB4cSW@(EOo3sa9i5%{B{=9axrYU?~-f!s3E!}e3 zHsXe}y)GJLv0ynE%(?iIw~AOII$`8;;U+hI^5MJtO@RRbVt*aBK<78ke1M!R_*giD zGOq(koFeDU)K}r>F*}wRs()$BWzUH+k|@4#St5xT4S};>^P!=-Vns6&1x3^qe`Aie zROQE1*BSA`{v1p(n;BajkBg5AfGpO|b0_;xEda76`+*^)j!znkFab}pyHD*LrT(fa zsy~Mbf}Kn4U&6yN%wRH7nRXx`&SjVUPLC;U70dD?RqTk&hr-y1UHH#JAHs?|vyAW~ zfY+dmXl9J4^G)IT6)HDqtc*kvi&A_s_O)Bb_+PAAzZpUHAPT}z9P_1P%rRvYUpb4= za7qXUEA*a1E{v65?7RswL4zHr(#c(kMv|m5tO0U@3gbACbI!3U*#`13kY*G@3?YgN z2q6MOkU)q?AR-coh{()HlR}#T50J7krmTbii<8bJIhA?vHrqj8C=NWG)HT;y48kAJ 
zo1H8ErhsmrI%Q@g7L1_A%iC@JQu=@n$3x-^B;1|qR=nz?W*8Y}`wue;8q_U6D9>Rw zgSFjcAIPXoOe4<%Gp=F!3K@|w5(V!eaN8N~Y>ExK8eZb@bLIe4Td#ihf_uOkGK*0zr0OrjHRP?y(ijNdI+m+s_mc+X#G4*p?Rp5YMT5v|ba&tBxVsujviR|v( zeN`Xg&n&?+g#d4RVP*IG$~H5ODr?hk~>Alp@S1 zcfQpG2})0Sjq=~bF{g(}xr$l=)a=Gn4mS(B0wO}GpAyVP$QK7{_# zpM86M@#YOdxa*@=`|rV~14258BJVXS5 zQztN7Iqyu0ML%wIyRW7pUc)_W<`g@$7=6ctFT+R$QkVGssr`!{%N{(tMaF};!yIUa zHd1S$s9i{;$!^A>l@P9b<5#Pxi{CPE*m4d*Ps~J9M{#NBrR%2w1o%EgECfzwsRS=k zfbQ~Ks+4${@6}Uz9@MECVQ@woN9cV?X=e9eMsMOV2G>teJG2==p$`xNYJ6q%m=5zA z<-+G@NNJfIVKEwn*Z?=?EGQ9$fQBP*yRt=q`I76osPKE>&1pQ*r%XZ8k z_wk$gXd>6SrEG5aO$zBIQfW|b)mA!zZy~H0r8G~7P1->ZIEw|s&w$BHklY9tk*=Mh z(K1E5$-3U?pQzen6?s~Kd0j-0mSJ7?Vu=CDz8Kr6(h4s5WjeSD-gLJHZqe8sp+EDC zs7kP123|ZR6Bskq9R3toxp>Er7@@7MJEZj1uWwqhR>DmB9MMg%TjaAb+^~27%rxI* zE&+9ExGOJ2ADcLxQxoXX9z7A+$Lmkri8lSM$z$A&NUdOR+w4$Kvhx&nRr-nBR^D`L zB8zYFM5rm$EpU;Xz>YUs4A$xwOk7?tmLgtV@BjpAflOopJ!k^8uaNxNcEM`{#T>;> zD3eu+!fHgbG2!?(t0B;1;EP4^grhQ8xj#vv;FC|9&Yv`l9RfNUKYu`5P%UQ{zq)0w z@|?@(zBD;X6o~QN5rb&Q`Y=2QFYmTF1)&sq@E@_5SDoDz5qPuuy4%ZpW0^$kT_J}k z)@pSS2ca(SfhUM3(0xPVt?03`-GJ6qj6gRbVg48a_txqcSKMGMm@PxJVaA}!5C?$9 z7!MuH=O&jB+Qk6~y_x~%{=4UKa<5k~i9{89tYBms zuF`~0!*fv&m>^||Qzb4Fk`>7Mq zw}SpcxY@aS_(JLktu%M_8|XvsW3U+!VV1dlvvixo()P7k0s+vsWyD+>hg2X%(1w7xD7t> z?!lm4YPc$o@J(gdK#N>L@|1E|H0CiGJuOf?Gal&Ip5k6J$)GH&YnKBhC;+x*kW8== za>g8k>b+tA@PlrOeg`|lJC`HA%~?|l4bnnm3;8+h#BWUAzYFDh|4&ByXX!K{<<_2#bnEHyz8s^p?i73gcmRF?G60i*$fJfvQO(RS;;jF_pnwJYADtLq zbJ`&^^Z%Ji{7?MhV8w5`H2tl^3`6jAhGu-0hI}t`aH#Ry&I>34Z?VV#E7%Z$`~P7~ z_(Fp7KVXPNb}(VhNV$PuCU~+XWLycbiG;u3MWy^jeieoN#s46_tDzKDB~DlY$NlMv zXtIvL#PMPq^ARHpB2akPFIa!vMJA|?0EY_#pQIu+UUb3!J|DZJ6{BpS`crWJkoC_O zPEma%HB&USp2{EnrFIBs;SsB(z?*Xg_&|EmNQQvqm z8S}P{7x%zEWRKRSyn8*|>8kCFhORfV*lSjkqAyQ1M}@g<>dr2+j5wKucW%ovtP>JC zWdVUus4-27MkI(NNl78Y0nwl_$}z=gBq_`eq96QbWu%41 zenkgEf+#3j{c_Zp@wp}ak}8?maYZ~5uMYf~K#DM~B|t+-punRL37y?+L&9qY^lq|D^bQy_^KWo76 z7>TAE5NpEim^WMrV9gP~y&UNHeLvezb1^FRlXau_Jt^((K@CEgzkg)hy9xJ3&LVhdz1?&izQoXLg| 
zy#yjnvx<12sN=(gW7^cDW&`?B!|`Y+vkzRWZBGY5@U~L6-3sG{*uw}LSf_HC(MZ}z zg%E4tZxIKH*;>0~oH8;VTi`8Wh1h?B$P2BeTDlf6SD_;cW5Ymt?^{by_i?*8SYIP* zne2m}<7zsHapMza7|feDW{3%5n2BVD;3F=OY+Adgk{{9&HzyvKqsxXy5mzrrXuIg0 z9L=DSA#Oammt}Iv+$Tarz)v8XJEi)Xuu}E z&11|e>Xvi+er7UQ6r6~e&vn>mBT8x@g zSSm1JFU>P&%7FiMQPdhRy9Ne~=fEt^uWW|_f9+Dmq+U=gYZ8;o-J-F72#>HGP;?co4T5FHUP zEW->bS~B7V7&1q~U{HhF&Ym*g#rYmKK<&Qw-pSW@lslTR%q^96ZOqL^o$p9k8+AQI zh^hA14j;iyrRE$BRr*cKf8d1lpUFItrx7VvWPL6-?JHhwf3^Cwe}Nmyo?<>-zrhpz z`~;*fVC$m)g-5^l^C`-yMaLlkt^U8(Ju9rlgHD7I)t@uBYc*9dbSD^q^YI*+Id~Y=3pidgT{Loe@?D-FLmU6!1faJWnp_L*LoYYrEe4sOsPSoT%RS3rZW43#-*&DR$~s*VNzX~Fr0#v)-jUJnXqK?t{Xjt3weYrW`S)Sz zheM)Ev}@Z|;=d15FTtqCL;8Lnl-@y6k4K^BKte8;VRxpdh+N)I!p_&*^<3V|dwo3D z0g?M-VuEU)+9M?9qj!35dK387%+teEk?R1MZFeSlp$~M683E9>rZZHOc{ji26xbrb z3I!p5TAv>VvijX=R#EcySy>5J*0g|0;u8!NJ*Yq+>0PiXVlxak*;6aqHJfg@d_yg) ziNUTwzrn(MgnGN|Kb6&OTFoXLx=BThC=$))n*u3O1yrd2`F+Yi04Q83(v;{?R4cPr zv-x$$>pubtpBxPh_y`gV*aHORr$KVjgqvM;HCZmU{Ob#BrNgM5a-78Pe2$H>@HP_T zdrl7HV;)o4!Y9gkTnptm{`=+S_>k)V-{(}5z>AX)IwHj8gTc#Q__sDk`t8d=;(DNy zwyawcH000D54M&Au>BwtMFP4k$?I=+(x;(ezJM`n?Zs`ACLF&i|_D_pRSQtIXB56=Mr-+QgMjO!y2b;nutW( z%KVsfCGeQs@BP=a7KFo{52E^s)YYJfu7>DQ`x;Y4tfva1r{~jD5ivExO{J|}t)1%a z?D?(&Jvuva9oPGhEKl#cXS4p_6pfwzS=`PK{&d*qzt*C!csTZdJADAMZa`RoPV4#q zmwgo$WT6%eY9K>{lKQ*;vuNx>EcQd{e{4c6$fEy~_85g&h1f3^lcBwa{tsw7ZGZ=y z5DtyiWD7d27Gx2`e%Ak8VZB6(`dHw4l@A{JPluw9QADhW<>hy@-E&JT$8*chNU zw$2~mqX*H68QnfYcPqY&k4wG!!)dT(LpGnuQM=i=&Wwzg(v*xaKsFzk$7mjClf>V5zwKQv^L?>H$U?atK_ zYQF=yt-B>2H>QeAZTt@S#p0Byz2*rc=?t{Kby$(Eu9&)kukIc-EPl>vh6s50?Q zrqqZ?AR~a z8x8tCuWghk6{DlK13T&Ft0p~Mww#U~(}Cq^F)#~!P&u#*3%a(*Y44C1=C{&;`3lT8z42lzyp^P{Df0a?L?V=<4#Ls?GMe$z) zymCWQv|@XTZkh=F_=R^l{NRA6uWif%K7%Lz0R%bJHRHnsUZZvgW+2f*3L2hd8XSqU z_Xr2UB|s{nA<+T*Hr!{_ToBaL8{QxF-3m8|p!QZ>?Pm_iO~~}@DL%&1hiTZnk1t~i z#5r2&vo@zUznOl?<%Qwps@F#_B6v%)Gf`~*u&aiE^ljol_r;9aC2_i5^KjC$^hJ_xw9>_=Ib6|mCmI?yX56(DPu?JSx^0T2a&K8q?QH90fRv0w^ zi+i1ws}U)>-KJx1KQFj{J{|Gw-+{SA%_@AlA6ygALC?fWalVbV+cuuEM^YQK+R)tj 
zqhc~5c{B_9#3K<}_{C_WleNl^7Rc%mxl#~Q5R4K~uA&ibrQ5gbc|d(yEA=ziaUepu zm!SDjuSOVW!!0?*or0Xm6YQHfYBfPGG$VGm(y9*axNy3;%8~RdX(BQsS-9%DK^ljt zryGqRhW<|C_5|yutx5e%RjloyUYse7y7+$Kx3cc|_S2i+UNPHLmg_b4bbcGvO)3Wi zy;?POdlpZh;vkpqH`_$TfHFpUPQFW!wK$oSt8~69x$2E8sE3${D_`dqFWBZGpeaT0 zsBwo~+XB_spqCJZ`=Y@m8hu z^8OLBh~g~v4FRX!jaVyDcUd~StB1sJ$f*2us3bd}S#7`31eKH9oGvwwC=#hm9;fSA zK$DTi#A$=w;iZ8rhLK6Wt1(40z#;H1_U(?cXx-95Z(kIcqV?qIbvU#mNJ;cn#1)Q7 zhkL$$qPBs=t*PPQ)NHAiVJ0nE%{tnKb8(zD?RiWfn+UHrm2=$yH;`S8TszdPZ1-iu z#pZUMsfWzh2+%dY&dDpkIy{6sBhz3oHEsed_?jZ?r2#tMD19Po4Wz<|v`&eEmmuP# zJ+IqfdF*j?YUd59-rW3QK@2FE<%f=^K`+?`ILSuYQd(6WW&(J~Mls;*z?LVVqCMmt zl}ogR{13lMMO#xmz(4vczCF{zn6{gbby9IqFFZGC>S9zPb4odTxtcWZ*QY^x9U*VwM=#g137csz87hSlmw>VnL&CDO?twSrpb0V9psfYux z%%3N~+H3D8j>#GCrHyvoeDvO(Lwxjgg-DOVw+Di+N4#P(p+tETdabeMgZEegGYVF4 FQ45a{gh>DZ literal 0 HcmV?d00001 diff --git a/tests/resources/crashme_pie_no_headers.core.zst b/tests/resources/crashme_pie_no_headers.core.zst new file mode 100644 index 0000000000000000000000000000000000000000..db67b91361cc1f7a1d67b29630f17080c1b8f8d2 GIT binary patch literal 14787 zcmV;!IXuQFwJ-go08j(~teybMzC=mng+gsHI#Ms&YxdC+xvl?U=Ep$Te$Z6aj~asrkD z%K~Z~n43&$B!yT&A)=tZa|)>$e{WVP7Aq9a>daF0E5fna)S=Zm$J{E`kdvBgqPrHP z4$aLaZkiN5?;kwn8^C*!CTztB`S_~xDjWpOnYT#rhb$rUEVWUrMyZeP=lFV7ExyZa z;HOe$N5{3!Jzp%En2uwB)3dHo10Dj4a-A8;tzDPY$ml9DlnWkX5 zmbJeU{+s<*4;(764vi_`1dgI9h6(~m+;|e$fY2z1((pgOZxlzTgdAN53Q|#@Bd_ffAK4w#R?Tx zK7Jzk6K$t3prTqtNu9MgbFw7KSc|YI7i3Lb6oHB&l4R@!lO|4{kcBM)YeM$I2m@qd zFJwIIe~ka>*Z*DRPxbBp{p|xzyALp{Yq=g>OY$)(^43t8fZAN zNvz>ALd39$C{LIi0{%iTdMbsoKf^x!H@dPn|Ku14;FtafXklrkUthQ|k_0)#2m@kG zz?c;InRoVguAgnG{*4>h6-s{$zB!H9A1B=~-AT<{@XR#@FA@>Bwm2!U05|JhFyvlwsnhwm0ychddUD`h0}FDic(*}lCTqs)H@tdD=vWv*-TWmzFC z8h^D(%qnHNlV?@ri)c?3+=yAX_`9l1qR8~>y8?)^(XQ7odi$?M&vdD?XRlwt(O>I* zwPl4CKX14aqjAJ^^048w3_?%AF(9-Z3WP0w$v;FI|5paR);5D;M__}HP6?U;O1I%$ z%P(B(oo#*0HS)jp9{0WFdcN&aB7HSNq|;1#zHe@z zX%UwYM0o!OG&JW$7o0;G{HXH8Q({ju-FuowyUcdeAlqrJug;a;ZOj!mfV&@aF86i&L|G^Cy9LJRHXyUJWj 
ztI@N*f^;VB{G0&Q7_>N7=;0?`s=s_lPd0zL)BJp6!whLJe;Ru4`%TKKODfQ#@jr!j zIjSc;`TS?vSCXFRDf6CFo}60M^>uCy=DntN3;-$PFT?>H8r5OYHEuPmdC!E(^yR!) zLhO1F8suRMtANAclg$pZG@W^G>7~XM{9bToG>9&N-dNC~5 zekZ=MqZMw$+@ykLtxc#kT~mBVK2|(HQ&@<9mBZG-xhW++v(ni@YFSuRCl&nmC|5t$ z+=QYWW6Ghg8z|Ob@nUYC9bNAc|0`RH9%J$mg+{D@nVV0Hco$mnu(CqS8}ond5>n;B zfy8}l`XG0vPT>*ebtpUj9dpwOloY#9HL71xj=x9qTJK1!$41r8Hv@h}S5$Cw(@e?d zf76)-g-fyg-Zq<%n))O9;7{`%DlMApncrKA1%*4a>vp|>EUsc!t(Bm1a4F=_Tz@s38bZ>9_-Knr%ezNDVzYCA#)d|&h4TF zg10JZ4m^3w#*h$E;OoaxhmfnP0)94$60rmc&4*42cXV%ONVuRzD9wFdAXB$xjZB8; zkUwfJx0Yew(rY7ag3^^ti)>hD!zR^#o z3*y%gONb2nr-9IrAb^wM5F$M;s0c`>HV_`K3l&aR3iA00q5_G7Z`6mmFJt01F*9@D zI)T^ZlbCgXFYT7+K%n5C15;8nqGUkDfH6Svn8ivJiK~)Y2B=f8a0)Vqf-P(fLNNjZ zqYR!%XdhWnbaH{RRjq+|N|m@vAC+Us^&x^BFQftFE&3d!X@Lvby89{)is^~xVvf$C zczOfrXc)o?2WbeVeaL2r4JqQR;YAET@1%Ndh1{=Vub{aYoi7Qgw)>#pt|4&h3Ks3L zm$_&H3JurLx}h#Wrq=TN<@pc}MWZ`t0^XqO1nklE=P<2*9=HRV6XYBaLLJ%iIoP z6SOVM0$T)aah4iUM7v*C=|^B6Q*6s}u~+gh_8|c!@zRW}7CW}>MdX}a%4It$y*?x< z*_7$9fthx*3L;A~mao0KIfC%3OZwPz9Bdop+vfc#HUR#?R4DFN4+|npHB`C%@)Lo) z;v}ioxwXIub!BkD_8`r;Q$jh=F^V}%0fvB5fyaGbAZ#5uW(?;F<6d;^h+!l6Ikq66 zVolQiDS!c_v1#f8@f{{MNbi~WQ=gC}ACP#n<)y^w_k}osJlnnwMy6`B{Ew9lk%y{F zH$i|rBFm61ijL#niP_-&|6Zdn2H^Ih{oG18$y>eP{x}BCGT}|5X6=xROt^)LAMee$^mSwWZCI7MLr__6@KeM#Wc7sm&B1@)1$9(b8- zXh@ifG7F0(D=xk|z?p~TVtk?n0I8aoQG%UfE+M02ttt6G!pV01-y`N?w|>ArcDU!F zq-s|h9~#p7#XY)e4=HDFv_%}k-cD|qpH3<7MR{E;tPW0ied29B@HEhtES%wF_|4~n=Z1%t^k7RQ|_Ia!BNa*t?{(DpdhO?-X(aC$IXhaHyC| z(`Q1I24a{xE7bwhu^6b(31SQ)3KK$vL=lldBp8V#kU(TaW|IsDz>FWxGVk3J<*K*y zb^U&Y;vks_ajDOO_E0-9`MlE1)@;KHWanOLZO({=41Jh4Jy>>w1!i$=BLn!t9m=os z%EooDp*H%klkq8>u3cRP%!J1#Z{~1!Vde?yRlR|q*`X(P@^*H6Sd_Zs_-Y=1iNLs5 zto68gpn|+F1m{_rH|5^shN-cL%g$y2E7fE^lkG6wpeUeEm;%ifp1krlBpZEVn55j9 z`WEe7h>x3=-(q);4jskcRmBrrg&7$P%0?*np_|k?<#X+QXax%cSQ}a1e-Q)de~09k zl^vzQ<ZTu!veWN>$2cK?*2?KWzuCv)5%ODtqlyK!~n`mq=@S!L?n^HZp?U4m#q z%hV%2FmmaWtse&`H!ocDRjTnVzH3xE=si55=ki9zhp`zD!7u9jSTn&v9jWfn7EVL6A6xJj8iW@%S5!R^yg8?zxWDkEjc 
zR92i&5n(PVLV-&FUH)vfup@jHo8DF5_@Jn34pXk6dA@LcqFJ?4);*qOUTUkJFo~rx zT6u8F@9;5kK4)Nf&d#0ds=P)7tUu#DA~ z=o>q2@ICpMKDZ>hq=8n zXLCSTT+k25j)UU{J#QOam@`5@?M7%PBqL5ZUBMrJCJtSki{Mn8~ejRXV@yU%L>||+^ z@7{kh*Me|T0;2XqrZw$^4Cx->eVH+`&3gaUjJk#6zn_pAVytF;*fAC(nXNWlVGP85 z@;pYX`i*LvDxi8Jym@O>@euNh6m))hL;u$*nr@YXI=gd;jfv-**H~L|MWH7VpWE%H;?d*_{J2ptRbPYL@h|;3CdX16fo{ zjhiGo!2{51cA&?Aa?9*Jze0~VVv8@)pWk=d?*ZH_!t%^*hO6#m@qfjG(UKRe;eK(U zcQVO8%0vCUmm?Z5eRxZ_~eG+vt7tBLG%}-*4u4wm^+v?Jy@vOZV;$KMe z-$HP(YEXR{xa&=4hkb7yd_C2& zbDNe{HDE-g(FW$QnPO`$sBKbj)S!%n!g(q$Y{E*t2_fcdR7hjv0elD`O2Q2z^Y*XN z7pp~Y)LsXY6p&Q;_FRnx&-K}t^#w^WsQ`r=o?`>x&$aiD#py3bwBbi49a)$WUfF$y zxz-H!;maEngLmudLJ9-3XL^9(*j)#Z6o!VVM4KqL;6{F=QU^CcjttgH?Jxm&%SR3k ze)m+Og$p2w=xmFIIf?^AB}H<)Ur2uS`{2b-U7PdGGPVxe)OdI`51qcBkv%10c9&)3 zmE0Q8sDrlj-6H|xQDdhA&h^)`G805Gr}?|a0O0WlO{3(Z;ZW={4>wis@^;WydG|MhCe>>?!Z--%I%F!$Z&*b^ z!z{ph101_^N`juF;4~6uWiI^{1R{*=B|{*=&>x0_Zz+3V0)PB}(RJ;!qdSJC1qbKE zaY}xcRKdqXzSqN?;pu~F?6_<4(2hI*|UIj(?2 zQC`^psd8W;R35RQH5I$i6xBgm&t>6n5_RU-kulHV&nVo^GH@)~;`M;~iZj2ushipg_HbFUi$32)9ulA#l56 zxYwu9v=uRDY}diiVfK;8#r{2n4xx-x-dM+Zm##SuL3kD-DBILiaTUa;fU6CzaqHle zH=g$Xmq>QLMX0q*4(4>Q-9I8s_#Qwp6ViFACI1ajYpI?gj!H91? z?+63vWZc;>kRRapphDeB1R*&4ZU-6qPzbbTE;snooNKfPgDw@@pnff-INAR#Q+`n;?36jL+M z7qvOnZDBTiX%>Gy5D^hWm@BHT?kAkx*dnqeLncsKj&cPxzGW01rCkbNO>rp8z@lU~ zF2YSFtR$TIG!qmr7$nQ;OB-lFi8TAgC*=-ui`-gyO0Cbq|8=uY#IX4~V()Kk5eNAX z?AHJ~1-&~?AQ)aX2KH~AM6YHK(*L&rp^E`O+}GQ6yXV>N|Mu9d-<~PL>dyB0zTY>s z#`p6Z(vk?!E00U4SCReW%Qee(*veJ{#qb7c4ZrQZWeR@-r31qQu`)@;jHQGz(yFQZ z88CX4l72uTJXneGC=^x=-#2CoF9=}#dq?0IQ7H`FjlkjUL zWHN=XF=;HmI~@^@;p4K(DSVquWlVgJLMi2O>Qxfo_f0sdJPu!E{4(LV$%Khl>Wn0f zQ?OM098L>>diNMaS91U<(*e@|dHyWg``nBzIQHX@agD^z)hy4k(Lme)Lkt&g#Z9vU z31`P8IXm*6CCzIu^tsZnqfz@~t%-O#Lhw?=`k;lQc&7;Qc_U6W!{Yua|J`KUEq z8Wrcd_SIEy=p21R>m8moS}X8&h_maj>Z>)Lf9G3A>u$f$13aq`kn3{(cD?<$-9CiV zXS~V%Z7|2hq@~TT~wnc zL;q*HQQMryNn?%0VPbD`)HCB{`)hI3_9o{xer)=+r7d_J-$_kCcp5j7=7P)kc`p(! 
zpoe-wa0V=(BNpx)5iUZMGb%dT?J(LI<%)Dhx*bJ11D+nnBMP_mV+rYNngrM$ZGPoF z0_s_=PRrJ24w0>^_Vu(gCk%9(bDd#e!+SU$4RSn7YLud;tXu}mx4oPrl)J$a?#~Uq zEO8)AsiKGQ(uY!6%s3VI&*F1(h|v4O^h53BW`#KZ9K0H?FauA-1#9U&U!x4=q| z>8q&vYNlXg!O+nLD;`9bO>*?p8FHVlxgnRu+*bY@>(FzTV*!Py1!q~%9Gm0daIrDt zawxdC1YyEyFhS>vxrCDFGKg*!`Mtdh2&T z;BBHmr~RoSMOm$@%GIwfw>YgJBnucK1D%qWC$y~b`A8WyVdg80|LoxL>UKMZMcr;^ zq+?g4D=O0Q&_T9a4v@7S$-%Leln$c>a?2|Cs}yIdn!?A<`%>pv92A}@KdOfEv!jI( zE$Cmp&gd=D9Y!KN4w-P(RP#utb2)6n%p=3!N=Pg!VWcsR;VC5V`v?vo8`3l7I9_A@ zAw~|Wg&9#s-4@W>^Z2l5iKLRru@d7l>_l@AJuQT@8KxaT9bqXh0tnD@fCovX7lb%i zp-P0S7$_*>*m@ZPB>)h1s=y)=g=!ZLJzFvVZ1WsfTOG zOVv3m&W+yDx$)TCmA%u)SYmvpi9OYCmh4-ddk^(O?ql4C1pv2RpZ5fg<1&79o`M>M z0@`ncGh9M~^6f{25tP6E7@ku1M)G4|&X3FuZm()q$&U2E*VYy5Uz z6%^HbY^n{_Bdv@msOt(*(6p_R^(|w6(-u&)UH(@!kJAuoIOYAhogSwWu6@pvg2Uya zurzSD%|L8W?MY99uCP#SlIPeSDwV3kCNQ<2+10fg3DUJKBBB#%*>(NVdhF2Hn6BNa zS5z8r9*4@qN#pTIV5XFmvY0eZC52N?DVPYyPAMZZj)l7G-fT)Kheo7O$*>eg-4kACY8n}6Grl+6;XGp6OB5EN2629DrUk^|L!zrE4nV}+Vb*srpt`k|1&7Fm#=8i|J%U!Ek?uIcawNsIO3=)Hk^M;g$A-O z55s{Aq$g>pNRz`ONa9YRuchUR%stsinx;gG#ZhXLTP8V+B+@6_C(V<+ga+c;CG+n~ z|H_rU#4w-`*ICT`QcQY^D{Ai7d$)bKiosj;Y62&DVvi^2T>kkPX9KlKE zXeq{?e`$IrbtzIc|5R2h$dVNqGWWiQ#WY;XSS7#y5oo;}voFl&PFpBizv*<05}+Lm z&ak8246kTXlqdCyHbswIxjL(c7AE)Fkbzc3odC^S%=D2CK1ffbFw0%JX^wo%y|#(G zaX;kUR?WMHQ+uI}k$G)ZNvq8k)8mypt8r4+4#humL}uSqR0iUZwy=bgT?0irqptAK z4X{g|^lI*ZGhJfjs{doI+-T8R8${kRV^AML<( z*FbStK*bE7wr=FT#zee%?=ZFl6+mRpEP3*PW~Up>TrbSTie^Z=U=b^#0m9D8g9^2E zv*3i{Xy^xqoiaDnW+!G4WsulGhv}88qf86ZT;&UGs74tl2G9Z;k+wftVUA>ciSRY$ z%Y`o@qI`j|0vbln!oZi8He94K#M3nB#uCP}Y)8r)1HIzQ4!gnK7lT>>Mhr-E52?Iw z%^zf_nAKb=WMCK>_&=%(7_VtiVysS8=@y#*Rb07Vke+y%u}WAxXCJxuziey9;&Zo? 
zEs&l}r)IvW^}q&53j+*#7xXMBgSB?Fh{qWPRpXrSy2X{l!0>z!OGsD2ZJOdqV4}cKZ zK)Mi<2A!cc{||%Z>N3zW^3vhi_y2_}cZ#*o%s{shr`~kx0~Z}yz^C3Bd83hkYUJOO zngCohzv2GshN~+Ux9A;%6U@XtuI5Z!s*gFPjOu?91;7Fqn{| z@{g;A01`k&xKPB}Obx9~6;Lqpm+3TrGnrJM&m+UHQbr<1lqT3Vn?BMUCZTFB;dlrW zbx62*jN_v)UMjC1j*ZMD@o`xc27g7IkVdAG56bsC{gRrB_=5O~G!5Y@_>GNHMhm=5 zxCk?sFH|ph^{Q^Z#AD2q5+;pSOyiMe?nIa=q-y>ND={|Tpu0TYQOHy-k#L+`*Cwnp z9^u%CG#+0f95a>8q*LegRjDQtCGvGHheV>2sZ%+8lg4H9bs}{{;j&7Id||4ga2UT! zKnS)GUy<$fHuZwRR|x_juoomAl~h#myA&eTxG5}FDUU}Xa_aiiHqmv)H1}l5tOUJ_ zG4?>XAk$3FAeR@Uue$HP9{BRbc7qlR(kbM4gg91gB(zcO?gQPVN}oNSQ_E$M9~Tw1 z4#C-aD%;}^!)Og)m12Mt&a=T%=`o;{sFydkU7N1f+`~iRaoLQAPGfu&{+&uPnL;E; zbXPKoOhSlq4k#%JNdt4y6uB%bBWWmTMuIMcOEj2HzmT0xrc*RBbcz%eypTcFp^Fe> z>wRRoQmg$c6eWJpKpI4or$KQwh*~>rT84ug#G5yprY5%xbeECG$UBS~&WOBunsQNR ziCk=z((Ero7^+OQun9V}m@ATw(McgA#!M~vCREp}<+7IV1Bly0vm222u4 zx40$g0*h)BcL@!y3C`t#+rp`X$dv-vfWDD(F_~xJk5l)+1PXvOTC~b*7g4JOR>MZK zO1*eq&@=|@G%I>2fjzOrjC4o3K$;8$O0sg$MHH+GX%SJ+rGX&>b445}1=BmsfGAj~ zL5A9oE;aI!7BX;ekOZn#=1G?i=LD$}1_Z$dFG&)#e&vrZMqXzn;Lhmc&;M>cfw>tv5$$ou?b0jb5Ga*_J0w8Dfd0asw02Ze3wS~N z5wKh_tY8yD+#s2wU>64b&d9+ilUayf77zN0C)CIOyaR@`sFPBciqbhmBO)S_CP@dw zr9m_x&=fsy)ky90b5THS0#OiyfM7xpAx0@f1cV5PA|fIpA~K>$l62gzrf>A_Sdp0Q zI%sDyiabL3cQF#z?HDU=y=hB1O*gS4vvgkbveJxPziHmt|4~X% zeWA-t9iHWKnYD|@XqlwDA^v(ng{V&GRW$%^%j@!AUFNyuii8m;i2~KPvRpzPh2I1; z$`+d$!KhgDcE7Uq{5YT{d^WFE)>E)S#E4N{DAMLbzvR|dOlFu_!XsXZM2yF5R3t3Q zk}}g-pi?N^4HtTj=TeypF}J76k?v}3X4(K~-09&<;nlW4p9_4aHf@ocf0okxLYlX z8?y5HBnCoQUD(CbOMj!i42`s`8E#%?j=0nOk4AFK@eFk{gm2HoWu30AJ*#D&d0IN zqGQZRjBQjb+?b`hT;ONLcTvYKl%ML)65P$uEvMMnxg$6#@PA+V`~Q9XUqS`Ll1Uid zk+g6xtAE=UjnAG7p9Fkv7RBdQrB_HSA{)bi(lTu`$x6{H_IQ;e zZLgh0s%-DSGPdXSuK?|2sjx5l3bB0w^gI~`U+4>dfH7ZmAy^iWC5xdh7occMPJ87t zP?5G``H5(Yx}m}{lwY{haurUlT#eiX4_riLO+gfyh%qtggrU<idO9%K*v`zzF zbEcBUUwZ18>upY1u>;_cWiEo??PTS$?s~38k`)JdJhaN>d#{Y#a{~=6j7IP|j8Yg7 zKW-Hl(L^j(4Tm`?Zg7}>;^>(rIA;!rFg_uBoE_x*O>V12pMM4mPNpCw$;#mkm5Xpr zrXHe2sj_(E$POGejEUUY`30X)ctRZ4b;w1W3p#LA&4|!9y!AEha^3q0M3~QF#&AwU 
zmBtOXgmZ=-8GCRv8gg6-OjHhGKD2);;k#$63||s}xeH}z1Q&h3i$0SBDr6r-rO{3x zVBvM;>$xm74$ljr1t8&C?rK3gpU8uox_&--JVbLG=R>+x*_24{)*$wcYJ40k7-J{# z?3W@lax=gy^N>e_I`6|!PBTja*BQ8W|6!>w@KiRZLocfF_>=dhz0T8(71!0vUwYK@ zaFTf%E(2o^nT744e^#4R#rP5Nmx|d!ElyLk@eO`)EjDdJ5AgO^u7sACQf#-MVVoE1 zs|x)PXwyu{aXdc-Ui?A1Q488s9Z##_Gaa_Ku(6DARby6uvG0{e=9P6YaJ*sjiprIu zozaE``=5l=bQ$wQxTA+N{=Z*hMwm!gKO`|=0L%)ntp$$5g7T9+wULCTxp>BEmGOeR z*po@>W^h@yUVg0+urA>bzk)lo@=re-C>Ggkc^v)p51Ye%(lzY=z2PHPt_*RI?XKg(~z+;|z) zJdi(?{3lywgl{htU3D{U1XRS)EG!<31Bk7YxdCb-F0;Q+h2sGvJC|wesTGMDA zBU<3`YaXU|&)RK91g;~15IFgK=A6Ev(RNQ_H1JCe;JW&|xAn4aFJ)T#Z~_pz;0G7f zAMbl?{y<%Lua62+9jkq4oCccl(RV=)#ew_w0UccJ&y)Fnu&C`AVe3l>dq^8qWY;X}5J0;dCnK7m#iT}gCmUBsD%J>k!@QjYj zfzNBI0f+*qHJd11u-f1;91o6?@g-8L-EV~7;KZ6{A&M=+O=qB1@FM6GSOe!mJ zVPxH=ZmJJvsR@;K;JV%Z6og+HCh^;Vb^axYR;#9b)Jc0SiFWXoE#KOykbKvh$=2&q zvgK)2|LHUga`#*JDhzid+1@(J(}EZg-+g6?Ao?;}Jhr8P*cYFU1!G{XMGIgjk+5b5 zL%u=r6zV{LvsTZ{aOhl5)1CuWY%w_n8l1yRHd)*9&Dnj}Z1zqu&q(cwxD`Dk8bOxn zHuNiGZ>G=&b27h!vX@}w#`zWaC>ARdIfIXRH@n)8e<-V-#GwHx`;vHeW78Vf(> z05gu#u!W0=n=G*duK=^x1HDa^G2ej_SB*EGahFMBn+B%e3P@w-(u@s~TZ@2WdDpC^ z+gzlTs<1fHW;eYuh5nk*s0#dvwrr=2*>06&-J!c_(_719j}z?9ouXl>^s3gRHKtyGu^1Fz58~9k zjk*hk0aq8pU_;D(GuT|EaB=v^F?0pebj2vq6gKW*77-Wl-?vk}kr2$#;3o8#` z#b>bvM&0eolO5!RW#|SwifzKxG1ay^v_NEugW7BhQ2 z@#U>o&aE)VUv@sS-wgQoS4Sr#?}{TK=aZryNqZcEUxg!Fuoz)ATRC6y0ErM4|2DHw z`HV*p7*G?b)j}Ef+F5*p*b2?q2BZqly60@+AD)eixtw2rn&B)!cQ1ho@0{+W{`64U zrQ-o{)ol)t(o=r_hH353L$O3xzAgS(P~0WC)9(5pumf|8N`gFn+L*INjwdP}l$Sa} z5Io4#+{ZQ%TT)es61(l|1QE~0AgK|(BAY??0$;Pgi7uPkS{Gx2=mtty7A4jkP*b0- z7LiIL9L;b3OAEN;2>d`B59wVu(HUsG2snh9KU0ye$IBYlOd6BrdjO`&<#d#nF$J4^ zIV{qqU0{x=>EIp$y-ZsjN89R)>@8&qc>r|)bO5%k>ih<-J(`sL5cyT;>seNAT^&t8|4vo?NC)Ke`~Vjr zCMe6;$TPoxc`AG%3WYx7YYm5Z=wRImZ*QP4hGQ7x`$B|EKyoA?ud;CdvdH*cge}41 z5B}Jt@OC-f|4(M7ZDEY^uQ)U0q2ze5he>Ydp||g4UHJ|`y8gr0m@lA;ZagukKB+U| zdOnRGi8SUr-ian}mN?E>hc@%sx7GiN!#G$h25T~ipK-$=z6JbRU>s?D|NjPMxpn{B 
z$F*N`e}2{JF!mL;_TR=^-7ug^@caV1^el3R=RjZe4SLF^Q?mErzomJd&f+ot+XUh${MI#+w{R^0w8rs42(x(3X$cCh*Ei7NX==y(ad=Q`e zs}b%~;BiQjTU$H!hFG?jYKmz=m6XSyuZA`5vO$2?8Vm~X-$u9(N{FHjY0LgVvNf97 zJ=KGleJJh$0<~vd&N1+xd?_wyCMYHtrfkBQRI3E+fbZ1XtgmIwf7#0Y0A(^}X7#Jl zqdxdG#tpTaQqY@{5_#;+|2%2D$x#2g`8_3I{A~pX3!8tp`}WYMo?l52&t0eM?jzx% zIJbgFm(Q0bXO8sJxIgnKft{$~aIF+1$uI|DB|-?HnIW1&2ZA61ASXc(1YtOwfDi#8 z#tmzbUh+m#_3`-Rx#Hwd zsns@bu01QFPsWS98%XYaZig-2jG%Rw{PbpZ1~wXzn=Q5(?UE9fmbwb;%uf`+{c|87 zA^bFWn7``w&GaFRJdN4UyS`mJyF>HRvUjjLhF}S6+hQ6N*f?T7uA6dfwt?7K?6~fN zq#D|X$s|hHgQjFZhumjX-8WC-iZGqQccevKW~KLqdnK{icvu-nz*aWHP?}$|ebZoU zVHcV08m>)|x~#VxbA4F@L7+kNyZB`rm^Hx%&lVa2+cy5ocRZVP!b-+K%I-cCA8ZGG z(>yYcsJ%K*PEu?IC(BEEa>Ch<55e91aJVOWn8rgWN?ULKSrn9okj%soT4Jto-EL(t zPdsg9XOIrq_S6IACLCE2p0k zpZKyh6G1+}YC8w-ZTT1t*Y0E6nBC-SFdTP3R731lokPw=GYw9QrWPQsv;R$6E>!p_ zXLzhPaSGj_-2m)#7Gn9a;+Zd(8|%yhZQ9%n42BEO|rFC%1njEhE2x)@Zhphgm+Z|Cc1Q5$L)t^ znJrDQD7T#hQSa)>H7Hv8TX@)2`j!mtE{$N=a&pRWh($IL_v)3aan^4;a;>tzkjF%u#HEymn25wHNNjJjvHe~X9J zSe8G#4&!u=SRR3zE%c@|p05Cr1de;f9&=%Je;O^%V^*0LCq;Ot2DmHJ%5y30{z= zf$aP+<8vU!@Hoc=fn-wILNB29qPf8;ZB4%4=k>}A)2;t3v**M1kCkSX{;yDicO^wX zO#+%gmrX8_CyK000hIuc0G0s%gQN)JX?|L6kba6O)`uN9H7{5UxKSb;Iq)cxez4a4 zdx>^68GixEEGijR=8NRzRb;&LFbU>+);qaCJI5OnIsuqpfB!DmjJc8&D!{^|d;;3d^83dVo)zo@?m^id4*wn)W< z{K=$^8M_vfNRs_V2~yO4d_r;l=k$B|8iJB?Y=!vU<*FHf<6XX3ny8$(a)BK@mSKA8 zZD+f+I(4OSKM&&}It*S=c`ck<*{ke~Z3n+C?rEAfP}vH(x2Ac3GI1W_1XW$jgLmx< z{N`|P{g>*1qofB^6H*Y5{I<+aZp2=$2pPTj|7b~uz7gj%R3c&l3u|XqgkzM5+$B;H z`swp*fik3pt(4`BM9IEkAb>^yxiL)p*A(5q`$Hamg$dR9ms7Rke`nD}0m5Lsg9_)F zu2o&mHuJ|BP?p*z&UBBA>cq4i3vW$QWiK9TR7ud_x1GC`(P-c8>b$cs<|R!t8Rlo(sD^ zRdo||xR=Miz1JwRoSG)nLAtJGp^uNcW8L0D=FmR9d_IyJ*fqH1y;zbbhbRuj**8q2L))v0D9!vzQ?fn(y={+z1Dih9#G^<|)Z@mZZey z#-Wk0l{X*PUM}3a_X}VbF^ZP;hl31*)ZcYt1bG&QP=i1$@9trKynuGSm16BCQTTEyN)AQr)cBXB)Y4oZ( z+K96Ia$yc^>8(Vd9J_Jhbv~9luO-4`)bR-e1GBI3zD&4(%a~zy-V-OzIZ`^c@i(8GxKDJ+;K zrVF8Se z-Y=EH`}F8<>z&P1#$LTBV!qf>&ub8n)euSuo(;nW8BBoG5N+Q#&OSQxO%yIbQ-)zO 
z0ky;JCi37wNfTlJ*#1zo>h89h*LFv`OF$uM3|o3+6G6)}XzFFFzzDl~gd;y}d0rMUn^gmD@9Qo2l{$0z dE%OB$iXZ?0 literal 0 HcmV?d00001 diff --git a/tests/resources/crashme_static.core.zst b/tests/resources/crashme_static.core.zst new file mode 100644 index 0000000000000000000000000000000000000000..db207fbc6b408052ad796b85f33a39b117d4232a GIT binary patch literal 5234 zcmV-&6piaBwJ-go0KftOWRd{N$NVf#Ae2gK3|j{9g=iio(*6}mk4X!=kfS8Mxz3we zm+t}_Z$mb=yP8!Arm4*R(|3s4=Shrs2@!d90fk+B5f~ge9jC`{xEGU{)o=ISTc(); z#RA3xM=@mp0$ylI(Hn9Cizg}{nb6M0p-^NYrEWe}Qv6r1oYy?i3HV?1sFXMs;thsW zB{9fyzyVzn;NVIrg%(Rn=<*w9=I@5Il6bo#&gAY^`giYF`g)}u9Tq1hByu>#Pl zLNdanuBY$1-oEY8{LAs;^#03|8}g@B6r?{(EU2nWd}B-xlw*QN!QX^THo2%|ni6gcDFOaLd)v=_bC4at`2Q3xrF0ErdROXq6RQ)=4i5(-Z zOde{KBpr|GhVaiz|COt&jw$JQNP zlq3gLRV}Y*a3!N?2m!WMB}f$|a_F%1p(zKHxH5INXtAOOr9J=GW`4d9A3S=`ZBu(4 z1W`&hK-T|6Q&Zpl|6Q0Kt!e+zg*#}(Jr&_S#`fu0iv!>zO?ie0@liDXY5J?e|GLNV z55w!Klcy{fR@4=83yVh53TukSkEa**{`E{nBk_%W;R1=S(q~`*5T$Ct0*H6wd0|`i z-FbT0UyVYc)3BXb7xw|K`*YTRHUla0D6;R-@cmF!N~ChjDQPveh17ylpE~wUzxp4_ z^M%99kdu}dKoLVcBpiAnWXd~C@yI}~>qn~pQ;t6YpMWv8QiMm#tT;Q4*x&N;4`A$ zPTnAW#qpaQ)`mMmOd>>v77QzRCYiel1X`XEJMNMWc$)1NgCm$G)lTsio3EL?-yF$K z7sm)+K4S-Q;TXY7jI^aJ)q=wxVI)HEeuCL?d7sIkH&_=iJ%tCWzyJ&j1vdvmGWHH8 z(nMVrSor@KF`d`AYu=?NVdDQ3!gMUQmDKMaQUNlB&Uy~0vVkOkaZdqHovIlH+v3PP|I9=zGa+Q94~-Y1x1nK?te&9|6^ zmGOhn7HvkE-&nc1BU~{rm^kKQMhNzT>rtw)+bYx8U9rrudoLJT&AB5^2ff>sSyVHp zxc2U|p51dZ+iK3ZiMG`&<-~5lS{X!8!(>u)vSwl95FTu70T;(&CO~5Dx4IKP(&7J; z@blvQImdsZpQj3u6hu-+qVl-9F(#I|M&Y4KNz!sR#wS1~%%T3>lIATiOW74ljrFPg zN$N5LB1tXD=~I@n?mFh9==P^YOA4Sc_G^nj+LQfyOt_1FpWUB(j^@dhi4gXuLs@@L z3xY#+k<}PlWk6Zc<$v2@NSo=XPO+)a7aO?~vid z{=Y6;m`+0MsD|lg*p7!5t;J3j{VKlLK=Z`z@J~b7K>zKLPR4En{xpCv{U#H4|169D zzeYscf0`sr1GC&{GI{s!hgZ%pg5j8VC;^%7J@GVSS-iiFeaM=sk}@(iC#(OnDSLza z9|s1jIh(-$*P-nnrJUHlP7;<4pnSp-!1CWwX`v0wS`14debF(3jtqz3h>l%0+G~P` zuMsqw1nQ;=s(W`n-XxAdzqSfGUt5T5YF75Od2k!(f65bGlfE={!NqiC9@n;}9`Mi! 
zB}K=}b4~K+`ye+Y04A;xb-V*(7UNh=5~1H(aLB@}nbZ_Q&6+=&8HDK!g_T=-hjVUj zSJR0QoCcSJGpIoNLIc+e({zG3;Nh;xyIURa-gUwI#WI`8k!dsAsSJYZv*Y+R8r+dCjKibM{;LpxAPma7sxT*1v9y1m2# zzJ%>ux39Ce4Gfh4n8q^i|6KOJxq;Sp+}4eo28xX5-Kw%k2yYt5&|rj+8yh8FLLoQ^ z3~)LQ92<}Y^vf9=&_xuUS_28Jx=mP`2@(aVz=MH>gJ|Wj<_8*Ph{N=ZXBQ+`1g9L) zi#0?Tz%&90YDiaoKWC01FfgzUh8o7Rar?b zr!6Jal1f>H^j+DNRQj$JHRY7FLTX7lO(lI#DzUDdid<=FA-TAGM=Fs~SX#bAQCT6Q zoPLExwWXEhvO=6@K?j= zXVe4wjC?hKy%ZFQS-xEq=zJhlKFH^W%qJ?ludfiBi8JD0JIg&k2Q2(@KCsi_s`F z5I$l$Z7xWK%A1cfLAIO|g=yoow3H8O-e{>Wb6UzLOBD4W?=%u#-L@kfr$$MvqJ{$I3H zLUUomQK4o+GnY<`9Yx|u39XEhHz*IfK=d^V9Z{Ds(L_4}y5*@Co%0%dTdbgHd9TGd zV}c7;Sp8=mWp&;qC0^s!;E2my^HoOd839T}jN_Pmr1GWfy0lPMXwf?*o7kT7@k)6C z&h1w!O;a3n3$9$py~BidXTNKkF794|;34IDCO6#iEHR5aoszkE+}tSJYkXVQ`5zUc z?P!nY+2hcbFpKKbSm!lHDPSJ|9Q8!=)NzYOy3?U*SA`*y*#E6JUl7b}57^Dnfos75BpRfDKm(&bY`qJgT1VOt%FP%x)Wf%SQdt~}N>Egn`bHQv5`j{! z`q9_BX^3XS#mfvU=dN!}Z(P_~Y$_Y3SC{}EN@+?52bcShTzmFBBcMLmZy!wxNZ-AV z0yBfawgppsGlR(6NdUeqCk=zFB)p1NO+g|Rg9W(=KuIA1;dJPS%Du>N1_SW+_86Q2 z@Hoh;g|UKCYX<-&IB7i&(o)#>$GUsenZDP$tDC$EZ~9jzQlE=18BhFg;8;j}NN?+G z7*`d>CR4KnmmZh&Vsba{6 zd1caF#14K7{;75uC^Y;h8}C3Q3r705V^=_UuIF~jQXk-ruv~6N&iP&79p<%JNW=lI z0WD2^;ur-Zj9=j69<^N%QQX2!0~3;*YCGgPamU#8Ls!F=kWS{uBU~IL4m%L<;d9Ho zepyTw1kfbNiZ82|DmEN8^-c@zvycIqonRU&rren70&aTfVvgtvI@NUV90*2(4Fm^} z?B2Bx;i6KH&QHv$=Fuk zU)SMtVH%5AhDT)fJnk=iYb0`(1HGj_i4IXx1|+wx!@;3&*n}#3ip2RE6>q$Q9~aLU zJ39Woc&!N60EEtixcz&sjtsZMj1P8Gn);4DB}N(gBhD5*F(NTNA3UVEylNtnexgV& z^S!vF#=4c&s)yDXHqt`lBZeR~WZ^zece2?G|8fg0n4m$ug|a9Cs}W>dP-Ot}i#C=D zj0j{#34%v!Qt*KfxRCa=3FNv}#ejoX<$-PwTBdYW4U`-1jc9y(^`3^$o5WQMM_Fy! 
zZP35}1yTd(7^)r}a9AGwvm^!ti}dmu*a17^6>jYd9Lp`32ciKmx-o5#XEbaX2i`>j zlU|_^u~ZY*7TS_NDrb)8rVWacoOf)xMsmFvXo?&idyLGumqX?=9c>LmmY<+^=4nGG zYU{u!$Hwa?RnRD{+|O=iKEXYK_4!iynx)o8ySZ4y-f>#eemv>a@rFB#uMv~w?Ra*z z==_Py1G3f4(glpQ@tYt-fTzZr!8*Z-gW6r+3@}GLbU&vQo(7=0v2UE=zM&lJ)@~FC zX%xE^iZ5){f8C=GyIn*I@44L*xDTTp>K+0{U7|~!E2tky;TDBv!q_D{#OkWLh1fs^}q>a7d-A5{X z!<;&wqhlWag)u~X+H~C#&fuP8Qd;dC-|j-ln1_dKpA1hAp{eHquK6Uz$y<&(b@>@X zhX9-{oS9Mcs#(xOozYPE_o8vWaQPB4LxMF=P~o&SxXfQ%+LJ~5TYMt+$15DR*qcuy zfXk|swIls)lG&43yrH99(xlkdYYsnhq~pg4Yf>l|RwE06$9Mp4(3K3Lsm;)X>JVKi ztv@tBFG4Xhe(DrsI9C|c7{&g5Y8^uLulc?>I~)Pl0ifZC zo<|quN{ICdy#bK{pX%>#)b~uKg{po)2=uh*7++R#a$;HgTZ;vay+^+)|^b7 zQL0d;k;%b@h!0a6GbkTADHoOOqn8JHqHPl_{>4gf(uYQluNb_8=Ep)aJP zAmZ56-(-x!ryb$)&m=-mTtJax-!v@rb-BOF;hC7>?>9uf=TXmLhDYHLyDZ7i3I|71 zxBb|7ss41DV?|SDW&TldisGTMQVS}@o^@3wkKgFqzuB&@@43&ON;B-fo_$9x zX_iYNf4N*R?8}1XJv$bJa)5X zIadw)U$f^p+>Dju6RPEwJ|q}~>b$PMuyHC5$){IF`F$dp?Up*X^L-DuP22vBX}7V8I$j7=)aV#vtsUTQX*K(gM-Q45n|-rC^nN% zfZZzMDiq0KE!XDSL>2VUOC|;S7gd&1e!>AHy0-N}m5)@e1TpQY&k&A<+mQ4s0p`wY zlshjgKVMFMzKr=z#%w^P^JP3c(E(-+8YuD`>G!6QEoFAj&MkZBOj>I=6LfJvs&z8N sxruh}K2OQan7IJ~U_=YyRCoaIq^UakMjA?*0bnG3 zK1-RbA@zE8E?oCc$lagcSnc~f=w+FeL|>zW^uxvJ`bjWj;kS`4n80{EV>_vLduhp* zDZ2v20-XXX6sVXV(Tn5mK5U-K*ElA>-T1G5A$#(MpkeMrpFForvx6C5ng^xcQ;CUQ zUHRNBOwQavGWU}XO`w1pIeQIR*Z{*sePr%n5_j;2`vR!g-KlK2fmOaHZlIOV#ZrF- zeP|(yT^ZmTq6E_wYxbQen9+w8LMdpLWeh0B1`JA7<*R{=D}9?7`-R1Wmtx0^K{reX zGA&!Uc)i~wNwesc)Z@bc1>nMJA!B1v3c6UDoqf05L0gS$o7DW{Ofhk#RknQEf-eo< zdU~;M`s86=WB>FTd*SN@FQOL=uL>>fdFGpRy+d9~JFhNK3X-TX^avYF7d++VcUSGA zGp+}!!E~XR^78J^p}zW#>&jS9tEcN#L#F5c%At^RzXZ_OPDI~B%>v)hiq zIBPyd>3YFZex^+o_p)ZrDmh7Bx23zX$)7vCf6}``!=m zIRQX&Nxuxs}pheBY0ixIpy`q((F$O-lx|LbzeN) z-F>mZ7q^1?{JRxEf4Iw_Pu@REgZ{5fKwTr6oy|=(=I&N!EM7EXK&GCC2E1TUg@KtH zRO~E0ZZs*&;jGG(C@N3-lU9Lj-s;mQwngn z^=rW%%i;gj$@=pE)IMnkwAX``Pr#?oc?uQ$DGABhC@1MH+Nd1L%(#}5H0q?5k(?Z6 z32WxCriDbTq*5p>G9t%CL?%s@{N2)z>W`LyQ@*IDu@r<#T7q(V-jdU9X^QHJI$lxD zB;t&0)JU>XMAF(V{fLs2ShFk-mwYrmk$A)%<)kdz%q1T^9;t>AZ+W!j!}mlqjEs0n 
zMnp)tdf0KrxW^EVw?o<;8SRX4JRMJW#KRE{;g+17qCS1x?|M2*HK|faYI_nvRhz%m zC>l-EZwPRewoTSaj`i!;gUJx^5HbydX&Iq5eP)un4A*8?oirsazy$?PvdyQ}Ne=Z3 z3W)OC<`e7Ww|+&b4>YwoP)NOcZK_V3+8n1x^WvZ5Y@LV=>V`{E+Z?H1+Jn;!@rYr& z_~2WSlnL)f3eE=%3cmSnVracHif|i^BEb}NZ&$e66O3r|%$g%1AOUbNmxiN)!&>}% zAmKbpr7ag3iHk*I8o;TQHg^|eW5*EvfURjkG(qTSRI)4miycF>e}Gfd#@6sK-Ltzd zQHFiJ-2Da>BEq7?zJ!csv1$x$^*DQ)A!K|BR*O)w=l&A)0%PF%Z^2h=zF%I*E64dX zq~-OVEpdSCc>`@WZHQy-r?npeghaRO1$3mtVvYn$@{6{dAWl)CH$=G<>J+SemH|e1vsg$ z4)EExRC@z|HrOiC2%tj>%;+vnad4!wh5M)&v?G^%&->M#HSdsqWZV1=_g zbIzcZugwjlb$xXV#*~SMbx?*^z-lqOa`q<==X7^ihmH&{)_FH+N@?C*jMO~iwsX1# zNJ57+$!W~0CZZKY*`|ket|%vu$TmGpDu=ncrjfNhGmFe2lRPKsBr#?#b2J?jwc?^2 z$4sL!M`SA2bPi`!z%hrLm$^7=$Ta4R!&sTClUZ|amWfGbEXPNtShnk&jU+85B#}8h zvXPa;DJePOK2~DcbMc9|Vk{q%v~1&!@~r$6Ie>O}UAO5AwgDhKI<^5G7i|$|Tf9KE zWc$RXLqd@vF0|>+&v2N^2r_XN7=*JxYBn~=55A_a!v?XjI(%52ma{u4>l5|kr_?51 zgye@mPj`5JfSnK^wW}j#b8U^?+0oIaEN}TuO>56t80nj~;5j!ooWJhfA?z&&=Uti* z(5j~VZD|hgI6TRB+fnWRbuO)C|7}S*+-AF}d`C^$n~Iy7!YT#)I{cOV@CJAi-eFgL z_C?~;qn+S7H{O%81LQq8KSDN}TZ6ey3ab_>TBx?28w{@FGT@W`r)Xj!MY;pD{ZY$* zZrFg7Yy)thrJMSLgX?@&7s&SS4)EEVY9qjA`xm^+5FM#mdwX{X5p#gg3^>36Qo(sF zuEvl$AyCrc>P;4%AF{d?`9!tiYf>vQ3amg8#+!jf!EL~s+Kv_I0%^`7?YewjOXJ8@ zb@>3pD?Y4v4i6!Ya>|B$qLW3uX=^PF>B=}B*_^axbMPPSCVghT{JFK1i2A}mHBGIk zQzpc?cohvG7eZb{izJl?;WXEU759MU!D&xefPn?nbZ&M>Ej*?@(X)K3FyQTt``m!Y z(E&^Kf&+v4VZ6vlpSB`wPh{{lH%i!F=^GUAUvNpq`_Z2cb{3;Lt(A7hs9ycMAknct*nK0 zQcljyJciU$N6oRU`M5BAXj4wOgeK10SHQNEtL@%Y1!qtdyxG1iE z%YEw$BvSYPDNV&uW)kOEHWD+ju#WQZXk*1i8!M@kW8EWT-OSW!#kEfN#bj=)X2wgM zEgyApJ!KTx!`vAzc(Ihgh@nD4dIQOcZ9-LYf&DUntxGjQTYRe^S04P{RJ3 zbyYFeto+{cZ8kn=UDcbG8YrG2a7;~M+<4PN7a$IA1qoOMDPUY0IFVtnSP6&qxP%nY zBW*HLAEqD;5)$$e1Ee$Mxy6)_brH1k;6d%WforSnfBIECh{gJL$^r`i4OXET>jqj0s4;?8D<1SK z0&`Z@Onjv7jm-316lcxIm*eD;M&b$MEZ&jM2uJ0s!OJOWJFDm^2()S{VkV14Wzxnf zdRmt(IViL~OXSXYY26^mVfTL2RVud2)2@p<7}tf_}-vahT9ZauhnbKbR1P znc)Bfe1YA8(pf%43x$|1xtXNissrvuD9$yP6JMxujv?py#c zn@uaS`FiU5$dqVsIdU>d0mITgW~!MZt&UID7A1QLg-J}I4KI9HTz72fZXXd-^cBHr 
z5%Ak4QH-5heUJ{yj!8`CU3@yMad5YevUzRan!NzGCHHHO>E#=5eB>6d;OkgQ#r>C3 zp%AMURi)smjFr(%#{UKxH6(`}ApJPH1ya8lOUXh|n;bm0@Cre#$%GY~VYu{m}L=k&8n z^^s%}+2riRWcmS!j$Bkuc2ca&lJPQe&@^O@IL+3!EFxc{@@u?oTs0Q4wv1MMWh_zU z>o<%*4xt70pIvZO$WYM%GBVCw-okb)DY`Vd2K+&WD}C2OFbFhRLW~im72KbVE|Xys z##%lWdBnp~inG?SXFV(P~MQa2i`S#$%euv5pd2pj#vBF3Y&2^U68~_(Hx^c}AG;pOG!=}be zZUOc6DvWbBn2|*j&G?M1$PKY&TQI?7T`XvAL(jPJKtBlDU^-;jd|Hn3vJH5UuEJOm8^6u| z)!;GDPpUjd8y_6;2a?q8x#-42Nx#VQ0(ZI3%*Mq%QGi^iLPn#a-uEBnG|?ClcjH1o zV(V`9J8El~@lZ6FgVPhEA3o>;q`(w2thLLyPDV^rp}i~s!=ryDbBNWG>cD*A(Cm|i z9WP8qJR#zr@^8fC-|nBL52y;bfnseIsFok43eS1Qf621H_FQNj0j_m{iN+NdFjNN1 z&?MO}Z{|1kcz^^)rz*+(LF@*EgneKgZci9{|BN@cBZiB9?0xywy$qnBAXOq9zJW}c zJ6kF*fwNtUkM}4?r8yUBLlOYUMmaB?5m^_{Kunzf>Z>H{-@fA^ybEcyO^x8~#w`$x z*ssC;fRpBlwZTHk;HbwnoG90HTqLH&TKF`VOg$PdxTv#VlSgQ+Uv@M@lLZ+hZEhV_~cFPW1Y?V)s>zm7vk&po@ zukQqxYy-u`EJ1}-tD;7@Ti^Os8BJTMbe!u`WIg+_!(+wPwW-iA98tJI@BCAeyjxB|9w z$v`VgprOcyhG5gFFbbRck_}-ojG^?p)Tr%Hq6_@9_XEvKBt3ka6a+qupjb)3+-_R4 zx61}GII#4|!iq;hUySn_e-jKsH*HLX?x6F?c5RQkjz3?yQ5V1haE zggB&xdgB24dXX6G6*Me0D92#Q8i@~B8ZgIQm+Q6C! z2g&qZolT=_7xDf8>j2OgeFv&MJ^*gGoN~aK_(*xfFwXJQEOeYN;u+G9j*E*x=;7RX zYBnyTR{{C$T|nhdIY4-e!Yk;>tJ_Jg1)&^|E(5}k0|mX)OY>#yQUv_RKRuXZ1c%oA zuYTH9M`MP+NYWrIMXq*TH~#=Mj25T-i%F(F&-}9U{toXeI#SbIg^fB>h#k?F&H7?T z^aHkl{{WbqgHErw!RO7cvI32gE?L+`3hvnK;5-PR30j|*rNpzw5pk)x#*HX4ixzir z=Ep790R9q|al+HLSJssQeAzO2zsL=ZQRs_8s3%@L=HrHbfebysEDtpE<)>#F%Cmnv zZnJX^*bA^YMB)G-S_?=eps|M53e$=cWONA1eO?9mu^vvLco;Z;wUkc!=d3wIt_If& zh_ZMx<;eM$=T)t!G%|TtmozhY89LBVT5r$LL}svmMI#5wQsht2(ZIi0BajbGKGEBS zC)X*R)xqLbf@Eqd4-zE>wir=DE?54tPjwqQb>L?roi&u_=-V==bkNI z?vlfjU9bB0#3;|)TewogE=};ER~GvIH-rM{Y6< zYwu<)V|gc&wH@Z4R6O7b26M2{tz353s5mB#!jl+s%kP&f&2Ee2Z`MIcAo-l8L;+a! 
FpLUkHAM5}C literal 0 HcmV?d00001 diff --git a/tests/resources/crashme_static_pie.core.zst b/tests/resources/crashme_static_pie.core.zst new file mode 100644 index 0000000000000000000000000000000000000000..2e2ef463b5078bc3578908b0a46ce4139dbb6de2 GIT binary patch literal 5482 zcmV-w6_x5JwJ-go0KftO1eyT)%cL$jAf`?Sz&X)DSkjDE@qY29ldiKc=*T3u(SCfl zClxX(0X#l4P(((ElIpdsYxKskh{a56BW>?3Q_%wB0@VVvlm(NP7HF(%J}tPe{bMB* z3HOzs&;NfY|IA4L`@{cR{WD)YUU$59j+aSR?u8|Hg1T2=6sqiD zMaQb*B@@(v9oOwA)3N{g{`p>4zxsDtN;Lt*nDlG%dH-(Q|CZB)!oz!DzeL_3u#w^X z8tfBT2`az{nx4h%rB6XZgcJr1Wg0ltIk7OHsX4K*v?^d_IkU2y8a9>k$w>+;Z8WLX z95X6tU1-MDQU3+x_zB*`KNxTpcOt;rOGD{2Rvl=>GqZVlsL|W&gYH z)GEtzR;aVSv9&#w?yL<>j#_=AmetXidp9|29NHXJ&ITG>8`^)4votk0svBFiN~KW- zHr`Dgha8g9X)^}3m8yt$J3SE2Fxt&7H}i{l8tx|KyYXDdTlR|w2UUbOn=%@={r*I;f82!wmsX@+Rgmuu* z|NU?tpA2XyA(Yq4AJ=GI6Vfq-MY#1O@Q3xnc4_~xV^|w`d<#iF63)*^MY#^^V~Up3 zxVOd#;KBpc*v#XQTOR1=vd7$G5OZJF(Y?yL{~i$&|GN8ijcio6<#C#l55}cc6-09c z4W)sdr3ZfR4=4<4V`ieibNrGvHjqkQr0w@d}KATy&G{qg7R*Jr1tY z6WXQJ^>AxIHc9qKwo0}QkSrfW^5GmOLv}E7jy6lR-pU>D(j6mH+eJuDWY`J8@cxA| zqDfe|Xb#%BKh1$qMADe#f@bOAEd75oKWX*d%4t=%cC~zv{{K)OALPROYlOcP-O&Q( z=l?VE(ST6ieKg|2tLsHcNi&wFt}$XfVLV?jQm-AG6=nUSH(h!?VEpUxjHw}(tp}K+ z*xAqSjd1L&?7<10uh6CFJUE$CbM+$D!L8>{=^RsZXEf}Ny90{h{YT&reuPP1$w1Rn zTYbpXgdXD2pslPWN&o+$9^ZxJLNnM?Y3To3$>X(NcU+YF2^7z|J~OAnRhr^yXzq~a zD5W`?f>Htw=@sz*|IbsvooZ@wcE@7nvqgYz5`N)baZ<;2US*1&!lZklUZP>oGcy`S z`%KC+W9MjJazdRcH{4HkFD4%&56OIH!@7>+XNtaSY^uJs99P|19$?+I{C5iYU^ax~ z8s2008YMI-N;r4GrruQi>x0=EE7%EHAN4lSyn1Jg(|l@>n9J4dP=5j;qOeMt zH?mf^;(-4DH*6+t9}k+AN?ErV9-v)+BlImI4qt+O z>=w05?S<_uox<_%c!U)^qT$rWN0McI%z8O6jd73~!5zgW$~}Z3`6(yG zEd6(ZFv>|W_s3bP)c*B)mTG4y7F~!-a+EN|wVTadr7)_mM2{EhP4$T2Q^l?6qX_m66hyHZ;gVMc^#|*Z`ZI&7e<5R!?cg;Wr4Q);L2l}WTpUrO6`~O)k z%;SsnrvL7n$FX@!5olbl$G3RVsvw-HmY@GFlOX&*ey-b+S!~w-zf!P2RUh^GbFVJH1y!>x2(150BwP{=pD>IgE<8v1#D;&lO#cpcA{#!EXon%z-GYjz7v0Ue9c(WMwYtzq5R$0rjF z#w2w`92CdR%6qi`e*_4>8u3Q4zTRk^Jyl{=+yV>G>F9R%giWw+)J$_3FB2c@Hm57q 
zj4mm1o4@ujWL>H0-!7&QjPFnFx5?rCuVP_fd7wa56BFu=Svk}UB36nc z`l5tPvW$w+i~@TH3U>T6u(}|@gMko#x5V+p#i*jf;;3ijhyp@d?Bo`Cl}s@%h|Ras z`{9k@D5~o%hAokjEVd;9L-Fr6>w1z@Bt4062F+}t<7=y%k#<5_*O8{%G(Bc8y!dwq zG5l{Rcz|D9oAu2C9W~7Yh_w)iZx*wV`hN;+GB6jJsIma7fRL4lLx*=h;5>oYZWDI@ zPB!=MBa2^Y;@LEz#!kEy;$yj6*4AA3YGXk-$CNK$6NZBNIhO z3#fLmpS)c!0;&zKZN#7YwEZsJzhmzvxf>kX8`X`9cGeb0l`EU8qFLFhZD-D})={g@ zmrGkalT#~OqdK2jg?lTb@g^sCMn~uTv+7wIoSgH~*xb(2%HH7E=H#e!@0MfkEk~O3 z*PPZj8qS|_R+X#Ht<5?ABV4FZJ?qUhD%n+2D{Bi2gBF%nGs~&v#E{jDaMkSa^(A5} z$RR;)p2cB!fS1*DQAJ%pK033jQP+}YYmd5yG|NYfK#7#a9Ybdg;`<)rY)K~n-5>vd zr>-5Lsi9TV)kbW=VADR4CPuk_o&rObo`B58Az%NM&F6t5C5s{9mY7?Eb?TzpHxx%~w(lwa zyvLg`>`rIrYXsnwg67kEr|fEQ$$r-|c7IIQSB=k!ji1_uttgE%Fzf1F-o>ILF|yj9 zdlX+k9;YD}r%qdRKL5aodtke*Jd88&EIbdOiE>|2zsfMShaWJP=&ujc!8e`LDaEP# zqD=HzWwJ2X(j5I5#y-E3JUphr`Df;nUzxYPMERJ{(`E)9+jz0Lk$j;w=^J&zR^T8D*}&*Kv35DkJ&Jd>LU z6aip=&pE5Ki}xQsOgwfnh{9hvVYVgnp78r4GwveMtklx-20@}UdW_npUb=8TIJ4!V z^ivOa9=4O8fbp|B+3h*I`r|y_tJA)V)afA>#nNRv$YM{xXxV*_Db1FSgMg@y8yC_h zcf+NiPpiNgj|vjG1kX)0N}U0ioziohH>fh0m>s8p<_!mgJ5r!Z2S5~ho~bmd>Z!xi z+^X1u-x)gxL*4WsS%(?)*GWRgq+3Iu_OSAH<{Xg*03t+4dCL)g!X!&GV@xDrn~9Rk zZ5t4FFN{=e?f)1a07=3`^SQgfBbPfqUYmM2^Pt1o!&*xj6@{wheGiJzcm8;wh5J zQLOl}&{UY|j-=Q}Ru!Qg!)ruzny!+;oFh&KV`34(p# zLhiUFBo-&J?A@7+q#g&E>$sl1Q`=FL4Bl13;+m6sDFMtI`l9H?>vWg*f z!v)h1Y!OrzX0>F;kOoyCmsq(#^jIt$<=v@E6W1>A?T!1Vd)OVj_F?w~CXa1oF|U&I zn1x2vF%&}Wfhh`6xTZDzoO=Gm{KDgTX00;m7fjF#(U)jq`|si7bF(7ri$px4qmOdh z_!X}rMU*!s0NzyjfisvFy2~66{9$ZVX-Q;2Ys`H^1$sOXjR6&A7%iJ2D#UJ3^<9L#=%TPE?ZV)&Y`ykYe+4>@I5R^x zr#mC%?5M30dEUkLDH?P;h3KP>-Sm3bVWn^s`ACr+`b${})#@yOFe(P&Sq_2(}-KlE1Y= z;)aBbpA<|ws^B44NX6qLDZNV}uJB-QRe;@ICeGWiD5FGt1)bv)xQlu_7nN4eG|CDCKTYS+nm?4L}}yHoNHcIc6t zK|YJn$a6$ga%w&Yp!Y*((7JxKio*Us&=;WM1s@gN)o>MuU=Kvhzs_Til%wU&11T4< zkL9+dh3=tXK6t4=1$>I|gWanCb+5t-Ooni5{hd)LwtzX1;k&Ef1-5h zfft&5AaeJ}&?3Pxp29*Yu6JKq0+ut0O$Lf}rnbld)}}7iKU`YNUR3CnJAjcEP+hTtsrSbL(I=l)?^yt#!4o7tcTA^qH*tqU)A8&jjZ6J; z(4W)nb}^e5!bB6ZB^vCZDMQeB5xhvoIF_$g-F&xi#ca~p`_b6BF>$dGrcAAdoxBJ8F 
zoMy*X|JLo+{$q9uk2$_edt~_Z)-Qa*z#>1fWv;*DZrD%J?(b&mH&x(Kkzvj;qpZRM z2B80Hk5N<~1NcM<-ObX{OoPt`oe;RifdI%ELY?e(GN^R>vvpy~;w*yQ gmhji_@% literal 0 HcmV?d00001 diff --git a/tests/resources/crashme_static_pie.zst b/tests/resources/crashme_static_pie.zst new file mode 100755 index 0000000000000000000000000000000000000000..03fc71cac07f70564934818348aa30c97bb05905 GIT binary patch literal 8036 zcmV-qADiGPwJ-f-2xhhV07@cBLr@@6Lel}3dTC~ry+u?+R7C75p>CqOh`6{{)Yj~D z&CJx^!EJ4@0oQXn8vm&az-fKwwa;IPm1PcLgS2*zp>^IQG#T>{T zG6ehsXRbnFoDimAkH9c~W{md(l061ZWd$Hwu{>m~v3P@Z=XHP^RDg_nu2yC1HKQ@e zNk+}Z9?hDwQ+qVJdmyV{@G<`Igpq(1**ZIvJzf#hV2!phZEb-OVY*Z@2*W4nS=NMP zflk<(-eCh|q-U{7u28Pn+6xb9(5}b{+l9vP@FF>(UENw=@Di?EQ>xDDMbK(mQmdq9-iVpi} zMPvpak4Geo z6t==MW@kcZEZrEmHY*NSUf^PDA|%9U=>iaji+q?05FoIy#G~1=HO>kZB5FeT4?WA0 z+wG!JixI53jgV1==~`dOuzqI}JM+?=eRmof>MXp5*IcM|DB(h#M#!i}(wUd;ZHrdp z+-pf&ea17sso}r`g;l4mZDjvv1^N$djTXjBcgqaeV;ZbZ*Vbp&f4;UoVw|tccoz2+ z?2$`D{pIFRp9C683sr8X^%4SdMhk)i`jaIUyMC!;YNCxJ?>=C zGZ0oWNjP3};WeGDZ`S$DwGJEN$c)U2_3*Cl8jiHJ7OwR`7wZ;g3)V>2GHMsF$1&Cy z>AD1!A91b4@cMgrFv4sR7HxQReQ1qbGBGBsX|qQt>lU(Eo830O@U!;R!&ADqNUia+ zMy~Zr(%KNMrsuUQEm}QzcLOkG= zYyArG%$nV9nlv(P-R(w^_JFqDF2QTgSB~16jJ0m&+&`lkaIGb%`Sxnd{&Olw9I0m@ z+FA_O_&?Ue3pW3s1y@WFLo?LS)?l!862kgydjju=0lDI)&&A3}(2i$*~1>6~lYmEk5WdydqI>Un3#H(=)dw4#)AmX7$_Y4cEql*@e z<@=vurG_pE9~t;;dxZFj`l5qa7?xZP*!SMH{HjM3kaH2J=$C!G6<`f=c^gp`|4)}M zf)fACr#ZOZIEZg^1d%dq28?TJI)B*7$1eUS_ecWFKcve($HUM03w11p1v0F|i^EBW zBQb@%pYg^&-0z`G>dnX)6(0p#@(S20fouNDn&df|a?APWt$-slNYL%50>(q&04d^G z95=hL;!nB{j*FmYNI0$TXcrtw2gU-P1FJ!RMc8YTtt$qQzlil?*qAey$42jOnVp}q z-1`H|nSqUnXAOo3v*b_0##PZpwt*g?Gl@U!gP4bT(1M3WX@G}8!Sb*U{yBi+VH-r@ zVH$+-uqQt7uncf`7zSEA%n1@atjUvyF(H75T~PBd3l`u{q=P2&#E5-8rxW1!iZkVW zkyVZ!RMmkE@Lnph{__z*rrP2N>ox%GtFHj!79Nu;D!tDaw^RIGX6|EoE|CtAifX{BVm$}(2b!xSp%0$svw}S=tj|zQIQc)7!(GNgg~Jn zppuhQ)W5(STpye1qPc0oLz@mRjPuKJ98gB;u(X?|3=fuyc}IumrlV_o$Ti>;0N&hF z2QYjszDC`y6ZF^&A+LPxWm2)WNQHnDtRSxUXf`~8uf!y@e98D)!(~lKPqh#9vtOU$ 
zGAURKHATKY^|E5Dje-|wl66owy4^B!*&ScuGACK*^kk6WszYMhgA1nI-d12yplGFM14U@k1j94i!3HY$lT9N&R+E^zM}q zV+GXhj#&=r|6CL9WdN3SQZEAZuJ}gjc9-bF1DtI&JDmRFPgR8b(7;RGt{Irdg2WLN zGE0hpE_t#7cDqmb$lTxs3k*=a%%ERe%PKNF*1Fv$mN!f}3iAKLaP-e>;uKVHz&jiY zwML1&m`kEFCiL4v8f<-r#>%CuLBDN7TsNqjfYn^6X|QtX)*95@Hnb;v)HQ5vCQRq0 zVQaIcDjf`!u5x%pt=hM0yZ|E;M2(y2S$;;q!O{@LGjb98Pl33+@@V*pL6?0B`Xb|< zIr0gpZL!9j)%L>Y1qnb9OfdFA5*{CJA8S;@RkmoxuJA^sMHQltSDGWYwJ%m&fT=R0 zfJ4N#c1=U85rmlxJ~a?e)ZJQJ=fGe{KfJsSCz^^)!(5ERL)FhYErkb*!(YKn4vN|G_HZ-Zcnbl@f>SR1lHSx%* zy5d4ItI4e|_L%BnMB_;OVLMe7pPYJJLETa2O_g$0-)qUNMy<>#*zMKD>{?Q#K&DZ} z9(Iu`6Z&i#RiR7UexNfdg}zm>O8H&ZDA&_Rolj`e{$_Kk2`wJku>Nf`i#)4OT`rSe z_Vl5krjSF;UyUxR=^O)qAMfXWj zlKT!^-yyZ+>ry&j@9hxay`|_pSx5K7M4o$(#%p%#J*KMMtvR`?So#6WZ%4A!12)>R z)QRMHNv8*wux@N_R$R~O-D>M*FJSqVGT%t<=FFv-`M@&&Kz>uu-6_K{ix^94iSAi7 zQc_F5u7#)HQn-uGbsYd)zrI~ciVW{`Bte=6%zJzIi`9c^s7CJLx*ZzMseb+#fZy{R?l#+GARiv=G5mJ^ADmzQ zI$qasBVWI`+y4jUu8VU!nD^kf?{;=@&Q%WmHZEEm`Q(6MIvfc%!s{N6!vbf84iL`{ z5XVuWlgs7c@vrCbT#olr{LeTvx5>Hc+3A6Ez1<6L<0JCv^^Ba~@SIJ~70=N*KD>F? zbwz$d+YPtT0n+7axXx0}=he}{gSi8K?;COR?Kgk7%|5y3<&Zj#SeHw&gL64@x&O&& zLLukv8FNWF9Nb6gNXN5-KZwT&NWo9YJDv@s)ERFuVQP%gn&0XiKH$Y+ha?*g$>U1^1P`{Ro1 ze1z3Nr#UVquT7T{X91VX++^DWa}1}m>t0#PhI^lSPvhczduJrr%1fC@SuuRkoQd%n zIWs*3P~$e_|LJcJQ}5>gia&v^I8MvO8^Pj8kk%!oC5Y2_Xeh5d7`J_pz*k!O@9z!zT+^G3p&@5re!~smBQ~m`$B;qW6@W#C20SnU1r}*u zVz-?Znp^?gfCOw{Y1R+E6}1raAp!pikWWBiGid^N9gaZ9xP+*?h$urD!cZf=Eu8_` zh&b}HLvy4Vuc>J)ZGG9Ccw>OVU;^Mk0I_J`yhG7&wgISv%Y`P=(d7lY^C_2X<+XSS zSiC25Fb8rM!K+34Zx{~vMnni0=C%xn&_G%Sn3kD8EqE9Q;V=?h#rqEi=61|H@Joro z-*HB%Ri%^}^_dUt_Uga5YG$+V@DJMplZDu0?(65$$R zL+#?RiD+dLo2HIY_ESC8b&4n5KO&`weeu@R?FZ3xO1c2?j5% z#MT0klWY?$K2GQZxU`5o{k|b+%Xt{UjaA$& z1q>kYOB>rUL)GF*8?2+x%g{yH>x_61M-qWNMP7LCHPYd5nHyRU#4rcQO-R$hx4rVk zk!)=zfPnnn0!Sn|u$~uW2z;C=ADxs2qvz!3XM^$Zo{A1X(TJYpKjUY71pdT;FR1`ax0@%K^N-=R z5;x%lz^|3JN|jfH*av?QZRotZLweb@p`OgoW^Bx6;BF5q{ z6xC{!?j-I`dXwIyU(he0x3U$CxXBj*Fbtd6irX}afm*~@yv4^dY=X3_OZr~Ys+5S? 
zCFS}d(I>PCWjvl>=-^Us_#4iGsnX&3smG2$@~Nqo5J6idaK9 z1g%=Th@Otdt0p4qbGd{*ty@H24L*sEY}jA?VU;dF3pwR9O?KVSEh1Q6PHK{9Q*9%r zv+8I>V~8pGbXuWPCnK^6g<6|TO(j<~2~}3DQZ8i_teTWkrMGm5a+|JgCrS9G#C#SH7p(!s|ZG9XE6?DFr4U! zfdCju00I!A03ZMW0Z9PPdCELcdO<=|hJ9GJF$~2veyKGPlQx$XYNS3x5UJokSV$=N zaJ@ISN+J^Ym>pf;cEyz8N6{5*r3XB^)f$G8@}INMz_8Le(UsuCJLn(S_ii(w_LN-$ z=nj8jFy*0o(Od?h?Z=@V49o2SZ90U8m%hLzg96G=Pmy-A7T(lAb{!USrZi|s$HMmt zX^{rtOx(Tz4&eeR)y9f$(VGRXZgH_N?TSO`yqJL*^?iW*8s?}1X+ z9p`vQ9eF`X_l|Py*+WqdJ@ddbH3t<{Ca@yJEpjD6byZ;I)yB&lw7h#7g zC_z!GbXSs3Bm>6f0+Jef?L^9h5n&`fN@#?LQ4TMhR1Y2iXA~?jt}9L@TYm=+Pb%t} zIfR@$Y`QOWrP4H1Nv1~fnA6U;a~V<1X{FNzc=P(2ub*Fla4Cxtm7SJ`V&?qURl-l4 zu+-U5SWNlp@RAGLMnM3@$f zl3Ns>1j;<&&i4{%LkbfKl!MNGhk)9%xI9O=@Jn+zu~D|TX3>K1ovw^0f5yfJmqwpp z8R$-iOobBP_9oTcXK?k}Hsmmpg5_edFo^=9^%utnywTYkfiu za=)xH>aF{&I*hW4#zTP5tZ64_+lg4(zoTSywQA)O}bPk)ij}lqkb({Fam_VjAa5~>zq#$IRT%l;R&#KJV^q1%3!ey3bO#jc8b_g?aqUWK+LID zIanjS4*i>g1>pWW7q@IBTqfIj@!biNW&6*tUXX32ty?O~<;HYf`h>ukE&>EU5zs+E z9eULB7H6~xeBmWYg@Sef<3M7JT+G5ii8&Q(<%O*?BfawHx0MGG-Nxi(tcGGmf-f4g zG62;^@I%liGjbzG_JHj(3zvNW3jicqk6yc2p4m3&7V3_b?Xl`5BMCu6l4*(|bP^1X zV@LS{28fVpdfeEA-(V@#L#a|tbuhbiWFL4F zBLxwr*&*mG;eT4+-vLh?Q7=?GyB*9)eLTp&lf0x!x!)YMzhLwX?W+X0tdY`*%>jNFf+2QGo499| zF?r&e&`F5!NNNR5!1;k|pGaX3HT2{AXXyed57fmV+_xiIn1bi_<#MBeFuW)#dBOSU zd(f2iG%@Cw?ZVIrqFw}GC?goE_W!zff*}D4q_q?Q@(~txh8zK<2wO|IeSNQ`T^4%i zzBT>#Rs|xa-5x$-Er0_5J7qyO41i76(*ef88Z0>*2L9ZHvz_1F2W=0iX% zwP#!@eX>%JWw9_9e?ON z8MXyTMrgpZKBbqFqW~ppGwll{Vsa#)V~;SQ?#>m5L)|LBp;Kjtf4|~j3{jvHd@*5^ znzEXB&xU_b#)4R8%1A~Y;#}a~Tni_047YL%V6kD#e^DLk$PA54oJZ;o6ITVeUL4o>h#sI=Wwzocj5V1nq)8(sw zpAxpE|7{N3_RqmCzdOI-$V6aS=6rOgZyrQvuk6lRjo8OYzA zER0{1bW{|l@rl?4E20ie1|c&^f0*Kq0VAxxr2RC1ZdQjG3X9Q?;eQ#?e3HqO<#=2} z)uF~K!~4O#oiMcn$)QIDv+Vlz@mc^AzS~61g{rs*PVu;oK)*fxk642ow!ff{Ld2z^ z!n~+mVbpJu41X+XHAQyF;3s_2)ryrpy3yQX-}*-Ds)*mJPMFfZz&Yw^F45ktMy zLf-1HmJ6B_Uc`e4>A_P?s@u%NCN{&8^`U-~Oi4PzcNdOwPr&8BI~mM;`-8v>vxQ%k zb`=>-&x^-?+my1ljBt$Qx&Q}oM!E!toU>{PgZ6sgmDm*_?JgGK3(`E~uOismgRrOs 
zG;pSHBHpcUw&D=(KzDk{vxu*Wdn|tN3pfuszHurPCF@zhe}R%PQqvx zbnlIS3oU0b{->-g`X1e<89h>XD({5_6glJ^DgGIVJ^73kwLiNseFbZ$ zZW6rW%zg)6$aAuZ>2(Fr%+3)PD_cLxZe_O+)$X+XRMM+yB^>o9fHeUXyk=w##XC4Y zBeYeN38ozSWF)_FAgzQtYxrL3Ya{3}N=CSh25;=)O1_tTXGWIx7RaEei5$KVgGVnU z|6>F{EtsTW&8&rF;jBh9B*Gwh98ZR(Qut+&5xtm2owWCv#N_b6i)Na3s=E%lso(Xqig5_ zOof4^Z9qcBxeU-(VhqY!NOQE|(86+gQdSgOe)49A=z~x#k1j3yf*n-l-Hzv>90D$3 zR5oDTF=^X7`N{K?kuD<9m3x)|IZgCSykPcQ`aIrX2u-7Bia$c<`Xmz6$;dq=In_CU zR*_Jjd22;D5+F1tr|b40Gj6LaqLP+WuxzvdO{`V+{IVSLe|g`MWM#4jlq{Sgsa2vA z5j=tASPxyy4l>A#QbV!a0-ya(){*Tmmh`x71nYYLbE_&3bmRSovnopSK zRc{2c+MoU?SOFiAi5qLD%i%0OX{^Q3aX!oEPd`Sv4_jS}bnxs!=&|*hpW=S64S$su z=`ErTz>(9~#Mkna~f`zp@<2ePwc3 z`O-Dd+1IxC^Qd6PeFUmgDRoa=M;o`x#?o!i20DEN$y&$0A6xc^TsqE-Yyg_-u@h-J zj(QmJT$0|Hi**Fe;J(9~wBJD{-;08yvELdinH(dGy6BWhBKV}^)1gZYbak#mHiC=k z_LpnB76+1}^@>ntiDzJq6po-il0oz!p2%Hh!1wIt&qHk!K;ot}CBQT8G}hzI^aPc~ zyt{ajUFfHtFWz!5mI0QMnigtwCJrPOO7 zcU)w19V1O%?GT3p>K1T-6!~iyQBefRaf#SARDci_21igLVxV1x(TZVo&Yz>bZaVLV z<0gmo!WoYwtEL=TuBPn2YOlV=Bs@n^V1;1Fe4=#f)}g-wEBhO~9OjlK_gEk7_1}UER*=D$}n6Rb4WkiXmV#9gho}l$}D+xc3Eq3#Y!LF_(+?)(%UdK zrE;HFq&oHNhG76=KpmPOe{sNkWmL+1YW0?ykd$0mnT%RKtoW)~&qzmLRm;&Vzi}r9 zvBH*O)LyNq&N)sr>r69o#Qar)X_c6OQ^HFWQ{rt2V*R^MBtAsb4`Ir$8Q|_ov7T=j zLl8amvhe+S{ume1JFrzDFdB{YT$0u|+R|_z#CA7liF4&FSO?z3!ZyS`n>l9E^QR*? z4hg#3?rGy$9-Z1RS2C%8-TtqY_uD|-nExdMvWn4xUw&hPdX+m$gv>k@G?Uj)qOCA1 z4R)lt$)C`*`?4KOY~v_dy#BlhMN&p@mLQI z1Z>|9o+9H8u^EKH@)#85C=C_=E>gk|jJ7+AS#mDzD=vP?$LP%i9^NgQeMb223r-t! z(_qDX61Gv-pjvbJSAlDR=^Aty`D}#yf^GE{NPl9<8v&JX%_9|@VXV{Ziqr>NgDLOd zmBd8S=dGUu;6wrQi1AaRD0z6*XedT-jS(thO*x$37N_1R^E(^FV7|Lt=NFr#lN|2e zbTi!O>26AckSvY|Wv`eGaagF{k2KL#=3|lc!OGl#3}!1LrvIYI1kLbIqalUB#&s=& mn3(VG%Z^w$qqEz64HQzq`xfT&YZ$?#qAoy=_lwf!=a;@HUUNJE literal 0 HcmV?d00001 diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py new file mode 100644 index 000000000..79c795005 --- /dev/null +++ b/tests/test_debug_info.py @@ -0,0 +1,2671 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + + +import binascii +import contextlib +import http.server +import os +import os.path +from pathlib import Path +import re +import shutil +import socket +import socketserver +import tempfile +import threading +import unittest +import unittest.mock + +from _drgn_util.elf import ET, PT, SHF, SHT +from drgn import ( + MainModule, + MissingDebugInfoError, + ModuleFileStatus, + Program, + SharedLibraryModule, + SupplementaryFileKind, + VdsoModule, +) +from tests import TestCase, modifyenv +from tests.dwarfwriter import compile_dwarf +from tests.elfwriter import ElfSection, create_elf_file +from tests.resources import get_resource + + +def gnu_debuglink_section(path, crc): + path = os.fsencode(path) + return ElfSection( + name=".gnu_debuglink", + sh_type=SHT.PROGBITS, + data=path + bytes(4 - len(path) % 4) + crc.to_bytes(4, "little"), + ) + + +def gnu_debugaltlink_section(path, build_id): + return ElfSection( + name=".gnu_debugaltlink", + sh_type=SHT.PROGBITS, + data=os.fsencode(path) + b"\0" + build_id, + ) + + +ALLOCATED_SECTION = ElfSection( + name=".bss", + sh_type=SHT.PROGBITS, + sh_flags=SHF.ALLOC, + p_type=PT.LOAD, + vaddr=0x10000000, + memsz=0x1000, +) + + +@contextlib.contextmanager +def NamedTemporaryElfFile(*, loadable=True, debug=True, build_id=None, sections=()): + if loadable: + sections = (ALLOCATED_SECTION,) + sections + with tempfile.NamedTemporaryFile() as f: + if debug: + f.write(compile_dwarf((), sections=sections, build_id=build_id)) + else: + f.write(create_elf_file(ET.EXEC, sections=sections, build_id=build_id)) + f.flush() + yield f + + +class TestModuleTryFile(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_enabled_debug_info_finders([]) + + def test_want_both(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + 
self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + for status in set(ModuleFileStatus) - {ModuleFileStatus.HAVE}: + for file in ("loaded", "debug"): + with self.subTest(file=file): + self.assertEqual(getattr(module, f"wants_{file}_file")(), False) + # Test that we can't unset the file once it's set. + status_attr = file + "_file_status" + with self.subTest(from_=ModuleFileStatus.HAVE, to=status): + self.assertRaises( + ValueError, setattr, module, status_attr, status + ) + self.assertEqual( + getattr(module, status_attr), ModuleFileStatus.HAVE + ) + + def test_want_both_not_loadable(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(loadable=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_want_both_no_debug(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + def test_want_both_is_neither(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(loadable=False, debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded(self): + module = 
self.prog.extra_module("/foo/bar", create=True)[0] + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded_not_loadable(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded_no_debug(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded_is_neither(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False, debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_debug(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.loaded_file_status = 
ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_only_want_debug_not_loadable(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_only_want_debug_no_debug(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_debug_is_neither(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False, debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + def test_want_neither(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.loaded_file_status = ModuleFileStatus.DONT_WANT + module.debug_file_status = ModuleFileStatus.DONT_WANT + with 
NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_separate_files_loaded_first(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(debug=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + with NamedTemporaryElfFile(loadable=False) as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f2.name) + + def test_separate_files_debug_first(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(loadable=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + with NamedTemporaryElfFile(debug=False) as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f2.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + def test_loadable_then_both(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(debug=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, 
ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + with NamedTemporaryElfFile() as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f2.name) + + def test_debug_then_both(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(loadable=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + with NamedTemporaryElfFile() as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f2.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + def test_no_build_id_force(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile() as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_no_build_id_file_has_build_id(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_no_build_id_file_has_build_id_force(self): + module = 
self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_match(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_match_force(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_mismatch(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\xff\xff\xff\xff") as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_mismatch_force(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\xff\xff\xff\xff") as f: + module.try_file(f.name, force=True) + 
self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_missing(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_missing_force(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile() as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(alt_path, alt_build_id), + ), + build_id=build_id, + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + module.try_file(binary_path) + self.assertEqual( + module.debug_file_status, 
ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.wants_debug_file(), True) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + + with self.assertRaises(ValueError): + module.debug_file_status = ModuleFileStatus.HAVE + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + module.debug_file_status = ModuleFileStatus.WANT_SUPPLEMENTARY + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + module.try_file(alt_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual( + module.supplementary_debug_file_kind, + SupplementaryFileKind.GNU_DEBUGALTLINK, + ) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + def test_gnu_debugaltlink_build_id_mismatch(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id[::-1])) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(alt_path, alt_build_id), + ), + build_id=build_id, + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + 
self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + module.try_file(binary_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + + module.try_file(alt_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + + def test_gnu_debugaltlink_then_both(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + with NamedTemporaryElfFile( + sections=(gnu_debugaltlink_section(alt_path, alt_build_id),), + build_id=build_id, + ) as f1: + module.try_file(f1.name) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + with NamedTemporaryElfFile(build_id=build_id) as f2: + module.try_file(f2.name) + + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + 
self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f2.name) + + def test_gnu_debugaltlink_cancel(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + with NamedTemporaryElfFile( + sections=(gnu_debugaltlink_section(alt_path, alt_build_id),), + build_id=build_id, + ) as f: + module.try_file(f.name) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + module.debug_file_status = ModuleFileStatus.WANT + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.wants_debug_file(), True) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + def test_extra_module_no_address_range(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertIsNone(module.address_range) + self.assertEqual(module.loaded_file_bias, 0) + self.assertEqual(module.debug_file_bias, 0) + + def test_extra_module_address_range(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.address_range = (0x40000000, 0x40001000) + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.address_range, (0x40000000, 0x40001000)) + self.assertEqual(module.loaded_file_bias, 0x30000000) + self.assertEqual(module.debug_file_bias, 0x30000000) + + +class TestLinuxUserspaceCoreDump(TestCase): + def setUp(self): + self.prog = Program() + self.prog.debug_info_path = None + 
self.prog.set_enabled_debug_info_finders(["standard"]) + + def test_loaded_modules(self): + self.prog.set_core_dump(get_resource("crashme.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme") + self.assertEqual(module.address_range, (0x400000, 0x404010)) + self.assertEqual( + module.build_id.hex(), "99a6524c4df01fbff9b43a6ead3d8e8e6201568b" + ) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F6112CACE08 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112CA9000, 0x7F6112CAD010)) + self.assertEqual( + module.build_id.hex(), "7bd58f10e741c3c8fbcf2031aa65f830f933d616" + ) + + with self.subTest(module="libc"): + module = self.prog.shared_library_module("/lib64/libc.so.6", 0x7F6112C94960) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112AAE000, 0x7F6112C9EB70)) + self.assertEqual( + module.build_id.hex(), "77c77fee058b19c6f001cf2cb0371ce3b8341211" + ) + + with self.subTest(module="ld-linux"): + module = self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7F6112CEAE68 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112CB6000, 0x7F6112CEC2D8)) + self.assertEqual( + module.build_id.hex(), "91dcd0244204201b616bbf59427771b3751736ce" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7F6112CB4438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112CB4000, 0x7F6112CB590F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, 
new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def _try_vdso_in_core(self, module): + module.debug_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + + def test_bias(self): + self.prog.set_core_dump(get_resource("crashme.core")) + + for _ in self.prog.loaded_modules(): + pass + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme")) + self.assertEqual(module.loaded_file_bias, 0) + self.assertEqual(module.debug_file_bias, 0) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F6112CACE08 + ) + module.try_file(get_resource("crashme.so")) + self.assertEqual(module.loaded_file_bias, 0x7F6112CA9000) + self.assertEqual(module.debug_file_bias, 0x7F6112CA9000) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7F6112CB4438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7F6112CB4000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_pie(self): + self.prog.set_core_dump(get_resource("crashme_pie.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_pie") + self.assertEqual(module.address_range, (0x557ED343D000, 0x557ED3441018)) + self.assertEqual( + module.build_id.hex(), "eb4ad7aaded3815ab133a6d7784a2c95a4e52998" + ) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7FAB2C38DE08 + ) + found_modules.append(module) + 
self.assertEqual(module.address_range, (0x7FAB2C38A000, 0x7FAB2C38E010)) + self.assertEqual( + module.build_id.hex(), "7bd58f10e741c3c8fbcf2031aa65f830f933d616" + ) + + with self.subTest(module="libc"): + module = self.prog.shared_library_module("/lib64/libc.so.6", 0x7FAB2C375960) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C18F000, 0x7FAB2C37FB70)) + self.assertEqual( + module.build_id.hex(), "77c77fee058b19c6f001cf2cb0371ce3b8341211" + ) + + with self.subTest(module="ld-linux"): + module = self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7FAB2C3CBE68 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C397000, 0x7FAB2C3CD2D8)) + self.assertEqual( + module.build_id.hex(), "91dcd0244204201b616bbf59427771b3751736ce" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FAB2C395438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C395000, 0x7FAB2C39690F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def test_bias_pie(self): + self.prog.set_core_dump(get_resource("crashme_pie.core")) + + for _ in self.prog.loaded_modules(): + pass + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme_pie")) + self.assertEqual(module.loaded_file_bias, 0x557ED343D000) + self.assertEqual(module.debug_file_bias, 0x557ED343D000) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7FAB2C38DE08 + ) + module.try_file(get_resource("crashme.so")) + self.assertEqual(module.loaded_file_bias, 0x7FAB2C38A000) + 
self.assertEqual(module.debug_file_bias, 0x7FAB2C38A000) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FAB2C395438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7FAB2C395000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_static(self): + self.prog.set_core_dump(get_resource("crashme_static.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_static") + self.assertEqual(module.address_range, (0x400000, 0x4042B8)) + self.assertEqual( + module.build_id.hex(), "a0b6befad9f0883c52c475ba3cee9c549cd082cf" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FBC73A66438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FBC73A66000, 0x7FBC73A6790F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def test_bias_static(self): + self.prog.set_core_dump(get_resource("crashme_static.core")) + + for _ in self.prog.loaded_modules(): + pass + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme_static")) + self.assertEqual(module.loaded_file_bias, 0x0) + self.assertEqual(module.debug_file_bias, 0x0) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FBC73A66438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7FBC73A66000) + 
self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_static_pie(self): + self.prog.set_core_dump(get_resource("crashme_static_pie.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_static_pie") + self.assertEqual(module.address_range, (0x7FD981DC9000, 0x7FD981DCD278)) + self.assertEqual( + module.build_id.hex(), "3e0bc47f80d7e64724e11fc021a251ed0d35bc2c" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FD981DC7438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FD981DC7000, 0x7FD981DC890F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def test_bias_static_pie(self): + self.prog.set_core_dump(get_resource("crashme_static_pie.core")) + + for _ in self.prog.loaded_modules(): + pass + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme_static_pie")) + self.assertEqual(module.loaded_file_bias, 0x7FD981DC9000) + self.assertEqual(module.debug_file_bias, 0x7FD981DC9000) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FD981DC7438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7FD981DC7000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_pie_no_headers(self): + self.prog.set_core_dump(get_resource("crashme_pie_no_headers.core")) + + loaded_modules = [] + for 
module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + # Without ELF headers saved in the core dump, and without the main ELF + # file, only the main module (with limited information) and vDSO can be + # found. + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_pie") + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7F299F607438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F299F607000, 0x7F299F60890F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + # If we can read the file headers (specifically, the program header + # table and the interpreter path), then we should be able to get all of + # the modules (with limited information). 
+ exe_file = self.enterContext(open(get_resource("crashme_pie"), "rb")) + + def read_headers(address, count, offset, physical): + exe_file.seek(offset) + return exe_file.read(count) + + self.prog.add_memory_segment(0x5623363D6000, 4096, read_headers, False) + + old_loaded_modules = [] + new_loaded_modules = [] + for module, new in self.prog.loaded_modules(): + (new_loaded_modules if new else old_loaded_modules).append(module) + new_found_modules = [] + + with self.subTest(module="main2"): + module = self.prog.main_module() + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ) + new_found_modules.append(module) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="libc"): + module = self.prog.shared_library_module("/lib64/libc.so.6", 0x7F299F5E7960) + new_found_modules.append(module) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="ld-linux"): + module = self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7F299F63DE68 + ) + new_found_modules.append(module) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + self.assertCountEqual(old_loaded_modules, loaded_modules) + self.assertCountEqual(new_loaded_modules, new_found_modules) + + +class TestLoadDebugInfo(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_core_dump(get_resource("crashme.core")) + self.prog.set_enabled_debug_info_finders([]) + self.finder = unittest.mock.Mock() + self.prog.register_debug_info_finder("mock", self.finder, enable_index=0) + + def test_nothing(self): + self.prog.load_debug_info(None, default=False, main=False) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_empty_list(self): + self.prog.load_debug_info([], 
default=False, main=False) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_no_such_file(self): + with tempfile.TemporaryDirectory() as tmp_dir: + self.prog.load_debug_info([Path(tmp_dir) / "file"]) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_not_elf(self): + with tempfile.NamedTemporaryFile() as f: + f.write(b"hello, world\n") + f.flush() + self.prog.load_debug_info([f.name]) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_no_build_id(self): + with NamedTemporaryElfFile() as f: + self.prog.load_debug_info([f.name]) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_only_main_path(self): + crashme_path = get_resource("crashme") + + self.prog.load_debug_info([crashme_path]) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The provided path should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_only_paths(self): + crashme_path = get_resource("crashme") + crashme_so_path = get_resource("crashme.so") + + self.prog.load_debug_info([crashme_path, crashme_so_path]) + + modules = list(self.prog.modules()) + # All loaded modules should be created. + self.assertEqual(len(modules), 5) + # The provided files should be used for their respective modules. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_path), + ) + # The rest should not have a file. + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_main_by_path(self): + crashme_path = get_resource("crashme") + + self.prog.load_debug_info([crashme_path], main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The provided path should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_main_by_finder(self): + crashme_path = get_resource("crashme") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The finder should be called and set the file for the main module. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_default_by_paths(self): + crashme_path = get_resource("crashme") + crashme_so_path = get_resource("crashme.so") + + self.assertRaises( + MissingDebugInfoError, + self.prog.load_debug_info, + [crashme_path, crashme_so_path], + default=True, + ) + + # All loaded modules should be created. + modules = list(self.prog.modules()) + self.assertEqual(len(modules), 5) + # The provided files should be used for their respective modules. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_path), + ) + # The rest should not have a file. + missing_modules = [] + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + missing_modules.append(module) + self.assertEqual(len(missing_modules), 3) + # The finder should be called for the rest. 
+ self.finder.assert_called_once() + self.assertCountEqual(self.finder.call_args[0][0], missing_modules) + + def test_default_by_finder(self): + crashme_path = get_resource("crashme") + crashme_so_path = get_resource("crashme.so") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_path) + elif module.name == "/home/osandov/crashme.so": + module.try_file(crashme_so_path) + + self.finder.side_effect = finder + + self.assertRaises( + MissingDebugInfoError, self.prog.load_debug_info, default=True + ) + + # All loaded modules should be created. + modules = list(self.prog.modules()) + self.assertEqual(len(modules), 5) + # The finder should be called and set the files for the matching + # modules. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_path), + ) + # The rest should not have a file. + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + # The finder should be called for all loaded modules. + self.finder.assert_called_once() + self.assertCountEqual(self.finder.call_args[0][0], modules) + + def test_main_gnu_debugaltlink_by_path(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + self.prog.load_debug_info([crashme_dwz_path, crashme_alt_path], main=True) + + # Only the main module should be created. 
+ self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The provided paths should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_main_gnu_debugaltlink_by_finder(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_dwz_path) + module.try_file(crashme_alt_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The finder should be called and set the files for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_path_gnu_debugaltlink_not_found(self): + crashme_dwz_path = get_resource("crashme.dwz") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + self.finder.side_effect = finder + + self.assertRaises( + MissingDebugInfoError, + self.prog.load_debug_info, + [crashme_dwz_path], + main=True, + ) + + # Only the main module should be created. 
+ self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The provided path should be used for the loaded file. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + # The finder should be called and fail to find the supplementary file + # for the main module. + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + self.assertEqual( + self.prog.main_module().wanted_supplementary_debug_file()[:3], + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(crashme_dwz_path), + "crashme.alt", + ), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_finder_gnu_debugaltlink_not_found(self): + crashme_dwz_path = get_resource("crashme.dwz") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + module.try_file(crashme_dwz_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + self.finder.side_effect = finder + + self.assertRaises(MissingDebugInfoError, self.prog.load_debug_info, main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The finder should be called and set the loaded file for the main + # module but fail to find the supplementary file. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + self.assertEqual( + self.prog.main_module().wanted_supplementary_debug_file()[:3], + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(crashme_dwz_path), + "crashme.alt", + ), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_path_gnu_debugaltlink_by_finder(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + def finder(modules): + for module in modules: + if ( + module.name == "/home/osandov/crashme" + and module.debug_file_status == ModuleFileStatus.WANT_SUPPLEMENTARY + ): + module.try_file(crashme_alt_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info([crashme_dwz_path], main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The provided path should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + # The finder should be called and set the supplementary file for the + # main module. 
+ self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_finder_gnu_debugaltlink_by_path(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_dwz_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info([crashme_alt_path], main=True) + + # The provided path should be used for the supplementary file for the + # main module. + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + # The finder should be called and set the file for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_wants_gnu_debugaltlink_by_path(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.try_file(crashme_dwz_path) + break + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + + self.prog.load_debug_info([crashme_alt_path], main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The provided path should be used for the supplementary file. + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + # Finders shouldn't be called. 
+ self.finder.assert_not_called() + + def test_main_wants_gnu_debugaltlink_by_finder(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.try_file(crashme_dwz_path) + break + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + + def finder(modules): + for module in modules: + if ( + module.name == "/home/osandov/crashme" + and module.debug_file_status == ModuleFileStatus.WANT_SUPPLEMENTARY + ): + module.try_file(crashme_alt_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The finder should be called and set the supplementary file for the + # main module. + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_wants_gnu_debugaltlink_not_found(self): + crashme_dwz_path = get_resource("crashme.dwz") + + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.try_file(crashme_dwz_path) + break + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + + self.assertRaises(MissingDebugInfoError, self.prog.load_debug_info, main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The finder should be called and fail to find the supplementary file + # for the main module, but the supplementary file should still be + # wanted. 
+ self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_default_gnu_debugaltlink_by_paths(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_so_dwz_path = get_resource("crashme.so.dwz") + crashme_alt_path = get_resource("crashme.alt") + + self.assertRaises( + MissingDebugInfoError, + self.prog.load_debug_info, + [crashme_dwz_path, crashme_so_dwz_path, crashme_alt_path], + default=True, + ) + + # All loaded modules should be created. + modules = list(self.prog.modules()) + self.assertEqual(len(modules), 5) + # The provided files should be used for their respective modules. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_dwz_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_dwz_path), + ) + self.assertEqual( + crashme_so_module.supplementary_debug_file_path, + str(crashme_alt_path), + ) + # The rest should not have a file. + missing_modules = [] + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + missing_modules.append(module) + self.assertEqual(len(missing_modules), 3) + # The finder should be called for the rest. 
+ self.finder.assert_called_once() + self.assertCountEqual(self.finder.call_args[0][0], missing_modules) + + def test_dont_want(self): + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.loaded_file_status = ModuleFileStatus.DONT_WANT + module.debug_file_status = ModuleFileStatus.DONT_WANT + break + # DONT_WANT should be reset to WANT. + self.assertRaises(MissingDebugInfoError, self.prog.load_debug_info, main=True) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_dont_need(self): + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.loaded_file_status = ModuleFileStatus.DONT_NEED + module.debug_file_status = ModuleFileStatus.DONT_NEED + break + # DONT_NEED should be preserved. + self.prog.load_debug_info(main=True) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_NEED) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_NEED) + self.finder.assert_not_called() + + def test_unmatched(self): + self.prog.load_debug_info([get_resource("crashme_static")]) + modules = list(self.prog.modules()) + # All loaded modules should be created. + self.assertEqual(len(modules), 5) + # None of them should have files. 
+ for module in modules: + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.finder.assert_not_called() + + +class TestLoadDebugInfoCoreNoHeaders(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_core_dump(get_resource("crashme_pie_no_headers.core")) + self.prog.set_enabled_debug_info_finders([]) + self.finder = unittest.mock.Mock() + self.prog.register_debug_info_finder("mock", self.finder, enable_index=0) + + def test_main_by_finder(self): + crashme_pie_path = get_resource("crashme_pie") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme_pie": + module.try_file(crashme_pie_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # Only the main module should be created. + self.assertEqual(list(self.prog.modules()), [self.prog.main_module()]) + # The finder should be called and set the files, address range, and + # build ID for the main module. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().address_range, (0x5623363D6000, 0x5623363DA018) + ) + self.assertEqual( + self.prog.main_module().build_id.hex(), + "eb4ad7aaded3815ab133a6d7784a2c95a4e52998", + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + @unittest.expectedFailure # Issue #291 + def test_default_by_finder(self): + crashme_pie_path = get_resource("crashme_pie") + crashme_so_path = get_resource("crashme.so") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme_pie": + module.try_file(crashme_pie_path) + elif module.name == "/home/osandov/crashme.so": + module.try_file(crashme_so_path) + else: + module.loaded_file_status = ModuleFileStatus.DONT_NEED + module.debug_file_status = ModuleFileStatus.DONT_NEED + + self.finder.side_effect = finder + + self.prog.load_debug_info(default=True) + + # All loaded modules should be created (except ld-linux.so; see + # tests.test_module.TestLinuxUserspaceCoreDump.test_loaded_modules_pie_no_headers). + self.assertCountEqual( + list(self.prog.modules()), + [ + self.prog.main_module(), + self.prog.vdso_module("linux-vdso.so.1", 0x7F299F607438), + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ), + self.prog.shared_library_module("/lib64/libc.so.6", 0x7F299F5E7960), + self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7F299F63DE68 + ), + ], + ) + # The finder should be called and set the files, address range, and + # build ID for the main and crashme.so modules. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().address_range, (0x5623363D6000, 0x5623363DA018) + ) + self.assertEqual( + self.prog.main_module().build_id.hex(), + "eb4ad7aaded3815ab133a6d7784a2c95a4e52998", + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).debug_file_path, + str(crashme_so_path), + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).address_range, + (0x7F299F5FC000, 0x7F299F600010), + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).build_id.hex(), + "7bd58f10e741c3c8fbcf2031aa65f830f933d616", + ) + self.finder.assert_called() + + +class TestLoadModuleDebugInfo(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_enabled_debug_info_finders([]) + self.finder = unittest.mock.Mock() + self.prog.register_debug_info_finder("mock", self.finder, enable_index=0) + + def test_empty(self): + self.prog.load_module_debug_info() + self.finder.assert_not_called() + + def test_multiple(self): + self.prog.load_module_debug_info( + self.prog.extra_module("/foo/bar", create=True)[0], + self.prog.extra_module("/foo/baz", create=True)[0], + ) + self.finder.assert_called_once() + self.assertCountEqual( + self.finder.call_args[0][0], + [ + self.prog.extra_module("/foo/bar"), + self.prog.extra_module("/foo/baz"), + ], + ) + + def test_wrong_program(self): + self.assertRaisesRegex( + ValueError, + "module from wrong program", + self.prog.load_module_debug_info, + self.prog.extra_module("/foo/bar", create=True)[0], + Program().extra_module("/foo/baz", 
create=True)[0], + ) + + def test_type_error(self): + self.assertRaises( + TypeError, + self.prog.load_module_debug_info, + self.prog.extra_module("/foo/bar", create=True)[0], + None, + ) + + +class TestStandardDebugInfoFinder(TestCase): + def setUp(self): + self.prog = Program() + self.prog.debug_info_path = None + self.prog.set_enabled_debug_info_finders(["standard"]) + + def test_by_module_name(self): + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_path, f.name) + + def test_by_module_name_with_build_id(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile(build_id=build_id) as f: + module = self.prog.extra_module(f.name, create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_path, f.name) + + def test_by_module_name_missing_build_id(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_by_module_name_build_id_mismatch(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile(build_id=build_id[::-1]) as f: + module = self.prog.extra_module(f.name, create=True)[0] + module.build_id = build_id + 
self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_reuse_loaded_file(self): + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True)[0] + module.debug_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + + module.debug_file_status = ModuleFileStatus.WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_reuse_debug_file(self): + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True)[0] + module.loaded_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + module.loaded_file_status = ModuleFileStatus.WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_reuse_wanted_supplementary_debug_file(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + ) as f: + module = self.prog.extra_module(f.name, create=True)[0] + 
module.loaded_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY) + + module.loaded_file_status = ModuleFileStatus.WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY) + + def test_vdso_in_core(self): + self.prog.set_core_dump(get_resource("crashme.core")) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, VdsoModule): + break + else: + self.fail("vDSO module not found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, "[vdso]") + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_main_by_proc(self): + self.prog.set_pid(os.getpid()) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + break + else: + self.fail("main module not found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + + def test_vdso_by_proc(self): + self.prog.set_pid(os.getpid()) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, VdsoModule): + break + else: + self.skipTest("vDSO module not found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, "[vdso]") + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_shared_library_by_proc(self): + self.prog.set_pid(os.getpid()) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, SharedLibraryModule): + break + else: + self.skipTest("shared library module not 
found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + + def test_by_build_id(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + binary_path = build_id_dir / build_id.hex()[2:] + binary_path.write_bytes(compile_dwarf((), sections=(ALLOCATED_SECTION,))) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + + self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + + def test_by_build_id_separate(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + loadable_path = build_id_dir / build_id.hex()[2:] + loadable_path.write_bytes( + create_elf_file(ET.EXEC, sections=(ALLOCATED_SECTION,)) + ) + debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") + debug_path.write_bytes(compile_dwarf(())) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + + self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + 
self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_build_id_from_loaded(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, sections=(ALLOCATED_SECTION,), build_id=build_id + ) + ) + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") + debug_path.write_bytes(compile_dwarf(())) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + + self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_gnu_debuglink(self): + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + debug_file_contents = compile_dwarf(()) + crc = binascii.crc32(debug_file_contents) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=( + ALLOCATED_SECTION, + gnu_debuglink_section("binary.debug", crc), + ), + ) + ) + + self.prog.debug_info_path = ":.debug:" + str(debug_dir) + for i, debug_path in enumerate( + ( + bin_dir / "binary.debug", + bin_dir / ".debug" / "binary.debug", + debug_dir / bin_dir.relative_to("/") / "binary.debug", + ) + ): + with 
self.subTest(debug_path=debug_path): + try: + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module( + bin_dir / "binary", i, create=True + )[0] + + self.prog.load_module_debug_info(module) + self.assertEqual( + module.loaded_file_status, ModuleFileStatus.HAVE + ) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.HAVE + ) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + finally: + try: + debug_path.unlink() + except FileNotFoundError: + pass + + def test_by_gnu_debuglink_absolute(self): + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + debug_file_contents = compile_dwarf(()) + crc = binascii.crc32(debug_file_contents) + debug_path = debug_dir / "binary.debug" + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=( + ALLOCATED_SECTION, + gnu_debuglink_section(debug_path, crc), + ), + ) + ) + + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_gnu_debuglink_crc_mismatch(self): + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + + debug_file_contents = compile_dwarf(()) + crc = binascii.crc32(debug_file_contents) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=( + 
ALLOCATED_SECTION, + gnu_debuglink_section("binary.debug", crc ^ 1), + ), + ) + ) + + debug_path = bin_dir / "binary.debug" + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.debug_info_path = "" + self.prog.load_module_debug_info(module) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_invalid_gnu_debuglink(self): + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=( + ALLOCATED_SECTION, + ElfSection( + name=".gnu_debuglink", sh_type=SHT.PROGBITS, data=b"foo" + ), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + + def test_gnu_debugaltlink_absolute(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(alt_path, alt_build_id), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, 
str(binary_path)) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + + def test_gnu_debugaltlink_not_found(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(debug_dir / "alt.debug", alt_build_id), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(debug_dir / "alt.debug"), + alt_build_id, + ), + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + def test_only_gnu_debugaltlink_absolute(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(alt_path, alt_build_id), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.try_file(binary_path) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + 
+ self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + + def test_only_gnu_debugaltlink_not_found(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(debug_dir / "alt.debug", alt_build_id), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.try_file(binary_path) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(debug_dir / "alt.debug"), + alt_build_id, + ), + ) + + def test_gnu_debugaltlink_relative(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + 
sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section( + Path(os.path.relpath(alt_path, bin_dir)), alt_build_id + ), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + + def test_gnu_debugaltlink_debug_directories(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / ".dwz/alt.debug" + alt_path.parent.mkdir() + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + + self.prog.debug_info_path = ":.debug:" + str(debug_dir) + for i, debugaltlink in enumerate( + ( + bin_dir / "debug/.dwz/alt.debug", + Path("debug/.dwz/alt.debug"), + ) + ): + with self.subTest(debugaltlink=debugaltlink): + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(debugaltlink, alt_build_id), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", i, create=True)[ + 0 + ] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual( + module.supplementary_debug_file_path, str(alt_path) + ) + + def test_gnu_debugaltlink_build_id_mismatch(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + 
alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id[::-1])) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + gnu_debugaltlink_section(alt_path, alt_build_id), + ), + build_id=build_id, + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + def test_invalid_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + compile_dwarf( + (), + sections=( + ALLOCATED_SECTION, + ElfSection( + name=".gnu_debugaltlink", + sh_type=SHT.PROGBITS, + data=b"foo", + ), + ), + build_id=build_id, + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + +class _DebuginfodHTTPHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + match = re.fullmatch( + r"/buildid/((?:[0-9a-fA-F][0-9a-fA-F])+)/(executable|debuginfo)", self.path + ) + if 
not match: + self.send_error(http.HTTPStatus.BAD_REQUEST) + return + + build_id = bytes.fromhex(match.group(1)) + type = match.group(2) + + try: + file_path = self.server.build_ids[build_id][type] + except KeyError: + self.send_error(http.HTTPStatus.NOT_FOUND) + return + + try: + f = open(file_path, "rb") + except OSError: + self.send_error(http.HTTPStatus.INTERNAL_SERVER_ERROR) + return + + with f: + self.send_response(http.HTTPStatus.OK) + st = os.fstat(f.fileno()) + self.send_header("Content-Type", "application/octet-stream") + self.send_header("Content-Length", str(st.st_size)) + self.send_header("X-Debuginfod-Size", str(st.st_size)) + self.send_header("Last-Modified", self.date_time_string(st.st_mtime)) + self.end_headers() + shutil.copyfileobj(f, self.wfile) + + +class TestDebuginfodDebugInfoFinder(TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.server = socketserver.TCPServer(("localhost", 0), _DebuginfodHTTPHandler) + cls.server.build_ids = {} + cls.server_thread = threading.Thread( + target=cls.server.serve_forever, daemon=True + ) + cls.server_thread.start() + + @classmethod + def tearDownClass(cls): + # By default, serve_forever() only checks if it should shut down every + # 0.5 seconds. Shutting down the socket makes it check immediately. 
+ cls.server.socket.shutdown(socket.SHUT_RD) + cls.server.shutdown() + cls.server_thread.join() + + def setUp(self): + self.prog = Program() + try: + self.prog.set_enabled_debug_info_finders(["debuginfod"]) + except ValueError: + self.skipTest("no debuginfod support") + + self.server.build_ids.clear() + self.cache_dir = Path( + self.enterContext(tempfile.TemporaryDirectory(prefix="debuginfod-cache-")) + ) + self.enterContext( + modifyenv( + { + "DEBUGINFOD_URLS": "http://{}:{}/".format( + *self.server.server_address + ), + "DEBUGINFOD_CACHE_PATH": str(self.cache_dir), + } + ) + ) + + def test_no_build_id(self): + module = self.prog.extra_module("foo", create=True)[0] + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_separate(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=build_id + ) as debug_file: + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + + module = self.prog.extra_module("foo", create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.loaded_file_path, + str(self.cache_dir / build_id.hex() / "executable"), + ) + self.assertEqual( + module.debug_file_path, + str(self.cache_dir / build_id.hex() / "debuginfo"), + ) + + def test_no_servers(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=build_id + ) as debug_file, modifyenv( + 
{"DEBUGINFOD_URLS": None} + ): + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + + module = self.prog.extra_module("foo", create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_cache_hit(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile( + loadable=False, debug=True, build_id=build_id + ) as debug_file: + self.server.build_ids[build_id] = {"debuginfo": debug_file.name} + + for i in range(2): + module = self.prog.extra_module("foo", i, create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_path, + str(self.cache_dir / build_id.hex() / "debuginfo"), + ) + + def test_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, + debug=True, + build_id=build_id, + sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + ) as debug_file, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=alt_build_id + ) as alt_f: + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + self.server.build_ids[alt_build_id] = {"debuginfo": alt_f.name} + + module = self.prog.extra_module("foo", create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.loaded_file_path, + str(self.cache_dir / 
build_id.hex() / "executable"), + ) + self.assertEqual( + module.debug_file_path, + str(self.cache_dir / build_id.hex() / "debuginfo"), + ) + self.assertEqual( + module.supplementary_debug_file_path, + str(self.cache_dir / alt_build_id.hex() / "debuginfo"), + ) + + def test_gnu_debugaltlink_not_found(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, + debug=True, + build_id=build_id, + sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + ) as debug_file: + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + + module = self.prog.extra_module("foo", create=True)[0] + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(self.cache_dir / build_id.hex() / "debuginfo"), + "alt.debug", + alt_build_id, + ), + ) + self.assertEqual( + module.loaded_file_path, + str(self.cache_dir / build_id.hex() / "executable"), + ) + + def test_only_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + build_id=build_id, + sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + ) as f, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=alt_build_id + ) as alt_f: + self.server.build_ids[alt_build_id] = {"debuginfo": alt_f.name} + + module = self.prog.extra_module("foo", create=True)[0] + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + 
module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.loaded_file_path, f.name) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual( + module.supplementary_debug_file_path, + str(self.cache_dir / alt_build_id.hex() / "debuginfo"), + ) + + def test_only_gnu_debugaltlink_not_found(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + build_id=build_id, + sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + ) as f: + module = self.prog.extra_module("foo", create=True)[0] + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + f.name, + "alt.debug", + alt_build_id, + ), + ) + self.assertEqual(module.loaded_file_path, f.name) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 7658cd67f..f2f0e7187 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -202,12 +202,16 @@ labeled_float_die = (DwarfLabel("float_die"), float_die) +def add_extra_dwarf(prog, path): + prog.extra_module(path, create=True)[0].try_file(path, force=True) + + def dwarf_program(*args, segments=None, **kwds): prog = Program() with tempfile.NamedTemporaryFile() as f: f.write(compile_dwarf(*args, **kwds)) f.flush() - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) if segments is not None: 
add_mock_memory_segments(prog, segments) @@ -6909,7 +6913,7 @@ def test_dwo4(self): ) ) ) - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) def test_dwo4_not_found(self): @@ -6937,7 +6941,12 @@ def test_dwo4_not_found(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. + try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output @@ -6989,7 +6998,12 @@ def test_dwo4_id_mismatch(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. + try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output @@ -7034,7 +7048,7 @@ def test_dwo5(self): version=5, ) ) - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) def test_dwo5_not_found(self): @@ -7059,7 +7073,12 @@ def test_dwo5_not_found(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. + try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output @@ -7105,7 +7124,12 @@ def test_dwo5_id_mismatch(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. 
+ try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output diff --git a/tests/test_module.py b/tests/test_module.py new file mode 100644 index 000000000..2ff5c1c89 --- /dev/null +++ b/tests/test_module.py @@ -0,0 +1,489 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from pathlib import Path + +from drgn import ( + ExtraModule, + MainModule, + ModuleFileStatus, + Program, + RelocatableModule, + SharedLibraryModule, + VdsoModule, +) +from tests import TestCase + + +class IntWrapper: + def __init__(self, value): + self._value = value + + def __index__(self): + return self._value + + +class TestModule(TestCase): + def _test_module_init_common(self, module): + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertIsNone(module.loaded_file_bias) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.debug_file_bias) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + + def test_main_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.main_module) + self.assertRaises(LookupError, prog.main_module, "/foo/bar") + + module, new = prog.main_module("/foo/bar", create=True) + self.assertIsInstance(module, MainModule) + self.assertEqual(new, True) + + self.assertEqual(prog.main_module(), module) + self.assertEqual(prog.main_module(create=False), module) + self.assertEqual(prog.main_module("/foo/bar"), module) + self.assertEqual(prog.main_module(b"/foo/bar"), module) + self.assertEqual(prog.main_module(Path("/foo/bar")), module) + self.assertEqual(prog.main_module("/foo/bar", create=True), (module, False)) + + self.assertRaises(LookupError, 
prog.main_module, "/foo/baz") + self.assertRaises(LookupError, prog.main_module, "/foo/baz", create=True) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self._test_module_init_common(module) + + def test_main_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.main_module, None) + self.assertRaises(TypeError, prog.main_module, create=True) + self.assertRaises(TypeError, prog.main_module, "/foo/bar", True) + + def test_shared_library_module(self): + prog = Program() + + self.assertRaises( + LookupError, prog.shared_library_module, "/foo/bar", 0x10000000 + ) + + module, new = prog.shared_library_module("/foo/bar", 0x10000000, create=True) + self.assertIsInstance(module, SharedLibraryModule) + self.assertEqual(new, True) + + self.assertEqual(prog.shared_library_module("/foo/bar", 0x10000000), module) + self.assertEqual(prog.shared_library_module(b"/foo/bar", 0x10000000), module) + self.assertEqual( + prog.shared_library_module(Path("/foo/bar"), IntWrapper(0x10000000)), module + ) + self.assertEqual( + prog.shared_library_module("/foo/bar", 0x10000000, create=True), + (module, False), + ) + + self.assertRaises( + LookupError, prog.shared_library_module, "/foo/bar", 0x20000000 + ) + self.assertRaises( + LookupError, prog.shared_library_module, "/foo/baz", 0x10000000 + ) + + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 0x20000000, create=True)[0], module + ) + self.assertNotEqual( + prog.shared_library_module("/foo/baz", 0x10000000, create=True)[0], module + ) + self.assertNotEqual( + prog.vdso_module("/foo/bar", 0x10000000, create=True)[0], module + ) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.dynamic_address, 0x10000000) + self._test_module_init_common(module) + + def test_shared_library_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.shared_library_module) + self.assertRaises(TypeError, 
prog.shared_library_module, "/foo/bar") + self.assertRaises(TypeError, prog.shared_library_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.shared_library_module, None, 0) + self.assertRaises( + TypeError, prog.shared_library_module, "/foo/bar", 0x10000000, True + ) + + def test_vdso_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.vdso_module, "/foo/bar", 0x10000000) + + module, new = prog.vdso_module("/foo/bar", 0x10000000, create=True) + self.assertIsInstance(module, VdsoModule) + self.assertEqual(new, True) + + self.assertEqual(prog.vdso_module("/foo/bar", 0x10000000), module) + self.assertEqual(prog.vdso_module(b"/foo/bar", 0x10000000), module) + self.assertEqual( + prog.vdso_module(Path("/foo/bar"), IntWrapper(0x10000000)), module + ) + self.assertEqual( + prog.vdso_module("/foo/bar", 0x10000000, create=True), (module, False) + ) + + self.assertRaises(LookupError, prog.vdso_module, "/foo/bar", 0x20000000) + self.assertRaises(LookupError, prog.vdso_module, "/foo/baz", 0x10000000) + + self.assertNotEqual( + prog.vdso_module("/foo/bar", 0x20000000, create=True)[0], module + ) + self.assertNotEqual( + prog.vdso_module("/foo/baz", 0x10000000, create=True)[0], module + ) + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 0x10000000, create=True)[0], module + ) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.dynamic_address, 0x10000000) + self._test_module_init_common(module) + + def test_vdso_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.vdso_module) + self.assertRaises(TypeError, prog.vdso_module, "/foo/bar") + self.assertRaises(TypeError, prog.vdso_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.vdso_module, None, 0) + self.assertRaises(TypeError, prog.vdso_module, "/foo/bar", 0x10000000, True) + + def test_relocatable_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.relocatable_module, 
"/foo/bar", 0x10000000) + + module, new = prog.relocatable_module("/foo/bar", 0x10000000, create=True) + self.assertIsInstance(module, RelocatableModule) + self.assertEqual(new, True) + + self.assertEqual(prog.relocatable_module("/foo/bar", 0x10000000), module) + self.assertEqual(prog.relocatable_module(b"/foo/bar", 0x10000000), module) + self.assertEqual( + prog.relocatable_module(Path("/foo/bar"), IntWrapper(0x10000000)), module + ) + self.assertEqual( + prog.relocatable_module("/foo/bar", 0x10000000, create=True), + (module, False), + ) + + self.assertRaises(LookupError, prog.relocatable_module, "/foo/bar", 0x20000000) + self.assertRaises(LookupError, prog.relocatable_module, "/foo/baz", 0x10000000) + + self.assertNotEqual( + prog.relocatable_module("/foo/bar", 0x20000000, create=True)[0], module + ) + self.assertNotEqual( + prog.relocatable_module("/foo/baz", 0x10000000, create=True)[0], module + ) + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 0x10000000, create=True)[0], module + ) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.address, 0x10000000) + self._test_module_init_common(module) + + def test_section_addresses(self): + prog = Program() + module = prog.relocatable_module("/foo/bar", 0x10000000, create=True)[0] + + self.assertNotIn(".text", module.section_addresses) + self.assertNotIn(1, module.section_addresses) + + with self.assertRaises(KeyError): + module.section_addresses[".text"] + with self.assertRaises(KeyError): + module.section_addresses[1] + + with self.assertRaises(KeyError): + del module.section_addresses[".text"] + with self.assertRaises(KeyError): + del module.section_addresses[1] + + module.section_addresses[".text"] = 0x10000000 + self.assertIn(".text", module.section_addresses) + self.assertEqual(module.section_addresses[".text"], 0x10000000) + + self.assertEqual(len(module.section_addresses), 1) + self.assertCountEqual(list(module.section_addresses), 
[".text"]) + self.assertCountEqual(list(module.section_addresses.keys()), [".text"]) + self.assertCountEqual(list(module.section_addresses.values()), [0x10000000]) + self.assertCountEqual( + list(module.section_addresses.items()), [(".text", 0x10000000)] + ) + + module.section_addresses[".data"] = 0x10001000 + + self.assertEqual(len(module.section_addresses), 2) + self.assertCountEqual(list(module.section_addresses), [".text", ".data"]) + self.assertCountEqual(list(module.section_addresses.keys()), [".text", ".data"]) + self.assertCountEqual( + list(module.section_addresses.values()), [0x10000000, 0x10001000] + ) + self.assertCountEqual( + list(module.section_addresses.items()), + [(".text", 0x10000000), (".data", 0x10001000)], + ) + + del module.section_addresses[".data"] + + self.assertEqual(len(module.section_addresses), 1) + self.assertCountEqual(list(module.section_addresses), [".text"]) + self.assertCountEqual(list(module.section_addresses.keys()), [".text"]) + self.assertCountEqual(list(module.section_addresses.values()), [0x10000000]) + self.assertCountEqual( + list(module.section_addresses.items()), [(".text", 0x10000000)] + ) + + def test_relocatable_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.relocatable_module) + self.assertRaises(TypeError, prog.relocatable_module, "/foo/bar") + self.assertRaises(TypeError, prog.relocatable_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.relocatable_module, None, 0) + self.assertRaises( + TypeError, prog.relocatable_module, "/foo/bar", 0x10000000, True + ) + + def test_extra_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.extra_module, "/foo/bar", 1234) + + module, new = prog.extra_module("/foo/bar", 1234, create=True) + self.assertIsInstance(module, ExtraModule) + self.assertEqual(new, True) + + self.assertEqual(prog.extra_module("/foo/bar", 1234), module) + self.assertEqual(prog.extra_module(b"/foo/bar", 1234), module) + 
self.assertEqual(prog.extra_module(Path("/foo/bar"), IntWrapper(1234)), module) + self.assertEqual( + prog.extra_module("/foo/bar", 1234, create=True), (module, False) + ) + + self.assertRaises(LookupError, prog.extra_module, "/foo/bar", 5678) + self.assertRaises(LookupError, prog.extra_module, "/foo/baz", 1234) + + self.assertNotEqual(prog.extra_module("/foo/bar", 5678, create=True)[0], module) + self.assertNotEqual(prog.extra_module("/foo/baz", 1234, create=True)[0], module) + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 1234, create=True)[0], module + ) + self.assertEqual(prog.extra_module("/foo/bar", create=True)[0].id, 0) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.id, 1234) + self._test_module_init_common(module) + + def test_extra_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.extra_module) + self.assertRaises(TypeError, prog.extra_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.extra_module, None, 0) + self.assertRaises(TypeError, prog.extra_module, "/foo/bar", 1234, True) + + def test_address_range(self): + module = Program().extra_module("/foo/bar", create=True)[0] + + module.address_range = (0x10000000, 0x10010000) + self.assertEqual(module.address_range, (0x10000000, 0x10010000)) + + module.address_range = (0x20000000, 0x20020000) + self.assertEqual(module.address_range, (0x20000000, 0x20020000)) + + module.address_range = None + self.assertIsNone(module.address_range) + + module.address_range = None + self.assertIsNone(module.address_range) + + def test_address_range_empty(self): + module = Program().extra_module("/foo/bar", create=True)[0] + + module.address_range = (0, 0) + self.assertEqual(module.address_range, (0, 0)) + + def test_address_range_type_error(self): + module = Program().extra_module("/foo/bar", create=True)[0] + + with self.assertRaises(TypeError): + module.address_range = 1 + + with self.assertRaises(TypeError): 
+ module.address_range = (1,) + + with self.assertRaises(TypeError): + module.address_range = ("foo", 1) + + with self.assertRaises(TypeError): + module.address_range = (1, "bar") + + def test_address_range_invalid(self): + module = Program().extra_module("/foo/bar", create=True)[0] + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (0x10010000, 0x10000000) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (1, 1) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (2**64 - 1, 1) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (2**64 - 1, 2**64 - 1) + + def test_build_id(self): + module = Program().extra_module("/foo/bar", create=True)[0] + + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + module.build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + self.assertEqual(module.build_id, b"\xfe\xdc\xba\x98\x76\x54\x32\x10") + + module.build_id = None + self.assertIsNone(module.build_id) + + module.build_id = None + self.assertIsNone(module.build_id) + + def test_build_id_type_error(self): + module = Program().extra_module("/foo/bar", create=True)[0] + with self.assertRaises(TypeError): + module.build_id = "abcd" + + def test_build_id_invalid_empty(self): + module = Program().extra_module("/foo/bar", create=True)[0] + with self.assertRaisesRegex(ValueError, "build ID cannot be empty"): + module.build_id = b"" + + def test_find_by_address(self): + prog = Program() + module1 = prog.extra_module("/foo/bar", create=True)[0] + module1.address_range = (0x10000000, 0x10010000) + module2 = prog.extra_module("/asdf/jkl", create=True)[0] + module2.address_range = (0x20000000, 0x20020000) + + self.assertRaises(LookupError, prog.module, 0x0FFFFFFF) + self.assertEqual(prog.module(0x10000000), module1) + 
self.assertEqual(prog.module(0x10000001), module1) + self.assertEqual(prog.module(0x1000FFFF), module1) + self.assertRaises(LookupError, prog.module, 0x10010000) + + self.assertRaises(LookupError, prog.module, 0x1FFFFFFF) + self.assertEqual(prog.module(0x20000000), module2) + self.assertEqual(prog.module(0x20000001), module2) + self.assertEqual(prog.module(0x2001FFFF), module2) + self.assertRaises(LookupError, prog.module, 0x20020000) + + # Test all of the state transitions that we can without setting a file. + def _test_file_status(self, which): + module = Program().extra_module("/foo/bar", create=True)[0] + + status_attr = which + "_file_status" + wants_file = getattr(module, f"wants_{which}_file") + + self.assertRaises(TypeError, setattr, module, status_attr, 1) + + setattr(module, status_attr, ModuleFileStatus.WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + self.assertEqual(wants_file(), True) + for status in set(ModuleFileStatus) - { + ModuleFileStatus.WANT, + ModuleFileStatus.DONT_WANT, + ModuleFileStatus.DONT_NEED, + }: + with self.subTest(from_=ModuleFileStatus.WANT, to=status): + self.assertRaises(ValueError, setattr, module, status_attr, status) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + + setattr(module, status_attr, ModuleFileStatus.DONT_WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_WANT) + self.assertEqual(wants_file(), False) + for status in set(ModuleFileStatus) - { + ModuleFileStatus.WANT, + ModuleFileStatus.DONT_WANT, + ModuleFileStatus.DONT_NEED, + }: + with self.subTest(from_=ModuleFileStatus.DONT_WANT, to=status): + self.assertRaises(ValueError, setattr, module, status_attr, status) + self.assertEqual( + getattr(module, status_attr), ModuleFileStatus.DONT_WANT + ) + + setattr(module, status_attr, ModuleFileStatus.DONT_NEED) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_NEED) + self.assertEqual(wants_file(), False) + for status 
in set(ModuleFileStatus) - { + ModuleFileStatus.WANT, + ModuleFileStatus.DONT_WANT, + ModuleFileStatus.DONT_NEED, + }: + with self.subTest(from_=ModuleFileStatus.DONT_NEED, to=status): + self.assertRaises(ValueError, setattr, module, status_attr, status) + self.assertEqual( + getattr(module, status_attr), ModuleFileStatus.DONT_NEED + ) + + setattr(module, status_attr, ModuleFileStatus.DONT_WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_WANT) + + setattr(module, status_attr, ModuleFileStatus.WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + + setattr(module, status_attr, ModuleFileStatus.DONT_NEED) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_NEED) + + setattr(module, status_attr, ModuleFileStatus.WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + + def test_loaded_file_status(self): + self._test_file_status("loaded") + + def test_debug_file_status(self): + self._test_file_status("debug") + + +class TestCreatedModules(TestCase): + def test_empty(self): + self.assertEqual(list(Program().modules()), []) + + def test_one(self): + module = Program().extra_module("/foo/bar", create=True)[0] + self.assertEqual(list(module.prog.modules()), [module]) + + def test_multiple(self): + prog = Program() + modules = [ + prog.extra_module("/foo/bar", create=True)[0], + prog.extra_module("/asdf/jkl", create=True)[0], + prog.extra_module("/123/456", create=True)[0], + ] + self.assertCountEqual(list(prog.modules()), modules) + + def test_change_during_iteration(self): + prog = Program() + prog.extra_module("/foo/bar", create=True) + with self.assertRaisesRegex(Exception, "modules changed during iteration"): + for module in prog.modules(): + prog.extra_module("/asdf/jkl", create=True) + prog.extra_module("/123/456", create=True) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index d9cc3dd94..91fac06aa 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py 
@@ -1,8 +1,10 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later + +import itertools import tempfile -from _drgn_util.elf import ET, PT, SHT, STB, STT +from _drgn_util.elf import ET, PT, SHF, SHT, STB, STT from drgn import Program, Symbol, SymbolBinding, SymbolIndex, SymbolKind from tests import TestCase from tests.dwarfwriter import dwarf_sections @@ -10,19 +12,20 @@ def create_elf_symbol_file(symbols): - # We need some DWARF data so that libdwfl will load the file. sections = dwarf_sections(()) # Create a section for the symbols to reference and the corresponding # segment for address lookups. min_address = min(symbol.value for symbol in symbols) max_address = max(symbol.value + symbol.size for symbol in symbols) + size = max(max_address - min_address, 4096) sections.append( ElfSection( name=".foo", sh_type=SHT.NOBITS, + sh_flags=SHF.ALLOC, p_type=PT.LOAD, vaddr=min_address, - memsz=max_address - min_address, + memsz=size, ) ) symbols = [ @@ -31,16 +34,20 @@ def create_elf_symbol_file(symbols): ) for symbol in symbols ] - return create_elf_file(ET.EXEC, sections, symbols) + return create_elf_file(ET.EXEC, sections, symbols), min_address, min_address + size def elf_symbol_program(*modules): prog = Program() for symbols in modules: with tempfile.NamedTemporaryFile() as f: - f.write(create_elf_symbol_file(symbols)) + contents, start, end = create_elf_symbol_file(symbols) + f.write(contents) f.flush() - prog.load_debug_info([f.name]) + module = prog.extra_module(f.name, create=True)[0] + module.address_range = (start, end) + module.try_file(f.name, force=True) + print(module.loaded_file_path) return prog @@ -78,59 +85,167 @@ def test_by_address(self): self.assert_symbols_equal_unordered(prog.symbols(0xFFFF000C), [second]) self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) - def test_by_address_precedence(self): - precedence = (STB.GLOBAL, STB.WEAK, STB.LOCAL) - drgn_precedence = ( - SymbolBinding.GLOBAL, - 
SymbolBinding.WEAK, - SymbolBinding.LOCAL, + def test_by_address_closest(self): + # If two symbols contain the given address, then the one whose start + # address is closest to the given address should be preferred + # (regardless of the binding of either symbol). + elf_closest = ElfSymbol("closest", 0xFFFF0008, 0x8, STT.OBJECT, STB.WEAK) + elf_furthest = ElfSymbol("furthest", 0xFFFF0000, 0xC, STT.OBJECT, STB.GLOBAL) + closest = Symbol( + "closest", 0xFFFF0008, 0x8, SymbolBinding.WEAK, SymbolKind.OBJECT + ) + furthest = Symbol( + "furthest", 0xFFFF0000, 0xC, SymbolBinding.GLOBAL, SymbolKind.OBJECT ) - def assert_find_higher(*modules): - self.assertEqual( - elf_symbol_program(*modules).symbol(0xFFFF0000).name, "foo" + def test(elf_symbols): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF000B), closest) + self.assert_symbols_equal_unordered( + prog.symbols(0xFFFF000B), [closest, furthest] ) - def assert_finds_both(symbols, *modules): + with self.subTest("closest first"): + test([elf_closest, elf_furthest]) + + with self.subTest("furthest first"): + test([elf_furthest, elf_closest]) + + def test_by_address_closest_end(self): + # If two symbols contain the given address and have the same start + # address, then the one whose end address is closest to the given + # address should be preferred (regardless of the binding of either + # symbol). 
+ elf_closest = ElfSymbol("closest", 0xFFFF0000, 0xC, STT.OBJECT, STB.WEAK) + elf_furthest = ElfSymbol("furthest", 0xFFFF0000, 0x10, STT.OBJECT, STB.GLOBAL) + closest = Symbol( + "closest", 0xFFFF0000, 0xC, SymbolBinding.WEAK, SymbolKind.OBJECT + ) + furthest = Symbol( + "furthest", 0xFFFF0000, 0x10, SymbolBinding.GLOBAL, SymbolKind.OBJECT + ) + + def test(elf_symbols): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF000B), closest) self.assert_symbols_equal_unordered( - elf_symbol_program(*modules).symbols(0xFFFF0000), - symbols, + prog.symbols(0xFFFF000B), [closest, furthest] ) - for i in range(len(precedence) - 1): - higher_binding = precedence[i] - higher_binding_drgn = drgn_precedence[i] - for j in range(i + 1, len(precedence)): - lower_binding = precedence[j] - lower_binding_drgn = drgn_precedence[j] - with self.subTest(higher=higher_binding, lower=lower_binding): - higher = ElfSymbol( - "foo", 0xFFFF0000, 0x8, STT.OBJECT, higher_binding - ) - lower = ElfSymbol("bar", 0xFFFF0000, 0x8, STT.OBJECT, lower_binding) - symbols = [ - Symbol( - "foo", - 0xFFFF0000, - 0x8, - higher_binding_drgn, - SymbolKind.OBJECT, - ), - Symbol( - "bar", - 0xFFFF0000, - 0x8, - lower_binding_drgn, - SymbolKind.OBJECT, - ), - ] - # Local symbols must be before global symbols. 
- if lower_binding != STB.LOCAL: - with self.subTest("higher before lower"): - assert_find_higher((higher, lower)) - with self.subTest("lower before higher"): - assert_find_higher((lower, higher)) - assert_finds_both(symbols, (lower, higher)) + with self.subTest("closest first"): + test([elf_closest, elf_furthest]) + + with self.subTest("furthest first"): + test([elf_furthest, elf_closest]) + + def test_by_address_sizeless(self): + label = ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL) + less = ElfSymbol("less", 0xFFFF0000, 0x4, STT.FUNC, STB.LOCAL) + greater = ElfSymbol("greater", 0xFFFF0010, 0x4, STT.FUNC, STB.LOCAL) + + expected = Symbol( + "label", 0xFFFF0008, 0x0, SymbolBinding.LOCAL, SymbolKind.FUNC + ) + + # Test every permutation of every combination of symbols that includes + # "label". + for elf_symbols in itertools.chain.from_iterable( + itertools.permutations((label,) + extra_elf_symbols) + for r in range(3) + for extra_elf_symbols in itertools.combinations((less, greater), r) + ): + with self.subTest(elf_symbols=[sym.name for sym in elf_symbols]): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF0009), expected) + self.assertEqual(prog.symbols(0xFFFF0009), [expected]) + + def test_by_address_sizeless_subsumed(self): + import unittest.util + + unittest.util._MAX_LENGTH = 999999999 + label = ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL) + subsume = ElfSymbol("subsume", 0xFFFF0004, 0x8, STT.FUNC, STB.LOCAL) + less = ElfSymbol("less", 0xFFFF0000, 0x4, STT.FUNC, STB.LOCAL) + greater = ElfSymbol("greater", 0xFFFF0010, 0x4, STT.FUNC, STB.LOCAL) + + expected = Symbol( + "subsume", 0xFFFF0004, 0x8, SymbolBinding.LOCAL, SymbolKind.FUNC + ) + + # Test every permutation of every combination of symbols that includes + # "label" and "subsume". 
+ for elf_symbols in itertools.chain.from_iterable( + itertools.permutations((label, subsume) + extra_elf_symbols) + for r in range(3) + for extra_elf_symbols in itertools.combinations((less, greater), r) + ): + with self.subTest(elf_symbols=[sym.name for sym in elf_symbols]): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF0009), expected) + self.assertEqual(prog.symbols(0xFFFF0009), [expected]) + + def test_by_address_sizeless_wrong_section(self): + prog = elf_symbol_program( + (ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL),) + ) + for module in prog.modules(): + start, end = module.address_range + module.address_range = (start, 0xFFFFFF00) + self.assertRaises(LookupError, prog.symbol, 0xFFFFFE00) + + def test_by_address_binding_precedence(self): + precedence = ( + (STB.GLOBAL, STB.GNU_UNIQUE), + (STB.WEAK,), + (STB.LOCAL, STB.HIPROC), + ) + + def assert_find_higher(*modules, both): + prog = elf_symbol_program(*modules) + self.assertEqual(prog.symbol(0xFFFF0000).name, "foo") + # Test that symbols() finds both if expected or either one if not. + if both: + self.assertCountEqual( + [sym.name for sym in prog.symbols(0xFFFF0000)], ["foo", "bar"] + ) + else: + self.assertIn( + [sym.name for sym in prog.symbols(0xFFFF0000)], (["foo"], ["bar"]) + ) + + for size in (8, 0): + with self.subTest(size=size): + for i in range(len(precedence) - 1): + for higher_binding in precedence[i]: + for j in range(i + 1, len(precedence)): + for lower_binding in precedence[j]: + with self.subTest( + higher=higher_binding, lower=lower_binding + ): + higher = ElfSymbol( + "foo", + 0xFFFF0000, + size, + STT.OBJECT, + higher_binding, + ) + lower = ElfSymbol( + "bar", + 0xFFFF0000, + size, + STT.OBJECT, + lower_binding, + ) + # Local symbols must be before global symbols. 
+ if lower_binding not in precedence[-1]: + with self.subTest("higher before lower"): + assert_find_higher( + (higher, lower), both=size > 0 + ) + with self.subTest("lower before higher"): + assert_find_higher( + (lower, higher), both=size > 0 + ) def test_by_name(self): elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.GLOBAL) @@ -156,7 +271,7 @@ def test_by_name(self): self.assert_symbols_equal_unordered(prog.symbols("second"), [second]) self.assertEqual(prog.symbols("third"), []) - def test_by_name_precedence(self): + def test_by_name_binding_precedence(self): precedence = ( (STB.GLOBAL, STB.GNU_UNIQUE), (STB.WEAK,), @@ -170,10 +285,9 @@ def assert_find_higher(*modules): prog = elf_symbol_program(*modules) self.assertEqual(prog.symbol("foo").address, expected) # assert symbols() always finds both - symbols = sorted(prog.symbols("foo"), key=lambda s: s.address) - self.assertEqual(len(symbols), 2) - self.assertEqual(symbols[0].address, other) - self.assertEqual(symbols[1].address, expected) + self.assertCountEqual( + [sym.address for sym in prog.symbols("foo")], [expected, other] + ) for i in range(len(precedence) - 1): for higher_binding in precedence[i]: From 3ce0feea3691cbdfc5856a9c5e9165d002e6e259 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 19 Dec 2024 12:54:11 -0800 Subject: [PATCH 012/166] tests: don't clobber file in use by libelf Overwriting a file that libelf has already mmap'd can confuse it and cause it to crash. In particular, libelf/elf_begin.c::file_read_elf() initializes Elf_Scn::rawdata_base and Elf_Scn::data_base from the mmap'd file. libelf/elf_getdata.c::__libelf_set_rawdata_wrlock() also sets Elf_Scn::rawdata_base from the mmap'd file. If the file changes between those two events, then Elf_Scn::rawdata_base will change. 
Then, the following line in libelf/elf_end.c::elf_end() will try to free an mmap'd pointer: if (scn->data_base != scn->rawdata_base) free (scn->data_base); Stephen reported crashes like this from test_gnu_debugaltlink_debug_directories() while testing a patch that inadvertently caused debug info to be indexed on module creation. Reported-by: Stephen Brennan Signed-off-by: Omar Sandoval --- tests/test_debug_info.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index 79c795005..8c6f8f731 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -2283,8 +2283,6 @@ def test_gnu_debugaltlink_debug_directories(self): alt_path.parent.mkdir() alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) - binary_path = bin_dir / "binary" - self.prog.debug_info_path = ":.debug:" + str(debug_dir) for i, debugaltlink in enumerate( ( @@ -2293,6 +2291,7 @@ def test_gnu_debugaltlink_debug_directories(self): ) ): with self.subTest(debugaltlink=debugaltlink): + binary_path = bin_dir / f"binary{i}" binary_path.write_bytes( compile_dwarf( (), @@ -2303,9 +2302,7 @@ def test_gnu_debugaltlink_debug_directories(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", i, create=True)[ - 0 - ] + module = self.prog.extra_module(binary_path, create=True)[0] self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) From e6044c767a48edf67f134efe877954236e912a83 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 19 Dec 2024 13:14:35 -0800 Subject: [PATCH 013/166] libdrgn: don't bother getting program language to initialize object Since commit 4e83130008e9 ("Introduce module and debug info finder APIs"), DWARF indexing is somewhat lazy. As a result, drgn_void_type() may require DWARF indexing in order to determine the program's main language. 
This is overkill for drgn_object_init(), which just needs to initialize a valid dummy object that is usually reinitialized immediately. Signed-off-by: Omar Sandoval --- libdrgn/object.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libdrgn/object.c b/libdrgn/object.c index 5820ce183..e907bec83 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -18,11 +18,11 @@ #include "type.h" #include "util.h" -#define DRGN_OBJECT_INITIALIZER(prog) \ - (struct drgn_object){ \ - .type = drgn_void_type(prog, NULL), \ - .encoding = DRGN_OBJECT_ENCODING_NONE, \ - .kind = DRGN_OBJECT_ABSENT, \ +#define DRGN_OBJECT_INITIALIZER(prog) \ + (struct drgn_object){ \ + .type = &(prog)->void_types[DRGN_LANGUAGE_C], \ + .encoding = DRGN_OBJECT_ENCODING_NONE, \ + .kind = DRGN_OBJECT_ABSENT, \ } LIBDRGN_PUBLIC From 5ee2dfd612c5d5278c8c4f9b959c01b4745396ed Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 19 Dec 2024 13:21:37 -0800 Subject: [PATCH 014/166] tests: drop leftover debugging print Fixes: 4e83130008e9 ("Introduce module and debug info finder APIs") Signed-off-by: Omar Sandoval --- tests/test_symbol.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index 91fac06aa..d6f6aebbf 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -47,7 +47,6 @@ def elf_symbol_program(*modules): module = prog.extra_module(f.name, create=True)[0] module.address_range = (start, end) module.try_file(f.name, force=True) - print(module.loaded_file_path) return prog From 89656d4e3a2117e18346a1be86d281ebb3dedd46 Mon Sep 17 00:00:00 2001 From: Michel Lind Date: Thu, 19 Dec 2024 16:46:28 -0600 Subject: [PATCH 015/166] Update Ubuntu instructions Mention that `drgn` is shipped with Ubuntu except jammy, but it's an older version Signed-off-by: Michel Lind --- README.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 1bd7c2a34..833df4d4e 100644 --- a/README.rst +++ 
b/README.rst @@ -127,8 +127,12 @@ drgn can be installed using the package manager on some Linux distributions. * Ubuntu - Enable the `michel-slm/kernel-utils PPA `_. - Then: + All supported Ubuntu releases except for 22.04 (jammy) ship with drgn - but generally the version that + was in Debian unstable at the time that Ubuntu release was branched. + + To get the latest version, including on jammy, enable the `michel-slm/kernel-utils PPA `_. + + To install drgn itself, with or without the PPA: .. code-block:: console From 885a3209f3603593fb89327c1072f94e5862ffbb Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 2 Dec 2024 17:38:24 +0100 Subject: [PATCH 016/166] libdrgn: kdump: simplify getting the PRSTATUS attributes Since libkdumpfile commit 5b044292abe9 ("Clarify and fix attribute data lifetime") changes the lifetime of attribute values retrieved with kdump_attr_ref_get(), the extra reference would keep the PRSTATUS blob around even after kdump_free(). However, the attribute hierarchy cannot change while iterating over the PRSTATUS attributes, so it is not necessary to take an attribute reference and we can use kdump_get_typed_attr(). The attribute blob itself should not change either, but it is a good idea to keep its data pinned, because a raw pointer to it is stored in the drgn_thread_set hash table. If some code tries to modify the PRSTATUS attribute data, the attempt will fail with KDUMP_ERR_BUSY rather than leave a dangling pointer in the hash table and possibly cause a UAF bug later. The blob pin does not prevent freeing the blob when the blob reference count reaches zero. 
Signed-off-by: Petr Tesarik --- libdrgn/kdump.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 09efd9658..0bb594d17 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -299,39 +299,31 @@ struct drgn_error *drgn_program_cache_kdump_threads(struct drgn_program *prog) } /* - * Note that in the following loop we never call kdump_attr_unref() on - * prstatus_ref, nor kdump_blob_unpin() on the prstatus blob that we get - * from libkdumpfile. Since drgn is completely read-only as a consumer - * of that library, we "leak" both the attribute reference and blob pin - * until kdump_free() is called which will clean up everything for us. + * Note that in the following loop we never call kdump_blob_unpin() on + * the prstatus blob that we get from libkdumpfile. Since drgn never + * modifies the PRSTATUS attributes (neither directly nor indirectly), + * we "leak" the blob pin until kdump_free() is called, which will + * clean up everything for us. */ for (i = 0; i < ncpus; i++) { - /* Enough for the longest possible PRSTATUS attribute name. */ - kdump_attr_ref_t prstatus_ref; kdump_attr_t prstatus_attr; void *prstatus_data; size_t prstatus_size; #define FORMAT "cpu.%" PRIuFAST64 ".PRSTATUS" + /* Enough for the longest possible PRSTATUS attribute name. 
*/ char attr_name[sizeof(FORMAT) - sizeof("%" PRIuFAST64) + max_decimal_length(uint_fast64_t) + 1]; snprintf(attr_name, sizeof(attr_name), FORMAT, i); #undef FORMAT - ks = kdump_attr_ref(prog->kdump_ctx, attr_name, &prstatus_ref); - if (ks != KDUMP_OK) { - return drgn_error_format(DRGN_ERROR_OTHER, - "kdump_attr_ref(%s): %s", - attr_name, - kdump_get_err(prog->kdump_ctx)); - } - - ks = kdump_attr_ref_get(prog->kdump_ctx, &prstatus_ref, - &prstatus_attr); + prstatus_attr.type = KDUMP_BLOB; + ks = kdump_get_typed_attr(prog->kdump_ctx, attr_name, + &prstatus_attr); if (ks != KDUMP_OK) { return drgn_error_format(DRGN_ERROR_OTHER, - "kdump_attr_ref_get(%s): %s", + "kdump_get_typed_attr(%s): %s", attr_name, kdump_get_err(prog->kdump_ctx)); } From 4e06cfdff158935eee725e726a63a5d9c9e57dab Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 3 Dec 2024 13:54:40 +0100 Subject: [PATCH 017/166] libdrgn: kdump: prepare for incompatible changes in libkdumpfile-0.5.5 The kdump_get_typed_attr() function prototype changed in libkdumpfile commit e182aeaf4d72 ("Make kdump_get_typed_attr() easier to use"). 
Signed-off-by: Petr Tesarik --- libdrgn/kdump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 0bb594d17..4e5eea92a 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -318,9 +318,14 @@ struct drgn_error *drgn_program_cache_kdump_threads(struct drgn_program *prog) + 1]; snprintf(attr_name, sizeof(attr_name), FORMAT, i); #undef FORMAT +#if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 5, 5) + ks = kdump_get_typed_attr(prog->kdump_ctx, attr_name, + KDUMP_BLOB, &prstatus_attr.val); +#else prstatus_attr.type = KDUMP_BLOB; ks = kdump_get_typed_attr(prog->kdump_ctx, attr_name, &prstatus_attr); +#endif if (ks != KDUMP_OK) { return drgn_error_format(DRGN_ERROR_OTHER, "kdump_get_typed_attr(%s): %s", From a1869f9549c1af6f5393406895bfc97ef48a5ac5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 19 Dec 2024 15:12:53 -0800 Subject: [PATCH 018/166] Make StackFrame.name fall back to symbol/PC and add StackFrame.function_name Multiple people have lamented that StackFrame.name is None for functions implemented in assembly or missing debug info for any other reason. With DWARFless debugging, this will be way more common. My original hope was that StackFrame.name would strictly be the function name from the debugging information and that callers would fall back to getting the symbol name themselves. However, the distinction isn't super meaningful to users, so let's add the fallback directly to StackFrame.name and add StackFrame.function_name with the old behavior of StackFrame.name. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 29 ++++++++++-------- libdrgn/drgn.h | 13 +++++++- libdrgn/python/stack_trace.c | 19 ++++++++++-- libdrgn/stack_trace.c | 57 +++++++++++++++++++++++++++++------- tests/test_stack_trace.py | 28 ++++++++++++++++++ 5 files changed, 120 insertions(+), 26 deletions(-) create mode 100644 tests/test_stack_trace.py diff --git a/_drgn.pyi b/_drgn.pyi index d7bbb9e95..6e72c5681 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2774,23 +2774,28 @@ class StackFrame: (int)1 """ - name: Final[Optional[str]] + name: Final[str] + """ + Name of the function or symbol at this frame. + + This tries to get the best available name for this frame in the following + order: + + 1. The name of the function in the source code based on debugging + information (:attr:`frame.function_name `). + 2. The name of the symbol in the binary (:meth:`frame.symbol().name + `). + 3. The program counter in hexadecimal (:attr:`hex(frame.pc) `). + 4. The string "???". + """ + + function_name: Final[Optional[str]] """ Name of the function at this frame, or ``None`` if it could not be determined. The name cannot be determined if debugging information is not available for - the function, e.g., because it is implemented in assembly. It may be - desirable to use the symbol name or program counter as a fallback: - - .. code-block:: python3 - - name = frame.name - if name is None: - try: - name = frame.symbol().name - except LookupError: - name = hex(frame.pc) + the function, e.g., because it is implemented in assembly. """ is_inline: Final[bool] diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index e680dadd5..f94e609f8 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -3717,13 +3717,24 @@ bool drgn_stack_frame_interrupted(struct drgn_stack_trace *trace, size_t frame); struct drgn_error *drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret); +/** + * Get the best available name for a stack frame. + * + * @param[out] ret Returned name. 
On success, it must be freed with @c free(). + * On error, it is not modified. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error *drgn_stack_frame_name(struct drgn_stack_trace *trace, + size_t frame, char **ret); + /** * Get the name of the function at a stack frame. * * @return Function name. This is valid until the stack trace is destroyed; it * should not be freed. @c NULL if the name could not be determined. */ -const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, size_t frame); +const char *drgn_stack_frame_function_name(struct drgn_stack_trace *trace, + size_t frame); /** Return whether a stack frame is for an inlined call. */ bool drgn_stack_frame_is_inline(struct drgn_stack_trace *trace, size_t frame); diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index 1112a8092..94b1dc140 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -272,9 +272,20 @@ static PyObject *StackFrame_registers(StackFrame *self) static PyObject *StackFrame_get_name(StackFrame *self, void *arg) { - const char *name = drgn_stack_frame_name(self->trace->trace, self->i); - if (name) - return PyUnicode_FromString(name); + _cleanup_free_ char *name = NULL; + struct drgn_error *err = drgn_stack_frame_name(self->trace->trace, + self->i, &name); + if (err) + return set_drgn_error(err); + return PyUnicode_FromString(name); +} + +static PyObject *StackFrame_get_function_name(StackFrame *self, void *arg) +{ + const char *function_name = + drgn_stack_frame_function_name(self->trace->trace, self->i); + if (function_name) + return PyUnicode_FromString(function_name); else Py_RETURN_NONE; } @@ -336,6 +347,8 @@ static PyMethodDef StackFrame_methods[] = { static PyGetSetDef StackFrame_getset[] = { {"name", (getter)StackFrame_get_name, NULL, drgn_StackFrame_name_DOC}, + {"function_name", (getter)StackFrame_get_function_name, NULL, + drgn_StackFrame_function_name_DOC}, {"is_inline", 
(getter)StackFrame_get_is_inline, NULL, drgn_StackFrame_is_inline_DOC}, {"interrupted", (getter)StackFrame_get_interrupted, NULL, diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index c1fe3c595..282910772 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -117,9 +117,10 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) struct drgn_register_state *regs = trace->frames[frame].regs; struct optional_uint64 pc; - const char *name = drgn_stack_frame_name(trace, frame); - if (name) { - if (!string_builder_append(&str, name)) + const char *function_name = + drgn_stack_frame_function_name(trace, frame); + if (function_name) { + if (!string_builder_append(&str, function_name)) return &drgn_enomem; } else if ((pc = drgn_register_state_get_pc(regs)).has_value) { _cleanup_symbol_ struct drgn_symbol *sym = NULL; @@ -198,8 +199,9 @@ drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret return &drgn_enomem; } - const char *name = drgn_stack_frame_name(trace, frame); - if (name && !string_builder_appendf(&str, " in %s", name)) + const char *function_name = drgn_stack_frame_function_name(trace, frame); + if (function_name + && !string_builder_appendf(&str, " in %s", function_name)) return &drgn_enomem; int line, column; @@ -224,8 +226,42 @@ drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret return NULL; } -LIBDRGN_PUBLIC const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, - size_t frame) +LIBDRGN_PUBLIC +struct drgn_error *drgn_stack_frame_name(struct drgn_stack_trace *trace, + size_t frame, char **ret) +{ + struct drgn_error *err; + char *name; + const char *function_name = drgn_stack_frame_function_name(trace, frame); + if (function_name) { + name = strdup(function_name); + } else { + struct drgn_register_state *regs = trace->frames[frame].regs; + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + if (pc.has_value) { + _cleanup_symbol_ struct 
drgn_symbol *sym = NULL; + err = drgn_program_find_symbol_by_address_internal(trace->prog, + pc.value - !regs->interrupted, + &sym); + if (err) + return err; + if (sym) + name = strdup(sym->name); + else if (asprintf(&name, "0x%" PRIx64, pc.value) < 0) + name = NULL; + } else { + name = strdup("???"); + } + } + if (!name) + return &drgn_enomem; + *ret = name; + return NULL; +} + +LIBDRGN_PUBLIC +const char *drgn_stack_frame_function_name(struct drgn_stack_trace *trace, + size_t frame) { Dwarf_Die *scopes = trace->frames[frame].scopes; size_t num_scopes = trace->frames[frame].num_scopes; @@ -463,11 +499,12 @@ drgn_stack_frame_find_object(struct drgn_stack_trace *trace, size_t frame_i, } if (!die.addr) { not_found:; - const char *frame_name = drgn_stack_frame_name(trace, frame_i); - if (frame_name) { + const char *function_name = + drgn_stack_frame_function_name(trace, frame_i); + if (function_name) { return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find '%s' in '%s'", - name, frame_name); + name, function_name); } else { return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find '%s'", name); diff --git a/tests/test_stack_trace.py b/tests/test_stack_trace.py new file mode 100644 index 000000000..75978cf02 --- /dev/null +++ b/tests/test_stack_trace.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + +from drgn import Program +from tests import TestCase +from tests.resources import get_resource + + +class TestLinuxUserspaceCoreDump(TestCase): + @classmethod + def setUpClass(cls): + cls.prog = Program() + cls.prog.set_enabled_debug_info_finders([]) + cls.prog.set_core_dump(get_resource("crashme.core")) + cls.prog.load_debug_info([get_resource("crashme"), get_resource("crashme.so")]) + cls.trace = cls.prog.crashed_thread().stack_trace() + + def test_stack_frame_name(self): + self.assertEqual(self.trace[0].name, "c") + self.assertEqual(self.trace[5].name, "0x7f6112ad8088") + self.assertEqual(self.trace[7].name, "_start") + self.assertEqual(self.trace[8].name, "???") + + def test_stack_frame_function_name(self): + self.assertEqual(self.trace[0].function_name, "c") + self.assertIsNone(self.trace[5].function_name) + self.assertIsNone(self.trace[7].function_name) + self.assertIsNone(self.trace[8].function_name) From e7ac0895c4f452711f0474b840147fbb8f3aa2f7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 14 Jan 2025 14:08:14 -0800 Subject: [PATCH 019/166] docs: use envvar directive for environment variables Signed-off-by: Omar Sandoval --- docs/advanced_usage.rst | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index c717b0b0d..725f06041 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -204,12 +204,14 @@ Environment Variables Some of drgn's behavior can be modified through environment variables: -``DRGN_MAX_DEBUG_INFO_ERRORS`` +.. envvar:: DRGN_MAX_DEBUG_INFO_ERRORS + The maximum number of warnings about missing debugging information to log on CLI startup or from :meth:`drgn.Program.load_debug_info()`. Any additional errors are truncated. The default is 5; -1 is unlimited. -``DRGN_PREFER_ORC_UNWINDER`` +.. 
envvar:: DRGN_PREFER_ORC_UNWINDER + Whether to prefer using `ORC `_ over DWARF for stack unwinding (0 or 1). The default is 0. Note that drgn will always @@ -217,17 +219,20 @@ Some of drgn's behavior can be modified through environment variables: vice versa. This environment variable is mainly intended for testing and may be ignored in the future. -``DRGN_USE_LIBKDUMPFILE_FOR_ELF`` +.. envvar:: DRGN_USE_LIBKDUMPFILE_FOR_ELF + Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. -``DRGN_USE_SYS_MODULE`` +.. envvar:: DRGN_USE_SYS_MODULE + Whether drgn should use ``/sys/module`` to find information about loaded kernel modules for the running kernel instead of getting them from the core dump (0 or 1). The default is 1. This environment variable is mainly intended for testing and may be ignored in the future. -``PYTHON_BASIC_REPL`` +.. envvar:: PYTHON_BASIC_REPL + If non-empty, don't try to use the `new interactive REPL `_ added in Python 3.13. drgn makes use of the new REPL through internal From 970b9a085790a9b23325bbd06f120fdc2d7d664a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Jan 2025 10:38:19 -0800 Subject: [PATCH 020/166] libdrgn/python: unexport set_drgn_error() This was previously used for testing internals via ctypes, but it's no longer needed. 
Fixes: 7d251fee6e31 ("Translate C lexer tests to C unit tests") Signed-off-by: Omar Sandoval --- libdrgn/python/error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index a5dfa307b..1d36a2e98 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -117,7 +117,7 @@ struct drgn_error *drgn_error_from_python(void) return err; } -DRGNPY_PUBLIC void *set_drgn_error(struct drgn_error *err) +void *set_drgn_error(struct drgn_error *err) { if (err == &drgn_error_python) return NULL; From 6ff98f0b4b930a0a696f5db0eb0e633922c8024a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Jan 2025 11:55:23 -0800 Subject: [PATCH 021/166] libdrgn: stack_trace: make bad call unwinding an architecture callback The x86-64 fallback unwinder currently has a special case for handling a call to a NULL pointer. Other architectures need the same workaround. To avoid code duplication, let's extract the null program counter check into the generic stack tracing code and add a bad_call_unwind architecture callback. This also gives us a centralized place to add heuristics for detecting non-null bad calls. Signed-off-by: Omar Sandoval --- libdrgn/arch_x86_64.c | 67 ++++++++++++++++++++----------------------- libdrgn/platform.h | 12 ++++++++ libdrgn/stack_trace.c | 23 +++++++++++++-- 3 files changed, 64 insertions(+), 38 deletions(-) diff --git a/libdrgn/arch_x86_64.c b/libdrgn/arch_x86_64.c index fce948cd5..574dfe9ba 100644 --- a/libdrgn/arch_x86_64.c +++ b/libdrgn/arch_x86_64.c @@ -228,22 +228,20 @@ get_registers_from_frame_pointer(struct drgn_program *prog, return NULL; } -// Unwind from a call instruction, assuming that nothing else has been changed -// since. 
-static struct drgn_error *unwind_call(struct drgn_program *prog, - struct drgn_register_state *regs, - struct drgn_register_state **ret) + +static struct drgn_error * +fallback_unwind_x86_64(struct drgn_program *prog, + struct drgn_register_state *regs, + struct drgn_register_state **ret) { struct drgn_error *err; - struct optional_uint64 rsp = - drgn_register_state_get_u64(prog, regs, rsp); - if (!rsp.has_value) + struct optional_uint64 rbp = + drgn_register_state_get_u64(prog, regs, rbp); + if (!rbp.has_value) return &drgn_stop; - // Read the return address from the top of the stack. - uint64_t ret_addr; - err = drgn_program_read_u64(prog, rsp.value, false, &ret_addr); + err = get_registers_from_frame_pointer(prog, rbp.value, ret); if (err) { if (err->code == DRGN_ERROR_FAULT) { drgn_error_destroy(err); @@ -251,41 +249,26 @@ static struct drgn_error *unwind_call(struct drgn_program *prog, } return err; } - - // Most of the registers are unchanged. - struct drgn_register_state *tmp = drgn_register_state_dup(regs); - if (!tmp) - return &drgn_enomem; - - // The PC and rip are the return address we just read. - drgn_register_state_set_pc(prog, tmp, ret_addr); - drgn_register_state_set_from_u64(prog, tmp, rip, ret_addr); - // rsp is after the saved return address. - drgn_register_state_set_from_u64(prog, tmp, rsp, rsp.value + 8); - *ret = tmp; + drgn_register_state_set_cfa(prog, regs, rbp.value + 16); return NULL; } +// Unwind a single call instruction. static struct drgn_error * -fallback_unwind_x86_64(struct drgn_program *prog, +bad_call_unwind_x86_64(struct drgn_program *prog, struct drgn_register_state *regs, struct drgn_register_state **ret) { struct drgn_error *err; - // If the program counter is 0, it's likely that a NULL function pointer - // was called. Assume that the only thing we need to unwind is a single - // call instruction. 
- struct optional_uint64 pc = drgn_register_state_get_pc(regs); - if (pc.has_value && pc.value == 0) - return unwind_call(prog, regs, ret); - - struct optional_uint64 rbp = - drgn_register_state_get_u64(prog, regs, rbp); - if (!rbp.has_value) + struct optional_uint64 rsp = + drgn_register_state_get_u64(prog, regs, rsp); + if (!rsp.has_value) return &drgn_stop; - err = get_registers_from_frame_pointer(prog, rbp.value, ret); + // Read the return address from the top of the stack. + uint64_t ret_addr; + err = drgn_program_read_u64(prog, rsp.value, false, &ret_addr); if (err) { if (err->code == DRGN_ERROR_FAULT) { drgn_error_destroy(err); @@ -293,7 +276,18 @@ fallback_unwind_x86_64(struct drgn_program *prog, } return err; } - drgn_register_state_set_cfa(prog, regs, rbp.value + 16); + + // Most of the registers are unchanged. + struct drgn_register_state *tmp = drgn_register_state_dup(regs); + if (!tmp) + return &drgn_enomem; + + // The PC and rip are the return address we just read. + drgn_register_state_set_pc(prog, tmp, ret_addr); + drgn_register_state_set_from_u64(prog, tmp, rip, ret_addr); + // rsp is after the saved return address. 
+ drgn_register_state_set_from_u64(prog, tmp, rsp, rsp.value + 8); + *ret = tmp; return NULL; } @@ -663,6 +657,7 @@ const struct drgn_architecture_info arch_info_x86_64 = { DRGN_ARCHITECTURE_REGISTERS, .default_dwarf_cfi_row = &default_dwarf_cfi_row_x86_64, .fallback_unwind = fallback_unwind_x86_64, + .bad_call_unwind = bad_call_unwind_x86_64, .pt_regs_get_initial_registers = pt_regs_get_initial_registers_x86_64, .prstatus_get_initial_registers = prstatus_get_initial_registers_x86_64, .linux_kernel_get_initial_registers = diff --git a/libdrgn/platform.h b/libdrgn/platform.h index 18bc23cef..a3fc0dc16 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -194,6 +194,7 @@ typedef struct drgn_error * * - Define the following @ref drgn_architecture_info members: * - @ref default_dwarf_cfi_row (use @ref DRGN_CFI_ROW) * - @ref fallback_unwind + * - @ref bad_call_unwind * - @ref pt_regs_get_initial_registers * - @ref prstatus_get_initial_registers * - @ref linux_kernel_get_initial_registers @@ -343,6 +344,17 @@ struct drgn_architecture_info { struct drgn_error *(*fallback_unwind)(struct drgn_program *, struct drgn_register_state *, struct drgn_register_state **); + /** + * Try to unwind a stack frame assuming that a call was made to a bad + * program counter. + * + * This should typically undo the effects of a single call instruction + * and nothing more. If this has to read memory, translate @ref + * DRGN_ERROR_FAULT errors to &@ref drgn_stop. + */ + struct drgn_error *(*bad_call_unwind)(struct drgn_program *, + struct drgn_register_state *, + struct drgn_register_state **); /** * Create a @ref drgn_register_state from a Linux `struct pt_regs`. 
* diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 282910772..f4744b851 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -1176,6 +1176,17 @@ drgn_unwind_with_cfi(struct drgn_program *prog, struct drgn_cfi_row **row, return NULL; } +static bool drgn_is_bad_call(const struct drgn_register_state *regs) +{ + // If the program counter is 0, it's likely that a NULL function pointer + // was called. Other than that, it's difficult to differentiate a bad + // program counter from a valid program counter that we don't know about + // (e.g., because it's JIT compiled). We can add heuristics in the + // future. + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + return pc.has_value && pc.value == 0; +} + static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, uint32_t tid, const struct drgn_object *obj, @@ -1227,8 +1238,16 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, err = drgn_unwind_with_cfi(prog, &row, regs, &regs); if (err == &drgn_not_found) { - err = prog->platform.arch->fallback_unwind(prog, regs, - &regs); + if (drgn_is_bad_call(regs) + && prog->platform.arch->bad_call_unwind) { + err = prog->platform.arch->bad_call_unwind(prog, + regs, + &regs); + } else { + err = prog->platform.arch->fallback_unwind(prog, + regs, + &regs); + } } if (err == &drgn_stop) break; From 58552211ce1ae3e69c8fae2d5b900f9dd6760991 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Jan 2025 13:10:53 -0800 Subject: [PATCH 022/166] libdrgn: register_state: add drgn_register_state_unset_has_register() This will mainly be useful for bad_call_unwind implementations that make use of drgn_register_state_dup() and need to mark some registers as unknown in the unwound frame. 
Signed-off-by: Omar Sandoval --- libdrgn/register_state.c | 14 ++++++++++++++ libdrgn/register_state.h | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/libdrgn/register_state.c b/libdrgn/register_state.c index 281157b86..0c9d30e11 100644 --- a/libdrgn/register_state.c +++ b/libdrgn/register_state.c @@ -64,6 +64,13 @@ static void drgn_register_state_set_known(struct drgn_register_state *regs, bitset[i / CHAR_BIT] |= 1 << (i % CHAR_BIT); } +static void drgn_register_state_set_unknown(struct drgn_register_state *regs, + uint32_t i) +{ + unsigned char *bitset = drgn_register_state_known_bitset(regs); + bitset[i / CHAR_BIT] &= ~(1 << (i % CHAR_BIT)); +} + bool drgn_register_state_has_register(const struct drgn_register_state *regs, drgn_register_number regno) { @@ -89,6 +96,13 @@ drgn_register_state_set_has_register_range(struct drgn_register_state *regs, drgn_register_state_set_known(regs, regno + 2); } +void drgn_register_state_unset_has_register(struct drgn_register_state *regs, + drgn_register_number regno) +{ + if (regno < regs->num_regs) + drgn_register_state_set_unknown(regs, (uint32_t)regno + 2); +} + struct optional_uint64 drgn_register_state_get_pc(const struct drgn_register_state *regs) { diff --git a/libdrgn/register_state.h b/libdrgn/register_state.h index fbefbe952..f6f7fbf28 100644 --- a/libdrgn/register_state.h +++ b/libdrgn/register_state.h @@ -177,6 +177,14 @@ drgn_register_state_set_has_register_range(struct drgn_register_state *regs, drgn_register_number first_regno, drgn_register_number last_regno); +/** + * Mark a register as unknown in a @ref drgn_register_state. + * + * @param[in] regno Register number to mark as unknown. + */ +void drgn_register_state_unset_has_register(struct drgn_register_state *regs, + drgn_register_number regno); + /** A `uint64_t` which may or may not be present. 
*/ struct optional_uint64 { uint64_t value; From 4a3ae326f4855462d38b0c30c1c1f77f7ce4342e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Jan 2025 13:14:44 -0800 Subject: [PATCH 023/166] libdrgn: aarch64: implement bad call unwinding Since AArch64 uses a link register rather than storing the return address on the stack, this is a bit easier than on x86-64. Fixes #462. Signed-off-by: Omar Sandoval --- libdrgn/arch_aarch64.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/libdrgn/arch_aarch64.c b/libdrgn/arch_aarch64.c index 1f050f08c..135d1eead 100644 --- a/libdrgn/arch_aarch64.c +++ b/libdrgn/arch_aarch64.c @@ -129,6 +129,31 @@ fallback_unwind_aarch64(struct drgn_program *prog, return NULL; } +// Unwind a single bl or blr instruction. +static struct drgn_error * +bad_call_unwind_aarch64(struct drgn_program *prog, + struct drgn_register_state *regs, + struct drgn_register_state **ret) +{ + struct optional_uint64 lr = + drgn_register_state_get_u64(prog, regs, x30); + if (!lr.has_value) + return &drgn_stop; + + struct drgn_register_state *tmp = drgn_register_state_dup(regs); + if (!tmp) + return &drgn_enomem; + + // lr contains the old pc + 4. + drgn_register_state_set_pc(prog, tmp, lr.value - 4); + // We don't know the old lr. + drgn_register_state_unset_has_register(tmp, DRGN_REGISTER_NUMBER(x30)); + // The interrupted pc is no longer applicable. + drgn_register_state_unset_has_register(tmp, DRGN_REGISTER_NUMBER(pc)); + *ret = tmp; + return NULL; +} + // elf_gregset_t (in PRSTATUS) and struct user_pt_regs have the same layout. // This layout is a prefix of the in-kernel struct pt_regs (but we don't care // about any of the extra fields). 
@@ -491,6 +516,7 @@ const struct drgn_architecture_info arch_info_aarch64 = { .default_dwarf_cfi_row = &default_dwarf_cfi_row_aarch64, .demangle_cfi_registers = demangle_cfi_registers_aarch64, .fallback_unwind = fallback_unwind_aarch64, + .bad_call_unwind = bad_call_unwind_aarch64, .pt_regs_get_initial_registers = pt_regs_get_initial_registers_aarch64, .prstatus_get_initial_registers = prstatus_get_initial_registers_aarch64, .linux_kernel_get_initial_registers = From cc14bc1f927f27df680769bb6581348346fff5bb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Jan 2025 13:18:09 -0800 Subject: [PATCH 024/166] contrib/stack_trace_call_fault.py: support AArch64 This is a shorter-term solution for anyone who can't run a version of drgn with the previous fix. Signed-off-by: Omar Sandoval --- contrib/stack_trace_call_fault.py | 99 ++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/contrib/stack_trace_call_fault.py b/contrib/stack_trace_call_fault.py index c82190a89..530422302 100644 --- a/contrib/stack_trace_call_fault.py +++ b/contrib/stack_trace_call_fault.py @@ -3,44 +3,73 @@ # SPDX-License-Identifier: LGPL-2.1-or-later """ -Get a stack trace from a call to an invalid address on x86-64. drgn should do -this automatically eventually. +Get a stack trace from a call to an invalid address on x86-64 or AArch64. drgn +should do this automatically eventually. 
""" +import drgn from drgn import Object +from drgn.helpers.common.prog import takes_program_or_default -def pt_regs_members_from_stack_frame(frame): +def pt_regs_members_from_stack_frame(prog, frame): regs = frame.registers() - return { - "r15": regs.get("r15", 0), - "r14": regs.get("r14", 0), - "r13": regs.get("r13", 0), - "r12": regs.get("r12", 0), - "bp": regs.get("rbp", 0), - "bx": regs.get("rbx", 0), - "r11": regs.get("r11", 0), - "r10": regs.get("r10", 0), - "r9": regs.get("r9", 0), - "r8": regs.get("r8", 0), - "ax": regs.get("rax", 0), - "cx": regs.get("rcx", 0), - "dx": regs.get("rdx", 0), - "si": regs.get("rsi", 0), - "di": regs.get("rdi", 0), - "orig_ax": -1, - "ip": regs.get("rip", 0), - "cs": regs.get("cs", 0), - "flags": regs.get("rflags", 0), - "sp": regs.get("rsp", 0), - "ss": regs.get("ss", 0), - } - - -pt_regs_members = pt_regs_members_from_stack_frame( - prog.crashed_thread().stack_trace()[0] -) -pt_regs_members["ip"] = prog.read_word(pt_regs_members["sp"]) - 1 -pt_regs_members["sp"] += 8 -trace = prog.stack_trace(Object(prog, "struct pt_regs", pt_regs_members)) -print(trace) + if prog.platform.arch == drgn.Architecture.X86_64: + return { + "r15": regs.get("r15", 0), + "r14": regs.get("r14", 0), + "r13": regs.get("r13", 0), + "r12": regs.get("r12", 0), + "bp": regs.get("rbp", 0), + "bx": regs.get("rbx", 0), + "r11": regs.get("r11", 0), + "r10": regs.get("r10", 0), + "r9": regs.get("r9", 0), + "r8": regs.get("r8", 0), + "ax": regs.get("rax", 0), + "cx": regs.get("rcx", 0), + "dx": regs.get("rdx", 0), + "si": regs.get("rsi", 0), + "di": regs.get("rdi", 0), + "orig_ax": -1, + "ip": regs.get("rip", 0), + "cs": regs.get("cs", 0), + "flags": regs.get("rflags", 0), + "sp": regs.get("rsp", 0), + "ss": regs.get("ss", 0), + } + elif prog.platform.arch == drgn.Architecture.AARCH64: + try: + pc = frame.pc + except LookupError: + pc = 0 + return { + "regs": [regs.get(f"x{i}", 0) for i in range(31)], + "sp": regs.get("sp", 0), + "pc": pc, + "pstate": 
regs.get("pstate", 0), + } + else: + raise NotImplementedError() + + +@takes_program_or_default +def call_fault_stack_trace(prog): + pt_regs_members = pt_regs_members_from_stack_frame( + prog, prog.crashed_thread().stack_trace()[0] + ) + + if prog.platform.arch == drgn.Architecture.X86_64: + pt_regs_members["ip"] = prog.read_word(pt_regs_members["sp"]) - 1 + pt_regs_members["sp"] += 8 + elif prog.platform.arch == drgn.Architecture.AARCH64: + pt_regs_members["pc"] = pt_regs_members["regs"][30] - 4 + else: + raise NotImplementedError() + + return prog.stack_trace(Object(prog, "struct pt_regs", pt_regs_members)) + + +if __name__ == "__main__": + trace = call_fault_stack_trace(prog) + print(trace) From b1ce3f2d08fd11701011ff76d9881e5e343db2c5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Jan 2025 14:35:00 -0800 Subject: [PATCH 025/166] libdrgn: stack_trace: give hint when stack_trace(0) fails PID 0 is not unique in the Linux kernel; there is a task with PID 0 for each CPU. stack_trace(0) currently fails with a generic "task not found" error message, which can be confusing; see #462. Add a hint to use idle_task() to the error message when the given PID is 0. 
Signed-off-by: Omar Sandoval --- libdrgn/stack_trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index f4744b851..2a05da77f 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -770,8 +770,14 @@ drgn_get_initial_registers(struct drgn_program *prog, uint32_t tid, if (err) return err; if (!found) { - return drgn_error_create(DRGN_ERROR_LOOKUP, - "task not found"); + if (tid == 0) { + return drgn_error_create(DRGN_ERROR_LOOKUP, + "task not found; " + "use stack_trace(idle_task(cpu)) for PID 0"); + } else { + return drgn_error_create(DRGN_ERROR_LOOKUP, + "task not found"); + } } } From 64d82ddf6e1aa63cae7e4e395657100a9628e26c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Jan 2025 14:38:36 -0800 Subject: [PATCH 026/166] libdrgn/python: remove dead misspelled ifdef block Alec Rivers noticed in #461 that WITH_LIBKDUMPFILE is misspelled as WITH_KDUMPFILE here. The whole ifdef block isn't actually needed, so remove it. Fixes: 4e330bbb6e58 ("cli: indicate if drgn was compiled with libkdumpfile") Signed-off-by: Omar Sandoval --- libdrgn/python/main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index 34a0aa037..042981f68 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -2,9 +2,6 @@ // SPDX-License-Identifier: LGPL-2.1-or-later #include -#ifdef WITH_KDUMPFILE -#include -#endif #include "drgnpy.h" #include "../path.h" From 4738ddfa5c01dda14e42f6696a8294804faa32e4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 23 Jan 2025 13:51:43 -0800 Subject: [PATCH 027/166] libdrgn/python: fix segfault on del prog.language and more None of our setter functions handle deletions, which pass the value as NULL. This results in a segfault when attempting to access the value. Fix them all with a new convenience macro. 
Fixes: 50e4ac82452a ("libdrgn: allow overriding program default language") Fixes: 4e83130008e9 ("Introduce module and debug info finder APIs") Signed-off-by: Omar Sandoval --- libdrgn/python/drgnpy.h | 12 ++++++++++++ libdrgn/python/module.c | 3 +++ libdrgn/python/program.c | 2 ++ tests/test_module.py | 12 ++++++++++++ tests/test_program.py | 4 ++++ 5 files changed, 33 insertions(+) diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 41a2ba6d3..c76db9c42 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -91,6 +91,18 @@ static inline PyObject *PyObject_CallOneArg(PyObject *callable, PyObject *arg) Py_RETURN_FALSE; \ } while (0) +/** + * Return from a PyGetSetDef setter with an error if attempting to delete the + * attribute. + */ +#define SETTER_NO_DELETE(name, value) do { \ + if (!(value)) { \ + PyErr_Format(PyExc_AttributeError, \ + "can't delete '%s' attribute", (name)); \ + return -1; \ + } \ +} while (0) + static inline void pydecrefp(void *p) { Py_XDECREF(*(PyObject **)p); diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 06067f845..7e976a2eb 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -214,6 +214,7 @@ static PyObject *Module_get_address_range(Module *self, void *arg) static int Module_set_address_range(Module *self, PyObject *value, void *arg) { + SETTER_NO_DELETE("address_range", value); struct drgn_error *err; if (value == Py_None) { err = drgn_module_set_address_range(self->module, -1, -1); @@ -258,6 +259,7 @@ static PyObject *Module_get_build_id(Module *self, void *arg) static int Module_set_build_id(Module *self, PyObject *value, void *arg) { + SETTER_NO_DELETE("build_id", value); struct drgn_error *err; if (value == Py_None) { err = drgn_module_set_build_id(self->module, NULL, 0); @@ -300,6 +302,7 @@ static PyObject *Module_get_##which##_file_status(Module *self, void *arg) \ static int Module_set_##which##_file_status(Module *self, PyObject *value, \ void *arg) \ { \ + 
SETTER_NO_DELETE(#which, value); \ if (!PyObject_TypeCheck(value, \ (PyTypeObject *)ModuleFileStatus_class)) { \ PyErr_SetString(PyExc_TypeError, \ diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 90b972fe7..b863b6a0a 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1286,6 +1286,7 @@ static PyObject *Program_get_debug_info_path(Program *self, void *arg) static int Program_set_debug_info_path(Program *self, PyObject *value, void *arg) { + SETTER_NO_DELETE("debug_info_path", value); const char *path; if (value == Py_None) { path = NULL; @@ -1909,6 +1910,7 @@ static PyObject *Program_get_language(Program *self, void *arg) static int Program_set_language(Program *self, PyObject *value, void *arg) { + SETTER_NO_DELETE("language", value); if (!PyObject_TypeCheck(value, &Language_type)) { PyErr_SetString(PyExc_TypeError, "language must be Language"); return -1; diff --git a/tests/test_module.py b/tests/test_module.py index 2ff5c1c89..e22f1bd45 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -351,6 +351,11 @@ def test_address_range_invalid(self): with self.assertRaisesRegex(ValueError, "invalid module address range"): module.address_range = (2**64 - 1, 2**64 - 1) + def test_address_range_del(self): + module = Program().extra_module("/foo/bar", create=True)[0] + with self.assertRaises(AttributeError): + del module.address_range + def test_build_id(self): module = Program().extra_module("/foo/bar", create=True)[0] @@ -376,6 +381,11 @@ def test_build_id_invalid_empty(self): with self.assertRaisesRegex(ValueError, "build ID cannot be empty"): module.build_id = b"" + def test_build_id_del(self): + module = Program().extra_module("/foo/bar", create=True)[0] + with self.assertRaises(AttributeError): + del module.build_id + def test_find_by_address(self): prog = Program() module1 = prog.extra_module("/foo/bar", create=True)[0] @@ -456,6 +466,8 @@ def _test_file_status(self, which): setattr(module, status_attr, 
ModuleFileStatus.WANT) self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + self.assertRaises(AttributeError, delattr, module, status_attr) + def test_loaded_file_status(self): self._test_file_status("loaded") diff --git a/tests/test_program.py b/tests/test_program.py index b3b22483b..6b6c4240c 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -162,6 +162,10 @@ def test_language(self): TypeError, "language must be Language", setattr, prog, "language", "CPP" ) + def test_language_del(self): + with self.assertRaises(AttributeError): + del Program().language + class TestMemory(TestCase): def test_simple_read(self): From 191a4d5e329770a4e8764be421c2f6f03246c669 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Jan 2025 13:55:40 -0800 Subject: [PATCH 028/166] libdrgn: add VECTOR and HASH_TABLE scope guards STRING_BUILDER has been really convenient, so let's do the same for vectors and hash tables. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 9 +++------ libdrgn/dwarf_info.c | 27 +++++++++------------------ libdrgn/handler.c | 3 +-- libdrgn/hash_table.h | 8 ++++++++ libdrgn/openmp.c | 2 +- libdrgn/orc_info.c | 3 +-- libdrgn/program.c | 2 +- libdrgn/vector.h | 8 ++++++++ 8 files changed, 32 insertions(+), 30 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 70b49cd4e..944b1df65 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2004,8 +2004,7 @@ drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, drgn_log_debug(prog, "%s: %m", path); return NULL; } - _cleanup_(drgn_map_files_segment_vector_deinit) - struct drgn_map_files_segment_vector segments = VECTOR_INIT; + VECTOR(drgn_map_files_segment_vector, segments); bool sorted = true; bool found = false; struct dirent *ent; @@ -4493,8 +4492,7 @@ process_get_mapped_files(struct process_loaded_module_iterator *it) bool logged_readlink_eperm = false, logged_stat_eperm = false; // While we're reading /proc/$pid/maps, 
we might as well cache the // segments for drgn_module_try_proc_files_for_shared_library(). - _cleanup_(drgn_map_files_segment_vector_deinit) - struct drgn_map_files_segment_vector map_files_segments = VECTOR_INIT; + VECTOR(drgn_map_files_segment_vector, map_files_segments); struct drgn_mapped_file_segments segments = DRGN_MAPPED_FILE_SEGMENTS_INIT; for (;;) { errno = 0; @@ -5098,8 +5096,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, err = drgn_loaded_module_iterator_create(prog, &it); if (err) return err; - _cleanup_(drgn_module_vector_deinit) - struct drgn_module_vector modules = VECTOR_INIT; + VECTOR(drgn_module_vector, modules); struct drgn_module *module; while (!(err = drgn_module_iterator_next(it, &module, NULL)) && module) { // Reset DONT_WANT to WANT. diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 939a1b663..e8fd88c64 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1821,8 +1821,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) drgn_init_num_threads(); - _cleanup_(drgn_module_vector_deinit) - struct drgn_module_vector modules = VECTOR_INIT; + VECTOR(drgn_module_vector, modules); { struct drgn_module *module = dbinfo->modules_pending_indexing; do { @@ -2712,8 +2711,7 @@ struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, if (!dwarf) return drgn_error_libdw(); - _cleanup_(dwarf_die_vector_deinit) - struct dwarf_die_vector dies = VECTOR_INIT; + VECTOR(dwarf_die_vector, dies); Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&dies); if (!cu_die) return &drgn_enomem; @@ -3941,8 +3939,7 @@ drgn_dwarf_frame_base(struct drgn_program *prog, struct drgn_elf_file *file, NULL, regs, expr, expr_size))) return err; - _cleanup_(uint64_vector_deinit) - struct uint64_vector stack = VECTOR_INIT; + VECTOR(uint64_vector, stack); for (;;) { err = drgn_eval_dwarf_expression(&ctx, &stack, remaining_ops); if (err) @@ -4778,8 +4775,7 @@ struct drgn_error 
*drgn_dwarf_scopes_names(Dwarf_Die *scopes, { struct drgn_error *err; Dwarf_Die die; - _cleanup_(const_char_p_vector_deinit) - struct const_char_p_vector vec = VECTOR_INIT; + VECTOR(const_char_p_vector, vec); for (size_t scope = 0; scope < num_scopes; scope++) { if (dwarf_child(&scopes[scope], &die) != 0) continue; @@ -5872,8 +5868,7 @@ drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, struct drgn_type **ret) { struct drgn_error *err; - _cleanup_(array_dimension_vector_deinit) - struct array_dimension_vector dimensions = VECTOR_INIT; + VECTOR(array_dimension_vector, dimensions); struct array_dimension *dimension; Dwarf_Die child; int r = dwarf_child(die, &child); @@ -6718,12 +6713,9 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, &file->module->dwarf.datarel_base); } - _cleanup_(drgn_dwarf_cie_vector_deinit) - struct drgn_dwarf_cie_vector cies = VECTOR_INIT; - _cleanup_(drgn_dwarf_fde_vector_deinit) - struct drgn_dwarf_fde_vector fdes = VECTOR_INIT; - _cleanup_(drgn_dwarf_cie_map_deinit) - struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; + VECTOR(drgn_dwarf_cie_vector, cies); + VECTOR(drgn_dwarf_fde_vector, fdes); + HASH_TABLE(drgn_dwarf_cie_map, cie_map); struct drgn_elf_file_section_buffer buffer; err = drgn_elf_file_section_buffer_read(&buffer, file, scn); @@ -7354,8 +7346,7 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, void *buf, size_t size) { struct drgn_error *err; - _cleanup_(uint64_vector_deinit) struct uint64_vector stack = - VECTOR_INIT; + VECTOR(uint64_vector, stack); if (rule->push_cfa) { struct optional_uint64 cfa = drgn_register_state_get_cfa(regs); diff --git a/libdrgn/handler.c b/libdrgn/handler.c index 10965ea02..8c3174a8c 100644 --- a/libdrgn/handler.c +++ b/libdrgn/handler.c @@ -69,8 +69,7 @@ struct drgn_error *drgn_handler_list_set_enabled(struct drgn_handler_list *list, size_t count, const char *what) { // Put all of the handlers in a hash table of tagged pointers. 
- _cleanup_(drgn_handler_table_deinit) - struct drgn_handler_table table = HASH_TABLE_INIT; + HASH_TABLE(drgn_handler_table, table); drgn_handler_list_for_each_registered(handler, list) { uintptr_t entry = (uintptr_t)handler; if (drgn_handler_table_insert(&table, &entry, NULL) < 0) diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index a524551ca..ba5928ade 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -1679,6 +1679,14 @@ DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) */ #define HASH_TABLE_INIT { hash_table_empty_chunk } +/** + * Define and initialize an empty @ref hash_table of type @p table_type named @p + * table that is automatically deinitialized when it goes out of scope. + */ +#define HASH_TABLE(table_type, table) \ + __attribute__((__cleanup__(table_type##_deinit))) \ + struct table_type table = HASH_TABLE_INIT + /** * @defgroup HashTableHelpers Hash table helpers * diff --git a/libdrgn/openmp.c b/libdrgn/openmp.c index f73fa592e..80ddc700d 100644 --- a/libdrgn/openmp.c +++ b/libdrgn/openmp.c @@ -46,7 +46,7 @@ static int drgn_num_online_cpu_cores(void) + 1]; int num_cores = 0; - _cleanup_(int_set_deinit) struct int_set cpus_seen = HASH_TABLE_INIT; + HASH_TABLE(int_set, cpus_seen); _cleanup_fclose_ FILE *online = fopen("/sys/devices/system/cpu/online", "r"); if (!online) diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index d30b4f6da..67d1d0f03 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -402,8 +402,7 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) } } - _cleanup_(uint64_range_vector_deinit) - struct uint64_range_vector preferred = VECTOR_INIT; + VECTOR(uint64_range_vector, preferred); err = remove_fdes_from_orc(module, indices, &preferred, &num_entries); if (err) diff --git a/libdrgn/program.c b/libdrgn/program.c index c06c55419..75c4d7c9f 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1857,7 +1857,7 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_c_string(struct 
drgn_program *prog, uint64_t address, bool physical, size_t max_size, char **ret) { - _cleanup_(char_vector_deinit) struct char_vector str = VECTOR_INIT; + VECTOR(char_vector, str); for (;;) { struct drgn_error *err = drgn_program_untagged_addr(prog, &address); if (err) diff --git a/libdrgn/vector.h b/libdrgn/vector.h index 8467cf633..93d039529 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -594,6 +594,14 @@ DEFINE_VECTOR_FUNCTIONS(vector) */ #define VECTOR_INIT { { 0 } } +/** + * Define and initialize an empty @ref vector of type @p vector_type named @p + * vector that is automatically deinitialized when it goes out of scope. + */ +#define VECTOR(vector_type, vector) \ + __attribute__((__cleanup__(vector_type##_deinit))) \ + struct vector_type vector = VECTOR_INIT + /** * Iterate over every entry in a @ref vector. * From 45c4a228ac3e3c58603f16f46939b4e4e727d903 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 28 Jan 2025 13:38:53 -0800 Subject: [PATCH 029/166] libdrgn: debug_info: match vmlinux by version if build ID is not available Since the module API was introduced, Program.load_debug_info() and the drgn CLI's -s option match strictly based on build IDs. This fails when the build ID is not available, specifically in the case of Linux kernel core dumps without a usable build ID in VMCOREINFO (old versions and a few buggy stable versions). Before the module API, Program.load_debug_info() and -s used any vmlinux file given to them. This caused confusion when the wrong file was given, so we don't want to bring that behavior back. Instead, let's look for a vmlinux file matching the Linux version from VMCOREINFO. Fixes #464. 
Fixes: 4e83130008e9 ("Introduce module and debug info finder APIs") Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 88 ++++++++++++++++++++++- libdrgn/elf_file.c | 100 ++++++++++++++++++++++++++ libdrgn/elf_file.h | 15 ++++ tests/linux_kernel/test_debug_info.py | 19 ++++- 4 files changed, 220 insertions(+), 2 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 944b1df65..f2942ce01 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -4968,6 +4968,80 @@ load_debug_info_try_provided_supplementary_files(struct drgn_module *module, DRGN_MODULE_FILE_WANT_SUPPLEMENTARY); } +static struct drgn_error * +load_debug_info_try_provided_vmlinux(struct drgn_module *module, + struct load_debug_info_state *state) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; + bool logged_trying = false; + for (auto it = load_debug_info_provided_table_first(&state->provided); + it.entry; + it = load_debug_info_provided_table_next(it)) { + vector_for_each(load_debug_info_file_vector, file, + &it.entry->files) { + int r = elf_is_vmlinux(file->elf); + if (r < 0) { + drgn_log_debug(prog, "%s: %s", file->path, + elf_errmsg(-1)); + } + if (r <= 0) + continue; + + if (!logged_trying) { + drgn_module_try_files_log(module, + "(Linux version %s): trying provided files for", + prog->vmcoreinfo.osrelease); + logged_trying = true; + } + + const char *release; + ssize_t release_len = + elf_vmlinux_release(file->elf, &release); + if (release_len < 0) { + drgn_log_debug(prog, "%s: %s", file->path, + elf_errmsg(-1)); + continue; + } else if (release_len == 0) { + drgn_log_debug(prog, "%s: %s Linux version not found", + module->name, file->path); + continue; + } + + if (strlen(prog->vmcoreinfo.osrelease) == release_len + && memcmp(release, prog->vmcoreinfo.osrelease, + release_len) == 0) { + drgn_log_debug(prog, "%s: %s Linux version matches", + module->name, file->path); + } else { + drgn_log_debug(prog, + "%s: %s Linux version (%.*s) does not 
match", + module->name, file->path, + release_len > INT_MAX + ? INT_MAX : (int)release_len, + release); + continue; + } + + if (!it.entry->matched) { + state->unmatched_provided--; + it.entry->matched = true; + } + + err = drgn_module_try_file_internal(module, file->path, + file->fd, true, + NULL); + file->fd = -1; + if (err) + return err; + if (module->loaded_file_status != DRGN_MODULE_FILE_WANT + && module->debug_file_status != DRGN_MODULE_FILE_WANT) + break; + } + } + return NULL; +} + static struct drgn_error * load_debug_info_try_provided_files(struct drgn_module *module, struct load_debug_info_state *state) @@ -4981,7 +5055,7 @@ load_debug_info_try_provided_files(struct drgn_module *module, const void *build_id; size_t build_id_len; drgn_module_build_id(module, &build_id, &build_id_len); - if (build_id_len != 0) { + if (build_id_len > 0) { // Look up the provided file even if we don't need it so that it // counts as matched. struct load_debug_info_provided *provided = @@ -5006,6 +5080,18 @@ load_debug_info_try_provided_files(struct drgn_module *module, return err; } } + } else if (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL + && drgn_module_kind(module) == DRGN_MODULE_MAIN) { + // Before Linux kernel commit 0935288c6e00 ("kdump: append + // kernel build-id string to VMCOREINFO") (in v5.9) and in a few + // broken stable versions (see + // ignore_broken_vmcoreinfo_build_id()), we can't get the + // vmlinux build ID from a kernel core dump. Fall back to + // checking every provided file for a vmlinux file with a + // matching version. 
+ err = load_debug_info_try_provided_vmlinux(module, state); + if (err) + return err; } return NULL; } diff --git a/libdrgn/elf_file.c b/libdrgn/elf_file.c index 4b3af027d..cbc81a6e5 100644 --- a/libdrgn/elf_file.c +++ b/libdrgn/elf_file.c @@ -164,6 +164,7 @@ struct drgn_error *drgn_elf_file_create(struct drgn_module *module, // We consider a file to be vmlinux if it has an // .init.text section and is not relocatable // (which excludes kernel modules). + // Keep this in sync with elf_is_vmlinux(). file->is_vmlinux = ehdr->e_type != ET_REL; index = DRGN_SECTION_INDEX_NUM; } else { @@ -761,3 +762,102 @@ bool drgn_elf_file_address_range(struct drgn_elf_file *file, end_ret); } } + +// Keep this in sync with drgn_elf_file_create(). +int elf_is_vmlinux(Elf *elf) +{ + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) + return -1; + + if (ehdr->e_type == ET_REL) + return 0; + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return -1; + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return -1; + + if (shdr->sh_type != SHT_PROGBITS) + continue; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return -1; + + if (strcmp(scnname, ".init.text") == 0) + return 1; + } + return 0; +} + +ssize_t elf_vmlinux_release(Elf *elf, const char **ret) +{ + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return -1; + + if (shdr->sh_type != SHT_SYMTAB || shdr->sh_entsize == 0) + continue; + + Elf_Data *data = elf_getdata(scn, NULL); + if (!data) + return -1; + + size_t num_syms = shdr->sh_size / shdr->sh_entsize; + for (size_t i = 0; i < num_syms; i++) { + GElf_Sym sym_mem, *sym = gelf_getsym(data, i, &sym_mem); + if (!sym) + return -1; + + static const char prefix[] = "Linux version "; + + if (GELF_ST_TYPE(sym->st_info) != STT_OBJECT + || 
GELF_ST_BIND(sym->st_info) != STB_GLOBAL + || sym->st_size < sizeof(prefix) - 1) + continue; + + const char *name = elf_strptr(elf, shdr->sh_link, + sym->st_name); + if (!name) + return -1; + if (strcmp(name, "linux_banner") != 0) + continue; + + GElf_Shdr sym_shdr_mem, *sym_shdr = + gelf_getshdr(elf_getscn(elf, sym->st_shndx), + &sym_shdr_mem); + if (!sym_shdr) + return -1; + + int64_t offset = sym_shdr->sh_offset + + sym->st_value - sym_shdr->sh_addr; + Elf_Data *banner_data = + elf_getdata_rawchunk(elf, offset, sym->st_size, + ELF_T_BYTE); + if (!banner_data) + return -1; + + if (memcmp(banner_data->d_buf, prefix, + sizeof(prefix) - 1) != 0) + return 0; + + const char *release = (const char *)banner_data->d_buf + + (sizeof(prefix) - 1); + const char *space = + memchr(release, ' ', + banner_data->d_size - (sizeof(prefix) - 1)); + if (!space) + return 0; + *ret = release; + return space - release; + } + } + return 0; +} diff --git a/libdrgn/elf_file.h b/libdrgn/elf_file.h index 386d6409c..0eea9fa7f 100644 --- a/libdrgn/elf_file.h +++ b/libdrgn/elf_file.h @@ -259,6 +259,21 @@ drgn_elf_file_section_buffer_read(struct drgn_elf_file_section_buffer *buffer, bool drgn_elf_file_address_range(struct drgn_elf_file *file, uint64_t *start_ret, uint64_t *end_ret); +/** + * Return whether an ELF file is a vmlinux file. + * + * @return > 0 if the file is vmlinux, 0 if it is not, < 0 on libelf error. + */ +int elf_is_vmlinux(Elf *elf); + +/** + * Get the Linux release from a vmlinux file. + * + * @param[out] ret Returned release. + * @return Length of @p ret on success, 0 if not found, < 0 on libelf error. 
+ */ +ssize_t elf_vmlinux_release(Elf *elf, const char **ret); + /** @} */ #endif /* DRGN_ELF_FILE_H */ diff --git a/tests/linux_kernel/test_debug_info.py b/tests/linux_kernel/test_debug_info.py index f50afbdfb..db712e684 100644 --- a/tests/linux_kernel/test_debug_info.py +++ b/tests/linux_kernel/test_debug_info.py @@ -3,7 +3,7 @@ import os -from drgn import Program, RelocatableModule +from drgn import MainModule, Program, RelocatableModule from drgn.helpers.linux.module import find_module from tests import modifyenv from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod @@ -20,6 +20,23 @@ def iter_proc_modules(): yield tokens[0], int(tokens[5], 16) +class TestLoadDebugInfo(LinuxKernelTestCase): + def test_no_build_id(self): + prog = Program() + prog.set_kernel() + prog.set_enabled_debug_info_finders([]) + for module, _ in prog.loaded_modules(): + if isinstance(module, MainModule): + module.build_id = None + break + else: + self.fail("main module not found") + prog.load_debug_info([self.prog.main_module().debug_file_path]) + self.assertEqual( + prog.main_module().debug_file_path, self.prog.main_module().debug_file_path + ) + + class TestModule(LinuxKernelTestCase): def test_loaded_modules(self): expected = [("kernel", None), *iter_proc_modules()] From 61b76caf68e14e09a3c925a2d3ff09fa48bb4907 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 31 Jan 2025 00:02:16 -0800 Subject: [PATCH 030/166] libdrgn: hash_table: add hash_table_for_each() We do this so often I'm surprised I didn't add this sooner. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 39 +++++++++++++-------------------------- libdrgn/dwarf_info.c | 12 ++++-------- libdrgn/handler.c | 3 +-- libdrgn/hash_table.h | 11 +++++++++++ libdrgn/python/program.c | 16 ++++------------ libdrgn/type.c | 4 +--- 6 files changed, 34 insertions(+), 51 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index f2942ce01..23472a46a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -251,10 +251,8 @@ DEFINE_BINARY_SEARCH_TREE_FUNCTIONS(drgn_module_address_tree, node, static void drgn_module_free_section_addresses(struct drgn_module *module) { - for (auto it = - drgn_module_section_address_map_first(&module->section_addresses); - it.entry; - it = drgn_module_section_address_map_next(it)) + hash_table_for_each(drgn_module_section_address_map, it, + &module->section_addresses) free(it.entry->key); } @@ -4536,9 +4534,7 @@ process_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) { struct process_loaded_module_iterator *it = container_of(_it, struct process_loaded_module_iterator, u.it); - for (struct process_mapped_files_iterator files_it = - process_mapped_files_first(&it->files); - files_it.entry; files_it = process_mapped_files_next(files_it)) { + hash_table_for_each(process_mapped_files, files_it, &it->files) { free((char *)files_it.entry->file->path); drgn_mapped_file_destroy(files_it.entry->file); } @@ -4721,10 +4717,7 @@ core_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) { struct core_loaded_module_iterator *it = container_of(_it, struct core_loaded_module_iterator, u.it); - for (struct core_mapped_files_iterator files_it = - core_mapped_files_first(&it->files); - files_it.entry; - files_it = core_mapped_files_next(files_it)) + hash_table_for_each(core_mapped_files, files_it, &it->files) drgn_mapped_file_destroy(*files_it.entry); core_mapped_files_deinit(&it->files); userspace_loaded_module_iterator_deinit(&it->u); @@ -4893,10 +4886,8 @@ 
load_debug_info_add_provided_file(struct drgn_program *prog, static void load_debug_info_state_deinit(struct load_debug_info_state *state) { - for (struct load_debug_info_provided_table_iterator it = - load_debug_info_provided_table_first(&state->provided); - it.entry; - it = load_debug_info_provided_table_next(it)) { + hash_table_for_each(load_debug_info_provided_table, it, + &state->provided) { vector_for_each(load_debug_info_file_vector, file, &it.entry->files) { elf_end(file->elf); @@ -4975,9 +4966,8 @@ load_debug_info_try_provided_vmlinux(struct drgn_module *module, struct drgn_error *err; struct drgn_program *prog = module->prog; bool logged_trying = false; - for (auto it = load_debug_info_provided_table_first(&state->provided); - it.entry; - it = load_debug_info_provided_table_next(it)) { + hash_table_for_each(load_debug_info_provided_table, it, + &state->provided) { vector_for_each(load_debug_info_file_vector, file, &it.entry->files) { int r = elf_is_vmlinux(file->elf); @@ -5286,10 +5276,8 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, } if (state.unmatched_provided != 0) { - for (struct load_debug_info_provided_table_iterator pit = - load_debug_info_provided_table_first(&state.provided); - pit.entry; - pit = load_debug_info_provided_table_next(pit)) { + hash_table_for_each(load_debug_info_provided_table, pit, + &state.provided) { if (!pit.entry->matched) { vector_for_each(load_debug_info_file_vector, file, &pit.entry->files) { @@ -5416,8 +5404,8 @@ elf_symbols_search(const char *name, uint64_t addr, if (err) return err; } - for (auto it = drgn_module_table_first(&prog->dbinfo.modules); - it.entry; it = drgn_module_table_next(it)) { + hash_table_for_each(drgn_module_table, it, + &prog->dbinfo.modules) { err = drgn_module_elf_symbols_search(*it.entry, name, addr, flags, builder); @@ -5498,8 +5486,7 @@ void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo) finder->ops.destroy(finder->arg); ); 
drgn_dwarf_info_deinit(dbinfo); - for (auto it = drgn_module_table_first(&dbinfo->modules); it.entry; - it = drgn_module_table_next(it)) + hash_table_for_each(drgn_module_table, it, &dbinfo->modules) drgn_module_destroy(*it.entry); drgn_module_table_deinit(&dbinfo->modules); } diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index e8fd88c64..871ba1100 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -165,13 +165,11 @@ drgn_namespace_dwarf_index_deinit(struct drgn_namespace_dwarf_index *dindex) { drgn_error_destroy(dindex->saved_err); array_for_each(tag_map, dindex->map) { - for (auto it = drgn_dwarf_index_die_map_first(tag_map); it.entry; - it = drgn_dwarf_index_die_map_next(it)) + hash_table_for_each(drgn_dwarf_index_die_map, it, tag_map) drgn_dwarf_index_die_vector_deinit(&it.entry->value); drgn_dwarf_index_die_map_deinit(tag_map); } - for (auto it = drgn_namespace_table_first(&dindex->children); it.entry; - it = drgn_namespace_table_next(it)) { + hash_table_for_each(drgn_namespace_table, it, &dindex->children) { drgn_namespace_dwarf_index_deinit(*it.entry); free(*it.entry); } @@ -1738,8 +1736,7 @@ drgn_dwarf_specification_map_merge(struct drgn_dwarf_specification_map *dst, struct drgn_error *err) { if (!err) { - for (auto it = drgn_dwarf_specification_map_first(src); - it.entry; it = drgn_dwarf_specification_map_next(it)) { + hash_table_for_each(drgn_dwarf_specification_map, it, src) { if (drgn_dwarf_specification_map_insert(dst, it.entry, NULL) < 0) { err = &drgn_enomem; @@ -1797,8 +1794,7 @@ drgn_dwarf_base_type_map_merge(struct drgn_dwarf_base_type_map *dst, struct drgn_error *err) { if (!err) { - for (auto it = drgn_dwarf_base_type_map_first(src); it.entry; - it = drgn_dwarf_base_type_map_next(it)) { + hash_table_for_each(drgn_dwarf_base_type_map, it, src) { if (drgn_dwarf_base_type_map_insert(dst, it.entry, NULL) < 0) { err = &drgn_enomem; diff --git a/libdrgn/handler.c b/libdrgn/handler.c index 8c3174a8c..a0ba9d1c4 100644 --- 
a/libdrgn/handler.c +++ b/libdrgn/handler.c @@ -105,8 +105,7 @@ struct drgn_error *drgn_handler_list_set_enabled(struct drgn_handler_list *list, } // The remaining handlers in the hash table are disabled. Insert them. - for (auto it = drgn_handler_table_first(&table); it.entry; - it = drgn_handler_table_next(it)) { + hash_table_for_each(drgn_handler_table, it, &table) { struct drgn_handler *handler = (struct drgn_handler *)*it.entry; handler->enabled = false; *handlerp = handler; diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index ba5928ade..4b9ed36c1 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -1687,6 +1687,17 @@ DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) __attribute__((__cleanup__(table_type##_deinit))) \ struct table_type table = HASH_TABLE_INIT +/** + * Iterate over every entry in a @ref hash_table. + * + * @param[in] table_type Name of hash table type. + * @param[out] it Name of iterator variable. + * @param[in] table Hash table to iterate over. 
+ */ +#define hash_table_for_each(table_type, it, table) \ + for (struct table_type##_iterator it = table_type##_first(table); \ + it.entry; it = table_type##_next(it)) + /** * @defgroup HashTableHelpers Hash table helpers * diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index b863b6a0a..c5a523079 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -170,9 +170,7 @@ static PyObject *LoggerCacheWrapper_clear(PyObject *self) if (!pyobjectp_set_empty(&programs)) { if (cache_logging_status()) return NULL; - for (struct pyobjectp_set_iterator it = - pyobjectp_set_first(&programs); - it.entry; it = pyobjectp_set_next(it)) { + hash_table_for_each(pyobjectp_set, it, &programs) { Program *prog = (Program *)*it.entry; drgn_program_set_log_level(&prog->prog, cached_log_level); @@ -395,9 +393,7 @@ static void Program_dealloc(Program *self) { Program_deinit_logging(self); drgn_program_deinit(&self->prog); - for (struct pyobjectp_set_iterator it = - pyobjectp_set_first(&self->objects); it.entry; - it = pyobjectp_set_next(it)) + hash_table_for_each(pyobjectp_set, it, &self->objects) Py_DECREF(*it.entry); pyobjectp_set_deinit(&self->objects); Py_XDECREF(self->cache); @@ -406,9 +402,7 @@ static void Program_dealloc(Program *self) static int Program_traverse(Program *self, visitproc visit, void *arg) { - for (struct pyobjectp_set_iterator it = - pyobjectp_set_first(&self->objects); it.entry; - it = pyobjectp_set_next(it)) + hash_table_for_each(pyobjectp_set, it, &self->objects) Py_VISIT(*it.entry); Py_VISIT(self->cache); return 0; @@ -416,9 +410,7 @@ static int Program_traverse(Program *self, visitproc visit, void *arg) static int Program_clear(Program *self) { - for (struct pyobjectp_set_iterator it = - pyobjectp_set_first(&self->objects); it.entry; - it = pyobjectp_set_next(it)) + hash_table_for_each(pyobjectp_set, it, &self->objects) Py_DECREF(*it.entry); pyobjectp_set_deinit(&self->objects); pyobjectp_set_init(&self->objects); diff --git 
a/libdrgn/type.c b/libdrgn/type.c index c0806e7d0..41232cde5 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1353,9 +1353,7 @@ void drgn_program_deinit_types(struct drgn_program *prog) } drgn_typep_vector_deinit(&prog->created_types); - for (struct drgn_dedupe_type_set_iterator it = - drgn_dedupe_type_set_first(&prog->dedupe_types); - it.entry; it = drgn_dedupe_type_set_next(it)) + hash_table_for_each(drgn_dedupe_type_set, it, &prog->dedupe_types) free(*it.entry); drgn_dedupe_type_set_deinit(&prog->dedupe_types); From 09aa5804506db450a866df57ad1f07ea14a98783 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 23 Jan 2025 14:19:26 -0800 Subject: [PATCH 031/166] libdrgn/python: factor path sequence argument parsing out of Program_load_debug_info() This will be needed elsewhere shortly (with the small addition of optionally null-terminating the result list). Signed-off-by: Omar Sandoval --- libdrgn/python/drgnpy.h | 17 ++++++++ libdrgn/python/program.c | 67 +++----------------------------- libdrgn/python/util.c | 84 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 61 deletions(-) diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index c76db9c42..0b0fae33d 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -19,6 +19,7 @@ #include "../pp.h" #include "../program.h" #include "../symbol.h" +#include "../vector.h" /* These were added in Python 3.7. 
*/ #ifndef Py_UNREACHABLE @@ -437,6 +438,22 @@ void path_cleanup(struct path_arg *path); __attribute__((__cleanup__(path_cleanup))) \ struct path_arg name = { __VA_ARGS__ } +DEFINE_VECTOR_TYPE(path_arg_vector, struct path_arg); + +struct path_sequence_arg { + bool allow_none; + bool null_terminate; + struct path_arg_vector args; + const char **paths; +}; +int path_sequence_converter(PyObject *o, void *p); +void path_sequence_cleanup(struct path_sequence_arg *paths); +size_t path_sequence_size(struct path_sequence_arg *paths); + +#define PATH_SEQUENCE_ARG(name, ...) \ + __attribute__((__cleanup__(path_sequence_cleanup))) \ + struct path_sequence_arg name = { .args = VECTOR_INIT, __VA_ARGS__ } + struct enum_arg { PyObject *type; unsigned long value; diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index c5a523079..c27644430 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1301,75 +1301,20 @@ static int Program_set_debug_info_path(Program *self, PyObject *value, void *arg return 0; } -DEFINE_VECTOR(path_arg_vector, struct path_arg); - -static void path_arg_vector_cleanup(struct path_arg_vector *path_args) -{ - vector_for_each(path_arg_vector, path_arg, path_args) - path_cleanup(path_arg); - path_arg_vector_deinit(path_args); -} - static PyObject *Program_load_debug_info(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = {"paths", "default", "main", NULL}; struct drgn_error *err; - PyObject *paths_obj = Py_None; + PATH_SEQUENCE_ARG(paths, .allow_none = true); int load_default = 0; int load_main = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Opp:load_debug_info", - keywords, &paths_obj, &load_default, - &load_main)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&pp:load_debug_info", + keywords, path_sequence_converter, + &paths, &load_default, &load_main)) return NULL; - - _cleanup_(path_arg_vector_cleanup) - struct path_arg_vector path_args = VECTOR_INIT; - _cleanup_free_ const char **paths = 
NULL; - if (paths_obj != Py_None) { - _cleanup_pydecref_ PyObject *it = PyObject_GetIter(paths_obj); - if (!it) - return NULL; - - Py_ssize_t length_hint = PyObject_LengthHint(paths_obj, 1); - if (length_hint == -1) - return NULL; - if (!path_arg_vector_reserve(&path_args, length_hint)) { - PyErr_NoMemory(); - return NULL; - } - - for (;;) { - _cleanup_pydecref_ PyObject *item = PyIter_Next(it); - if (!item) - break; - - struct path_arg *path_arg = - path_arg_vector_append_entry(&path_args); - if (!path_arg) { - PyErr_NoMemory(); - return NULL; - } - memset(path_arg, 0, sizeof(*path_arg)); - if (!path_converter(item, path_arg)) { - path_arg_vector_pop(&path_args); - return NULL; - } - } - if (PyErr_Occurred()) - return NULL; - - paths = malloc_array(path_arg_vector_size(&path_args), - sizeof(*paths)); - if (!paths) { - PyErr_NoMemory(); - return NULL; - } - for (size_t i = 0; i < path_arg_vector_size(&path_args); i++) - paths[i] = path_arg_vector_at(&path_args, i)->path; - } - err = drgn_program_load_debug_info(&self->prog, paths, - path_arg_vector_size(&path_args), + err = drgn_program_load_debug_info(&self->prog, paths.paths, + path_sequence_size(&paths), load_default, load_main); if (err) { set_drgn_error(err); diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index c2270b02b..2096635a0 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -5,6 +5,7 @@ #include #include "drgnpy.h" +#include "../vector.h" int append_string(PyObject *parts, const char *s) { @@ -162,6 +163,89 @@ void path_cleanup(struct path_arg *path) Py_CLEAR(path->object); } +DEFINE_VECTOR_FUNCTIONS(path_arg_vector); + +int path_sequence_converter(PyObject *o, void *p) +{ + if (o == NULL) { + path_sequence_cleanup(p); + return 1; + } + + struct path_sequence_arg *paths = p; + + if (paths->allow_none && o == Py_None) + return 1; + + _cleanup_pydecref_ PyObject *it = PyObject_GetIter(o); + if (!it) + return 0; + + Py_ssize_t length_hint = PyObject_LengthHint(o, 1); + if 
(length_hint == -1) + return 0; + if (!path_arg_vector_reserve(&paths->args, length_hint)) { + PyErr_NoMemory(); + return 0; + } + + for (;;) { + _cleanup_pydecref_ PyObject *item = PyIter_Next(it); + if (!item) + break; + + struct path_arg *path_arg = + path_arg_vector_append_entry(&paths->args); + if (!path_arg) { + PyErr_NoMemory(); + return 0; + } + memset(path_arg, 0, sizeof(*path_arg)); + if (!path_converter(item, path_arg)) { + path_arg_vector_pop(&paths->args); + return 0; + } + } + if (PyErr_Occurred()) + return 0; + + size_t n = path_arg_vector_size(&paths->args); + if (paths->null_terminate) { + if (n == SIZE_MAX) { + PyErr_NoMemory(); + return 0; + } + n++; + } + paths->paths = malloc_array(n, sizeof(paths->paths[0])); + if (!paths->paths) { + PyErr_NoMemory(); + return 0; + } + + for (size_t i = 0; i < path_arg_vector_size(&paths->args); i++) + paths->paths[i] = path_arg_vector_at(&paths->args, i)->path; + if (paths->null_terminate) + paths->paths[path_arg_vector_size(&paths->args)] = NULL; + + return Py_CLEANUP_SUPPORTED; +} + +void path_sequence_cleanup(struct path_sequence_arg *paths) +{ + free(paths->paths); + paths->paths = NULL; + vector_for_each(path_arg_vector, path_arg, &paths->args) + path_cleanup(path_arg); + path_arg_vector_deinit(&paths->args); + path_arg_vector_init(&paths->args); +} + +size_t path_sequence_size(struct path_sequence_arg *paths) +{ + return path_arg_vector_size(&paths->args); +} + int enum_converter(PyObject *o, void *p) { struct enum_arg *arg = p; From bb00871601b3c5d7668788664a84938a91de6bac Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 23 Jan 2025 15:01:16 -0800 Subject: [PATCH 032/166] Replace debug_info_path with debug_info_options There are more options that we will want to control other than the debug directories, so create a more general object to represent them. 
libdrgn exposes it as an opaque pointer with getters and setters (creating minimal ABI burden), and the Python bindings expose it as a dataclass-esque type. struct drgn_program has a default options object, and an upcoming change will allow passing custom options to a single finder invocation. For now, there are three types of options: list of strings, bool, and enum. (The single debug_info_path colon-separated string is replaced by a list.) This uses a bunch of ugly macros to reduce boilerplate for adding options. This also adds a couple of command line options for controlling the debug directories to prepare for doing the same for other options. Signed-off-by: Omar Sandoval --- _drgn.pyi | 78 +++++--- docs/api_reference.rst | 2 + drgn/__init__.py | 2 + drgn/cli.py | 24 +++ libdrgn/Makefile.am | 3 + libdrgn/debug_info.c | 159 ++++++++-------- libdrgn/debug_info.h | 16 +- libdrgn/debug_info_options.c | 272 ++++++++++++++++++++++++++++ libdrgn/debug_info_options.h | 28 +++ libdrgn/drgn.h | 49 ++++- libdrgn/linux_kernel.c | 42 ++--- libdrgn/linux_kernel.h | 4 +- libdrgn/python/debug_info_options.c | 229 +++++++++++++++++++++++ libdrgn/python/drgnpy.h | 9 + libdrgn/python/main.c | 1 + libdrgn/python/program.c | 38 ++-- tests/test_debug_info.py | 16 +- tests/test_debug_info_options.py | 56 ++++++ 18 files changed, 853 insertions(+), 175 deletions(-) create mode 100644 libdrgn/debug_info_options.c create mode 100644 libdrgn/debug_info_options.h create mode 100644 libdrgn/python/debug_info_options.c create mode 100644 tests/test_debug_info_options.py diff --git a/_drgn.pyi b/_drgn.pyi index 6e72c5681..c5a5758e1 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -968,34 +968,8 @@ class Program: Return the names of enabled debugging information finders, in order. """ ... - debug_info_path: Optional[str] - """ - Directories to search for debugging information files. 
- - The standard debugging information finder supports searching for files by - *build ID* (a unique byte string present in both the :ref:`loaded file - ` and the :ref:`debug file `) and by - *debug link* (a name and checksum in the loaded file that refers to the - debug file). - - This setting controls what directories the standard debugging information - finder searches. It is a sequence of paths separated by colons (``:``). - - Searches by build ID ignore relative paths. They check under each absolute - path for a file named ``.build-id/xx/yyyy`` (for loaded files) or - ``.build-id/xx/yyyy.debug`` (for debug files), where ``xxyyyy`` is the - lowercase hexadecimal representation of the build ID. - - Searches by debug link check every path for a file with the name given by - the debug link. Relative paths are relative to the directory containing the - loaded file. An empty path means the directory containing the loaded file. - - The default is ``:.debug:/usr/lib/debug``, which should work out of the box - on most Linux distributions. - - If ``None``, then searches by build ID and debug link are disabled (unless - the debug link is an absolute path). - """ + debug_info_options: DebugInfoOptions + """Default options for debugging information searches.""" def load_debug_info( self, @@ -1403,6 +1377,54 @@ class FindObjectFlags(enum.Flag): ANY = ... "" +class DebugInfoOptions: + """ + Options for debugging information searches. + + All of these options can be reassigned. + """ + + def __init__( + self, + __options: Optional[DebugInfoOptions] = None, + *, + directories: Iterable[Path] = ..., + ) -> None: + """ + Create a ``DebugInfoOptions``. + + :param options: If given, create a copy of the given options. + Otherwise, use the default options. + + Any remaining arguments override the copied/default options. + """ + ... + directories: Tuple[str, ...] + """ + Directories to search for debugging information files. 
+ + The standard debugging information finder supports searching for files by + *build ID* (a unique byte string present in both the :ref:`loaded file + ` and the :ref:`debug file `) and by + *debug link* (a name and checksum in the loaded file that refers to the + debug file). + + This option contains the directories that the standard debugging + information finder searches. + + Searches by build ID ignore relative paths. They check under each absolute + path for a file named ``.build-id/xx/yyyy`` (for loaded files) or + ``.build-id/xx/yyyy.debug`` (for debug files), where ``xxyyyy`` is the + lowercase hexadecimal representation of the build ID. + + Searches by debug link check every path for a file with the name given by + the debug link. Relative paths are relative to the directory containing the + loaded file. An empty path means the directory containing the loaded file. + + The default is ``("", ".debug", "/usr/lib/debug")``, which should work out + of the box on most Linux distributions. + """ + def get_default_prog() -> Program: """ Get the default program for the current thread. diff --git a/docs/api_reference.rst b/docs/api_reference.rst index eecd0138a..971e9568f 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -11,6 +11,8 @@ Programs .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags +.. drgndoc:: DebugInfoOptions + .. drgndoc:: Thread .. 
_api-filenames: diff --git a/drgn/__init__.py b/drgn/__init__.py index 981bef3e7..105423ae7 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -49,6 +49,7 @@ from _drgn import ( NULL, Architecture, + DebugInfoOptions, ExtraModule, FaultError, FindObjectFlags, @@ -115,6 +116,7 @@ __all__ = ( "Architecture", + "DebugInfoOptions", "ExtraModule", "FaultError", "FindObjectFlags", diff --git a/drgn/cli.py b/drgn/cli.py index 36d6d9f22..96f922cc4 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -209,6 +209,20 @@ def _main() -> None: "which is assumed not to correspond to a loaded executable, library, or module. " "This option may be given more than once", ) + symbol_group.add_argument( + "--debug-directory", + dest="debug_directories", + metavar="PATH", + type=str, + action="append", + help="search for debugging symbols by build ID and debug link in the given directory. " + "This option may be given more than once", + ) + symbol_group.add_argument( + "--no-default-debug-directories", + action="store_true", + help="don't search for debugging symbols by build ID and debug link in the standard locations", + ) advanced_group = parser.add_argument_group("advanced") advanced_group.add_argument( @@ -309,6 +323,16 @@ def _main() -> None: # E.g., "not an ELF core file" sys.exit(f"error: {e}") + if args.debug_directories is not None: + if args.no_default_debug_directories: + prog.debug_info_options.directories = args.debug_directories + else: + prog.debug_info_options.directories = ( + tuple(args.debug_directories) + prog.debug_info_options.directories + ) + elif args.no_default_debug_directories: + prog.debug_info_options.directories = () + if args.default_symbols is None: args.default_symbols = {"default": True, "main": True} try: diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 6414d95ab..1ade9578d 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -56,6 +56,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ crc32.h \ debug_info.c \ 
debug_info.h \ + debug_info_options.c \ + debug_info_options.h \ drgn_internal.h \ drgn_section_name_to_index.inc \ dwarf_constants.c \ @@ -178,6 +180,7 @@ noinst_LTLIBRARIES += _drgn.la endif _drgn_la_SOURCES = python/constants.c \ + python/debug_info_options.c \ python/docstrings.c \ python/docstrings.h \ python/drgnpy.h \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 23472a46a..845e769f0 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1058,30 +1058,10 @@ drgn_program_enabled_debug_info_finders(struct drgn_program *prog, names_ret, count_ret); } -static const char *drgn_default_debug_info_path = ":.debug:/usr/lib/debug"; - -LIBDRGN_PUBLIC -const char *drgn_program_debug_info_path(struct drgn_program *prog) -{ - return prog->dbinfo.debug_info_path; -} - -LIBDRGN_PUBLIC -struct drgn_error *drgn_program_set_debug_info_path(struct drgn_program *prog, - const char *path) +LIBDRGN_PUBLIC struct drgn_debug_info_options * +drgn_program_debug_info_options(struct drgn_program *prog) { - char *new_path; - if (path) { - new_path = strdup(path); - if (!new_path) - return &drgn_enomem; - } else { - new_path = NULL; - } - if (prog->dbinfo.debug_info_path != drgn_default_debug_info_path) - free((char *)prog->dbinfo.debug_info_path); - prog->dbinfo.debug_info_path = new_path; - return NULL; + return &prog->dbinfo.options; } static struct drgn_error * @@ -1792,10 +1772,10 @@ drgn_module_try_supplementary_debug_file_log(struct drgn_module *module, } static struct drgn_error * -drgn_module_try_standard_supplementary_files(struct drgn_module *module) +drgn_module_try_standard_supplementary_files(struct drgn_module *module, + const struct drgn_debug_info_options *options) { struct drgn_error *err; - struct drgn_program *prog = module->prog; const char *debug_file_path; const char *debugaltlink_path; @@ -1848,15 +1828,13 @@ drgn_module_try_standard_supplementary_files(struct drgn_module *module) // outside of the debug directory. 
const char *dwz = strstr(debugaltlink_path, "/.dwz/"); if (dwz) { - const char *debug_dir; - size_t debug_dir_len; - drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { - if (debug_dir_len == 0 || debug_dir[0] != '/') + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + if (debug_dir[0] != '/') continue; sb.len = 0; - if (!string_builder_appendn(&sb, debug_dir, - debug_dir_len) + if (!string_builder_append(&sb, debug_dir) || !string_builder_append(&sb, dwz) || !string_builder_null_terminate(&sb)) return &drgn_enomem; @@ -1886,8 +1864,9 @@ drgn_module_wanted_supplementary_debug_file_is_new(struct drgn_module *module, } struct drgn_error * -drgn_module_try_standard_file(struct drgn_module *module, const char *path, - int fd, bool check_build_id, +drgn_module_try_standard_file(struct drgn_module *module, + const struct drgn_debug_info_options *options, + const char *path, int fd, bool check_build_id, const uint32_t *expected_crc) { struct drgn_error *err; @@ -1900,7 +1879,8 @@ drgn_module_try_standard_file(struct drgn_module *module, const char *path, // If the wanted supplementary debug file changed, try finding it again. 
if (drgn_module_wanted_supplementary_debug_file_is_new(module, orig_supplementary_file_generation)) { - err = drgn_module_try_standard_supplementary_files(module); + err = drgn_module_try_standard_supplementary_files(module, + options); if (err) return err; } @@ -1944,6 +1924,7 @@ drgn_debug_info_set_map_files_segments(struct drgn_debug_info *dbinfo, static struct drgn_error * drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, + const struct drgn_debug_info_options *options, bool *tried) { struct drgn_error *err; @@ -1976,8 +1957,9 @@ drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, int fd = open(path, O_RDONLY); if (fd >= 0) { *tried = true; - return drgn_module_try_standard_file(module, path, fd, - false, NULL); + return drgn_module_try_standard_file(module, options, + path, fd, false, + NULL); } else { // We found a match in the cache, but we couldn't open // it. If it doesn't exist anymore, then we need to @@ -2033,6 +2015,7 @@ drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, if (fd >= 0) { *tried = true; err = drgn_module_try_standard_file(module, + options, path, fd, false, NULL); @@ -2059,6 +2042,7 @@ drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, } static struct drgn_error *drgn_module_try_proc_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, bool *tried) { struct drgn_program *prog = module->prog; @@ -2077,10 +2061,11 @@ static struct drgn_error *drgn_module_try_proc_files(struct drgn_module *module, return NULL; } *tried = true; - return drgn_module_try_standard_file(module, path, fd, false, - NULL); + return drgn_module_try_standard_file(module, options, path, fd, + false, NULL); } else if (module->kind == DRGN_MODULE_SHARED_LIBRARY) { return drgn_module_try_proc_files_for_shared_library(module, + options, tried); } else { return NULL; @@ -2088,7 +2073,8 @@ static struct drgn_error *drgn_module_try_proc_files(struct 
drgn_module *module, } static struct drgn_error * -drgn_module_try_files_by_build_id(struct drgn_module *module) +drgn_module_try_files_by_build_id(struct drgn_module *module, + const struct drgn_debug_info_options *options) { struct drgn_error *err; @@ -2100,12 +2086,11 @@ drgn_module_try_files_by_build_id(struct drgn_module *module) return NULL; STRING_BUILDER(sb); - const char *debug_dir; - size_t debug_dir_len; - drgn_program_for_each_debug_dir(module->prog, debug_dir, debug_dir_len) { - if (debug_dir_len == 0 || debug_dir[0] != '/') + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + if (debug_dir[0] != '/') continue; - if (!string_builder_appendn(&sb, debug_dir, debug_dir_len) + if (!string_builder_append(&sb, debug_dir) || !string_builder_appendf(&sb, "/.build-id/%c%c/%s.debug", build_id_str[0], build_id_str[1], &build_id_str[2]) @@ -2114,16 +2099,18 @@ drgn_module_try_files_by_build_id(struct drgn_module *module) // We trust the build ID encoded in the path and don't check it // again. if (module->debug_file_status == DRGN_MODULE_FILE_WANT) { - err = drgn_module_try_standard_file(module, sb.str, -1, - false, NULL); + err = drgn_module_try_standard_file(module, options, + sb.str, -1, false, + NULL); if (err || !drgn_module_wants_file(module)) return err; } if (module->loaded_file_status == DRGN_MODULE_FILE_WANT) { // Remove the ".debug" extension. 
sb.str[sb.len - sizeof(".debug") + 1] = '\0'; - err = drgn_module_try_standard_file(module, sb.str, -1, - false, NULL); + err = drgn_module_try_standard_file(module, options, + sb.str, -1, false, + NULL); if (err || !drgn_module_wants_file(module)) return err; } @@ -2133,7 +2120,8 @@ drgn_module_try_files_by_build_id(struct drgn_module *module) } static struct drgn_error * -drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module) +drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module, + const struct drgn_debug_info_options *options) { struct drgn_error *err; struct drgn_program *prog = module->prog; @@ -2181,42 +2169,40 @@ drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module) STRING_BUILDER(sb); if (debuglink[0] == '/') { // debuglink is absolute. Try it directly. - err = drgn_module_try_standard_file(module, debuglink, -1, - false, &crc); + err = drgn_module_try_standard_file(module, options, debuglink, + -1, false, &crc); if (err || !drgn_module_wants_file(module)) return err; } else if (file->path[0] && debuglink[0]) { // debuglink is relative. Try it in the debug directories. const char *slash = strrchr(file->path, '/'); size_t dirslash_len = slash ? 
slash - file->path + 1 : 0; - const char *debug_dir; - size_t debug_dir_len; - drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; // If debug_dir is empty, then try: // $(dirname $path)/$debuglink // If debug_dir is relative, then try: // $(dirname $path)/$debug_dir/$debuglink // If debug_dir is absolute, then try: // $debug_dir/$(dirname $path)/$debuglink - if (debug_dir_len > 0 && debug_dir[0] == '/') { + if (debug_dir[0] == '/') { if (file->path[0] != '/') continue; - if (!string_builder_appendn(&sb, debug_dir, - debug_dir_len)) + if (!string_builder_append(&sb, debug_dir)) return &drgn_enomem; } if (!string_builder_appendn(&sb, file->path, dirslash_len) - || (debug_dir_len > 0 && debug_dir[0] != '/' - && (!string_builder_appendn(&sb, debug_dir, - debug_dir_len) + || (debug_dir[0] && debug_dir[0] != '/' + && (!string_builder_append(&sb, debug_dir) || !string_builder_appendc(&sb, '/'))) || !string_builder_appendn(&sb, debuglink, debuglink_len) || !string_builder_null_terminate(&sb)) return &drgn_enomem; - err = drgn_module_try_standard_file(module, sb.str, -1, - false, &crc); + err = drgn_module_try_standard_file(module, options, + sb.str, -1, false, + &crc); if (err || !drgn_module_wants_file(module)) return err; sb.len = 0; @@ -2227,21 +2213,16 @@ drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module) static struct drgn_error * drgn_module_try_standard_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, struct drgn_module_standard_files_state *state) { struct drgn_error *err; struct drgn_program *prog = module->prog; - if (prog->dbinfo.debug_info_path) { - drgn_module_try_files_log(module, - "trying standard paths in \"%s\" for", - prog->dbinfo.debug_info_path); - } else { - drgn_module_try_files_log(module, "trying standard paths for"); - } + drgn_module_try_files_log(module, "trying standard paths 
for"); // If we need a supplementary file, try that first. - err = drgn_module_try_standard_supplementary_files(module); + err = drgn_module_try_standard_supplementary_files(module, options); if (err || !drgn_module_wants_file(module)) return err; @@ -2286,7 +2267,8 @@ drgn_module_try_standard_files(struct drgn_module *module, if (err || !drgn_module_wants_file(module)) return err; } else if (drgn_program_is_userspace_process(prog)) { - err = drgn_module_try_proc_files(module, &tried_proc_symlink); + err = drgn_module_try_proc_files(module, options, + &tried_proc_symlink); if (err || !drgn_module_wants_file(module)) return err; } @@ -2297,7 +2279,7 @@ drgn_module_try_standard_files(struct drgn_module *module, // us from trying a file with the wrong build ID. const bool had_build_id = module->build_id_len > 0; if (had_build_id) { - err = drgn_module_try_files_by_build_id(module); + err = drgn_module_try_files_by_build_id(module, options); if (err || !drgn_module_wants_file(module)) return err; } @@ -2307,12 +2289,12 @@ drgn_module_try_standard_files(struct drgn_module *module, // paths. 
if (module->kind == DRGN_MODULE_MAIN && (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) { - err = drgn_module_try_vmlinux_files(module, state); + err = drgn_module_try_vmlinux_files(module, options); if (err || !drgn_module_wants_file(module)) return err; } else if (module->kind == DRGN_MODULE_RELOCATABLE && (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) { - err = drgn_module_try_linux_kmod_files(module, state); + err = drgn_module_try_linux_kmod_files(module, options, state); if (err || !drgn_module_wants_file(module)) return err; // Otherwise, if the module name looks like a path (i.e., it contains a @@ -2323,8 +2305,9 @@ drgn_module_try_standard_files(struct drgn_module *module, } else if (module->kind != DRGN_MODULE_VDSO && !tried_proc_symlink && strchr(module->name, '/')) { - err = drgn_module_try_standard_file(module, module->name, -1, - true, NULL); + err = drgn_module_try_standard_file(module, options, + module->name, -1, true, + NULL); if (err || !drgn_module_wants_file(module)) return err; } @@ -2333,14 +2316,14 @@ drgn_module_try_standard_files(struct drgn_module *module, // file and gotten a build ID from it. Try to find the debug file by // build ID now. if (!had_build_id) { - err = drgn_module_try_files_by_build_id(module); + err = drgn_module_try_files_by_build_id(module, options); if (err || !drgn_module_wants_file(module)) return err; } // We might have a loaded file with a .gnu_debuglink. Try to find the // corresponding debug file. 
- return drgn_module_try_files_by_gnu_debuglink(module); + return drgn_module_try_files_by_gnu_debuglink(module, options); } static void @@ -2354,11 +2337,24 @@ drgn_standard_module_file_find(struct drgn_module * const *modules, size_t num_modules, void *arg) { struct drgn_error *err; + struct drgn_debug_info_options *options = + &modules[0]->prog->dbinfo.options; + + if (drgn_log_is_enabled(modules[0]->prog, DRGN_LOG_DEBUG)) { + _cleanup_free_ char *options_str = + drgn_format_debug_info_options(options); + if (!options_str) + return &drgn_enomem; + drgn_log_debug(modules[0]->prog, + "trying standard debug info finder with %s", + options_str); + } _cleanup_(drgn_module_standard_files_state_deinit) struct drgn_module_standard_files_state state = {}; for (size_t i = 0; i < num_modules; i++) { - err = drgn_module_try_standard_files(modules[i], &state); + err = drgn_module_try_standard_files(modules[i], options, + &state); if (err) return err; } @@ -5452,7 +5448,7 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, "standard", &standard_debug_info_finder_ops, prog, 0); - dbinfo->debug_info_path = drgn_default_debug_info_path; + drgn_debug_info_options_init(&dbinfo->options); #if WITH_DEBUGINFOD dbinfo->debuginfod_client = NULL; if (drgn_have_debuginfod()) { @@ -5474,8 +5470,7 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo) { free(dbinfo->map_files_segments); - if (dbinfo->debug_info_path != drgn_default_debug_info_path) - free((char *)dbinfo->debug_info_path); + drgn_debug_info_options_deinit(&dbinfo->options); #if WITH_DEBUGINFOD if (dbinfo->debuginfod_client) drgn_debuginfod_end(dbinfo->debuginfod_client); diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 2241ef3a8..556604d4b 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -20,6 +20,7 @@ #include "binary_search_tree.h" #include "cfi.h" +#include "debug_info_options.h" #include "drgn_internal.h" #include 
"dwarf_info.h" #include "elf_symtab.h" @@ -86,8 +87,7 @@ struct drgn_debug_info { struct drgn_handler_list debug_info_finders; struct drgn_debug_info_finder standard_debug_info_finder; - /** See @ref drgn_program_debug_info_path(). */ - const char *debug_info_path; + struct drgn_debug_info_options options; /** * Counter used to detect when loading debugging information is * attempted. @@ -292,17 +292,11 @@ struct drgn_module_standard_files_state { // Always takes ownership of fd. Attempts to resolve the real path of path. struct drgn_error * -drgn_module_try_standard_file(struct drgn_module *module, const char *path, - int fd, bool check_build_id, +drgn_module_try_standard_file(struct drgn_module *module, + const struct drgn_debug_info_options *options, + const char *path, int fd, bool check_build_id, const uint32_t *expected_crc); -#define drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) \ - for (debug_dir = (prog)->dbinfo.debug_info_path; \ - debug_dir \ - && (debug_dir_len = strchrnul(debug_dir, ':') - debug_dir, 1); \ - debug_dir = debug_dir[debug_dir_len] == '\0' \ - ? NULL : debug_dir + debug_dir_len + 1) - static inline bool drgn_module_wants_file(struct drgn_module *module) { return drgn_module_wants_loaded_file(module) diff --git a/libdrgn/debug_info_options.c b/libdrgn/debug_info_options.c new file mode 100644 index 000000000..2cdd12476 --- /dev/null +++ b/libdrgn/debug_info_options.c @@ -0,0 +1,272 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include + +#include "cleanup.h" +#include "debug_info_options.h" +#include "string_builder.h" +#include "util.h" + +static const char * const drgn_debug_info_options_default_directories[] = { + "", ".debug", "/usr/lib/debug", NULL +}; + +void drgn_debug_info_options_init(struct drgn_debug_info_options *options) +{ +#define LIST_OPTION(name) \ + options->name = drgn_debug_info_options_default_##name; +#define BOOL_OPTION(name, default_value) options->name = default_value; +#define ENUM_OPTION(name, type, default_value) options->name = default_value; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION +} + +static void drgn_debug_info_options_list_destroy(const char * const *list, + const char * const *default_list) +{ + if (list && list != default_list) { + for (size_t i = 0; list[i]; i++) + free((void *)list[i]); + free((void *)list); + } +} + +static void drgn_debug_info_options_listp_destroy(const char * const **listp) +{ + drgn_debug_info_options_list_destroy((const char * const *)*listp, + NULL); +} + +void drgn_debug_info_options_deinit(struct drgn_debug_info_options *options) +{ +#define LIST_OPTION(name) \ + drgn_debug_info_options_list_destroy(options->name, \ + drgn_debug_info_options_default_##name); +#define BOOL_OPTION(name, default_value) +#define ENUM_OPTION(name, type, default_value) + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_debug_info_options_create(struct drgn_debug_info_options **ret) +{ + struct drgn_debug_info_options *options = malloc(sizeof(*options)); + if (!options) + return &drgn_enomem; + drgn_debug_info_options_init(options); + *ret = options; + return NULL; +} + +LIBDRGN_PUBLIC void +drgn_debug_info_options_destroy(struct drgn_debug_info_options *options) +{ + if (options) { + drgn_debug_info_options_deinit(options); + free(options); + } +} + +static const char * 
const * +drgn_debug_info_options_list_dup(const char * const *list) +{ + size_t n = 0; + while (list[n]) + n++; + char **copy = malloc_array(n + 1, sizeof(copy[0])); + if (!copy) + return NULL; + for (size_t i = 0; i < n; i++) { + copy[i] = strdup(list[i]); + if (!copy[i]) { + for (size_t j = 0; j < i; j++) + free(copy[j]); + free(copy); + return NULL; + } + } + copy[n] = NULL; + return (const char * const *)copy; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_debug_info_options_copy(struct drgn_debug_info_options *dst, + const struct drgn_debug_info_options *src) +{ + if (dst == src) + return NULL; + + // Since copying any list could fail, make all of the copies first. + // Replace the default lists with NULL for now to avoid unnecessary + // copies and simplify cleanup. +#define LIST_OPTION(name) \ + _cleanup_(drgn_debug_info_options_listp_destroy) \ + const char * const *name##_copy = NULL; \ + if (src->name != drgn_debug_info_options_default_##name) { \ + name##_copy = drgn_debug_info_options_list_dup(src->name); \ + if (!name##_copy) \ + return &drgn_enomem; \ + } +#define BOOL_OPTION(name, default_value) +#define ENUM_OPTION(name, type, default_value) + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + + // Now we can set everything. 
+#define LIST_OPTION(name) \ + drgn_debug_info_options_list_destroy(dst->name, \ + drgn_debug_info_options_default_##name);\ + if (name##_copy) \ + dst->name = no_cleanup_ptr(name##_copy); \ + else \ + dst->name = drgn_debug_info_options_default_##name; +#define BOOL_OPTION(name, default_value) dst->name = src->name; +#define ENUM_OPTION(name, type, default_value) dst->name = src->name; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + return NULL; +} + +#define DRGN_DEBUG_INFO_OPTIONS_GET(type, name) \ +LIBDRGN_PUBLIC type \ +drgn_debug_info_options_get_##name(const struct drgn_debug_info_options *options)\ +{ \ + return options->name; \ +} + +#define DRGN_DEBUG_INFO_OPTIONS_GETSET(type, name) \ +DRGN_DEBUG_INFO_OPTIONS_GET(type, name) \ + \ +LIBDRGN_PUBLIC void \ +drgn_debug_info_options_set_##name(struct drgn_debug_info_options *options, \ + type value) \ +{ \ + options->name = value; \ +} + +#define LIST_OPTION(name) \ +DRGN_DEBUG_INFO_OPTIONS_GET(const char * const *, name) \ + \ +LIBDRGN_PUBLIC struct drgn_error * \ +drgn_debug_info_options_set_##name(struct drgn_debug_info_options *options, \ + const char * const *value) \ +{ \ + const char * const *copy; \ + if (value == drgn_debug_info_options_default_##name) { \ + copy = value; \ + } else { \ + copy = drgn_debug_info_options_list_dup(value); \ + if (!copy) \ + return &drgn_enomem; \ + } \ + drgn_debug_info_options_list_destroy(options->name, \ + drgn_debug_info_options_default_##name);\ + options->name = copy; \ + return NULL; \ +} + +#define BOOL_OPTION(name, default_value) \ + DRGN_DEBUG_INFO_OPTIONS_GETSET(bool, name) +#define ENUM_OPTION(name, type, default_value) \ + DRGN_DEBUG_INFO_OPTIONS_GETSET(enum type, name) + +DRGN_DEBUG_INFO_OPTIONS + +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + +static bool drgn_format_debug_info_options_common(struct string_builder *sb, + const char *name, + bool *first) +{ + if (*first) + *first = false; + else if 
(!string_builder_append(sb, ", ")) + return false; + return string_builder_append(sb, name) && string_builder_appendc(sb, '='); +} + +static bool drgn_debug_info_options_lists_equal(const char * const *a, + const char * const *b) +{ + if (a == b) + return true; + size_t i; + for (i = 0; a[i]; i++) { + if (!b[i] || strcmp(a[i], b[i]) != 0) + return false; + } + return !b[i]; +} + +static bool drgn_format_debug_info_options_list(struct string_builder *sb, + const char *name, bool *first, + const char * const *list, + const char * const *default_list) +{ + // Always include directories, skip other options set to the default. + if (default_list != drgn_debug_info_options_default_directories + && drgn_debug_info_options_lists_equal(list, default_list)) + return true; + + if (!drgn_format_debug_info_options_common(sb, name, first) + || !string_builder_appendc(sb, '(')) + return false; + size_t i; + for (i = 0; list[i]; i++) { + if (!string_builder_append(sb, i == 0 ? "'" : ", '") + || !string_builder_append(sb, list[i]) + || !string_builder_appendc(sb, '\'')) + return false; + } + return string_builder_append(sb, i == 1 ? ",)" : ")"); +} + +__attribute__((__unused__)) +static bool drgn_format_debug_info_options_bool(struct string_builder *sb, + const char *name, bool *first, + bool value, bool default_value) +{ + // Skip options set to the default. + if (value == default_value) + return true; + return drgn_format_debug_info_options_common(sb, name, first) + && string_builder_append(sb, value ? 
"True" : "False"); +} + +char *drgn_format_debug_info_options(struct drgn_debug_info_options *options) +{ + STRING_BUILDER(sb); + + bool first = true; +#define LIST_OPTION(name) \ + if (!drgn_format_debug_info_options_list(&sb, #name, &first, \ + options->name, \ + drgn_debug_info_options_default_##name))\ + return NULL; +#define BOOL_OPTION(name, default_value) \ + if (!drgn_format_debug_info_options_bool(&sb, #name, &first, \ + options->name, default_value)) \ + return NULL; +#define ENUM_OPTION(name, type, default_value) \ + if (!type##_format(&sb, #name, &first, options->name, default_value)) \ + return NULL; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + if (!string_builder_null_terminate(&sb)) + return NULL; + return string_builder_steal(&sb); +} diff --git a/libdrgn/debug_info_options.h b/libdrgn/debug_info_options.h new file mode 100644 index 000000000..39e6ceead --- /dev/null +++ b/libdrgn/debug_info_options.h @@ -0,0 +1,28 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef DRGN_DEBUG_INFO_OPTIONS_H +#define DRGN_DEBUG_INFO_OPTIONS_H + +#include "drgn_internal.h" + +// X macro expanding to all debug info options. 
+#define DRGN_DEBUG_INFO_OPTIONS \ + LIST_OPTION(directories) + +struct drgn_debug_info_options { +#define LIST_OPTION(name) const char * const *name; +#define BOOL_OPTION(name, default_value) bool name; +#define ENUM_OPTION(name, type, default_value) enum type name; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION +}; + +void drgn_debug_info_options_init(struct drgn_debug_info_options *options); +void drgn_debug_info_options_deinit(struct drgn_debug_info_options *options); + +char *drgn_format_debug_info_options(struct drgn_debug_info_options *options); + +#endif /* DRGN_DEBUG_INFO_OPTIONS_H */ diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index f94e609f8..3e0f8b6ce 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1650,12 +1650,51 @@ drgn_program_enabled_debug_info_finders(struct drgn_program *prog, const char ***names_ret, size_t *count_ret); -/** Colon-separated directories to search for debugging information files. */ -const char *drgn_program_debug_info_path(struct drgn_program *prog); +/** Options for debugging information searches. */ +struct drgn_debug_info_options; -/** Set the directories to search for debugging information files. */ -struct drgn_error *drgn_program_set_debug_info_path(struct drgn_program *prog, - const char *path); +/** Create a @ref drgn_debug_info_options with the default settings. */ +struct drgn_error * +drgn_debug_info_options_create(struct drgn_debug_info_options **ret); + +/** Destroy a @ref drgn_debug_info_options. */ +void +drgn_debug_info_options_destroy(struct drgn_debug_info_options *options); + +/** Set all options in @p dst to the same as @p src. */ +struct drgn_error * +drgn_debug_info_options_copy(struct drgn_debug_info_options *dst, + const struct drgn_debug_info_options *src); + +/** + * Get the list of directories to search for debugging information files. + * + * @return Null-terminated list of directories. 
Valid until @ref + * drgn_debug_info_options_set_directories() or @ref + * drgn_debug_info_options_destroy() is called on @p options. + */ +const char * const * +drgn_debug_info_options_get_directories(const struct drgn_debug_info_options *options); + +/** + * Set the list of directories to search for debugging information files. + * + * @param[in] value Null-terminated list of directories. It is copied, so it + * need not remain valid after this function returns. + */ +struct drgn_error * +drgn_debug_info_options_set_directories(struct drgn_debug_info_options *options, + const char * const *value) + __attribute__((__nonnull__(1, 2))); + +/** + * Get the default debugging information options for @p prog. + * + * @return Program options. May be modified as needed. Must not be passed to + * @ref drgn_debug_info_options_destroy(). + */ +struct drgn_debug_info_options * +drgn_program_debug_info_options(struct drgn_program *prog); /** * Try to use the given file for a module. diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 61d5e5e52..e5a6e9aa8 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -622,7 +622,7 @@ static struct drgn_error *depmod_index_find(struct depmod_index *depmod, struct drgn_error * drgn_module_try_vmlinux_files(struct drgn_module *module, - struct drgn_module_standard_files_state *state) + const struct drgn_debug_info_options *options) { struct drgn_error *err; struct drgn_program *prog = module->prog; @@ -639,19 +639,18 @@ drgn_module_try_vmlinux_files(struct drgn_module *module, "/lib/modules/%s/vmlinux.debug", }; STRING_BUILDER(sb); - const char *debug_dir; - size_t debug_dir_len; - drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { - if (debug_dir_len == 0 || debug_dir[0] != '/') + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + if (debug_dir[0] != '/') continue; array_for_each(format, debug_dir_paths) { - if (!string_builder_appendn(&sb, 
debug_dir, - debug_dir_len) + if (!string_builder_append(&sb, debug_dir) || !string_builder_appendf(&sb, *format, osrelease) || !string_builder_null_terminate(&sb)) return &drgn_enomem; - err = drgn_module_try_standard_file(module, sb.str, -1, - true, NULL); + err = drgn_module_try_standard_file(module, options, + sb.str, -1, true, + NULL); if (err || !drgn_module_wants_file(module)) return err; sb.len = 0; @@ -668,8 +667,8 @@ drgn_module_try_vmlinux_files(struct drgn_module *module, if (!string_builder_appendf(&sb, *format, osrelease) || !string_builder_null_terminate(&sb)) return &drgn_enomem; - err = drgn_module_try_standard_file(module, sb.str, -1, true, - NULL); + err = drgn_module_try_standard_file(module, options, sb.str, -1, + true, NULL); if (err || !drgn_module_wants_file(module)) return err; sb.len = 0; @@ -679,6 +678,7 @@ drgn_module_try_vmlinux_files(struct drgn_module *module, struct drgn_error * drgn_module_try_linux_kmod_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, struct drgn_module_standard_files_state *state) { struct drgn_error *err; @@ -749,19 +749,18 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, const char *osrelease = prog->vmcoreinfo.osrelease; STRING_BUILDER(sb); - const char *debug_dir; - size_t debug_dir_len; - drgn_program_for_each_debug_dir(prog, debug_dir, debug_dir_len) { - if (debug_dir_len == 0 || debug_dir[0] != '/') + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + if (debug_dir[0] != '/') continue; // Debian, Ubuntu: // $debug_dir/lib/modules/$(uname -r)/$ko_name - if (!string_builder_appendn(&sb, debug_dir, debug_dir_len) + if (!string_builder_append(&sb, debug_dir) || !string_builder_appendn(&sb, depmod_path, ko_len) || !string_builder_null_terminate(&sb)) return &drgn_enomem; - err = drgn_module_try_standard_file(module, sb.str, -1, true, - NULL); + err = drgn_module_try_standard_file(module, options, sb.str, -1, + 
true, NULL); if (err || !drgn_module_wants_file(module)) return err; @@ -770,8 +769,8 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, if (!string_builder_append(&sb, ".debug") || !string_builder_null_terminate(&sb)) return &drgn_enomem; - err = drgn_module_try_standard_file(module, sb.str, -1, true, - NULL); + err = drgn_module_try_standard_file(module, options, sb.str, -1, + true, NULL); if (err || !drgn_module_wants_file(module)) return err; } @@ -781,7 +780,8 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, !string_builder_appendn(&sb, depmod_path, depmod_path_len) || !string_builder_null_terminate(&sb)) return &drgn_enomem; - return drgn_module_try_standard_file(module, sb.str, -1, true, NULL); + return drgn_module_try_standard_file(module, options, sb.str, -1, true, + NULL); } // This has a weird calling convention so that the caller can call diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 16e6f2866..1b89d0557 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -7,6 +7,7 @@ #include "drgn_internal.h" struct depmod_index; +struct drgn_debug_info_options; struct drgn_module_standard_files_state; struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog); @@ -32,10 +33,11 @@ linux_kernel_loaded_module_iterator_create(struct drgn_program *prog, struct drgn_error * drgn_module_try_vmlinux_files(struct drgn_module *module, - struct drgn_module_standard_files_state *state); + const struct drgn_debug_info_options *options); struct drgn_error * drgn_module_try_linux_kmod_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, struct drgn_module_standard_files_state *state); #define KDUMP_SIGNATURE "KDUMP " diff --git a/libdrgn/python/debug_info_options.c b/libdrgn/python/debug_info_options.c new file mode 100644 index 000000000..2502bc34c --- /dev/null +++ b/libdrgn/python/debug_info_options.c @@ -0,0 +1,229 @@ +// Copyright (c) Meta Platforms, Inc. 
and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" + +static PyObject *DebugInfoOptions_wrap_list(const char * const *list) +{ + if (!list) + Py_RETURN_NONE; + size_t n = 0; + while (list[n]) + n++; + _cleanup_pydecref_ PyObject *ret = PyTuple_New(n); + if (!ret) + return NULL; + for (size_t i = 0; i < n; i++) { + PyObject *item = PyUnicode_FromString(list[i]); + if (!item) + return NULL; + PyTuple_SET_ITEM(ret, i, item); + } + return_ptr(ret); +} + +#define DebugInfoOptions_SETTER(name) \ +static int DebugInfoOptions_set_##name(DebugInfoOptions *self, PyObject *value, \ + void *arg) \ +{ \ + SETTER_NO_DELETE(#name, value); \ + if (!DebugInfoOptions_##name##_converter(value, self->options)) \ + return -1; \ + return 0; \ +} + +#define LIST_OPTION(name) \ +static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ +{ \ + PATH_SEQUENCE_ARG(list, .null_terminate = true); \ + if (!path_sequence_converter(o, &list)) \ + return 0; \ + struct drgn_error *err = \ + drgn_debug_info_options_set_##name(p, list.paths); \ + if (err) { \ + set_drgn_error(err); \ + return 0; \ + } \ + return 1; \ +} \ + \ +static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) \ +{ \ + const char * const *list = \ + drgn_debug_info_options_get_##name(self->options); \ + return DebugInfoOptions_wrap_list(list); \ +} \ +DebugInfoOptions_SETTER(name) + +#define BOOL_OPTION(name, default_value) \ +static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ +{ \ + int r = PyObject_IsTrue(o); \ + if (r < 0) \ + return 0; \ + drgn_debug_info_options_set_##name(p, r); \ + return 1; \ +} \ + \ +static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) \ +{ \ + Py_RETURN_BOOL(drgn_debug_info_options_get_##name(self->options)); \ +} \ +DebugInfoOptions_SETTER(name) + +#define ENUM_OPTION(name, type, default_value) \ +static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ +{ \ + if 
(!PyObject_TypeCheck(o, (PyTypeObject *)type##_class)) { \ + PyErr_Format(PyExc_TypeError, "%s must be %s", #name, \ + ((PyTypeObject *)type##_class)->tp_name); \ + return 0; \ + } \ + _cleanup_pydecref_ PyObject *value_obj = \ + PyObject_GetAttrString(o, "value"); \ + if (!value_obj) \ + return 0; \ + long value = PyLong_AsLong(value_obj); \ + if (value == -1 && PyErr_Occurred()) \ + return 0; \ + drgn_debug_info_options_set_##name(p, value); \ + return 1; \ +} \ + \ +static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) \ +{ \ + return PyObject_CallFunction(type##_class, "i", \ + drgn_debug_info_options_get_##name(self->options));\ +} \ +DebugInfoOptions_SETTER(name) + +DRGN_DEBUG_INFO_OPTIONS + +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + +static inline void +drgn_debug_info_options_destroyp(struct drgn_debug_info_options **optionsp) +{ + drgn_debug_info_options_destroy(*optionsp); +} + +static DebugInfoOptions *DebugInfoOptions_new(PyTypeObject *subtype, + PyObject *args, PyObject *kwds) +{ + struct drgn_error *err; + + _cleanup_(drgn_debug_info_options_destroyp) + struct drgn_debug_info_options *options = NULL; + err = drgn_debug_info_options_create(&options); + if (err) + return set_drgn_error(err); + + // Parse the positional options argument manually so that we can parse + // the keyword arguments directly into the struct + // drgn_debug_info_options. 
+ if (PyTuple_GET_SIZE(args) > 0) { + PyObject *source = PyTuple_GET_ITEM(args, 0); + if (source != Py_None) { + if (!PyObject_TypeCheck(source, + &DebugInfoOptions_type)) { + PyErr_SetString(PyExc_TypeError, + "options must be DebugInfoOptions"); + return NULL; + } + err = drgn_debug_info_options_copy(options, + ((DebugInfoOptions *)source)->options); + if (err) { + set_drgn_error(err); + return NULL; + } + } + } + +#define BOOL_OPTION(name, default_value) LIST_OPTION(name) +#define ENUM_OPTION(name, type, default_value) LIST_OPTION(name) + static char *keywords[] = { + "", +#define LIST_OPTION(name) #name, + DRGN_DEBUG_INFO_OPTIONS +#undef LIST_OPTION + NULL, + }; + PyObject *unused; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "|O$" +#define LIST_OPTION(name) "O&" + DRGN_DEBUG_INFO_OPTIONS +#undef LIST_OPTION + ":DebugInfoOptions", keywords, &unused +#define LIST_OPTION(name) , DebugInfoOptions_##name##_converter, options + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + )) + return NULL; + + DebugInfoOptions *ret = + (DebugInfoOptions *)subtype->tp_alloc(subtype, 0); + if (ret) + ret->options = no_cleanup_ptr(options); + return ret; +} + +static void DebugInfoOptions_dealloc(DebugInfoOptions *self) +{ + if (self->prog) + Py_DECREF(self->prog); + else + drgn_debug_info_options_destroy(self->options); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyGetSetDef DebugInfoOptions_getset[] = { +#define LIST_OPTION(name) \ + {#name, (getter)DebugInfoOptions_get_##name, \ + (setter)DebugInfoOptions_set_##name, \ + drgn_DebugInfoOptions_##name##_DOC}, +#define BOOL_OPTION(name, default_value) LIST_OPTION(name) +#define ENUM_OPTION(name, type, default_value) LIST_OPTION(name) + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + {}, +}; + +static PyObject *DebugInfoOptions_repr(PyObject *self) +{ + _cleanup_pydecref_ PyObject *parts = PyList_New(0); + if (!parts) + return NULL; + if 
(append_string(parts, "DebugInfoOptions(")) + return NULL; + bool first = true; + for (size_t i = 0; DebugInfoOptions_getset[i].name; i++) { + if (append_format(parts, "%s%s=", first ? "" : ", ", + DebugInfoOptions_getset[i].name) + || append_attr_repr(parts, self, + DebugInfoOptions_getset[i].name)) + return NULL; + first = false; + } + if (append_string(parts, ")")) + return NULL; + return join_strings(parts); +} + +PyTypeObject DebugInfoOptions_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.DebugInfoOptions", + .tp_dealloc = (destructor)DebugInfoOptions_dealloc, + .tp_basicsize = sizeof(DebugInfoOptions), + .tp_repr = DebugInfoOptions_repr, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = drgn_DebugInfoOptions_DOC, + .tp_getset = DebugInfoOptions_getset, + .tp_new = (newfunc)DebugInfoOptions_new, +}; diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 0b0fae33d..522a2aaca 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -197,6 +197,14 @@ typedef struct { struct pyobjectp_set objects; } Program; +typedef struct { + PyObject_HEAD + struct drgn_debug_info_options *options; + // If this is a Program's default debug info options, the Program. + // Otherwise, NULL. 
+ Program *prog; +} DebugInfoOptions; + typedef struct { PyObject_HEAD struct drgn_thread thread; @@ -288,6 +296,7 @@ extern PyObject *SupplementaryFileKind_class; extern PyObject *SymbolBinding_class; extern PyObject *SymbolKind_class; extern PyObject *TypeKind_class; +extern PyTypeObject DebugInfoOptions_type; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; extern PyTypeObject ExtraModule_type; diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index 042981f68..e9304d82d 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -288,6 +288,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) }) if (add_module_constants(m) || + add_type(m, &DebugInfoOptions_type) || add_type(m, &Language_type) || add_languages() || add_type(m, &DrgnObject_type) || add_type(m, &Module_type) || diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index c27644430..c05fef57b 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1271,29 +1271,28 @@ static PyObject *Program_module(Program *self, PyObject *arg) return Module_wrap(module); } -static PyObject *Program_get_debug_info_path(Program *self, void *arg) +static DebugInfoOptions *Program_get_debug_info_options(Program *self, void *arg) { - return PyUnicode_FromString(drgn_program_debug_info_path(&self->prog)); + DebugInfoOptions *options = call_tp_alloc(DebugInfoOptions); + if (options) { + options->options = drgn_program_debug_info_options(&self->prog); + options->prog = self; + Py_INCREF(self); + } + return options; } -static int Program_set_debug_info_path(Program *self, PyObject *value, void *arg) +static int Program_set_debug_info_options(Program *self, PyObject *value, void *arg) { - SETTER_NO_DELETE("debug_info_path", value); - const char *path; - if (value == Py_None) { - path = NULL; - } else { - if (!PyUnicode_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "debug_info_path must be str or None"); - return -1; - } - path = PyUnicode_AsUTF8(value); 
- if (!path) - return -1; + SETTER_NO_DELETE("debug_info_options", value); + if (!PyObject_TypeCheck(value, &DebugInfoOptions_type)) { + PyErr_SetString(PyExc_TypeError, + "debug_info_options must be DebugInfoOptions"); + return -1; } struct drgn_error *err = - drgn_program_set_debug_info_path(&self->prog, path); + drgn_debug_info_options_copy(drgn_program_debug_info_options(&self->prog), + ((DebugInfoOptions *)value)->options); if (err) { set_drgn_error(err); return -1; @@ -1997,8 +1996,9 @@ static PyGetSetDef Program_getset[] = { drgn_Program_platform_DOC}, {"language", (getter)Program_get_language, (setter)Program_set_language, drgn_Program_language_DOC}, - {"debug_info_path", (getter)Program_get_debug_info_path, - (setter)Program_set_debug_info_path, drgn_Program_debug_info_path_DOC}, + {"debug_info_options", (getter)Program_get_debug_info_options, + (setter)Program_set_debug_info_options, + drgn_Program_debug_info_options_DOC}, {}, }; diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index 8c6f8f731..cdb8fe731 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -576,7 +576,7 @@ def test_extra_module_address_range(self): class TestLinuxUserspaceCoreDump(TestCase): def setUp(self): self.prog = Program() - self.prog.debug_info_path = None + self.prog.debug_info_options.directories = () self.prog.set_enabled_debug_info_finders(["standard"]) def test_loaded_modules(self): @@ -1735,7 +1735,7 @@ def test_type_error(self): class TestStandardDebugInfoFinder(TestCase): def setUp(self): self.prog = Program() - self.prog.debug_info_path = None + self.prog.debug_info_options.directories = () self.prog.set_enabled_debug_info_finders(["standard"]) def test_by_module_name(self): @@ -1890,7 +1890,7 @@ def test_by_build_id(self): module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id - self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.debug_info_options.directories = ("", ".debug", 
str(debug_dir)) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -1918,7 +1918,7 @@ def test_by_build_id_separate(self): module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id - self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -1947,7 +1947,7 @@ def test_by_build_id_from_loaded(self): module = self.prog.extra_module(bin_dir / "binary", create=True)[0] - self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -1975,7 +1975,7 @@ def test_by_gnu_debuglink(self): ) ) - self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) for i, debug_path in enumerate( ( bin_dir / "binary.debug", @@ -2062,7 +2062,7 @@ def test_by_gnu_debuglink_crc_mismatch(self): debug_path.write_bytes(debug_file_contents) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] - self.prog.debug_info_path = "" + self.prog.debug_info_options.directories = ("",) self.prog.load_module_debug_info(module) self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -2283,7 +2283,7 @@ def test_gnu_debugaltlink_debug_directories(self): alt_path.parent.mkdir() alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) - self.prog.debug_info_path = ":.debug:" + str(debug_dir) + self.prog.debug_info_options.directories = ("", ".debug", 
str(debug_dir)) for i, debugaltlink in enumerate( ( bin_dir / "debug/.dwz/alt.debug", diff --git a/tests/test_debug_info_options.py b/tests/test_debug_info_options.py new file mode 100644 index 000000000..4a86c4458 --- /dev/null +++ b/tests/test_debug_info_options.py @@ -0,0 +1,56 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from drgn import DebugInfoOptions, Program +from tests import TestCase + + +class TestDebugInfoOptions(TestCase): + def test_list_default(self): + self.assertEqual( + DebugInfoOptions().directories, ("", ".debug", "/usr/lib/debug") + ) + + def test_list_init(self): + self.assertEqual( + DebugInfoOptions(directories=["foo", "bar"]).directories, ("foo", "bar") + ) + self.assertRaises(TypeError, DebugInfoOptions, directories=None) + + def test_list_copy(self): + self.assertEqual( + DebugInfoOptions(DebugInfoOptions(directories=["foo", "bar"])).directories, + ("foo", "bar"), + ) + + def test_list_set(self): + options = DebugInfoOptions() + options.directories = ("foo", "bar") + self.assertEqual(options.directories, ("foo", "bar")) + with self.assertRaises(TypeError): + DebugInfoOptions().directories = None + + def test_del(self): + with self.assertRaises(AttributeError): + del DebugInfoOptions().directories + + def test_repr(self): + self.assertIn("directories=()", repr(DebugInfoOptions(directories=()))) + + +class TestProgramDebugInfoOptions(TestCase): + def test_default(self): + self.assertEqual( + Program().debug_info_options.directories, DebugInfoOptions().directories + ) + + def test_assign(self): + prog = Program() + prog.debug_info_options.directories = ("foo", "bar") + prog.debug_info_options = DebugInfoOptions(directories=("bar", "baz")) + self.assertEqual(prog.debug_info_options.directories, ("bar", "baz")) + + def test_assign_list(self): + prog = Program() + prog.debug_info_options.directories = ("bar", "foo") + self.assertEqual(prog.debug_info_options.directories, ("bar", 
"foo")) From 2cd7bc69a5733c6f0f049a163e96226349b8eab8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 23 Jan 2025 22:53:27 -0800 Subject: [PATCH 033/166] Add Program.find_standard_debug_info() Many use cases for custom debugging information finders are just slight variations of the standard debugging information finder. Add a method exposing the standard debugging information finder and taking DebugInfoOptions for such finders to use. Signed-off-by: Omar Sandoval --- _drgn.pyi | 18 ++++++++++ docs/advanced_usage.rst | 5 +-- libdrgn/debug_info.c | 41 ++++++++++++++++++---- libdrgn/drgn.h | 11 ++++++ libdrgn/python/program.c | 73 ++++++++++++++++++++++++++++++++++++++-- tests/test_debug_info.py | 27 +++++++++++++++ 6 files changed, 164 insertions(+), 11 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index c5a5758e1..68f205919 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1028,6 +1028,24 @@ class Program: :attr:`Module.loaded_file_status` and :attr:`Module.debug_file_status`. """ ... + + def find_standard_debug_info( + self, modules: Iterable[Module], options: Optional[DebugInfoOptions] = None + ) -> None: + """ + Load debugging information for the given modules from the standard + locations. + + This is equivalent to the ``standard`` debugging information finder + that is registered by default. It is intended for use by other + debugging information finders that need a variation of the standard + finder (e.g., after installing something or setting specific options). + + :param modules: Modules to load debugging information for. + :param options: Options to use when searching for debugging + information. If ``None`` or not given, this uses + :attr:`self.debug_info_options <Program.debug_info_options>`. + """ cache: Dict[Any, Any] """ Dictionary for caching program metadata. 
diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 725f06041..239ad4845 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -121,10 +121,11 @@ an example for getting debugging symbols on Fedora Linux using DNF: + sorted(packages) ) - # Leave the rest to the standard debug info finder. + # Now that it's installed, try the standard locations. + modules[0].prog.find_standard_debug_info(modules) - prog.register_debug_info_finder("dnf", dnf_debug_info_finder, enable_index=0) + prog.register_debug_info_finder("dnf", dnf_debug_info_finder, enable_index=-1) Currently, debug info finders must be configured explicitly by the user. In the future, there will be a plugin system for doing so automatically. diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 845e769f0..c5b5a7184 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2219,6 +2219,11 @@ drgn_module_try_standard_files(struct drgn_module *module, struct drgn_error *err; struct drgn_program *prog = module->prog; + // This can't happen when called from the standard debug info finder, + // but it can from drgn_find_standard_debug_info(). + if (!drgn_module_wants_file(module)) + return NULL; + drgn_module_try_files_log(module, "trying standard paths for"); // If we need a supplementary file, try that first. 
@@ -2333,12 +2338,11 @@ drgn_module_standard_files_state_deinit(struct drgn_module_standard_files_state } static struct drgn_error * -drgn_standard_module_file_find(struct drgn_module * const *modules, - size_t num_modules, void *arg) +drgn_standard_debug_info_find(struct drgn_module * const *modules, + size_t num_modules, void *arg) { struct drgn_error *err; - struct drgn_debug_info_options *options = - &modules[0]->prog->dbinfo.options; + struct drgn_debug_info_options *options = arg; if (drgn_log_is_enabled(modules[0]->prog, DRGN_LOG_DEBUG)) { _cleanup_free_ char *options_str = @@ -2346,7 +2350,9 @@ drgn_standard_module_file_find(struct drgn_module * const *modules, if (!options_str) return &drgn_enomem; drgn_log_debug(modules[0]->prog, - "trying standard debug info finder with %s", + "trying standard debug info finder with %s%s", + options == &modules[0]->prog->dbinfo.options + ? "" : "given ", options_str); } @@ -2361,6 +2367,27 @@ drgn_standard_module_file_find(struct drgn_module * const *modules, return NULL; } +LIBDRGN_PUBLIC struct drgn_error * +drgn_find_standard_debug_info(struct drgn_module * const *modules, + size_t num_modules, + struct drgn_debug_info_options *options) +{ + if (num_modules == 0) + return NULL; + + struct drgn_program *prog = modules[0]->prog; + for (size_t i = 0; i < num_modules; i++) { + if (modules[i]->prog != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "modules are from different programs"); + } + } + + if (!options) + options = &modules[0]->prog->dbinfo.options; + return drgn_standard_debug_info_find(modules, num_modules, options); +} + #if WITH_DEBUGINFOD static int count_columns(const char *s, size_t n) { @@ -5441,13 +5468,13 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, prog, 0); const struct drgn_debug_info_finder_ops standard_debug_info_finder_ops = { - .find = drgn_standard_module_file_find, + .find = drgn_standard_debug_info_find, }; drgn_program_register_debug_info_finder_impl(prog, 
&dbinfo->standard_debug_info_finder, "standard", &standard_debug_info_finder_ops, - prog, 0); + &dbinfo->options, 0); drgn_debug_info_options_init(&dbinfo->options); #if WITH_DEBUGINFOD dbinfo->debuginfod_client = NULL; diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 3e0f8b6ce..69e307a7e 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1696,6 +1696,17 @@ drgn_debug_info_options_set_directories(struct drgn_debug_info_options *options, struct drgn_debug_info_options * drgn_program_debug_info_options(struct drgn_program *prog); +/** + * Load debugging information for the given modules from the standard locations. + * + * @param[in] options Options to use, or @p NULL to use the program's default + * options. + */ +struct drgn_error * +drgn_find_standard_debug_info(struct drgn_module * const *modules, + size_t num_modules, + struct drgn_debug_info_options *options); + /** * Try to use the given file for a module. * diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index c05fef57b..3af37ba7d 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1332,8 +1332,6 @@ static PyObject *Program_load_default_debug_info(Program *self) Py_RETURN_NONE; } -DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); - static PyObject *Program_load_module_debug_info(Program *self, PyObject *args) { size_t num_modules = PyTuple_GET_SIZE(args); @@ -1366,6 +1364,73 @@ static PyObject *Program_load_module_debug_info(Program *self, PyObject *args) Py_RETURN_NONE; } +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); + +static PyObject *Program_find_standard_debug_info(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"modules", "options", NULL}; + PyObject *modules_obj; + PyObject *options_obj = Py_None; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O|O:find_standard_debug_info", + keywords, &modules_obj, &options_obj)) + return NULL; + + _cleanup_pydecref_ PyObject *it = 
PyObject_GetIter(modules_obj); + if (!it) + return NULL; + + Py_ssize_t length_hint = PyObject_LengthHint(modules_obj, 1); + if (length_hint == -1) + return 0; + + VECTOR(drgn_module_vector, modules); + if (!drgn_module_vector_reserve(&modules, length_hint)) + return PyErr_NoMemory(); + + for (;;) { + _cleanup_pydecref_ PyObject *item = PyIter_Next(it); + if (!item) + break; + + if (!PyObject_TypeCheck(item, &Module_type)) { + return PyErr_Format(PyExc_TypeError, + "expected Module, not %s", + Py_TYPE(item)->tp_name); + } + struct drgn_module *module = ((Module *)item)->module; + if (module->prog != &self->prog) { + PyErr_SetString(PyExc_ValueError, + "module from wrong program"); + return NULL; + } + if (!drgn_module_vector_append(&modules, &module)) + return PyErr_NoMemory(); + } + if (PyErr_Occurred()) + return NULL; + + struct drgn_debug_info_options *options; + if (options_obj == Py_None) { + options = NULL; + } else if (PyObject_TypeCheck(options_obj, &DebugInfoOptions_type)) { + options = ((DebugInfoOptions *)options_obj)->options; + } else { + PyErr_SetString(PyExc_TypeError, + "options must be DebugInfoOptions or None"); + return NULL; + } + + err = drgn_find_standard_debug_info(drgn_module_vector_begin(&modules), + drgn_module_vector_size(&modules), + options); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + static PyObject *Program_read(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = {"address", "size", "physical", NULL}; @@ -1915,6 +1980,10 @@ static PyMethodDef Program_methods[] = { drgn_Program_load_default_debug_info_DOC}, {"load_module_debug_info", (PyCFunction)Program_load_module_debug_info, METH_VARARGS, drgn_Program_load_module_debug_info_DOC}, + {"find_standard_debug_info", + (PyCFunction)Program_find_standard_debug_info, + METH_VARARGS | METH_KEYWORDS, + drgn_Program_find_standard_debug_info_DOC}, {"__getitem__", (PyCFunction)Program_subscript, METH_O | METH_COEXIST, drgn_Program___getitem___DOC}, 
{"__contains__", (PyCFunction)Program_contains, METH_O | METH_COEXIST, diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index cdb8fe731..69373ed9a 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -19,6 +19,7 @@ from _drgn_util.elf import ET, PT, SHF, SHT from drgn import ( + DebugInfoOptions, MainModule, MissingDebugInfoError, ModuleFileStatus, @@ -1954,6 +1955,32 @@ def test_by_build_id_from_loaded(self): self.assertEqual(module.loaded_file_path, str(loadable_path)) self.assertEqual(module.debug_file_path, str(debug_path)) + def test_by_build_id_method(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + binary_path = build_id_dir / build_id.hex()[2:] + binary_path.write_bytes(compile_dwarf((), sections=(ALLOCATED_SECTION,))) + + module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module.build_id = build_id + + self.prog.find_standard_debug_info( + [module], + options=DebugInfoOptions(directories=("", ".debug", str(debug_dir))), + ) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + def test_by_gnu_debuglink(self): with tempfile.TemporaryDirectory( prefix="bin-" From 5e7bec4ef9016db6cbda49c826f89c34d1571994 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 24 Jan 2025 11:25:09 -0800 Subject: [PATCH 034/166] Add debug info options for disabling search methods The standard debug info finder tries a bunch of things, so for flexibility let's add debug info options and corresponding command line options controlling 
each of them. Signed-off-by: Omar Sandoval --- _drgn.pyi | 86 ++++++++++++++++++++++++++------ drgn/cli.py | 73 ++++++++++++++++++++++++++- libdrgn/debug_info.c | 27 ++++++++-- libdrgn/debug_info_options.c | 1 - libdrgn/debug_info_options.h | 9 +++- libdrgn/drgn.h | 67 +++++++++++++++++++++++++ tests/test_debug_info_options.py | 16 ++++++ 7 files changed, 258 insertions(+), 21 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 68f205919..592a56ab1 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1407,6 +1407,13 @@ class DebugInfoOptions: __options: Optional[DebugInfoOptions] = None, *, directories: Iterable[Path] = ..., + try_module_name: bool = ..., + try_build_id: bool = ..., + try_debug_link: bool = ..., + try_procfs: bool = ..., + try_embedded_vdso: bool = ..., + try_reuse: bool = ..., + try_supplementary: bool = ..., ) -> None: """ Create a ``DebugInfoOptions``. @@ -1421,26 +1428,77 @@ class DebugInfoOptions: """ Directories to search for debugging information files. - The standard debugging information finder supports searching for files by - *build ID* (a unique byte string present in both the :ref:`loaded file - ` and the :ref:`debug file `) and by - *debug link* (a name and checksum in the loaded file that refers to the - debug file). + Defaults to ``("", ".debug", "/usr/lib/debug")``, which should work out of + the box on most Linux distributions. - This option contains the directories that the standard debugging - information finder searches. + This controls searches by build ID (see :attr:`try_build_id`) and debug + link (see :attr:`try_debug_link`). + """ + try_module_name: bool + """ + If the name of a module resembles a filesystem path, try the file at that + path. + + Defaults to ``True``. + """ + try_build_id: bool + """ + Try finding files using build IDs. + + Defaults to ``True``. - Searches by build ID ignore relative paths. 
They check under each absolute - path for a file named ``.build-id/xx/yyyy`` (for loaded files) or + A *build ID* is a unique byte string present in a module's :ref:`loaded + file ` and :ref:`debug file `. If + configured correctly, it is also present in core dumps and provides a + reliable way to identify the correct files for a module. + + Searches by build ID check under each absolute path in :attr:`directories` + for a file named ``.build-id/xx/yyyy`` (for loaded files) or ``.build-id/xx/yyyy.debug`` (for debug files), where ``xxyyyy`` is the lowercase hexadecimal representation of the build ID. + """ + try_debug_link: bool + """ + Try finding files using debug links. - Searches by debug link check every path for a file with the name given by - the debug link. Relative paths are relative to the directory containing the - loaded file. An empty path means the directory containing the loaded file. + Defaults to ``True``. + + A *debug link* is a pointer in a module's :ref:`loaded file + ` to its :ref:`debug file `. It + consists of a name and a checksum. + + Searches by debug link check every path in :attr:`directories` for a file + with a matching name and checksum. Relative paths in :attr:`directories` + are relative to the directory containing the loaded file. An empty path in + :attr:`directories` means the directory containing the loaded file. + """ + try_procfs: bool + """ + For local processes, try getting files via the ``proc`` filesystem (e.g., + :manpage:`proc_pid_exe(5)`, :manpage:`proc_pid_map_files(5)`). + + Defaults to ``True``. + """ + try_embedded_vdso: bool + """ + Try reading the vDSO embedded in a process's memory/core dump. + + Defaults to ``True``. + + The entire (stripped) vDSO is included in core dumps, so this is a reliable + way to get it. + """ + try_reuse: bool + """ + Try reusing a module's loaded file as its debug file and vice versa. + + Defaults to ``True``. 
+ """ + try_supplementary: bool + """ + Try finding :ref:`supplementary files `. - The default is ``("", ".debug", "/usr/lib/debug")``, which should work out - of the box on most Linux distributions. + Defaults to ``True``. """ def get_default_prog() -> Program: diff --git a/drgn/cli.py b/drgn/cli.py index 96f922cc4..68bea6cfc 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -14,7 +14,7 @@ import runpy import shutil import sys -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Dict, Optional, Tuple import drgn from drgn.internal.repl import interact, readline @@ -146,6 +146,55 @@ def _displayhook(value: Any) -> None: setattr(builtins, "_", value) +class _DebugInfoOptionAction(argparse.Action): + _choices: Dict[str, Tuple[str, Any]] + + @staticmethod + def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: + return { + option: ("try_" + option.replace("-", "_"), value) + for option in ( + "module-name", + "build-id", + "debug-link", + "procfs", + "embedded-vdso", + "reuse", + "supplementary", + ) + } + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Any, + option_string: Optional[str] = None, + ) -> None: + dest = getattr(namespace, self.dest, None) + if dest is None: + dest = {} + setattr(namespace, self.dest, dest) + + for option in values.split(","): + try: + name, value = self._choices[option] + except KeyError: + raise argparse.ArgumentError( + self, + f"invalid option: {option!r} (choose from {', '.join(self._choices)})", + ) + dest[name] = value + + +class _TryDebugInfoOptionAction(_DebugInfoOptionAction): + _choices = _DebugInfoOptionAction._bool_options(True) + + +class _NoDebugInfoOptionAction(_DebugInfoOptionAction): + _choices = _DebugInfoOptionAction._bool_options(False) + + def _main() -> None: handler = logging.StreamHandler() color = hasattr(sys.stderr, "fileno") and os.isatty(sys.stderr.fileno()) @@ -209,6 +258,24 @@ def _main() -> None: "which is assumed 
not to correspond to a loaded executable, library, or module. " "This option may be given more than once", ) + symbol_group.add_argument( + "--try-symbols-by", + dest="symbols_by", + metavar="METHOD[,METHOD...]", + action=_TryDebugInfoOptionAction, + help="enable loading debugging symbols using the given methods. " + "Choices are " + ", ".join(_TryDebugInfoOptionAction._choices) + ". " + "This option may be given more than once", + ) + symbol_group.add_argument( + "--no-symbols-by", + dest="symbols_by", + metavar="METHOD[,METHOD...]", + action=_NoDebugInfoOptionAction, + help="disable loading debugging symbols using the given methods. " + "Choices are " + ", ".join(_NoDebugInfoOptionAction._choices) + ". " + "This option may be given more than once", + ) symbol_group.add_argument( "--debug-directory", dest="debug_directories", @@ -323,6 +390,10 @@ def _main() -> None: # E.g., "not an ELF core file" sys.exit(f"error: {e}") + if args.symbols_by: + for option, value in args.symbols_by.items(): + setattr(prog.debug_info_options, option, value) + if args.debug_directories is not None: if args.no_default_debug_directories: prog.debug_info_options.directories = args.debug_directories diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index c5b5a7184..361dc71f3 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1678,11 +1678,15 @@ static const uint64_t MAX_MEMORY_READ_FOR_DEBUG_INFO = UINT64_C(1048576); }) static struct drgn_error * -drgn_module_try_vdso_in_core(struct drgn_module *module) +drgn_module_try_vdso_in_core(struct drgn_module *module, + const struct drgn_debug_info_options *options) { struct drgn_error *err; struct drgn_program *prog = module->prog; + if (!options->try_embedded_vdso) + return NULL; + // The Linux kernel has included the entire vDSO in core dumps since // Linux kernel commit f47aef55d9a1 ("[PATCH] i386 vDSO: use // VM_ALWAYSDUMP") (in v2.6.20). Try to read it from program memory. 
@@ -1777,6 +1781,9 @@ drgn_module_try_standard_supplementary_files(struct drgn_module *module, { struct drgn_error *err; + if (!options->try_supplementary) + return NULL; + const char *debug_file_path; const char *debugaltlink_path; if (drgn_module_wanted_supplementary_debug_file(module, @@ -2047,6 +2054,9 @@ static struct drgn_error *drgn_module_try_proc_files(struct drgn_module *module, { struct drgn_program *prog = module->prog; + if (!options->try_procfs) + return NULL; + *tried = false; if (module->kind == DRGN_MODULE_MAIN) { #define FORMAT "/proc/%ld/exe" @@ -2078,6 +2088,9 @@ drgn_module_try_files_by_build_id(struct drgn_module *module, { struct drgn_error *err; + if (!options->try_build_id) + return NULL; + size_t build_id_len; const char *build_id_str = drgn_module_build_id(module, NULL, &build_id_len); @@ -2126,6 +2139,9 @@ drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module, struct drgn_error *err; struct drgn_program *prog = module->prog; + if (!options->try_debug_link) + return NULL; + struct drgn_elf_file *file = module->loaded_file; if (!file || !file->scns[DRGN_SCN_GNU_DEBUGLINK]) return NULL; @@ -2233,7 +2249,8 @@ drgn_module_try_standard_files(struct drgn_module *module, // If a previous attempt used a loadable file with debug info but didn't // want both, we might be able to reuse it. 
- if (module->loaded_file_status == DRGN_MODULE_FILE_WANT) { + if (options->try_reuse + && module->loaded_file_status == DRGN_MODULE_FILE_WANT) { struct drgn_elf_file *reuse_file = NULL; if (module->debug_file && module->debug_file->is_loadable) reuse_file = module->debug_file; @@ -2250,7 +2267,8 @@ drgn_module_try_standard_files(struct drgn_module *module, return err; } } - if (module->debug_file_status == DRGN_MODULE_FILE_WANT + if (options->try_reuse + && module->debug_file_status == DRGN_MODULE_FILE_WANT && module->loaded_file && drgn_elf_file_has_dwarf(module->loaded_file)) { drgn_log_debug(prog, @@ -2268,7 +2286,7 @@ drgn_module_try_standard_files(struct drgn_module *module, // symlink in /proc. bool tried_proc_symlink = false; if (module->kind == DRGN_MODULE_VDSO) { - err = drgn_module_try_vdso_in_core(module); + err = drgn_module_try_vdso_in_core(module, options); if (err || !drgn_module_wants_file(module)) return err; } else if (drgn_program_is_userspace_process(prog)) { @@ -2308,6 +2326,7 @@ drgn_module_try_standard_files(struct drgn_module *module, // /proc symlink, then we already tried the file that the path is // supposed to refer to, so don't try again. } else if (module->kind != DRGN_MODULE_VDSO + && options->try_module_name && !tried_proc_symlink && strchr(module->name, '/')) { err = drgn_module_try_standard_file(module, options, diff --git a/libdrgn/debug_info_options.c b/libdrgn/debug_info_options.c index 2cdd12476..93c03ab64 100644 --- a/libdrgn/debug_info_options.c +++ b/libdrgn/debug_info_options.c @@ -233,7 +233,6 @@ static bool drgn_format_debug_info_options_list(struct string_builder *sb, return string_builder_append(sb, i == 1 ? 
",)" : ")"); } -__attribute__((__unused__)) static bool drgn_format_debug_info_options_bool(struct string_builder *sb, const char *name, bool *first, bool value, bool default_value) diff --git a/libdrgn/debug_info_options.h b/libdrgn/debug_info_options.h index 39e6ceead..9c74318c3 100644 --- a/libdrgn/debug_info_options.h +++ b/libdrgn/debug_info_options.h @@ -8,7 +8,14 @@ // X macro expanding to all debug info options. #define DRGN_DEBUG_INFO_OPTIONS \ - LIST_OPTION(directories) + LIST_OPTION(directories) \ + BOOL_OPTION(try_module_name, true) \ + BOOL_OPTION(try_build_id, true) \ + BOOL_OPTION(try_debug_link, true) \ + BOOL_OPTION(try_procfs, true) \ + BOOL_OPTION(try_embedded_vdso, true) \ + BOOL_OPTION(try_reuse, true) \ + BOOL_OPTION(try_supplementary, true) struct drgn_debug_info_options { #define LIST_OPTION(name) const char * const *name; diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 69e307a7e..a82df7298 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1687,6 +1687,73 @@ drgn_debug_info_options_set_directories(struct drgn_debug_info_options *options, const char * const *value) __attribute__((__nonnull__(1, 2))); +/** Get whether to try module names that look like filesystem paths. */ +bool +drgn_debug_info_options_get_try_module_name(const struct drgn_debug_info_options *options); + +/** Set whether to try module names that look like filesystem paths. */ +void +drgn_debug_info_options_set_try_module_name(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try files by build ID. */ +bool +drgn_debug_info_options_get_try_build_id(const struct drgn_debug_info_options *options); + +/** Set whether to try files by build ID. */ +void +drgn_debug_info_options_set_try_build_id(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try files by debug link. */ +bool +drgn_debug_info_options_get_try_debug_link(const struct drgn_debug_info_options *options); + +/** Set whether to try files by debug link. 
*/ +void +drgn_debug_info_options_set_try_debug_link(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try files via procfs for local processes. */ +bool +drgn_debug_info_options_get_try_procfs(const struct drgn_debug_info_options *options); + +/** Set whether to try files via procfs for local processes. */ +void +drgn_debug_info_options_set_try_procfs(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try the vDSO embedded in a process's memory/core dump. */ +bool +drgn_debug_info_options_get_try_embedded_vdso(const struct drgn_debug_info_options *options); + +/** Set whether to try the vDSO embedded in a process's memory/core dump. */ +void +drgn_debug_info_options_set_try_embedded_vdso(struct drgn_debug_info_options *options, + bool value); + +/** + * Get whether to reuse a module's loaded file as its debug file or vice versa. + */ +bool +drgn_debug_info_options_get_try_reuse(const struct drgn_debug_info_options *options); + +/** + * Set whether to reuse a module's loaded file as its debug file or vice versa. + */ +void +drgn_debug_info_options_set_try_reuse(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try finding supplementary files. */ +bool +drgn_debug_info_options_get_try_supplementary(const struct drgn_debug_info_options *options); + +/** Set whether to try finding supplementary files. */ +void +drgn_debug_info_options_set_try_supplementary(struct drgn_debug_info_options *options, + bool value); + /** * Get the default debugging information options for @p prog. 
* diff --git a/tests/test_debug_info_options.py b/tests/test_debug_info_options.py index 4a86c4458..eb968beeb 100644 --- a/tests/test_debug_info_options.py +++ b/tests/test_debug_info_options.py @@ -30,6 +30,22 @@ def test_list_set(self): with self.assertRaises(TypeError): DebugInfoOptions().directories = None + def test_bool_default(self): + self.assertIs(DebugInfoOptions().try_build_id, True) + + def test_bool_init(self): + self.assertIs(DebugInfoOptions(try_build_id=False).try_build_id, False) + + def test_bool_copy(self): + self.assertIs( + DebugInfoOptions(DebugInfoOptions(try_build_id=False)).try_build_id, False + ) + + def test_bool_set(self): + options = DebugInfoOptions() + options.try_build_id = False + self.assertIs(options.try_build_id, False) + def test_del(self): with self.assertRaises(AttributeError): del DebugInfoOptions().directories From f9ab6785c4ed45155ef92819157817191ddc2069 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 24 Jan 2025 16:48:39 -0800 Subject: [PATCH 035/166] Add a debug info option for kernel directories We have a very old feature request to support searching for vmlinux and kernel modules in a directory specified by the user. This is useful, for example, if someone is given a vmcore and the corresponding kernel files. Now that we have the flexibility of the module and debug info options APIs, we can finally support it and add a command line option. Closes #17. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 14 +- drgn/cli.py | 25 +++ libdrgn/debug_info_options.c | 4 + libdrgn/debug_info_options.h | 3 +- libdrgn/drgn.h | 21 +++ libdrgn/linux_kernel.c | 306 +++++++++++++++++++++++------------ 6 files changed, 272 insertions(+), 101 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 592a56ab1..bb3ea2479 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1414,6 +1414,7 @@ class DebugInfoOptions: try_embedded_vdso: bool = ..., try_reuse: bool = ..., try_supplementary: bool = ..., + kernel_directories: Iterable[Path] = ..., ) -> None: """ Create a ``DebugInfoOptions``. @@ -1432,7 +1433,8 @@ class DebugInfoOptions: the box on most Linux distributions. This controls searches by build ID (see :attr:`try_build_id`) and debug - link (see :attr:`try_debug_link`). + link (see :attr:`try_debug_link`), and for kernel files (see + :attr:`kernel_directories`). """ try_module_name: bool """ @@ -1500,6 +1502,16 @@ class DebugInfoOptions: Defaults to ``True``. """ + kernel_directories: Tuple[str, ...] + """ + Directories to search for the kernel image and loadable kernel modules. + + Defaults to ``("",)``. + + An empty path means to check standard paths (e.g., + :file:`/boot/vmlinux-{release}`, :file:`/lib/modules/{release}`) absolutely + and under each absolute path in :attr:`directories`. + """ def get_default_prog() -> Program: """ diff --git a/drgn/cli.py b/drgn/cli.py index 68bea6cfc..c94b5fd9f 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -290,6 +290,20 @@ def _main() -> None: action="store_true", help="don't search for debugging symbols by build ID and debug link in the standard locations", ) + symbol_group.add_argument( + "--kernel-directory", + dest="kernel_directories", + metavar="PATH", + type=str, + action="append", + help="search for the kernel image and loadable kernel modules in the given directory. 
" + "This option may be given more than once", + ) + symbol_group.add_argument( + "--no-default-kernel-directories", + action="store_true", + help="don't search for the kernel image and loadable kernel modules in the standard locations", + ) advanced_group = parser.add_argument_group("advanced") advanced_group.add_argument( @@ -404,6 +418,17 @@ def _main() -> None: elif args.no_default_debug_directories: prog.debug_info_options.directories = () + if args.kernel_directories is not None: + if args.no_default_kernel_directories: + prog.debug_info_options.kernel_directories = args.kernel_directories + else: + prog.debug_info_options.kernel_directories = ( + tuple(args.kernel_directories) + + prog.debug_info_options.kernel_directories + ) + elif args.no_default_kernel_directories: + prog.debug_info_options.kernel_directories = () + if args.default_symbols is None: args.default_symbols = {"default": True, "main": True} try: diff --git a/libdrgn/debug_info_options.c b/libdrgn/debug_info_options.c index 93c03ab64..d937167ea 100644 --- a/libdrgn/debug_info_options.c +++ b/libdrgn/debug_info_options.c @@ -12,6 +12,10 @@ static const char * const drgn_debug_info_options_default_directories[] = { "", ".debug", "/usr/lib/debug", NULL }; +static const char * const drgn_debug_info_options_default_kernel_directories[] = { + "", NULL +}; + void drgn_debug_info_options_init(struct drgn_debug_info_options *options) { #define LIST_OPTION(name) \ diff --git a/libdrgn/debug_info_options.h b/libdrgn/debug_info_options.h index 9c74318c3..d26afa057 100644 --- a/libdrgn/debug_info_options.h +++ b/libdrgn/debug_info_options.h @@ -15,7 +15,8 @@ BOOL_OPTION(try_procfs, true) \ BOOL_OPTION(try_embedded_vdso, true) \ BOOL_OPTION(try_reuse, true) \ - BOOL_OPTION(try_supplementary, true) + BOOL_OPTION(try_supplementary, true) \ + LIST_OPTION(kernel_directories) struct drgn_debug_info_options { #define LIST_OPTION(name) const char * const *name; diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 
a82df7298..22c986991 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1754,6 +1754,27 @@ void drgn_debug_info_options_set_try_supplementary(struct drgn_debug_info_options *options, bool value); +/** + * Get the list of directories to search for kernel debugging information files. + * + * @return Null-terminated list of directories. Valid until @ref + * drgn_debug_info_options_set_kernel_directories() or @ref + * drgn_debug_info_options_destroy() is called on @p options. + */ +const char * const * +drgn_debug_info_options_get_kernel_directories(const struct drgn_debug_info_options *options); + +/** + * Set the list of directories to search for kernel debugging information files. + * + * @param[in] value Null-terminated list of directories. It is copied, so it + * need not remain valid after this function returns. + */ +struct drgn_error * +drgn_debug_info_options_set_kernel_directories(struct drgn_debug_info_options *options, + const char * const *value) + __attribute__((__nonnull__(1, 2))); + /** * Get the default debugging information options for @p prog. * diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index e5a6e9aa8..fe0ad02d5 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -468,54 +468,32 @@ static struct drgn_error *depmod_index_validate(struct depmod_index *depmod) return NULL; } -__attribute__((__format__(__printf__, 2, 3))) static struct drgn_error *depmod_index_init(struct depmod_index *depmod, - const char *path_format, - ...) + char *_path, int fd) { struct drgn_error *err; - - va_list ap; - va_start(ap, path_format); - int r = vasprintf(&depmod->path, path_format, ap); - va_end(ap); - if (r < 0) - return &drgn_enomem; - - int fd = open(depmod->path, O_RDONLY); - if (fd == -1) { - err = drgn_error_create_os("open", errno, depmod->path); - goto out_path; - } + _cleanup_free_ char *path = _path; // Take ownership of path. 
struct stat st; - if (fstat(fd, &st) == -1) { - err = drgn_error_create_os("fstat", errno, depmod->path); - goto out_fd; - } + if (fstat(fd, &st) == -1) + return drgn_error_create_os("fstat", errno, path); - if (st.st_size > SIZE_MAX) { - err = &drgn_enomem; - goto out_fd; - } + if (st.st_size > SIZE_MAX) + return &drgn_enomem; void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) { - err = drgn_error_create_os("mmap", errno, depmod->path); - goto out_fd; - } + if (addr == MAP_FAILED) + return drgn_error_create_os("mmap", errno, path); + depmod->path = no_cleanup_ptr(path); depmod->addr = addr; depmod->len = st.st_size; - err = depmod_index_validate(depmod); - if (err) + if (err) { depmod_index_deinit(depmod); -out_fd: - close(fd); -out_path: - if (err) - free(depmod->path); + depmod->path = NULL; + depmod->len = 0; + } return err; } @@ -620,14 +598,12 @@ static struct drgn_error *depmod_index_find(struct depmod_index *depmod, return NULL; } -struct drgn_error * -drgn_module_try_vmlinux_files(struct drgn_module *module, - const struct drgn_debug_info_options *options) +static struct drgn_error * +drgn_module_try_vmlinux_in_debug_directories(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct string_builder *sb) { struct drgn_error *err; - struct drgn_program *prog = module->prog; - const char *osrelease = prog->vmcoreinfo.osrelease; - // Paths relative to the debug directory where vmlinux might be // installed. 
static const char * const debug_dir_paths[] = { @@ -638,14 +614,58 @@ drgn_module_try_vmlinux_files(struct drgn_module *module, // SUSE: "/lib/modules/%s/vmlinux.debug", }; - STRING_BUILDER(sb); for (size_t i = 0; options->directories[i]; i++) { const char *debug_dir = options->directories[i]; if (debug_dir[0] != '/') continue; + sb->len = 0; + if (!string_builder_append(sb, debug_dir)) + return &drgn_enomem; + size_t debug_dir_len = sb->len; array_for_each(format, debug_dir_paths) { - if (!string_builder_append(&sb, debug_dir) - || !string_builder_appendf(&sb, *format, osrelease) + sb->len = debug_dir_len; + if (!string_builder_appendf(sb, *format, + module->prog->vmcoreinfo.osrelease) + || !string_builder_null_terminate(sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, + sb->str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; + } + } + return NULL; +} + +struct drgn_error * +drgn_module_try_vmlinux_files(struct drgn_module *module, + const struct drgn_debug_info_options *options) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; + + const char *osrelease = prog->vmcoreinfo.osrelease; + STRING_BUILDER(sb); + for (size_t i = 0; options->kernel_directories[i]; i++) { + const char *kernel_dir = options->kernel_directories[i]; + + if (kernel_dir[0]) { + sb.len = 0; + if (!string_builder_append(&sb, kernel_dir)) + return &drgn_enomem; + } else { + // Empty path. Try under the debug directories first. + err = drgn_module_try_vmlinux_in_debug_directories(module, + options, + &sb); + if (err || !drgn_module_wants_file(module)) + return err; + + // Try /boot/vmlinux-$osrelease. 
+ sb.len = 0; + if (!string_builder_append(&sb, "/boot/vmlinux-") + || !string_builder_append(&sb, osrelease) || !string_builder_null_terminate(&sb)) return &drgn_enomem; err = drgn_module_try_standard_file(module, options, @@ -653,25 +673,128 @@ drgn_module_try_vmlinux_files(struct drgn_module *module, NULL); if (err || !drgn_module_wants_file(module)) return err; + + // Try /lib/modules/$osrelease as the kernel directory. sb.len = 0; + if (!string_builder_append(&sb, "/lib/modules/") + || !string_builder_append(&sb, osrelease)) + return &drgn_enomem; + } + + // Paths relative to the kernel directory where vmlinux might be + // installed. + static const char * const kernel_dir_paths[] = { + "/build/vmlinux", + "/vmlinux", + }; + size_t kernel_dir_len = sb.len; + array_for_each(path, kernel_dir_paths) { + if (!string_builder_append(&sb, *path) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, + sb.str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; + sb.len = kernel_dir_len; } } - // Absolute paths where vmlinux might be installed. - static const char * const paths[] = { - "/boot/vmlinux-%s", - "/lib/modules/%s/build/vmlinux", - "/lib/modules/%s/vmlinux", - }; - array_for_each(format, paths) { - if (!string_builder_appendf(&sb, *format, osrelease) + return NULL; +} + +static struct drgn_error * +drgn_open_modules_dep(struct drgn_program *prog, + const struct drgn_debug_info_options *options, + struct depmod_index *modules_dep) +{ + struct drgn_error *err; + + if (modules_dep->addr) + return NULL; + + STRING_BUILDER(sb); + _cleanup_close_ int fd = -1; + for (size_t i = 0; options->kernel_directories[i]; i++) { + const char *kernel_dir = options->kernel_directories[i]; + + sb.len = 0; + if (kernel_dir[0]) { + if (!string_builder_append(&sb, kernel_dir)) + return &drgn_enomem; + } else { + // Empty path. Try /lib/modules/$osrelease. 
+ if (!string_builder_append(&sb, "/lib/modules/") + || !string_builder_append(&sb, + prog->vmcoreinfo.osrelease)) + return &drgn_enomem; + } + if (!string_builder_append(&sb, "/modules.dep.bin") || !string_builder_null_terminate(&sb)) return &drgn_enomem; - err = drgn_module_try_standard_file(module, options, sb.str, -1, - true, NULL); + fd = open(sb.str, O_RDONLY); + if (fd >= 0) + break; + drgn_log_debug(prog, "%s: %m", sb.str); + } + if (fd < 0) { + drgn_log_debug(prog, "couldn't find depmod index"); +fail: + // Set addr so that we don't try again. + modules_dep->addr = MAP_FAILED; + return NULL; + } + + err = depmod_index_init(modules_dep, string_builder_steal(&sb), fd); + if (err) { + if (drgn_error_is_fatal(err)) + return err; + drgn_error_log_warning(prog, err, + "couldn't open depmod index: "); + drgn_error_destroy(err); + goto fail; + } + drgn_log_debug(prog, "found depmod index %s", modules_dep->path); + return NULL; +} + +static struct drgn_error * +drgn_module_try_depmod_in_debug_directories(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct string_builder *sb, + const char *depmod_path, size_t ko_len) +{ + struct drgn_error *err; + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + if (debug_dir[0] != '/') + continue; + sb->len = 0; + // Debian, Ubuntu: + // $debug_dir/lib/modules/$(uname -r)/$ko_name + if (!string_builder_append(sb, debug_dir) + || !string_builder_append(sb, "/lib/modules/") + || !string_builder_append(sb, + module->prog->vmcoreinfo.osrelease) + || !string_builder_appendc(sb, '/') + || !string_builder_appendn(sb, depmod_path, ko_len) + || !string_builder_null_terminate(sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, sb->str, + -1, true, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + + // Fedora, CentOS, SUSE: + // $debug_dir/lib/modules/$(uname -r)/$ko_name.debug + if 
(!string_builder_append(sb, ".debug") + || !string_builder_null_terminate(sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, sb->str, + -1, true, NULL); if (err || !drgn_module_wants_file(module)) return err; - sb.len = 0; } return NULL; } @@ -685,24 +808,9 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, struct drgn_program *prog = module->prog; struct depmod_index *modules_dep = &state->modules_dep; - if (!modules_dep->addr) { - err = depmod_index_init(modules_dep, - "/lib/modules/%s/modules.dep.bin", - prog->vmcoreinfo.osrelease); - if (err) { - if (drgn_error_is_fatal(err)) - return err; - drgn_error_log_debug(prog, err, - "couldn't open depmod index: "); - drgn_error_destroy(err); - modules_dep->path = NULL; - modules_dep->addr = MAP_FAILED; - modules_dep->len = 0; - } else { - drgn_log_debug(prog, "opened depmod index %s", - modules_dep->path); - } - } + err = drgn_open_modules_dep(prog, options, modules_dep); + if (err) + return err; if (modules_dep->len == 0) return NULL; @@ -747,26 +855,33 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, name_end = dot; } - const char *osrelease = prog->vmcoreinfo.osrelease; STRING_BUILDER(sb); - for (size_t i = 0; options->directories[i]; i++) { - const char *debug_dir = options->directories[i]; - if (debug_dir[0] != '/') - continue; - // Debian, Ubuntu: - // $debug_dir/lib/modules/$(uname -r)/$ko_name - if (!string_builder_append(&sb, debug_dir) - || !string_builder_appendn(&sb, depmod_path, ko_len) - || !string_builder_null_terminate(&sb)) - return &drgn_enomem; - err = drgn_module_try_standard_file(module, options, sb.str, -1, - true, NULL); - if (err || !drgn_module_wants_file(module)) - return err; + for (size_t i = 0; options->kernel_directories[i]; i++) { + const char *kernel_dir = options->kernel_directories[i]; - // Fedora, CentOS, SUSE: - // $debug_dir/lib/modules/$(uname -r)/$ko_name.debug - if (!string_builder_append(&sb, ".debug") + if 
(kernel_dir[0]) { + sb.len = 0; + if (!string_builder_append(&sb, kernel_dir)) + return &drgn_enomem; + } else { + // Empty path. Try under the debug directories first. + err = drgn_module_try_depmod_in_debug_directories(module, + options, + &sb, + depmod_path, + ko_len); + if (err || !drgn_module_wants_file(module)) + return err; + + // Try /lib/modules/$osrelease as the kernel directory. + sb.len = 0; + if (!string_builder_append(&sb, "/lib/modules/") + || !string_builder_append(&sb, + prog->vmcoreinfo.osrelease)) + return &drgn_enomem; + } + if (!string_builder_appendc(&sb, '/') + || !string_builder_appendn(&sb, depmod_path, depmod_path_len) || !string_builder_null_terminate(&sb)) return &drgn_enomem; err = drgn_module_try_standard_file(module, options, sb.str, -1, @@ -774,14 +889,7 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, if (err || !drgn_module_wants_file(module)) return err; } - - sb.len = 0; - if (!string_builder_appendf(&sb, "/lib/modules/%s/", osrelease) || - !string_builder_appendn(&sb, depmod_path, depmod_path_len) || - !string_builder_null_terminate(&sb)) - return &drgn_enomem; - return drgn_module_try_standard_file(module, options, sb.str, -1, true, - NULL); + return NULL; } // This has a weird calling convention so that the caller can call From 94b6d390605a006b5c42a4ee7becd7a043258c68 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Jan 2025 11:13:41 -0800 Subject: [PATCH 036/166] libdrgn: debug_info: clean up drgn_module_standard_files_state Rename it to drgn_standard_debug_info_find_state, document it, and move the deinit function to libdrgn/linux_kernel.c where it's more relevant. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 12 +++--------- libdrgn/debug_info.h | 8 +++++++- libdrgn/linux_kernel.c | 10 ++++++++-- libdrgn/linux_kernel.h | 7 ++----- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 361dc71f3..4203e84e7 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2230,7 +2230,7 @@ drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module, static struct drgn_error * drgn_module_try_standard_files(struct drgn_module *module, const struct drgn_debug_info_options *options, - struct drgn_module_standard_files_state *state) + struct drgn_standard_debug_info_find_state *state) { struct drgn_error *err; struct drgn_program *prog = module->prog; @@ -2350,12 +2350,6 @@ drgn_module_try_standard_files(struct drgn_module *module, return drgn_module_try_files_by_gnu_debuglink(module, options); } -static void -drgn_module_standard_files_state_deinit(struct drgn_module_standard_files_state *state) -{ - depmod_index_deinit(&state->modules_dep); -} - static struct drgn_error * drgn_standard_debug_info_find(struct drgn_module * const *modules, size_t num_modules, void *arg) @@ -2375,8 +2369,8 @@ drgn_standard_debug_info_find(struct drgn_module * const *modules, options_str); } - _cleanup_(drgn_module_standard_files_state_deinit) - struct drgn_module_standard_files_state state = {}; + _cleanup_(drgn_standard_debug_info_find_state_deinit) + struct drgn_standard_debug_info_find_state state = {}; for (size_t i = 0; i < num_modules; i++) { err = drgn_module_try_standard_files(modules[i], options, &state); diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 556604d4b..d21f6eb79 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -280,16 +280,22 @@ static inline void drgn_module_deletep(struct drgn_module **modulep) drgn_module_delete(*modulep); } +// Binary index file generated by depmod(8). 
struct depmod_index { char *path; void *addr; size_t len; }; -struct drgn_module_standard_files_state { +// State kept by standard debug info finder for all modules it's working on. +// Currently it's only used to cache locations of Linux kernel loadable modules. +struct drgn_standard_debug_info_find_state { struct depmod_index modules_dep; }; +void +drgn_standard_debug_info_find_state_deinit(struct drgn_standard_debug_info_find_state *state); + // Always takes ownership of fd. Attempts to resolve the real path of path. struct drgn_error * drgn_module_try_standard_file(struct drgn_module *module, diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index fe0ad02d5..ccbbf24b9 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -413,7 +413,7 @@ struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) * changes in the future, we can reevaluate this. */ -void depmod_index_deinit(struct depmod_index *depmod) +static void depmod_index_deinit(struct depmod_index *depmod) { if (depmod->len > 0) munmap(depmod->addr, depmod->len); @@ -598,6 +598,12 @@ static struct drgn_error *depmod_index_find(struct depmod_index *depmod, return NULL; } +void +drgn_standard_debug_info_find_state_deinit(struct drgn_standard_debug_info_find_state *state) +{ + depmod_index_deinit(&state->modules_dep); +} + static struct drgn_error * drgn_module_try_vmlinux_in_debug_directories(struct drgn_module *module, const struct drgn_debug_info_options *options, @@ -802,7 +808,7 @@ drgn_module_try_depmod_in_debug_directories(struct drgn_module *module, struct drgn_error * drgn_module_try_linux_kmod_files(struct drgn_module *module, const struct drgn_debug_info_options *options, - struct drgn_module_standard_files_state *state) + struct drgn_standard_debug_info_find_state *state) { struct drgn_error *err; struct drgn_program *prog = module->prog; diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 1b89d0557..a7c04a30e 100644 --- 
a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -6,9 +6,8 @@ #include "drgn_internal.h" -struct depmod_index; struct drgn_debug_info_options; -struct drgn_module_standard_files_state; +struct drgn_standard_debug_info_find_state; struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog); @@ -25,8 +24,6 @@ struct drgn_error *proc_kallsyms_symbol_addr(const char *name, struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog); -void depmod_index_deinit(struct depmod_index *depmod); - struct drgn_error * linux_kernel_loaded_module_iterator_create(struct drgn_program *prog, struct drgn_module_iterator **ret); @@ -38,7 +35,7 @@ drgn_module_try_vmlinux_files(struct drgn_module *module, struct drgn_error * drgn_module_try_linux_kmod_files(struct drgn_module *module, const struct drgn_debug_info_options *options, - struct drgn_module_standard_files_state *state); + struct drgn_standard_debug_info_find_state *state); #define KDUMP_SIGNATURE "KDUMP " #define KDUMP_SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) From 7dc474d75790c6bd3c11ca2723d1000a8c1e0b15 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 31 Jan 2025 10:39:39 -0800 Subject: [PATCH 037/166] Support searching for kernel modules without depmod metadata For live kernel debugging, you usually have a properly installed kernel that has run depmod. However, for core dumps, it's common to get an archive containing all of the kernel module files and no depmod metadata. Peter Collingbourne also reported that the depmod situation on Android is complicated: Android doesn't have a modules.dep.bin file, and modules are split among different images. Let's add support for walking kernel module directories if modules.dep.bin isn't found and a debug info option for controlling whether depmod, walking, or both are tried. We default to using depmod if it is available and walking if not. Closes #369. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 57 ++++ docs/api_reference.rst | 1 + drgn/__init__.py | 2 + drgn/cli.py | 13 +- libdrgn/build-aux/gen_constants.py | 1 + libdrgn/debug_info.c | 12 +- libdrgn/debug_info.h | 27 ++ libdrgn/debug_info_options.c | 32 +++ libdrgn/debug_info_options.h | 4 +- libdrgn/drgn.h | 18 ++ libdrgn/linux_kernel.c | 367 +++++++++++++++++++++++++- libdrgn/python/debug_info_options.c | 2 + libdrgn/python/drgnpy.h | 1 + tests/linux_kernel/test_debug_info.py | 66 ++++- tests/test_debug_info_options.py | 27 +- 15 files changed, 597 insertions(+), 33 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index bb3ea2479..12b5f0503 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1415,6 +1415,7 @@ class DebugInfoOptions: try_reuse: bool = ..., try_supplementary: bool = ..., kernel_directories: Iterable[Path] = ..., + try_kmod: KmodSearchMethod = ..., ) -> None: """ Create a ``DebugInfoOptions``. @@ -1512,6 +1513,62 @@ class DebugInfoOptions: :file:`/boot/vmlinux-{release}`, :file:`/lib/modules/{release}`) absolutely and under each absolute path in :attr:`directories`. """ + try_kmod: KmodSearchMethod + """ + How to search for loadable kernel modules. + + Defaults to :attr:`KmodSearchMethod.DEPMOD_OR_WALK`. + """ + +class KmodSearchMethod(enum.Enum): + """ + Methods of searching for loadable kernel module debugging information. + + In addition to searching by build ID, there are currently two methods of + searching for debugging information specific to loadable kernel modules: + + 1. Using :manpage:`depmod(8)` metadata. This looks for :command:`depmod` + metadata (specifically, :file:`modules.dep.bin`) at the top level of + each directory in :attr:`DebugInfoOptions.kernel_directories` (an empty + path means :file:`/lib/modules/{release}`). The metadata is used to + quickly find the path of each module, which is then checked relative to + each directory specified by :attr:`DebugInfoOptions.kernel_directories`. 
+ + This method is faster but typically only applicable to installed + kernels. + 2. Walking kernel directories. This traverses each directory specified by + :attr:`DebugInfoOptions.kernel_directories` looking for ``.ko`` files. + Module names are matched to filenames before the ``.ko`` extension and + with dashes (``-``) replaced with underscores (``_``). + + This method is slower but not limited to installed kernels. + + Debugging information searches can be configured to use one, both, or + neither method. + """ + + NONE = ... + """Don't search using kernel module-specific methods.""" + DEPMOD = ... + """Search using :command:`depmod` metadata.""" + WALK = ... + """Search by walking kernel directories.""" + DEPMOD_OR_WALK = ... + """ + Search using :command:`depmod` metadata, falling back to walking kernel + directories only if no :command:`depmod` metadata is found. + + Since :command:`depmod` metadata is expected to be reliable if present, + this is the default. + """ + DEPMOD_AND_WALK = ... + """ + Search using :command:`depmod` metadata and by walking kernel directories. + + Unlike :attr:`DEPMOD_OR_WALK`, if :command:`depmod` metadata is found but + doesn't result in the desired debugging information, this will still walk + kernel directories. + """ def get_default_prog() -> Program: """ diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 971e9568f..d039aab68 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -12,6 +12,7 @@ Programs .. drgndoc:: FindObjectFlags .. drgndoc:: DebugInfoOptions +.. drgndoc:: KmodSearchMethod .. 
drgndoc:: Thread diff --git a/drgn/__init__.py b/drgn/__init__.py index 105423ae7..be5f2ec0f 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -54,6 +54,7 @@ FaultError, FindObjectFlags, IntegerLike, + KmodSearchMethod, Language, MainModule, MissingDebugInfoError, @@ -121,6 +122,7 @@ "FaultError", "FindObjectFlags", "IntegerLike", + "KmodSearchMethod", "Language", "MainModule", "MissingDebugInfoError", diff --git a/drgn/cli.py b/drgn/cli.py index c94b5fd9f..135627f3a 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -188,11 +188,20 @@ def __call__( class _TryDebugInfoOptionAction(_DebugInfoOptionAction): - _choices = _DebugInfoOptionAction._bool_options(True) + _choices = { + **_DebugInfoOptionAction._bool_options(True), + "kmod=depmod": ("try_kmod", drgn.KmodSearchMethod.DEPMOD), + "kmod=walk": ("try_kmod", drgn.KmodSearchMethod.WALK), + "kmod=depmod-or-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_OR_WALK), + "kmod=depmod-and-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_AND_WALK), + } class _NoDebugInfoOptionAction(_DebugInfoOptionAction): - _choices = _DebugInfoOptionAction._bool_options(False) + _choices = { + **_DebugInfoOptionAction._bool_options(False), + "kmod": ("try_kmod", drgn.KmodSearchMethod.NONE), + } def _main() -> None: diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index c6999292b..8232bfc71 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -16,6 +16,7 @@ class ConstantClass(NamedTuple): CONSTANTS = ( ConstantClass("Architecture", "Enum", r"DRGN_ARCH_([a-zA-Z0-9_]+)"), ConstantClass("FindObjectFlags", "Flag", r"DRGN_FIND_OBJECT_([a-zA-Z0-9_]+)"), + ConstantClass("KmodSearchMethod", "Enum", r"DRGN_KMOD_SEARCH_([a-zA-Z0-9_]+)"), ConstantClass("ModuleFileStatus", "Enum", r"DRGN_MODULE_FILE_([a-zA-Z0-9_]+)"), ConstantClass( "PlatformFlags", diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 4203e84e7..069345af4 100644 --- a/libdrgn/debug_info.c +++ 
b/libdrgn/debug_info.c @@ -2370,7 +2370,17 @@ drgn_standard_debug_info_find(struct drgn_module * const *modules, } _cleanup_(drgn_standard_debug_info_find_state_deinit) - struct drgn_standard_debug_info_find_state state = {}; + struct drgn_standard_debug_info_find_state state = { + .modules = modules, + .num_modules = num_modules, + .kmod_walk = { + .modules = HASH_TABLE_INIT, + .stack = VECTOR_INIT, + .path = STRING_BUILDER_INIT, + .visited_dirs = HASH_TABLE_INIT, + .next_kernel_dir = options->kernel_directories, + }, + }; for (size_t i = 0; i < num_modules; i++) { err = drgn_module_try_standard_files(modules[i], options, &state); diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index d21f6eb79..20c446116 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -287,10 +287,37 @@ struct depmod_index { size_t len; }; +DEFINE_VECTOR_TYPE(char_p_vector, char *); + +DEFINE_HASH_MAP_TYPE(drgn_kmod_walk_module_map, const char *, + struct char_p_vector); + +DEFINE_VECTOR_TYPE(drgn_kmod_walk_stack, + struct drgn_kmod_walk_stack_entry); + +struct drgn_kmod_walk_inode { + dev_t dev; + ino_t ino; +}; + +DEFINE_HASH_SET_TYPE(drgn_kmod_walk_inode_set, struct drgn_kmod_walk_inode); + +struct drgn_kmod_walk_state { + struct drgn_kmod_walk_module_map modules; + struct drgn_kmod_walk_stack stack; + struct string_builder path; + struct drgn_kmod_walk_inode_set visited_dirs; + const char * const *next_kernel_dir; + const char * const *next_debug_dir; +}; + // State kept by standard debug info finder for all modules it's working on. // Currently it's only used to cache locations of Linux kernel loadable modules. 
struct drgn_standard_debug_info_find_state { + struct drgn_module * const *modules; + size_t num_modules; struct depmod_index modules_dep; + struct drgn_kmod_walk_state kmod_walk; }; void diff --git a/libdrgn/debug_info_options.c b/libdrgn/debug_info_options.c index d937167ea..8b9b404bd 100644 --- a/libdrgn/debug_info_options.c +++ b/libdrgn/debug_info_options.c @@ -248,6 +248,38 @@ static bool drgn_format_debug_info_options_bool(struct string_builder *sb, && string_builder_append(sb, value ? "True" : "False"); } +static bool +drgn_kmod_search_method_format(struct string_builder *sb, const char *name, + bool *first, enum drgn_kmod_search_method value, + enum drgn_kmod_search_method default_value) +{ + // Skip options set to the default. + if (value == default_value) + return true; + const char *s; + SWITCH_ENUM(value) { + case DRGN_KMOD_SEARCH_NONE: + s = "NONE"; + break; + case DRGN_KMOD_SEARCH_DEPMOD: + s = "DEPMOD"; + break; + case DRGN_KMOD_SEARCH_WALK: + s = "WALK"; + break; + case DRGN_KMOD_SEARCH_DEPMOD_OR_WALK: + s = "DEPMOD_OR_WALK"; + break; + case DRGN_KMOD_SEARCH_DEPMOD_AND_WALK: + s = "DEPMOD_AND_WALK"; + break; + default: + UNREACHABLE(); + } + return drgn_format_debug_info_options_common(sb, name, first) + && string_builder_append(sb, s); +} + char *drgn_format_debug_info_options(struct drgn_debug_info_options *options) { STRING_BUILDER(sb); diff --git a/libdrgn/debug_info_options.h b/libdrgn/debug_info_options.h index d26afa057..fdac2d0d8 100644 --- a/libdrgn/debug_info_options.h +++ b/libdrgn/debug_info_options.h @@ -16,7 +16,9 @@ BOOL_OPTION(try_embedded_vdso, true) \ BOOL_OPTION(try_reuse, true) \ BOOL_OPTION(try_supplementary, true) \ - LIST_OPTION(kernel_directories) + LIST_OPTION(kernel_directories) \ + ENUM_OPTION(try_kmod, drgn_kmod_search_method, \ + DRGN_KMOD_SEARCH_DEPMOD_OR_WALK) struct drgn_debug_info_options { #define LIST_OPTION(name) const char * const *name; diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 22c986991..706905293 
100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1775,6 +1775,24 @@ drgn_debug_info_options_set_kernel_directories(struct drgn_debug_info_options *o const char * const *value) __attribute__((__nonnull__(1, 2))); +/** Methods of searching for loadable kernel module debugging information. */ +enum drgn_kmod_search_method { + DRGN_KMOD_SEARCH_NONE, + DRGN_KMOD_SEARCH_DEPMOD, + DRGN_KMOD_SEARCH_WALK, + DRGN_KMOD_SEARCH_DEPMOD_OR_WALK, + DRGN_KMOD_SEARCH_DEPMOD_AND_WALK, +} __attribute__((__packed__)); + +/** Get how to search for loadable kernel module debugging information. */ +enum drgn_kmod_search_method +drgn_debug_info_options_get_try_kmod(const struct drgn_debug_info_options *options); + +/** Set how to search for loadable kernel module debugging information. */ +void +drgn_debug_info_options_set_try_kmod(struct drgn_debug_info_options *options, + enum drgn_kmod_search_method value); + /** * Get the default debugging information options for @p prog. * diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index ccbbf24b9..75c333c30 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -598,9 +598,60 @@ static struct drgn_error *depmod_index_find(struct depmod_index *depmod, return NULL; } +DEFINE_VECTOR_FUNCTIONS(char_p_vector); + +DEFINE_HASH_MAP_FUNCTIONS(drgn_kmod_walk_module_map, c_string_key_hash_pair, + c_string_key_eq); + +struct drgn_kmod_walk_stack_entry { + DIR *dir; + size_t path_len; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_kmod_walk_stack); + +static inline struct hash_pair +drgn_kmod_walk_inode_hash_pair(const struct drgn_kmod_walk_inode *entry) +{ + return hash_pair_from_avalanching_hash(hash_combine(entry->dev, entry->ino)); +} + +static inline bool +drgn_kmod_walk_inode_eq(const struct drgn_kmod_walk_inode *a, + const struct drgn_kmod_walk_inode *b) +{ + return a->dev == b->dev && a->ino == b->ino; +} + +DEFINE_HASH_SET_FUNCTIONS(drgn_kmod_walk_inode_set, + drgn_kmod_walk_inode_hash_pair, + drgn_kmod_walk_inode_eq); + +static 
void +drgn_kmod_walk_module_map_entry_deinit(struct drgn_kmod_walk_module_map_entry *entry) +{ + vector_for_each(char_p_vector, path, &entry->value) + free(*path); + char_p_vector_deinit(&entry->value); +} + +static void +drgn_kmod_walk_state_deinit(struct drgn_kmod_walk_state *state) +{ + drgn_kmod_walk_inode_set_deinit(&state->visited_dirs); + string_builder_deinit(&state->path); + vector_for_each(drgn_kmod_walk_stack, entry, &state->stack) + closedir(entry->dir); + drgn_kmod_walk_stack_deinit(&state->stack); + hash_table_for_each(drgn_kmod_walk_module_map, it, &state->modules) + drgn_kmod_walk_module_map_entry_deinit(it.entry); + drgn_kmod_walk_module_map_deinit(&state->modules); +} + void drgn_standard_debug_info_find_state_deinit(struct drgn_standard_debug_info_find_state *state) { + drgn_kmod_walk_state_deinit(&state->kmod_walk); depmod_index_deinit(&state->modules_dep); } @@ -805,28 +856,21 @@ drgn_module_try_depmod_in_debug_directories(struct drgn_module *module, return NULL; } -struct drgn_error * -drgn_module_try_linux_kmod_files(struct drgn_module *module, - const struct drgn_debug_info_options *options, - struct drgn_standard_debug_info_find_state *state) +static struct drgn_error * +drgn_module_try_linux_kmod_depmod(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct drgn_standard_debug_info_find_state *state) { struct drgn_error *err; struct drgn_program *prog = module->prog; - struct depmod_index *modules_dep = &state->modules_dep; - - err = drgn_open_modules_dep(prog, options, modules_dep); - if (err) - return err; - if (modules_dep->len == 0) - return NULL; const char *depmod_path; size_t depmod_path_len; - err = depmod_index_find(modules_dep, module->name, &depmod_path, + err = depmod_index_find(&state->modules_dep, module->name, &depmod_path, &depmod_path_len); if (err) { - drgn_error_log_debug(prog, err, - "couldn't parse depmod index: "); + drgn_error_log_warning(prog, err, + "couldn't parse depmod index: "); 
drgn_error_destroy(err); return NULL; } @@ -898,6 +942,301 @@ drgn_module_try_linux_kmod_files(struct drgn_module *module, return NULL; } +static struct drgn_error * +drgn_kmod_walk_next_dir(struct drgn_program *prog, + const struct drgn_debug_info_options *options, + struct drgn_kmod_walk_state *state) +{ + struct string_builder *path = &state->path; + for (;;) { + if (state->next_debug_dir) { + const char *debug_dir = *state->next_debug_dir++; + if (debug_dir && debug_dir[0] != '/') + continue; + + path->len = 0; + if (debug_dir) { + if (!string_builder_append(path, debug_dir)) + return &drgn_enomem; + } else { + state->next_debug_dir = NULL; + } + if (!string_builder_append(path, "/lib/modules/") + || !string_builder_append(path, + prog->vmcoreinfo.osrelease)) + return &drgn_enomem; + } else { + const char *kernel_dir = *state->next_kernel_dir; + if (!kernel_dir) + return &drgn_stop; + state->next_kernel_dir++; + if (kernel_dir[0]) { + path->len = 0; + if (!string_builder_append(path, kernel_dir)) + return &drgn_enomem; + } else { + state->next_debug_dir = options->directories; + continue; + } + } + + if (!string_builder_null_terminate(path)) + return &drgn_enomem; + struct drgn_kmod_walk_stack_entry entry = { + .dir = opendir(path->str), + .path_len = path->len, + }; + if (!entry.dir) { + drgn_log_debug(prog, "opendir: %s: %m", path->str); + continue; + } + if (!drgn_kmod_walk_stack_append(&state->stack, &entry)) { + closedir(entry.dir); + return &drgn_enomem; + } + drgn_log_debug(prog, "searching for kernel modules in %s", + path->str); + return NULL; + } +} + +static struct drgn_error * +drgn_kmod_walk(struct drgn_program *prog, + const struct drgn_debug_info_options *options, + struct drgn_kmod_walk_state *state, + struct drgn_kmod_walk_module_map_entry *current) +{ + struct drgn_error *err; + struct string_builder *path = &state->path; + + for (;;) { + if (drgn_kmod_walk_stack_empty(&state->stack)) { + err = drgn_kmod_walk_next_dir(prog, options, state); + 
if (err) + return err; + } + + struct drgn_kmod_walk_stack_entry *top = + drgn_kmod_walk_stack_last(&state->stack); + errno = 0; + struct dirent *ent = readdir(top->dir); + if (!ent) { + if (errno) { + path->str[top->path_len] = '\0'; + drgn_log_debug(prog, "%s: readdir: %m", + path->str); + } + closedir(top->dir); + drgn_kmod_walk_stack_pop(&state->stack); + continue; + } + + // Skip "." and "..". + if (ent->d_name[0] == '.' + && (!ent->d_name[1] + || (ent->d_name[1] == '.' && !ent->d_name[2]))) + continue; + + bool is_directory = false; + if (ent->d_type == DT_LNK || ent->d_type == DT_UNKNOWN) { + struct stat st; + if (fstatat(dirfd(top->dir), ent->d_name, &st, 0) < 0) { + path->str[top->path_len] = '\0'; + drgn_log_debug(prog, "%s/%s: fstatat: %m", + path->str, ent->d_name); + continue; + } + if (S_ISDIR(st.st_mode)) + is_directory = true; + else if (!S_ISREG(st.st_mode)) + continue; + } else if (ent->d_type == DT_DIR) { + is_directory = true; + } else if (ent->d_type != DT_REG) { + continue; + } + + if (is_directory) { + path->len = top->path_len; + if (!string_builder_appendc(path, '/') + || !string_builder_append(path, ent->d_name) + || !string_builder_null_terminate(path)) + return &drgn_enomem; + + _cleanup_close_ int fd = + openat(dirfd(top->dir), ent->d_name, + O_RDONLY | O_DIRECTORY); + if (fd < 0) { + drgn_log_debug(prog, "openat: %s: %m", + path->str); + continue; + } + + struct stat st; + if (fstat(fd, &st) < 0) { + drgn_log_debug(prog, "fstat: %s: %m", + path->str); + continue; + } + struct drgn_kmod_walk_inode inode = { + .dev = st.st_dev, + .ino = st.st_ino, + }; + int r = drgn_kmod_walk_inode_set_insert(&state->visited_dirs, + &inode, NULL); + if (r < 0) + return &drgn_enomem; + if (r == 0) { + drgn_log_debug(prog, + "%s is cycle or duplicate; skipping", + path->str); + continue; + } + + struct drgn_kmod_walk_stack_entry entry = { + .dir = fdopendir(fd), + .path_len = path->len, + }; + if (!entry.dir) { + drgn_log_debug(prog, "fdopendir: %s: %m", 
+ path->str); + continue; + } + fd = -1; // entry.dir owns fd now. + if (!drgn_kmod_walk_stack_append(&state->stack, + &entry)) { + closedir(entry.dir); + return &drgn_enomem; + } + } else { + // Match anything where the first extension is ".ko". + char *dot = strchr(ent->d_name, '.'); + if (!dot || dot[1] != 'k' || dot[2] != 'o' + || (dot[3] != '\0' && dot[3] != '.')) + continue; + + // Borrow the path string builder to build the module + // name (removing extensions and replacing '-' with + // '_'). + path->len = top->path_len; + if (!string_builder_appendn(path, ent->d_name, + dot - ent->d_name) + || !string_builder_null_terminate(path)) + return &drgn_enomem; + char *dash = &path->str[top->path_len]; + while ((dash = strchr(dash, '-'))) + *dash++ = '_'; + + // Find the module (if wanted). + const char *module_name = &path->str[top->path_len]; + auto it = drgn_kmod_walk_module_map_search(&state->modules, + &module_name); + if (!it.entry) + continue; + + size_t name_len = strlen(ent->d_name); + size_t path_len; + if (__builtin_add_overflow(top->path_len, name_len, + &path_len) + || __builtin_add_overflow(path_len, 2, &path_len)) + return &drgn_enomem; + _cleanup_free_ char *file_path = malloc(path_len); + if (!file_path) + return &drgn_enomem; + memcpy(file_path, path->str, top->path_len); + file_path[top->path_len] = '/'; + memcpy(&file_path[top->path_len + 1], ent->d_name, + name_len + 1); + drgn_log_debug(prog, "found kernel module %s", file_path); + + if (!char_p_vector_append(&it.entry->value, &file_path)) + return &drgn_enomem; + file_path = NULL; // it.entry->value owns file_path now. + + // If the file matches the current module, return it. + // Otherwise, keep going. 
+ if (it.entry == current) + return NULL; + } + } +} + +struct drgn_error * +drgn_module_try_linux_kmod_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct drgn_standard_debug_info_find_state *state) +{ + struct drgn_error *err; + + if (options->try_kmod == DRGN_KMOD_SEARCH_NONE) + return NULL; + + if (options->try_kmod != DRGN_KMOD_SEARCH_WALK) { + err = drgn_open_modules_dep(module->prog, options, + &state->modules_dep); + if (err) + return err; + if (state->modules_dep.len > 0) { + err = drgn_module_try_linux_kmod_depmod(module, options, + state); + if (err + || options->try_kmod != DRGN_KMOD_SEARCH_DEPMOD_AND_WALK + || !drgn_module_wants_file(module)) + return err; + } + if (options->try_kmod == DRGN_KMOD_SEARCH_DEPMOD) + return NULL; + } + + if (drgn_kmod_walk_module_map_empty(&state->kmod_walk.modules)) { + for (size_t i = 0; i < state->num_modules; i++) { + if (!drgn_module_wants_file(state->modules[i])) + continue; + struct drgn_kmod_walk_module_map_entry entry = { + .key = state->modules[i]->name, + .value = VECTOR_INIT, + }; + if (drgn_kmod_walk_module_map_insert(&state->kmod_walk.modules, + &entry, NULL) < 0) + return &drgn_enomem; + } + } + + const char *module_name = module->name; + auto it = drgn_kmod_walk_module_map_search(&state->kmod_walk.modules, + &module_name); + size_t i = 0; + for (;;) { + if (i >= char_p_vector_size(&it.entry->value)) { + // No matches remaining for this module. Clear the old + // matches and find another one. 
+ vector_for_each(char_p_vector, path, &it.entry->value) + free(*path); + char_p_vector_clear(&it.entry->value); + i = 0; + + err = drgn_kmod_walk(module->prog, options, + &state->kmod_walk, it.entry); + if (err == &drgn_stop) + break; + else if (err) + return err; + } + char *path = *char_p_vector_at(&it.entry->value, i++); + err = drgn_module_try_standard_file(module, options, path, -1, + true, NULL); + if (err) + return err; + if (!drgn_module_wants_file(module)) + break; + } + // We won't need any more matches for this module. + drgn_kmod_walk_module_map_entry_deinit(it.entry); + drgn_kmod_walk_module_map_delete_iterator(&state->kmod_walk.modules, + it); + return NULL; +} + // This has a weird calling convention so that the caller can call // drgn_error_format_os() itself. static const char *get_gnu_build_id_from_note_file(int fd, diff --git a/libdrgn/python/debug_info_options.c b/libdrgn/python/debug_info_options.c index 2502bc34c..c17ba2f63 100644 --- a/libdrgn/python/debug_info_options.c +++ b/libdrgn/python/debug_info_options.c @@ -71,6 +71,8 @@ static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) } \ DebugInfoOptions_SETTER(name) +#define drgn_kmod_search_method_class KmodSearchMethod_class + #define ENUM_OPTION(name, type, default_value) \ static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ { \ diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 522a2aaca..0d6206490 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -286,6 +286,7 @@ typedef struct { extern PyObject *Architecture_class; extern PyObject *FindObjectFlags_class; +extern PyObject *KmodSearchMethod_class; extern PyObject *ModuleFileStatus_class; extern PyObject *ModuleSectionAddresses_class; extern PyObject *PlatformFlags_class; diff --git a/tests/linux_kernel/test_debug_info.py b/tests/linux_kernel/test_debug_info.py index db712e684..b0074fe29 100644 --- a/tests/linux_kernel/test_debug_info.py +++ 
b/tests/linux_kernel/test_debug_info.py @@ -2,8 +2,16 @@ # SPDX-License-Identifier: LGPL-2.1-or-later import os - -from drgn import MainModule, Program, RelocatableModule +from pathlib import Path +import tempfile + +from drgn import ( + DebugInfoOptions, + KmodSearchMethod, + MainModule, + Program, + RelocatableModule, +) from drgn.helpers.linux.module import find_module from tests import modifyenv from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod @@ -20,21 +28,51 @@ def iter_proc_modules(): yield tokens[0], int(tokens[5], 16) -class TestLoadDebugInfo(LinuxKernelTestCase): - def test_no_build_id(self): +class TestDebugInfo(LinuxKernelTestCase): + def test_debug_info(self): + # This is actually two test cases squished into one to avoid indexing + # vmlinux another time. prog = Program() prog.set_kernel() prog.set_enabled_debug_info_finders([]) - for module, _ in prog.loaded_modules(): - if isinstance(module, MainModule): - module.build_id = None - break - else: - self.fail("main module not found") - prog.load_debug_info([self.prog.main_module().debug_file_path]) - self.assertEqual( - prog.main_module().debug_file_path, self.prog.main_module().debug_file_path - ) + + with self.subTest("vmlinux_no_build_id"): + for module, _ in prog.loaded_modules(): + if isinstance(module, MainModule): + module.build_id = None + break + else: + self.fail("main module not found") + prog.load_debug_info([self.prog.main_module().debug_file_path]) + self.assertEqual( + prog.main_module().debug_file_path, + self.prog.main_module().debug_file_path, + ) + + with self.subTest("kmod_walk"), tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + found_modules = set() + for i, module in enumerate(self.prog.modules()): + if isinstance(module, RelocatableModule) and module.debug_file_path: + found_modules.add(module.name) + link = temp_dir / str(i) / (module.name + ".ko") + link.parent.mkdir() + link.symlink_to(module.debug_file_path) + + modules = 
[ + module + for module, _ in prog.loaded_modules() + if module.name in found_modules + ] + prog.find_standard_debug_info( + modules, + options=DebugInfoOptions( + kernel_directories=(temp_dir,), try_kmod=KmodSearchMethod.WALK + ), + ) + for module in modules: + with self.subTest(module=module.name): + self.assertIsNotNone(module.debug_file_path) class TestModule(LinuxKernelTestCase): diff --git a/tests/test_debug_info_options.py b/tests/test_debug_info_options.py index eb968beeb..87bc168a1 100644 --- a/tests/test_debug_info_options.py +++ b/tests/test_debug_info_options.py @@ -1,7 +1,7 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later -from drgn import DebugInfoOptions, Program +from drgn import DebugInfoOptions, KmodSearchMethod, Program from tests import TestCase @@ -46,6 +46,31 @@ def test_bool_set(self): options.try_build_id = False self.assertIs(options.try_build_id, False) + def test_enum_default(self): + self.assertEqual(DebugInfoOptions().try_kmod, KmodSearchMethod.DEPMOD_OR_WALK) + + def test_enum_init(self): + self.assertEqual( + DebugInfoOptions(try_kmod=KmodSearchMethod.WALK).try_kmod, + KmodSearchMethod.WALK, + ) + self.assertRaises(TypeError, DebugInfoOptions, try_kmod=False) + + def test_enum_copy(self): + self.assertEqual( + DebugInfoOptions( + DebugInfoOptions(try_kmod=KmodSearchMethod.DEPMOD) + ).try_kmod, + KmodSearchMethod.DEPMOD, + ) + + def test_enum_set(self): + options = DebugInfoOptions() + options.try_kmod = KmodSearchMethod.DEPMOD_AND_WALK + self.assertEqual(options.try_kmod, KmodSearchMethod.DEPMOD_AND_WALK) + with self.assertRaises(TypeError): + options.try_kmod = False + def test_del(self): with self.assertRaises(AttributeError): del DebugInfoOptions().directories From 4dca42953b01da7da747f68c55b9191ed6b6a490 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 31 Jan 2025 13:54:24 -0800 Subject: [PATCH 038/166] Build manylinux wheels with debuginfod support Now that we call 
libdebuginfod ourselves and can link against it instead of using dlopen, we can add it to our manylinux wheels. Also explicitly configure a few other features we want enabled to make sure we don't lose them. Signed-off-by: Omar Sandoval --- scripts/build_manylinux_in_docker.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index 91787cd3d..40b9025fb 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -13,6 +13,8 @@ sed -i -e 's/mirrorlist/#mirrorlist/g' \ yum install -y \ bzip2-devel \ + json-c-devel \ + libcurl-devel \ libzstd-devel \ lzo-devel \ snappy-devel \ @@ -38,13 +40,7 @@ elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elf mkdir /tmp/elfutils cd /tmp/elfutils curl -L "$elfutils_url" | tar -xj --strip-components=1 -# We don't bother with debuginfod support for a few reasons: -# -# 1. It depends on libcurl, which would pull in a bunch of transitive -# dependencies. -# 2. libdw loads libdebuginfod with dlopen(), which auditwheel misses. -# 3. drgn hasn't been tested with debuginfod. -./configure --disable-libdebuginfod --disable-debuginfod +./configure --enable-libdebuginfod --disable-debuginfod --with-zlib --with-bzlib --with-lzma --with-zstd make -j$(($(nproc) + 1)) make install @@ -85,7 +81,8 @@ build_for_python() { for pybin in /opt/python/cp*/bin; do if build_for_python "$pybin/python"; then - "$pybin/pip" wheel . --no-deps -w /tmp/wheels/ + CONFIGURE_FLAGS="--with-debuginfod --disable-dlopen-debuginfod --with-libkdumpfile" \ + "$pybin/pip" wheel . 
--no-deps -w /tmp/wheels/ fi done From 20e8759ade1dfd29cac6786cd41c4da17cc5caac Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 3 Feb 2025 13:07:19 -0800 Subject: [PATCH 039/166] libdrgn: linux_kernel: handle module section/note changes in Linux 6.14 Linux 6.14 refactored how module sections and notes are stored [1], removing explicit array lengths in favor of sysfs's existing null-terminated arrays. Update our section and note reading to handle that. 1: https://lore.kernel.org/all/20241227-sysfs-const-bin_attr-module-v2-0-e267275f0f37@weissschuh.net/. Signed-off-by: Omar Sandoval --- libdrgn/linux_kernel.c | 184 +++++++++++++++++++++++++++++++---------- 1 file changed, 142 insertions(+), 42 deletions(-) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 75c333c30..a84a4f848 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1457,31 +1457,74 @@ kernel_module_set_build_id(struct drgn_module *module, _cleanup_free_ void *buf = NULL; size_t capacity = 0; - // n = mod->notes_attrs->notes - uint64_t n; err = drgn_object_member(&attrs, module_obj, "notes_attrs"); - if (err) - return err; - err = drgn_object_member_dereference(&tmp, &attrs, "notes"); - if (err) - return err; - err = drgn_object_read_unsigned(&tmp, &n); if (err) return err; - // attrs = mod->notes_attrs->attrs - err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); - if (err) + bool group = true; + uint64_t n; + err = drgn_object_member_dereference(&attrs, &attrs, "grp"); + if (!err) { + // Since Linux kernel commit 4723f16de64e ("module: sysfs: Add + // notes attributes through attribute_group") (in v6.14), we + // have to iterate over struct attribute_group::bin_attrs, a + // null-terminated array of struct bin_attribute pointers. 
+ + // attr = mod->notes_attrs->grp.bin_attrs + err = drgn_object_member(&attrs, &attrs, "bin_attrs"); + if (err) + return err; + } else if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + // Before that, there was no struct attribute_group for notes, + // so we iterate over struct module_notes_attrs::attrs, an array + // of struct bin_attribute with a length given by struct + // module_notes_attrs::notes. + group = false; + // n = mod->notes_attrs->notes + err = drgn_object_member_dereference(&tmp, &attrs, "notes"); + if (err) + return err; + err = drgn_object_read_unsigned(&tmp, &n); + if (err) + return err; + + // attrs = mod->notes_attrs->attrs + err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); + if (err) + return err; + } else { return err; + } - for (uint64_t i = 0; i < n; i++) { + // If we're not using struct attribute_group, we know how many + // attributes there are. + for (uint64_t i = 0; group || i < n; i++) { // attr = attrs[i] err = drgn_object_subscript(&attr, &attrs, i); if (err) return err; - // address = attr.private - err = drgn_object_member(&tmp, &attr, "private"); + if (group) { + // If we're using struct attribute_group, we stop when + // we hit a NULL pointer. 
+ err = drgn_object_read(&attr, &attr); + if (err) + return err; + bool truthy; + err = drgn_object_bool(&attr, &truthy); + if (err) + return err; + if (!truthy) + break; + } else { + // attr = &attrs[i] + err = drgn_object_address_of(&attr, &attr); + if (err) + return err; + } + + // address = attr->private + err = drgn_object_member_dereference(&tmp, &attr, "private"); if (err) return err; uint64_t address; @@ -1489,8 +1532,8 @@ kernel_module_set_build_id(struct drgn_module *module, if (err) return err; - // size = attr.size - err = drgn_object_member(&tmp, &attr, "size"); + // size = attr->size + err = drgn_object_member_dereference(&tmp, &attr, "size"); if (err) return err; uint64_t size; @@ -1633,50 +1676,107 @@ kernel_module_set_section_addresses(struct drgn_module *module, if (err) return err; - // i = mod->sect_attrs->nsections + bool group = true; + uint64_t nsections; err = drgn_object_member_dereference(&tmp, &attrs, "nsections"); - if (err) - return err; - uint64_t i; - err = drgn_object_read_unsigned(&tmp, &i); - if (err) - return err; + if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + // Since Linux kernel commit d8959b947a8d ("module: sysfs: Drop + // member 'module_sect_attrs::nsections'") (in v6.14), we have + // to iterate over struct attribute_group::bin_attrs, a + // null-terminated array of struct bin_attribute pointers. + + // attrs = mod->sect_attrs->grp.bin_attrs + err = drgn_object_member_dereference(&attrs, &attrs, "grp"); + if (err) + return err; + err = drgn_object_member(&attrs, &attrs, "bin_attrs"); + if (err) + return err; + } else if (!err) { + // Before that, struct module_sect_attrs::grp still exists. + // However, since Linux kernel commit ed66f991bb19 ("module: + // Refactor section attr into bin attribute") (in v5.8), the + // sections are in struct attribute_group::bin_attrs, and before + // that, they're in struct attribute_group::attrs. 
Additionally, + // we'd then have to get the containing struct module_sect_attr + // to get the section address. + // + // Instead, it's easier to iterate over struct + // module_sect_attrs::attrs, an array of struct module_sect_attr + // with a length given by struct module_sect_attrs::nsections. + group = false; + // nsections = mod->sect_attrs->nsections + err = drgn_object_read_unsigned(&tmp, &nsections); + if (err) + return err; - // attrs = mod->sect_attrs->attrs - err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); - if (err) + // attrs = mod->sect_attrs->attrs + err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); + if (err) + return err; + } else { return err; + } - while (i-- > 0) { + // If we're not using struct attribute_group, we know how many + // attributes there are. + for (uint64_t i = 0; group || i < nsections; i++) { // attr = attrs[i] err = drgn_object_subscript(&attr, &attrs, i); if (err) return err; - // address = attr.address - err = drgn_object_member(&tmp, &attr, "address"); - if (err) - return err; + if (group) { + // If we're using struct attribute_group, we stop when + // we hit a NULL pointer. + err = drgn_object_read(&attr, &attr); + if (err) + return err; + bool truthy; + err = drgn_object_bool(&attr, &truthy); + if (err) + return err; + if (!truthy) + break; + // Since Linux kernel commit 4b2c11e4aaf7 ("module: + // sysfs: Drop member 'module_sect_attr::address'") (in + // v6.14), the section address is in struct + // bin_attribute::private. + err = drgn_object_member_dereference(&tmp, &attr, + "private"); + } else { + // Before that, the section address is in struct + // module_sect_attr::address. 
+ err = drgn_object_member(&tmp, &attr, "address"); + if (err) + return err; + } uint64_t address; err = drgn_object_read_unsigned(&tmp, &address); if (err) return err; - // Since Linux kernel commit ed66f991bb19 ("module: Refactor - // section attr into bin attribute") (in v5.8), the section name - // is module_sect_attr.battr.attr.name. Before that, it is - // simply module_sect_attr.name. - - // attr = attr.battr.attr - err = drgn_object_member(&attr, &attr, "battr"); - if (!err) { - err = drgn_object_member(&attr, &attr, "attr"); + if (group) { + // attr = attr->attr + err = drgn_object_member_dereference(&attr, &attr, + "attr"); if (err) return err; } else { - if (err->code != DRGN_ERROR_LOOKUP) + // Since Linux kernel commit ed66f991bb19 ("module: + // Refactor section attr into bin attribute") (in v5.8), + // the section name is module_sect_attr.battr.attr.name. + // Before that, it is simply module_sect_attr.name. + + // attr = attr.battr.attr + err = drgn_object_member(&attr, &attr, "battr"); + if (!err) { + err = drgn_object_member(&attr, &attr, "attr"); + if (err) + return err; + } else if (!drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { return err; - drgn_error_destroy(err); + } } err = drgn_object_member(&tmp, &attr, "name"); if (err) From 3d493604e62daf3c078246401c656762223c63aa Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Jan 2025 11:06:29 -0800 Subject: [PATCH 040/166] libdrgn: combine libdrgn and _drgn Python extension into one .so For the upcoming plugin system, libdrgn needs to call Python extension code and vice versa. Doing this from two separate libraries would be messy, so let's combine them into one library. liblldb seems to do the same thing, so I don't think this is too crazy. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 87 ++++++++++++++++------------------- libdrgn/configure.ac | 25 +--------- libdrgn/m4/.gitignore | 1 + libdrgn/m4/my_python_devel.m4 | 47 +++++++++++++++++++ setup.py | 2 +- 5 files changed, 89 insertions(+), 73 deletions(-) create mode 100644 libdrgn/m4/my_python_devel.m4 diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 1ade9578d..d31aacbc0 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -5,7 +5,8 @@ ACLOCAL_AMFLAGS = -I m4 .DELETE_ON_ERROR: -AM_CPPFLAGS = -I $(top_srcdir)/include -D_GNU_SOURCE +AM_CPPFLAGS = -I $(top_srcdir)/include -iquote $(dir $@) \ + -iquote $(srcdir)/$(dir $@) -D_GNU_SOURCE AM_CFLAGS = $(WARN_CFLAGS) $(SANITIZER_CFLAGS) AM_LDFLAGS= $(SANITIZER_LDFLAGS) @@ -134,7 +135,7 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ libdrgnimpl_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden $(OPENMP_CFLAGS) \ $(elfutils_CFLAGS) -libdrgnimpl_la_CPPFLAGS = $(AM_CPPFLAGS) -iquote . +libdrgnimpl_la_CPPFLAGS = $(AM_CPPFLAGS) libdrgnimpl_la_LIBADD = $(OPENMP_LIBS) $(elfutils_LIBS) -lm if WITH_DEBUGINFOD @@ -152,6 +153,36 @@ libdrgnimpl_la_CFLAGS += $(libkdumpfile_CFLAGS) libdrgnimpl_la_LIBADD += $(libkdumpfile_LIBS) endif +if ENABLE_PYTHON +BUILT_SOURCES += python/docstrings.h + +libdrgnimpl_la_SOURCES += python/constants.c \ + python/debug_info_options.c \ + python/docstrings.c \ + python/docstrings.h \ + python/drgnpy.h \ + python/error.c \ + python/helpers.c \ + python/language.c \ + python/main.c \ + python/module.c \ + python/module_section_addresses.c \ + python/object.c \ + python/platform.c \ + python/program.c \ + python/stack_trace.c \ + python/symbol.c \ + python/symbol_index.c \ + python/test.c \ + python/thread.c \ + python/type.c \ + python/type_kind_set.c \ + python/util.c + +libdrgnimpl_la_CPPFLAGS += $(PYTHON_CPPFLAGS) +libdrgnimpl_la_LIBADD += $(PYTHON_LIBS) +endif + %: %.strswitch build-aux/gen_strswitch.py build-aux/codegen_utils.py $(AM_V_GEN)$(PYTHON) 
$(word 2, $^) -o $@ $< @@ -167,52 +198,6 @@ drgn_section_name_to_index.inc: build-aux/gen_elf_sections.py build-aux/gen_strs elf_sections.h: build-aux/gen_elf_sections.py build-aux/codegen_utils.py $(AM_V_GEN)$(PYTHON) $< -H > $@ -lib_LTLIBRARIES = libdrgn.la - -libdrgn_la_SOURCES = -libdrgn_la_LDFLAGS = $(AM_LDFLAGS) -version-info 0:0:0 -libdrgn_la_LIBADD = libdrgnimpl.la - -if ENABLE_PYTHON -BUILT_SOURCES += python/docstrings.h - -noinst_LTLIBRARIES += _drgn.la -endif - -_drgn_la_SOURCES = python/constants.c \ - python/debug_info_options.c \ - python/docstrings.c \ - python/docstrings.h \ - python/drgnpy.h \ - python/error.c \ - python/helpers.c \ - python/language.c \ - python/main.c \ - python/module.c \ - python/module_section_addresses.c \ - python/object.c \ - python/platform.c \ - python/program.c \ - python/stack_trace.c \ - python/symbol.c \ - python/symbol_index.c \ - python/test.c \ - python/thread.c \ - python/type.c \ - python/type_kind_set.c \ - python/util.c - -_drgn_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden -_drgn_la_CPPFLAGS = $(AM_CPPFLAGS) $(PYTHON_CPPFLAGS) -iquote $(srcdir)/python \ - -iquote python -_drgn_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--exclude-libs,ALL -avoid-version -module \ - -shared -rpath $(pkgpyexecdir) -_drgn_la_LIBADD = libdrgnimpl.la - -if WITH_LIBKDUMPFILE -_drgn_la_CFLAGS += $(libkdumpfile_CFLAGS) -endif - python/constants.c: drgn.h build-aux/gen_constants.py $(AM_V_GEN)$(PYTHON) $(word 2, $^) < $< > $@ @@ -225,6 +210,12 @@ python/docstrings.c: ../_drgn.pyi $(drgndoc_docstrings_deps) python/docstrings.h: ../_drgn.pyi $(drgndoc_docstrings_deps) $(AM_V_GEN)$(drgndoc_docstrings) -H -m _drgn:drgn $< > $@ +lib_LTLIBRARIES = libdrgn.la + +libdrgn_la_SOURCES = +libdrgn_la_LDFLAGS = $(AM_LDFLAGS) -version-info 0:0:0 +libdrgn_la_LIBADD = libdrgnimpl.la + EXTRA_DIST = $(ARCH_DEFS_PYS) \ $(STRSWITCH_INCS:.inc=.inc.strswitch) \ Doxyfile \ diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 1a523a794..6cbd766e4 100644 --- 
a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -52,30 +52,7 @@ AC_ARG_ENABLE([python], [], [enable_python=no]) AM_CONDITIONAL([ENABLE_PYTHON], [test "x$enable_python" != xno]) -AM_COND_IF([ENABLE_PYTHON], - [AS_IF([test -z "$PYTHON_CPPFLAGS"], - [prog="import sysconfig -include = sysconfig.get_path('include') -platinclude = sysconfig.get_path('platinclude') -include_paths = [[include]] -if platinclude != include: - include_paths.append(plat_include) -print(' '.join('-I' + path for path in include_paths))" - PYTHON_CPPFLAGS=`"$PYTHON" -c "$prog"`]) - AC_SUBST(PYTHON_CPPFLAGS) - AC_MSG_CHECKING([for $PYTHON development headers]) - save_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $PYTHON_CPPFLAGS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include <Python.h>]])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR( -[Could not compile test program with Python headers. - -You may need to install your distribution's Python development package (e.g., -python3-devel or python3-dev) or specify the location of the Python development -headers by setting the PYTHON_CPPFLAGS environment variable.])]) - CPPFLAGS="$save_CPPFLAGS"]) +AM_COND_IF([ENABLE_PYTHON], [MY_PYTHON_DEVEL]) PKG_PROG_PKG_CONFIG diff --git a/libdrgn/m4/.gitignore b/libdrgn/m4/.gitignore index 5e048193b..8cb171c93 100644 --- a/libdrgn/m4/.gitignore +++ b/libdrgn/m4/.gitignore @@ -7,3 +7,4 @@ !/my_c_auto.m4 !/my_c_switch_enum.m4 !/my_check_va_args_comma_deletion.m4 +!/my_python_devel.m4 diff --git a/libdrgn/m4/my_python_devel.m4 b/libdrgn/m4/my_python_devel.m4 new file mode 100644 index 000000000..ba34edad6 --- /dev/null +++ b/libdrgn/m4/my_python_devel.m4 @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +# Check for Python development files and define PYTHON_CPPFLAGS and PYTHON_LIBS +# accordingly. 
+AC_DEFUN([MY_PYTHON_DEVEL], +[ +AS_IF([test -z "$PYTHON_CPPFLAGS"], + [prog="import sysconfig +include = sysconfig.get_path('include') +platinclude = sysconfig.get_path('platinclude') +include_paths = [[include]] +if platinclude != include: + include_paths.append(plat_include) +print(' '.join('-I' + path for path in include_paths))" + PYTHON_CPPFLAGS=`"$PYTHON" -c "$prog"`]) +AC_SUBST(PYTHON_CPPFLAGS) +AS_IF([test -z "$PYTHON_LIBS"], + [prog="import sysconfig +print('-L' + sysconfig.get_config_var('LIBDIR') + + ' -lpython' + sysconfig.get_config_var('LDVERSION'))" + PYTHON_LIBS=`"$PYTHON" -c "$prog"`]) +AC_SUBST(PYTHON_LIBS) +AC_MSG_CHECKING([for $PYTHON development files]) +save_CPPFLAGS="$CPPFLAGS" +save_LIBS="$LIBS" +CPPFLAGS="$CPPFLAGS $PYTHON_CPPFLAGS" +LIBS="$LIBS $PYTHON_LIBS" +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#include <Python.h> + +int main(void) +{ + Py_Initialize(); +} +]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AC_MSG_ERROR( +[Could not compile Python development test program. + +You may need to install your distribution's Python development package (e.g., +python3-devel or python3-dev) or set the PYTHON_CPPFLAGS and PYTHON_LIBS +environment variables.])]) +CPPFLAGS="$save_CPPFLAGS" +LIBS="$save_LIBS" +]) diff --git a/setup.py b/setup.py index 6535bde97..19ba12bf0 100755 --- a/setup.py +++ b/setup.py @@ -123,7 +123,7 @@ def make(self, *make_args): def run(self): self.make() - so = os.path.join(self.build_temp, ".libs/_drgn.so") + so = os.path.join(self.build_temp, ".libs/libdrgn.so") if self.inplace: self.copy_file(so, self.get_ext_fullpath("_drgn")) old_inplace, self.inplace = self.inplace, 0 From 999e2b10460ecf2769063368a90554d7f0e3ac60 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Jan 2025 13:39:07 -0800 Subject: [PATCH 041/166] libdrgn: always make drgn_program available to Python In order to support Python plugins even when using libdrgn directly, we need every struct drgn_program to be available to Python as a drgn.Program. 
So, let's rework drgn_program_create*() to always create a drgn.Program, initializing the Python interpreter and extension module if needed. We still support compiling libdrgn without Python support, in which case we keep the old behavior. Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 4 ++- libdrgn/program.c | 50 +++++++++++++---------------------- libdrgn/python/drgnpy.h | 6 +++++ libdrgn/python/main.c | 44 +++++++++++++++++++++++++++++++ libdrgn/python/program.c | 56 ++++++++++++++++++++++++++++++---------- 5 files changed, 114 insertions(+), 46 deletions(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 6cbd766e4..5e6b34be5 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -52,7 +52,9 @@ AC_ARG_ENABLE([python], [], [enable_python=no]) AM_CONDITIONAL([ENABLE_PYTHON], [test "x$enable_python" != xno]) -AM_COND_IF([ENABLE_PYTHON], [MY_PYTHON_DEVEL]) +AM_COND_IF([ENABLE_PYTHON], + [MY_PYTHON_DEVEL + AC_DEFINE(ENABLE_PYTHON)]) PKG_PROG_PKG_CONFIG diff --git a/libdrgn/program.c b/libdrgn/program.c index 75c4d7c9f..edb3892b0 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -177,6 +177,7 @@ void drgn_program_deinit(struct drgn_program *prog) drgn_debug_info_deinit(&prog->dbinfo); } +#if !ENABLE_PYTHON LIBDRGN_PUBLIC struct drgn_error * drgn_program_create(const struct drgn_platform *platform, struct drgn_program **ret) @@ -198,6 +199,7 @@ LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) free(prog); } } +#endif LIBDRGN_PUBLIC struct drgn_error * drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, @@ -1741,18 +1743,14 @@ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_core_dump(const char *path, struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if 
(!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_core_dump(prog, path); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1763,18 +1761,14 @@ drgn_program_from_core_dump(const char *path, struct drgn_program **ret) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_core_dump_fd(int fd, struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_core_dump_fd(prog, fd); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1785,18 +1779,14 @@ drgn_program_from_core_dump_fd(int fd, struct drgn_program **ret) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_kernel(struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_kernel(prog); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1807,18 +1797,14 @@ drgn_program_from_kernel(struct drgn_program **ret) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_pid(pid_t pid, struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_pid(prog, pid); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 0d6206490..307e2cc4e 
100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -331,6 +331,12 @@ extern PyObject *MissingDebugInfoError; extern PyObject *ObjectAbsentError; extern PyObject *OutOfBoundsError; +PyGILState_STATE drgn_initialize_python(bool *success_ret); + +#define drgn_initialize_python_guard(success_ret) \ + __attribute__((__cleanup__(PyGILState_Releasep), __unused__)) \ + PyGILState_STATE PP_UNIQUE(gstate) = drgn_initialize_python(success_ret) + int add_module_constants(PyObject *m); int init_logging(void); diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index e9304d82d..7f45c876b 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -362,3 +362,47 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) Py_DECREF(m); return NULL; } + +// On return from this function, three things need to be true: +// +// 1. The Python interpreter needs to be initialized. +// 2. The GIL needs to be held (and the caller needs to know whether to release +// it to restore the original state). +// 3. The _drgn module needs to be initialized. +// +// This can be called from many possible contexts (drgn CLI, standalone +// application using libdrgn, etc.), so we have to handle every possible initial +// state. +PyGILState_STATE drgn_initialize_python(bool *success_ret) +{ + PyGILState_STATE gstate; + if (Py_IsInitialized()) { + gstate = PyGILState_Ensure(); + } else { + gstate = PyGILState_UNLOCKED; + // If the Python interpreter wasn't already initialized, then we + // are in a standalone application using libdrgn. Set our + // imports up. + PyImport_AppendInittab("_drgn", PyInit__drgn); + Py_InitializeEx(0); + // Note: we don't have a good place to call Py_Finalize(), so we + // don't call it. +#if PY_VERSION_HEX < 0x03070000 + // Py_Initialize() calls this for us since Python 3.7, and it + // was deprecated in Python 3.9. 
+ PyEval_InitThreads(); +#endif + const char *env = getenv("PYTHONSAFEPATH"); + if (!env || !env[0]) + PyRun_SimpleString("import sys\nsys.path.insert(0, '')"); + } + + bool success = true; + if (!PyState_FindModule(&drgnmodule)) { + _cleanup_pydecref_ PyObject *m = PyImport_ImportModule("_drgn"); + if (!m) + success = false; + } + *success_ret = success; + return gstate; +} diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 3af37ba7d..dccc41375 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -343,6 +343,25 @@ static void drgnpy_end_blocking(struct drgn_program *prog, void *arg, void *stat PyEval_RestoreThread(state); } +static Program *Program_new_impl(const struct drgn_platform *platform) +{ + _cleanup_pydecref_ PyObject *cache = PyDict_New(); + if (!cache) + return NULL; + + _cleanup_pydecref_ Program *prog = call_tp_alloc(Program); + if (!prog) + return NULL; + prog->cache = no_cleanup_ptr(cache); + pyobjectp_set_init(&prog->objects); + drgn_program_init(&prog->prog, platform); + drgn_program_set_blocking_callback(&prog->prog, drgnpy_begin_blocking, + drgnpy_end_blocking, NULL); + if (Program_init_logging(prog)) + return NULL; + return_ptr(prog); +} + static Program *Program_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { @@ -365,30 +384,41 @@ static Program *Program_new(PyTypeObject *subtype, PyObject *args, "platform must be Platform or None"); return NULL; } - - _cleanup_pydecref_ PyObject *cache = PyDict_New(); - if (!cache) - return NULL; - - _cleanup_pydecref_ Program *prog = call_tp_alloc(Program); + _cleanup_pydecref_ Program *prog = Program_new_impl(platform); if (!prog) return NULL; - prog->cache = no_cleanup_ptr(cache); - pyobjectp_set_init(&prog->objects); - drgn_program_init(&prog->prog, platform); - drgn_program_set_blocking_callback(&prog->prog, drgnpy_begin_blocking, - drgnpy_end_blocking, NULL); if (vmcoreinfo) { struct drgn_error *err = drgn_program_parse_vmcoreinfo( &prog->prog, 
vmcoreinfo, vmcoreinfo_size); if (err) return set_drgn_error(err); } - if (Program_init_logging(prog)) - return NULL; return_ptr(prog); } +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_create(const struct drgn_platform *platform, + struct drgn_program **ret) +{ + bool success; + drgn_initialize_python_guard(&success); + if (!success) + return drgn_error_from_python(); + Program *prog = Program_new_impl(platform); + if (!prog) + return drgn_error_from_python(); + *ret = &prog->prog; + return NULL; +} + +LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) +{ + if (prog) { + PyGILState_guard(); + Py_DECREF(container_of(prog, Program, prog)); + } +} + static void Program_dealloc(Program *self) { Program_deinit_logging(self); From ce436a23400cab5395435ccf9d3181ad4ef98495 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Jan 2025 13:50:32 -0800 Subject: [PATCH 042/166] libdrgn: replace blocking callbacks with hard-coded functions Now that Python support has been squashed into the main libdrgn.so, there's no reason to go through an indirect call to release the GIL. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 4 +-- libdrgn/drgn.h | 66 ---------------------------------------- libdrgn/dwarf_info.c | 2 +- libdrgn/program.c | 35 --------------------- libdrgn/program.h | 50 ++++++++++++------------------ libdrgn/python/program.c | 6 ++-- 6 files changed, 25 insertions(+), 138 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 069345af4..3f3aef63f 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -5186,7 +5186,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, return NULL; } - drgn_blocking_guard(prog); + drgn_blocking_guard(); const char *env = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); unsigned int max_warnings = env ? 
atoi(env) : 5; @@ -5399,7 +5399,7 @@ drgn_load_module_debug_info(struct drgn_module **modules, size_t *num_modulesp) for (size_t i = 0; i < num_wanted_modules; i++) modules[i]->load_debug_info_generation = generation; - drgn_blocking_guard(prog); + drgn_blocking_guard(); const size_t orig_num_wanted_modules = num_wanted_modules; drgn_handler_list_for_each_enabled(struct drgn_debug_info_finder, diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 706905293..21d3c9796 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1980,72 +1980,6 @@ void drgn_program_set_progress_file(struct drgn_program *prog, FILE *file); /** @} */ -/** - * @defgroup Embedding Embedding - * - * Embedding drgn in another runtime. - * - * @{ - */ - -/** - * Callback before a blocking operation. - * - * @param[in] arg @c callback_arg passed to @ref - * drgn_program_set_blocking_callback(). - * @return Opaque pointer to pass to @ref drgn_program_end_blocking_fn(). - */ -typedef void *drgn_program_begin_blocking_fn(struct drgn_program *prog, - void *arg); - -/** - * Callback after a blocking operation. - * - * @param[in] arg @c callback_arg passed to @ref - * drgn_program_set_blocking_callback(). - * @param[in] state Return value of matching call to @ref - * drgn_program_begin_blocking_fn(). - */ -typedef void drgn_program_end_blocking_fn(struct drgn_program *prog, - void *arg, void *state); - -/** - * Set callbacks around blocking operations. - * - * These callbacks will be called around blocking I/O operations and - * long-running computations. They are intended for things like releasing the - * [global interpreter - * lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock). - * Calls to these callbacks may be nested, but they will always be matched. - * - * @param[in] begin_callback Callback called before a blocking operation. Can be - * @c NULL to unset. - * @param[in] end_callback Callback called after a blocking operation. Can be @c - * NULL to unset. 
- * @param[in] callback_arg Argument passed to @p begin_callback and @p - * end_callback. - */ -void -drgn_program_set_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn *begin_callback, - drgn_program_end_blocking_fn *end_callback, - void *callback_arg); - -/** - * Get callbacks set by @ref drgn_program_set_blocking_callback(). - * - * @param[out] begin_callback_ret Returned @c begin_callback. - * @param[out] end_callback_ret Returned @c end_callback. - * @param[out] callback_arg_ret Returned @c callback_arg. - */ -void -drgn_program_get_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn **begin_callback_ret, - drgn_program_end_blocking_fn **end_callback_ret, - void **callback_arg_ret); - -/** @} */ - /** * @defgroup Objects Objects * diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 871ba1100..9fa44d369 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -2160,7 +2160,7 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) >= drgn_dwarf_index_cu_vector_size(&ns->dbinfo->dwarf.index_cus))) return NULL; - drgn_blocking_guard(ns->dbinfo->prog); + drgn_blocking_guard(); struct drgn_error *err = drgn_dwarf_index_update(ns->dbinfo); if (err) diff --git a/libdrgn/program.c b/libdrgn/program.c index edb3892b0..1db6f07ae 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -2128,38 +2128,3 @@ drgn_program_element_info(struct drgn_program *prog, struct drgn_type *type, ret->qualified_type = drgn_type_type(underlying_type); return drgn_type_bit_size(ret->qualified_type.type, &ret->bit_size); } - -LIBDRGN_PUBLIC void -drgn_program_set_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn *begin_callback, - drgn_program_end_blocking_fn *end_callback, - void *callback_arg) -{ - prog->begin_blocking_fn = begin_callback; - prog->end_blocking_fn = end_callback; - prog->blocking_arg = callback_arg; -} - -LIBDRGN_PUBLIC void 
-drgn_program_get_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn **begin_callback_ret, - drgn_program_end_blocking_fn **end_callback_ret, - void **callback_arg_ret) -{ - *begin_callback_ret = prog->begin_blocking_fn; - *end_callback_ret = prog->end_blocking_fn; - *callback_arg_ret = prog->blocking_arg; -} - -void *drgn_program_begin_blocking(struct drgn_program *prog) -{ - if (!prog->begin_blocking_fn) - return NULL; - return prog->begin_blocking_fn(prog, prog->blocking_arg); -} - -void drgn_program_end_blocking(struct drgn_program *prog, void *state) -{ - if (prog->end_blocking_fn) - prog->end_blocking_fn(prog, prog->blocking_arg, state); -} diff --git a/libdrgn/program.h b/libdrgn/program.h index 1dfe47fee..90413d16d 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -261,13 +261,6 @@ struct drgn_program { FILE *progress_file; enum drgn_log_level log_level; bool default_progress_file; - - /* - * Blocking callbacks. - */ - drgn_program_begin_blocking_fn *begin_blocking_fn; - drgn_program_end_blocking_fn *end_blocking_fn; - void *blocking_arg; }; /** Initialize a @ref drgn_program. */ @@ -469,49 +462,46 @@ drgn_program_register_symbol_finder_impl(struct drgn_program *prog, const struct drgn_symbol_finder_ops *ops, void *arg, size_t enable_index); +#if ENABLE_PYTHON /** * Call before a blocking (I/O or long-running) operation. * - * Must be paired with @ref drgn_program_end_blocking(). + * Must be paired with @ref drgn_end_blocking(). * - * @return Opaque pointer to pass to @ref drgn_program_end_blocking(). + * @return Opaque pointer to pass to @ref drgn_end_blocking(). */ -void *drgn_program_begin_blocking(struct drgn_program *prog); +void *drgn_begin_blocking(void); /** * Call after a blocking (I/O or long-running) operation. * - * @param[in] state Return value of @ref drgn_program_begin_blocking(). + * @param[in] state Return value of @ref drgn_begin_blocking(). 
*/ -void drgn_program_end_blocking(struct drgn_program *prog, void *state); - -struct drgn_blocking_guard_struct { - struct drgn_program *prog; - void *state; -}; +void drgn_end_blocking(void *state); +#else +static inline void *drgn_begin_blocking(void) +{ + return NULL; +} -static inline struct drgn_blocking_guard_struct -drgn_blocking_guard_init(struct drgn_program *prog) +static inline void drgn_end_blocking(void *state) { - return (struct drgn_blocking_guard_struct){ - prog, drgn_program_begin_blocking(prog), - }; } +#endif -static inline void -drgn_blocking_guard_cleanup(struct drgn_blocking_guard_struct *guard) +static inline void drgn_blocking_guard_cleanup(void **statep) { - drgn_program_end_blocking(guard->prog, guard->state); + drgn_end_blocking(*statep); } /** - * Scope guard that wraps @ref drgn_program_begin_blocking() and @ref - * drgn_program_end_blocking(). + * Scope guard that wraps @ref drgn_begin_blocking() and @ref + * drgn_end_blocking(). */ -#define drgn_blocking_guard(prog) \ - struct drgn_blocking_guard_struct PP_UNIQUE(guard) \ +#define drgn_blocking_guard() \ + void *PP_UNIQUE(guard) \ __attribute__((__cleanup__(drgn_blocking_guard_cleanup), __unused__)) = \ - drgn_blocking_guard_init(prog) + drgn_begin_blocking() /** * @} diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index dccc41375..91db4fac5 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -329,7 +329,7 @@ int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, return 0; } -static void *drgnpy_begin_blocking(struct drgn_program *prog, void *arg) +void *drgn_begin_blocking(void) { PyThreadState *state = PyThreadState_GetUnchecked(); if (state) @@ -337,7 +337,7 @@ static void *drgnpy_begin_blocking(struct drgn_program *prog, void *arg) return state; } -static void drgnpy_end_blocking(struct drgn_program *prog, void *arg, void *state) +void drgn_end_blocking(void *state) { if (state) PyEval_RestoreThread(state); @@ -355,8 +355,6 
@@ static Program *Program_new_impl(const struct drgn_platform *platform) prog->cache = no_cleanup_ptr(cache); pyobjectp_set_init(&prog->objects); drgn_program_init(&prog->prog, platform); - drgn_program_set_blocking_callback(&prog->prog, drgnpy_begin_blocking, - drgnpy_end_blocking, NULL); if (Program_init_logging(prog)) return NULL; return_ptr(prog); From adf64729095bd8e42dabf5b22de9953f47bd7a7a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 14 Jan 2025 13:58:12 -0800 Subject: [PATCH 043/166] Add plugin system We have a powerful system for defining custom type, object, symbol, and debug info finders, but those currently require manual setup by users. The next step is a plugin system so that these (and more) can be set up automatically. Plugins are simply Python modules that define hook functions (currently there's only one hook). Plugins are registered as package entry points and can be configured further via environment variables. They are still called when using libdrgn directly (assuming libdrgn was compiled with Python support). [Stephen: suggestion to use package entry points, fix for kdump cores] Co-authored-by: Stephen Brennan Signed-off-by: Omar Sandoval --- _drgn_util/plugins.py | 141 +++++++++++++++++++++++++ docs/advanced_usage.rst | 82 +++++++++++++++ docs/api_reference.rst | 31 ++++++ libdrgn/Makefile.am | 1 + libdrgn/kdump.c | 2 + libdrgn/plugins.h | 17 +++ libdrgn/program.c | 4 + libdrgn/python/plugins.c | 32 ++++++ tests/test_plugins.py | 218 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 528 insertions(+) create mode 100644 _drgn_util/plugins.py create mode 100644 libdrgn/plugins.h create mode 100644 libdrgn/python/plugins.c create mode 100644 tests/test_plugins.py diff --git a/_drgn_util/plugins.py b/_drgn_util/plugins.py new file mode 100644 index 000000000..ea41d20da --- /dev/null +++ b/_drgn_util/plugins.py @@ -0,0 +1,141 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + +import fnmatch +from importlib import import_module +import logging +import os +import runpy +import sys +from types import SimpleNamespace +from typing import Any, Callable, Dict, List, Tuple + +logger = logging.getLogger("drgn.plugins") + +_plugins = None +_hooks: Dict[str, List[Tuple[str, Callable[..., Any]]]] = {} + + +def _load_plugins() -> List[Tuple[str, object]]: + plugins = [] + # Mapping from plugin name requested with DRGN_PLUGINS to whether we found + # an entry point with that name. + enabled_entry_points = {} + + env = os.getenv("DRGN_PLUGINS") + if env: + for item in env.split(","): + if not item: + # Ignore empty items for convenience. + continue + name, sep, value = item.partition("=") + if sep: + try: + if "/" in value: + plugin: object = SimpleNamespace(**runpy.run_path(value)) + else: + plugin = import_module(value) + except Exception: + logger.warning("failed to load %r:", item, exc_info=True) + else: + plugins.append((name, plugin)) + logger.debug("loaded %r", item) + else: + enabled_entry_points[name] = False + + env = os.getenv("DRGN_DISABLE_PLUGINS") + # If all plugins are disabled, avoid the entry point machinery entirely. 
+ if env != "*" or enabled_entry_points: + disable_plugins = env.split(",") if env else [] + + group = "drgn.plugins" + if sys.version_info >= (3, 10): + import importlib.metadata # novermin + + entry_points = importlib.metadata.entry_points(group=group) # novermin + + def entry_point_str( # novermin + entry_point: importlib.metadata.EntryPoint, + ) -> str: + return f"{entry_point.name} = {entry_point.value}" + + elif sys.version_info >= (3, 8): + import importlib.metadata # novermin + + entry_points = importlib.metadata.entry_points().get(group, ()) # novermin + + def entry_point_str( # novermin + entry_point: importlib.metadata.EntryPoint, + ) -> str: + return f"{entry_point.name} = {entry_point.value}" + + else: + import pkg_resources + + entry_points = pkg_resources.iter_entry_points(group) + entry_point_str = str + + for entry_point in entry_points: + if entry_point.name in enabled_entry_points: + enabled_entry_points[entry_point.name] = True + elif any( + fnmatch.fnmatch(entry_point.name, disable) + for disable in disable_plugins + ): + continue + try: + plugin = entry_point.load() + except Exception: + logger.warning( + "failed to load %r:", + entry_point_str(entry_point), + exc_info=True, + ) + else: + plugins.append((entry_point.name, plugin)) + logger.debug( + "loaded entry point %r", + entry_point_str(entry_point), + ) + + missing_entry_points = [ + key for key, value in enabled_entry_points.items() if not value + ] + if missing_entry_points: + missing_entry_points.sort() + logger.warning( + "not found: %s", + ", ".join([repr(name) for name in missing_entry_points]), + ) + + return plugins + + +def _load_hook(hook_name: str) -> List[Tuple[str, Callable[..., Any]]]: + global _plugins + if _plugins is None: + _plugins = _load_plugins() + + hooks = [] + for name, plugin in _plugins: + try: + hook = getattr(plugin, hook_name) + except AttributeError: + continue + hooks.append((name, hook)) + + hooks.sort(key=lambda hook: (getattr(hook[1], "drgn_priority", 
50), hook[0])) + return hooks + + +def call_plugins(hook_name: str, *args: object) -> None: + try: + hooks = _hooks[hook_name] + except KeyError: + _hooks[hook_name] = hooks = _load_hook(hook_name) + + for name, hook in hooks: + try: + hook(*args) + except Exception: + logger.warning("%r %s failed:", name, hook_name, exc_info=True) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 239ad4845..b500b92a8 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -200,11 +200,93 @@ program "memory": :meth:`drgn.Program.register_object_finder()` are the equivalent methods for plugging in types and objects. +.. _writing-plugins: + +Writing Plugins +--------------- + +In order for drgn to load a plugin automatically, it must be registered as an +`entry point `_ for +the ``drgn.plugins`` group. Here is a minimal example. First: + +.. code-block:: console + + $ mkdir drgn_plugin_example + $ cd drgn_plugin_example + +Then, create ``pyproject.toml`` with the following contents: + +.. code-block:: toml + :caption: pyproject.toml + :emphasize-lines: 5-6 + + [project] + name = 'drgn_plugin_example' + version = '0.0.1' + + [project.entry-points.'drgn.plugins'] + example = 'drgn_plugin_example' + +See the `Python Packaging User Guide +`_ for a complete +description of ``pyproject.toml``. We are most interested in the last two +lines, which define the entry point. In ``example = 'drgn_plugin_example'``, +``example`` is the plugin name, and ``drgn_plugin_example`` is the plugin +module. + +Create ``drgn_plugin_example.py`` with the following contents: + +.. 
code-block:: python3 + :caption: drgn_plugin_example.py + + import drgn + + def example_debug_info_finder(modules: list[drgn.Module]) -> None: + for module in (m for m in modules if isinstance(m, drgn.MainModule)): + module.try_file("/my/vmlinux") + + def drgn_prog_set(prog: drgn.Program) -> None: + if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: + prog.register_debug_info_finder( + "example", example_debug_info_finder, enable_index=-1 + ) + # Optional; the default is 50. + drgn_prog_set.drgn_priority = 100 + +This is a typical usage of the :func:`drgn_prog_set()` hook to register +finders. See :ref:`plugins` for more details. + +After creating the above files, the plugin can be installed with +``pip install .``. + Environment Variables --------------------- Some of drgn's behavior can be modified through environment variables: +.. envvar:: DRGN_DISABLE_PLUGINS + + Comma-separated list of plugins to disable. Each item is a glob pattern + matching plugin entry point names. + +.. envvar:: DRGN_PLUGINS + + Comma-separated list of plugins to enable. Each item is either a plugin + entry point name, a file path, or a module name. Empty items are ignored. + + An item not containing ``=`` is interpreted as a plugin entry point name. + This takes precedence over :envvar:`DRGN_DISABLE_PLUGINS`. + + An item containing ``=`` is interpreted as an extra plugin to load manually + instead of via an entry point. The string before ``=`` is the plugin name. + The string after ``=`` is the value. If the value contains a ``/``, it is + the file path of a Python module. Otherwise, it is a module name. + + So, ``DRGN_DISABLE_PLUGINS=* DRGN_PLUGINS=foo,bar=/hello/world.py,baz=my.module`` + results in three plugins being loaded: the entry point ``foo``, the file + ``/hello/world.py`` as ``bar``, and the module ``my.module`` as ``baz``. + All other plugins are disabled. + ..
envvar:: DRGN_MAX_DEBUG_INFO_ERRORS The maximum number of warnings about missing debugging information to log diff --git a/docs/api_reference.rst b/docs/api_reference.rst index d039aab68..98463c8fe 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -221,6 +221,37 @@ CLI .. drgndoc:: cli +.. _plugins: + +Plugins +------- + +drgn can be extended with plugins. A drgn plugin is a Python module defining +one or more hook functions that are called at specific times. + +By default, drgn loads installed modules registered as :ref:`entry points +` for the ``drgn.plugins`` group. The :envvar:`DRGN_PLUGINS` +and :envvar:`DRGN_DISABLE_PLUGINS` environment variables can be used to +configure this. + +The following hooks are currently defined: + +.. py:currentmodule:: None + +.. function:: drgn_prog_set(prog: drgn.Program) -> None + + Called after the program target has been set (e.g., one of + :meth:`drgn.Program.set_core_dump()`, :meth:`drgn.Program.set_kernel()`, or + :meth:`drgn.Program.set_pid()` has been called). + +A ``drgn_priority`` integer attribute can be assigned to a hook function to +define when it is called relative to other plugins. Hook functions with lower +``drgn_priority`` values are called earlier. Functions with equal +``drgn_priority`` values are called in an unspecified order. The default if not +defined is 50. + +See :ref:`writing-plugins` for an example. 
+ Logging ------- diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index d31aacbc0..7b0682bfc 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -169,6 +169,7 @@ libdrgnimpl_la_SOURCES += python/constants.c \ python/module_section_addresses.c \ python/object.c \ python/platform.c \ + python/plugins.c \ python/program.c \ python/stack_trace.c \ python/symbol.c \ diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 4e5eea92a..ad6d07316 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -7,6 +7,7 @@ #include #include "linux_kernel.h" +#include "plugins.h" #include "program.h" // IWYU pragma: associated #include "util.h" @@ -271,6 +272,7 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) if (err) goto err_platform; prog->kdump_ctx = ctx; + drgn_call_plugins_prog("drgn_prog_set", prog); return NULL; err_platform: diff --git a/libdrgn/plugins.h b/libdrgn/plugins.h new file mode 100644 index 000000000..897ec556f --- /dev/null +++ b/libdrgn/plugins.h @@ -0,0 +1,17 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef DRGN_PLUGINS_H +#define DRGN_PLUGINS_H + +#include + +struct drgn_program; + +#if ENABLE_PYTHON +void drgn_call_plugins_prog(const char *name, struct drgn_program *prog); +#else +static inline void drgn_call_plugins_prog(const char *name, struct drgn_program *prog) {} +#endif + +#endif /* DRGN_PLUGINS_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index 1db6f07ae..396cc0131 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -31,6 +31,7 @@ #include "memory_reader.h" #include "minmax.h" #include "object.h" +#include "plugins.h" #include "program.h" #include "serialize.h" #include "symbol.h" @@ -666,6 +667,7 @@ drgn_program_set_core_dump_fd_internal(struct drgn_program *prog, int fd, goto out_segments; } + drgn_call_plugins_prog("drgn_prog_set", prog); return NULL; out_segments: @@ -769,6 +771,8 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) prog->pid = pid; prog->flags |= DRGN_PROGRAM_IS_LIVE | DRGN_PROGRAM_IS_LOCAL; + + drgn_call_plugins_prog("drgn_prog_set", prog); return NULL; out_segments: diff --git a/libdrgn/python/plugins.c b/libdrgn/python/plugins.c new file mode 100644 index 000000000..2bf2b20a1 --- /dev/null +++ b/libdrgn/python/plugins.c @@ -0,0 +1,32 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" +#include "../plugins.h" + +void drgn_call_plugins_prog(const char *name, struct drgn_program *prog) +{ + PyGILState_guard(); + + static PyObject *call_plugins; + if (!call_plugins) { + _cleanup_pydecref_ PyObject *_drgn_util_plugins_module = + PyImport_ImportModule("_drgn_util.plugins"); + if (!_drgn_util_plugins_module) { + PyErr_WriteUnraisable(NULL); + return; + } + call_plugins = PyObject_GetAttrString(_drgn_util_plugins_module, + "call_plugins"); + if (!call_plugins) { + PyErr_WriteUnraisable(NULL); + return; + } + } + + Program *prog_obj = container_of(prog, Program, prog); + _cleanup_pydecref_ PyObject *res = + PyObject_CallFunction(call_plugins, "sO", name, prog_obj); + if (!res) + PyErr_WriteUnraisable(call_plugins); +} diff --git a/tests/test_plugins.py b/tests/test_plugins.py new file mode 100644 index 000000000..2794aa42b --- /dev/null +++ b/tests/test_plugins.py @@ -0,0 +1,218 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +import logging +import os +from pathlib import Path +import sys +import tempfile +import unittest.mock + +import _drgn_util.plugins +from _drgn_util.plugins import call_plugins +from tests import TestCase, modifyenv + + +class TestPlugins(TestCase): + def setUp(self): + # Clear the plugin and hook caches before each test. + _drgn_util.plugins._plugins = None + _drgn_util.plugins._hooks.clear() + + # pkg_resources caches distributions on import. Delete it before each + # test so that it is reloaded. + sys.modules.pop("pkg_resources", None) + + # These tests change these environment variables and sys.path, so + # restore them after each test. + self.enterContext( + modifyenv({"DRGN_PLUGINS": None, "DRGN_DISABLE_PLUGINS": None}) + ) + self.addCleanup(setattr, sys, "path", list(sys.path)) + + # Delete modules imported by each test so that we can reuse the same + # module names. 
+ def restore_modules(old_modules): + for new_module in set(sys.modules) - old_modules: + sys.modules.pop(new_module, None) + + self.addCleanup(restore_modules, set(sys.modules)) + + @staticmethod + def _create_plugin(dir): + plugin_path = Path(dir) / "test_plugin.py" + plugin_path.write_text( + """\ +def drgn_test_hook(call_me): + call_me() +""" + ) + return plugin_path + + @staticmethod + def _create_dist_info(dir, module_name="test_plugin", entry_point_name="test"): + dist_info_dir = Path(dir) / f"{module_name}-1.0.dist-info" + dist_info_dir.mkdir() + (dist_info_dir / "METADATA").write_text( + f"""\ +Metadata-Version: 1.1 +Name: {module_name} +Version: 1.0 +""" + ) + (dist_info_dir / "entry_points.txt").write_text( + f"""\ +[drgn.plugins] +{entry_point_name} = {module_name} +""" + ) + + def test_entry_point(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_drgn_disable_plugins_envvar_all(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_DISABLE_PLUGINS"] = "*" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_not_called() + + def test_drgn_disable_plugins_envvar_specific(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_DISABLE_PLUGINS"] = "foo,test" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_not_called() + + def test_drgn_plugins_envvar_path(self): + with tempfile.TemporaryDirectory() as temp_dir: + plugin_path = self._create_plugin(temp_dir) + os.environ["DRGN_PLUGINS"] = f"test={plugin_path}" + + 
call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_drgn_plugins_envvar_module(self): + with tempfile.TemporaryDirectory() as temp_dir: + plugin_path = self._create_plugin(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_PLUGINS"] = f"test={plugin_path.stem}" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_drgn_plugins_envvar_precedence(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_DISABLE_PLUGINS"] = "*" + os.environ["DRGN_PLUGINS"] = "test" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_priority(self): + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "test_plugin1.py").write_text( + """\ +def drgn_test_hook(call_me): + call_me(1) +drgn_test_hook.drgn_priority = 75 +""" + ) + (Path(temp_dir) / "test_plugin2.py").write_text( + """\ +def drgn_test_hook(call_me): + call_me(2) +drgn_test_hook.drgn_priority = 25 +""" + ) + (Path(temp_dir) / "test_plugin3.py").write_text( + """\ +def drgn_test_hook(call_me): + call_me(3) +""" + ) + self._create_dist_info(temp_dir, "test_plugin1", "test1") + self._create_dist_info(temp_dir, "test_plugin2", "test2") + self._create_dist_info(temp_dir, "test_plugin3", "test3") + sys.path.insert(0, temp_dir) + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + self.assertEqual( + call_me.call_args_list, + [unittest.mock.call(2), unittest.mock.call(3), unittest.mock.call(1)], + ) + + def test_plugin_exception(self): + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "test_plugin.py").write_text('raise Exception("foo")\n') + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + + with 
self.assertLogs(logging.getLogger("drgn.plugins"), "WARNING") as cm: + call_plugins("drgn_test_hook") + self.assertTrue( + any( + message.startswith("WARNING:drgn.plugins:failed to load 'test") + for message in cm.output + ), + msg=f"no match in {cm.output}", + ) + + def test_hook_exception(self): + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "test_plugin.py").write_text( + """\ +def drgn_test_hook(): + raise Exception("foo") +""" + ) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + + with self.assertLogs(logging.getLogger("drgn.plugins"), "WARNING") as cm: + call_plugins("drgn_test_hook") + self.assertTrue( + any( + message.startswith( + "WARNING:drgn.plugins:'test' drgn_test_hook failed:" + ) + for message in cm.output + ), + msg=f"no match in {cm.output}", + ) + + def test_missing_entry_point(self): + os.environ["DRGN_PLUGINS"] = "__non__existent__entrypoint__" + with self.assertLogs(logging.getLogger("drgn.plugins"), "WARNING") as cm: + call_plugins("drgn_test_hook") + self.assertTrue( + any( + message.startswith( + "WARNING:drgn.plugins:not found: '__non__existent__entrypoint__'" + ) + for message in cm.output + ), + msg=f"no match in {cm.output}", + ) From 70391bcfa70110aaa7ea916b16521c6b7ef70744 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 3 Feb 2025 13:22:13 -0800 Subject: [PATCH 044/166] Add 6.14 to supported kernels This one broke our module support until the previous update, unfortunately. Signed-off-by: Omar Sandoval --- docs/support_matrix.rst | 2 +- vmtest/config.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/support_matrix.rst b/docs/support_matrix.rst index 255cf6389..bf81f461a 100644 --- a/docs/support_matrix.rst +++ b/docs/support_matrix.rst @@ -72,7 +72,7 @@ currently fully supported are: .. Keep this in sync with vmtest/config.py. 
-- 6.0-6.13 +- 6.0-6.14 - 5.10-5.19 - 5.4 - 4.19 diff --git a/vmtest/config.py b/vmtest/config.py index c5f014f40..8f3e42e3f 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -13,6 +13,7 @@ # Kernel versions that we run tests on and therefore support. Keep this in sync # with docs/support_matrix.rst. SUPPORTED_KERNEL_VERSIONS = ( + "6.14", "6.13", "6.12", "6.11", From bf69b7b0593ed7a7d08edeae8a945eea01f86d90 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Feb 2025 13:18:15 -0800 Subject: [PATCH 045/166] docs: add a hands-on tutorial for a blk-rq-qos crash I've been wanting to add something like this for awhile. This also includes some requisite Sphinx and CSS tweaks. Signed-off-by: Omar Sandoval --- .pre-commit-config.yaml | 1 + docs/_static/custom.css | 19 + docs/conf.py | 1 + docs/exts/details.py | 85 +++ docs/index.rst | 1 + docs/tutorials.rst | 9 + docs/tutorials/blk_rq_qos_crash.rst | 853 ++++++++++++++++++++++++++++ 7 files changed, 969 insertions(+) create mode 100644 docs/exts/details.py create mode 100644 docs/tutorials.rst create mode 100644 docs/tutorials/blk_rq_qos_crash.rst diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8fd32a43..2c4383f56 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,6 +9,7 @@ repos: rev: 24.8.0 hooks: - id: black + exclude: ^docs/exts/details\.py$ - repo: https://github.com/pycqa/flake8 rev: 7.1.1 hooks: diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 037d460de..b7ad6d98c 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,3 +1,8 @@ +details { + margin-block-start: 1em; + margin-block-end: 1em; +} + div.admonition { padding-bottom: 0; } @@ -7,6 +12,20 @@ div.admonition p.admonition-title { font-weight: bold; } +div.tip { + background-color: #DFD; + border-color: #ACA; +} + +div.scroll-y pre { + max-height: 20em; + overflow-y: auto; +} + +div.tutorial pre { + border-left: 5px solid #5A5; +} + @media screen and (min-width: 
875px) { div.document { width: 100%; diff --git a/docs/conf.py b/docs/conf.py index 1a270c4a0..24f630406 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,6 +7,7 @@ master_doc = "index" extensions = [ + "details", "drgndoc.ext", "linuxsrc", "setuptools_config", diff --git a/docs/exts/details.py b/docs/exts/details.py new file mode 100644 index 000000000..4d179ed52 --- /dev/null +++ b/docs/exts/details.py @@ -0,0 +1,85 @@ +# Copyright 2017-2019 by Takeshi KOMIYA +# SPDX-License-Identifier: Apache-2.0 +# From https://pypi.org/project/sphinxcontrib-details-directive/, patched to +# use the proper name for the :class: option. + +from docutils import nodes +from docutils.parsers.rst import Directive, directives +from sphinx.transforms.post_transforms import SphinxPostTransform +from sphinx.util.nodes import NodeMatcher + + +class details(nodes.Element, nodes.General): + pass + + +class summary(nodes.TextElement, nodes.General): + pass + + +def visit_details(self, node): + if node.get('opened'): + self.body.append(self.starttag(node, 'details', open="open")) + else: + self.body.append(self.starttag(node, 'details')) + + +def depart_details(self, node): + self.body.append('</details>') + + +def visit_summary(self, node): + self.body.append(self.starttag(node, 'summary')) + + +def depart_summary(self, node): + self.body.append('</summary>') + + +class DetailsDirective(Directive): + required_arguments = 1 + final_argument_whitespace = True + has_content = True + option_spec = { + 'class': directives.class_option, + 'name': directives.unchanged, + 'open': directives.flag, + } + + def run(self): + admonition = nodes.container('', + classes=self.options.get('class', []), + opened='open' in self.options, + type='details') + textnodes, messages = self.state.inline_text(self.arguments[0], + self.lineno) + admonition += nodes.paragraph(self.arguments[0], '', *textnodes) + admonition += messages + self.state.nested_parse(self.content, self.content_offset, admonition) + self.add_name(admonition) +
return [admonition] + + +class DetailsTransform(SphinxPostTransform): + default_priority = 200 + builders = ('html',) + + def run(self): + matcher = NodeMatcher(nodes.container, type='details') + for node in self.document.traverse(matcher): + newnode = details(**node.attributes) + newnode += summary('', '', *node[0]) + newnode.extend(node[1:]) + node.replace_self(newnode) + + +def setup(app): + app.add_node(details, html=(visit_details, depart_details)) + app.add_node(summary, html=(visit_summary, depart_summary)) + app.add_directive('details', DetailsDirective) + app.add_post_transform(DetailsTransform) + + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/docs/index.rst b/docs/index.rst index 6649fc36c..f46454c85 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,6 +34,7 @@ Table of Contents api_reference helpers support_matrix + tutorials case_studies getting_debugging_symbols release_highlights diff --git a/docs/tutorials.rst b/docs/tutorials.rst new file mode 100644 index 000000000..a40b927e5 --- /dev/null +++ b/docs/tutorials.rst @@ -0,0 +1,9 @@ +Tutorials +========= + +Hands-on tutorials for learning how to use drgn. + +.. toctree:: + :maxdepth: 1 + + tutorials/blk_rq_qos_crash.rst diff --git a/docs/tutorials/blk_rq_qos_crash.rst b/docs/tutorials/blk_rq_qos_crash.rst new file mode 100644 index 000000000..a6746a62b --- /dev/null +++ b/docs/tutorials/blk_rq_qos_crash.rst @@ -0,0 +1,853 @@ +Stack Traces and Mystery Addresses (blk-rq-qos Crash) +===================================================== + +| Author: Omar Sandoval +| Date: February 12, 2025 + +.. linuxversion:: v6.11 + +This is a hands-on tutorial walking through a real Linux kernel bug that caused +kernel crashes in production. We'll read kernel code and use a few important +drgn techniques for reading stack traces and interpreting memory in order to +identify the root cause of the bug. 
+ +We saw this crash on storage workloads on multiple kernel versions, up to and +including the latest at the time, Linux 6.11. The kernel logs all implicated +something in the block layer. + +A core dump and debugging symbols are provided for you to follow along with. + +Setup +----- + +.. highlight:: console + +Follow the :doc:`../installation` instructions to get drgn. + +Download and extract the tutorial files: + +.. code-block:: + :class: tutorial + + $ curl -L https://github.com/osandov/drgn/releases/download/tutorial-assets/blk_rq_qos_crash_tutorial.tar.zst \ + | zstd -d | tar -x + +This will create a directory named ``blk_rq_qos_crash_tutorial``. Enter it: + +.. code-block:: + :class: tutorial + + $ cd blk_rq_qos_crash_tutorial + +Then, run drgn as follows. It will print a version banner and automatically +import the relevant :doc:`../helpers`: + +.. code-block:: + :class: tutorial + + $ drgn -c vmcore -s vmlinux --main-symbols + drgn 0.0.30 (using Python 3.13.1, elfutils 0.192, with libkdumpfile) + For help, type help(drgn). + >>> import drgn + >>> from drgn import FaultError, NULL, Object, alignof, cast, container_of, execscript, implicit_convert, offsetof, reinterpret, sizeof, stack_trace + >>> from drgn.helpers.common import * + >>> from drgn.helpers.linux import * + +In another window, check out the source code for Linux 6.11. For example, run +``git checkout v6.11`` in an existing Linux repo, or run: + +.. code-block:: + :class: tutorial + + $ git clone -b v6.11 --depth 1 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + ... + $ cd linux + +Now we can dive into the core dump. + +Starting With Dmesg +------------------- + +.. highlight:: pycon + +The kernel log buffer is usually the first place to look when debugging a +crash. In drgn, call :func:`~drgn.helpers.linux.printk.print_dmesg()` and +scroll up until you find the line starting with ``BUG:``. You should see the +following trace: + +.. 
code-block:: + :class: scroll-y tutorial + :emphasize-lines: 3,11 + + >>> print_dmesg() + ... + [ 18.051123] BUG: kernel NULL pointer dereference, address: 00000000000006fc + [ 18.051597] #PF: supervisor write access in kernel mode + [ 18.051936] #PF: error_code(0x0002) - not-present page + [ 18.052241] PGD 0 P4D 0 + [ 18.052336] Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI + [ 18.052629] CPU: 0 UID: 0 PID: 906 Comm: fio Kdump: loaded Not tainted 6.11.0 #1 + [ 18.053123] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 + [ 18.053739] RIP: 0010:_raw_spin_lock_irqsave+0x36/0x70 + [ 18.054059] Code: 04 25 28 00 00 00 48 89 44 24 08 48 c7 04 24 00 00 00 00 9c 8f 04 24 48 8b 1c 24 fa 65 ff 05 89 2a b7 7e b9 01 00 00 00 31 c0 0f b1 0f 75 1e 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 75 17 + [ 18.055467] RSP: 0000:ffffc900011abcd0 EFLAGS: 00010046 + [ 18.055788] RAX: 0000000000000000 RBX: 0000000000000082 RCX: 0000000000000001 + [ 18.056260] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 00000000000006fc + [ 18.056725] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000015000e + [ 18.057202] R10: ffff888002fa5900 R11: ffffffff81312090 R12: 0000000000000003 + [ 18.057669] R13: ffff888002d4b678 R14: 00000000000006fc R15: 0000000000000003 + [ 18.058138] FS: 00007f1ee66c06c0(0000) GS:ffff888005a00000(0000) knlGS:0000000000000000 + [ 18.058677] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 18.059039] CR2: 00000000000006fc CR3: 0000000002f4a005 CR4: 0000000000770ef0 + [ 18.059508] PKRU: 55555554 + [ 18.059614] Call Trace: + [ 18.059700] + [ 18.059782] ? __die_body+0x16/0x60 + [ 18.059982] ? page_fault_oops+0x31e/0x3a0 + [ 18.060205] ? exc_page_fault+0x55/0xa0 + [ 18.060409] ? asm_exc_page_fault+0x26/0x30 + [ 18.060640] ? __pfx_wbt_inflight_cb+0x10/0x10 + [ 18.060892] ? 
_raw_spin_lock_irqsave+0x36/0x70 + [ 18.061150] try_to_wake_up+0x3e/0x400 + [ 18.061342] rq_qos_wake_function+0x4d/0x60 + [ 18.061572] __wake_up_common+0x42/0x80 + [ 18.061770] __wake_up_common_lock+0x33/0x60 + [ 18.062007] wbt_done+0x60/0x80 + [ 18.062152] __rq_qos_done+0x22/0x40 + [ 18.062330] blk_mq_free_request+0x62/0xb0 + [ 18.062551] virtblk_done+0x99/0x120 + [ 18.062731] vring_interrupt+0x71/0x80 + [ 18.062928] vp_interrupt+0xa8/0xe0 + [ 18.063100] __handle_irq_event_percpu+0x89/0x1b0 + [ 18.063373] handle_irq_event_percpu+0xf/0x40 + [ 18.063614] handle_irq_event+0x30/0x50 + [ 18.063831] handle_fasteoi_irq+0xaa/0x1b0 + [ 18.064051] __common_interrupt+0x3a/0xb0 + [ 18.064266] common_interrupt+0x3d/0x90 + [ 18.064462] asm_common_interrupt+0x26/0x40 + [ 18.064691] RIP: 0033:0x7f1ef33679b9 + [ 18.064886] Code: ff 48 85 c0 0f 84 32 35 00 00 48 8b bd b8 f9 ff ff 4c 89 b5 80 f9 ff ff 48 89 07 4c 01 f8 48 89 85 78 f9 ff ff e9 8d ca ff ff <48> 8b 85 60 fa ff ff 48 8d 50 08 48 89 95 60 fa ff ff e9 c7 d5 ff + [ 18.066333] RSP: 002b:00007f1ee66baad0 EFLAGS: 00000212 + [ 18.066624] RAX: 00007f1ee66bad56 RBX: 00007f1ee66bb1d0 RCX: 00007f1ee66bad56 + [ 18.066999] RDX: 0000000000000030 RSI: 00000000000f12b3 RDI: 000000000000000a + [ 18.067476] RBP: 00007f1ee66bb1a0 R08: 000000000000002c R09: 0000000000000000 + [ 18.068003] R10: 00007f1ef348dfe0 R11: 0000000000000020 R12: 0000000000000020 + [ 18.068482] R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000001 + [ 18.069005] + [ 18.069097] CR2: 00000000000006fc + +``BUG: kernel NULL pointer dereference, address: 00000000000006fc`` tells us +that the kernel crashed because it dereferenced a null pointer and tried to +access the address 0x6fc. + +``RIP: 0010:_raw_spin_lock_irqsave+0x36/0x70`` tells us that the bad access +happened in the function :linux:`_raw_spin_lock_irqsave() +`. Below that, the stack trace tells us how we +got there. + +.. tip:: + + Ignore call trace lines starting with ``?``. 
These are stale function + addresses on the stack that are not part of the actual call trace. They are + printed as a :linuxt:`hint/fail-safe `, + but they are misleading as often as not. + +We'll look at the trace in more detail with drgn soon, but we can see that we +got an interrupt for a disk I/O completion, which then tried to wake up a task +and acquire a spinlock. + +Stack Trace in drgn +------------------- + +Now let's look at drgn's view of the stack trace. Save the stack trace of the +crashed thread: + +.. code-block:: + :class: tutorial + + >>> trace = prog.crashed_thread().stack_trace() + +And print it: + +.. code-block:: + :class: scroll-y tutorial + + >>> trace + #0 arch_atomic_try_cmpxchg (./arch/x86/include/asm/atomic.h:107:9) + #1 raw_atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-arch-fallback.h:2170:9) + #2 atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-instrumented.h:1302:9) + #3 queued_spin_lock (./include/asm-generic/qspinlock.h:111:6) + #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) + #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) + #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) + #7 class_raw_spinlock_irqsave_constructor (./include/linux/spinlock.h:551:1) + #8 try_to_wake_up (kernel/sched/core.c:4051:2) + #9 rq_qos_wake_function (block/blk-rq-qos.c:223:2) + #10 __wake_up_common (kernel/sched/wait.c:89:9) + #11 __wake_up_common_lock (kernel/sched/wait.c:106:14) + #12 wbt_done (block/blk-wbt.c:259:3) + #13 __rq_qos_done (block/blk-rq-qos.c:39:4) + #14 rq_qos_done (block/blk-rq-qos.h:122:3) + #15 blk_mq_free_request (block/blk-mq.c:737:2) + #16 virtblk_done (drivers/block/virtio_blk.c:367:5) + #17 vring_interrupt (drivers/virtio/virtio_ring.c:2595:3) + #18 vp_vring_interrupt (drivers/virtio/virtio_pci_common.c:82:7) + #19 vp_interrupt (drivers/virtio/virtio_pci_common.c:113:9) + #20 __handle_irq_event_percpu (kernel/irq/handle.c:158:9) + #21 handle_irq_event_percpu 
(kernel/irq/handle.c:193:11) + #22 handle_irq_event (kernel/irq/handle.c:210:8) + #23 handle_fasteoi_irq (kernel/irq/chip.c:720:2) + #24 generic_handle_irq_desc (./include/linux/irqdesc.h:173:2) + #25 handle_irq (arch/x86/kernel/irq.c:247:3) + #26 call_irq_handler (arch/x86/kernel/irq.c:259:3) + #27 __common_interrupt (arch/x86/kernel/irq.c:285:6) + #28 common_interrupt (arch/x86/kernel/irq.c:278:1) + #29 asm_common_interrupt+0x26/0x2b (./arch/x86/include/asm/idtentry.h:693) + #30 0x7f1ef33679b9 + +Notice that drgn's stack trace includes information not in the kernel trace, +namely: + +1. File names and line and column numbers. These are very useful for navigating + the code that you're debugging. +2. Inlined function calls. For example, frames 0-5 are all inlined calls, and + frame 6 was the last actual call. You can verify this by printing each frame + individually: + + .. code-block:: + :class: tutorial + + + >>> trace[0] + #0 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in arch_atomic_try_cmpxchg at ./arch/x86/include/asm/atomic.h:107:9 (inlined) + >>> trace[1] + #1 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in raw_atomic_try_cmpxchg_acquire at ./include/linux/atomic/atomic-arch-fallback.h:2170:9 (inlined) + >>> trace[2] + #2 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in atomic_try_cmpxchg_acquire at ./include/linux/atomic/atomic-instrumented.h:1302:9 (inlined) + >>> trace[3] + #3 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in queued_spin_lock at ./include/asm-generic/qspinlock.h:111:6 (inlined) + >>> trace[4] + #4 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in do_raw_spin_lock at ./include/linux/spinlock.h:187:2 (inlined) + >>> trace[5] + #5 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in __raw_spin_lock_irqsave at ./include/linux/spinlock_api_smp.h:111:2 (inlined) + >>> trace[6] + #6 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in _raw_spin_lock_irqsave at 
kernel/locking/spinlock.c:162:9 + + Notice that frames 0-5 end with ``(inlined)``, and all of the frames have + the same instruction pointer, ``0xffffffff814b6446``. + +Tracing Local Variables +----------------------- + +Next, let's walk through the stack trace to figure out where the null pointer +came from. + +Frames 0-2 are low-level atomic operations:: + + #0 arch_atomic_try_cmpxchg (./arch/x86/include/asm/atomic.h:107:9) + #1 raw_atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-arch-fallback.h:2170:9) + #2 atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-instrumented.h:1302:9) + +That's essentially a fancy memory access, so let's skip those frames. Frame 3 +is in :linux:`queued_spin_lock() `, the +kernel's spinlock implementation:: + + #3 queued_spin_lock (./include/asm-generic/qspinlock.h:111:6) + +In your window with the Linux source code, open +:file:`include/asm-generic/qspinlock.h` and jump to line 111: + +.. code-block:: c + :caption: include/asm-generic/qspinlock.h + :lineno-start: 107 + :emphasize-lines: 5 + + static __always_inline void queued_spin_lock(struct qspinlock *lock) + { + int val = 0; + + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + queued_spin_lock_slowpath(lock, val); + } + +Notice that it accesses the ``lock`` parameter. Print it in drgn: + +.. code-block:: + :class: tutorial + + >>> trace[3]["lock"] + (struct qspinlock *)0x6fc + +This matches the address from the ``BUG`` message in dmesg! + +Now let's find out where ``lock`` came from. Frames 4-7 wrap the low-level +spinlock implementation:: + + #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) + #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) + #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) + #7 class_raw_spinlock_irqsave_constructor (./include/linux/spinlock.h:551:1) + +Feel free to open the source code for these, but we can quickly check that the +lock simply gets passed through: + +.. 
code-block:: + :class: tutorial + + >>> trace[4]["lock"] + (raw_spinlock_t *)0x6fc + >>> trace[5]["lock"] + (raw_spinlock_t *)0x6fc + >>> trace[6]["lock"] + (raw_spinlock_t *)0x6fc + +:linux:`class_raw_spinlock_irqsave_constructor() +` is slightly different. It is generated by a +macro and doesn't use the name ``lock``: + +.. code-block:: + :class: tutorial + + >>> trace[7]["lock"] + Traceback (most recent call last): + ... + KeyError: 'lock' + +Let's list all of its local variables and make a guess: + +.. code-block:: + :class: tutorial + + >>> trace[7].locals() + ['l', '_t'] + >>> trace[7]["l"] + (raw_spinlock_t *)0x6fc + +.. tip:: + + Use :meth:`drgn.StackFrame.locals()` to get the list of parameters and + local variables in a stack frame when finding the implementation of the + function is inconvenient. + +The caller must have passed 0x6fc. Let's look at it. The next frame is in +:linux:`try_to_wake_up() `:: + + #8 try_to_wake_up (kernel/sched/core.c:4051:2) + +Open :file:`kernel/sched/core.c` at line 4051: + +.. code-block:: c + :caption: kernel/sched/core.c + :emphasize-lines: 4 + + int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) + { + ... + scoped_guard (raw_spinlock_irqsave, &p->pi_lock) { + +It is acquiring :linux:`pi_lock ` in a +:linux:`task_struct ` (using a `scoped guard +`_). Print the ``task_struct``: + +.. code-block:: + :class: tutorial + + >>> trace[8]["p"] + (struct task_struct *)0x0 + +There's our null pointer! But where did 0x6fc come from? Look at the offset of +``pi_lock`` in ``struct task_struct``: + +.. code-block:: + :class: tutorial + + >>> hex(offsetof(prog.type("struct task_struct"), "pi_lock")) + '0x6fc' + +Or do the inverse and see what's at offset 0x6fc in ``struct task_struct``: + +.. code-block:: + :class: tutorial + + >>> member_at_offset(prog.type("struct task_struct"), 0x6fc) + 'pi_lock.raw_lock.val.counter or pi_lock.raw_lock.locked or pi_lock.raw_lock.locked_pending' + +.. 
tip:: + + Use :func:`~drgn.offsetof()` and + :func:`~drgn.helpers.common.type.member_at_offset()` to decipher pointers + to struct members. + +So where did ``p`` come from? Let's look at the caller, +:linux:`rq_qos_wake_function() `, in frame 9:: + + #9 rq_qos_wake_function (block/blk-rq-qos.c:223:2) + +Open :file:`block/blk-rq-qos.c` at line 223: + +.. code-block:: c + :caption: block/blk-rq-qos.c + :lineno-start: 206 + :emphasize-lines: 18 + + static int rq_qos_wake_function(struct wait_queue_entry *curr, + unsigned int mode, int wake_flags, void *key) + { + struct rq_qos_wait_data *data = container_of(curr, + struct rq_qos_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up loop + * in __wake_up_common. + */ + if (!data->cb(data->rqw, data->private_data)) + return -1; + + data->got_token = true; + smp_wmb(); + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; + } + +(Note: :linux:`wake_up_process() ` doesn't show up in +the stack trace because of `tail call elimination +`_. This `may be fixed +`_ in a future release of drgn.) + +``p`` came from ``data->task``. Print ``data``: + +.. code-block:: + :class: tutorial + + >>> trace[9]["data"] + *(struct rq_qos_wait_data *)0xffffc900011b3558 = { + .wq = (struct wait_queue_entry){ + .flags = (unsigned int)2168637095, + .private = (void *)0xffff888002d6c000, + .func = (wait_queue_func_t)0x0, + .entry = (struct list_head){ + .next = (struct list_head *)0xffff888002d6c000, + .prev = (struct list_head *)0xffff888002da2100, + }, + }, + .task = (struct task_struct *)0xffff888000fd6001, + .rqw = (struct rq_wait *)0xffffc900011b3a30, + .cb = (acquire_inflight_cb_t *)0xffff888002763030, + .private_data = (void *)0x1, + .got_token = (bool)201, + } + +Notice that ``data->task`` is NOT null. Print the ``comm`` member, which should +be the thread name: + +.. code-block:: + :class: tutorial + + >>> trace[9]["data"].task.comm + (char [16])"" + +Instead, it's empty. 
This doesn't appear to be a valid ``task_struct``. + +Identifying Mystery Addresses +----------------------------- + +If ``data->task`` isn't a valid ``task_struct``, then what is it? Pass it to +:func:`~drgn.helpers.common.memory.identify_address()` to answer that: + +.. code-block:: + :class: tutorial + + >>> identify_address(trace[9]["data"].task) + 'slab object: buffer_head+0x1' + +It's a pointer to a completely unrelated type. + +Since our problem seems to stem from ``data``, pass it to +``identify_address()`` to see where it comes from: + +.. code-block:: + :class: tutorial + + >>> identify_address(trace[9]["data"]) + 'vmap stack: 909 (fio) +0x3558' + +This means that ``data`` is on the stack of the task with PID 909. + +.. tip:: + + Use :func:`~drgn.helpers.common.memory.identify_address()` to figure out + what an unknown address refers to. + +Other Stacks +------------ + +Notice that we've seen three possibilities for ``data->task``: + +1. When it was passed to ``wake_up_process()``, it was ``NULL``. +2. By the time of the crash, it was an unrelated pointer. +3. It's supposed to point to a ``task_struct``. + +This suggests that there's a data race on ``data->task``. + +We know that ``data`` is on the stack of another task. Let's find where it's +created. In :file:`block/blk-rq-qos.c`, search for ``struct rq_qos_wait_data``. +You should find it being used in :linux:`rq_qos_wait() +`: + +.. 
code-block:: c + :caption: block/blk-rq-qos.c + :lineno-start: 243 + :emphasize-lines: 5 + + void rq_qos_wait(struct rq_wait *rqw, void *private_data, + acquire_inflight_cb_t *acquire_inflight_cb, + cleanup_cb_t *cleanup_cb) + { + struct rq_qos_wait_data data = { + .wq = { + .func = rq_qos_wake_function, + .entry = LIST_HEAD_INIT(data.wq.entry), + }, + .task = current, + .rqw = rqw, + .cb = acquire_inflight_cb, + .private_data = private_data, + }; + bool has_sleeper; + + has_sleeper = wq_has_sleeper(&rqw->wait); + if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) + return; + + has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, + TASK_UNINTERRUPTIBLE); + do { + /* The memory barrier in set_task_state saves us here. */ + if (data.got_token) + break; + if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { + finish_wait(&rqw->wait, &data.wq); + + /* + * We raced with rq_qos_wake_function() getting a token, + * which means we now have two. Put our local token + * and wake anyone else potentially waiting for one. + */ + smp_rmb(); + if (data.got_token) + cleanup_cb(rqw, private_data); + break; + } + io_schedule(); + has_sleeper = true; + set_current_state(TASK_UNINTERRUPTIBLE); + } while (1); + finish_wait(&rqw->wait, &data.wq); + } + +This function creates ``data`` on the stack, with ``data->task`` set to the +current task, and then tries to acquire an "inflight counter". If one is not +available, it puts itself on a wait queue and blocks until it can get one. + +So, ``rq_qos_wait()`` waits for an inflight counter, and +``rq_qos_wake_function()`` wakes it up when one becomes available. We would +expect that the PID we found earlier, 909, is currently blocked in +``rq_qos_wait()``. Pass the PID to :func:`~drgn.stack_trace()` to check: + +.. 
code-block:: + :class: scroll-y tutorial + + >>> stack_trace(909) + #0 rep_nop (./arch/x86/include/asm/vdso/processor.h:0:2) + #1 cpu_relax (./arch/x86/include/asm/vdso/processor.h:18:2) + #2 queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380:3) + #3 queued_spin_lock (./include/asm-generic/qspinlock.h:114:2) + #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) + #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) + #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) + #7 virtblk_add_req_batch (drivers/block/virtio_blk.c:481:2) + #8 virtio_queue_rqs (drivers/block/virtio_blk.c:519:11) + #9 __blk_mq_flush_plug_list (block/blk-mq.c:2704:2) + #10 blk_mq_flush_plug_list (block/blk-mq.c:2781:4) + #11 blk_add_rq_to_plug (block/blk-mq.c:1292:3) + #12 blk_mq_submit_bio (block/blk-mq.c:3028:3) + #13 __submit_bio (block/blk-core.c:615:3) + #14 __submit_bio_noacct_mq (block/blk-core.c:696:3) + #15 submit_bio_noacct_nocheck (block/blk-core.c:725:3) + #16 ext4_io_submit (fs/ext4/page-io.c:377:3) + #17 io_submit_add_bh (fs/ext4/page-io.c:418:3) + #18 ext4_bio_write_folio (fs/ext4/page-io.c:560:3) + #19 mpage_submit_folio (fs/ext4/inode.c:1943:8) + #20 mpage_process_page_bufs (fs/ext4/inode.c:2056:9) + #21 mpage_prepare_extent_to_map (fs/ext4/inode.c:2564:11) + #22 ext4_do_writepages (fs/ext4/inode.c:2706:8) + #23 ext4_writepages (fs/ext4/inode.c:2842:8) + #24 do_writepages (mm/page-writeback.c:2683:10) + #25 __filemap_fdatawrite_range (mm/filemap.c:430:9) + #26 generic_fadvise (mm/fadvise.c:114:3) + #27 vfs_fadvise (mm/fadvise.c:185:9) + #28 ksys_fadvise64_64 (mm/fadvise.c:199:8) + #29 __do_sys_fadvise64 (mm/fadvise.c:214:9) + #30 __se_sys_fadvise64 (mm/fadvise.c:212:1) + #31 __x64_sys_fadvise64 (mm/fadvise.c:212:1) + #32 do_syscall_x64 (arch/x86/entry/common.c:52:14) + #33 do_syscall_64 (arch/x86/entry/common.c:83:7) + #34 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121) + #35 0x7f1ef340203a + +It's not in ``rq_qos_wait()``! 
It seems to have moved on to something else. + +Analysis +-------- + +At this point, we've gotten everything that we need from drgn. Now we need to +interpret what we've gathered and analyze the kernel code. + +Based on the stack trace for PID 909, we can conclude that the *waiter* got a +counter, returned, and moved on to something else. It reused the stack for +unrelated data, which explains the mystery pointer that we saw in +``data->task``. The series of events is something like this: + +1. ``acquire_inflight_cb()`` on line 260 fails. +2. ``prepare_to_wait_exclusive()`` puts ``data`` on the waitqueue. +3. ``acquire_inflight_cb()`` on line 269 succeeds. +4. ``finish_wait()`` removes ``data`` from the waitqueue. +5. ``rq_qos_wait()`` returns and the task moves on to something else, reusing + the stack memory. + +This means that the *waker* found the waiter's ``data`` in between steps 2 and +4, but by the time the waker called ``wake_up_process(data->task)``, the waiter +was past step 5. + +Wakers and waiters are supposed to be synchronized. Going back to the crashing +stack trace, we see that ``rq_qos_wake_function()`` is called via +:linux:`__wake_up_common_lock() `:: + + #10 __wake_up_common (kernel/sched/wait.c:89:9) + #11 __wake_up_common_lock (kernel/sched/wait.c:106:14) + +Open :file:`kernel/sched/wait.c` at line 106 and see that it's holding +``wq_head->lock``: + +.. code-block:: c + :caption: kernel/sched/wait.c + :lineno-start: 99 + :emphasize-lines: 8 + + static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, int wake_flags, void *key) + { + unsigned long flags; + int remaining; + + spin_lock_irqsave(&wq_head->lock, flags); + remaining = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, + key); + spin_unlock_irqrestore(&wq_head->lock, flags); + + return nr_exclusive - remaining; + } + +On the waiter side, :linux:`finish_wait() ` also grabs +``wq_head->lock``: + +.. 
code-block:: c + :caption: kernel/sched/wait.c + :lineno-start: 446 + + void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) + { + unsigned long flags; + + __set_current_state(TASK_RUNNING); + /* + * We can check for list emptiness outside the lock + * IFF: + * - we use the "careful" check that verifies both + * the next and prev pointers, so that there cannot + * be any half-pending updates in progress on other + * CPU's that we haven't seen yet (and that might + * still change the stack area. + * and + * - all other users take the lock (ie we can only + * have _one_ other CPU that looks at or modifies + * the list). + */ + if (!list_empty_careful(&wq_entry->entry)) { + spin_lock_irqsave(&wq_head->lock, flags); + list_del_init(&wq_entry->entry); + spin_unlock_irqrestore(&wq_head->lock, flags); + } + } + +But there's an important detail here: ``finish_wait()`` doesn't take the lock +if the wait queue list entry is empty, i.e., if it has already been removed +from the wait queue. + +Go back to ``rq_qos_wake_function()``: + +.. code-block:: c + :caption: block/blk-rq-qos.c + :lineno-start: 206 + + static int rq_qos_wake_function(struct wait_queue_entry *curr, + unsigned int mode, int wake_flags, void *key) + { + struct rq_qos_wait_data *data = container_of(curr, + struct rq_qos_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up loop + * in __wake_up_common. + */ + if (!data->cb(data->rqw, data->private_data)) + return -1; + + data->got_token = true; + smp_wmb(); + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; + } + +It removes the entry from the wait queue on line 222, then accesses the entry +on line 223. + +That's the race condition: as soon as the entry has been removed from the wait +queue, ``finish_wait()`` in the waiter can return instantly, and the waiter is +free to move on. Therefore, after the entry has been removed, the waker must +not access it. 
+ +The Fix +------- + +The fix is trivial: don't delete the wait queue entry until *after* using it. + +.. code-block:: diff + + diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c + index 2cfb297d9a62..058f92c4f9d5 100644 + --- a/block/blk-rq-qos.c + +++ b/block/blk-rq-qos.c + @@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, + + data->got_token = true; + smp_wmb(); + - list_del_init(&curr->entry); + wake_up_process(data->task); + + list_del_init_careful(&curr->entry); + return 1; + } + +The deletion also needs careful memory ordering to pair with the +:linux:`list_empty_careful() <include/linux/list.h>` in ``finish_wait()``, +hence the replacement of :linux:`list_del_init() <include/linux/list.h>` +with :linux:`list_del_init_careful() <include/linux/list.h>`. + +This fix was merged in Linux 6.12 in `commit e972b08b91ef ("blk-rq-qos: fix +crash on rq_qos_wait vs. rq_qos_wake_function race") +<https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e972b08b91ef>`_. + +Conclusion +---------- + +Debugging a core dump involves a lot of cross-referencing code and core dump +state. drgn gives you some powerful capabilities for understanding kernel +state, which you can use to discern subtle bugs like this one. In particular, +:func:`~drgn.helpers.common.memory.identify_address()`, +:func:`~drgn.helpers.common.type.member_at_offset()`, and +:meth:`drgn.StackFrame.locals()` are often crucial to an investigation. + +Feel free to reference the :doc:`../helpers` and explore this core dump further. + +Bonus Challenge: Reading File Pages +----------------------------------- + +As a bonus, try dumping the contents of the file ``/init`` in the core dump +(this is the script that I used to reproduce the bug). + +First, find the inode for ``/init`` and its file size. + +.. details:: Hint + + See :func:`~drgn.helpers.linux.fs.path_lookup()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> inode = path_lookup("/init").dentry.d_inode + >>> inode + *(struct inode *)0xffff88800289c568 = { + ...
+ } + >>> inode.i_size + (loff_t)578 + +The page cache for an inode is in an XArray, ``inode->i_mapping->i_pages``. Get +the cached page at offset 0. + +.. details:: Hint + + See :func:`~drgn.helpers.linux.xarray.xa_load()` and :func:`~drgn.cast()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> entry = xa_load(inode.i_mapping.i_pages.address_of_(), 0) + >>> page = cast("struct page *", entry) + >>> page + *(struct page *)0xffffea000015f840 = { + ... + } + +Get the page's virtual address. + +.. details:: Hint + + See :func:`~drgn.helpers.linux.mm.page_to_virt()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> addr = page_to_virt(page) + >>> addr + (void *)0xffff8880057e1000 + +Finally, read from the virtual address. + +.. details:: Hint + + See :meth:`drgn.Program.read()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> print(prog.read(addr, inode.i_size).decode()) + #!/bin/sh -e + + mount -t proc -o nosuid,nodev,noexec proc /proc + mount -t devtmpfs -o nosuid dev /dev + mkdir /dev/shm + mount -t tmpfs -o nosuid,nodev tmpfs /dev/shm + mount -t sysfs -o nosuid,nodev,noexec sys /sys + mount -t tmpfs -o nosuid,nodev tmpfs /tmp + kexec --load-panic --kexec-syscall-auto --command-line="root=/dev/vda rw console=ttyS0,115200 init=/kdump-init" vmlinuz + echo 1 > /sys/block/vda/queue/wbt_lat_usec + while true; do + cat /init > /dev/null + done & + fio --name=writer --rw=randwrite --ioengine=sync --buffered=1 --bs=4K --time_based --runtime=3600 --size=16M + poweroff -f From fa3695eb9eaa721085f28411455744529d4746fa Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Feb 2025 13:57:14 -0800 Subject: [PATCH 046/166] docs: add video to blk-rq-qos crash tutorial Signed-off-by: Omar Sandoval --- docs/tutorials/blk_rq_qos_crash.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/tutorials/blk_rq_qos_crash.rst b/docs/tutorials/blk_rq_qos_crash.rst index a6746a62b..ed32f27ae 100644 --- 
a/docs/tutorials/blk_rq_qos_crash.rst +++ b/docs/tutorials/blk_rq_qos_crash.rst @@ -17,6 +17,12 @@ something in the block layer. A core dump and debugging symbols are provided for you to follow along with. +This tutorial is also available as a video: + +.. raw:: html + + + Setup ----- From 1c6307b88dc8e09a0b8c1e7721c594731b8976a5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Feb 2025 15:15:00 -0800 Subject: [PATCH 047/166] README: exclude unsupported repositories from Repology badge We're amassing a graveyard of EOL Fedora releases. Signed-off-by: Omar Sandoval --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 833df4d4e..b1cabf270 100644 --- a/README.rst +++ b/README.rst @@ -66,7 +66,7 @@ Package Manager drgn can be installed using the package manager on some Linux distributions. -.. image:: https://repology.org/badge/vertical-allrepos/drgn.svg +.. image:: https://repology.org/badge/vertical-allrepos/drgn.svg?exclude_unsupported=1 :target: https://repology.org/project/drgn/versions :alt: Packaging Status From 9f56b1eeba0a4d3798b13f9aa32a565c9b62b400 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 13 Feb 2025 14:48:59 -0800 Subject: [PATCH 048/166] libdrgn: dwarf_info: add bias to DW_OP_addr in the middle of an expression While evaluating a DWARF location, we currently add the debug file bias once, at the very end. This works for DW_OP_addr operations at the end of a location description, but if there is a DW_OP_addr operation in the middle of an expression, it ignores the bias. Fix it by adding the bias immediately after a DW_OP_addr (or DW_OP_addrx or DW_OP_GNU_addr_index) operation instead. P.S. This manifested as FaultErrors when accessing the vmap_zone_size variable on a kernel compiled with Clang 19. The reason why is really interesting: vmap_zone_size only has two possible values, 1 and (1 << 4) * PAGE_SIZE. 
It's only used in one place: static inline unsigned int addr_to_node_id(unsigned long addr) { return (addr / vmap_zone_size) % nr_vmap_nodes; } To optimize that division, Clang apparently replaced vmap_zone_size with a shift such that the division (addr / vmap_zone_size) could be replaced with (addr >> (shift << 4)). The location description for vmap_zone_size is therefore: DW_OP_addr 0xffffffff82350194, DW_OP_deref_size 0x1, DW_OP_constu 0xffff, DW_OP_mul, DW_OP_lit1, DW_OP_plus, DW_OP_stack_value Fixes: 35a1af7ad690 ("libdrgn: add DWARF expression evaluation") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 9fa44d369..a79e8f442 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -3500,6 +3500,19 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, address_size, &uvalue))) return err; +addr: + /* + * If the address is not in the module's address range, + * then it's probably something special like a Linux + * per-CPU variable (which isn't actually a variable + * address but an offset). Don't apply the bias in that + * case.
+ */ + if (ctx->file->module->start + <= uvalue + ctx->file->module->debug_file_bias + && uvalue + ctx->file->module->debug_file_bias + < ctx->file->module->end) + uvalue += ctx->file->module->debug_file_bias; PUSH(uvalue); break; case DW_OP_const1u: @@ -3562,8 +3575,19 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, PUSH_MASK(uvalue); break; case DW_OP_addrx: - case DW_OP_constx: case DW_OP_GNU_addr_index: + if (!ctx->cu_die.addr) { + ctx->bb.pos = ctx->bb.prev; + return NULL; + } + if ((err = drgn_dwarf_next_addrx(&ctx->bb, ctx->file, + &ctx->cu_die, + address_size, + &ctx->cu_addr_base, + &uvalue))) + return err; + goto addr; + case DW_OP_constx: case DW_OP_GNU_const_index: if (!ctx->cu_die.addr) { ctx->bb.pos = ctx->bb.prev; @@ -4627,17 +4651,6 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); err = NULL; } else if (bit_offset >= 0) { - uint64_t biased_address = - address + file->module->debug_file_bias; - /* - * If the address is not in the module's address range, then - * it's probably something special like a Linux per-CPU variable - * (which isn't actually a variable address but an offset). - * Don't apply the bias in that case. - */ - if (file->module->start <= biased_address - && biased_address < file->module->end) - address = biased_address; err = drgn_object_set_reference_internal(ret, &type, address, bit_offset); } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { From 972259aa8389f2a5cedd61f5ab50bed5c2d8e7c9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Feb 2025 00:09:34 -0800 Subject: [PATCH 049/166] drgn.helpers.experimental.kmodify: apply conversions to literal arguments When a literal (i.e., Python value) is passed to call_function(), we're converting it to a drgn.Object but not converting it to the parameter type or applying the default argument promotions. Fix that. 
Fixes: 1652183026a8 ("Add experimental helpers for calling kernel functions and writing to memory") Signed-off-by: Omar Sandoval --- drgn/helpers/experimental/kmodify.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drgn/helpers/experimental/kmodify.py b/drgn/helpers/experimental/kmodify.py index 8840d9b68..8e8fb8710 100644 --- a/drgn/helpers/experimental/kmodify.py +++ b/drgn/helpers/experimental/kmodify.py @@ -1336,13 +1336,12 @@ def align_data(alignment: int) -> None: call_args.append(_Symbol(".data", section=True, offset=len(data))) data.extend(value) else: - if isinstance(arg, Object): - if i < len(func_type.parameters): - arg = implicit_convert(func_type.parameters[i].type, arg) - else: - arg = _default_argument_promotions(arg) - else: + if not isinstance(arg, Object): arg = Object(prog, value=arg) + if i < len(func_type.parameters): + arg = implicit_convert(func_type.parameters[i].type, arg) + else: + arg = _default_argument_promotions(arg) type = _underlying_type(arg.type_) if type.kind not in { From 96ce55efdbcb9f5df262f562c6909b2d8535ae35 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Feb 2025 00:15:41 -0800 Subject: [PATCH 050/166] drgn.helpers.experimental.kmodify: sign-extend <32-bit arguments Clang on x86-64 apparently relies on <32-bit arguments being sign-extended to 32 bits despite this not being in the psABI. This was caught by running the kmodify test case on a Clang-built kernel. Work around it by doing the sign extension. 
Fixes: 1652183026a8 ("Add experimental helpers for calling kernel functions and writing to memory") Signed-off-by: Omar Sandoval --- drgn/helpers/experimental/kmodify.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drgn/helpers/experimental/kmodify.py b/drgn/helpers/experimental/kmodify.py index 8e8fb8710..2d02c4ce1 100644 --- a/drgn/helpers/experimental/kmodify.py +++ b/drgn/helpers/experimental/kmodify.py @@ -400,7 +400,7 @@ def relocation_data(relocations: Sequence[_ElfRelocation]) -> bytes: class _Integer: def __init__(self, size: int, value: IntegerLike) -> None: self.size = size - self.value = operator.index(value) & ((1 << (size * 8)) - 1) + self.value = operator.index(value) class _Symbol(NamedTuple): @@ -503,7 +503,8 @@ def leave_frame(self) -> None: b"\xC3" ) - def _mov_imm(self, value: int, reg: int) -> None: + def _mov_imm(self, i: _Integer, reg: int, sign_extend_bits: int = 0) -> None: + value = i.value & ((1 << max(i.size * 8, sign_extend_bits)) - 1) assert value >= 0 and value <= 0xFFFFFFFFFFFFFFFF assert reg < 16 if value <= 0xFFFFFFFF: @@ -556,7 +557,8 @@ def _store_rax_on_stack(self, offset: int) -> None: self.code.extend(b"\x48\x89\x84\x24") self.code.extend(offset.to_bytes(4, "little", signed=True)) - def _store_imm_on_stack(self, value: int, offset: int) -> None: + def _store_imm_on_stack(self, i: _Integer, offset: int) -> None: + value = i.value & ((1 << max(i.size * 8, 64)) - 1) if (0 <= value <= 0x7FFFFFFF) or ( 0xFFFFFFFF80000000 <= value <= 0xFFFFFFFFFFFFFFFF ): @@ -571,7 +573,7 @@ def _store_imm_on_stack(self, value: int, offset: int) -> None: self.code.extend(offset.to_bytes(4, "little", signed=True)) self.code.extend((value & 0xFFFFFFFF).to_bytes(4, "little")) else: - self._mov_imm(value, self._rax) + self._mov_imm(i, self._rax, 64) self._store_rax_on_stack(offset) def _store_symbol_on_stack(self, sym: _Symbol, offset: int) -> None: @@ -583,13 +585,21 @@ def call(self, func: _Symbol, 
args: Sequence[Union[_Integer, _Symbol]]) -> None: if i < len(self._argument_registers): reg = self._argument_registers[i] if isinstance(arg, _Integer): - self._mov_imm(arg.value, reg) + # Clang/LLVM as of version 19 relies on <32-bit arguments + # being sign-extended to 32 bits despite this not being + # guaranteed by the psABI. It's unclear whether this will + # be resolved by changing LLVM or the psABI, so work around + # it for now. See: + # https://groups.google.com/g/x86-64-abi/c/h7FFh30oS3s/m/Gksanh3WAAAJ + # https://github.com/llvm/llvm-project/issues/12579 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46942 + self._mov_imm(arg, reg, 32) else: self._mov_symbol(arg, reg) else: stack_offset = 8 * (i - len(self._argument_registers)) if isinstance(arg, _Integer): - self._store_imm_on_stack(arg.value, stack_offset) + self._store_imm_on_stack(arg, stack_offset) else: self._store_symbol_on_stack(arg, stack_offset) @@ -638,7 +648,7 @@ def return_(self, value: _Integer, last: bool) -> None: raise NotImplementedError( "return values larger than 8 bytes not implemented" ) - self._mov_imm(value.value, self._rax) + self._mov_imm(value, self._rax) # Jump to the function epilogue. If this return is the last operation, # we can fall through instead of jumping. if not last: @@ -654,7 +664,7 @@ def return_if_last_return_value_nonzero(self, value: _Integer) -> None: ) # mov %rax, %rdx self.code.extend(b"\x48\x89\xC2") - self._mov_imm(value.value, self._rax) + self._mov_imm(value, self._rax) # Jump to the function epilogue if the last return value was non-zero. self.code.extend( # test %rdx, %rdx From a099064959cb4b84a5aeb6bc1803412e5ba0c74d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Feb 2025 13:50:29 -0800 Subject: [PATCH 051/166] libdrgn: dwarf_info: use uintptr_t for DIE addresses in DWARF index size_t was carried over from when DIEs were represented as section offsets. 
It will be compatible with uintptr_t on any reasonable platform, but it's not totally correct. Fixes: 3823b21e17cd ("libdrgn: dwarf_index: uses DIE address instead of section offset") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index a79e8f442..8162b16cb 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1131,7 +1131,7 @@ index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, const char *debug_info_buffer = cu->file->scn_data[cu->scn]->d_buf; unsigned int depth = 0; for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; + uintptr_t die_addr = (uintptr_t)buffer->bb.pos; uint64_t code; if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) @@ -1426,9 +1426,9 @@ index_cu_second_pass(struct drgn_debug_info *dbinfo, Elf_Data *debug_str = cu->file->scn_data[DRGN_SCN_DEBUG_STR]; unsigned int depth = 0; uint8_t depth1_tag = 0; - size_t depth1_addr = 0; + uintptr_t depth1_addr = 0; for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; + uintptr_t die_addr = (uintptr_t)buffer->bb.pos; uint64_t code; if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) From b999e7ed6878738f7f768a7eb60a3171cf9e47c0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 Mar 2025 15:58:08 -0800 Subject: [PATCH 052/166] tests: skip slow test_mtree_load_three_levels* tests under emulation Signed-off-by: Omar Sandoval --- vmtest/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmtest/__main__.py b/vmtest/__main__.py index d910e424d..690c19a16 100644 --- a/vmtest/__main__.py +++ b/vmtest/__main__.py @@ -331,7 +331,7 @@ def add_kernel(arch: Architecture, pattern: str) -> None: else: python_executable = "/usr/bin/python3" # Skip excessively slow tests when emulating. 
- tests_expression = "-k 'not test_slab_cache_for_each_allocated_object'" + tests_expression = "-k 'not test_slab_cache_for_each_allocated_object and not test_mtree_load_three_levels'" if _kdump_works(kernel): kdump_command = """\ From 9de433fe89ef0d60d95d2cedeae821cf06e84855 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 Mar 2025 00:41:09 -0800 Subject: [PATCH 053/166] libdrgn: dwarf_info: fix DWARF index error handling DWARF indexing has some awkward error handling because we can't break out of an OpenMP parallel region and also because we need to free some temporary data structures regardless of success or error. Each thread has its own error, and they are consolidated at a few points. This error handling has a couple of bugs: 1. In some loops, we check the err variable shared by all threads to avoid doing real work if there has been an error. This is technically a data race. 2. After the second pass and before merging the index maps, we set thread_err to err (again to short circuit the merging in all threads if there was an error in any thread). But we later destroy thread_err if err is set, which will always be true, resulting in a use-after-free or double-free. We don't need to optimize for the error case at all, so let's simplify all of this to exclusively use the per-thread error and consolidate them only at the end. 
Fixes: e0921c5bdbef ("libdrgn: don't use OpenMP tasking") Fixes: c4a122ead686 ("libdrgn: dwarf_info: scalably index all DIEs per name") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 76 +++++++++++++++++--------------------------- tests/dwarfwriter.py | 7 ++-- tests/test_dwarf.py | 16 ++++++++++ 3 files changed, 50 insertions(+), 49 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 8162b16cb..31193ad3d 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1852,6 +1852,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) struct drgn_error *err = NULL; #pragma omp parallel num_threads(drgn_num_threads) { + struct drgn_error *thread_err = NULL; struct drgn_dwarf_index_cu_vector *cus; int thread_num = omp_get_thread_num(); if (thread_num == 0) { @@ -1863,20 +1864,20 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) #pragma omp for schedule(dynamic) for (size_t i = 0; i < drgn_module_vector_size(&modules); i++) { + if (thread_err) + continue; struct drgn_module *module = *drgn_module_vector_at(&modules, i); - if (err) - continue; - struct drgn_error *module_err = + thread_err = drgn_dwarf_index_read_file(module->debug_file, cus); - if (module_err) { - #pragma omp critical(drgn_dwarf_info_update_index_error) - if (err) - drgn_error_destroy(module_err); - else - err = module_err; - } + } + if (thread_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(thread_err); + else + err = thread_err; } } if (err) @@ -1904,6 +1905,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) #pragma omp parallel num_threads(drgn_num_threads) { + struct drgn_error *thread_err = NULL; struct drgn_dwarf_specification_map *specifications; int thread_num = omp_get_thread_num(); if (thread_num == 0) { @@ -1916,26 +1918,26 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) #pragma omp for schedule(dynamic) for (size_t i = dbinfo->dwarf.global.cus_indexed; i < 
drgn_dwarf_index_cu_vector_size(cus); i++) { + if (thread_err) + continue; struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_cu_vector_at(cus, i); - if (err) - continue; - struct drgn_error *cu_err = read_cu(cu); - if (!cu_err) { + thread_err = read_cu(cu); + if (!thread_err) { struct drgn_dwarf_index_cu_buffer buffer; drgn_dwarf_index_cu_buffer_init(&buffer, cu); buffer.bb.pos += cu_header_size(cu); - cu_err = index_cu_first_pass(specifications, - &buffer); - } - if (cu_err) { - #pragma omp critical(drgn_dwarf_info_update_index_error) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; + thread_err = index_cu_first_pass(specifications, + &buffer); } } + if (thread_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(thread_err); + else + err = thread_err; + } } for (int i = 0; i < drgn_num_threads - 1; i++) { err = drgn_dwarf_specification_map_merge(&dbinfo->dwarf.specifications, @@ -1947,8 +1949,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) #pragma omp parallel num_threads(drgn_num_threads) { - struct drgn_error *thread_err; - + struct drgn_error *thread_err = NULL; struct drgn_dwarf_index_die_map *map; struct drgn_dwarf_base_type_map *base_types; int thread_num = omp_get_thread_num(); @@ -1966,7 +1967,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) #pragma omp for schedule(dynamic) for (size_t i = dbinfo->dwarf.global.cus_indexed; i < drgn_dwarf_index_cu_vector_size(cus); i++) { - if (err) + if (thread_err) continue; struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_cu_vector_at(cus, i); @@ -1975,17 +1976,8 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) buffer.bb.pos += cu_header_size(cu); thread_err = index_cu_second_pass(dbinfo, map, base_types, &buffer); - if (thread_err) { - #pragma omp critical(drgn_dwarf_info_update_index_error) - if (err) - drgn_error_destroy(thread_err); - else - err = thread_err; - } } - thread_err = err; - #pragma omp for schedule(dynamic) 
nowait for (size_t i = 0; i <= array_size(dbinfo->dwarf.global.map); i++) { if (i < array_size(dbinfo->dwarf.global.map)) { @@ -2081,8 +2073,7 @@ static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index err = NULL; #pragma omp parallel num_threads(drgn_num_threads) { - struct drgn_error *thread_err; - + struct drgn_error *thread_err = NULL; struct drgn_dwarf_index_die_map *map; int thread_num = omp_get_thread_num(); if (thread_num == 0) { @@ -2099,7 +2090,7 @@ static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index #pragma omp for schedule(dynamic) nowait for (uint32_t j = ns->dies_indexed[tags_to_index[i]]; j < drgn_dwarf_index_die_vector_size(dies); j++) { - if (err) + if (thread_err) continue; uintptr_t die_addr = *drgn_dwarf_index_die_vector_at(dies, j); @@ -2111,19 +2102,10 @@ static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index thread_err = index_cu_second_pass(ns->dbinfo, map, NULL, &buffer); - if (thread_err) { - #pragma omp critical(drgn_index_namespace_error) - if (err) - drgn_error_destroy(thread_err); - else - err = thread_err; - } } } #pragma omp barrier - thread_err = err; - #pragma omp for schedule(dynamic) nowait for (size_t i = 0; i < array_size(ns->map); i++) { for (int j = 0; j < drgn_num_threads - 1; j++) { diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index b496bf2b7..d68df48f6 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -66,6 +66,7 @@ def aux(die): def _compile_debug_info(units, little_endian, bits, version, use_dw_form_indirect): + offset_size = 4 # We only emit the 32-bit format for now. 
byteorder = "little" if little_endian else "big" all_labels = set() labels = {} @@ -114,6 +115,8 @@ def aux(buf, die, depth): elif attrib.form == DW_FORM.block1: buf.append(len(value)) buf.extend(value) + elif attrib.form == DW_FORM.strp: + buf.extend(value.to_bytes(offset_size, byteorder)) elif attrib.form == DW_FORM.string: buf.extend(value.encode()) buf.append(0) @@ -123,7 +126,7 @@ def aux(buf, die, depth): elif attrib.form == DW_FORM.ref_sig8: buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.sec_offset: - buf.extend(b"\0\0\0\0") + buf.extend(bytes(offset_size)) elif attrib.form == DW_FORM.flag_present: pass elif attrib.form == DW_FORM.exprloc: @@ -163,7 +166,7 @@ def aux(buf, die, depth): if unit.type in (DW_UT.type, DW_UT.split_type): buf.extend(unit.type_signature.to_bytes(8, byteorder)) # type_signature relocations.append((len(buf), unit.type_offset)) - buf.extend(b"\0\0\0\0") # type_offset + buf.extend(bytes(offset_size)) # type_offset else: assert unit.type_signature is None assert unit.type_offset is None diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index f2f0e7187..43cf1de05 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -249,6 +249,22 @@ def wrapper(self): return wrapper +class TestInvalidDwarf(TestCase): + def test_name_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "name is out of bounds"): + prog = dwarf_program( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + DwarfAttrib(DW_AT.name, DW_FORM.strp, 0xDEADBEEF), + ), + ) + ) + # Force indexing. + "foo" in prog + + class TestTypes(TestCase): def test_unknown_tag(self): prog = dwarf_program(wrap_test_type_dies(DwarfDie(0x9999, ()))) From 1ade28db7c6de08658c95112fccf0989aa50322d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 Mar 2025 13:38:56 -0800 Subject: [PATCH 054/166] tests: don't run TC tests with old pyroute2 Pyroute2 < 0.6.10 had a bug that caused all TC tests to fail. 
I first noticed this all the way back in commit 05395422eb4b ("setup.py: add 5.19 to vmtest kernels"), but I still hit this when I run tests on older distros. Finally add a version check. Signed-off-by: Omar Sandoval --- tests/linux_kernel/helpers/test_tc.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/linux_kernel/helpers/test_tc.py b/tests/linux_kernel/helpers/test_tc.py index 9089b25e6..0672c3d1f 100644 --- a/tests/linux_kernel/helpers/test_tc.py +++ b/tests/linux_kernel/helpers/test_tc.py @@ -11,17 +11,20 @@ from drgn.helpers.linux.tc import qdisc_lookup from tests import classCleanups from tests.linux_kernel import LinuxKernelTestCase +from util import verrevcmp try: - from pyroute2 import NetNS - from pyroute2.netlink.exceptions import NetlinkError + import pyroute2 - have_pyroute2 = True + # Before Pyroute2 commit 1eb08312de30 ("iproute/linux: try to improve flags + # when sending del messages") (in v0.6.10), Pyroute2 passes an invalid flag + # to deletion requests, resulting in ENOTSUP errors. 
+ have_pyroute2 = verrevcmp(getattr(pyroute2, "__version__", "0"), "0.6.10") >= 0 except ImportError: have_pyroute2 = False -@unittest.skipUnless(have_pyroute2, "pyroute2 not found") +@unittest.skipUnless(have_pyroute2, "pyroute2 >= 0.6.10 not found") class TestTc(LinuxKernelTestCase): @classmethod @classCleanups @@ -33,7 +36,7 @@ def setUpClass(cls): cls.name = "".join( random.choice(string.ascii_letters) for _ in range(16) ) - cls.ns = NetNS(cls.name, flags=os.O_CREAT | os.O_EXCL) + cls.ns = pyroute2.NetNS(cls.name, flags=os.O_CREAT | os.O_EXCL) except FileExistsError: pass cls.addClassCleanup(cls.ns.remove) @@ -41,7 +44,7 @@ def setUpClass(cls): def test_qdisc_lookup(self): try: self.ns.link("add", ifname="dummy0", kind="dummy") - except NetlinkError: + except pyroute2.NetlinkError: self.skipTest("kernel does not support dummy interface (CONFIG_DUMMY)") dummy = self.ns.link_lookup(ifname="dummy0")[0] @@ -57,7 +60,7 @@ def test_qdisc_lookup(self): bands=3, priomap=[1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], ) - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support Multi Band Priority Queueing (CONFIG_NET_SCH_PRIO)" @@ -65,7 +68,7 @@ def test_qdisc_lookup(self): # tc qdisc add dev dummy0 parent 1:1 handle 10: sfq try: self.ns.tc("add", kind="sfq", index=dummy, parent="1:1", handle="10:") - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support Stochastic Fairness Queueing (CONFIG_NET_SCH_SFQ)" @@ -82,7 +85,7 @@ def test_qdisc_lookup(self): burst=1600, limit=3000, ) - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support Token Bucket Filter (CONFIG_NET_SCH_TBF)" @@ -92,7 +95,7 @@ def test_qdisc_lookup(self): # tc qdisc add dev dummy0 ingress try: self.ns.tc("add", kind="ingress", index=dummy) - except NetlinkError: + except 
pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support ingress Qdisc (CONFIG_NET_SCH_INGRESS)" From 792458d68a6d223885fdb244f5a256103589886b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 Mar 2025 13:43:27 -0800 Subject: [PATCH 055/166] tests: remove leftover debugging code in symbols test Fixes: 4e83130008e9 ("Introduce module and debug info finder APIs") Signed-off-by: Omar Sandoval --- tests/test_symbol.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index d6f6aebbf..8228c52e0 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -159,9 +159,6 @@ def test_by_address_sizeless(self): self.assertEqual(prog.symbols(0xFFFF0009), [expected]) def test_by_address_sizeless_subsumed(self): - import unittest.util - - unittest.util._MAX_LENGTH = 999999999 label = ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL) subsume = ElfSymbol("subsume", 0xFFFF0004, 0x8, STT.FUNC, STB.LOCAL) less = ElfSymbol("less", 0xFFFF0000, 0x4, STT.FUNC, STB.LOCAL) From 03e90d5f2f330e38a0ea18661ba386b4668de24b Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 13 Dec 2024 10:12:19 -0800 Subject: [PATCH 056/166] orc_info: don't use ELF_Data when processing orc header This will allow orc_version_from_header() to be reused for upcoming ORC integration that does not use libelf. Signed-off-by: Stephen Brennan --- libdrgn/orc_info.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 67d1d0f03..7c0d260aa 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -214,11 +214,8 @@ remove_fdes_from_orc(struct drgn_module *module, unsigned int *indices, return NULL; } -static int orc_version_from_header(Elf_Data *orc_header) +static int orc_version_from_header(const void *buffer) { - if (orc_header->d_size != 20) - return -1; - // Known version identifiers in .orc_header. 
These can be generated in // the kernel source tree with: // sh ./scripts/orc_hash.sh < arch/x86/include/asm/orc_types.h | sed -e 's/^#define ORC_HASH //' -e 's/,/, /g' @@ -236,9 +233,9 @@ static int orc_version_from_header(Elf_Data *orc_header) 0x17, 0xf8, 0xf7, 0x97, 0x83, 0xca, 0x98, 0x5c, 0x2c, 0x51, }; - if (memcmp(orc_header->d_buf, orc_hash_6_4, 20) == 0) + if (memcmp(buffer, orc_hash_6_4, 20) == 0) return 3; - else if (memcmp(orc_header->d_buf, orc_hash_6_3, 20) == 0) + else if (memcmp(buffer, orc_hash_6_3, 20) == 0) return 2; return -1; } @@ -318,7 +315,9 @@ static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) err = read_elf_section(orc_header_scn, &orc_header); if (err) return err; - module->orc.version = orc_version_from_header(orc_header); + module->orc.version = -1; + if (orc_header->d_size == 20) + module->orc.version = orc_version_from_header(orc_header->d_buf); if (module->orc.version < 0) { return drgn_error_create(DRGN_ERROR_OTHER, "unrecognized .orc_header"); From 242f0c3985beb4e48a5d40125584f383ee96a15a Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 2 Jan 2025 15:16:43 -0800 Subject: [PATCH 057/166] libdrgn: python: add Module_prog Signed-off-by: Stephen Brennan --- libdrgn/python/drgnpy.h | 6 ++++++ libdrgn/python/module.c | 9 +++------ libdrgn/python/module_section_addresses.c | 3 +-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 307e2cc4e..16be14997 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -351,6 +351,12 @@ void *set_error_type_name(const char *format, PyObject *Module_wrap(struct drgn_module *module); PyObject *Module_and_bool_wrap(struct drgn_module *module, bool b); +static inline Program *Module_prog(Module *module) +{ + struct drgn_program *prog = drgn_module_program(module->module); + return container_of(prog, Program, prog); +} + int add_WantedSupplementaryFile(PyObject *m); int 
init_module_section_addresses(void); diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 7e976a2eb..11f4642af 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -68,10 +68,8 @@ PyObject *Module_and_bool_wrap(struct drgn_module *module, bool b) static void Module_dealloc(Module *self) { - if (self->module) { - struct drgn_program *prog = drgn_module_program(self->module); - Py_DECREF(container_of(prog, Program, prog)); - } + if (self->module) + Py_DECREF(Module_prog(self)); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -192,8 +190,7 @@ static PyObject *Module_try_file(Module *self, PyObject *args, PyObject *kwds) static Program *Module_get_prog(Module *self, void *arg) { - Program *prog = - container_of(drgn_module_program(self->module), Program, prog); + Program *prog = Module_prog(self); Py_INCREF(prog); return prog; } diff --git a/libdrgn/python/module_section_addresses.c b/libdrgn/python/module_section_addresses.c index 76d75fc6d..a2d26b4ba 100644 --- a/libdrgn/python/module_section_addresses.c +++ b/libdrgn/python/module_section_addresses.c @@ -20,8 +20,7 @@ static ModuleSectionAddresses *ModuleSectionAddresses_new(PyTypeObject *subtype, ModuleSectionAddresses *ret = (ModuleSectionAddresses *)subtype->tp_alloc(subtype, 0); if (ret) { - struct drgn_program *prog = drgn_module_program(module->module); - Py_INCREF(container_of(prog, Program, prog)); + Py_INCREF(Module_prog(module)); ret->module = module->module; } return ret; From 984ab04c5e3c519264e45ad39a649ef63e7b89c0 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 13 Dec 2024 15:52:45 -0800 Subject: [PATCH 058/166] module: add object This allows users to get the object which the module was created from. The primary use case is for Linux kernel modules, to return the "struct module" associated with the drgn module object. 
To simplify the implementation, we've added the restriction that Program.linux_kernel_loadable_module(), and the associated C APIs, will only accept objects of type "struct module *". Signed-off-by: Stephen Brennan --- _drgn.pyi | 18 ++++++---- libdrgn/debug_info.c | 15 +++++++++ libdrgn/debug_info.h | 2 ++ libdrgn/drgn.h | 24 +++++++++++-- libdrgn/linux_kernel.c | 75 +++++++++++++++++++---------------------- libdrgn/python/module.c | 31 +++++++++++++++++ 6 files changed, 116 insertions(+), 49 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 12b5f0503..bc02dc85b 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -838,12 +838,11 @@ class Program: self, module_obj: Object, *, create: Literal[False] = False ) -> RelocatableModule: """ - Find a Linux kernel loadable module from a ``struct module`` object. + Find a Linux kernel loadable module from a ``struct module *`` object. Note that kernel modules are represented as relocatable modules. - :param module_obj: ``struct module`` or ``struct module *`` object for - the kernel module. + :param module_obj: ``struct module *`` object for the kernel module. :return: Relocatable module with a name and address matching *module_obj*. :raises LookupError: if no matching module has been created @@ -855,14 +854,13 @@ class Program: self, module_obj: Object, *, create: Literal[True] ) -> Tuple[RelocatableModule, bool]: """ - Find or create a Linux kernel loadable module from a ``struct module`` + Find or create a Linux kernel loadable module from a ``struct module *`` object. If a new module is created, its :attr:`~Module.address_range` and :attr:`~RelocatableModule.section_addresses` are set from *module_obj*. - :param module_obj: ``struct module`` or ``struct module *`` object for - the kernel module. + :param module_obj: ``struct module *`` object for the kernel module. :return: Module and ``True`` if it was newly created or ``False`` if it was found.
""" @@ -1666,6 +1664,14 @@ class Module: module, it is set to the file's build ID if it is not already set. It can also be set manually. """ + object: Object + """ + The object associated with this module. + + For Linux kernel loadable modules, this is the ``struct module *`` + associated with the kernel module. For other kinds, this is currently an + absent object. The object may be set manually. + """ loaded_file_status: ModuleFileStatus """Status of the module's :ref:`loaded file `.""" loaded_file_path: Optional[str] diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 3f3aef63f..1285cfd64 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -321,6 +321,7 @@ struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, module->prog = prog; module->kind = key->kind; + drgn_object_init(&module->object, prog); // Linux userspace core dumps usually filter out file-backed mappings // (see coredump_filter in core(5)), so we need the loaded file to read // the text. 
Additionally, .eh_frame is in the loaded file and not the @@ -412,6 +413,7 @@ struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, err_name: free(module->name); err_module: + drgn_object_deinit(&module->object); free(module); return err; } @@ -513,6 +515,7 @@ static void drgn_module_destroy(struct drgn_module *module) drgn_elf_file_destroy(module->loaded_file); free(module->build_id); free(module->name); + drgn_object_deinit(&module->object); free(module); } @@ -985,6 +988,18 @@ drgn_module_wanted_supplementary_debug_file(struct drgn_module *module, : DRGN_SUPPLEMENTARY_FILE_NONE; } +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_object(const struct drgn_module *module, struct drgn_object *ret) +{ + return drgn_object_copy(ret, &module->object); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_set_object(struct drgn_module *module, const struct drgn_object *obj) +{ + return drgn_object_copy(&module->object, obj); +} + static struct drgn_error * drgn_program_register_debug_info_finder_impl(struct drgn_program *prog, struct drgn_debug_info_finder *finder, diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 20c446116..e61563c90 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -260,6 +260,8 @@ struct drgn_module { struct drgn_module_wanted_supplementary_file *wanted_supplementary_debug_file; /** Node in @ref drgn_debug_info::modules_pending_indexing. */ struct drgn_module *pending_indexing_next; + /** Object the module was created from */ + struct drgn_object object; }; struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 21d3c9796..414f4aa72 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1314,7 +1314,7 @@ drgn_module_find_or_create_relocatable(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret); /** - * Find a created Linux kernel loadable module from a ``struct module`` object. 
+ * Find a created Linux kernel loadable module from a ``struct module *`` object. * * @param[out] new_ret @c true if the module was newly created, @c false if it * was found. @@ -1324,7 +1324,7 @@ drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_obj, struct drgn_module **ret); /** - * Find a Linux kernel loadable module from a ``struct module`` object, creating + * Find a Linux kernel loadable module from a ``struct module *`` object, creating * it if it doesn't already exist. * * @param[out] new_ret @c true if the module was newly created, @c false if it @@ -1570,6 +1570,26 @@ drgn_module_wanted_supplementary_debug_file(struct drgn_module *module, const void **checksum_ret, size_t *checksum_len_ret); +/** + * Return the object associated with this module. + * + * For some modules, there may be an object related to it. For example, drgn + * automatically identifies the Linux kernel `struct module *` associated with + * loadable modules, and associates it with them. Users may set or replace an + * associated object with @ref drgn_module_set_object(). + * + * @param[out] ret Initialized object where the module object is placed + */ +struct drgn_error * +drgn_module_object(const struct drgn_module *module, struct drgn_object *ret); + +/** + * Set the object associated with this module. + * @param[in] obj A new (or replacement) object for the module + */ +struct drgn_error * +drgn_module_set_object(struct drgn_module *module, const struct drgn_object *obj); + /** Debugging information finder callback table.
*/ struct drgn_debug_info_finder_ops { /** diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index a84a4f848..5c118a69d 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1797,7 +1797,8 @@ kernel_module_set_section_addresses(struct drgn_module *module, } static struct drgn_error * -kernel_module_find_or_create_internal(const struct drgn_object *module_obj, +kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, + const struct drgn_object *module_obj, struct drgn_module **ret, bool *new_ret, bool create, bool log) { @@ -1896,6 +1897,10 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_obj, return NULL; } + err = drgn_module_set_object(module, module_ptr); + if (err) + return err; + if (layout_in_module) err = drgn_object_member(&val, module_obj, "core_size"); else @@ -1939,60 +1944,47 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_obj, } static struct drgn_error * -drgn_module_find_or_create_linux_kernel_loadable_internal(const struct drgn_object *module_obj, +drgn_module_find_or_create_linux_kernel_loadable_internal(const struct drgn_object *module_ptr, struct drgn_module **ret, bool *new_ret, bool create) { struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(module_ptr); - // kernel_module_find_or_create_internal() expects a `struct module` - // value. 
- struct drgn_object mod; - if (drgn_type_kind(drgn_underlying_type(module_obj->type)) - == DRGN_TYPE_POINTER) { - drgn_object_init(&mod, drgn_object_program(module_obj)); - err = drgn_object_dereference(&mod, module_obj); - if (!err) - err = drgn_object_read(&mod, &mod); - module_obj = &mod; - if (err) - goto out; - } else if (module_obj->kind != DRGN_OBJECT_VALUE) { - drgn_object_init(&mod, drgn_object_program(module_obj)); - err = drgn_object_read(&mod, module_obj); - module_obj = &mod; - if (err) - goto out; - } + if (drgn_type_kind(drgn_underlying_type(module_ptr->type)) + != DRGN_TYPE_POINTER) + return drgn_error_create(DRGN_ERROR_TYPE, + "struct module * is required"); - err = kernel_module_find_or_create_internal(module_obj, ret, new_ret, - create, false); -out: - if (module_obj == &mod) - drgn_object_deinit(&mod); - return err; + DRGN_OBJECT(module_obj, prog); + err = drgn_object_dereference(&module_obj, module_ptr); + if (err) + return err; + + err = drgn_object_read(&module_obj, &module_obj); + if (err) + return err; + + return kernel_module_find_or_create_internal(module_ptr, &module_obj, ret, new_ret, + create, false); } LIBDRGN_PUBLIC struct drgn_error * -drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_obj, +drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_ptr, struct drgn_module **ret) { - return drgn_module_find_or_create_linux_kernel_loadable_internal(module_obj, - ret, - NULL, - false); + return drgn_module_find_or_create_linux_kernel_loadable_internal(module_ptr, ret, + NULL, false); } LIBDRGN_PUBLIC struct drgn_error * -drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *module_obj, +drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *module_ptr, struct drgn_module **ret, bool *new_ret) { - return drgn_module_find_or_create_linux_kernel_loadable_internal(module_obj, - ret, - new_ret, - true); + return 
drgn_module_find_or_create_linux_kernel_loadable_internal(module_ptr, ret, + new_ret, true); } static struct drgn_error * @@ -2003,6 +1995,7 @@ yield_kernel_module(struct linux_kernel_loaded_module_iterator *it, struct drgn_program *prog = it->it.prog; DRGN_OBJECT(mod, prog); + DRGN_OBJECT(mod_ptr, prog); for (;;) { uint64_t addr; err = drgn_object_read_unsigned(&it->node, &addr); @@ -2034,12 +2027,12 @@ yield_kernel_module(struct linux_kernel_loaded_module_iterator *it, } it->module_list_iterations_remaining--; - err = drgn_object_container_of(&mod, &it->node, it->module_type, + err = drgn_object_container_of(&mod_ptr, &it->node, it->module_type, "list"); if (err) goto list_walk_err; - err = drgn_object_dereference(&mod, &mod); + err = drgn_object_dereference(&mod, &mod_ptr); if (err) goto list_walk_err; // We need several fields from the `struct module`. Especially @@ -2057,8 +2050,8 @@ yield_kernel_module(struct linux_kernel_loaded_module_iterator *it, if (err) goto list_walk_err; - err = kernel_module_find_or_create_internal(&mod, ret, new_ret, - true, true); + err = kernel_module_find_or_create_internal(&mod_ptr, &mod, ret, + new_ret, true, true); if (err && !drgn_error_is_fatal(err)) { drgn_error_log_warning(prog, err, "ignoring module: "); drgn_error_destroy(err); diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 11f4642af..21edb4f4d 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -284,6 +284,35 @@ static int Module_set_build_id(Module *self, PyObject *value, void *arg) return 0; } +static DrgnObject *Module_get_object(Module *self, void *arg) +{ + + Program *prog_obj = Module_prog(self); + _cleanup_pydecref_ DrgnObject *ret = DrgnObject_alloc(prog_obj); + if (!ret) + return NULL; + + struct drgn_error *err = drgn_module_object(self->module, &ret->obj); + if (err) + return set_drgn_error(err); + return_ptr(ret); +} + +static int Module_set_object(Module *self, PyObject *value, void *arg) +{ + 
SETTER_NO_DELETE("object", value); + if (!PyObject_TypeCheck(value, &DrgnObject_type)) { + PyErr_SetString(PyExc_TypeError, "object must be a drgn.Object"); + return -1; + } + DrgnObject *object = (DrgnObject *)value; + + struct drgn_error *err = drgn_module_set_object(self->module, &object->obj); + if (err) + set_drgn_error(err); + return 0; +} + #define MODULE_FILE_STATUS_GETSET(which) \ static PyObject *Module_wants_##which##_file(Module *self) \ { \ @@ -400,6 +429,8 @@ static PyGetSetDef Module_getset[] = { (setter)Module_set_address_range, drgn_Module_address_range_DOC}, {"build_id", (getter)Module_get_build_id, (setter)Module_set_build_id, drgn_Module_build_id_DOC}, + {"object", (getter)Module_get_object, (setter)Module_set_object, + drgn_Module_object_DOC}, {"loaded_file_status", (getter)Module_get_loaded_file_status, (setter)Module_set_loaded_file_status, drgn_Module_loaded_file_status_DOC}, From 3ab0f6abcf5443b211e4fe266a571718ce7cc632 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Sat, 14 Dec 2024 00:12:14 -0800 Subject: [PATCH 059/166] orc_info: enable the use of built-in ORC ORC has always been loaded from the ELF debug file. However, ORC is present in the memory pages of kernel core dumps, so it can still be used when the debug file is unavailable. Implement the ability to load built-in ORC for vmlinux and kernel modules. We still prefer to load ORC from the debug file wherever possible, because this is almost certainly faster. 
Signed-off-by: Stephen Brennan --- libdrgn/debug_info.c | 28 +++++- libdrgn/orc_info.c | 205 +++++++++++++++++++++++++++++++++++++++--- libdrgn/orc_info.h | 5 +- libdrgn/stack_trace.c | 4 + 4 files changed, 230 insertions(+), 12 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 1285cfd64..45f9f28ef 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -5613,9 +5613,19 @@ drgn_module_find_cfi(struct drgn_program *prog, struct drgn_module *module, module->parsed_debug_frame = true; } if (!module->parsed_orc) { - err = drgn_module_parse_orc(module); + err = drgn_module_parse_orc(module, false); if (err) return err; + + // For some distributions, such as Fedora & derivatives, + // ORC sections are stripped from the debug file. Try + // using built-in ORC if nothing was loaded from the + // debug_file. + if (!module->orc.num_entries) + err = drgn_module_parse_orc(module, true); + if (err) + return err; + module->parsed_orc = true; } @@ -5658,5 +5668,21 @@ drgn_module_find_cfi(struct drgn_program *prog, struct drgn_module *module, if (err != &drgn_not_found) return err; } + + if (!can_use_debug_file) { + if (!module->parsed_orc) { + err = drgn_module_parse_orc(module, true); + if (err) + return err; + module->parsed_orc = true; + } + *file_ret = NULL; + err = drgn_module_find_orc_cfi(module, pc, row_ret, + interrupted_ret, + ret_addr_regno_ret); + if (err != &drgn_not_found) + return err; + } + return &drgn_not_found; } diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 7c0d260aa..15f297088 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -13,6 +13,7 @@ #include "debug_info.h" // IWYU pragma: associated #include "elf_file.h" #include "error.h" +#include "log.h" #include "orc.h" #include "platform.h" #include "program.h" @@ -34,7 +35,7 @@ static inline uint64_t drgn_raw_orc_pc(struct drgn_module *module, { int32_t offset; memcpy(&offset, &module->orc.pc_offsets[i], sizeof(offset)); - if 
(drgn_elf_file_bswap(module->debug_file)) + if (module->orc.bswap) offset = bswap_32(offset); return module->orc.pc_base + UINT64_C(4) * i + offset; } @@ -44,7 +45,7 @@ drgn_raw_orc_entry_is_terminator(struct drgn_module *module, unsigned int i) { uint16_t flags; memcpy(&flags, &module->orc.entries[i].flags, sizeof(flags)); - if (drgn_elf_file_bswap(module->debug_file)) + if (module->orc.bswap) flags = bswap_16(flags); if (module->orc.version >= 3) { // orc->type == ORC_TYPE_UNDEFINED @@ -63,7 +64,7 @@ drgn_raw_orc_entry_is_preferred(struct drgn_module *module, unsigned int i) { uint16_t flags; memcpy(&flags, &module->orc.entries[i].flags, sizeof(flags)); - if (drgn_elf_file_bswap(module->debug_file)) + if (module->orc.bswap) flags = bswap_16(flags); // ORC_REG_SP_INDIRECT is used for the stack switching pattern used in // the Linux kernel's call_on_stack()/call_on_irqstack() macros. See @@ -355,6 +356,165 @@ static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) return NULL; } +static struct drgn_error * +copy_builtin_orc_buffers(struct drgn_module *module, uint64_t num_entries, + uint64_t unwind, uint64_t unwind_ip, uint64_t header) +{ + uint8_t header_data[20]; + + struct drgn_error *err; + + if (header) { + err = drgn_program_read_memory(module->prog, header_data, + header, sizeof(header_data), + false); + + if (err) + return err; + + module->orc.version = orc_version_from_header(header_data); + if (module->orc.version < 0) + return drgn_error_create(DRGN_ERROR_OTHER, + "unrecognized .orc_header"); + } else { + module->orc.version = orc_version_from_osrelease(module->prog); + } + + _cleanup_free_ int32_t *pc_offsets = malloc_array(num_entries, + sizeof(pc_offsets[0])); + if (!pc_offsets) + return &drgn_enomem; + err = drgn_program_read_memory(module->prog, pc_offsets, unwind_ip, + num_entries * sizeof(pc_offsets[0]), false); + if (err) + return err; + + _cleanup_free_ struct drgn_orc_entry *entries = + malloc_array(num_entries, 
sizeof(entries[0])); + if (!entries) + return &drgn_enomem; + err = drgn_program_read_memory(module->prog, entries, unwind, + num_entries * sizeof(entries[0]), false); + if (err) + return err; + + module->orc.entries = no_cleanup_ptr(entries); + module->orc.pc_offsets = no_cleanup_ptr(pc_offsets); + module->orc.num_entries = num_entries; + module->orc.pc_base = unwind_ip; + drgn_log_debug(module->prog, "Loaded built-in ORC (v%d) for module %s", + module->orc.version, module->name); + return NULL; +} + +static struct drgn_error *drgn_read_vmlinux_orc(struct drgn_module *module) +{ + struct drgn_error *err; + struct drgn_symbol *sym; + + uint64_t unwind_ip_start, unwind_ip_end; + uint64_t unwind_start, unwind_end; + uint64_t header_start = 0, header_end = 0; + +#define get_symbol(name, var, optional) \ + err = drgn_program_find_symbol_by_name(module->prog, name, &sym); \ + if (!err) { \ + var = sym->address; \ + drgn_symbol_destroy(sym); \ + sym = NULL; \ + } else if (optional && drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { \ + sym = NULL; \ + } else { \ + drgn_error_catch(&err, DRGN_ERROR_LOOKUP); \ + return err; \ + } + + get_symbol("__start_orc_unwind_ip", unwind_ip_start, false); + get_symbol("__stop_orc_unwind_ip", unwind_ip_end, false); + get_symbol("__start_orc_unwind", unwind_start, false); + get_symbol("__stop_orc_unwind", unwind_end, false); + get_symbol("__start_orc_header", header_start, true); + get_symbol("__stop_orc_header", header_end, true); +#undef get_symbol + + if ((unwind_ip_end - unwind_ip_start) % sizeof(int32_t)) + return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_unwind_ip range"); + uint64_t num_entries = (unwind_ip_end - unwind_ip_start) / sizeof(int32_t); + if (num_entries > UINT_MAX) + return drgn_error_create(DRGN_ERROR_OTHER, + "built-in orc_unwind_ip range is too large"); + + if ((unwind_end - unwind_start) % sizeof(struct drgn_orc_entry) + || (unwind_end - unwind_start) / sizeof(struct drgn_orc_entry) != num_entries) + 
return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_unwind range"); + + if (header_start && header_end && header_end - header_start != 20) + return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_header size"); + + return copy_builtin_orc_buffers(module, num_entries, unwind_start, + unwind_ip_start, header_start); +} + +static struct drgn_error *drgn_read_builtin_orc(struct drgn_module *module) +{ + if (!(module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) + return NULL; + if (module->kind == DRGN_MODULE_MAIN) + return drgn_read_vmlinux_orc(module); + else if (module->kind != DRGN_MODULE_RELOCATABLE) + return NULL; + else if (module->object.kind == DRGN_OBJECT_ABSENT) + return NULL; + + // num_entries is implied by the size of the arrays. We can get the + // array addresses from the section address info, but not their size. So + // we need to find num_orcs by reading it out of the arch-specific + // module info. + DRGN_OBJECT(tmp, module->prog); + struct drgn_error *err; + + err = drgn_object_dereference(&tmp, &module->object); + if (err) + return err; + + err = drgn_object_member(&tmp, &tmp, "arch"); + if (err) + return err; + + err = drgn_object_member(&tmp, &tmp, "num_orcs"); + // If the kernel does not support ORC (e.g. it is too old), this will be + // the first lookup error we encounter. Catch it and don't return any + // error. + if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP) || err) + return err; + + uint64_t num_entries; + err = drgn_object_read_unsigned(&tmp, &num_entries); + if (err) + return err; + + // We'll still use the section addresses for everything else, because + // the orc_header is only present there, and it should be a bit faster + // to read data which we already parsed, rather than going back to read + // it from program memory. 
+ uint64_t orc_unwind; + uint64_t orc_unwind_ip; + uint64_t orc_header = 0; + err = drgn_module_get_section_address(module, ".orc_unwind", &orc_unwind); + if (err) + return err; + err = drgn_module_get_section_address(module, ".orc_unwind_ip", &orc_unwind_ip); + if (err) + return err; + err = drgn_module_get_section_address(module, ".orc_header", &orc_header); + drgn_error_catch(&err, DRGN_ERROR_LOOKUP); + if (err) + return err; + + return copy_builtin_orc_buffers(module, num_entries, orc_unwind, + orc_unwind_ip, orc_header); +} + static inline void drgn_module_clear_orc(struct drgn_module **modulep) { if (*modulep) { @@ -363,22 +523,47 @@ static inline void drgn_module_clear_orc(struct drgn_module **modulep) } } -struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) +struct drgn_error *drgn_module_parse_orc(struct drgn_module *module, + bool use_builtin) { struct drgn_error *err; - if (module->debug_file->platform.arch->arch != DRGN_ARCH_X86_64) + if (module->prog->platform.arch->arch != DRGN_ARCH_X86_64) return NULL; - // pc_offsets and entries point to the Elf_Data buffers until we're - // done. We don't want those freed by drgn_module_orc_info_deinit(), so - // clear them if anything goes wrong. + // When loading from the debug_file, pc_offsets and entries point to the + // Elf_Data buffers until the end of this function, when copies are made. + // When loading built-in ORC, we own both buffers. In either case, we + // don't want these pointers freed by drgn_module_orc_info_deinit() if + // we fail early. So we use this to reset the pointers to NULL on + // failure. _cleanup_(drgn_module_clear_orc) struct drgn_module *clear = module; - err = drgn_read_orc_sections(module); + // For the built-in ORC, we use these to clean up the memory allocated + // in drgn_read_builtin_orc(). This happens even on success, because the + // buffers are copied at the end of this function. 
+ _cleanup_free_ void *cleanup_pc_offsets = NULL; + _cleanup_free_ void *cleanup_entries = NULL; + + if (use_builtin) { + err = drgn_read_builtin_orc(module); + cleanup_pc_offsets = module->orc.pc_offsets; + cleanup_entries = module->orc.entries; + } else { + err = drgn_read_orc_sections(module); + } if (err || !module->orc.num_entries) return err; + // We may need to byte swap ORC entries. Rather than checking the + // debug_file's platform, use the program's platform (since they are the + // same) because it's possible there is no debug_file (e.g. for builtin + // ORC). + bool bswap; + err = drgn_program_bswap(module->prog, &bswap); + if (err) + return err; + unsigned int num_entries = module->orc.num_entries; _cleanup_free_ unsigned int *indices = malloc_array(num_entries, sizeof(indices[0])); @@ -417,7 +602,6 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) return &drgn_enomem; const int32_t *orig_offsets = module->orc.pc_offsets; const struct drgn_orc_entry *orig_entries = module->orc.entries; - const bool bswap = drgn_elf_file_bswap(module->debug_file); const int version = module->orc.version; for (unsigned int i = 0; i < num_entries; i++) { unsigned int index = indices[i]; @@ -474,6 +658,7 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) module->orc.pc_offsets = no_cleanup_ptr(pc_offsets); module->orc.entries = no_cleanup_ptr(entries); module->orc.num_entries = num_entries; + module->orc.bswap = bswap; clear = NULL; return NULL; } diff --git a/libdrgn/orc_info.h b/libdrgn/orc_info.h index 49c07076c..71af686b8 100644 --- a/libdrgn/orc_info.h +++ b/libdrgn/orc_info.h @@ -72,11 +72,14 @@ struct drgn_module_orc_info { unsigned int num_entries; /** Version of the ORC format. See @ref orc.h. 
*/ int version; + /** Whether to byte swap data */ + bool bswap; }; void drgn_module_orc_info_deinit(struct drgn_module *module); -struct drgn_error *drgn_module_parse_orc(struct drgn_module *module); +struct drgn_error *drgn_module_parse_orc(struct drgn_module *module, + bool use_builtin); bool drgn_module_should_prefer_orc_cfi(struct drgn_module *module, uint64_t pc); diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 2a05da77f..059ed7911 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -1067,6 +1067,10 @@ drgn_unwind_one_register(struct drgn_program *prog, struct drgn_elf_file *file, } case DRGN_CFI_RULE_AT_DWARF_EXPRESSION: case DRGN_CFI_RULE_DWARF_EXPRESSION: + // It is possible for file to be NULL when using built-in ORC. + // However, it should be impossible to encounter a DWARF + // expression for built-in ORC. + assert(file != NULL); err = drgn_eval_cfi_dwarf_expression(prog, file, rule, regs, buf, size); break; From 6ff328771fef808656675adcf8e19c58158b5fb5 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 16 Dec 2024 11:50:39 -0800 Subject: [PATCH 060/166] orc_info: store biased pc_base When looking up CFI rules using ORC, we apply module->debug_file_bias to pc_base. This made sense when the ORC was always loaded from an ELF debug file. However, now that built-in ORC can be loaded, this only works for ORC when debug_file_bias is zero; that is, when there is no debug_file. This is a problem, because it's possible that a debug_file is loaded when built-in ORC is used. It can happen either when the debug_file has no ORC sections present, or when the debug_file is loaded after the built-in ORC is. To avoid this, we define the pc_base as the biased (runtime) address of the orc_unwind_ip section. This is already the case for built-in ORC, but when we load it from the debug_file, we must apply the bias. 
In cases such as remove_fdes_from_orc(), which want to compare ORC PCs against unbiased addresses, they'll need to subtract the bias to match the relevant file. Signed-off-by: Stephen Brennan --- libdrgn/orc_info.c | 12 ++++++++---- libdrgn/orc_info.h | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 15f297088..a63f3a980 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -152,11 +152,15 @@ remove_fdes_from_orc(struct drgn_module *module, unsigned int *indices, unsigned int num_entries = *num_entriesp; unsigned int new_num_entries = 0; - uint64_t start_pc = drgn_raw_orc_pc(module, 0); + // ORC can be built-in or from the debug file. Because of that, we + // always store the biased/actual address at orc.pc_base. Since we are + // comparing to the unbiased addresses in the debug_frame FDEs, we need + // to subtract the bias from the ORC PC. + uint64_t start_pc = drgn_raw_orc_pc(module, 0) - module->debug_file_bias; uint64_t end_pc; for (unsigned int i = 0; i < num_entries; i++, start_pc = end_pc) { if (i < num_entries - 1) - end_pc = drgn_raw_orc_pc(module, i + 1); + end_pc = drgn_raw_orc_pc(module, i + 1) - module->debug_file_bias; else end_pc = UINT64_MAX; @@ -551,6 +555,7 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module, cleanup_entries = module->orc.entries; } else { err = drgn_read_orc_sections(module); + module->orc.pc_base += module->debug_file_bias; } if (err || !module->orc.num_entries) return err; @@ -684,11 +689,10 @@ drgn_module_find_orc_cfi(struct drgn_module *module, uint64_t pc, struct drgn_cfi_row **row_ret, bool *interrupted_ret, drgn_register_number *ret_addr_regno_ret) { - uint64_t unbiased_pc = pc - module->debug_file_bias; #define less_than_orc_pc(a, b) \ (*(a) < drgn_orc_pc(module, (b) - module->orc.pc_offsets)) size_t i = binary_search_gt(module->orc.pc_offsets, - module->orc.num_entries, &unbiased_pc, + module->orc.num_entries, &pc, 
less_than_orc_pc); #undef less_than_orc_pc // We can tell when the program counter is below the minimum program diff --git a/libdrgn/orc_info.h b/libdrgn/orc_info.h index 71af686b8..a95d234d1 100644 --- a/libdrgn/orc_info.h +++ b/libdrgn/orc_info.h @@ -41,7 +41,8 @@ struct drgn_module_orc_info { * Base for calculating program counter corresponding to an ORC unwinder * entry. * - * This is the address of the `.orc_unwind_ip` ELF section. + * This is the address of the `.orc_unwind_ip` ELF section. It is the + * actual loaded location, with any bias already applied. * * @sa drgn_module_orc_info::entries */ From c15fc07f8295823bec17a069da5eb2a935274d8b Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 13 Feb 2025 14:24:03 -0800 Subject: [PATCH 061/166] orc_info: use a constant for size of the ORC version header Arrays may not be declared using a "const" variable for the size, so we need to use a macro for this. Signed-off-by: Stephen Brennan --- libdrgn/orc_info.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index a63f3a980..7015d0965 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -21,6 +21,8 @@ DEFINE_VECTOR(uint64_range_vector, struct uint64_range); +#define ORC_HEADER_SIZE 20 + void drgn_module_orc_info_deinit(struct drgn_module *module) { free(module->orc.entries); @@ -227,20 +229,20 @@ static int orc_version_from_header(const void *buffer) // Linux kernel commit fb799447ae29 ("x86,objtool: Split // UNWIND_HINT_EMPTY in two") (in v6.4) - static const uint8_t orc_hash_6_4[20] = { + static const uint8_t orc_hash_6_4[ORC_HEADER_SIZE] = { 0xfe, 0x5d, 0x32, 0xbf, 0x58, 0x1b, 0xd6, 0x3b, 0x2c, 0xa9, 0xa5, 0xc6, 0x5b, 0xa5, 0xa6, 0x25, 0xea, 0xb3, 0xfe, 0x24, }; // Linux kernel commit ffb1b4a41016 ("x86/unwind/orc: Add 'signal' field // to ORC metadata") (in v6.3) - static const uint8_t orc_hash_6_3[20] = { + static const uint8_t orc_hash_6_3[ORC_HEADER_SIZE] = { 0xdb, 
0x84, 0xae, 0xd4, 0x10, 0x3b, 0x31, 0xdd, 0x51, 0xe0, 0x17, 0xf8, 0xf7, 0x97, 0x83, 0xca, 0x98, 0x5c, 0x2c, 0x51, }; - if (memcmp(buffer, orc_hash_6_4, 20) == 0) + if (memcmp(buffer, orc_hash_6_4, ORC_HEADER_SIZE) == 0) return 3; - else if (memcmp(buffer, orc_hash_6_3, 20) == 0) + else if (memcmp(buffer, orc_hash_6_3, ORC_HEADER_SIZE) == 0) return 2; return -1; } @@ -321,7 +323,7 @@ static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) if (err) return err; module->orc.version = -1; - if (orc_header->d_size == 20) + if (orc_header->d_size == ORC_HEADER_SIZE) module->orc.version = orc_version_from_header(orc_header->d_buf); if (module->orc.version < 0) { return drgn_error_create(DRGN_ERROR_OTHER, @@ -364,7 +366,7 @@ static struct drgn_error * copy_builtin_orc_buffers(struct drgn_module *module, uint64_t num_entries, uint64_t unwind, uint64_t unwind_ip, uint64_t header) { - uint8_t header_data[20]; + uint8_t header_data[ORC_HEADER_SIZE]; struct drgn_error *err; @@ -452,7 +454,7 @@ static struct drgn_error *drgn_read_vmlinux_orc(struct drgn_module *module) || (unwind_end - unwind_start) / sizeof(struct drgn_orc_entry) != num_entries) return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_unwind range"); - if (header_start && header_end && header_end - header_start != 20) + if (header_start && header_end && header_end - header_start != ORC_HEADER_SIZE) return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_header size"); return copy_builtin_orc_buffers(module, num_entries, unwind_start, From a842645ea428eadff9a1d86fe3f6688c38de97f9 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 19 Dec 2024 14:37:05 -0800 Subject: [PATCH 062/166] tests: linux_kernel: test built-in ORC unwinding Loading built-in ORC is a difficult functionality to test: it is best tested when there is no debuginfo file. 
Thus, we add two tests: one simpler test in which the kernel has debuginfo, but a module does not, and we must unwind a stack with functions from the module. The second test is more complex, where we create a program with no debuginfo at all, and provide it just enough data to initialize the module API and unwind with built-in ORC. In both cases, to verify that drgn is actually using ORC, we capture its log messages. Signed-off-by: Stephen Brennan --- tests/__init__.py | 12 +++ tests/linux_kernel/test_stack_trace.py | 139 ++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 2 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 12a5d3264..322379011 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -3,6 +3,7 @@ import contextlib import functools +import logging import os import sys from typing import Any, Mapping, NamedTuple, Optional @@ -455,3 +456,14 @@ def modifyenv(vars: Mapping[str, Optional[str]]): del os.environ[key] else: os.environ[key] = old_value + + +@contextlib.contextmanager +def drgn_log_level(level: int): + logger = logging.getLogger("drgn") + old_level = logger.getEffectiveLevel() + logger.setLevel(level) + try: + yield + finally: + logger.setLevel(old_level) diff --git a/tests/linux_kernel/test_stack_trace.py b/tests/linux_kernel/test_stack_trace.py index d414306a9..2ba3a8055 100644 --- a/tests/linux_kernel/test_stack_trace.py +++ b/tests/linux_kernel/test_stack_trace.py @@ -1,12 +1,15 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later +import logging import os +import re import unittest from _drgn_util.platform import NORMALIZED_MACHINE_NAME -from drgn import Object, Program, reinterpret -from tests import assertReprPrettyEqualsStr, modifyenv +from drgn import Object, Program, TypeMember, reinterpret +from drgn.helpers.linux import load_module_kallsyms, load_vmlinux_kallsyms +from tests import assertReprPrettyEqualsStr, drgn_log_level, modifyenv from tests.linux_kernel import ( LinuxKernelTestCase, fork_and_stop, @@ -59,6 +62,60 @@ def test_by_pid_dwarf(self): def test_by_pid_orc(self): self._test_by_pid(True) + def _check_logged_orc_message(self, captured_logs, module): + # To be sure that we actually used ORC to unwind through the drgn_test + # stack frames, search for the log output. We don't know which ORC + # version is used, so just ensure that we have a log line that mentions + # loading ORC. + expr = re.compile( + r"DEBUG:drgn:Loaded built-in ORC \(v\d+\) for module " + module + ) + for line in captured_logs.output: + if expr.fullmatch(line): + break + else: + self.fail(f"Did not load built-in ORC for {module}") + + @unittest.skipUnless( + NORMALIZED_MACHINE_NAME == "x86_64", + f"{NORMALIZED_MACHINE_NAME} does not use ORC", + ) + @skip_unless_have_test_kmod + def test_by_pid_builtin_orc(self): + # ORC was introduced in kernel 4.14. Detect the presence of ORC or skip + # the test. + try: + self.prog.symbol("__start_orc_unwind") + except LookupError: + ver = self.prog["UTS_RELEASE"].string_().decode() + self.skipTest(f"ORC is not available for {ver}") + + with drgn_log_level(logging.DEBUG): + # Create a program with the core kernel debuginfo loaded, + # but without module debuginfo. Load a symbol finder using + # kallsyms so that the module's stack traces can still have + # usable frame names. 
+ prog = Program() + prog.set_kernel() + prog.load_debug_info(main=True) + # Now that vmlinux is loaded, enumerate all the kernel modules so + # that a drgn_module is created to hold the ORC data + list(prog.loaded_modules()) + kallsyms = load_module_kallsyms(prog) + prog.register_symbol_finder("module_kallsyms", kallsyms, enable_index=1) + for thread in prog.threads(): + if b"drgn_test_kthread".startswith(thread.object.comm.string_()): + pid = thread.tid + break + else: + self.fail("couldn't find drgn_test_kthread") + # We must set drgn's log level manually, because it won't log messages + # to the logger if it isn't enabled for them. + with self.assertLogs("drgn", logging.DEBUG) as log: + self._test_drgn_test_kthread_trace(prog.stack_trace(pid)) + + self._check_logged_orc_message(log, "drgn_test") + @skip_unless_have_test_kmod def test_by_pt_regs(self): pt_regs = self.prog["drgn_test_kthread_pt_regs"] @@ -104,6 +161,84 @@ def test_locals(self): else: self.fail("Couldn't find drgn_test_kthread_fn3 frame") + @unittest.skipUnless( + NORMALIZED_MACHINE_NAME == "x86_64", + f"{NORMALIZED_MACHINE_NAME} does not use ORC", + ) + def test_vmlinux_builtin_orc(self): + # ORC was introduced in kernel 4.14. Detect the presence of ORC or skip + # the test. + try: + self.prog.symbol("__start_orc_unwind") + except LookupError: + ver = self.prog["UTS_RELEASE"].string_().decode() + self.skipTest(f"ORC is not available for {ver}") + + with drgn_log_level(logging.DEBUG): + # It is difficult to test stack unwinding in a program without also + # loading types, which necessarily will also make DWARF CFI and ORC + # available in the debug file. The way we get around this is by creating + # a new program with no debuginfo, getting a pt_regs from the program + # that has debuginfo, and then using that to unwind the kernel. We still + # need a symbol finder, and we'll need the Module API to recognize the + # kernel address range correctly.
+ prog = Program() + prog.set_kernel() + prog.register_symbol_finder( + "vmlinux_kallsyms", load_vmlinux_kallsyms(prog), enable_index=0 + ) + main, _ = prog.main_module(name="kernel", create=True) + main.address_range = self.prog.main_module().address_range + + # Luckily, all drgn cares about for x86_64 pt_regs is that it is a + # structure. Rather than creating a matching struct pt_regs definition, + # we can just create a dummy one of the correct size: + # struct pt_regs { unsigned char[size]; }; + # Drgn will happily use that and reinterpret the bytes correctly. + real_pt_regs_type = self.prog.type("struct pt_regs") + fake_pt_regs_type = prog.struct_type( + tag="pt_regs", + size=real_pt_regs_type.size, + members=[ + TypeMember( + prog.array_type( + prog.int_type("unsigned char", 1, False), + real_pt_regs_type.size, + ), + "data", + ), + ], + ) + + with fork_and_stop() as pid: + trace = self.prog.stack_trace(pid) + regs_dict = trace[0].registers() + pt_regs_obj = Object( + self.prog, + real_pt_regs_type, + { + "bp": regs_dict["rbp"], + "sp": regs_dict["rsp"], + "ip": regs_dict["rip"], + "r15": regs_dict["r15"], + }, + ) + fake_pt_regs_obj = Object.from_bytes_( + prog, fake_pt_regs_type, pt_regs_obj.to_bytes_() + ) + # We must set drgn's log level manually, because it won't log messages + # to the logger if it isn't enabled for them. + with self.assertLogs("drgn", logging.DEBUG) as log: + no_debuginfo_trace = prog.stack_trace(fake_pt_regs_obj) + + dwarf_pcs = [] + for frame in trace: + if not dwarf_pcs or dwarf_pcs[-1] != frame.pc: + dwarf_pcs.append(frame.pc) + orc_pcs = [frame.pc for frame in no_debuginfo_trace] + self.assertEqual(dwarf_pcs, orc_pcs) + self._check_logged_orc_message(log, "kernel") + def test_registers(self): # Smoke test that we get at least one register and that # StackFrame.registers() agrees with StackFrame.register().
From ff8be674f28a870b2684ae2b28c07385706e4bed Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Mar 2025 13:34:35 -0800 Subject: [PATCH 063/166] vmtest.kbuild: add patch to work around missing build ID on aarch64:5.18.19-vmtest32.0alternative There are some linker shenanigans causing vmlinux to not have a build ID on this specific version. Revert the patch that caused this, which isn't the real fix but is the easiest way to deal with it. Signed-off-by: Omar Sandoval --- vmtest/config.py | 6 +++ vmtest/kbuild.py | 4 ++ ...link-with-z-noexecstack-no-warn-rwx-.patch | 42 +++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch diff --git a/vmtest/config.py b/vmtest/config.py index 8f3e42e3f..7e9333770 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -432,6 +432,12 @@ def kconfig_localversion(arch: Architecture, flavor: KernelFlavor, version: str) 1 if flavor.name == "default" else 0, ] patch_level = 0 + if ( + arch.name == "aarch64" + and flavor.name == "alternative" + and KernelVersion("5.18.18") <= KernelVersion(version) < KernelVersion("5.19") + ): + patch_level += 1 # If only specific architecture/flavor/version combinations need to be # rebuilt, conditionally increment the patch level here. 
if patch_level: diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index f71c20394..16e03f8be 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -146,6 +146,10 @@ class _Patch(NamedTuple): name="lib-raid6-add-option-to-skip-algo-benchmarking.patch", versions=((None, KernelVersion("5.0")),), ), + _Patch( + name="5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch", + versions=((KernelVersion("5.18.18"), KernelVersion("5.19")),), + ), ) diff --git a/vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch b/vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch new file mode 100644 index 000000000..7fa450bb9 --- /dev/null +++ b/vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch @@ -0,0 +1,42 @@ +From 656f46da75e198bec61e4f2cc425d9fc9b2679cc Mon Sep 17 00:00:00 2001 +Message-ID: <656f46da75e198bec61e4f2cc425d9fc9b2679cc.1741383572.git.osandov@osandov.com> +From: Omar Sandoval +Date: Fri, 7 Mar 2025 13:38:13 -0800 +Subject: [PATCH] Revert "Makefile: link with -z noexecstack + --no-warn-rwx-segments" + +This reverts commit d81aa6bfff835ceea33c192d394f03e4a59cd12c. + +This results in a missing build ID on arm64 on v5.18-stable starting +with v5.18.18 if CONFIG_MODVERSIONS=y. In mainline and LTS kernels, this +was fixed properly by commit 99cb0d917ffa ("arch: fix broken BuildID for +arm64 and riscv") (in v6.2), but that had a bunch of followup fixes. +Commit 7b4537199a4a ("kbuild: link symbol CRCs at final link, removing +CONFIG_MODULE_REL_CRCS") (in v5.19) also somehow works around it. Older +stable branches didn't get this noexecstack change, so that leaves us +with just v5.18 that needs this revert. 
+ +Signed-off-by: Omar Sandoval +--- + Makefile | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/Makefile b/Makefile +index fc7efcdab0a2..31e952dc676d 100644 +--- a/Makefile ++++ b/Makefile +@@ -1031,11 +1031,6 @@ KBUILD_CFLAGS += $(KCFLAGS) + KBUILD_LDFLAGS_MODULE += --build-id=sha1 + LDFLAGS_vmlinux += --build-id=sha1 + +-KBUILD_LDFLAGS += -z noexecstack +-ifeq ($(CONFIG_LD_IS_BFD),y) +-KBUILD_LDFLAGS += $(call ld-option,--no-warn-rwx-segments) +-endif +- + ifeq ($(CONFIG_STRIP_ASM_SYMS),y) + LDFLAGS_vmlinux += $(call ld-option, -X,) + endif +-- +2.48.1 + From af535aa000b04940cc7726a6f2ab7b1bb16d7115 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Mar 2025 13:41:31 -0800 Subject: [PATCH 064/166] Revert "vmtest.kbuild: add patch to fix 9p slab cache naming on Linux 6.12" This reverts commit fd2ffbdaa2da0ab52f4ec5193c83dd6754d6accb. This has been fixed upstream for a while. Signed-off-by: Omar Sandoval --- vmtest/config.py | 1 + vmtest/kbuild.py | 4 -- ...ix-slab-cache-name-creation-for-real.patch | 50 ------------------- 3 files changed, 1 insertion(+), 54 deletions(-) delete mode 100644 vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch diff --git a/vmtest/config.py b/vmtest/config.py index 7e9333770..69c7d3414 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -9,6 +9,7 @@ from typing import Dict, Mapping, NamedTuple, Sequence from _drgn_util.platform import NORMALIZED_MACHINE_NAME +from util import KernelVersion # Kernel versions that we run tests on and therefore support. Keep this in sync # with docs/support_matrix.rst. 
diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index 16e03f8be..f20397186 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -51,10 +51,6 @@ class _Patch(NamedTuple): name="proc-kcore-allow-enabling-CONFIG_PROC_KCORE-on-ARM.patch", versions=((None, None),), ), - _Patch( - name="9p-fix-slab-cache-name-creation-for-real.patch", - versions=((KernelVersion("6.12"), None),), - ), _Patch( name="filelock-fix-name-of-file_lease-slab-cache.patch", versions=((KernelVersion("6.9"), KernelVersion("6.10")),), diff --git a/vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch b/vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch deleted file mode 100644 index db8207078..000000000 --- a/vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch +++ /dev/null @@ -1,50 +0,0 @@ -From a360f311f57a36e96d88fa8086b749159714dcd2 Mon Sep 17 00:00:00 2001 -Message-ID: -From: Linus Torvalds -Date: Mon, 21 Oct 2024 11:57:38 -0700 -Subject: [PATCH] 9p: fix slab cache name creation for real - -This was attempted by using the dev_name in the slab cache name, but as -Omar Sandoval pointed out, that can be an arbitrary string, eg something -like "/dev/root". Which in turn trips verify_dirent_name(), which fails -if a filename contains a slash. - -So just make it use a sequence counter, and make it an atomic_t to avoid -any possible races or locking issues. 
- -Reported-and-tested-by: Omar Sandoval -Link: https://lore.kernel.org/all/ZxafcO8KWMlXaeWE@telecaster.dhcp.thefacebook.com/ -Fixes: 79efebae4afc ("9p: Avoid creating multiple slab caches with the same name") -Acked-by: Vlastimil Babka -Cc: Dominique Martinet -Cc: Thorsten Leemhuis -Signed-off-by: Linus Torvalds ---- - net/9p/client.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/net/9p/client.c b/net/9p/client.c -index 9e7b9151816d..09f8ced9f8bb 100644 ---- a/net/9p/client.c -+++ b/net/9p/client.c -@@ -977,6 +977,7 @@ static int p9_client_version(struct p9_client *c) - struct p9_client *p9_client_create(const char *dev_name, char *options) - { - int err; -+ static atomic_t seqno = ATOMIC_INIT(0); - struct p9_client *clnt; - char *client_id; - char *cache_name; -@@ -1036,7 +1037,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) - if (err) - goto close_trans; - -- cache_name = kasprintf(GFP_KERNEL, "9p-fcall-cache-%s", dev_name); -+ cache_name = kasprintf(GFP_KERNEL, -+ "9p-fcall-cache-%u", atomic_inc_return(&seqno)); - if (!cache_name) { - err = -ENOMEM; - goto close_trans; --- -2.47.0 - From 335e4b84b381d8cce6dff8186daedbc1b2ebd884 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Mar 2025 14:33:07 -0800 Subject: [PATCH 065/166] dco-check: enforce that sign-off matches commit author This is the best practice for DCO sign-offs, but it turns out we're not enforcing it. This came up in #476. 
Signed-off-by: Omar Sandoval --- .github/workflows/dco-check.yml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/workflows/dco-check.yml b/.github/workflows/dco-check.yml index 06822fe41..6ab2bba39 100644 --- a/.github/workflows/dco-check.yml +++ b/.github/workflows/dco-check.yml @@ -18,14 +18,22 @@ jobs: git init git fetch --filter=blob:none "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" "$GITHUB_BASE_REF" "$GITHUB_REF" - name: Check for DCO sign-offs + shell: bash run: | - no_sign_off="$(git log --no-merges --grep=Signed-off-by --invert-grep "FETCH_HEAD..$GITHUB_SHA")" - if [ -z "$no_sign_off" ]; then + status=0 + while read -r commit; do + author="$(git show --no-patch --pretty='format:%an <%ae>' "$commit")" + if ! git show --no-patch --pretty='format:%(trailers:key=Signed-off-by,valueonly)' "$commit" | grep -Fxq "$author"; then + if [ $status -eq 0 ]; then + echo "The following commits are missing a Developer Certificate of Origin sign-off;" + echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" + echo + fi + status=1 + git show --no-patch "$commit" + fi + done < <(git rev-list --no-merges "FETCH_HEAD..$GITHUB_SHA") + if [ $status -eq 0 ]; then echo "All commits have a Developer Certificate of Origin sign-off" - else - echo "The following commits are missing a Developer Certificate of Origin sign-off;" - echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" - echo - echo "$no_sign_off" - exit 1 fi + exit $status From bdc3212e19e006f2410ac0a813a9ed37a0a6d4e3 Mon Sep 17 00:00:00 2001 From: Septatrix <24257556+Septatrix@users.noreply.github.com> Date: Sat, 8 Mar 2025 00:24:24 +0100 Subject: [PATCH 066/166] Fix exception handling in for_each_child_dentry Accessing `dentry.d_children` fails before the lookup of `d_sib` on `struct dentry`. 
Signed-off-by: Septatrix <24257556+Septatrix@users.noreply.github.com> --- contrib/negdentdelete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/negdentdelete.py b/contrib/negdentdelete.py index 889a2ebb1..186faa1e2 100644 --- a/contrib/negdentdelete.py +++ b/contrib/negdentdelete.py @@ -37,7 +37,7 @@ def for_each_child_dentry(dentry: Object) -> Iterator[Object]: dentry.d_children.address_of_(), "d_sib", ) - except LookupError: + except AttributeError: return list_for_each_entry( "struct dentry", dentry.d_subdirs.address_of_(), "d_child" ) From aeaa16b8c63a365743e40b5baec613e7fe8cffc7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 8 Mar 2025 21:08:26 -0800 Subject: [PATCH 067/166] vmtest: use cortex-a76 CPU for AArch64 emulation cortex-a57 doesn't have 16K page support, so the tiny flavor hangs on boot. cortex-a76 is the first Cortex-A CPU in QEMU with support for 16K pages, so use it instead. It was added in QEMU 7.1 in August 2022. Signed-off-by: Omar Sandoval --- vmtest/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmtest/config.py b/vmtest/config.py index 69c7d3414..c302afd65 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -286,7 +286,7 @@ class Architecture(NamedTuple): """, }, kernel_org_compiler_name="aarch64-linux", - qemu_options=("-M", "virt", "-cpu", "cortex-a57"), + qemu_options=("-M", "virt", "-cpu", "cortex-a76"), qemu_console="ttyAMA0", ), Architecture( From 109eee62cea6deeb3e18511d733768f533fc42f3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Mar 2025 09:36:19 -0700 Subject: [PATCH 068/166] tests: add test for unwinding through IRQ handler I should've done this as part of commit 98995b617d44 ("libdrgn: orc_info: prefer ORC stack switching entries over DWARF CFI"), but I couldn't think of a good way to test it at the time. The solution is: 1. Add a sysfs file, /sys/kernel/drgn_test/crash, that panics from an IRQ handler using irq_work_queue(). 2. 
Use that file from vmtest.enter_kdump instead of /proc/sysrq-trigger. 3. Check for frames in and below the IRQ handler in the crashed thread stack trace tests. Signed-off-by: Omar Sandoval --- tests/linux_kernel/kmod/drgn_test.c | 75 ++++++++++++++++++++++++ tests/linux_kernel/vmcore/test_vmcore.py | 29 +++++++-- vmtest/enter_kdump.py | 8 +++ 3 files changed, 107 insertions(+), 5 deletions(-) diff --git a/tests/linux_kernel/kmod/drgn_test.c b/tests/linux_kernel/kmod/drgn_test.c index d417b44a3..91a917258 100644 --- a/tests/linux_kernel/kmod/drgn_test.c +++ b/tests/linux_kernel/kmod/drgn_test.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #ifdef CONFIG_STACKDEPOT #include #endif +#include #include #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) @@ -1385,8 +1387,78 @@ DEFINE_KMODIFY_TEST_ARGS( ) #endif +#ifdef CONFIG_SYSFS + +// Crash from an IRQ handler on architectures where drgn supports unwinding +// through IRQ handlers. +#ifdef __x86_64__ +#define DRGN_TEST_IRQ_CRASH +#endif + +static __noreturn noinline_for_stack void drgn_test_crash_func(struct irq_work *work) +{ + panic("drgn_test\n"); +} + +#ifdef DRGN_TEST_IRQ_CRASH +static DEFINE_IRQ_WORK(drgn_test_crash_irq_work, drgn_test_crash_func); +#endif + +static ssize_t drgn_test_crash_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + if (val != 1) + return -EINVAL; + +#ifdef DRGN_TEST_IRQ_CRASH + preempt_disable(); + irq_work_queue(&drgn_test_crash_irq_work); + // Spin until we get interrupted and crash. 
+ while (1); +#else + drgn_test_crash_func(NULL); +#endif +} + +static struct kobj_attribute drgn_test_crash_attr = + __ATTR(crash, 0200, NULL, drgn_test_crash_store); + +static struct attribute_group drgn_test_attr_group = { + .attrs = (struct attribute *[]){ + &drgn_test_crash_attr.attr, + NULL, + }, +}; + +static struct kobject *drgn_test_kobj; + +static int __init drgn_test_sysfs_init(void) +{ + drgn_test_kobj = kobject_create_and_add("drgn_test", kernel_kobj); + if (!drgn_test_kobj) + return -ENOMEM; + + return sysfs_create_group(drgn_test_kobj, &drgn_test_attr_group); +} + +static void drgn_test_sysfs_exit(void) +{ + kobject_put(drgn_test_kobj); +} +#else +static inline int drgn_test_sysfs_init(void) { return 0; } +static inline void drgn_test_sysfs_exit(void) {} +#endif + static void drgn_test_exit(void) { + drgn_test_sysfs_exit(); drgn_test_slab_exit(); drgn_test_percpu_exit(); drgn_test_maple_tree_exit(); @@ -1436,6 +1508,9 @@ static int __init drgn_test_init(void) if (ret) goto out; ret = drgn_test_idr_init(); + if (ret) + goto out; + ret = drgn_test_sysfs_init(); out: if (ret) drgn_test_exit(); diff --git a/tests/linux_kernel/vmcore/test_vmcore.py b/tests/linux_kernel/vmcore/test_vmcore.py index 7ad7f75eb..595de1216 100644 --- a/tests/linux_kernel/vmcore/test_vmcore.py +++ b/tests/linux_kernel/vmcore/test_vmcore.py @@ -53,18 +53,37 @@ def test_crashed_thread(self): # why anyone would run these tests from kdump otherwise. self.assertEqual(crashed_thread.object.comm.string_(), b"selfdestruct") + def _test_crashed_thread_stack_trace(self, trace): + # This assumes that we crashed using the drgn_test kmod. Note that on + # supported architectures, drgn_test_crash_func() is called from an IRQ + # handler that interrupts drgn_test_crash_store(). 
+ trace_iter = iter(trace) + for frame in trace_iter: + if frame.name == "drgn_test_crash_func": + break + else: + self.fail("drgn_test_crash_func frame not found") + + for frame in trace_iter: + if frame.name == "drgn_test_crash_store": + break + else: + self.fail( + "drgn_test_crash_store frame not found below drgn_test_crash_func" + ) + def test_crashed_thread_stack_trace(self): self._skip_if_cpu0_on_s390x() - self.assertIn("sysrq", str(self.prog.crashed_thread().stack_trace())) + self._test_crashed_thread_stack_trace(self.prog.crashed_thread().stack_trace()) def test_crashed_thread_stack_trace_by_tid(self): self._skip_if_cpu0_on_s390x() - self.assertIn( - "sysrq", str(self.prog.stack_trace(self.prog.crashed_thread().tid)) + self._test_crashed_thread_stack_trace( + self.prog.stack_trace(self.prog.crashed_thread().tid) ) def test_crashed_thread_stack_trace_by_task_struct(self): self._skip_if_cpu0_on_s390x() - self.assertIn( - "sysrq", str(self.prog.stack_trace(self.prog.crashed_thread().object)) + self._test_crashed_thread_stack_trace( + self.prog.stack_trace(self.prog.crashed_thread().object) ) diff --git a/vmtest/enter_kdump.py b/vmtest/enter_kdump.py index 674006af2..b9175fcc2 100644 --- a/vmtest/enter_kdump.py +++ b/vmtest/enter_kdump.py @@ -65,6 +65,14 @@ def main() -> None: if cpus: os.sched_setaffinity(0, cpus) + # Try the drgn_test kmod crash method first. + try: + with open("/sys/kernel/drgn_test/crash", "w") as f: + f.write("1") + except FileNotFoundError: + pass + + # Fall back to sysrq-trigger. with open("/proc/sysrq-trigger", "w") as f: f.write("c") From dd579782ff7e2aa645bc4a6190e9823d278d1303 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Mar 2025 15:35:13 -0700 Subject: [PATCH 069/166] libdrgn: rename drgn_object_slice() to drgn_object_fragment() Septatrix suggested adding support for array slicing in #475. The ideal name for that operation in libdrgn would be "drgn_object_slice", but we're already using that name. 
Rename it to the more appropriate "drgn_object_fragment". Signed-off-by: Omar Sandoval --- libdrgn/drgn.h | 14 +++++++------- libdrgn/language_c.c | 31 +++++++++++++++++-------------- libdrgn/object.c | 38 +++++++++++++++++++------------------- libdrgn/object.h | 8 ++++---- libdrgn/python/object.c | 10 +++++----- 5 files changed, 52 insertions(+), 49 deletions(-) diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 414f4aa72..57d9f0f0b 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -2504,7 +2504,7 @@ struct drgn_error *drgn_object_copy(struct drgn_object *res, const struct drgn_object *obj); /** - * Get a @ref drgn_object from a "slice" of an object. + * Get a @ref drgn_object from a "fragment" of an object. * * This is a low-level interface used to implement @ref drgn_object_subscript(), * @ref drgn_object_member(), and @ref drgn_object_reinterpret(). Those @@ -2529,11 +2529,11 @@ struct drgn_error *drgn_object_copy(struct drgn_object *res, * bits. Otherwise, 0. * @return @c NULL on success, non-@c NULL on error. */ -struct drgn_error *drgn_object_slice(struct drgn_object *res, - const struct drgn_object *obj, - struct drgn_qualified_type qualified_type, - uint64_t bit_offset, - uint64_t bit_field_size); +struct drgn_error *drgn_object_fragment(struct drgn_object *res, + const struct drgn_object *obj, + struct drgn_qualified_type qualified_type, + uint64_t bit_offset, + uint64_t bit_field_size); /** * Get a @ref drgn_object from dereferencing a pointer object with an offset. @@ -2543,7 +2543,7 @@ struct drgn_error *drgn_object_slice(struct drgn_object *res, * convenient, but this function can be more efficient if accessing multiple * elements or the same member multiple times. * - * @sa drgn_object_slice + * @sa drgn_object_fragment * * @param[out] res Dereferenced object. * @param[in] obj Pointer object. 
diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 6229c46f4..94f4851ab 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1049,9 +1049,10 @@ compound_initializer_iter_next(struct initializer_iter *iter_, if (member->name || !(iter->flags & DRGN_FORMAT_OBJECT_MEMBER_NAMES) || !drgn_type_has_members(member_type.type)) { - err = drgn_object_slice(obj_ret, iter->obj, member_type, - bit_offset + member->bit_offset, - member_bit_field_size); + err = drgn_object_fragment(obj_ret, iter->obj, + member_type, + bit_offset + member->bit_offset, + member_bit_field_size); if (err) return err; @@ -1178,9 +1179,9 @@ c_format_compound_object(const struct drgn_object *obj, if (err) goto out; - err = drgn_object_slice(&member, obj, member_type, - new->end[-1].bit_offset, - member_bit_field_size); + err = drgn_object_fragment(&member, obj, member_type, + new->end[-1].bit_offset, + member_bit_field_size); if (err) goto out; @@ -1385,8 +1386,10 @@ array_initializer_iter_next(struct initializer_iter *iter_, if (iter->i >= iter->length) return &drgn_stop; - err = drgn_object_slice(obj_ret, iter->obj, iter->element_type, - iter->i * iter->element_bit_size, 0); + err = drgn_object_fragment(obj_ret, iter->obj, + iter->element_type, + iter->i * iter->element_bit_size, + 0); if (err) return err; iter->i++; @@ -1499,11 +1502,11 @@ c_format_array_object(const struct drgn_object *obj, do { bool zero; - err = drgn_object_slice(&element, obj, - iter.element_type, - (iter.length - 1) * - iter.element_bit_size, - 0); + err = drgn_object_fragment(&element, obj, + iter.element_type, + (iter.length - 1) + * iter.element_bit_size, + 0); if (err) return err; @@ -3470,7 +3473,7 @@ c_op_implicit_convert(struct drgn_object *res, return err; if (!compatible) goto incompatible_type_error; - return drgn_object_slice_internal(res, obj, &type, 0, 0); + return drgn_object_fragment_internal(res, obj, &type, 0, 0); } case DRGN_TYPE_POINTER: { if (drgn_type_kind(obj_type.underlying_type) 
diff --git a/libdrgn/object.c b/libdrgn/object.c index e907bec83..ee1b38f7b 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -565,10 +565,10 @@ drgn_object_copy(struct drgn_object *res, const struct drgn_object *obj) } struct drgn_error * -drgn_object_slice_internal(struct drgn_object *res, - const struct drgn_object *obj, - const struct drgn_object_type *type, - uint64_t bit_offset, uint64_t bit_field_size) +drgn_object_fragment_internal(struct drgn_object *res, + const struct drgn_object *obj, + const struct drgn_object_type *type, + uint64_t bit_offset, uint64_t bit_field_size) { struct drgn_error *err; @@ -617,9 +617,9 @@ drgn_object_slice_internal(struct drgn_object *res, } LIBDRGN_PUBLIC struct drgn_error * -drgn_object_slice(struct drgn_object *res, const struct drgn_object *obj, - struct drgn_qualified_type qualified_type, - uint64_t bit_offset, uint64_t bit_field_size) +drgn_object_fragment(struct drgn_object *res, const struct drgn_object *obj, + struct drgn_qualified_type qualified_type, + uint64_t bit_offset, uint64_t bit_field_size) { struct drgn_error *err; if (drgn_object_program(res) != drgn_object_program(obj)) { @@ -630,8 +630,8 @@ drgn_object_slice(struct drgn_object *res, const struct drgn_object *obj, err = drgn_object_type(qualified_type, bit_field_size, &type); if (err) return err; - return drgn_object_slice_internal(res, obj, &type, bit_offset, - bit_field_size); + return drgn_object_fragment_internal(res, obj, &type, bit_offset, + bit_field_size); } LIBDRGN_PUBLIC struct drgn_error * @@ -1225,9 +1225,9 @@ drgn_compound_object_is_zero(const struct drgn_object *obj, if (err) return err; - err = drgn_object_slice(&member, obj, member_type, - members[i].bit_offset, - member_bit_field_size); + err = drgn_object_fragment(&member, obj, member_type, + members[i].bit_offset, + member_bit_field_size); if (err) return err; @@ -1254,8 +1254,8 @@ drgn_array_object_is_zero(const struct drgn_object *obj, DRGN_OBJECT(element, 
drgn_object_program(obj)); length = drgn_type_length(underlying_type); for (i = 0; i < length; i++) { - err = drgn_object_slice(&element, obj, element_type, - i * element_bit_size, 0); + err = drgn_object_fragment(&element, obj, element_type, + i * element_bit_size, 0); if (err) return err; @@ -1383,7 +1383,7 @@ drgn_object_reinterpret(struct drgn_object *res, struct drgn_qualified_type qualified_type, const struct drgn_object *obj) { - return drgn_object_slice(res, obj, qualified_type, 0, 0); + return drgn_object_fragment(res, obj, qualified_type, 0, 0); } LIBDRGN_PUBLIC struct drgn_error * @@ -1586,8 +1586,8 @@ drgn_object_subscript(struct drgn_object *res, const struct drgn_object *obj, index * element.bit_size, 0); } else { - return drgn_object_slice(res, obj, element.qualified_type, - index * element.bit_size, 0); + return drgn_object_fragment(res, obj, element.qualified_type, + index * element.bit_size, 0); } } @@ -1613,8 +1613,8 @@ drgn_object_member(struct drgn_object *res, const struct drgn_object *obj, err = drgn_member_type(member, &member_type, &member_bit_field_size); if (err) return err; - return drgn_object_slice(res, obj, member_type, member_bit_offset, - member_bit_field_size); + return drgn_object_fragment(res, obj, member_type, member_bit_offset, + member_bit_field_size); } LIBDRGN_PUBLIC struct drgn_error * diff --git a/libdrgn/object.h b/libdrgn/object.h index 12dcc8f57..5b9c20547 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -209,10 +209,10 @@ drgn_object_set_absent_internal(struct drgn_object *res, } struct drgn_error * -drgn_object_slice_internal(struct drgn_object *res, - const struct drgn_object *obj, - const struct drgn_object_type *type, - uint64_t bit_offset, uint64_t bit_field_size); +drgn_object_fragment_internal(struct drgn_object *res, + const struct drgn_object *obj, + const struct drgn_object_type *type, + uint64_t bit_offset, uint64_t bit_field_size); /** * Binary operator implementation. 
diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index b0808dec6..302a8f8b0 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -575,9 +575,9 @@ static PyObject *DrgnObject_compound_value(struct drgn_object *obj, if (err) return set_drgn_error(err); - err = drgn_object_slice(&member, obj, member_type, - members[i].bit_offset, - member_bit_field_size); + err = drgn_object_fragment(&member, obj, member_type, + members[i].bit_offset, + member_bit_field_size); if (err) return set_drgn_error(err); @@ -622,8 +622,8 @@ static PyObject *DrgnObject_array_value(struct drgn_object *obj, DRGN_OBJECT(element, drgn_object_program(obj)); for (uint64_t i = 0; i < length; i++) { - err = drgn_object_slice(&element, obj, element_type, - i * element_bit_size, 0); + err = drgn_object_fragment(&element, obj, element_type, + i * element_bit_size, 0); if (err) return set_drgn_error(err); From 5d70f344caf350711ebc2182166df36efc31080c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Mar 2025 15:40:34 -0700 Subject: [PATCH 070/166] vmtest.__main__: exit with non-zero status on failure Signed-off-by: Omar Sandoval --- vmtest/__main__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vmtest/__main__.py b/vmtest/__main__.py index 690c19a16..bc97a1a3e 100644 --- a/vmtest/__main__.py +++ b/vmtest/__main__.py @@ -45,6 +45,9 @@ def __init__(self, file: TextIO) -> None: self._passed: Dict[str, List[str]] = {} self._failed: Dict[str, List[str]] = {} + def succeeded(self) -> bool: + return not self._failed + def _green(self, s: str) -> str: if self._color: return "\033[32m" + s + "\033[0m" @@ -374,3 +377,4 @@ def add_kernel(arch: Architecture, pattern: str) -> None: if in_github_actions: shutil.rmtree(kernel.path) progress.update(kernel.arch.name, kernel.release, status == 0) + sys.exit(0 if progress.succeeded() else 1) From 09a1bb6376aeb6a189d54de39ce00cd2297a291c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Mar 2025 16:54:14 -0700 
Subject: [PATCH 071/166] docs: suggest usage of operator.index()/IntegerLike instead of Object.value_() We make use of this practice in many helpers, but it's not documented anywhere. Closes #473. Suggested-by: Septatrix <24257556+Septatrix@users.noreply.github.com> Signed-off-by: Omar Sandoval --- _drgn.pyi | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/_drgn.pyi b/_drgn.pyi index bc02dc85b..6b6835606 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -54,6 +54,10 @@ class IntegerLike(Protocol): Parameters annotated with this type expect an integer which may be given as a Python :class:`int` or an :class:`Object` with integer type. + + .. note:: + This is equivalent to :class:`typing.SupportsIndex` except that it is + not runtime-checkable. """ def __index__(self) -> int: ... @@ -2399,6 +2403,20 @@ class Object: returns a ``dict`` of members. For arrays, this returns a ``list`` of values. + .. note:: + Helpers that wish to accept an argument that may be an + :class:`Object` or an :class:`int` should use + :func:`operator.index()` and :class:`IntegerLike` instead: + + .. code-block:: python3 + + import operator + from drgn import IntegerLike + + def my_helper(i: IntegerLike) -> ...: + value = operator.index(i) # Returns an int + ... + :raises FaultError: if reading the object causes a bad memory access :raises TypeError: if this object has an unreadable type (e.g., ``void``) From c64f74f266a1bce3e8a8d6c5961b53209e2fbc2d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Mar 2025 16:56:26 -0700 Subject: [PATCH 072/166] docs: update required Sphinx version to 7.3.7 Once again match the version that I use locally (on Fedora 41). 
Signed-off-by: Omar Sandoval --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index cdd5f14b9..ea03d967b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1 @@ -sphinx==6.2.1 +sphinx==7.3.7 From 27deab519ed6342765197402de2c25526d426347 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 14:39:45 -0700 Subject: [PATCH 073/166] tests: handle PrimitiveType in assertIdentical() I never caught this before because it only happens when the a._ptr == b._ptr check fails for a base type, which would be a test failure anyways. Signed-off-by: Omar Sandoval --- tests/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/__init__.py b/tests/__init__.py index 322379011..e5619e5ef 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,6 +17,7 @@ Object, Platform, PlatformFlags, + PrimitiveType, Program, Type, TypeEnumerator, @@ -126,6 +127,7 @@ def assertReprPrettyEqualsStr(obj): _IDENTICAL_EQ_TYPES = ( type(None), Language, + PrimitiveType, Program, TypeEnumerator, TypeKind, From 39cd968afb7e34f4a04a708a90cce6fffc854c68 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 14:48:24 -0700 Subject: [PATCH 074/166] tests: dwarfwriter: rename compile_dwarf() to create_dwarf_file() I want to use the name compile_dwarf() for a more general method that can return information in addition to the file data (specifically, labels), so rename it to match create_elf_file(). While we're doing this, rename the dies parameter to units_or_dies for clarity and make little_endian and bits keyword-only, none of which make any difference for current callers. 
Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 10 +++---- tests/test_debug_info.py | 60 +++++++++++++++++++++------------------- tests/test_dwarf.py | 24 ++++++++-------- 3 files changed, 49 insertions(+), 45 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index d68df48f6..b3ff3d832 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -385,10 +385,8 @@ def debug_section(name, data): return sections -def compile_dwarf( - dies, - little_endian=True, - bits=64, +def create_dwarf_file( + units_or_dies, *, version=4, lang=None, @@ -397,13 +395,15 @@ def compile_dwarf( split=None, sections=(), build_id=None, + little_endian=True, + bits=64, ): return create_elf_file( ET.EXEC, sections=[ *sections, *dwarf_sections( - dies, + units_or_dies, little_endian=little_endian, bits=bits, version=version, diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index 69373ed9a..6d652a562 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -29,7 +29,7 @@ VdsoModule, ) from tests import TestCase, modifyenv -from tests.dwarfwriter import compile_dwarf +from tests.dwarfwriter import create_dwarf_file from tests.elfwriter import ElfSection, create_elf_file from tests.resources import get_resource @@ -67,7 +67,7 @@ def NamedTemporaryElfFile(*, loadable=True, debug=True, build_id=None, sections= sections = (ALLOCATED_SECTION,) + sections with tempfile.NamedTemporaryFile() as f: if debug: - f.write(compile_dwarf((), sections=sections, build_id=build_id)) + f.write(create_dwarf_file((), sections=sections, build_id=build_id)) else: f.write(create_elf_file(ET.EXEC, sections=sections, build_id=build_id)) f.flush() @@ -372,11 +372,11 @@ def test_gnu_debugaltlink(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) binary_path = bin_dir / "binary" binary_path.write_bytes( - 
compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -441,11 +441,11 @@ def test_gnu_debugaltlink_build_id_mismatch(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id[::-1])) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id[::-1])) binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -506,7 +506,7 @@ def test_gnu_debugaltlink_then_both(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id @@ -538,7 +538,7 @@ def test_gnu_debugaltlink_cancel(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id @@ -1886,7 +1886,9 @@ def test_by_build_id(self): build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] build_id_dir.mkdir(parents=True) binary_path = build_id_dir / build_id.hex()[2:] - binary_path.write_bytes(compile_dwarf((), sections=(ALLOCATED_SECTION,))) + binary_path.write_bytes( + create_dwarf_file((), sections=(ALLOCATED_SECTION,)) + ) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id @@ -1914,7 +1916,7 @@ def test_by_build_id_separate(self): create_elf_file(ET.EXEC, sections=(ALLOCATED_SECTION,)) ) debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") - debug_path.write_bytes(compile_dwarf(())) + debug_path.write_bytes(create_dwarf_file(())) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = 
build_id @@ -1944,7 +1946,7 @@ def test_by_build_id_from_loaded(self): build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] build_id_dir.mkdir(parents=True) debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") - debug_path.write_bytes(compile_dwarf(())) + debug_path.write_bytes(create_dwarf_file(())) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] @@ -1967,7 +1969,9 @@ def test_by_build_id_method(self): build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] build_id_dir.mkdir(parents=True) binary_path = build_id_dir / build_id.hex()[2:] - binary_path.write_bytes(compile_dwarf((), sections=(ALLOCATED_SECTION,))) + binary_path.write_bytes( + create_dwarf_file((), sections=(ALLOCATED_SECTION,)) + ) module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id @@ -1988,7 +1992,7 @@ def test_by_gnu_debuglink(self): bin_dir = Path(bin_dir) debug_dir = Path(debug_dir) - debug_file_contents = compile_dwarf(()) + debug_file_contents = create_dwarf_file(()) crc = binascii.crc32(debug_file_contents) loadable_path = bin_dir / "binary" @@ -2041,7 +2045,7 @@ def test_by_gnu_debuglink_absolute(self): bin_dir = Path(bin_dir) debug_dir = Path(debug_dir) - debug_file_contents = compile_dwarf(()) + debug_file_contents = create_dwarf_file(()) crc = binascii.crc32(debug_file_contents) debug_path = debug_dir / "binary.debug" @@ -2071,7 +2075,7 @@ def test_by_gnu_debuglink_crc_mismatch(self): with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: bin_dir = Path(bin_dir) - debug_file_contents = compile_dwarf(()) + debug_file_contents = create_dwarf_file(()) crc = binascii.crc32(debug_file_contents) loadable_path = bin_dir / "binary" @@ -2126,11 +2130,11 @@ def test_gnu_debugaltlink_absolute(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) binary_path = bin_dir 
/ "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2158,7 +2162,7 @@ def test_gnu_debugaltlink_not_found(self): binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2194,11 +2198,11 @@ def test_only_gnu_debugaltlink_absolute(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2232,7 +2236,7 @@ def test_only_gnu_debugaltlink_not_found(self): binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2274,11 +2278,11 @@ def test_gnu_debugaltlink_relative(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2308,7 +2312,7 @@ def test_gnu_debugaltlink_debug_directories(self): alt_path = debug_dir / ".dwz/alt.debug" alt_path.parent.mkdir() - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id)) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) for i, debugaltlink in enumerate( @@ -2320,7 +2324,7 @@ def test_gnu_debugaltlink_debug_directories(self): with self.subTest(debugaltlink=debugaltlink): binary_path = bin_dir / f"binary{i}" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2350,11 +2354,11 @@ def 
test_gnu_debugaltlink_build_id_mismatch(self): debug_dir = Path(debug_dir) alt_path = debug_dir / "alt.debug" - alt_path.write_bytes(compile_dwarf((), build_id=alt_build_id[::-1])) + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id[::-1])) binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, @@ -2389,7 +2393,7 @@ def test_invalid_gnu_debugaltlink(self): binary_path = bin_dir / "binary" binary_path.write_bytes( - compile_dwarf( + create_dwarf_file( (), sections=( ALLOCATED_SECTION, diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 43cf1de05..85c046ca2 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -47,7 +47,7 @@ DwarfDie, DwarfLabel, DwarfUnit, - compile_dwarf, + create_dwarf_file, ) bool_die = DwarfDie( @@ -209,7 +209,7 @@ def add_extra_dwarf(prog, path): def dwarf_program(*args, segments=None, **kwds): prog = Program() with tempfile.NamedTemporaryFile() as f: - f.write(compile_dwarf(*args, **kwds)) + f.write(create_dwarf_file(*args, **kwds)) f.flush() add_extra_dwarf(prog, f.name) @@ -6890,7 +6890,7 @@ def test_dwo4(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -6910,7 +6910,7 @@ def test_dwo4(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -6937,7 +6937,7 @@ def test_dwo4_not_found(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -6975,7 +6975,7 @@ def test_dwo4_id_mismatch(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( 
DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -6994,7 +6994,7 @@ def test_dwo4_id_mismatch(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -7032,7 +7032,7 @@ def test_dwo5(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -7048,7 +7048,7 @@ def test_dwo5(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -7072,7 +7072,7 @@ def test_dwo5_not_found(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -7107,7 +7107,7 @@ def test_dwo5_id_mismatch(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -7123,7 +7123,7 @@ def test_dwo5_id_mismatch(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( From 591d7079c4fe5f8a509a5f8145c5c7db42111b27 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 16:21:45 -0700 Subject: [PATCH 075/166] tests: don't add DWARF to ELF symbol test files Before commit 4e83130008e9 ("Introduce module and debug info finder APIs"), a file needed to have DWARF info to be considered by drgn. This is no longer the case. However, the file does still need to appear to be loadable. Tweak create_elf_symbol_file() to satisfy that requirement and remove the dummy DWARF info. 
Signed-off-by: Omar Sandoval --- tests/test_symbol.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index 8228c52e0..aae2b9ee1 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -7,43 +7,50 @@ from _drgn_util.elf import ET, PT, SHF, SHT, STB, STT from drgn import Program, Symbol, SymbolBinding, SymbolIndex, SymbolKind from tests import TestCase -from tests.dwarfwriter import dwarf_sections from tests.elfwriter import ElfSection, ElfSymbol, create_elf_file def create_elf_symbol_file(symbols): - sections = dwarf_sections(()) # Create a section for the symbols to reference and the corresponding - # segment for address lookups. - min_address = min(symbol.value for symbol in symbols) - max_address = max(symbol.value + symbol.size for symbol in symbols) - size = max(max_address - min_address, 4096) - sections.append( + # segment for address lookups. It must be SHF_ALLOC and must not be + # SHT_NOBITS or SHT_NOTE for the file to be loadable. 
+ start = min(symbol.value for symbol in symbols) & ~7 + end = (max(symbol.value + max(symbol.size, 1) for symbol in symbols) + 7) & ~7 + size = end - start + assert size <= 4096, "symbols are too far apart; file would be too large" + sections = [ ElfSection( - name=".foo", - sh_type=SHT.NOBITS, + name=".data", + sh_type=SHT.PROGBITS, sh_flags=SHF.ALLOC, p_type=PT.LOAD, - vaddr=min_address, + vaddr=start, memsz=size, - ) - ) + data=bytes(size), + ), + ] symbols = [ symbol._replace( shindex=len(sections) if symbol.shindex is None else symbol.shindex ) for symbol in symbols ] - return create_elf_file(ET.EXEC, sections, symbols), min_address, min_address + size + return create_elf_file(ET.EXEC, sections, symbols), start, end def elf_symbol_program(*modules): prog = Program() + address_ranges = [] for symbols in modules: with tempfile.NamedTemporaryFile() as f: contents, start, end = create_elf_symbol_file(symbols) f.write(contents) f.flush() + for i, (other_start, other_end) in enumerate(address_ranges): + assert ( + end <= other_start or start >= other_end + ), f"module {len(address_ranges)} overlaps module {i}" + address_ranges.append((start, end)) module = prog.extra_module(f.name, create=True)[0] module.address_range = (start, end) module.try_file(f.name, force=True) From e2fd91d9b5eea79b518a2001a43ddc0edcb567d9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 16:25:26 -0700 Subject: [PATCH 076/166] tests: dwarfwriter: remove dwarf_sections() It's no longer used as of the previous commit, so fold it into create_dwarf_file(). 
Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 43 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index b3ff3d832..38efce86d 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -301,16 +301,18 @@ def collect_file_names(die): _UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) -def dwarf_sections( +def create_dwarf_file( units_or_dies, - little_endian=True, - bits=64, *, version=4, lang=None, use_dw_form_indirect=False, compress=None, split=None, + sections=(), + build_id=None, + little_endian=True, + bits=64, ): assert compress in (None, "zlib-gnu", "zlib-gabi") assert split in (None, "dwo") @@ -371,7 +373,7 @@ def debug_section(name, data): ) return name - sections = [ + dwarf_sections = [ debug_section( ".debug_abbrev", _compile_debug_abbrev(units, use_dw_form_indirect) ), @@ -379,40 +381,13 @@ def debug_section(name, data): debug_section(".debug_str", b"\0"), ] if not split: - sections.append(debug_section(".debug_line", debug_line)) + dwarf_sections.append(debug_section(".debug_line", debug_line)) if debug_types: - sections.append(debug_section(".debug_types", debug_types)) - return sections - + dwarf_sections.append(debug_section(".debug_types", debug_types)) -def create_dwarf_file( - units_or_dies, - *, - version=4, - lang=None, - use_dw_form_indirect=False, - compress=None, - split=None, - sections=(), - build_id=None, - little_endian=True, - bits=64, -): return create_elf_file( ET.EXEC, - sections=[ - *sections, - *dwarf_sections( - units_or_dies, - little_endian=little_endian, - bits=bits, - version=version, - lang=lang, - use_dw_form_indirect=use_dw_form_indirect, - compress=compress, - split=split, - ), - ], + sections=[*sections, *dwarf_sections], build_id=build_id, little_endian=little_endian, bits=bits, From d171a1d6170999c2075f237fc84a1ee1413dc990 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 
16:31:11 -0700 Subject: [PATCH 077/166] tests: move .gnu_debug{,alt}link encoding to elfwriter Upcoming tests for imported units need to generate files with .gnu_debugaltlink, so replace gnu_debugaltlink_section() in test_debug_info.py with proper support in create_elf_file(). Do the same for .gnu_debuglink for consistency. Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 4 +- tests/elfwriter.py | 35 +++++++++++- tests/test_debug_info.py | 113 +++++++++++++-------------------------- 3 files changed, 72 insertions(+), 80 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 38efce86d..0d3d91e69 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -310,9 +310,9 @@ def create_dwarf_file( compress=None, split=None, sections=(), - build_id=None, little_endian=True, bits=64, + **kwargs, ): assert compress in (None, "zlib-gnu", "zlib-gabi") assert split in (None, "dwo") @@ -388,7 +388,7 @@ def debug_section(name, data): return create_elf_file( ET.EXEC, sections=[*sections, *dwarf_sections], - build_id=build_id, little_endian=little_endian, bits=bits, + **kwargs, ) diff --git a/tests/elfwriter.py b/tests/elfwriter.py index 430432aac..157f6a265 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -1,8 +1,9 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later +import os import struct -from typing import List, NamedTuple, Optional, Sequence +from typing import List, NamedTuple, Optional, Sequence, Tuple, Union import zlib from _drgn_util.elf import ET, PT, SHF, SHN, SHT, STB, STT, STV @@ -121,6 +122,12 @@ def create_elf_file( symbols: Sequence[ElfSymbol] = (), *, build_id: Optional[bytes] = None, + gnu_debuglink: Optional[ + Tuple[Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"], int] + ] = None, + gnu_debugaltlink: Optional[ + Tuple[Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"], bytes] + ] = None, little_endian: bool = True, bits: int = 64, ): @@ -157,6 +164,32 @@ def create_elf_file( sections.append( ElfSection(name=".note.gnu.build-id", sh_type=SHT.NOTE, data=build_id_note) ) + + if gnu_debuglink is not None: + gnu_debuglink_path, gnu_debuglink_crc = gnu_debuglink + gnu_debuglink_path = os.fsencode(gnu_debuglink_path) + sections.append( + ElfSection( + name=".gnu_debuglink", + sh_type=SHT.PROGBITS, + data=gnu_debuglink_path + + bytes(4 - len(gnu_debuglink_path) % 4) + + gnu_debuglink_crc.to_bytes(4, "little"), + ) + ) + + if gnu_debugaltlink is not None: + gnu_debugaltlink_path, gnu_debugaltlink_build_id = gnu_debugaltlink + sections.append( + ElfSection( + name=".gnu_debugaltlink", + sh_type=SHT.PROGBITS, + data=os.fsencode(gnu_debugaltlink_path) + + b"\0" + + gnu_debugaltlink_build_id, + ) + ) + shnum = 0 phnum = 0 shstrtab = bytearray(1) diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index 6d652a562..3c6592a12 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -33,24 +33,6 @@ from tests.elfwriter import ElfSection, create_elf_file from tests.resources import get_resource - -def gnu_debuglink_section(path, crc): - path = os.fsencode(path) - return ElfSection( - name=".gnu_debuglink", - sh_type=SHT.PROGBITS, - data=path + bytes(4 - len(path) % 4) + crc.to_bytes(4, "little"), - ) - - -def 
gnu_debugaltlink_section(path, build_id): - return ElfSection( - name=".gnu_debugaltlink", - sh_type=SHT.PROGBITS, - data=os.fsencode(path) + b"\0" + build_id, - ) - - ALLOCATED_SECTION = ElfSection( name=".bss", sh_type=SHT.PROGBITS, @@ -62,14 +44,14 @@ def gnu_debugaltlink_section(path, build_id): @contextlib.contextmanager -def NamedTemporaryElfFile(*, loadable=True, debug=True, build_id=None, sections=()): +def NamedTemporaryElfFile(*, loadable=True, debug=True, sections=(), **kwargs): if loadable: sections = (ALLOCATED_SECTION,) + sections with tempfile.NamedTemporaryFile() as f: if debug: - f.write(create_dwarf_file((), sections=sections, build_id=build_id)) + f.write(create_dwarf_file((), sections=sections, **kwargs)) else: - f.write(create_elf_file(ET.EXEC, sections=sections, build_id=build_id)) + f.write(create_elf_file(ET.EXEC, sections=sections, **kwargs)) f.flush() yield f @@ -378,11 +360,9 @@ def test_gnu_debugaltlink(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(alt_path, alt_build_id), - ), + sections=(ALLOCATED_SECTION,), build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), ) ) @@ -447,11 +427,9 @@ def test_gnu_debugaltlink_build_id_mismatch(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(alt_path, alt_build_id), - ), + sections=(ALLOCATED_SECTION,), build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), ) ) @@ -511,8 +489,8 @@ def test_gnu_debugaltlink_then_both(self): module = self.prog.extra_module(bin_dir / "binary", create=True)[0] module.build_id = build_id with NamedTemporaryElfFile( - sections=(gnu_debugaltlink_section(alt_path, alt_build_id),), build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), ) as f1: module.try_file(f1.name) self.assertEqual( @@ -543,8 +521,8 @@ def test_gnu_debugaltlink_cancel(self): module = self.prog.extra_module(bin_dir / "binary", create=True)[0] 
module.build_id = build_id with NamedTemporaryElfFile( - sections=(gnu_debugaltlink_section(alt_path, alt_build_id),), build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), ) as f: module.try_file(f.name) self.assertEqual( @@ -1816,7 +1794,7 @@ def test_reuse_wanted_supplementary_debug_file(self): alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" with NamedTemporaryElfFile( - sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + gnu_debugaltlink=("alt.debug", alt_build_id), ) as f: module = self.prog.extra_module(f.name, create=True)[0] module.loaded_file_status = ModuleFileStatus.DONT_WANT @@ -1999,10 +1977,8 @@ def test_by_gnu_debuglink(self): loadable_path.write_bytes( create_elf_file( ET.EXEC, - sections=( - ALLOCATED_SECTION, - gnu_debuglink_section("binary.debug", crc), - ), + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc), ) ) @@ -2053,10 +2029,8 @@ def test_by_gnu_debuglink_absolute(self): loadable_path.write_bytes( create_elf_file( ET.EXEC, - sections=( - ALLOCATED_SECTION, - gnu_debuglink_section(debug_path, crc), - ), + sections=(ALLOCATED_SECTION,), + gnu_debuglink=(debug_path, crc), ) ) @@ -2082,10 +2056,8 @@ def test_by_gnu_debuglink_crc_mismatch(self): loadable_path.write_bytes( create_elf_file( ET.EXEC, - sections=( - ALLOCATED_SECTION, - gnu_debuglink_section("binary.debug", crc ^ 1), - ), + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc ^ 1), ) ) @@ -2136,10 +2108,8 @@ def test_gnu_debugaltlink_absolute(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(alt_path, alt_build_id), - ), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(alt_path, alt_build_id), ) ) @@ -2164,10 +2134,8 @@ def test_gnu_debugaltlink_not_found(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(debug_dir / "alt.debug", alt_build_id), - ), + sections=(ALLOCATED_SECTION,), + 
gnu_debugaltlink=(debug_dir / "alt.debug", alt_build_id), ) ) @@ -2204,10 +2172,8 @@ def test_only_gnu_debugaltlink_absolute(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(alt_path, alt_build_id), - ), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(alt_path, alt_build_id), ) ) @@ -2238,10 +2204,8 @@ def test_only_gnu_debugaltlink_not_found(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(debug_dir / "alt.debug", alt_build_id), - ), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(debug_dir / "alt.debug", alt_build_id), ) ) @@ -2284,11 +2248,10 @@ def test_gnu_debugaltlink_relative(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section( - Path(os.path.relpath(alt_path, bin_dir)), alt_build_id - ), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=( + Path(os.path.relpath(alt_path, bin_dir)), + alt_build_id, ), ) ) @@ -2326,10 +2289,8 @@ def test_gnu_debugaltlink_debug_directories(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(debugaltlink, alt_build_id), - ), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(debugaltlink, alt_build_id), ) ) @@ -2360,11 +2321,9 @@ def test_gnu_debugaltlink_build_id_mismatch(self): binary_path.write_bytes( create_dwarf_file( (), - sections=( - ALLOCATED_SECTION, - gnu_debugaltlink_section(alt_path, alt_build_id), - ), + sections=(ALLOCATED_SECTION,), build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), ) ) @@ -2572,7 +2531,7 @@ def test_gnu_debugaltlink(self): loadable=False, debug=True, build_id=build_id, - sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + gnu_debugaltlink=("alt.debug", alt_build_id), ) as debug_file, NamedTemporaryElfFile( loadable=False, debug=True, build_id=alt_build_id ) as alt_f: @@ -2610,7 +2569,7 @@ def 
test_gnu_debugaltlink_not_found(self): loadable=False, debug=True, build_id=build_id, - sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + gnu_debugaltlink=("alt.debug", alt_build_id), ) as debug_file: self.server.build_ids[build_id] = { "executable": loadable_file.name, @@ -2644,7 +2603,7 @@ def test_only_gnu_debugaltlink(self): with NamedTemporaryElfFile( build_id=build_id, - sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + gnu_debugaltlink=("alt.debug", alt_build_id), ) as f, NamedTemporaryElfFile( loadable=False, debug=True, build_id=alt_build_id ) as alt_f: @@ -2673,7 +2632,7 @@ def test_only_gnu_debugaltlink_not_found(self): with NamedTemporaryElfFile( build_id=build_id, - sections=(gnu_debugaltlink_section("alt.debug", alt_build_id),), + gnu_debugaltlink=("alt.debug", alt_build_id), ) as f: module = self.prog.extra_module("foo", create=True)[0] module.try_file(f.name) From 4edc604d0baf99f7a3fe707c9aef0ac04eea32f9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 16:47:58 -0700 Subject: [PATCH 078/166] tests: test all DWARF reference forms We currently only test DW_FORM_ref4 and DW_FORM_ref_sig8. Let's add support for the rest (DW_FORM_ref{1,2,8,_udata,_addr}, not including supplementary file references) to dwarfwriter and test them. 
Signed-off-by: Omar Sandoval --- tests/assembler.py | 1 + tests/dwarfwriter.py | 51 +++++++++++++++++++++++++++++--------------- tests/test_dwarf.py | 26 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/tests/assembler.py b/tests/assembler.py index c1ac523e4..dedc996ed 100644 --- a/tests/assembler.py +++ b/tests/assembler.py @@ -5,6 +5,7 @@ def _append_uleb128(buf, value): + assert value >= 0 while True: byte = value & 0x7F value >>= 7 diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 0d3d91e69..0cbeb63bb 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -68,19 +68,16 @@ def aux(die): def _compile_debug_info(units, little_endian, bits, version, use_dw_form_indirect): offset_size = 4 # We only emit the 32-bit format for now. byteorder = "little" if little_endian else "big" - all_labels = set() labels = {} - relocations = [] + references = [] + unit_references = [] code = 1 decl_file = 1 def aux(buf, die, depth): if isinstance(die, DwarfLabel): - # For now, labels are only supported within a unit, but make sure - # they're unique across all units. 
- if die.name in all_labels: + if die.name in labels: raise ValueError(f"duplicate label {die.name!r}") - all_labels.add(die.name) labels[die.name] = len(buf) return @@ -120,9 +117,26 @@ def aux(buf, die, depth): elif attrib.form == DW_FORM.string: buf.extend(value.encode()) buf.append(0) + elif attrib.form == DW_FORM.ref1: + unit_references.append((len(buf), 1, value)) + buf.append(0) + elif attrib.form == DW_FORM.ref2: + unit_references.append((len(buf), 2, value)) + buf.extend(bytes(2)) elif attrib.form == DW_FORM.ref4: - relocations.append((len(buf), value)) - buf.extend(b"\0\0\0\0") + unit_references.append((len(buf), 4, value)) + buf.extend(bytes(4)) + elif attrib.form == DW_FORM.ref8: + unit_references.append((len(buf), 8, value)) + buf.extend(bytes(8)) + elif attrib.form == DW_FORM.ref_udata: + assert ( + value in labels + ), "DW_FORM_ref_udata can only be used for backreferences" + _append_uleb128(buf, labels[value] - unit_offset) + elif attrib.form == DW_FORM.ref_addr: + references.append((len(buf), offset_size, value)) + buf.extend(bytes(offset_size)) elif attrib.form == DW_FORM.ref_sig8: buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.sec_offset: @@ -142,14 +156,13 @@ def aux(buf, die, depth): debug_info = bytearray() debug_types = bytearray() for unit in units: - labels.clear() - relocations.clear() + unit_references.clear() decl_file = 1 if version == 4 and unit.type in (DW_UT.type, DW_UT.split_type): buf = debug_types else: buf = debug_info - orig_len = len(buf) + unit_offset = len(buf) buf.extend(b"\0\0\0\0") # unit_length buf.extend(version.to_bytes(2, byteorder)) # version if version >= 5: @@ -165,7 +178,7 @@ def aux(buf, die, depth): assert unit.dwo_id is None if unit.type in (DW_UT.type, DW_UT.split_type): buf.extend(unit.type_signature.to_bytes(8, byteorder)) # type_signature - relocations.append((len(buf), unit.type_offset)) + unit_references.append((len(buf), offset_size, unit.type_offset)) buf.extend(bytes(offset_size)) 
# type_offset else: assert unit.type_signature is None @@ -173,12 +186,16 @@ def aux(buf, die, depth): aux(buf, unit.die, 0) - unit_length = len(buf) - orig_len - 4 - buf[orig_len : orig_len + 4] = unit_length.to_bytes(4, byteorder) + unit_length = len(buf) - unit_offset - 4 + buf[unit_offset : unit_offset + 4] = unit_length.to_bytes(4, byteorder) + + for offset, size, label in unit_references: + die_offset = labels[label] - unit_offset + buf[offset : offset + size] = die_offset.to_bytes(size, byteorder) + + for offset, size, label in references: + buf[offset : offset + size] = labels[label].to_bytes(size, byteorder) - for offset, label in relocations: - die_offset = labels[label] - orig_len - buf[offset : offset + 4] = die_offset.to_bytes(4, byteorder) return debug_info, debug_types diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 85c046ca2..d9beed75f 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -344,6 +344,32 @@ def test_unknown_base_type_encoding(self): ) self.assertRaisesRegex(Exception, "unknown DWARF encoding", prog.type, "TEST") + def test_reference_forms(self): + for form in ( + DW_FORM.ref1, + DW_FORM.ref2, + DW_FORM.ref4, + DW_FORM.ref8, + DW_FORM.ref_udata, + DW_FORM.ref_addr, + ): + with self.subTest(form=form): + prog = dwarf_program( + ( + *labeled_int_die, + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, form, "int_die"), + ), + ), + ) + ) + self.assertIdentical( + prog.type("TEST").type, prog.int_type("int", 4, True) + ) + def test_int_type_byteorder(self): prog = dwarf_program( wrap_test_type_dies( From fb391b80bafdc847ff41e40091b9d00bf8a52428 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 16:54:51 -0700 Subject: [PATCH 079/166] tests: dwarfwriter: add optional label for top-level DIEs There's currently nowhere to put a DwarfLabel for the top-level DIE in a unit, which is necessary for testing imported units. 
Add a die_label attribute to DwarfUnit for this. Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 0cbeb63bb..214c29ac9 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -31,6 +31,7 @@ class DwarfDie(NamedTuple): class DwarfUnit(NamedTuple): type: DW_UT die: DwarfDie + die_label: Optional[str] = None dwo_id: Optional[int] = None type_signature: Optional[int] = None type_offset: Optional[str] = None @@ -184,6 +185,8 @@ def aux(buf, die, depth): assert unit.type_signature is None assert unit.type_offset is None + if unit.die_label is not None: + aux(buf, DwarfLabel(unit.die_label), 0) aux(buf, unit.die, 0) unit_length = len(buf) - unit_offset - 4 From 4797153cbc395c15e268db0681cbfb5ade10cba1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 Mar 2025 21:59:21 -0700 Subject: [PATCH 080/166] tests: dwarfwriter: add compile_dwarf() function that returns labels For testing .gnu_debugaltlink files, we need a way to reference DIEs from the .gnu_debugaltlink file in the main debug file. Add a function, compile_dwarf(), that returns the file data as well as a dictionary of labels (and can be extended to return more information in the future if needed). 
Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 214c29ac9..f4d7f2101 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -3,7 +3,7 @@ from collections import OrderedDict import os.path -from typing import Any, NamedTuple, Optional, Sequence, Union +from typing import Any, Dict, NamedTuple, Optional, Sequence, Union import zlib from _drgn_util.elf import ET, SHF, SHT @@ -199,7 +199,7 @@ def aux(buf, die, depth): for offset, size, label in references: buf[offset : offset + size] = labels[label].to_bytes(size, byteorder) - return debug_info, debug_types + return debug_info, debug_types, labels def _compile_debug_line(units, little_endian, bits, version): @@ -321,7 +321,12 @@ def collect_file_names(die): _UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) -def create_dwarf_file( +class DwarfResult(NamedTuple): + data: bytes + labels: Dict[str, int] + + +def compile_dwarf( units_or_dies, *, version=4, @@ -371,7 +376,7 @@ def create_dwarf_file( if not split: debug_line = _compile_debug_line(units, little_endian, bits, version) - debug_info, debug_types = _compile_debug_info( + debug_info, debug_types, labels = _compile_debug_info( units, little_endian, bits, version, use_dw_form_indirect ) @@ -405,10 +410,17 @@ def debug_section(name, data): if debug_types: dwarf_sections.append(debug_section(".debug_types", debug_types)) - return create_elf_file( - ET.EXEC, - sections=[*sections, *dwarf_sections], - little_endian=little_endian, - bits=bits, - **kwargs, + return DwarfResult( + data=create_elf_file( + ET.EXEC, + sections=[*sections, *dwarf_sections], + little_endian=little_endian, + bits=bits, + **kwargs, + ), + labels=labels, ) + + +def create_dwarf_file(*args, **kwargs): + return compile_dwarf(*args, **kwargs).data From 45628d08c5b724d3c21ae3d94e9e944cc4cc9677 Mon Sep 17 00:00:00 2001 From: 
Omar Sandoval Date: Wed, 12 Mar 2025 23:24:57 -0700 Subject: [PATCH 081/166] tests: test invalid DW_AT_sibling handling Test out of bounds and backwards DW_AT_sibling references, which also tests error handling of the first indexing pass. This requires the ability to pass arbitrary reference offsets to dwarfwriter. Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 46 ++++++++++++++++++++++++++++++-------------- tests/test_dwarf.py | 28 ++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 17 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index f4d7f2101..91119afa7 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -119,25 +119,43 @@ def aux(buf, die, depth): buf.extend(value.encode()) buf.append(0) elif attrib.form == DW_FORM.ref1: - unit_references.append((len(buf), 1, value)) - buf.append(0) + if isinstance(value, str): + unit_references.append((len(buf), 1, value)) + buf.append(0) + else: + buf.extend(value.to_bytes(1, byteorder)) elif attrib.form == DW_FORM.ref2: - unit_references.append((len(buf), 2, value)) - buf.extend(bytes(2)) + if isinstance(value, str): + unit_references.append((len(buf), 2, value)) + buf.extend(bytes(2)) + else: + buf.extend(value.to_bytes(2, byteorder)) elif attrib.form == DW_FORM.ref4: - unit_references.append((len(buf), 4, value)) - buf.extend(bytes(4)) + if isinstance(value, str): + unit_references.append((len(buf), 4, value)) + buf.extend(bytes(4)) + else: + buf.extend(value.to_bytes(4, byteorder)) elif attrib.form == DW_FORM.ref8: - unit_references.append((len(buf), 8, value)) - buf.extend(bytes(8)) + if isinstance(value, str): + unit_references.append((len(buf), 8, value)) + buf.extend(bytes(8)) + else: + buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.ref_udata: - assert ( - value in labels - ), "DW_FORM_ref_udata can only be used for backreferences" - _append_uleb128(buf, labels[value] - unit_offset) + if isinstance(value, str): + assert ( + value in labels 
+ ), "DW_FORM_ref_udata can only be used for backreferences" + _append_uleb128(buf, labels[value] - unit_offset) + else: + _append_uleb128(buf, value) elif attrib.form == DW_FORM.ref_addr: - references.append((len(buf), offset_size, value)) - buf.extend(bytes(offset_size)) + if isinstance(value, str): + references.append((len(buf), offset_size, value)) + buf.extend(bytes(offset_size)) + else: + buf.extend(value.to_bytes(offset_size, byteorder)) elif attrib.form == DW_FORM.ref_sig8: buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.sec_offset: diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index d9beed75f..6e6001953 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -252,7 +252,7 @@ def wrapper(self): class TestInvalidDwarf(TestCase): def test_name_out_of_bounds(self): with self.assertRaisesRegex(Exception, "name is out of bounds"): - prog = dwarf_program( + "foo" in dwarf_program( DwarfDie( DW_TAG.base_type, ( @@ -261,8 +261,30 @@ def test_name_out_of_bounds(self): ), ) ) - # Force indexing. 
- "foo" in prog + + def test_sibling_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "DW_AT_sibling is out of bounds"): + "foo" in dwarf_program( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 0xDEADBEEF), + ), + ) + ) + + def test_sibling_points_backwards(self): + with self.assertRaisesRegex(Exception, "DW_AT_sibling points backwards"): + "foo" in dwarf_program( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref1, 0), + ), + ) + ) class TestTypes(TestCase): From e2b60e4b5d65a3bc646542659a19169090095e74 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 14 Mar 2025 13:58:45 -0700 Subject: [PATCH 082/166] Fix task_cpu() and stack traces for CentOS/RHEL 9 kernels task_cpu() (which is also used to implement kernel stack traces) handles the two places where we might get the CPU number depending on the kernel version: task_thread_info(task)->cpu and task->cpu. Since modern kernels use the former, we optimize for that and check for task_thread_info(task)->cpu first. This works fine for all upstream mainline, release candidate, stable, and longterm kernels. However, CentOS Stream 9's kernel (based on 5.14) backported the patch that adds struct thread_info::cpu back without the patch that makes use of it, so we use the dummy task_thread_info(task)->cpu and always get 0. This is also the case for RHEL 9 and other derivatives, including Rocky Linux 9. Fix it by checking for task->cpu first. Fixes #479. 
Co-authored-by: Georges Aureau Signed-off-by: Georges Aureau [Omar: write commit message, update comment] Signed-off-by: Omar Sandoval --- libdrgn/linux_kernel_helpers.c | 43 +++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 7a8ae0ac5..eb54cac41 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -320,25 +320,40 @@ struct drgn_error *linux_helper_task_cpu(const struct drgn_object *task, struct drgn_error *err; DRGN_OBJECT(tmp, drgn_object_program(task)); - // If CONFIG_THREAD_INFO_IN_TASK=y and since Linux kernel commit - // bcf9033e5449 ("sched: move CPU field back into thread_info if - // THREAD_INFO_IN_TASK=y") (in v5.16), the CPU is task->thread_info.cpu. + // The CPU may be task_thread_info(task)->cpu or task->cpu depending on + // the kernel version. If neither exists, then the kernel must be !SMP. // - // If CONFIG_THREAD_INFO_IN_TASK=y but before that commit, the cpu is - // task->cpu. + // Since Linux kernel commit bcf9033e5449 ("sched: move CPU field back + // into thread_info if THREAD_INFO_IN_TASK=y") (in v5.16), or if + // CONFIG_THREAD_INFO_IN_TASK=n, or before Linux kernel commit + // c65eacbe290b ("sched/core: Allow putting thread_info into + // task_struct") (in v4.9), the CPU is task_thread_info(task)->cpu. // - // If CONFIG_THREAD_INFO_IN_TASK=n or before Linux kernel commit + // Between Linux kernel commits bcf9033e5449 ("sched: move CPU field + // back into thread_info if THREAD_INFO_IN_TASK=y") (in v5.16) and // c65eacbe290b ("sched/core: Allow putting thread_info into - // task_struct") (in v4.9), the CPU is - // ((struct thread_info *)task->stack)->cpu. + // task_struct") (in v4.9), if CONFIG_THREAD_INFO_IN_TASK=y, then the + // CPU is task->cpu. // - // If none of those exist, then the kernel must be !SMP. 
- err = linux_helper_task_thread_info(&tmp, task); - if (err) - return err; - err = drgn_object_member_dereference(&tmp, &tmp, "cpu"); + // Note that between Linux kernel commit bcf9033e5449 ("sched: move CPU + // field back into thread_info if THREAD_INFO_IN_TASK=y") and commits + // 001430c1910d ("arm64: add CPU field to struct thread_info"), + // 5443f98fb9e0 ("x86: add CPU field to struct thread_info"), + // bd2e2632556a ("s390: add CPU field to struct thread_info"), and + // 227d735d889e ("powerpc: add CPU field to struct thread_info") (all in + // v5.16-rc1), if CONFIG_THREAD_INFO_IN_TASK=y, then + // struct thread_info::cpu may exist but task->cpu is still used. + // Therefore, we must check for task->cpu first. (Normally we don't care + // about commits in the middle of a release candidate, but CentOS Stream + // 9 and its derivatives apparently backported commit 5443f98fb9e0 + // without commit bcf9033e5449: + // https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/commit/6d09fbd042c8d99009e16ddba62af09c89358f80.) + err = drgn_object_member_dereference(&tmp, task, "cpu"); if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { - err = drgn_object_member_dereference(&tmp, task, "cpu"); + err = linux_helper_task_thread_info(&tmp, task); + if (err) + return err; + err = drgn_object_member_dereference(&tmp, &tmp, "cpu"); } if (!err) { union drgn_value value; From 72b4ecf001670b1458039993e3df9000f37ef54e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 10:10:12 -0700 Subject: [PATCH 083/166] libdrgn: dwarf_info: resolve address of inline member functions GCC and Clang both represent inline member functions that have an out-of-line instance with a declaration in the class scope, an abstract instance root at the top level that refers to the declaration with DW_AT_specification, and a concrete instance at the top level that refers to the abstract instance root with DW_AT_abstract_origin. 
To index this correctly, we therefore need to resolve two references. DWARF indexing can resolve both references: one if a DIE has DW_AT_declaration and another if it has DW_AT_inline. The problem for inline member functions is that the check for DW_AT_inline checks the original (declaration) DIE, which doesn't have DW_AT_inline. So, it only resolves the first reference but not the second. We could store an extra flag in the specification map to indicate whether a definition has DW_AT_inline and check that. However, that would require additional memory. Instead, this replaces the check for DW_AT_inline with a more general check for the absence of DW_AT_low_pc/DW_AT_ranges. This triggers the necessary second lookup for declaration DIEs while still working for the global inline function case. It will also work for functions in supplementary debug files in the future. The second lookup may be unnecessary in some cases, but this had no measurable performance difference. Fixes: a8e2e1719760 ("libdrgn: dwarf_info: resolve address of inline functions with out-of-line instances") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 34 +++++++++---- tests/test_dwarf.py | 114 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 11 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 31193ad3d..7f2e7e980 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -318,13 +318,8 @@ enum drgn_dwarf_index_abbrev_insn { * of flags combined with the drgn_dwarf_index_tag. */ INSN_DIE_FLAG_TAG_MASK = 0x1f, - /* - * DIE has a DW_AT_inline attribute (which may be DW_INL_not_inlined or - * DW_INL_declared_not_inlined). We use this to decide whether to look - * for a concrete out-of-line instance of an abstract instance root, so - * false positives are okay. - */ - INSN_DIE_FLAG_MAYBE_INLINED = 0x20, + /* DIE is DW_TAG_subprogram with no DW_AT_low_pc or DW_AT_ranges. 
*/ + INSN_DIE_FLAG_SUBPROGRAM_NO_PC = 0x20, /* DIE is a declaration. */ INSN_DIE_FLAG_DECLARATION = 0x40, /* DIE has children. */ @@ -958,6 +953,8 @@ read_abbrev_decl(struct drgn_elf_file_section_buffer *buffer, should_index = false; break; } + if (tag == DW_TAG_subprogram) + die_flags |= INSN_DIE_FLAG_SUBPROGRAM_NO_PC; uint8_t children; if ((err = binary_buffer_next_u8(&buffer->bb, &children))) @@ -989,8 +986,8 @@ read_abbrev_decl(struct drgn_elf_file_section_buffer *buffer, err = dw_at_specification_to_insn(cu, &buffer->bb, form, &insn); } else { - if (tag == DW_TAG_subprogram && name == DW_AT_inline) - die_flags |= INSN_DIE_FLAG_MAYBE_INLINED; + if (name == DW_AT_low_pc || name == DW_AT_ranges) + die_flags &= ~INSN_DIE_FLAG_SUBPROGRAM_NO_PC; err = dw_form_to_insn(cu, &buffer->bb, form, &insn); } if (err) @@ -1675,7 +1672,24 @@ indirect_insn:; goto next; } - if (insn & INSN_DIE_FLAG_MAYBE_INLINED) { + // A subprogram DIE without an address may be the + // abstract instance root for an inlined function. Check + // for a concrete instance. + // + // Note that if the original DIE was a declaration, then + // this is technically checking whether the declaration + // itself has an address, not the definition. Since + // declarations don't have an address, this always does + // an extra lookup for definitions of declarations. + // + // The extra lookup is redundant for normal definitions, + // but we actually need it in the case that the + // definition is an abstract instance root (so we need + // to go from declaration -> abstract instance root -> + // concrete instance). Avoiding redundant lookups would + // require storing an extra flag in the specification + // map, which empirically isn't worth it. 
+ if (insn & INSN_DIE_FLAG_SUBPROGRAM_NO_PC) { drgn_dwarf_find_definition(dbinfo, die_addr, &die_addr); } diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 6e6001953..4ac812c7a 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -4578,7 +4578,7 @@ def test_function_no_address(self): def test_function_concrete_out_of_line_instance(self): prog = dwarf_program( - wrap_test_type_dies( + ( *labeled_int_die, DwarfLabel("abstract_instance_root"), DwarfDie( @@ -4637,6 +4637,118 @@ def test_function_concrete_out_of_line_instance(self): ), ) + def test_member_function_specification(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.class_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "Foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), + ), + ( + DwarfLabel("declaration"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "bar"), + DwarfAttrib( + DW_AT.declaration, DW_FORM.flag_present, True + ), + ), + ), + ), + ), + # This is how GCC and Clang do it: the declaration is in the + # correct scope, and the definition is at the top level. 
+ DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, "declaration"), + DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + lang=DW_LANG.C_plus_plus, + ) + self.assertIdentical( + prog["Foo::bar"], + Object( + prog, + prog.function_type( + prog.void_type(), + (), + ), + address=0x7FC3EB9B1C30, + ), + ) + self.assertNotIn("bar", prog) + + def test_member_function_concrete_out_of_line_instance(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.class_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "Foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), + ), + ( + DwarfLabel("declaration"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "bar"), + DwarfAttrib( + DW_AT.declaration, DW_FORM.flag_present, True + ), + ), + ), + ), + ), + DwarfLabel("abstract_instance_root"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, "declaration"), + DwarfAttrib( + DW_AT.inline, DW_FORM.data1, DW_INL.declared_inlined + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib( + DW_AT.specification, DW_FORM.ref4, "abstract_instance_root" + ), + DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + lang=DW_LANG.C_plus_plus, + ) + self.assertIdentical( + prog["Foo::bar"], + Object( + prog, + prog.function_type( + prog.void_type(), + (), + ), + address=0x7FC3EB9B1C30, + ), + ) + self.assertNotIn("bar", prog) + def test_variable(self): prog = dwarf_program( wrap_test_type_dies( From 61088fb500b4e815e69833925dde9fa615f12dc9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 10:23:20 -0700 Subject: [PATCH 084/166] libdrgn: debug_info: check for supplementary debug file in drgn_module_find_dwarf_file() drgn_module_find_dwarf_file() is 
currently only used for finding scopes in stack traces and for finding split files, neither of which should resolve to a supplementary file, but let's cover it for future-proofing. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 45f9f28ef..87170f332 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -5558,6 +5558,9 @@ struct drgn_elf_file *drgn_module_find_dwarf_file(struct drgn_module *module, return NULL; if (dwarf == module->debug_file->_dwarf) return module->debug_file; + if (module->supplementary_debug_file + && dwarf == module->supplementary_debug_file->_dwarf) + return module->supplementary_debug_file; struct drgn_elf_file_dwarf_table_iterator it = drgn_elf_file_dwarf_table_search(&module->split_dwarf_files, &dwarf); From 61e2418138226104fa0a5e6bbdc19e98ff6161c4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Mar 2025 13:40:59 -0700 Subject: [PATCH 085/166] libdrgn: dwarf_info: update DWARF index in one OpenMP parallel region drgn_dwarf_index_update() currently splits its work between 3 OpenMP parallel regions. To index imported units, we need to share some temporary per-thread data between those regions. To simplify this, merge them into one region, which requires tweaking our (already quirky) error handling. This doesn't cause a measurable performance change. Also add comments for the many steps in drgn_dwarf_index_update(). Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 125 +++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 52 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 7f2e7e980..4f31234a0 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1831,6 +1831,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) drgn_init_num_threads(); + // Gather linked list of modules into a vector that we can parallelize. 
VECTOR(drgn_module_vector, modules); { struct drgn_module *module = dbinfo->modules_pending_indexing; @@ -1860,15 +1861,15 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) return &drgn_enomem; } - size_t old_cus_size = - drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); - struct drgn_error *err = NULL; + size_t new_cus_size; #pragma omp parallel num_threads(drgn_num_threads) { struct drgn_error *thread_err = NULL; - struct drgn_dwarf_index_cu_vector *cus; int thread_num = omp_get_thread_num(); + + // Enumerate CUs in new modules. + struct drgn_dwarf_index_cu_vector *cus; if (thread_num == 0) { cus = &dbinfo->dwarf.index_cus; } else { @@ -1876,7 +1877,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) drgn_dwarf_index_cu_vector_init(cus); } - #pragma omp for schedule(dynamic) + #pragma omp for schedule(dynamic) nowait for (size_t i = 0; i < drgn_module_vector_size(&modules); i++) { if (thread_err) continue; @@ -1892,36 +1893,49 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) drgn_error_destroy(thread_err); else err = thread_err; + thread_err = NULL; } - } - if (err) - goto err; - - struct drgn_dwarf_index_cu_vector *cus = &dbinfo->dwarf.index_cus; - - size_t new_cus_size = drgn_dwarf_index_cu_vector_size(cus); - for (int i = 0; i < drgn_num_threads - 1; i++) - new_cus_size += drgn_dwarf_index_cu_vector_size(&threads[i].cus); - if (new_cus_size == old_cus_size) - return NULL; - - if (!drgn_dwarf_index_cu_vector_reserve(cus, new_cus_size)) { - for (int i = 0; i < drgn_num_threads - 1; i++) - drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); - err = &drgn_enomem; - goto err; - } + #pragma omp barrier - for (int i = 0; i < drgn_num_threads - 1; i++) { - drgn_dwarf_index_cu_vector_extend(cus, &threads[i].cus); - drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); - } + // Merge the per-thread CUs into dbinfo (and free them). 
+ #pragma omp master + { + if (!err) { + new_cus_size = + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); + for (int i = 0; i < drgn_num_threads - 1; i++) + new_cus_size += drgn_dwarf_index_cu_vector_size(&threads[i].cus); + + if (new_cus_size > dbinfo->dwarf.global.cus_indexed) { + if (drgn_dwarf_index_cu_vector_reserve(&dbinfo->dwarf.index_cus, + new_cus_size)) { + for (int i = 0; i < drgn_num_threads - 1; i++) { + drgn_dwarf_index_cu_vector_extend(&dbinfo->dwarf.index_cus, + &threads[i].cus); + drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + } + } else { + err = &drgn_enomem; + } + } + } + if (err) { + for (int i = 0; i < drgn_num_threads - 1; i++) + drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + // If there was an error, we'd like to avoid + // doing any more work, but we can't break out + // of an OpenMP parallel region. Set the number + // of CUs to the old number so the remaining + // loops are essentially no-ops. + new_cus_size = dbinfo->dwarf.global.cus_indexed; + drgn_dwarf_index_cu_vector_resize(&dbinfo->dwarf.index_cus, + new_cus_size); + } + } + #pragma omp barrier - #pragma omp parallel num_threads(drgn_num_threads) - { - struct drgn_error *thread_err = NULL; + // Do the first indexing pass. 
struct drgn_dwarf_specification_map *specifications; - int thread_num = omp_get_thread_num(); if (thread_num == 0) { specifications = &dbinfo->dwarf.specifications; } else { @@ -1929,13 +1943,13 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) drgn_dwarf_specification_map_init(specifications); } - #pragma omp for schedule(dynamic) + #pragma omp for schedule(dynamic) nowait for (size_t i = dbinfo->dwarf.global.cus_indexed; - i < drgn_dwarf_index_cu_vector_size(cus); i++) { + i < new_cus_size; i++) { if (thread_err) continue; struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_at(cus, i); + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); thread_err = read_cu(cu); if (!thread_err) { struct drgn_dwarf_index_cu_buffer buffer; @@ -1951,22 +1965,28 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) drgn_error_destroy(thread_err); else err = thread_err; + thread_err = NULL; } - } - for (int i = 0; i < drgn_num_threads - 1; i++) { - err = drgn_dwarf_specification_map_merge(&dbinfo->dwarf.specifications, - &threads[i].specifications, - err); - } - if (err) - goto err; + #pragma omp barrier - #pragma omp parallel num_threads(drgn_num_threads) - { - struct drgn_error *thread_err = NULL; + // Merge the per-thread specification maps into dbinfo (and free + // them). + #pragma omp master + { + for (int i = 0; i < drgn_num_threads - 1; i++) { + err = drgn_dwarf_specification_map_merge(&dbinfo->dwarf.specifications, + &threads[i].specifications, + err); + } + // Same error handling trick as above. + if (err) + new_cus_size = dbinfo->dwarf.global.cus_indexed; + } + #pragma omp barrier + + // Do the second indexing pass. 
struct drgn_dwarf_index_die_map *map; struct drgn_dwarf_base_type_map *base_types; - int thread_num = omp_get_thread_num(); if (thread_num == 0) { map = dbinfo->dwarf.global.map; base_types = &dbinfo->dwarf.base_types; @@ -1980,11 +2000,11 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) #pragma omp for schedule(dynamic) for (size_t i = dbinfo->dwarf.global.cus_indexed; - i < drgn_dwarf_index_cu_vector_size(cus); i++) { + i < new_cus_size; i++) { if (thread_err) continue; struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_at(cus, i); + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); struct drgn_dwarf_index_cu_buffer buffer; drgn_dwarf_index_cu_buffer_init(&buffer, cu); buffer.bb.pos += cu_header_size(cu); @@ -1992,6 +2012,8 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) base_types, &buffer); } + // Merge the per-thread DIE and base type maps into dbinfo (and + // free them). #pragma omp for schedule(dynamic) nowait for (size_t i = 0; i <= array_size(dbinfo->dwarf.global.map); i++) { if (i < array_size(dbinfo->dwarf.global.map)) { @@ -2020,16 +2042,15 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) } if (err) { -err: dbinfo->dwarf.global.saved_err = err; return drgn_error_copy(err); } - qsort(drgn_dwarf_index_cu_vector_begin(cus), - drgn_dwarf_index_cu_vector_size(cus), + qsort(drgn_dwarf_index_cu_vector_begin(&dbinfo->dwarf.index_cus), + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), sizeof(struct drgn_dwarf_index_cu), drgn_dwarf_index_cu_cmp); dbinfo->modules_pending_indexing = NULL; dbinfo->dwarf.global.cus_indexed = - drgn_dwarf_index_cu_vector_size(cus); + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); return NULL; } From f8c0a79ba1f68cb313df2008399b37556ccc30c9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Mar 2025 13:50:31 -0700 Subject: [PATCH 086/166] libdrgn: dwarf_info: read all abbreviation tables before indexing CUs During DWARF indexing, we currently read CU 
abbreviation tables together with the first indexing pass. This means that while we're indexing one CU, some other CU's abbreviation tables may not have been read yet. However, when we index imported units, we need the imported unit's abbreviation table to already be read. The easiest way to ensure this is to read all of them in a pass before the first indexing pass. This doesn't cause a measurable performance change. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 4f31234a0..03fd15426 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1934,6 +1934,37 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) } #pragma omp barrier + // Read the abbreviation tables of new CUs. + #pragma omp for schedule(dynamic) nowait + for (size_t i = dbinfo->dwarf.global.cus_indexed; + i < drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); + i++) { + if (thread_err) + continue; + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); + thread_err = read_cu(cu); + } + if (thread_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + { + if (err) + drgn_error_destroy(thread_err); + else + err = thread_err; + // Same error handling trick as above, except + // that we can't resize the vector anymore for a + // couple of reasons: the CUs now need to be + // properly deinitialized by + // drgn_dwarf_index_cu_deinit(), and we can't + // change the iteration count of the above loop + // while it is running on other threads. + new_cus_size = dbinfo->dwarf.global.cus_indexed; + } + thread_err = NULL; + } + #pragma omp barrier + // Do the first indexing pass. 
struct drgn_dwarf_specification_map *specifications; if (thread_num == 0) { @@ -1950,14 +1981,11 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) continue; struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); - thread_err = read_cu(cu); - if (!thread_err) { - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu_header_size(cu); - thread_err = index_cu_first_pass(specifications, - &buffer); - } + struct drgn_dwarf_index_cu_buffer buffer; + drgn_dwarf_index_cu_buffer_init(&buffer, cu); + buffer.bb.pos += cu_header_size(cu); + thread_err = index_cu_first_pass(specifications, + &buffer); } if (thread_err) { #pragma omp critical(drgn_dwarf_info_update_index_error) From 5efb0de4447f0e29105dd30bf0a33db064edd816 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Mar 2025 14:29:50 -0700 Subject: [PATCH 087/166] libdrgn: dwarf_info: create separate CU lookup table Currently, we use the drgn_dwarf_info::index_cus array to look up CUs by DIE address. This requires re-sorting the array each time the global namespace index is updated. We do this sort as the final step of indexing. In order to index imported units (e.g., for gnu_debugaltlink), we need to look up CUs during indexing, which means that we need the array sorted sooner. But, we also want to loop over only the new CUs during indexing, which doesn't work if sorting mixes them among the old CUs. Resolve this by adding a separate array, index_cu_lookup, that is sorted by address so that we don't have to reorder index_cus. The extra memory usage is negligible (for the Linux kernel on my laptop, an extra ~80K on top of ~180MB of anonymous memory). We also avoid a startup time regression because it's slightly faster to sort this smaller array, and we're able to update it in parallel with reading CUs. It might also be slightly faster to search this smaller array, although I didn't try measuring that. 
This also brings back realloc_array() that we got rid of back in commit a95e42ef2e47 ("libdrgn/python: use vector for Program_load_debug_info()"). Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 65 +++++++++++++++++++++++++++++++++++--------- libdrgn/dwarf_info.h | 6 ++++ libdrgn/util.h | 12 ++++++++ 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 03fd15426..cf85884eb 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -137,6 +137,14 @@ struct drgn_dwarf_index_cu { Dwarf_CU *libdw_cu; }; +/** Indexed CU lookup table entry. */ +struct drgn_dwarf_index_cu_lookup { + /** Address of CU data (@ref drgn_dwarf_index_cu::buf). */ + uintptr_t buf; + /** Index of CU in @ref drgn_dwarf_info::index_cus. */ + size_t index; +}; + DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector); DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); @@ -184,6 +192,7 @@ void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo) dbinfo->dwarf.global.parent = NULL; drgn_dwarf_base_type_map_init(&dbinfo->dwarf.base_types); drgn_dwarf_specification_map_init(&dbinfo->dwarf.specifications); + free(dbinfo->dwarf.index_cu_lookup); drgn_dwarf_index_cu_vector_init(&dbinfo->dwarf.index_cus); drgn_dwarf_type_map_init(&dbinfo->dwarf.types); drgn_dwarf_type_map_init(&dbinfo->dwarf.cant_be_incomplete_array_types); @@ -1718,10 +1727,10 @@ indirect_insn:; return NULL; } -static inline int drgn_dwarf_index_cu_cmp(const void *_a, const void *_b) +static inline int drgn_dwarf_index_cu_lookup_cmp(const void *_a, const void *_b) { - uintptr_t a = (uintptr_t)((struct drgn_dwarf_index_cu *)_a)->buf; - uintptr_t b = (uintptr_t)((struct drgn_dwarf_index_cu *)_b)->buf; + uintptr_t a = ((struct drgn_dwarf_index_cu_lookup *)_a)->buf; + uintptr_t b = ((struct drgn_dwarf_index_cu_lookup *)_b)->buf; return (a > b) - (a < b); } @@ -1729,16 +1738,21 @@ static inline int drgn_dwarf_index_cu_cmp(const void *_a, const void *_b) static struct 
drgn_dwarf_index_cu * drgn_dwarf_index_find_cu(struct drgn_debug_info *dbinfo, uintptr_t die_addr) { - struct drgn_dwarf_index_cu *cus = - drgn_dwarf_index_cu_vector_begin(&dbinfo->dwarf.index_cus); - #define less_than_cu_buf(a, b) (*(a) < (uintptr_t)(b)->buf) - size_t i = binary_search_gt(cus, + struct drgn_dwarf_index_cu_lookup *lookup = + dbinfo->dwarf.index_cu_lookup; + #define less_than_cu_lookup_buf(a, b) (*(a) < (b)->buf) + size_t i = binary_search_gt(lookup, drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), - &die_addr, less_than_cu_buf); + &die_addr, less_than_cu_lookup_buf); #undef less_than_cu_buf - if (i == 0 || die_addr - (uintptr_t)cus[i - 1].buf >= cus[i - 1].len) + if (i == 0) return NULL; - return &cus[i - 1]; + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, + lookup[i - 1].index); + if (die_addr - lookup[i - 1].buf >= cu->len) + return NULL; + return cu; } // If there wasn't already an error, merge src into dst, and return an error if @@ -1934,6 +1948,34 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) } #pragma omp barrier + // Update the CU lookup table. This can be done by one thread in + // parallel with reading CUs. 
+ #pragma omp master + if (drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus) + > dbinfo->dwarf.global.cus_indexed) { + struct drgn_dwarf_index_cu_lookup *lookup = + realloc_array(dbinfo->dwarf.index_cu_lookup, + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), + sizeof(lookup[0])); + if (lookup) { + dbinfo->dwarf.index_cu_lookup = lookup; + for (size_t i = dbinfo->dwarf.global.cus_indexed; + i < drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); + i++) { + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); + lookup[i].buf = (uintptr_t)cu->buf; + lookup[i].index = i; + } + qsort(lookup, + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), + sizeof(lookup[0]), + drgn_dwarf_index_cu_lookup_cmp); + } else { + thread_err = &drgn_enomem; + } + } + // Read the abbreviation tables of new CUs. #pragma omp for schedule(dynamic) nowait for (size_t i = dbinfo->dwarf.global.cus_indexed; @@ -2073,9 +2115,6 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) dbinfo->dwarf.global.saved_err = err; return drgn_error_copy(err); } - qsort(drgn_dwarf_index_cu_vector_begin(&dbinfo->dwarf.index_cus), - drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), - sizeof(struct drgn_dwarf_index_cu), drgn_dwarf_index_cu_cmp); dbinfo->modules_pending_indexing = NULL; dbinfo->dwarf.global.cus_indexed = drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h index 50f51dfc6..90576aea5 100644 --- a/libdrgn/dwarf_info.h +++ b/libdrgn/dwarf_info.h @@ -196,6 +196,12 @@ struct drgn_dwarf_info { struct drgn_dwarf_specification_map specifications; /** Indexed compilation units. */ struct drgn_dwarf_index_cu_vector index_cus; + /** + * Lookup table for indexed compilation units sorted on buffer address. + * + * Size is equal to that of @ref index_cus. + */ + struct drgn_dwarf_index_cu_lookup *index_cu_lookup; /** * Cache of parsed types. 
diff --git a/libdrgn/util.h b/libdrgn/util.h index 7cd890598..07297a880 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -133,6 +133,18 @@ static inline void *malloc64(uint64_t size) return malloc(size); } +// glibc added reallocarray() in 2.26, but since it's so trivial, it's easier to +// duplicate it here than it is to do feature detection. +static inline void *realloc_array(void *ptr, size_t nmemb, size_t size) +{ + size_t bytes; + if (__builtin_mul_overflow(nmemb, size, &bytes)) { + errno = ENOMEM; + return NULL; + } + return realloc(ptr, bytes); +} + static inline void *memdup(const void *ptr, size_t size) { void *copy = malloc(size); From 8d838e157baece740ae84878eacbf1232d62699a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Mar 2025 15:54:35 -0700 Subject: [PATCH 088/166] libdrgn: dwarf_info: don't index partial units The contents of partial units should not be visible unless they are imported into a full compilation unit. Place partial units after full compilation units in drgn_dwarf_info::index_cus so we can skip indexing them. This will be especially important for .gnu_debugaltlink files. This doesn't cause a measurable performance change. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 119 ++++++++++++++++++++++++++++++++++--------- tests/dwarfwriter.py | 2 +- tests/test_dwarf.py | 36 +++++++++++++ 3 files changed, 133 insertions(+), 24 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index cf85884eb..9b8f43352 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -386,12 +386,14 @@ static const char *drgn_dwarf_dwo_name(Dwarf_Die *die) static struct drgn_error * drgn_dwarf_index_read_file(struct drgn_elf_file *file, - struct drgn_dwarf_index_cu_vector *cus); + struct drgn_dwarf_index_cu_vector *cus, + struct drgn_dwarf_index_cu_vector *partial_units); static struct drgn_error * drgn_dwarf_index_read_cus(struct drgn_elf_file *file, enum drgn_section_index scn, - struct drgn_dwarf_index_cu_vector *cus) + struct drgn_dwarf_index_cu_vector *cus, + struct drgn_dwarf_index_cu_vector *partial_units) { struct drgn_error *err; @@ -448,7 +450,8 @@ drgn_dwarf_index_read_cus(struct drgn_elf_file *file, if (err) return err; err = drgn_dwarf_index_read_file(split_file, - cus); + cus, + partial_units); if (err) return err; } @@ -473,8 +476,17 @@ drgn_dwarf_index_read_cus(struct drgn_elf_file *file, abbrev_offset += dwp_offset; } #else - unit_type = (scn == DRGN_SCN_DEBUG_TYPES - ? DW_UT_type : DW_UT_compile); + switch (dwarf_tag(&cudie)) { + case DW_TAG_type_unit: + unit_type = DW_UT_type; + break; + case DW_TAG_partial_unit: + unit_type = DW_UT_partial; + break; + default: + unit_type = DW_UT_compile; + break; + } #endif if (!elf_data_contains_ptr(file->scn_data[scn], @@ -558,7 +570,8 @@ drgn_dwarf_index_read_cus(struct drgn_elf_file *file, } struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_append_entry(cus); + drgn_dwarf_index_cu_vector_append_entry(unit_type == DW_UT_partial + ? 
partial_units : cus); if (!cu) return &drgn_enomem; *cu = (struct drgn_dwarf_index_cu){ @@ -582,7 +595,8 @@ drgn_dwarf_index_read_cus(struct drgn_elf_file *file, static struct drgn_error * drgn_dwarf_index_read_file(struct drgn_elf_file *file, - struct drgn_dwarf_index_cu_vector *cus) + struct drgn_dwarf_index_cu_vector *cus, + struct drgn_dwarf_index_cu_vector *partial_units) { struct drgn_error *err; @@ -611,10 +625,11 @@ drgn_dwarf_index_read_file(struct drgn_elf_file *file, } } - err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_INFO, cus); + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_INFO, cus, + partial_units); if (!err && file->scns[DRGN_SCN_DEBUG_TYPES]) { - err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_TYPES, - cus); + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_TYPES, cus, + partial_units); } return err; } @@ -1755,6 +1770,38 @@ drgn_dwarf_index_find_cu(struct drgn_debug_info *dbinfo, uintptr_t die_addr) return cu; } +static void +drgn_dwarf_index_cus_merge_partial(struct drgn_dwarf_index_cu_vector *dst, + struct drgn_dwarf_index_cu_vector *src_partial, + size_t *partial_pos) +{ + if (!drgn_dwarf_index_cu_vector_empty(src_partial)) { + memcpy(drgn_dwarf_index_cu_vector_at(dst, *partial_pos), + drgn_dwarf_index_cu_vector_begin(src_partial), + drgn_dwarf_index_cu_vector_size(src_partial) + * sizeof(struct drgn_dwarf_index_cu)); + *partial_pos += drgn_dwarf_index_cu_vector_size(src_partial); + } + drgn_dwarf_index_cu_vector_deinit(src_partial); +} + +static void +drgn_dwarf_index_cus_merge(struct drgn_dwarf_index_cu_vector *dst, + struct drgn_dwarf_index_cu_vector *src, + struct drgn_dwarf_index_cu_vector *src_partial, + size_t *pos, size_t *partial_pos) +{ + if (!drgn_dwarf_index_cu_vector_empty(src)) { + memcpy(drgn_dwarf_index_cu_vector_at(dst, *pos), + drgn_dwarf_index_cu_vector_begin(src), + drgn_dwarf_index_cu_vector_size(src) + * sizeof(struct drgn_dwarf_index_cu)); + *pos += drgn_dwarf_index_cu_vector_size(src); + } + 
drgn_dwarf_index_cu_vector_deinit(src); + drgn_dwarf_index_cus_merge_partial(dst, src_partial, partial_pos); +} + // If there wasn't already an error, merge src into dst, and return an error if // that fails. If there was already an error, return the original error. Free // src whether or not there was an error. @@ -1860,7 +1907,10 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) // the dbinfo directly. These are merged into the dbinfo and freed. _cleanup_free_ union { // For reading modules. - struct drgn_dwarf_index_cu_vector cus; + struct { + struct drgn_dwarf_index_cu_vector cus; + struct drgn_dwarf_index_cu_vector partial_units; + }; // For first pass. struct drgn_dwarf_specification_map specifications; // For second pass. @@ -1875,6 +1925,9 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) return &drgn_enomem; } + // Thread 0 needs its own temporary partial_units vector. + struct drgn_dwarf_index_cu_vector partial_units0; + struct drgn_error *err = NULL; size_t new_cus_size; #pragma omp parallel num_threads(drgn_num_threads) @@ -1883,13 +1936,16 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) int thread_num = omp_get_thread_num(); // Enumerate CUs in new modules. 
- struct drgn_dwarf_index_cu_vector *cus; + struct drgn_dwarf_index_cu_vector *cus, *partial_units; if (thread_num == 0) { cus = &dbinfo->dwarf.index_cus; + partial_units = &partial_units0; } else { cus = &threads[thread_num - 1].cus; + partial_units = &threads[thread_num - 1].partial_units; drgn_dwarf_index_cu_vector_init(cus); } + drgn_dwarf_index_cu_vector_init(partial_units); #pragma omp for schedule(dynamic) nowait for (size_t i = 0; i < drgn_module_vector_size(&modules); i++) { @@ -1899,7 +1955,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) *drgn_module_vector_at(&modules, i); thread_err = drgn_dwarf_index_read_file(module->debug_file, - cus); + cus, partial_units); } if (thread_err) { #pragma omp critical(drgn_dwarf_info_update_index_error) @@ -1911,22 +1967,36 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) } #pragma omp barrier - // Merge the per-thread CUs into dbinfo (and free them). + // Merge the per-thread CUs into dbinfo (and free them). Partial + // units are placed at the end and excluded from new_cus_size so + // that they are not indexed. 
#pragma omp master { if (!err) { - new_cus_size = + size_t cus_pos = new_cus_size = drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); - for (int i = 0; i < drgn_num_threads - 1; i++) + size_t new_partial_units = + drgn_dwarf_index_cu_vector_size(&partial_units0); + for (int i = 0; i < drgn_num_threads - 1; i++) { new_cus_size += drgn_dwarf_index_cu_vector_size(&threads[i].cus); + new_partial_units += drgn_dwarf_index_cu_vector_size(&threads[i].partial_units); + } - if (new_cus_size > dbinfo->dwarf.global.cus_indexed) { - if (drgn_dwarf_index_cu_vector_reserve(&dbinfo->dwarf.index_cus, - new_cus_size)) { + if (new_cus_size + new_partial_units + > dbinfo->dwarf.global.cus_indexed) { + if (drgn_dwarf_index_cu_vector_resize(&dbinfo->dwarf.index_cus, + new_cus_size + + new_partial_units)) { + size_t partial_pos = new_cus_size; + drgn_dwarf_index_cus_merge_partial(&dbinfo->dwarf.index_cus, + &partial_units0, + &partial_pos); for (int i = 0; i < drgn_num_threads - 1; i++) { - drgn_dwarf_index_cu_vector_extend(&dbinfo->dwarf.index_cus, - &threads[i].cus); - drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + drgn_dwarf_index_cus_merge(&dbinfo->dwarf.index_cus, + &threads[i].cus, + &threads[i].partial_units, + &cus_pos, + &partial_pos); } } else { err = &drgn_enomem; @@ -1934,8 +2004,11 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) } } if (err) { - for (int i = 0; i < drgn_num_threads - 1; i++) + for (int i = 0; i < drgn_num_threads - 1; i++) { + drgn_dwarf_index_cu_vector_deinit(&threads[i].partial_units); drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + } + drgn_dwarf_index_cu_vector_deinit(&partial_units0); // If there was an error, we'd like to avoid // doing any more work, but we can't break out // of an OpenMP parallel region. 
Set the number diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 91119afa7..7a1fc0806 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -336,7 +336,7 @@ def collect_file_names(die): return buf -_UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) +_UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit, DW_TAG.partial_unit}) class DwarfResult(NamedTuple): diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 4ac812c7a..f32adb80d 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -7312,3 +7312,39 @@ def test_dwo5_id_mismatch(self): for output in log.output ) ) + + +class TestImportedUnit(TestCase): + def test_unused_partial_unit(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"),), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.void_type()) + self.assertRaises(LookupError, prog.type, "UNUSED") From a22c752ffee90ec481f5d5c8f852589877ebcb0b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 10:30:27 -0700 Subject: [PATCH 089/166] libdrgn: dwarf_info: read units of supplementary debug files This will be needed to handle units imported from supplementary files. As far as I know, supplementary files only contain partial units, but if there are full units in there, it also makes sense to index them. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 9b8f43352..6ed738a0c 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -608,28 +608,26 @@ drgn_dwarf_index_read_file(struct drgn_elf_file *file, return err; } } - struct drgn_elf_file *supplementary_file = - file->module->supplementary_debug_file; - if (supplementary_file) { - err = drgn_elf_file_read_section(supplementary_file, - DRGN_SCN_DEBUG_INFO, - &file->alt_debug_info_data); + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_INFO, cus, + partial_units); + if (err) + return err; + if (file->scns[DRGN_SCN_DEBUG_TYPES]) { + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_TYPES, + cus, partial_units); if (err) return err; - if (supplementary_file->scns[DRGN_SCN_DEBUG_STR]) { - err = drgn_elf_file_read_section(supplementary_file, - DRGN_SCN_DEBUG_STR, - &file->alt_debug_str_data); - if (err) - return err; - } } - - err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_INFO, cus, - partial_units); - if (!err && file->scns[DRGN_SCN_DEBUG_TYPES]) { - err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_TYPES, cus, - partial_units); + if (file == file->module->debug_file + && file->module->supplementary_debug_file) { + err = drgn_dwarf_index_read_file(file->module->supplementary_debug_file, + cus, partial_units); + if (err) + return err; + file->alt_debug_info_data = + file->module->supplementary_debug_file->scn_data[DRGN_SCN_DEBUG_INFO]; + file->alt_debug_str_data = + file->module->supplementary_debug_file->scn_data[DRGN_SCN_DEBUG_STR]; } return err; } From 6c5ec7a3bfc3f27341803954f0848c6f6d296164 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Mar 2025 16:03:37 -0700 Subject: [PATCH 090/166] libdrgn: dwarf_info: bounds check specifications while indexing We currently skip bounds checking DW_AT_specification and 
DW_AT_abstract_origin references because it doesn't matter whether they're in bounds: we only use them as a hash table key. However, we're going to reuse INSN_SPECIFICATION_* for DW_AT_import in DW_TAG_imported_unit, and that will actually need to use the reference. Add bounds checks and replace the uintptr_t specification with a const char * now that it's guaranteed to be a valid pointer. This doesn't cause a measurable performance change. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 6ed738a0c..125f9962b 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1147,7 +1147,6 @@ index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, { struct drgn_error *err; struct drgn_dwarf_index_cu *cu = buffer->cu; - const char *debug_info_buffer = cu->file->scn_data[cu->scn]->d_buf; unsigned int depth = 0; for (;;) { uintptr_t die_addr = (uintptr_t)buffer->bb.pos; @@ -1168,7 +1167,7 @@ index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; bool declaration = false; - uintptr_t specification = 0; + const char *specification = NULL; const char *sibling = NULL; uint8_t insn; uint8_t extra_die_flags = 0; @@ -1294,7 +1293,11 @@ indirect_insn:; &tmp))) return err; specification: - specification = (uintptr_t)cu->buf + tmp; + if (tmp >= cu->len) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = cu->buf + tmp; break; case INSN_SPECIFICATION_REF_ADDR4: if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, @@ -1306,7 +1309,12 @@ indirect_insn:; &tmp))) return err; specification_ref_addr: - specification = (uintptr_t)debug_info_buffer + tmp; + if (tmp >= cu->file->scn_data[cu->scn]->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + 
specification = (char *)cu->file->scn_data[cu->scn]->d_buf + + tmp; break; case INSN_SPECIFICATION_REF_ALT4: if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, @@ -1318,8 +1326,12 @@ indirect_insn:; &tmp))) return err; specification_ref_alt: - specification = ((uintptr_t)cu->file->alt_debug_info_data->d_buf - + tmp); + if (tmp >= cu->file->alt_debug_info_data->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->alt_debug_info_data->d_buf + + tmp; break; case INSN_INDIRECT: case INSN_SIBLING_INDIRECT: @@ -1356,7 +1368,8 @@ indirect_insn:; */ if (!declaration && !index_specification(specifications, - specification, die_addr)) + (uintptr_t)specification, + die_addr)) return &drgn_enomem; } From ef5bda4a35b6fae0531cb87e8dda9d65a44876d3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 Mar 2025 16:03:52 -0700 Subject: [PATCH 091/166] libdrgn: dwarf_info: parse specifications in second indexing pass This will be needed for indexing imported units. No functional change other than bounds checking that wasn't there before. No measurable performance change. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 78 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 125f9962b..12ef6116b 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1479,7 +1479,7 @@ index_cu_second_pass(struct drgn_debug_info *dbinfo, uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; const char *name = NULL; bool declaration = false; - bool specification = false; + const char *specification = NULL; const char *sibling = NULL; uint8_t insn; uint8_t extra_die_flags = 0; @@ -1507,9 +1507,6 @@ indirect_insn:; &skip))) return err; goto skip; - case INSN_SPECIFICATION_REF_UDATA: - specification = true; - fallthrough; case INSN_SKIP_LEB128: if ((err = binary_buffer_skip_leb128(&buffer->bb))) return err; @@ -1628,25 +1625,70 @@ indirect_insn:; break; } case INSN_SPECIFICATION_REF1: - specification = true; - skip = 1; - goto skip; + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; case INSN_SPECIFICATION_REF2: - specification = true; - skip = 2; - goto skip; + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; case INSN_SPECIFICATION_REF4: - case INSN_SPECIFICATION_REF_ADDR4: - case INSN_SPECIFICATION_REF_ALT4: - specification = true; - skip = 4; - goto skip; + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; case INSN_SPECIFICATION_REF8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; + goto specification; + case INSN_SPECIFICATION_REF_UDATA: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; +specification: + if (tmp >= cu->len) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = cu->buf + tmp; + break; + case INSN_SPECIFICATION_REF_ADDR4: + if ((err = 
binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification_ref_addr; case INSN_SPECIFICATION_REF_ADDR8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +specification_ref_addr: + if (tmp >= cu->file->scn_data[cu->scn]->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->scn_data[cu->scn]->d_buf + + tmp; + break; + case INSN_SPECIFICATION_REF_ALT4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification_ref_alt; case INSN_SPECIFICATION_REF_ALT8: - specification = true; - skip = 8; - goto skip; + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +specification_ref_alt: + if (tmp >= cu->file->alt_debug_info_data->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->alt_debug_info_data->d_buf + + tmp; + break; case INSN_INDIRECT: case INSN_SIBLING_INDIRECT: case INSN_NAME_INDIRECT: From b53407898c4ad019f8361e383be3ab2462317dc1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 10:35:27 -0700 Subject: [PATCH 092/166] libdrgn: dwarf_info: move drgn_dwarf_index_find_cu() code No code changes other than moving it in the file to make upcoming diffs cleaner. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 12ef6116b..7e1541a32 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -375,6 +375,27 @@ drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, buffer->cu = cu; } +// Returns NULL if die_addr is not from an indexed CU. 
+static struct drgn_dwarf_index_cu * +drgn_dwarf_index_find_cu(struct drgn_debug_info *dbinfo, uintptr_t die_addr) +{ + struct drgn_dwarf_index_cu_lookup *lookup = + dbinfo->dwarf.index_cu_lookup; + #define less_than_cu_lookup_buf(a, b) (*(a) < (b)->buf) + size_t i = binary_search_gt(lookup, + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), + &die_addr, less_than_cu_lookup_buf); + #undef less_than_cu_buf + if (i == 0) + return NULL; + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, + lookup[i - 1].index); + if (die_addr - lookup[i - 1].buf >= cu->len) + return NULL; + return cu; +} + static const char *drgn_dwarf_dwo_name(Dwarf_Die *die) { Dwarf_Attribute attr_mem, *attr; @@ -1802,27 +1823,6 @@ static inline int drgn_dwarf_index_cu_lookup_cmp(const void *_a, const void *_b) return (a > b) - (a < b); } -// Returns NULL if die_addr is not from an indexed CU. -static struct drgn_dwarf_index_cu * -drgn_dwarf_index_find_cu(struct drgn_debug_info *dbinfo, uintptr_t die_addr) -{ - struct drgn_dwarf_index_cu_lookup *lookup = - dbinfo->dwarf.index_cu_lookup; - #define less_than_cu_lookup_buf(a, b) (*(a) < (b)->buf) - size_t i = binary_search_gt(lookup, - drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), - &die_addr, less_than_cu_lookup_buf); - #undef less_than_cu_buf - if (i == 0) - return NULL; - struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, - lookup[i - 1].index); - if (die_addr - lookup[i - 1].buf >= cu->len) - return NULL; - return cu; -} - static void drgn_dwarf_index_cus_merge_partial(struct drgn_dwarf_index_cu_vector *dst, struct drgn_dwarf_index_cu_vector *src_partial, From 790f79e826e7e24d22434c11404a2058d404946f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 10:42:18 -0700 Subject: [PATCH 093/166] libdrgn: dwarf_info: use //-style comments in enum drgn_dwarf_index_abbrev_insn Just to make it less awkward to use //-style comments in 
upcoming changes. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 7e1541a32..f411a657f 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -268,13 +268,11 @@ static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) * friendly), which is important for the tight DIE parsing loop. */ enum drgn_dwarf_index_abbrev_insn { - /* - * Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to - * be skipped over. - */ + // Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to + // be skipped over. INSN_MAX_SKIP = 219, - /* These instructions indicate an attribute that can be skipped over. */ + // These instructions indicate an attribute that can be skipped over. INSN_SKIP_BLOCK, INSN_SKIP_BLOCK1, INSN_SKIP_BLOCK2, @@ -282,7 +280,7 @@ enum drgn_dwarf_index_abbrev_insn { INSN_SKIP_LEB128, INSN_SKIP_STRING, - /* These instructions indicate an attribute that should be parsed. */ + // These instructions indicate an attribute that should be parsed. INSN_SIBLING_REF1, INSN_SIBLING_REF2, INSN_SIBLING_REF4, @@ -316,22 +314,18 @@ enum drgn_dwarf_index_abbrev_insn { NUM_INSNS, - /* - * Every sequence of instructions for a DIE is terminated by a zero - * byte. - */ + // Every sequence of instructions for a DIE is terminated by a zero + // byte. INSN_END = 0, - /* - * The byte after INSN_END contains the DIE flags, which are a bitmask - * of flags combined with the drgn_dwarf_index_tag. - */ + // The byte after INSN_END contains the DIE flags, which are a bitmask + // of flags combined with the drgn_dwarf_index_tag. INSN_DIE_FLAG_TAG_MASK = 0x1f, - /* DIE is DW_TAG_subprogram with no DW_AT_low_pc or DW_AT_ranges. */ + // DIE is DW_TAG_subprogram with no DW_AT_low_pc or DW_AT_ranges. INSN_DIE_FLAG_SUBPROGRAM_NO_PC = 0x20, - /* DIE is a declaration. */ + // DIE is a declaration. 
INSN_DIE_FLAG_DECLARATION = 0x40, - /* DIE has children. */ + // DIE has children. INSN_DIE_FLAG_CHILDREN = 0x80, }; @@ -340,7 +334,7 @@ enum drgn_dwarf_index_abbrev_insn { static_assert((int)DRGN_DWARF_INDEX_NUM_TAGS < (int)INSN_DIE_FLAG_TAG_MASK, "too many instruction DIE tags"); -/* Instructions are 8 bits. */ +// Instructions are 8 bits. static_assert(NUM_INSNS - 1 == UINT8_MAX, "maximum DWARF index instruction is invalid"); From 83d40a5947a53a8c5b9646958c3cfcb802fa3d7d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 11:27:58 -0700 Subject: [PATCH 094/166] libdrgn: dwarf_info: index imported units Since commit 844d82848cd7 ("libdrgn: add partial support for .gnu_debugaltlink"), we can read debug files that need a supplementary debug file (i.e., .gnu_debugaltlink), and DWARF indexing can handle references and strings in the supplementary file. However, DWARF indexing doesn't handle imported units, which often contain the bulk of the interesting debugging information (e.g., the Python package on Fedora has the core type definitions in a .gnu_debugaltlink file rather than the python3 executable or libpython shared object). Imported units can apparently also be used in other contexts (e.g., section groups). This adds proper support for imported units to DWARF indexing. To do this, we replace our single CU buffer with a stack of buffers, overload INSN_SPECIFICATION_* again to represent DW_AT_import, and push/pop imports on the buffer stack. There's a lot of subtlety in how we interpret the depth of a DIE which is extensively commented. This doesn't cause a measurable performance change for files not using imports. Closes #333. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 274 ++++++-- tests/dwarfwriter.py | 11 +- tests/test_dwarf.py | 1422 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 1631 insertions(+), 76 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index f411a657f..3cf2077cd 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -297,6 +297,8 @@ enum drgn_dwarf_index_abbrev_insn { INSN_NAME_STRP_ALT4, INSN_NAME_STRP_ALT8, INSN_DECLARATION_FLAG, + // "Specification" is overloaded to mean DW_AT_specification, + // DW_AT_abstract_origin, or DW_AT_import. INSN_SPECIFICATION_REF1, INSN_SPECIFICATION_REF2, INSN_SPECIFICATION_REF4, @@ -319,8 +321,15 @@ enum drgn_dwarf_index_abbrev_insn { INSN_END = 0, // The byte after INSN_END contains the DIE flags, which are a bitmask - // of flags combined with the drgn_dwarf_index_tag. + // of flags combined with the tag (either a drgn_dwarf_index_tag or one + // of the special INSN_DIE_TAG_ tags below). INSN_DIE_FLAG_TAG_MASK = 0x1f, + + // Tags that need special handling but don't need to be indexed + // themselves. + INSN_DIE_TAG_imported_unit = DRGN_DWARF_INDEX_NUM_TAGS, + INSN_DIE_NUM_TAGS, + // DIE is DW_TAG_subprogram with no DW_AT_low_pc or DW_AT_ranges. INSN_DIE_FLAG_SUBPROGRAM_NO_PC = 0x20, // DIE is a declaration. @@ -331,7 +340,7 @@ enum drgn_dwarf_index_abbrev_insn { // We use INSN_DIE_FLAG_TAG_MASK as a sentinel when the DIE shouldn't be // indexed, so this is < and not <=. -static_assert((int)DRGN_DWARF_INDEX_NUM_TAGS < (int)INSN_DIE_FLAG_TAG_MASK, +static_assert((int)INSN_DIE_NUM_TAGS < (int)INSN_DIE_FLAG_TAG_MASK, "too many instruction DIE tags"); // Instructions are 8 bits. @@ -345,6 +354,8 @@ DEFINE_VECTOR(uint64_vector, uint64_t); struct drgn_dwarf_index_cu_buffer { struct binary_buffer bb; struct drgn_dwarf_index_cu *cu; + // Depth of current DIE relative to starting DIE, which has depth 0. 
+ unsigned int depth; }; static struct drgn_error * @@ -367,6 +378,7 @@ drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, drgn_elf_file_is_little_endian(cu->file), drgn_dwarf_index_cu_buffer_error); buffer->cu = cu; + buffer->depth = 0; } // Returns NULL if die_addr is not from an indexed CU. @@ -949,7 +961,7 @@ dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_specification or DW_AT_abstract_origin", + "unknown attribute form %#" PRIx64 " for DW_AT_specification, DW_AT_abstract_origin, or DW_AT_import", form); } } @@ -985,6 +997,10 @@ read_abbrev_decl(struct drgn_elf_file_section_buffer *buffer, #define X(name) case DW_TAG_##name: die_flags = DRGN_DWARF_INDEX_##name; break; DRGN_DWARF_INDEX_TAGS #undef X + case DW_TAG_imported_unit: + die_flags = INSN_DIE_TAG_imported_unit; + should_index = false; + break; default: die_flags = INSN_DIE_FLAG_TAG_MASK; should_index = false; @@ -1016,10 +1032,12 @@ read_abbrev_decl(struct drgn_elf_file_section_buffer *buffer, } else if (name == DW_AT_declaration && should_index) { err = dw_at_declaration_to_insn(&buffer->bb, form, &insn, &die_flags); - } else if (should_index - && (name == DW_AT_specification - || (tag == DW_TAG_subprogram - && name == DW_AT_abstract_origin))) { + } else if ((should_index + && (name == DW_AT_specification + || (tag == DW_TAG_subprogram + && name == DW_AT_abstract_origin))) + || (tag == DW_TAG_imported_unit + && name == DW_AT_import)) { err = dw_at_specification_to_insn(cu, &buffer->bb, form, &insn); } else { @@ -1152,17 +1170,25 @@ static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, } } +// Stack of CU buffers. The bottom is the initial unit/DIE, and +// DW_TAG_imported_unit DIEs push additional buffers. We use an inline size of 1 +// to avoid an allocation in the common case of no imports. 
+DEFINE_VECTOR(drgn_dwarf_index_cu_buffer_stack, + struct drgn_dwarf_index_cu_buffer, 1); +static const size_t MAX_IMPORTED_UNIT_DEPTH = 128; + /* * First pass: index DIEs with DW_AT_specification and DW_AT_abstract_origin. * This recurses into namespaces. */ static struct drgn_error * index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, - struct drgn_dwarf_index_cu_buffer *buffer) + struct drgn_dwarf_index_cu_buffer_stack *stack) { struct drgn_error *err; + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_last(stack); struct drgn_dwarf_index_cu *cu = buffer->cu; - unsigned int depth = 0; for (;;) { uintptr_t die_addr = (uintptr_t)buffer->bb.pos; @@ -1170,10 +1196,17 @@ index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) return err; if (code == 0) { - if (depth-- > 1) - continue; - else - break; + if (buffer->depth > 1) { + buffer->depth--; + } else { +pop: + drgn_dwarf_index_cu_buffer_stack_pop(stack); + if (drgn_dwarf_index_cu_buffer_stack_empty(stack)) + break; + buffer = drgn_dwarf_index_cu_buffer_stack_last(stack); + cu = buffer->cu; + } + continue; } else if (code > cu->num_abbrev_decls) { return binary_buffer_error(&buffer->bb, "unknown abbreviation code %" PRIu64, @@ -1372,7 +1405,8 @@ indirect_insn:; } insn = *insnp | extra_die_flags; - if (depth > 0 && specification) { + uint8_t tag = insn & INSN_DIE_FLAG_TAG_MASK; + if (specification && tag != INSN_DIE_TAG_imported_unit) { if (insn & INSN_DIE_FLAG_DECLARATION) declaration = true; /* @@ -1388,15 +1422,57 @@ indirect_insn:; return &drgn_enomem; } + unsigned int orig_depth = buffer->depth; if (insn & INSN_DIE_FLAG_CHILDREN) { - if (sibling - && ((insn & INSN_DIE_FLAG_TAG_MASK) - != DRGN_DWARF_INDEX_namespace)) - buffer->bb.pos = sibling; + // We descend into a DIE's children in these cases: + // 1. The DIE doesn't have a sibling pointer, in which + // case we have no choice. 
+ // 2. The DIE is the unit that we're indexing. + // 3. The DIE is a namespace. + // In cases 2 and 3, we ignore the DIE's sibling pointer + // if it has one. + // + // Otherwise, we skip over the DIE's children by + // following the sibling pointer. + if (!sibling + || buffer->depth == 0 + || tag == DRGN_DWARF_INDEX_namespace) + buffer->depth++; else - depth++; - } else if (depth == 0) { - break; + buffer->bb.pos = sibling; + } else if (buffer->depth == 0) { + goto pop; + } + + // We only need to follow imported_unit DIEs whose parent is a + // unit or namespace. To do that, we'd need to track extra + // information. In practice, imported_unit DIEs are mainly used + // in that case anyways, so we don't bother checking and take + // the risk of unnecessary imports. + // + // imported_unit DIEs at depth 0 are malformed, so we ignore + // those. + if (tag == INSN_DIE_TAG_imported_unit && orig_depth > 0) { + if (!specification) { + return binary_buffer_error(&buffer->bb, + "DW_TAG_imported_unit is missing DW_AT_import"); + } + cu = drgn_dwarf_index_find_cu(&cu->file->module->prog->dbinfo, + (uintptr_t)specification); + if (!cu) { + return binary_buffer_error(&buffer->bb, + "imported unit not found"); + } + if (drgn_dwarf_index_cu_buffer_stack_size(stack) + >= MAX_IMPORTED_UNIT_DEPTH) { + return binary_buffer_error(&buffer->bb, + "maximum DWARF imported unit depth exceeded"); + } + buffer = drgn_dwarf_index_cu_buffer_stack_append_entry(stack); + if (!buffer) + return &drgn_enomem; + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos = specification; } } return NULL; @@ -1466,12 +1542,12 @@ static struct drgn_error * index_cu_second_pass(struct drgn_debug_info *dbinfo, struct drgn_dwarf_index_die_map map[static DRGN_DWARF_INDEX_MAP_SIZE], struct drgn_dwarf_base_type_map *base_types, - struct drgn_dwarf_index_cu_buffer *buffer) + struct drgn_dwarf_index_cu_buffer_stack *stack) { struct drgn_error *err; + struct drgn_dwarf_index_cu_buffer *buffer = + 
drgn_dwarf_index_cu_buffer_stack_last(stack); struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_str = cu->file->scn_data[DRGN_SCN_DEBUG_STR]; - unsigned int depth = 0; uint8_t depth1_tag = 0; uintptr_t depth1_addr = 0; for (;;) { @@ -1481,10 +1557,17 @@ index_cu_second_pass(struct drgn_debug_info *dbinfo, if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) return err; if (code == 0) { - if (depth-- > 1) - continue; - else - break; + if (buffer->depth > 1) { + buffer->depth--; + } else { +pop: + drgn_dwarf_index_cu_buffer_stack_pop(stack); + if (drgn_dwarf_index_cu_buffer_stack_empty(stack)) + break; + buffer = drgn_dwarf_index_cu_buffer_stack_last(stack); + cu = buffer->cu; + } + continue; } else if (code > cu->num_abbrev_decls) { return binary_buffer_error(&buffer->bb, "unknown abbreviation code %" PRIu64, @@ -1578,11 +1661,12 @@ indirect_insn:; if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) return err; strp: - if (tmp >= debug_str->d_size) { + if (tmp >= cu->file->scn_data[DRGN_SCN_DEBUG_STR]->d_size) { return binary_buffer_error(&buffer->bb, "DW_AT_name is out of bounds"); } - name = (const char *)debug_str->d_buf + tmp; + name = (const char *)cu->file->scn_data[DRGN_SCN_DEBUG_STR]->d_buf + + tmp; __builtin_prefetch(name); break; case INSN_NAME_STRX: @@ -1729,11 +1813,11 @@ indirect_insn:; insn = *insnp | extra_die_flags; uint8_t tag = insn & INSN_DIE_FLAG_TAG_MASK; - if (depth == 1) { + if (buffer->depth == 1) { depth1_tag = tag; depth1_addr = die_addr; } - if (depth == (tag == DRGN_DWARF_INDEX_enumerator ? 2 : 1) + if (buffer->depth == (tag == DRGN_DWARF_INDEX_enumerator ? 2 : 1) && name && !specification) { if (insn & INSN_DIE_FLAG_DECLARATION) declaration = true; @@ -1765,8 +1849,10 @@ indirect_insn:; } // A subprogram DIE without an address may be the - // abstract instance root for an inlined function. Check - // for a concrete instance. 
+ // abstract instance root for an inlined function, or a + // subprogram DIE in a supplementary file. Check for a + // concrete instance or a definition in the main debug + // file, respectively. // // Note that if the original DIE was a declaration, then // this is technically checking whether the declaration @@ -1790,21 +1876,82 @@ indirect_insn:; return &drgn_enomem; } -next: +next:; + unsigned int orig_depth = buffer->depth; if (insn & INSN_DIE_FLAG_CHILDREN) { - /* - * We must descend into the children of enumeration_type - * DIEs to index enumerator DIEs. We don't want to skip - * over the children of the top-level DIE even if it has - * a sibling pointer. - */ - if (sibling && tag != DRGN_DWARF_INDEX_enumeration_type - && depth > 0) - buffer->bb.pos = sibling; + // We descend into a DIE's children in these cases: + // 1. The DIE doesn't have a sibling pointer, in which + // case we have no choice. + // 2. The DIE is the unit or namespace that we're + // indexing. + // 3. The DIE is a top-level enumeration_type DIE, so we + // want to index its children enumerator DIEs. + // In cases 2 and 3, we ignore the DIE's sibling pointer + // if it has one. + // + // Otherwise, we skip over the DIE's children by + // following the sibling pointer. + if (!sibling + || buffer->depth == 0 + || (buffer->depth == 1 && tag == DRGN_DWARF_INDEX_enumeration_type)) + buffer->depth++; else - depth++; - } else if (depth == 0) { - break; + buffer->bb.pos = sibling; + } else if (buffer->depth == 0) { + goto pop; + } + + // Each buffer actually has two depths: the physical depth in + // the file of the current DIE relative to where the buffer was + // initialized (either the partial unit that we imported or the + // unit or namespace DIE where we started indexing), and the + // logical depth, treating the children of a partial unit as if + // they were siblings of the imported_unit DIE. 
Therefore, the + // logical depth of the children of a partial unit is equal to + // the logical depth of the imported_unit DIE, and the logical + // depth of the partial unit itself is the logical depth of the + // imported_unit DIE minus 1. + // + // Other than enumerator DIEs, we only index DIEs at logical + // depth 1. We assume that partial units will not have top-level + // enumerator DIEs, or alternatively that an enumeration_type + // DIE will not have an imported_unit DIE child. + // + // imported_unit DIEs at logical depth > 1 can only contain DIEs + // at logical depth > 1, which we would ignore anyways. + // imported_unit DIEs at depth 0 are malformed. Therefore, we + // only follow imported_unit DIEs at logical depth 1 and ignore + // others. + // + // This lets us avoid tracking the depth and logical depth + // separately: since we only follow imports at logical depth 1, + // depth == logical depth. + // + // If our assumption about enumerator DIEs is incorrect, then we + // will need to track depth and logical depth separately, update + // everything to use the appropriate one, and also take imports + // into account for depth1_{tag,addr}. 
+ if (tag == INSN_DIE_TAG_imported_unit && orig_depth == 1) { + if (!specification) { + return binary_buffer_error(&buffer->bb, + "DW_TAG_imported_unit is missing DW_AT_import"); + } + cu = drgn_dwarf_index_find_cu(&cu->file->module->prog->dbinfo, + (uintptr_t)specification); + if (!cu) { + return binary_buffer_error(&buffer->bb, + "imported unit not found"); + } + if (drgn_dwarf_index_cu_buffer_stack_size(stack) + >= MAX_IMPORTED_UNIT_DEPTH) { + return binary_buffer_error(&buffer->bb, + "maximum DWARF imported unit depth exceeded"); + } + buffer = drgn_dwarf_index_cu_buffer_stack_append_entry(stack); + if (!buffer) + return &drgn_enomem; + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos = specification; } } return NULL; @@ -2135,6 +2282,7 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) specifications = &threads[thread_num - 1].specifications; drgn_dwarf_specification_map_init(specifications); } + VECTOR(drgn_dwarf_index_cu_buffer_stack, buffer_stack); #pragma omp for schedule(dynamic) nowait for (size_t i = dbinfo->dwarf.global.cus_indexed; @@ -2143,11 +2291,13 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) continue; struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu_header_size(cu); + drgn_dwarf_index_cu_buffer_stack_clear(&buffer_stack); + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_append_entry(&buffer_stack); + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos += cu_header_size(cu); thread_err = index_cu_first_pass(specifications, - &buffer); + &buffer_stack); } if (thread_err) { #pragma omp critical(drgn_dwarf_info_update_index_error) @@ -2195,11 +2345,14 @@ drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) continue; struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); - struct 
drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu_header_size(cu); + drgn_dwarf_index_cu_buffer_stack_clear(&buffer_stack); + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_append_entry(&buffer_stack); + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos += cu_header_size(cu); thread_err = index_cu_second_pass(dbinfo, map, - base_types, &buffer); + base_types, + &buffer_stack); } // Merge the per-thread DIE and base type maps into dbinfo (and @@ -2305,6 +2458,7 @@ static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index drgn_dwarf_index_die_map_init(tag_map); map = maps[thread_num - 1]; } + VECTOR(drgn_dwarf_index_cu_buffer_stack, buffer_stack); for (int i = 0; i < num_tags_to_index; i++) { struct drgn_dwarf_index_die_vector *dies = @@ -2318,12 +2472,14 @@ static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index *drgn_dwarf_index_die_vector_at(dies, j); struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_find_cu(ns->dbinfo, die_addr); - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos = (void *)die_addr; + drgn_dwarf_index_cu_buffer_stack_clear(&buffer_stack); + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_append_entry(&buffer_stack); + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos = (void *)die_addr; thread_err = index_cu_second_pass(ns->dbinfo, map, NULL, - &buffer); + &buffer_stack); } } #pragma omp barrier diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 7a1fc0806..1fcc378c9 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -24,7 +24,7 @@ class DwarfLabel(NamedTuple): class DwarfDie(NamedTuple): tag: DW_TAG - attribs: Sequence[DwarfAttrib] + attribs: Sequence[DwarfAttrib] = () children: Sequence[Union["DwarfDie", DwarfLabel]] = () @@ -99,9 +99,9 @@ def aux(buf, die, depth): 
buf.append(value) elif attrib.form == DW_FORM.data2: buf.extend(value.to_bytes(2, byteorder)) - elif attrib.form == DW_FORM.data4: + elif attrib.form in (DW_FORM.data4, DW_FORM.ref_sup4): buf.extend(value.to_bytes(4, byteorder)) - elif attrib.form == DW_FORM.data8: + elif attrib.form in (DW_FORM.data8, DW_FORM.ref_sup8): buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.udata: _append_uleb128(buf, value) @@ -113,7 +113,7 @@ def aux(buf, die, depth): elif attrib.form == DW_FORM.block1: buf.append(len(value)) buf.extend(value) - elif attrib.form == DW_FORM.strp: + elif attrib.form in (DW_FORM.strp, DW_FORM.GNU_ref_alt): buf.extend(value.to_bytes(offset_size, byteorder)) elif attrib.form == DW_FORM.string: buf.extend(value.encode()) @@ -355,6 +355,7 @@ def compile_dwarf( sections=(), little_endian=True, bits=64, + allow_any_unit_die=False, **kwargs, ): assert compress in (None, "zlib-gnu", "zlib-gabi") @@ -374,7 +375,7 @@ def compile_dwarf( DwarfUnit(DW_UT.compile, DwarfDie(DW_TAG.compile_unit, (), units_or_dies)), ) assert all(isinstance(unit, DwarfUnit) for unit in units) - assert all(unit.die.tag in _UNIT_TAGS for unit in units) + assert allow_any_unit_die or all(unit.die.tag in _UNIT_TAGS for unit in units) unit_attribs = [] if lang is not None: diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index f32adb80d..7ce407ae1 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -47,6 +47,7 @@ DwarfDie, DwarfLabel, DwarfUnit, + compile_dwarf, create_dwarf_file, ) @@ -202,16 +203,28 @@ labeled_float_die = (DwarfLabel("float_die"), float_die) -def add_extra_dwarf(prog, path): - prog.extra_module(path, create=True)[0].try_file(path, force=True) +def add_extra_dwarf(prog, path, supplementary_path=None): + module = prog.extra_module(path, create=True)[0] + module.try_file(path, force=True) + if module.debug_file_status == drgn.ModuleFileStatus.WANT_SUPPLEMENTARY: + module.try_file(supplementary_path) + else: + assert supplementary_path is 
None + assert not module.wants_debug_file() -def dwarf_program(*args, segments=None, **kwds): +def dwarf_program(*args, segments=None, gnu_debugaltlink=None, **kwds): prog = Program() with tempfile.NamedTemporaryFile() as f: - f.write(create_dwarf_file(*args, **kwds)) + f.write(create_dwarf_file(*args, gnu_debugaltlink=gnu_debugaltlink, **kwds)) f.flush() - add_extra_dwarf(prog, f.name) + add_extra_dwarf( + prog, + f.name, + supplementary_path=( + None if gnu_debugaltlink is None else gnu_debugaltlink[0] + ), + ) if segments is not None: add_mock_memory_segments(prog, segments) @@ -7315,7 +7328,9 @@ def test_dwo5_id_mismatch(self): class TestImportedUnit(TestCase): - def test_unused_partial_unit(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + def test_global(self): prog = dwarf_program( ( DwarfUnit( @@ -7325,8 +7340,94 @@ def test_unused_partial_unit(self): (), ( DwarfDie( - DW_TAG.typedef, - (DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"),), + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_global_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + 
self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_global_nested(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), ), ), ), @@ -7338,13 +7439,1310 @@ def test_unused_partial_unit(self): (), ( DwarfDie( - DW_TAG.typedef, - (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, DW_FORM.ref_addr, "partial_unit2" + ), + ), ), ), ), + die_label="partial_unit", + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit2", ), ), ) - self.assertIdentical(prog.type("TEST").type, prog.void_type()) - self.assertRaises(LookupError, prog.type, "UNUSED") + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_global_nested_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, DW_FORM.ref_addr, "alt_unit2" + ), + ), + ), + ), + ), + die_label="alt_unit", + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="alt_unit2", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + 
alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + die_label="partial_unit", + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_enumeration_type(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "unsigned_int_die" + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "RED" + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 0 + ), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "GREEN" + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 1 + ), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "BLUE" + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 2 + ), + ), + ), + ), + ), + *labeled_unsigned_int_die, + ), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ), + ) + + def test_namespace(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + ( + # TODO: in practice, partial units don't seem to + # have a language set, and it's supposed to be + # inherited from the unit that imports it. We don't + # handle that yet. 
+ DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + *labeled_int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"),), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "TEST" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + ), + ), + die_label="partial_unit", + ), + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + ( + DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + ), + ), + ), + ) + self.assertIdentical(prog.type("foo::TEST").type, prog.int_type("int", 4, True)) + + def test_namespace_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + ( + # See above re: language in partial units. 
+ DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + *labeled_int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"),), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "TEST" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + ), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + ( + DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical( + prog.type("foo::TEST").type, prog.int_type("int", 4, True) + ) + + def test_specification_imported(self): + # DW_AT_specification in an imported unit referring to a + # DW_AT_declaration DIE in a normal CU. 
+ prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "incomplete_struct_die", + ), + ), + ), + DwarfLabel("incomplete_struct_die"), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib( + DW_AT.declaration, + DW_FORM.flag_present, + True, + ), + ), + ), + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.specification, + DW_FORM.ref_addr, + "incomplete_struct_die", + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 0, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "y" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 4, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + ), + ), + *labeled_int_die, + ), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + ) + + def test_declaration_and_specification_imported(self): + # DW_AT_specification in an imported unit referring to a + # DW_AT_declaration DIE in the same imported unit. 
+ prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "incomplete_struct_die", + ), + ), + ), + DwarfLabel("incomplete_struct_die"), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib( + DW_AT.declaration, + DW_FORM.flag_present, + True, + ), + ), + ), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.specification, + DW_FORM.ref4, + "incomplete_struct_die", + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 0, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "y" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 4, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + ), + ), + *labeled_int_die, + ), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + ) + + def test_declaration_and_specification_alt(self): + # DW_AT_specification in an imported unit from a .gnu_debugaltlink file + # referring to a DW_AT_declaration DIE in the same imported unit. 
+ with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "incomplete_struct_die", + ), + ), + ), + DwarfLabel("incomplete_struct_die"), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "point" + ), + DwarfAttrib( + DW_AT.declaration, + DW_FORM.flag_present, + True, + ), + ), + ), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.specification, + DW_FORM.ref4, + "incomplete_struct_die", + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 0, + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "y" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 4, + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + *labeled_int_die, + ), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + ) + + def test_function_alt(self): + with tempfile.NamedTemporaryFile() as 
alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + *labeled_int_die, + # DWP puts a subprogram DIE without + # DW_AT_low_pc/DW_AT_ranges in the + # supplementary file and another subprogram DIE + # that references it with DW_AT_abstract_origin + # in the main debug file. + DwarfLabel("abstract_instance_root"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "abs"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ( + DwarfLabel("abstract_instance_parameter"), + DwarfDie( + DW_TAG.formal_parameter, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + ), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib( + DW_AT.abstract_origin, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["abstract_instance_root"], + ), + DwarfAttrib( + DW_AT.low_pc, + DW_FORM.addr, + 0x7FC3EB9B1C30, + ), + ), + ( + DwarfDie( + DW_TAG.formal_parameter, + ( + DwarfAttrib( + DW_AT.abstract_origin, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels[ + "abstract_instance_parameter" + ], + ), + ), + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical( + prog["abs"], + Object( + prog, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("int", 4, True), "x"),), + False, + ), + address=0x7FC3EB9B1C30, + ), + ) + + def test_unused_partial_unit(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + 
DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"),), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.void_type()) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_unused_partial_unit_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"),), + ), + ), + ), + die_label="alt_unit", + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "UNUSED" + ), + ), + ), + ), + ), + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical(prog.type("TEST").type, prog.void_type()) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_imported_unit_with_children(self): + # DW_TAG_imported_unit shouldn't have children. Test that we ignore the + # children properly and continue where we left off. 
+ prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "UNUSED" + ), + ), + ), + ), + ), + *labeled_unsigned_int_die, + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST2"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "unsigned_int_die" + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + self.assertIdentical( + prog.type("TEST2").type, prog.int_type("unsigned int", 4, False) + ) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_imported_unit_with_sibling(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + DwarfAttrib( + DW_AT.sibling, + DW_FORM.ref4, + "TEST2_die", + ), + ), + ), + DwarfLabel("TEST2_die"), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST2"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "unsigned_int_die" + ), + ), + ), + *labeled_unsigned_int_die, + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + self.assertIdentical( + prog.type("TEST2").type, prog.int_type("unsigned int", 4, False) + ) + + def test_top_level_imported_unit_with_children(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + 
DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"),), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + die_label="partial_unit", + ), + ), + allow_any_unit_die=True, + ) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_top_level_imported_unit(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + die_label="partial_unit", + ), + ), + allow_any_unit_die=True, + ) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_missing_import(self): + with self.assertRaisesRegex( + Exception, "DW_TAG_imported_unit is missing DW_AT_import" + ): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + (DwarfDie(DW_TAG.imported_unit),), + ), + ), + ), + ) + + def test_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "reference is out of bounds"): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref4, + 0x100000, + ), + ), + ), + ), + ), + ), + ), + ) + + def test_out_of_bounds_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + (), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + with self.assertRaisesRegex(Exception, "reference is out of bounds"): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + 
DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + 0x100000, + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + + def test_cycle(self): + with self.assertRaisesRegex( + Exception, "maximum DWARF imported unit depth exceeded" + ): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref4, + "cycle_unit", + ), + ), + ), + ), + ), + die_label="cycle_unit", + ), + ), + ) + + def test_cycle_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref4, + "cycle_unit", + ), + ), + ), + ), + ), + die_label="cycle_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + with self.assertRaisesRegex( + Exception, "maximum DWARF imported unit depth exceeded" + ): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["cycle_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) From a46df41ba3527eadf099c854f49eb9933ad42c46 Mon Sep 17 00:00:00 2001 From: Septatrix <24257556+Septatrix@users.noreply.github.com> Date: Fri, 14 Mar 2025 19:34:16 +0100 Subject: [PATCH 095/166] Use shell builtin instead of which Signed-off-by: Septatrix <24257556+Septatrix@users.noreply.github.com> --- docs/getting_debugging_symbols.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/getting_debugging_symbols.rst b/docs/getting_debugging_symbols.rst index fc0f61f9c..ad36238da 
100644 --- a/docs/getting_debugging_symbols.rst +++ b/docs/getting_debugging_symbols.rst @@ -20,14 +20,13 @@ Fedora Fedora makes it very easy to install debugging symbols with the `DNF debuginfo-install plugin `_, -which is installed by default. Simply run ``sudo dnf debuginfo-install -$package``:: +which is installed by default. Simply run ``sudo dnf debuginfo-install $package``:: $ sudo dnf debuginfo-install python3 To find out what package owns a binary, use ``rpm -qf``:: - $ rpm -qf $(which python3) + $ rpm -qf $(command -v python3) python3-3.9.7-1.fc34.x86_64 To install symbols for the running kernel:: @@ -61,9 +60,9 @@ You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` package to find the correct name:: $ sudo apt install debian-goodies - $ find-dbgsym-packages $(which python3) + $ find-dbgsym-packages $(command -v python3) libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym - $ find-dbgsym-packages $(which cat) + $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg To install symbols for the running kernel:: @@ -98,9 +97,9 @@ You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` package to find the correct name:: $ sudo apt install debian-goodies - $ find-dbgsym-packages $(which python3) + $ find-dbgsym-packages $(command -v python3) libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym - $ find-dbgsym-packages $(which cat) + $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg To install symbols for the running kernel:: From 06382a6e85ba398c3efc9969f15a19664a04f9a9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 14:33:38 -0700 Subject: [PATCH 096/166] drgn.helpers.linux: add kthread helpers I wanted to_kthread() and kthread_data() when investigating something with SJ Park. They're fairly trivial in recent kernels, but to_kthread() has a weird history, so it's worth a helper. 
Signed-off-by: Omar Sandoval --- drgn/helpers/linux/kthread.py | 61 ++++++++++++++++++++++ tests/linux_kernel/helpers/test_kthread.py | 17 ++++++ tests/linux_kernel/kmod/drgn_test.c | 3 +- 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 drgn/helpers/linux/kthread.py create mode 100644 tests/linux_kernel/helpers/test_kthread.py diff --git a/drgn/helpers/linux/kthread.py b/drgn/helpers/linux/kthread.py new file mode 100644 index 000000000..cfa6d8340 --- /dev/null +++ b/drgn/helpers/linux/kthread.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +""" +Kernel Threads +-------------- + +The ``drgn.helpers.linux.kthread`` module provides helpers for working with +Linux kernel threads, a.k.a. kthreads. +""" + +from drgn import Object, cast, container_of + +__all__ = ( + "kthread_data", + "to_kthread", +) + + +def to_kthread(task: Object) -> Object: + """ + Get the kthread information for a task. + + >>> to_kthread(find_task(3)) + *(struct kthread *)0xffff8ef600191580 = { + ... + .threadfn = (int (*)(void *))kthread_worker_fn+0x0 = 0xffffffffba1e61b0, + .full_name = (char *)0xffff8ef6003d4ac0 = "pool_workqueue_release", + } + + :param task: ``struct task_struct *`` + :return: ``struct kthread *`` + """ + try: + # Since Linux kernel commit e32cf5dfbe22 ("kthread: Generalize + # pf_io_worker so it can point to struct kthread") (in v5.17), the + # struct kthread * is in task->worker_private. + return cast("struct kthread *", task.worker_private) + except AttributeError: + if "free_kthread_struct" in task.prog_: + # Between that and Linux kernel commit 1da5c46fa965 ("kthread: Make + # struct kthread kmalloc'ed") (in v4.10), it is in + # task->set_child_tid. Unfortunately we can only distinguish this + # by looking for another function added in that commit. + return cast("struct kthread *", task.set_child_tid) + else: + # Before that, task->vfork_done points to kthread->exited.
+ return container_of(task.vfork_done, "struct kthread", "exited") + + +def kthread_data(task: Object) -> Object: + """ + Get the data that was specified when a kthread was created. + + >>> kthread_data(find_task(3)) + (void *)0xffff8ef6001812c0 + + :param task: ``struct task_struct *`` + :return: ``void *`` + """ + return to_kthread(task).data.read_() diff --git a/tests/linux_kernel/helpers/test_kthread.py b/tests/linux_kernel/helpers/test_kthread.py new file mode 100644 index 000000000..356d27f20 --- /dev/null +++ b/tests/linux_kernel/helpers/test_kthread.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from drgn import Object +from drgn.helpers.linux.kthread import kthread_data +from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod + + +@skip_unless_have_test_kmod +class TestKthread(LinuxKernelTestCase): + # There's no good way to test to_kthread() directly, but it gets tested + # indirectly through kthread_data(). + def test_kthread_data(self): + self.assertIdentical( + kthread_data(self.prog["drgn_test_kthread"]), + Object(self.prog, "void *", 0xB0BA000), + ) diff --git a/tests/linux_kernel/kmod/drgn_test.c b/tests/linux_kernel/kmod/drgn_test.c index 91a917258..a02a825a1 100644 --- a/tests/linux_kernel/kmod/drgn_test.c +++ b/tests/linux_kernel/kmod/drgn_test.c @@ -970,7 +970,8 @@ static void drgn_test_stack_trace_exit(void) static int drgn_test_stack_trace_init(void) { - drgn_test_kthread = kthread_create(drgn_test_kthread_fn, NULL, + drgn_test_kthread = kthread_create(drgn_test_kthread_fn, + (void *)0xb0ba000, "drgn_test_kthread"); if (!drgn_test_kthread) return -1; From b9deae182213d43955bee15963c36c96ea357389 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 14 Mar 2025 13:55:56 -0700 Subject: [PATCH 097/166] libdrgn/python: modernize drgn_error_from_python() Use the _cleanup_pydecref_ helpers.
Signed-off-by: Stephen Brennan Co-authored-by: Omar Sandoval --- libdrgn/python/error.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index 1d36a2e98..572c507bf 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -75,46 +75,38 @@ void clear_drgn_in_python(void) struct drgn_error *drgn_error_from_python(void) { - PyObject *exc_type, *exc_value, *exc_traceback, *exc_message; - const char *type, *message; - struct drgn_error *err; - + _cleanup_pydecref_ PyObject *exc_type, *exc_value, *exc_traceback; PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); if (!exc_type) return NULL; if (drgn_in_python) { PyErr_Restore(exc_type, exc_value, exc_traceback); + exc_type = exc_value = exc_traceback = NULL; return &drgn_error_python; } - type = ((PyTypeObject *)exc_type)->tp_name; + const char *type = ((PyTypeObject *)exc_type)->tp_name; + _cleanup_pydecref_ PyObject *exc_message = NULL; + const char *message; if (exc_value) { exc_message = PyObject_Str(exc_value); message = exc_message ? 
PyUnicode_AsUTF8(exc_message) : NULL; if (!message) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s: ", type); - goto out; + PyErr_Clear(); + return drgn_error_format(DRGN_ERROR_OTHER, + "%s: ", type); } } else { - exc_message = NULL; message = ""; } if (message[0]) { - err = drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", type, - message); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", type, + message); } else { - err = drgn_error_create(DRGN_ERROR_OTHER, type); + return drgn_error_create(DRGN_ERROR_OTHER, type); } - -out: - Py_XDECREF(exc_message); - Py_XDECREF(exc_traceback); - Py_XDECREF(exc_value); - Py_DECREF(exc_type); - return err; } void *set_drgn_error(struct drgn_error *err) From 00c4f663b21eb4c6e78b5b64ac058434209c46b7 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 14 Mar 2025 14:29:48 -0700 Subject: [PATCH 098/166] libdrgn: python: pass fault errors through Some parts of libdrgn detect drgn error codes and handle them appropriately. For instance, the stack tracing code expects to get a fault error. A drgn error that has been translated into a Python exception, and back to a drgn error, no longer retains its code. This means that if the stack tracing code is used with Python memory readers, the fault errors will not be treated as fault errors. In general, it may not be a good idea to translate every Python exception back to a drgn error. There may be a small performance cost to doing so, and what's more: it can be quite useful to know that a Python error was wrapped into a drgn error. So for now, we'll make this a special case for FaultErrors, so that custom memory readers will behave as expected. 
Signed-off-by: Stephen Brennan [Omar: style fixes, tweak address error check] Signed-off-by: Omar Sandoval --- libdrgn/python/error.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index 572c507bf..f4e11ddba 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -73,6 +73,23 @@ void clear_drgn_in_python(void) drgn_in_python = false; } +static struct drgn_error *drgn_fault_error_from_python(PyObject *exc_value) +{ + _cleanup_pydecref_ PyObject *py_message = + PyObject_GetAttrString(exc_value, "message"); + const char *message = py_message ? PyUnicode_AsUTF8(py_message) : NULL; + if (!message) + return NULL; + + _cleanup_pydecref_ PyObject *py_address = + PyObject_GetAttrString(exc_value, "address"); + uint64_t address = py_address ? PyLong_AsUint64(py_address) : (uint64_t)-1; + if (address == (uint64_t)-1 && PyErr_Occurred()) + return NULL; + + return drgn_error_create_fault(message, address); +} + struct drgn_error *drgn_error_from_python(void) { _cleanup_pydecref_ PyObject *exc_type, *exc_value, *exc_traceback; @@ -80,6 +97,20 @@ struct drgn_error *drgn_error_from_python(void) if (!exc_type) return NULL; + // Python FaultErrors should be translated back to drgn errors because + // they are frequently handled in libdrgn. They should be translated no + // matter how deeply nested we are, so we do this before checking + // drgn_in_python. + if ((PyTypeObject *)exc_type == &FaultError_type && exc_value) { + struct drgn_error *err = drgn_fault_error_from_python(exc_value); + if (err) + return err; + // A NULL return means that we encountered a Python error while + // trying to convert it. Clear the Python error and fall back to + // the standard code path. 
+ PyErr_Clear(); + } + if (drgn_in_python) { PyErr_Restore(exc_type, exc_value, exc_traceback); exc_type = exc_value = exc_traceback = NULL; From ca3f004201287b702bae598435817cba105bc0b7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 15:18:40 -0700 Subject: [PATCH 099/166] tests: test FaultError translation Signed-off-by: Omar Sandoval --- tests/test_program.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_program.py b/tests/test_program.py index 6b6c4240c..18ae3aec6 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -407,6 +407,42 @@ def test_invalid_read_fn(self): 8, ) + def test_python_fault_error(self): + def fault_memory_reader(address, count, offset, physical): + raise FaultError("fault from Python", address) + + prog = Program(MOCK_PLATFORM) + prog.add_memory_segment(0xFFFF0000, 8, fault_memory_reader) + + with self.assertRaises(FaultError) as cm: + Object(prog, "int", address=0xFFFF0004).read_() + self.assertEqual(cm.exception.message, "fault from Python") + self.assertEqual(cm.exception.address, 0xFFFF0004) + + # If the FaultError from Python is translated to a drgn_error + # correctly, then this shouldn't raise an exception. + str(Object(prog, "int *", 0xFFFF0004)) + + def test_python_fault_error_invalid_message(self): + def fault_memory_reader(address, count, offset, physical): + raise FaultError(None, address) + + prog = Program(MOCK_PLATFORM) + prog.add_memory_segment(0xFFFF0000, 8, fault_memory_reader) + + # Just test that it doesn't crash. + self.assertRaises(Exception, Object(prog, "int", address=0xFFFF0004).read_) + + def test_python_fault_error_invalid_address(self): + def fault_memory_reader(address, count, offset, physical): + raise FaultError("fault from Python", None) + + prog = Program(MOCK_PLATFORM) + prog.add_memory_segment(0xFFFF0000, 8, fault_memory_reader) + + # Just test that it doesn't crash. 
+ self.assertRaises(Exception, Object(prog, "int", address=0xFFFF0004).read_) + class TestTypeFinder(TestCase): def test_register(self): From 9fab6468eb77ea7f1d9a1163553c79b1dd8d2d40 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 5 Feb 2025 15:02:30 -0800 Subject: [PATCH 100/166] libdrgn: stack_trace: allow unwinding anything with a pt_regs Right now, we're a bit conservative in the stack tracing code: we only allow unwinding userspace cores, or Linux kernel programs. These are the only two types of programs for which we can get initial registers. However, if the user provides a pt_regs object, then there's no reason we can't try to do a stack trace with that. Move the check for live programs into drgn_get_initial_registers(), after we've already handled pt_regs. Signed-off-by: Stephen Brennan --- libdrgn/stack_trace.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 059ed7911..c85c9fc19 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -823,6 +823,9 @@ drgn_get_initial_registers(struct drgn_program *prog, uint32_t tid, } return prog->platform.arch->linux_kernel_get_initial_registers(&obj, ret); + } else if (drgn_program_is_userspace_process(prog)) { + return drgn_error_create(DRGN_ERROR_NOT_IMPLEMENTED, + "stack unwinding is not yet supported for live processes"); } else { struct nstring prstatus; err = drgn_program_find_prstatus(prog, tid, &prstatus); @@ -1209,14 +1212,6 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "cannot unwind stack without platform"); } - if (drgn_program_is_userspace_process(prog)) { - return drgn_error_create(DRGN_ERROR_NOT_IMPLEMENTED, - "stack unwinding is not yet supported for live processes"); - } else if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - && !drgn_program_is_userspace_core(prog)) { - return 
drgn_error_create(DRGN_ERROR_NOT_IMPLEMENTED, - "stack unwinding is not supported for this program"); - } size_t trace_capacity = 1; struct drgn_stack_trace *trace = From 0625a2738f52c2dc116c75c966cf2d8b26d2dcd0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 17 Mar 2025 16:13:21 -0700 Subject: [PATCH 101/166] libdrgn: dwarf_info: skip DW_CFA_GNU_args_size Stephen reported "unknown DWARF CFI opcode 0x2e" errors. 0x2e is DW_CFA_GNU_args_size, which "takes an unsigned LEB128 operand representing an argument size" that "specifies the total of the size of the arguments which have been pushed onto the stack". It seems fairly common in .eh_frame. We have no use for this information, so we can simply skip it (elfutils and LLDB apparently do the same). Closes #480. Reported-and-tested-by: Stephen Brennan Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 3cf2077cd..e6ee2b128 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -7609,6 +7609,11 @@ drgn_eval_dwarf_cfi(struct drgn_elf_file *file, enum drgn_section_index scn, goto set_reg; } fallthrough; + case DW_CFA_GNU_args_size: + // We have no use for this. Skip it. + if ((err = binary_buffer_skip_leb128(&buffer.bb))) + goto out; + break; default: err = binary_buffer_error(&buffer.bb, "unknown DWARF CFI opcode %#" PRIx8, From 0e635df5294ebf34660fb12ee2ee371dd5143391 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Mon, 17 Mar 2025 16:58:33 -0700 Subject: [PATCH 102/166] Handle truncated pr_fname It appears that some core dumps may have a "pr_fname" which isn't nul-terminated, which leads to the following exception, even if you're not interested in the thread name: >>> next(prog.threads()).stack_trace() Traceback (most recent call last): File "<console>", line 1, in <module> Exception: pr_fname is not null terminated Handle this by creating a copy and nul-terminating it.
Closes: #483 Fixes: e6cc9b07 ("Add name to drgn.Thread") Signed-off-by: Stephen Brennan --- libdrgn/program.c | 29 ++++++++++++----------------- libdrgn/program.h | 2 +- tests/test_thread.py | 18 ++++++++++++++++++ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/libdrgn/program.c b/libdrgn/program.c index 396cc0131..9789d384e 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -137,12 +137,12 @@ void drgn_program_init(struct drgn_program *prog, void drgn_program_deinit(struct drgn_program *prog) { drgn_thread_set_deinit(&prog->thread_set); - /* - * For userspace core dumps, main_thread and crashed_thread are in - * prog->thread_set and thus freed by the above call to - * drgn_thread_set_deinit(). - */ - if (!drgn_program_is_userspace_core(prog)) { + if (drgn_program_is_userspace_core(prog)) { + free(prog->core_dump_fname_cached); + } else { + // For userspace core dumps, main_thread and crashed_thread are + // in prog->thread_set and thus freed by the above call to + // drgn_thread_set_deinit(). drgn_thread_destroy(prog->crashed_thread); drgn_thread_destroy(prog->main_thread); } @@ -924,7 +924,7 @@ static struct drgn_error *get_prpsinfo_pid(struct drgn_program *prog, static struct drgn_error *get_prpsinfo_fname(struct drgn_program *prog, const char *data, size_t size, - const char **ret) + char **ret) { bool is_64_bit; struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); @@ -938,15 +938,10 @@ static struct drgn_error *get_prpsinfo_fname(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "NT_PRPSINFO is truncated"); } - // No need to make a copy: the data returned by elf_getdata_rawchunk() - // is valid for the lifetime of the Elf handle, and prog->core is valid for - // the lifetime of prog. 
- const char *tmp = data + offset; - size_t len = strnlen(tmp, PR_FNAME_LEN); - if (len == PR_FNAME_LEN) + char *tmp = strndup(data + offset, PR_FNAME_LEN); #undef PR_FNAME_LEN - return drgn_error_create(DRGN_ERROR_OTHER, - "pr_fname is not null terminated"); + if (!tmp) + return &drgn_enomem; *ret = tmp; return NULL; } @@ -1030,7 +1025,7 @@ drgn_program_cache_core_dump_threads(struct drgn_program *prog) uint32_t first_prstatus_tid; bool found_prpsinfo = false; uint32_t prpsinfo_pid; - const char *prpsinfo_fname = NULL; + _cleanup_free_ char *prpsinfo_fname = NULL; if (prog->core_dump_threads_cached) return NULL; @@ -1130,7 +1125,7 @@ drgn_program_cache_core_dump_threads(struct drgn_program *prog) &prpsinfo_pid); /* If the PID isn't found, then this is NULL. */ prog->main_thread = it.entry; - prog->core_dump_fname_cached = prpsinfo_fname; + prog->core_dump_fname_cached = no_cleanup_ptr(prpsinfo_fname); } if (found_prstatus) { /* diff --git a/libdrgn/program.h b/libdrgn/program.h index 90413d16d..79cb67e2c 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -153,7 +153,7 @@ struct drgn_program { */ struct { /** Cached `pr_fname` from `NT_PRPSINFO` note. */ - const char *core_dump_fname_cached; + char *core_dump_fname_cached; /** Cache of important parts of auxiliary vector. 
*/ struct { uint64_t at_phdr; diff --git a/tests/test_thread.py b/tests/test_thread.py index 023410439..3d3fa1a14 100644 --- a/tests/test_thread.py +++ b/tests/test_thread.py @@ -3,6 +3,7 @@ import os import os.path +import tempfile from drgn import Program from tests import TestCase @@ -95,3 +96,20 @@ def test_thread_name(self): for tid in self.TIDS: if tid != self.MAIN_TID: self.assertIsNone(self.prog.thread(tid).name) + + +class TestCoreDumpLongName(TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.prog = Program() + with open(get_resource("crashme_static_pie.core"), "rb") as f: + data = f.read() + data = data.replace(b"crashme_static_\x00", b"crashme_static_p") + with tempfile.NamedTemporaryFile("wb") as f: + f.write(data) + f.flush() + cls.prog.set_core_dump(f.name) + + def test_thread_name(self): + self.assertEqual(self.prog.main_thread().name, "crashme_static_p") From 53e57ea2d4166aaf3a8c9bd35fcde04fdbefe3a6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 24 Mar 2025 15:19:26 -0700 Subject: [PATCH 103/166] libdrgn: debug_info: don't set extra module bias if address range is empty An address range of (0, 0) means that the module is not in memory, so the bias isn't meaningful. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 2 +- tests/test_debug_info.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 87170f332..19f9e6cbd 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1259,7 +1259,7 @@ static bool drgn_module_elf_file_bias(struct drgn_module *module, return elf_dso_bias(prog, file->elf, module->vdso.dynamic_address, ret); case DRGN_MODULE_EXTRA: - if (module->start != UINT64_MAX) { + if (module->start < module->end) { uint64_t elf_start, elf_end; if (!drgn_elf_file_address_range(file, &elf_start, &elf_end)) diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index 3c6592a12..ab3ed5ea7 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -551,6 +551,15 @@ def test_extra_module_address_range(self): self.assertEqual(module.loaded_file_bias, 0x30000000) self.assertEqual(module.debug_file_bias, 0x30000000) + def test_extra_module_empty_address_range(self): + module = self.prog.extra_module("/foo/bar", create=True)[0] + module.address_range = (0, 0) + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.address_range, (0, 0)) + self.assertEqual(module.loaded_file_bias, 0) + self.assertEqual(module.debug_file_bias, 0) + class TestLinuxUserspaceCoreDump(TestCase): def setUp(self): From 139078e5c3d49174ea6d6ce326f7ecae88328ed1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 24 Mar 2025 15:21:30 -0700 Subject: [PATCH 104/166] docs: document details of bias calculation The descriptions of how file biases are determined for SharedLibraryModule, VdsoModule, and ExtraModule are vague. Elaborate on them. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 6b6835606..f34bf2b42 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1690,18 +1690,25 @@ class Module: This is often non-zero due to address space layout randomization (ASLR). - It is set automatically based on the module type: + It is set automatically based on the module type when the loaded file is + added: * For :class:`MainModule`, it is set based on metadata from the process or core dump (the `auxiliary vector `_ for userspace programs, the ``VMCOREINFO`` note for the Linux kernel). - * For :class:`SharedLibraryModule` and :class:`VdsoModule`, it is set based - on :attr:`~SharedLibraryModule.dynamic_address`. + * For :class:`SharedLibraryModule` and :class:`VdsoModule`, it is set to + :attr:`~SharedLibraryModule.dynamic_address` minus the address of the + dynamic section in the file. * For :class:`RelocatableModule`, it is set to zero. Addresses are adjusted according to :attr:`~RelocatableModule.section_addresses` instead. - * For :class:`ExtraModule`, it is set based on - :attr:`~Module.address_range`. + * For :class:`ExtraModule`, if :attr:`~Module.address_range` is set before + the file is added, then the bias is set to :attr:`address_range[0] + ` (i.e., the module's start address) minus the + file's start address. If :attr:`~Module.address_range` is not set when + the file is added or is set to ``(0, 0)``, then the bias is set to zero. + + This cannot be set manually. 
""" debug_file_status: ModuleFileStatus """Status of the module's :ref:`debug file `.""" From 284bd5f1aeb4fe148e8fb7c6ee0ec7b623b418d0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 24 Mar 2025 15:24:07 -0700 Subject: [PATCH 105/166] docs: emphasize that ExtraModule name and ID are both arbitrary Signed-off-by: Omar Sandoval --- _drgn.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_drgn.pyi b/_drgn.pyi index f34bf2b42..a9c12b419 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1899,7 +1899,8 @@ class ExtraModule(Module): used to add debugging information from a standalone file that is not in use by a particular program. - Extra modules are uniquely identified by an arbitrary name and ID number. + Extra modules are uniquely identified by a name and ID number. Both the + name and ID number are arbitrary. """ id: Final[int] From 4a6a9f335994a4e16aea49437d44a59247c58480 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 07:15:04 -0700 Subject: [PATCH 106/166] cli: allow enabling/disabling debug info finders with --{try,no}-symbols-by It'd be useful to be able to control debug info finders in addition to options. Rather than adding yet another set of command-line options, co-opt --try-symbols-by and --no-symbols-by. 
Signed-off-by: Omar Sandoval --- drgn/cli.py | 208 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 138 insertions(+), 70 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index 135627f3a..dc2d3bf41 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -146,8 +146,9 @@ def _displayhook(value: Any) -> None: setattr(builtins, "_", value) -class _DebugInfoOptionAction(argparse.Action): - _choices: Dict[str, Tuple[str, Any]] +class _TrySymbolsByBaseAction(argparse.Action): + _enable: bool + _finder = ("disable_debug_info_finders", "enable_debug_info_finders") @staticmethod def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: @@ -164,6 +165,24 @@ def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: ) } + _options = ( + { + **_bool_options(False), + "kmod": ("try_kmod", drgn.KmodSearchMethod.NONE), + }, + { + **_bool_options(True), + "kmod=depmod": ("try_kmod", drgn.KmodSearchMethod.DEPMOD), + "kmod=walk": ("try_kmod", drgn.KmodSearchMethod.WALK), + "kmod=depmod-or-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_OR_WALK), + "kmod=depmod-and-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_AND_WALK), + }, + ) + + def __init__(self, *args: Any, **kwargs: Any) -> None: + kwargs["dest"] = argparse.SUPPRESS + super().__init__(*args, **kwargs) + def __call__( self, parser: argparse.ArgumentParser, @@ -171,37 +190,117 @@ def __call__( values: Any, option_string: Optional[str] = None, ) -> None: - dest = getattr(namespace, self.dest, None) - if dest is None: - dest = {} - setattr(namespace, self.dest, dest) - - for option in values.split(","): + for value in values.split(","): try: - name, value = self._choices[option] + option_name, option_value = self._options[self._enable][value] except KeyError: - raise argparse.ArgumentError( - self, - f"invalid option: {option!r} (choose from {', '.join(self._choices)})", + # Raise an error if passed an option meant for the opposite + # argument. 
+ if value in self._options[not self._enable]: + raise argparse.ArgumentError(self, f"invalid option: {value!r}") + + if not hasattr(namespace, self._finder[self._enable]): + setattr(namespace, self._finder[self._enable], {}) + getattr(namespace, self._finder[self._enable])[value] = None + + if hasattr(namespace, self._finder[not self._enable]): + getattr(namespace, self._finder[not self._enable]).pop(value, None) + else: + if not hasattr(namespace, "debug_info_options"): + namespace.debug_info_options = {} + namespace.debug_info_options[option_name] = option_value + + +class _TrySymbolsByAction(_TrySymbolsByBaseAction): + _enable = True + + +class _NoSymbolsByAction(_TrySymbolsByBaseAction): + _enable = False + + +def _load_debugging_symbols( + prog: drgn.Program, args: argparse.Namespace, color: bool +) -> None: + enable_debug_info_finders = getattr(args, "enable_debug_info_finders", ()) + disable_debug_info_finders = getattr(args, "disable_debug_info_finders", ()) + if enable_debug_info_finders or disable_debug_info_finders: + debug_info_finders = prog.enabled_debug_info_finders() + registered_debug_info_finders = prog.registered_debug_info_finders() + + unknown_finders = [] + + for finder in enable_debug_info_finders: + if finder not in debug_info_finders: + if finder in registered_debug_info_finders: + debug_info_finders.append(finder) + else: + unknown_finders.append(finder) + + for finder in disable_debug_info_finders: + try: + debug_info_finders.remove(finder) + except ValueError: + if finder not in registered_debug_info_finders: + unknown_finders.append(finder) + + if unknown_finders: + if len(unknown_finders) == 1: + unknown_finders_repr = repr(unknown_finders[0]) + elif len(unknown_finders) == 2: + unknown_finders_repr = ( + f"{unknown_finders[0]!r} or {unknown_finders[1]!r}" ) - dest[name] = value + elif len(unknown_finders) > 2: + unknown_finders = [repr(finder) for finder in unknown_finders] + unknown_finders[-1] = "or " + unknown_finders[-1] + 
unknown_finders_repr = ", ".join(unknown_finders) + logger.warning( + "no matching debugging information finders or options for %s", + unknown_finders_repr, + ) + prog.set_enabled_debug_info_finders(debug_info_finders) -class _TryDebugInfoOptionAction(_DebugInfoOptionAction): - _choices = { - **_DebugInfoOptionAction._bool_options(True), - "kmod=depmod": ("try_kmod", drgn.KmodSearchMethod.DEPMOD), - "kmod=walk": ("try_kmod", drgn.KmodSearchMethod.WALK), - "kmod=depmod-or-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_OR_WALK), - "kmod=depmod-and-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_AND_WALK), - } + debug_info_options = getattr(args, "debug_info_options", None) + if debug_info_options: + for option, value in debug_info_options.items(): + setattr(prog.debug_info_options, option, value) + if args.debug_directories is not None: + if args.no_default_debug_directories: + prog.debug_info_options.directories = args.debug_directories + else: + prog.debug_info_options.directories = ( + tuple(args.debug_directories) + prog.debug_info_options.directories + ) + elif args.no_default_debug_directories: + prog.debug_info_options.directories = () -class _NoDebugInfoOptionAction(_DebugInfoOptionAction): - _choices = { - **_DebugInfoOptionAction._bool_options(False), - "kmod": ("try_kmod", drgn.KmodSearchMethod.NONE), - } + if args.kernel_directories is not None: + if args.no_default_kernel_directories: + prog.debug_info_options.kernel_directories = args.kernel_directories + else: + prog.debug_info_options.kernel_directories = ( + tuple(args.kernel_directories) + + prog.debug_info_options.kernel_directories + ) + elif args.no_default_kernel_directories: + prog.debug_info_options.kernel_directories = () + + if args.default_symbols is None: + args.default_symbols = {"default": True, "main": True} + try: + prog.load_debug_info(args.symbols, **args.default_symbols) + except drgn.MissingDebugInfoError as e: + logger.warning("\033[1m%s\033[m" if color else "%s", e) + + if 
args.extra_symbols: + for extra_symbol_path in args.extra_symbols: + extra_symbol_path = os.path.abspath(extra_symbol_path) + module, new = prog.extra_module(extra_symbol_path, create=True) + if new: + module.try_file(extra_symbol_path) def _main() -> None: @@ -269,20 +368,26 @@ def _main() -> None: ) symbol_group.add_argument( "--try-symbols-by", - dest="symbols_by", metavar="METHOD[,METHOD...]", - action=_TryDebugInfoOptionAction, + action=_TrySymbolsByAction, help="enable loading debugging symbols using the given methods. " - "Choices are " + ", ".join(_TryDebugInfoOptionAction._choices) + ". " + "Choices are debugging information finder names " + "(standard, debuginfod, or any added by plugins) " + "or debugging information options (" + + ", ".join(_TrySymbolsByBaseAction._options[True]) + + "). " "This option may be given more than once", ) symbol_group.add_argument( "--no-symbols-by", - dest="symbols_by", metavar="METHOD[,METHOD...]", - action=_NoDebugInfoOptionAction, + action=_NoSymbolsByAction, help="disable loading debugging symbols using the given methods. " - "Choices are " + ", ".join(_NoDebugInfoOptionAction._choices) + ". " + "Choices are debugging information finder names " + "(standard, debuginfod, or any added by plugins) " + "or debugging information options (" + + ", ".join(_TrySymbolsByBaseAction._options[False]) + + "). 
" "This option may be given more than once", ) symbol_group.add_argument( @@ -413,44 +518,7 @@ def _main() -> None: # E.g., "not an ELF core file" sys.exit(f"error: {e}") - if args.symbols_by: - for option, value in args.symbols_by.items(): - setattr(prog.debug_info_options, option, value) - - if args.debug_directories is not None: - if args.no_default_debug_directories: - prog.debug_info_options.directories = args.debug_directories - else: - prog.debug_info_options.directories = ( - tuple(args.debug_directories) + prog.debug_info_options.directories - ) - elif args.no_default_debug_directories: - prog.debug_info_options.directories = () - - if args.kernel_directories is not None: - if args.no_default_kernel_directories: - prog.debug_info_options.kernel_directories = args.kernel_directories - else: - prog.debug_info_options.kernel_directories = ( - tuple(args.kernel_directories) - + prog.debug_info_options.kernel_directories - ) - elif args.no_default_kernel_directories: - prog.debug_info_options.kernel_directories = () - - if args.default_symbols is None: - args.default_symbols = {"default": True, "main": True} - try: - prog.load_debug_info(args.symbols, **args.default_symbols) - except drgn.MissingDebugInfoError as e: - logger.warning("\033[1m%s\033[m" if color else "%s", e) - - if args.extra_symbols: - for extra_symbol_path in args.extra_symbols: - extra_symbol_path = os.path.abspath(extra_symbol_path) - module, new = prog.extra_module(extra_symbol_path, create=True) - if new: - module.try_file(extra_symbol_path) + _load_debugging_symbols(prog, args, color) if args.script: sys.argv = args.script From 3736aba81336534499855f9fbc3a20dcb57087b7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 07:23:33 -0700 Subject: [PATCH 107/166] cli: clarify locations -> directories in help strings Signed-off-by: Omar Sandoval --- drgn/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index 
dc2d3bf41..1921e82d5 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -402,7 +402,7 @@ def _main() -> None: symbol_group.add_argument( "--no-default-debug-directories", action="store_true", - help="don't search for debugging symbols by build ID and debug link in the standard locations", + help="don't search for debugging symbols by build ID and debug link in the standard directories", ) symbol_group.add_argument( "--kernel-directory", @@ -416,7 +416,7 @@ def _main() -> None: symbol_group.add_argument( "--no-default-kernel-directories", action="store_true", - help="don't search for the kernel image and loadable kernel modules in the standard locations", + help="don't search for the kernel image and loadable kernel modules in the standard directories", ) advanced_group = parser.add_argument_group("advanced") From 1b05b8c1732d0fc99ed04b19632977d11747fa4e Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 11 Mar 2025 15:15:16 -0700 Subject: [PATCH 108/166] cli: extract out default_globals() Drgn has a set of globals provided in interactive mode, but they can be useful to other modes (e.g. for executing statements directly). They may also be useful to client code. Make this a separate, public helper. It does make the globals_func argument to run_interactive() a bit much, but that's ok. Signed-off-by: Stephen Brennan --- drgn/cli.py | 80 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index 1921e82d5..dfee291c7 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -21,10 +21,27 @@ from drgn.internal.rlcompleter import Completer from drgn.internal.sudohelper import open_via_sudo -__all__ = ("run_interactive", "version_header") +__all__ = ("default_globals", "run_interactive", "version_header") logger = logging.getLogger("drgn") +# The list of attributes from the drgn module which are imported and inserted +# into the global namespace for interactive debugging. 
+_DRGN_GLOBALS = [ + "FaultError", + "NULL", + "Object", + "alignof", + "cast", + "container_of", + "execscript", + "implicit_convert", + "offsetof", + "reinterpret", + "sizeof", + "stack_trace", +] + class _LogFormatter(logging.Formatter): _LEVELS = ( @@ -89,6 +106,33 @@ def version_header() -> str: return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {libkdumpfile})" +def default_globals(prog: drgn.Program) -> Dict[str, Any]: + """ + Return the default globals for an interactive drgn session + + :param prog: the program which will be debugged + :return: a dict of globals + """ + # Don't forget to update the default banner in run_interactive() + # with any new additions. + init_globals: Dict[str, Any] = { + "prog": prog, + "drgn": drgn, + "__name__": "__main__", + "__doc__": None, + } + for attr in _DRGN_GLOBALS: + init_globals[attr] = getattr(drgn, attr) + module = importlib.import_module("drgn.helpers.common") + for name in module.__dict__["__all__"]: + init_globals[name] = getattr(module, name) + if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: + module = importlib.import_module("drgn.helpers.linux") + for name in module.__dict__["__all__"]: + init_globals[name] = getattr(module, name) + return init_globals + + def _identify_script(path: str) -> str: EI_NIDENT = 16 SIZEOF_E_TYPE = 2 @@ -566,44 +610,14 @@ def run_interactive( function, applications should restore their history and settings before using ``readline``. """ - init_globals: Dict[str, Any] = { - "prog": prog, - "drgn": drgn, - "__name__": "__main__", - "__doc__": None, - } - drgn_globals = [ - "FaultError", - "NULL", - "Object", - "alignof", - "cast", - "container_of", - "execscript", - "implicit_convert", - "offsetof", - "reinterpret", - "sizeof", - "stack_trace", - ] - for attr in drgn_globals: - init_globals[attr] = getattr(drgn, attr) - + init_globals = default_globals(prog) banner = f"""\ For help, type help(drgn). 
>>> import drgn ->>> from drgn import {", ".join(drgn_globals)} +>>> from drgn import {", ".join(_DRGN_GLOBALS)} >>> from drgn.helpers.common import *""" - - module = importlib.import_module("drgn.helpers.common") - for name in module.__dict__["__all__"]: - init_globals[name] = getattr(module, name) if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: banner += "\n>>> from drgn.helpers.linux import *" - module = importlib.import_module("drgn.helpers.linux") - for name in module.__dict__["__all__"]: - init_globals[name] = getattr(module, name) - if banner_func: banner = banner_func(banner) if globals_func: From 150ee760dcd302ea6212fab8d57f6d9da49133f8 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 11 Mar 2025 15:25:45 -0700 Subject: [PATCH 109/166] cli: add -e option to exec() code directly The Python interpreter has "-c" which allows directly running code provided as a command line argument. This is useful for quick tests, without needing to write a script and execute it, and without needing to run the interactive interpreter. It can also be used in scripts or one-liners. Unfortunately, "-c" is already used in the drgn CLI. However, this functionality would be quite useful. Let's add it to the CLI using the option "-e", which is short for "execute" or "exec", the underlying Python function. 
Signed-off-by: Stephen Brennan --- drgn/cli.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index dfee291c7..dce6b8d79 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -491,6 +491,12 @@ def _main() -> None: const="none", help="don't print any logs or download progress", ) + parser.add_argument( + "-e", + dest="exec", + metavar="CODE", + help="an expression or statement to evaluate, instead of running in interactive mode", + ) parser.add_argument( "script", metavar="ARG", @@ -502,7 +508,7 @@ def _main() -> None: args = parser.parse_args() - if args.script: + if args.script and not args.exec: # A common mistake users make is running drgn $core_dump, which tries # to run $core_dump as a Python script. Rather than failing later with # some inscrutable syntax or encoding error, try to catch this early @@ -518,7 +524,7 @@ def _main() -> None: ) elif script_type == "elf": sys.exit(f"error: {args.script[0]} is a binary, not a drgn script") - else: + elif not args.exec: print(version, file=sys.stderr, flush=True) if args.log_level == "none": @@ -564,7 +570,11 @@ def _main() -> None: _load_debugging_symbols(prog, args, color) - if args.script: + if args.exec: + sys.path.insert(0, "") + sys.argv = ["-e"] + args.script + exec(args.exec, default_globals(prog)) + elif args.script: sys.argv = args.script script = args.script[0] if pkgutil.get_importer(script) is None: From 9dea02782661f4f55c62c8a1f46877d0d3c223a3 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 25 Mar 2025 12:05:04 -0700 Subject: [PATCH 110/166] tests: add simple CLI smoke test Signed-off-by: Stephen Brennan --- tests/test_cli.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/test_cli.py diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 000000000..d26e81bd8 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, Oracle and/or its affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + + +import subprocess +import sys + +from tests import TestCase + + +class TestCli(TestCase): + + def run_cli(self, *args: str): + try: + return subprocess.run( + [sys.executable, "-m", "drgn"] + list(args), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + ) + except subprocess.CalledProcessError as e: + # With captured output, there's nothing left to debug in CI logs. + # Print output on a failure so we can debug. + print(f"STDOUT:\n{e.stdout.decode()}") + print(f"STDERR:\n{e.stderr.decode()}") + raise + + def test_smoke(self): + proc = self.run_cli( + "--quiet", "--pid", "0", "--no-default-symbols", "-e", "print('pass')" + ) + self.assertEqual(proc.stdout, b"pass\n") From c5210929e5db1eb7fce852ea64a3153e5953174e Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 25 Mar 2025 11:53:20 -0700 Subject: [PATCH 111/166] cli: fix 'staticmethod' object is not callable for Python < 3.10 Prior to Python 3.10, the object returned by @staticmethod apparently cannot be called unless bound to the class instance. So the new changes in the fixed commit cause the CLI to fail on Python < 3.10. We can resolve this in a variety of ways, but the one which doesn't result in any static checker warnings is the simplest: just move this function out to a module-level function. There are no users in sub-classes which need to be converted. 
Fixes: 4a6a9f33 ("cli: allow enabling/disabling debug info finders with --{try,no}-symbols-by") Signed-off-by: Stephen Brennan --- drgn/cli.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index dce6b8d79..38a213645 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -190,25 +190,25 @@ def _displayhook(value: Any) -> None: setattr(builtins, "_", value) +def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: + return { + option: ("try_" + option.replace("-", "_"), value) + for option in ( + "module-name", + "build-id", + "debug-link", + "procfs", + "embedded-vdso", + "reuse", + "supplementary", + ) + } + + class _TrySymbolsByBaseAction(argparse.Action): _enable: bool _finder = ("disable_debug_info_finders", "enable_debug_info_finders") - @staticmethod - def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: - return { - option: ("try_" + option.replace("-", "_"), value) - for option in ( - "module-name", - "build-id", - "debug-link", - "procfs", - "embedded-vdso", - "reuse", - "supplementary", - ) - } - _options = ( { **_bool_options(False), From 4520c3d5b333fbaf06ae02722c3fc4f383ceb265 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 12:30:28 -0700 Subject: [PATCH 112/166] cli: set default program for -e Do this for consistency with the other modes. 
Fixes: 150ee760dcd3 ("cli: add -e option to exec() code directly") Signed-off-by: Omar Sandoval --- drgn/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/drgn/cli.py b/drgn/cli.py index 38a213645..c2f89dad9 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -573,6 +573,7 @@ def _main() -> None: if args.exec: sys.path.insert(0, "") sys.argv = ["-e"] + args.script + drgn.set_default_prog(prog) exec(args.exec, default_globals(prog)) elif args.script: sys.argv = args.script From bafb43fd1e687172ec5b783de0b979cceb929898 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 12:31:41 -0700 Subject: [PATCH 113/166] cli: rename args.script to args.args With -e, args.script doesn't necessarily mean that we're running a script. That's a bug waiting to happen, so rename it and add a local variable for whether we're running a script. Signed-off-by: Omar Sandoval --- drgn/cli.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index c2f89dad9..e6c704e4f 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -498,32 +498,34 @@ def _main() -> None: help="an expression or statement to evaluate, instead of running in interactive mode", ) parser.add_argument( - "script", + "args", metavar="ARG", type=str, nargs=argparse.REMAINDER, - help="script to execute instead of running in interactive mode", + help="script to execute instead of running in interactive mode " + "(unless -e is given) and arguments to pass", ) parser.add_argument("--version", action="version", version=version) args = parser.parse_args() - if args.script and not args.exec: + script = bool(not args.exec and args.args) + if script: # A common mistake users make is running drgn $core_dump, which tries # to run $core_dump as a Python script. Rather than failing later with # some inscrutable syntax or encoding error, try to catch this early # and provide a helpful message. 
try: - script_type = _identify_script(args.script[0]) + script_type = _identify_script(args.args[0]) except OSError as e: sys.exit(str(e)) if script_type == "core": sys.exit( - f"error: {args.script[0]} is a core dump\n" - f'Did you mean "-c {args.script[0]}"?' + f"error: {args.args[0]} is a core dump\n" + f'Did you mean "-c {args.args[0]}"?' ) elif script_type == "elf": - sys.exit(f"error: {args.script[0]} is a binary, not a drgn script") + sys.exit(f"error: {args.args[0]} is a binary, not a drgn script") elif not args.exec: print(version, file=sys.stderr, flush=True) @@ -572,16 +574,16 @@ def _main() -> None: if args.exec: sys.path.insert(0, "") - sys.argv = ["-e"] + args.script + sys.argv = ["-e"] + args.args drgn.set_default_prog(prog) exec(args.exec, default_globals(prog)) - elif args.script: - sys.argv = args.script - script = args.script[0] - if pkgutil.get_importer(script) is None: - sys.path.insert(0, os.path.dirname(os.path.abspath(script))) + elif script: + sys.argv = args.args + script_path = args.args[0] + if pkgutil.get_importer(script_path) is None: + sys.path.insert(0, os.path.dirname(os.path.abspath(script_path))) drgn.set_default_prog(prog) - runpy.run_path(script, init_globals={"prog": prog}, run_name="__main__") + runpy.run_path(script_path, init_globals={"prog": prog}, run_name="__main__") else: run_interactive(prog) From 7f63f1a1bee4a455ebab8dae7d51c63d87682cfb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 13:03:19 -0700 Subject: [PATCH 114/166] cli: handle !isatty(stdin) Redirecting drgn's stdin (e.g., to a pipe or a file) is really broken: with the enhanced REPL, it fails with "termios.error: (25, 'Inappropriate ioctl for device')" and "AttributeError: 'UnixConsole' object has no attribute '_UnixConsole__buffer'". With the basic REPL, it prints ps1/ps2 prompts and expects extra newlines to terminate indented blocks. The standard Python REPL handles this by checking if stdin is a terminal. 
Unfortunately, code.interact()/_pyrepl don't do this, so let's do the check and exec manually. Reported-by: Stephen Brennan Reported-by: Jeff Layton Signed-off-by: Omar Sandoval --- drgn/cli.py | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index e6c704e4f..03a4ac2f1 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -14,7 +14,7 @@ import runpy import shutil import sys -from typing import Any, Callable, Dict, Optional, Tuple +from typing import IO, Any, Callable, Dict, Optional, Tuple import drgn from drgn.internal.repl import interact, readline @@ -43,6 +43,13 @@ ] +def _is_tty(file: IO[Any]) -> bool: + try: + return os.isatty(file.fileno()) + except (AttributeError, OSError): + return False + + class _LogFormatter(logging.Formatter): _LEVELS = ( (logging.DEBUG, "debug", "36"), @@ -349,7 +356,7 @@ def _load_debugging_symbols( def _main() -> None: handler = logging.StreamHandler() - color = hasattr(sys.stderr, "fileno") and os.isatty(sys.stderr.fileno()) + color = _is_tty(sys.stderr) handler.setFormatter(_LogFormatter(color)) logging.getLogger().addHandler(handler) @@ -510,6 +517,7 @@ def _main() -> None: args = parser.parse_args() script = bool(not args.exec and args.args) + interactive = bool(not args.exec and not args.args and _is_tty(sys.stdin)) if script: # A common mistake users make is running drgn $core_dump, which tries # to run $core_dump as a Python script. 
Rather than failing later with @@ -526,7 +534,7 @@ def _main() -> None: ) elif script_type == "elf": sys.exit(f"error: {args.args[0]} is a binary, not a drgn script") - elif not args.exec: + elif interactive: print(version, file=sys.stderr, flush=True) if args.log_level == "none": @@ -572,20 +580,28 @@ def _main() -> None: _load_debugging_symbols(prog, args, color) - if args.exec: - sys.path.insert(0, "") - sys.argv = ["-e"] + args.args - drgn.set_default_prog(prog) - exec(args.exec, default_globals(prog)) - elif script: - sys.argv = args.args - script_path = args.args[0] - if pkgutil.get_importer(script_path) is None: - sys.path.insert(0, os.path.dirname(os.path.abspath(script_path))) - drgn.set_default_prog(prog) - runpy.run_path(script_path, init_globals={"prog": prog}, run_name="__main__") - else: + if interactive: run_interactive(prog) + else: + drgn.set_default_prog(prog) + if script: + sys.argv = args.args + script_path = args.args[0] + if pkgutil.get_importer(script_path) is None: + sys.path.insert(0, os.path.dirname(os.path.abspath(script_path))) + runpy.run_path( + script_path, init_globals={"prog": prog}, run_name="__main__" + ) + else: + sys.path.insert(0, "") + exec_globals = default_globals(prog) + if args.exec: + sys.argv = ["-e"] + args.args + exec(args.exec, exec_globals) + else: + sys.argv = [""] + exec_globals["__file__"] = "" + exec(compile(sys.stdin.read(), "", "exec"), exec_globals) def run_interactive( From f89e3fa324a381c75bcf351a67f9dc9a9b26c101 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 14:06:42 -0700 Subject: [PATCH 115/166] tests: expand CLI tests Also cover script mode and piping in a script and check that we set __name__, __file__, sys.path, sys.argv, prog, and the default program appropriately. 
Signed-off-by: Omar Sandoval --- tests/test_cli.py | 59 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index d26e81bd8..bb98ce531 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,19 +4,21 @@ import subprocess import sys +import tempfile from tests import TestCase class TestCli(TestCase): - def run_cli(self, *args: str): + def run_cli(self, *args: str, **kwargs): try: return subprocess.run( [sys.executable, "-m", "drgn"] + list(args), stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, + **kwargs, ) except subprocess.CalledProcessError as e: # With captured output, there's nothing left to debug in CI logs. @@ -25,8 +27,57 @@ def run_cli(self, *args: str): print(f"STDERR:\n{e.stderr.decode()}") raise - def test_smoke(self): + def test_e(self): + script = r""" +import sys + +assert drgn.get_default_prog() is prog +assert __name__ == "__main__" +assert "__file__" not in globals() +assert sys.path[0] == "" +print(sys.argv) +""" + proc = self.run_cli( + "--quiet", "--pid", "0", "--no-default-symbols", "-e", script, "pass" + ) + self.assertEqual(proc.stdout, b"['-e', 'pass']\n") + + def test_script(self): + with tempfile.NamedTemporaryFile() as f: + f.write( + rb""" +assert "drgn" not in globals() + +import drgn +import os.path +import sys + +assert drgn.get_default_prog() is prog +assert __name__ == "__main__" +assert __file__ == sys.argv[0] +assert sys.path[0] == os.path.dirname(__file__) +print(sys.argv) +""" + ) + f.flush() + proc = self.run_cli( + "--quiet", "--pid", "0", "--no-default-symbols", f.name, "pass" + ) + self.assertEqual(proc.stdout, f"[{f.name!r}, 'pass']\n".encode()) + + def test_pipe(self): + script = rb""" +import sys + +assert drgn.get_default_prog() is prog +assert __name__ == "__main__" +assert __file__ == "" +assert sys.path[0] == "" +# Dummy if statement to test handling of multi-line blocks. 
+if True: + print(sys.argv) +""" proc = self.run_cli( - "--quiet", "--pid", "0", "--no-default-symbols", "-e", "print('pass')" + "--quiet", "--pid", "0", "--no-default-symbols", input=script ) - self.assertEqual(proc.stdout, b"pass\n") + self.assertEqual(proc.stdout, b"['']\n") From 64dd5c883d4196e6f3287ae47c868db3dd477c65 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 25 Mar 2025 15:22:32 -0700 Subject: [PATCH 116/166] Add Program.create_loaded_modules() This is a shortcut for `for _ in prog.loaded_modules(): pass` or `list(prog.loaded_modules())`, which is used in several test cases and is occasionally useful elsewhere. Signed-off-by: Omar Sandoval --- _drgn.pyi | 20 ++++++++++++++++++-- libdrgn/debug_info.c | 23 ++++++++++++++++++----- libdrgn/drgn.h | 13 ++++++++++++- libdrgn/python/program.c | 10 ++++++++++ tests/linux_kernel/test_stack_trace.py | 2 +- tests/test_debug_info.py | 16 ++++------------ 6 files changed, 63 insertions(+), 21 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index a9c12b419..7a2f8adfb 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -702,8 +702,10 @@ class Program: def loaded_modules(self) -> Iterator[Tuple[Module, bool]]: """ - Determine what executables, libraries, etc. are loaded in the program - and create modules to represent them. + Get an iterator over executables, libraries, etc. that are loaded in + the program, creating modules to represent them. + + Modules are created lazily as items are consumed. This may automatically load some debugging information necessary to enumerate the modules. Other than that, it does not load debugging @@ -717,6 +719,20 @@ class Program: """ ... + def create_loaded_modules(self) -> None: + """ + Determine what executables, libraries, etc. are loaded in the program + and create modules to represent them. + + This is a shortcut for exhausting a :meth:`loaded_modules()` iterator. + It is equivalent to: + + ..
code-block:: python3 + + for _ in prog.loaded_modules(): + pass + """ + @overload def main_module( self, name: Optional[Path] = None, *, create: Literal[False] = False diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 19f9e6cbd..6dcc71d4b 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -4832,6 +4832,24 @@ drgn_loaded_module_iterator_create(struct drgn_program *prog, return null_module_iterator_create(prog, ret); } +static inline void drgn_module_iterator_destroyp(struct drgn_module_iterator **itp) +{ + drgn_module_iterator_destroy(*itp); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_create_loaded_modules(struct drgn_program *prog) +{ + _cleanup_(drgn_module_iterator_destroyp) + struct drgn_module_iterator *it = NULL; + struct drgn_error *err = drgn_loaded_module_iterator_create(prog, &it); + if (err) + return err; + struct drgn_module *module; + while (!(err = drgn_module_iterator_next(it, &module, NULL)) && module); + return err; +} + struct load_debug_info_file { const char *path; // We only keep this to keep load_debug_info_provided::build_id alive @@ -5185,11 +5203,6 @@ static void load_debug_info_log_missing(struct drgn_module *module, missing_debug, module->name); } -static inline void drgn_module_iterator_destroyp(struct drgn_module_iterator **itp) -{ - drgn_module_iterator_destroy(*itp); -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, size_t n, bool load_default, bool load_main) diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 57d9f0f0b..0e719b808 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1860,7 +1860,7 @@ drgn_module_iterator_program(const struct drgn_module_iterator *it); /** * Get the next module in a module iterator. * - * @param[out] ret Returned module. + * @param[out] ret Returned module, or @c NULL if there are no more modules. * @param[out] new_ret Whether the module was newly created. May be @c NULL. 
*/ struct drgn_error *drgn_module_iterator_next(struct drgn_module_iterator *it, @@ -1880,6 +1880,17 @@ struct drgn_error * drgn_loaded_module_iterator_create(struct drgn_program *prog, struct drgn_module_iterator **ret); +/** + * Determine what executables, libraries, etc. are loaded in the program and + * create modules to represent them. + * + * This is a shortcut for creating an iterator with @ref + * drgn_loaded_module_iterator_create() and calling @ref + * drgn_module_iterator_next() until it is exhausted. + */ +struct drgn_error * +drgn_create_loaded_modules(struct drgn_program *prog); + /** * Load debugging information for the given set of files and/or modules. * diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 91db4fac5..637a1e549 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1046,6 +1046,14 @@ static ModuleIterator *Program_loaded_modules(Program *self) return it; } +static PyObject *Program_create_loaded_modules(Program *self) +{ + struct drgn_error *err = drgn_create_loaded_modules(&self->prog); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + static PyObject *Program_main_module(Program *self, PyObject *args, PyObject *kwds) { @@ -1985,6 +1993,8 @@ static PyMethodDef Program_methods[] = { drgn_Program_modules_DOC}, {"loaded_modules", (PyCFunction)Program_loaded_modules, METH_NOARGS, drgn_Program_loaded_modules_DOC}, + {"create_loaded_modules", (PyCFunction)Program_create_loaded_modules, + METH_NOARGS, drgn_Program_create_loaded_modules_DOC}, {"main_module", (PyCFunction)Program_main_module, METH_VARARGS | METH_KEYWORDS, drgn_Program_main_module_DOC}, {"shared_library_module", (PyCFunction)Program_shared_library_module, diff --git a/tests/linux_kernel/test_stack_trace.py b/tests/linux_kernel/test_stack_trace.py index 2ba3a8055..314b5aa1f 100644 --- a/tests/linux_kernel/test_stack_trace.py +++ b/tests/linux_kernel/test_stack_trace.py @@ -100,7 +100,7 @@ def test_by_pid_builtin_orc(self): 
prog.load_debug_info(main=True) # Now that vmlinux is loaded, enumerate all the kernel modules so # that a drgn_module is created to hold the ORC data - list(prog.loaded_modules()) + prog.create_loaded_modules() kallsyms = load_module_kallsyms(prog) prog.register_symbol_finder("module_kallsyms", kallsyms, enable_index=1) for thread in prog.threads(): diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index ab3ed5ea7..db5ffed08 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -636,9 +636,7 @@ def _try_vdso_in_core(self, module): def test_bias(self): self.prog.set_core_dump(get_resource("crashme.core")) - - for _ in self.prog.loaded_modules(): - pass + self.prog.create_loaded_modules() with self.subTest(module="main"): module = self.prog.main_module() @@ -724,9 +722,7 @@ def test_loaded_modules_pie(self): def test_bias_pie(self): self.prog.set_core_dump(get_resource("crashme_pie.core")) - - for _ in self.prog.loaded_modules(): - pass + self.prog.create_loaded_modules() with self.subTest(module="main"): module = self.prog.main_module() @@ -784,9 +780,7 @@ def test_loaded_modules_static(self): def test_bias_static(self): self.prog.set_core_dump(get_resource("crashme_static.core")) - - for _ in self.prog.loaded_modules(): - pass + self.prog.create_loaded_modules() with self.subTest(module="main"): module = self.prog.main_module() @@ -836,9 +830,7 @@ def test_loaded_modules_static_pie(self): def test_bias_static_pie(self): self.prog.set_core_dump(get_resource("crashme_static_pie.core")) - - for _ in self.prog.loaded_modules(): - pass + self.prog.create_loaded_modules() with self.subTest(module="main"): module = self.prog.main_module() From 21cbafa47438242dfb38ff0538f9544c82f38a54 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Mar 2025 07:31:29 -0700 Subject: [PATCH 117/166] libdrgn: debug_info: improve load_debug_info() warnings for kernel Include the kernel version and silence the loaded module iterator warning. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 10 ++++++++-- libdrgn/debug_info.h | 2 ++ libdrgn/linux_kernel.c | 7 +++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 6dcc71d4b..63c1c4758 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -5198,9 +5198,14 @@ static void load_debug_info_log_missing(struct drgn_module *module, missing_debug = ""; break; } - drgn_log_warning(module->prog, "missing %s%s%s for %s", missing_loaded, + const char *name_extra = ""; + if (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL + && drgn_module_kind(module) == DRGN_MODULE_MAIN) + name_extra = module->prog->vmcoreinfo.osrelease; + drgn_log_warning(module->prog, "missing %s%s%s for %s%s%s", missing_loaded, missing_loaded[0] && missing_debug[0] ? " and ": "", - missing_debug, module->name); + missing_debug, module->name, name_extra[0] ? " " : "", + name_extra); } LIBDRGN_PUBLIC struct drgn_error * @@ -5246,6 +5251,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, err = drgn_loaded_module_iterator_create(prog, &it); if (err) return err; + it->for_load_debug_info = true; VECTOR(drgn_module_vector, modules); struct drgn_module *module; while (!(err = drgn_module_iterator_next(it, &module, NULL)) && module) { diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index e61563c90..c4ba60a84 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -142,6 +142,7 @@ struct drgn_module_iterator { struct drgn_program *prog; drgn_module_iterator_destroy_fn *destroy; drgn_module_iterator_next_fn *next; + bool for_load_debug_info; }; static inline void @@ -153,6 +154,7 @@ drgn_module_iterator_init(struct drgn_module_iterator *it, it->prog = prog; it->destroy = destroy; it->next = next; + it->for_load_debug_info = false; } /** Bitmask of files in a @ref drgn_module. 
*/ diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 5c118a69d..3c3dd9202 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -2107,8 +2107,11 @@ linux_kernel_loaded_module_iterator_next(struct drgn_module_iterator *_it, } if (!prog->dbinfo.main_module || drgn_module_wants_debug_file(prog->dbinfo.main_module)) { - drgn_log_warning(prog, - "can't find loaded modules without kernel debug info"); + drgn_log(it->it.for_load_debug_info + ? DRGN_LOG_DEBUG + : DRGN_LOG_WARNING, + prog, + "can't find loaded modules without kernel debug info"); } else { drgn_log_debug(prog, "kernel does not have loadable module support"); From 50bcb65f8b69ce2c9b56371d8094b9995834e2b0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Mar 2025 07:40:10 -0700 Subject: [PATCH 118/166] libdrgn: debug_info: simplify missing debug info error return in load_debug_info() We don't need iterator_tried_missing since we can just check num_missing (renamed from num_warnings). Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 63c1c4758..88f25eb2d 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -5167,9 +5167,9 @@ load_debug_info_try_provided_files(struct drgn_module *module, static void load_debug_info_log_missing(struct drgn_module *module, unsigned int max_warnings, - unsigned int *num_warnings) + unsigned int *num_missing) { - if (++(*num_warnings) > max_warnings) + if (++(*num_missing) > max_warnings) return; const char *missing_loaded = ""; if (drgn_module_loaded_file_status(module) == DRGN_MODULE_FILE_WANT) { @@ -5223,7 +5223,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, const char *env = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); unsigned int max_warnings = env ? 
atoi(env) : 5; - unsigned int num_warnings = 0; + unsigned int num_missing = 0; drgn_log_debug(prog, "loading %sdebugging symbols", load_default ? "default " : load_main ? "main " : ""); @@ -5289,7 +5289,6 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, struct drgn_module **wanted_modules = drgn_module_vector_begin(&modules); size_t num_wanted_modules = drgn_module_vector_size(&modules); - bool iterator_tried_missing = false; // The module iterator may have tried to load debug info, so we need to // check each module again. @@ -5312,8 +5311,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, } else if (drgn_module_wants_file(module)) { load_debug_info_log_missing(module, max_warnings, - &num_warnings); - iterator_tried_missing = true; + &num_missing); } } num_wanted_modules = new_num_wanted_modules; @@ -5371,11 +5369,11 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, for (size_t i = 0; i < num_wanted_modules; i++) { load_debug_info_log_missing(wanted_modules[i], max_warnings, - &num_warnings); + &num_missing); } - if (num_warnings > max_warnings) { + if (num_missing > max_warnings) { drgn_log_warning(prog, "... 
missing %u more", - num_warnings - max_warnings); + num_missing - max_warnings); } // Update the DWARF index eagerly, mostly because that's what we did @@ -5384,7 +5382,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, if (err) return err; - if (num_wanted_modules > 0 || iterator_tried_missing) { + if (num_missing > 0) { return drgn_error_create(DRGN_ERROR_MISSING_DEBUG_INFO, "missing some debugging symbols; see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html"); } From 12b789e7b2ce4fea5247763989d2370d91db4c12 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 26 Mar 2025 09:17:04 -0700 Subject: [PATCH 119/166] cli: log missing main debug info as critical Instead of manually bolding the warning, always bold critical messages and log as critical or warning depending on whether we're missing the main debug info. Signed-off-by: Omar Sandoval --- drgn/cli.py | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index 03a4ac2f1..27eb04194 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -52,36 +52,37 @@ def _is_tty(file: IO[Any]) -> bool: class _LogFormatter(logging.Formatter): _LEVELS = ( - (logging.DEBUG, "debug", "36"), - (logging.INFO, "info", "32"), - (logging.WARNING, "warning", "33"), - (logging.ERROR, "error", "31"), - (logging.CRITICAL, "critical", "31;1"), + (logging.DEBUG, "debug", "\033[36m", "\033[m", ""), + (logging.INFO, "info", "\033[32m", "\033[m", ""), + (logging.WARNING, "warning", "\033[33m", "\033[m", ""), + (logging.ERROR, "error", "\033[31m", "\033[m", ""), + (logging.CRITICAL, "critical", "\033[31;1m", "\033[0;1m", "\033[m"), ) def __init__(self, color: bool) -> None: if color: - level_prefixes = { - level: f"\033[{level_color}m{level_name}:\033[0m" - for level, level_name, level_color in self._LEVELS + levels = { + level: (f"{level_prefix}{level_name}:{message_prefix}", message_suffix) + for level, 
level_name, level_prefix, message_prefix, message_suffix in self._LEVELS } else: - level_prefixes = { - level: f"{level_name}:" for level, level_name, _ in self._LEVELS + levels = { + level: (f"{level_name}:", "") + for level, level_name, _, _, _ in self._LEVELS } default_prefix = "%(levelname)s:" self._drgn_formatters = { - level: logging.Formatter(f"{prefix} %(message)s") - for level, prefix in level_prefixes.items() + level: logging.Formatter(f"{prefix} %(message)s{suffix}") + for level, (prefix, suffix) in levels.items() } self._default_drgn_formatter = logging.Formatter( f"{default_prefix} %(message)s" ) self._other_formatters = { - level: logging.Formatter(f"{prefix}%(name)s: %(message)s") - for level, prefix in level_prefixes.items() + level: logging.Formatter(f"{prefix}%(name)s: %(message)s{suffix}") + for level, (prefix, suffix) in levels.items() } self._default_other_formatter = logging.Formatter( f"{default_prefix}%(name)s: %(message)s" @@ -270,9 +271,7 @@ class _NoSymbolsByAction(_TrySymbolsByBaseAction): _enable = False -def _load_debugging_symbols( - prog: drgn.Program, args: argparse.Namespace, color: bool -) -> None: +def _load_debugging_symbols(prog: drgn.Program, args: argparse.Namespace) -> None: enable_debug_info_finders = getattr(args, "enable_debug_info_finders", ()) disable_debug_info_finders = getattr(args, "disable_debug_info_finders", ()) if enable_debug_info_finders or disable_debug_info_finders: @@ -344,7 +343,17 @@ def _load_debugging_symbols( try: prog.load_debug_info(args.symbols, **args.default_symbols) except drgn.MissingDebugInfoError as e: - logger.warning("\033[1m%s\033[m" if color else "%s", e) + if args.default_symbols.get("main"): + try: + main_module = prog.main_module() + critical = ( + main_module.wants_debug_file() or main_module.wants_loaded_file() + ) + except LookupError: + critical = True + else: + critical = False + logger.log(logging.CRITICAL if critical else logging.WARNING, "%s", e) if args.extra_symbols: for 
extra_symbol_path in args.extra_symbols: @@ -578,7 +587,7 @@ def _main() -> None: # E.g., "not an ELF core file" sys.exit(f"error: {e}") - _load_debugging_symbols(prog, args, color) + _load_debugging_symbols(prog, args) if interactive: run_interactive(prog) From 9033fb381714e2f7533c84c5af95307f923e4498 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Mar 2025 12:58:17 -0700 Subject: [PATCH 120/166] drgn.helpers.experimental.kmodify: add wake_up_process() example to call_function() docstring wake_up_process() is one of the best use cases for call_function(), so use it as the first example. Signed-off-by: Omar Sandoval --- drgn/helpers/experimental/kmodify.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drgn/helpers/experimental/kmodify.py b/drgn/helpers/experimental/kmodify.py index 2d02c4ce1..2c76c9a00 100644 --- a/drgn/helpers/experimental/kmodify.py +++ b/drgn/helpers/experimental/kmodify.py @@ -1157,6 +1157,12 @@ def call_function(prog: Program, func: Union[str, Object], *args: Any) -> Object """ Call a function in the kernel. + >>> task = find_task(99) + >>> if task: + ... call_function("wake_up_process", task) + ... + (int)1 + Arguments can be either :class:`~drgn.Object`\\ s or Python values. The function return value is returned as an :class:`~drgn.Object`: From c8c7c17d2e5e7e3d36df4a3430da7728c9dd1296 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 1 Apr 2025 15:03:10 -0700 Subject: [PATCH 121/166] README: clean up and update installation instructions for libdebuginfod Signed-off-by: Omar Sandoval --- README.rst | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/README.rst b/README.rst index b1cabf270..9ccc576b9 100644 --- a/README.rst +++ b/README.rst @@ -70,13 +70,13 @@ drgn can be installed using the package manager on some Linux distributions. 
:target: https://repology.org/project/drgn/versions :alt: Packaging Status -* Fedora >= 32 +* Fedora, RHEL/CentOS Stream >= 9 .. code-block:: console $ sudo dnf install drgn -* RHEL/CentOS >= 8 +* RHEL/CentOS < 9 `Enable EPEL `_. Then: @@ -86,14 +86,14 @@ drgn can be installed using the package manager on some Linux distributions. * Oracle Linux >= 8 - Enable the ``ol8_addons`` or ``ol9_addons`` repository and install drgn: + Enable the ``ol8_addons`` or ``ol9_addons`` repository. Then: .. code-block:: console $ sudo dnf config-manager --enable ol8_addons # OR: ol9_addons $ sudo dnf install drgn - Drgn is also available for Python versions in application streams. For + drgn is also available for Python versions in application streams. For example, use ``dnf install python3.12-drgn`` to install drgn for Python 3.12. See the documentation for drgn in `Oracle Linux 9 `_ @@ -101,17 +101,20 @@ drgn can be installed using the package manager on some Linux distributions. `_ for more information. -* Arch Linux +* Debian >= 12 (Bookworm)/Ubuntu >= 24.04 (Noble Numbat) .. code-block:: console - $ sudo pacman -S drgn + $ sudo apt install python3-drgn -* Debian >= 12 (Bookworm) + To get the latest version on Ubuntu, enable the `michel-slm/kernel-utils PPA + `_ first. + +* Arch Linux .. code-block:: console - $ sudo apt install python3-drgn + $ sudo pacman -S drgn * Gentoo @@ -125,19 +128,6 @@ drgn can be installed using the package manager on some Linux distributions. $ sudo zypper install python3-drgn -* Ubuntu - - All supported Ubuntu releases except for 22.04 (jammy) ships with drgn - but generally the version that - was in Debian unstable at the time that Ubuntu release is branched. - - To get the latest version, including on jammy, enable the `michel-slm/kernel-utils PPA `_. - - To install drgn itself, with or without the PPA: - - .. 
code-block:: console - - $ sudo apt install python3-drgn - pip ^^^ @@ -166,13 +156,13 @@ From Source To get the development version of drgn, you will need to build it from source. First, install dependencies: -* Fedora +* Fedora, RHEL/CentOS Stream >= 9 .. code-block:: console - $ sudo dnf install autoconf automake check-devel elfutils-devel gcc git libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo dnf install autoconf automake check-devel elfutils-debuginfod-client-devel elfutils-devel gcc git libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools -* RHEL/CentOS/Oracle Linux +* RHEL/CentOS < 9, Oracle Linux .. code-block:: console @@ -195,10 +185,11 @@ First, install dependencies: .. code-block:: console - $ sudo apt install autoconf automake check gcc git liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + $ sudo apt install autoconf automake check gcc git libdebuginfod-dev libkdumpfile-dev liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev - Optionally, install libkdumpfile from source if you want support for the - makedumpfile format. + On Debian <= 11 (Bullseye) and Ubuntu <= 22.04 (Jammy Jellyfish), + ``libkdumpfile-dev`` is not available, so you must install libkdumpfile from + source if you want support for the makedumpfile format. * Arch Linux @@ -216,7 +207,7 @@ First, install dependencies: .. 
code-block:: console - $ sudo zypper install autoconf automake check-devel gcc git libdw-devel libelf-devel libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo zypper install autoconf automake check-devel gcc git libdebuginfod-devel libdw-devel libelf-devel libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools Then, run: From 75d3ab660d98470254ade8975e813dcd2dfea4b4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Apr 2025 00:50:24 -0700 Subject: [PATCH 122/166] libdrgn: linux_kernel: disable debuginfod except on Fedora My debuginfod optimizations for the Linux kernel haven't been deployed anywhere other than Fedora's debuginfod servers. On other servers, it's better to not even try since downloads will take forever. Do a sketchy check to determine whether the kernel is from Fedora and disable the debuginfod finder if not. It can still be reenabled manually with --try-symbols-by debuginfod or by a plugin. Signed-off-by: Omar Sandoval --- libdrgn/handler.c | 28 ++++++++++++++++++++++++++++ libdrgn/handler.h | 3 +++ libdrgn/linux_kernel.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/libdrgn/handler.c b/libdrgn/handler.c index a0ba9d1c4..2285b9d1c 100644 --- a/libdrgn/handler.c +++ b/libdrgn/handler.c @@ -133,3 +133,31 @@ struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, *count_ret = n; return NULL; } + +bool drgn_handler_list_disable(struct drgn_handler_list *list, + const char *name) +{ + // Find an enabled handler with the given name. + struct drgn_handler **handlerp = &list->head; + struct drgn_handler *handler = list->head; + for (;;) { + if (!handler || !handler->enabled) + return false; + if (strcmp(handler->name, name) == 0) + break; + handlerp = &handler->next; + handler = handler->next; + } + + // Disable the handler. 
+ handler->enabled = false; + + // Move it to the appropriate part of the list (after all enabled + // handlers). + *handlerp = handler->next; + while (*handlerp && (*handlerp)->enabled) + handlerp = &(*handlerp)->next; + handler->next = *handlerp; + *handlerp = handler; + return true; +} diff --git a/libdrgn/handler.h b/libdrgn/handler.h index c73b9f2cf..630583039 100644 --- a/libdrgn/handler.h +++ b/libdrgn/handler.h @@ -51,6 +51,9 @@ struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, const char ***names_ret, size_t *count_ret); +bool drgn_handler_list_disable(struct drgn_handler_list *list, + const char *name); + static inline bool drgn_handler_is_last_enabled(struct drgn_handler *handler) { return handler->enabled && (!handler->next || !handler->next->enabled); diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 3c3dd9202..5147ef057 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1,6 +1,7 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: LGPL-2.1-or-later +#include #include #include #include @@ -386,6 +387,24 @@ static struct drgn_error *linux_kernel_get_vmemmap(struct drgn_program *prog, #include "linux_kernel_object_find.inc" // IWYU pragma: keep +// Return whether the given kernel is from Fedora. We check whether the release +// matches the regular expression /.fc[0-9]+(.|$)/ +static bool is_fedora_kernel(const char *osrelease) +{ + const char *p = osrelease; + while ((p = strstr(p, ".fc"))) { + p += sizeof(".fc") - 1; + if (isdigit(*p)) { + do { + p++; + } while (isdigit(*p)); + if (*p == '.' 
|| *p == '\0') + return true; + } + } + return false; +} + struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) { struct drgn_error *err; @@ -397,6 +416,15 @@ struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) return err; if (!prog->lang) prog->lang = &drgn_language_c; + + // At the time of writing, only Fedora's debuginfod server provides fast + // Linux kernel downloads. It's painfully slow everywhere else, so + // disable it. + if (!is_fedora_kernel(prog->vmcoreinfo.osrelease) + && drgn_handler_list_disable(&prog->dbinfo.debug_info_finders, + "debuginfod")) + drgn_log_debug(prog, "disabled debuginfod for Linux kernel"); + return NULL; } From d03bbe582160e5c4ad2d93d8fa18ad536f6fdd59 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Apr 2025 01:09:07 -0700 Subject: [PATCH 123/166] cli: include debuginfod support status in version banner Signed-off-by: Omar Sandoval --- _drgn.pyi | 2 ++ drgn/__init__.py | 2 ++ drgn/cli.py | 5 ++++- libdrgn/debug_info.c | 7 +------ libdrgn/debug_info.h | 16 ++++++++++++++++ libdrgn/python/main.c | 19 +++++++++++++++++++ 6 files changed, 44 insertions(+), 7 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 7a2f8adfb..5daafd8e1 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -3693,6 +3693,8 @@ class OutOfBoundsError(Exception): ... _elfutils_version: str +_have_debuginfod: bool +_enable_dlopen_debuginfod: bool _with_libkdumpfile: bool def _linux_helper_direct_mapping_offset(__prog: Program) -> int: ... 
diff --git a/drgn/__init__.py b/drgn/__init__.py index be5f2ec0f..c2b214e43 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -111,6 +111,8 @@ # isort: split from _drgn import ( # noqa: F401 _elfutils_version as _elfutils_version, + _enable_dlopen_debuginfod as _enable_dlopen_debuginfod, + _have_debuginfod as _have_debuginfod, _with_libkdumpfile as _with_libkdumpfile, ) from drgn.internal.version import __version__ as __version__ # noqa: F401 diff --git a/drgn/cli.py b/drgn/cli.py index 27eb04194..c36aa1f97 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -110,8 +110,11 @@ def version_header() -> str: calling :func:`run_interactive()`. """ python_version = ".".join(str(v) for v in sys.version_info[:3]) + debuginfod = f'with{"" if drgn._have_debuginfod else "out"} debuginfod' + if drgn._enable_dlopen_debuginfod: + debuginfod += " (dlopen)" libkdumpfile = f'with{"" if drgn._with_libkdumpfile else "out"} libkdumpfile' - return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {libkdumpfile})" + return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {debuginfod}, {libkdumpfile})" def default_globals(prog: drgn.Program) -> Dict[str, Any]: diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 88f25eb2d..6d17d104d 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -120,7 +120,7 @@ static void drgn_dlopen_debuginfod(void) } } -static inline bool drgn_have_debuginfod(void) +bool drgn_have_debuginfod(void) { return drgn_debuginfod_begin != NULL; } @@ -129,11 +129,6 @@ static inline bool drgn_have_debuginfod(void) #define X(name) static const typeof(&name) drgn_##name = name; DRGN_DEBUGINFOD_FUNCTIONS #undef X - -static inline bool drgn_have_debuginfod(void) -{ - return true; -} #endif #undef DRGN_DEBUGINFOD_FUNCTIONS diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index c4ba60a84..c49b48f66 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ 
-47,6 +47,22 @@ struct drgn_elf_file; * @{ */ +#if WITH_DEBUGINFOD +#if ENABLE_DLOPEN_DEBUGINFOD +bool drgn_have_debuginfod(void); +#else +static inline bool drgn_have_debuginfod(void) +{ + return true; +} +#endif +#else +static inline bool drgn_have_debuginfod(void) +{ + return false; +} +#endif + DEFINE_HASH_TABLE_TYPE(drgn_elf_file_dwarf_table, struct drgn_elf_file *); DEFINE_HASH_TABLE_TYPE(drgn_module_table, struct drgn_module *); DEFINE_BINARY_SEARCH_TREE_TYPE(drgn_module_address_tree, struct drgn_module); diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index 7f45c876b..8cc49de30 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -344,6 +344,25 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) dwfl_version(NULL))) goto err; + PyObject *have_debuginfod = PyBool_FromLong(drgn_have_debuginfod()); + if (PyModule_AddObject(m, "_have_debuginfod", have_debuginfod)) { + Py_XDECREF(have_debuginfod); + goto err; + } + + PyObject *enable_dlopen_debuginfod; +#if ENABLE_DLOPEN_DEBUGINFOD + enable_dlopen_debuginfod = Py_True; +#else + enable_dlopen_debuginfod = Py_False; +#endif + Py_INCREF(enable_dlopen_debuginfod); + if (PyModule_AddObject(m, "_enable_dlopen_debuginfod", + enable_dlopen_debuginfod)) { + Py_DECREF(enable_dlopen_debuginfod); + goto err; + } + PyObject *with_libkdumpfile; #ifdef WITH_LIBKDUMPFILE with_libkdumpfile = Py_True; From 38f40b3658ad8a2115e110293c788a6d4a18e595 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 3 Apr 2025 15:22:54 -0700 Subject: [PATCH 124/166] CI: install pyroute2 < 0.9.1 for Python < 3.9. pyroute2 0.9.1 dropped support for Python < 3.9. On Python 3.6 and 3.7, it fails with an ImportError from typing that the try/except in tests/linux_kernel/helpers/test_tc.py catches by accident, but on 3.8 it fails with a TypeError: 'type' object is not subscriptable. Fix it by installing an older version of pyroute2 on those Python versions.
Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3def0a2d4..f03b4eaf8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,7 +65,11 @@ jobs: run: | sudo apt-get update -y sudo apt-get install -y btrfs-progs check dwarves libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} - pip install pyroute2 setuptools ${USE_PRE_COMMIT/1/pre-commit} + # pyroute2 0.9.1 dropped support for Python < 3.9. + if [[ "${{ matrix.python-version }}" =~ ^3\.[678]$ ]]; then + pyroute2_version="<0.9.1" + fi + pip install "pyroute2$pyroute2_version" setuptools ${USE_PRE_COMMIT/1/pre-commit} - name: Generate version.py run: python setup.py --version - name: Check with mypy From c78289300dd7e7a18e0e2930a5ced5a8c8fe1d8d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Apr 2025 14:12:40 -0700 Subject: [PATCH 125/166] Add absence reason to absent objects An object can be absent for multiple reasons: e.g., the object is a placeholder, it was optimized out, or its value is represented using a DWARF opcode we don't support. We print all of these as "<absent>", which is technically correct but criminally vague. Add an absence reason to struct drgn_object/drgn.Object. For now, this only has one possible value, "OTHER", but it will be made more specific in follow-up commits. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 20 ++++++++++-- docs/api_reference.rst | 1 + drgn/__init__.py | 2 ++ libdrgn/build-aux/gen_constants.py | 1 + libdrgn/drgn.h | 10 ++++++ libdrgn/dwarf_info.c | 16 +++++++--- libdrgn/language_c.c | 3 +- libdrgn/object.c | 4 ++- libdrgn/object.h | 4 ++- libdrgn/python/drgnpy.h | 2 ++ libdrgn/python/object.c | 50 +++++++++++++++++++++++++----- libdrgn/python/type.c | 3 +- libdrgn/python/util.c | 12 +++++++ tests/__init__.py | 3 ++ tests/test_object.py | 42 ++++++++++++++++++++++++- 15 files changed, 152 insertions(+), 21 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 5daafd8e1..9c0fbe625 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2322,6 +2322,7 @@ class Object: prog: Program, type: Union[str, Type], *, + absence_reason: AbsenceReason = AbsenceReason.OTHER, bit_field_size: Optional[IntegerLike] = None, ) -> None: """Create an absent object.""" @@ -2332,6 +2333,12 @@ class Object: type_: Final[Type] """Type of this object.""" + address_: Final[Optional[int]] + """ + Address of this object if it is a reference, ``None`` if it is a value or + absent. + """ + absent_: Final[bool] """ Whether this object is absent. @@ -2340,10 +2347,11 @@ class Object: an invalid address). """ - address_: Final[Optional[int]] + absence_reason_: Final[Optional[AbsenceReason]] """ - Address of this object if it is a reference, ``None`` if it is a value or - absent. + Reason that this object is absent. + + This is ``None`` for all values and references. """ bit_offset_: Final[Optional[int]] @@ -2666,6 +2674,12 @@ class Object: def __ceil__(self) -> int: ... def _repr_pretty_(self, p: Any, cycle: bool) -> None: ... +class AbsenceReason(enum.Enum): + """Reason an object is :ref:absent `.""" + + OTHER = ... + """Another reason not listed below.""" + def NULL(prog: Program, type: Union[str, Type]) -> Object: """ Get an object representing ``NULL`` casted to the given type. 
diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 98463c8fe..357bbcbc4 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -100,6 +100,7 @@ Objects ------- .. drgndoc:: Object +.. drgndoc:: AbsenceReason .. drgndoc:: NULL .. drgndoc:: cast .. drgndoc:: implicit_convert diff --git a/drgn/__init__.py b/drgn/__init__.py index c2b214e43..cc58a9ea8 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -48,6 +48,7 @@ from _drgn import ( NULL, + AbsenceReason, Architecture, DebugInfoOptions, ExtraModule, @@ -118,6 +119,7 @@ from drgn.internal.version import __version__ as __version__ # noqa: F401 __all__ = ( + "AbsenceReason", "Architecture", "DebugInfoOptions", "ExtraModule", diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 8232bfc71..008ece802 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -14,6 +14,7 @@ class ConstantClass(NamedTuple): CONSTANTS = ( + ConstantClass("AbsenceReason", "Enum", r"DRGN_ABSENCE_REASON_([a-zA-Z0-9_]+)"), ConstantClass("Architecture", "Enum", r"DRGN_ARCH_([a-zA-Z0-9_]+)"), ConstantClass("FindObjectFlags", "Flag", r"DRGN_FIND_OBJECT_([a-zA-Z0-9_]+)"), ConstantClass("KmodSearchMethod", "Enum", r"DRGN_KMOD_SEARCH_([a-zA-Z0-9_]+)"), diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 0e719b808..bfe76ee9c 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -2189,6 +2189,12 @@ static inline bool drgn_value_is_inline(uint64_t bits) return bits <= CHAR_BIT * sizeof(((union drgn_value *)0)->ibuf); } +/** Reason object is absent. */ +enum drgn_absence_reason { + /** Another reason not listed below. */ + DRGN_ABSENCE_REASON_OTHER, +}; + /** * Object in a program. * @@ -2245,6 +2251,8 @@ struct drgn_object { union drgn_value value; /** Address of reference object. */ uint64_t address; + /** Reason object is absent. 
*/ + enum drgn_absence_reason absence_reason; }; }; @@ -2435,6 +2443,7 @@ drgn_object_set_reference(struct drgn_object *res, * * @param[out] res Object to set. * @param[in] qualified_type Type to set to. + * @param[in] reason Reason object is absent. * @param[in] bit_field_size If the object should be a bit field, its size in * bits. Otherwise, 0. * @return @c NULL on success, non-@c NULL on error. @@ -2442,6 +2451,7 @@ drgn_object_set_reference(struct drgn_object *res, struct drgn_error * drgn_object_set_absent(struct drgn_object *res, struct drgn_qualified_type qualified_type, + enum drgn_absence_reason reason, uint64_t bit_field_size); /** diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index e6ee2b128..2e04337b4 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -4655,8 +4655,10 @@ drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, if (err) return err; Dwarf_Addr low_pc; - if (dwarf_lowpc(die, &low_pc) == -1) - return drgn_object_set_absent(ret, qualified_type, 0); + if (dwarf_lowpc(die, &low_pc) == -1) { + return drgn_object_set_absent(ret, qualified_type, + DRGN_ABSENCE_REASON_OTHER, 0); + } return drgn_object_set_reference(ret, qualified_type, low_pc + file->module->debug_file_bias, 0, 0); @@ -5008,7 +5010,8 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_template_value_parameter is missing value"); } - drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); + drgn_object_set_absent_internal(ret, &type, + DRGN_ABSENCE_REASON_OTHER); err = NULL; } else if (bit_offset >= 0) { err = drgn_object_set_reference_internal(ret, &type, address, @@ -5456,6 +5459,7 @@ drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) } err = drgn_object_set_absent(res, qualified_type, + DRGN_ABSENCE_REASON_OTHER, bit_field_size); if (err) return err; @@ -5723,7 +5727,8 @@ drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) if (err) return err; - err 
= drgn_object_set_absent(res, qualified_type, 0); + err = drgn_object_set_absent(res, qualified_type, + DRGN_ABSENCE_REASON_OTHER, 0); if (err) return err; } @@ -6312,7 +6317,8 @@ drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_) if (err) return err; - err = drgn_object_set_absent(res, qualified_type, 0); + err = drgn_object_set_absent(res, qualified_type, + DRGN_ABSENCE_REASON_OTHER, 0); if (err) return err; } diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 94f4851ab..fb106a006 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -3398,7 +3398,8 @@ static struct drgn_error *c_op_cast(struct drgn_object *res, switch (drgn_type_kind(type.underlying_type)) { case DRGN_TYPE_VOID: - drgn_object_set_absent_internal(res, &type); + drgn_object_set_absent_internal(res, &type, + DRGN_ABSENCE_REASON_OTHER); return NULL; case DRGN_TYPE_BOOL: { bool truthy; diff --git a/libdrgn/object.c b/libdrgn/object.c index ee1b38f7b..d74680b67 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -23,6 +23,7 @@ .type = &(prog)->void_types[DRGN_LANGUAGE_C], \ .encoding = DRGN_OBJECT_ENCODING_NONE, \ .kind = DRGN_OBJECT_ABSENT, \ + .absence_reason = DRGN_ABSENCE_REASON_OTHER, \ } LIBDRGN_PUBLIC @@ -498,6 +499,7 @@ drgn_object_set_reference(struct drgn_object *res, LIBDRGN_PUBLIC struct drgn_error * drgn_object_set_absent(struct drgn_object *res, struct drgn_qualified_type qualified_type, + enum drgn_absence_reason reason, uint64_t bit_field_size) { struct drgn_error *err; @@ -505,7 +507,7 @@ drgn_object_set_absent(struct drgn_object *res, err = drgn_object_type(qualified_type, bit_field_size, &type); if (err) return err; - drgn_object_set_absent_internal(res, &type); + drgn_object_set_absent_internal(res, &type, reason); return NULL; } diff --git a/libdrgn/object.h b/libdrgn/object.h index 5b9c20547..d1a7c688d 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -203,9 +203,11 @@ drgn_object_set_reference_internal(struct drgn_object 
*res, */ static inline void drgn_object_set_absent_internal(struct drgn_object *res, - const struct drgn_object_type *type) + const struct drgn_object_type *type, + enum drgn_absence_reason reason) { drgn_object_reinit(res, type, DRGN_OBJECT_ABSENT); + res->absence_reason = reason; } struct drgn_error * diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 16be14997..5eb59975a 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -284,6 +284,7 @@ typedef struct { PyObject *is_default; } TypeTemplateParameter; +extern PyObject *AbsenceReason_class; extern PyObject *Architecture_class; extern PyObject *FindObjectFlags_class; extern PyObject *KmodSearchMethod_class; @@ -427,6 +428,7 @@ int append_string(PyObject *parts, const char *s); int append_u64_hex(PyObject *parts, uint64_t value); int append_format(PyObject *parts, const char *format, ...); int append_attr_repr(PyObject *parts, PyObject *obj, const char *attr_name); +int append_attr_str(PyObject *parts, PyObject *obj, const char *attr_name); PyObject *join_strings(PyObject *parts); // Implementation of _repr_pretty_() for IPython/Jupyter that just calls str(). 
PyObject *repr_pretty_from_str(PyObject *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index 302a8f8b0..4cc9d8e45 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -365,21 +365,28 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "prog", "type", "value", "address", "bit_offset", - "bit_field_size", NULL, + "prog", "type", "value", "address", "absence_reason", + "bit_offset", "bit_field_size", NULL, }; struct drgn_error *err; Program *prog; PyObject *type_obj = Py_None, *value_obj = Py_None; struct index_arg address = { .allow_none = true, .is_none = true }; + struct enum_arg absence_reason = { + .type = AbsenceReason_class, + // Sentinel value so we can tell when the argument was passed. + .value = ULONG_MAX, + }; struct index_arg bit_offset = { .allow_none = true, .is_none = true }; struct index_arg bit_field_size = { .allow_none = true, .is_none = true }; struct drgn_qualified_type qualified_type; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|OO$O&O&O&:Object", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|OO$O&O&O&O&:Object", keywords, &Program_type, &prog, &type_obj, &value_obj, index_converter, - &address, index_converter, &bit_offset, - index_converter, &bit_field_size)) + &address, enum_converter, + &absence_reason, index_converter, + &bit_offset, index_converter, + &bit_field_size)) return NULL; if (Program_type_arg(prog, type_obj, true, &qualified_type) == -1) @@ -394,9 +401,17 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, _cleanup_pydecref_ DrgnObject *obj = DrgnObject_alloc(prog); if (!obj) return NULL; - if (!address.is_none && value_obj != Py_None) { - PyErr_SetString(PyExc_ValueError, - "object cannot have address and value"); + if (!address.is_none + + (value_obj != Py_None) + + (absence_reason.value != ULONG_MAX) > 1) { + PyErr_Format(PyExc_ValueError, + "object 
cannot have %s and %s", + !address.is_none + ? (value_obj != Py_None + && absence_reason.value != ULONG_MAX) + ? "address, value," : "address" : "value", + absence_reason.value != ULONG_MAX + ? "absence reason" : "value"); return NULL; } else if (!address.is_none) { if (!qualified_type.type) { @@ -532,6 +547,9 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, return NULL; } err = drgn_object_set_absent(&obj->obj, qualified_type, + absence_reason.value == ULONG_MAX + ? DRGN_ABSENCE_REASON_OTHER + : absence_reason.value, bit_field_size.uvalue); } if (err) @@ -872,6 +890,12 @@ static PyObject *DrgnObject_repr(DrgnObject *self) break; } case DRGN_OBJECT_ABSENT: + if (self->obj.absence_reason != DRGN_ABSENCE_REASON_OTHER) { + if (append_format(parts, ", absence_reason=") < 0 + || append_attr_str(parts, (PyObject *)self, + "absence_reason_") < 0) + return NULL; + } break; default: UNREACHABLE(); @@ -1001,6 +1025,14 @@ static PyObject *DrgnObject_get_absent(DrgnObject *self, void *arg) Py_RETURN_BOOL(self->obj.kind == DRGN_OBJECT_ABSENT); } +static PyObject *DrgnObject_get_absence_reason(DrgnObject *self, void *arg) +{ + if (self->obj.kind != DRGN_OBJECT_ABSENT) + Py_RETURN_NONE; + return PyObject_CallFunction(AbsenceReason_class, "i", + (int)self->obj.absence_reason); +} + static PyObject *DrgnObject_get_address(DrgnObject *self, void *arg) { if (self->obj.kind == DRGN_OBJECT_REFERENCE) @@ -1535,6 +1567,8 @@ static PyGetSetDef DrgnObject_getset[] = { {"type_", (getter)DrgnObject_get_type, NULL, drgn_Object_type__DOC}, {"absent_", (getter)DrgnObject_get_absent, NULL, drgn_Object_absent__DOC}, + {"absence_reason_", (getter)DrgnObject_get_absence_reason, NULL, + drgn_Object_absence_reason__DOC}, {"address_", (getter)DrgnObject_get_address, NULL, drgn_Object_address__DOC}, {"bit_offset_", (getter)DrgnObject_get_bit_offset, NULL, diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index b073b9b56..a68bdd301 100644 --- a/libdrgn/python/type.c 
+++ b/libdrgn/python/type.c @@ -829,7 +829,8 @@ static DrgnObject *DrgnType_to_absent_DrgnObject(DrgnType *type) if (!obj) return NULL; struct drgn_error *err = - drgn_object_set_absent(&obj->obj, DrgnType_unwrap(type), 0); + drgn_object_set_absent(&obj->obj, DrgnType_unwrap(type), + DRGN_ABSENCE_REASON_OTHER, 0); if (err) return set_drgn_error(err); return_ptr(obj); diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index 2096635a0..ac087aa72 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -53,6 +53,18 @@ int append_attr_repr(PyObject *parts, PyObject *obj, const char *attr_name) return PyList_Append(parts, str); } +int append_attr_str(PyObject *parts, PyObject *obj, const char *attr_name) +{ + _cleanup_pydecref_ PyObject *attr = + PyObject_GetAttrString(obj, attr_name); + if (!attr) + return -1; + _cleanup_pydecref_ PyObject *str = PyObject_Str(attr); + if (!str) + return -1; + return PyList_Append(parts, str); +} + PyObject *join_strings(PyObject *parts) { _cleanup_pydecref_ PyObject *sep = PyUnicode_New(0, 0); diff --git a/tests/__init__.py b/tests/__init__.py index e5619e5ef..2b646acc0 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,6 +11,7 @@ from unittest.mock import Mock from drgn import ( + AbsenceReason, Architecture, FindObjectFlags, Language, @@ -126,6 +127,7 @@ def assertReprPrettyEqualsStr(obj): _IDENTICAL_EQ_TYPES = ( type(None), + AbsenceReason, Language, PrimitiveType, Program, @@ -199,6 +201,7 @@ def _identical(a, b): "prog_", "type_", "address_", + "absence_reason_", "bit_offset_", "bit_field_size_", ), diff --git a/tests/test_object.py b/tests/test_object.py index 5c21eb422..7a111a70d 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -6,6 +6,7 @@ import struct from drgn import ( + AbsenceReason, FaultError, Object, ObjectAbsentError, @@ -43,7 +44,7 @@ def test_type(self): ValueError, "absent object must have type", Object, self.prog ) - def test_address_nand_value(self): + def 
test_address_value_absence_reason_nand(self): self.assertRaisesRegex( ValueError, "object cannot have address and value", @@ -62,6 +63,34 @@ def test_address_nand_value(self): value=0, address=0, ) + self.assertRaisesRegex( + ValueError, + "object cannot have address and absence reason", + Object, + self.prog, + "int", + address=0, + absence_reason=AbsenceReason.OTHER, + ) + self.assertRaisesRegex( + ValueError, + "object cannot have value and absence reason", + Object, + self.prog, + "int", + value=0, + absence_reason=AbsenceReason.OTHER, + ) + self.assertRaisesRegex( + ValueError, + "object cannot have address, value, and absence reason", + Object, + self.prog, + "int", + value=0, + address=0, + absence_reason=AbsenceReason.OTHER, + ) def test_integer_address(self): self.assertRaises(TypeError, Object, self.prog, "int", address="NULL") @@ -644,6 +673,7 @@ def test_signed(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("int")) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -677,6 +707,7 @@ def test_unsigned(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("unsigned int")) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -766,6 +797,7 @@ def test_signed_big(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.int_type("__int128", 16, True)) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -799,6 +831,7 @@ def test_unsigned_big(self): obj.type_, self.prog.int_type("unsigned __int128", 16, False) ) self.assertFalse(obj.absent_) + 
self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -863,6 +896,7 @@ def test_float(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("double")) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 3.14) self.assertEqual(repr(obj), "Object(prog, 'double', value=3.14)") @@ -1118,6 +1152,7 @@ def truncate(x, bit_size): def test_pointer(self): obj = Object(self.prog, "int *", value=0xFFFF0000) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 0xFFFF0000) self.assertEqual(repr(obj), "Object(prog, 'int *', value=0xffff0000)") @@ -1129,6 +1164,7 @@ def test_pointer_typedef(self): value=0xFFFF0000, ) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 0xFFFF0000) self.assertEqual(repr(obj), "Object(prog, 'INTP', value=0xffff0000)") @@ -1136,6 +1172,7 @@ def test_pointer_typedef(self): def test_array(self): obj = Object(self.prog, "int [2]", value=[1, 2]) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIdentical(obj[0], Object(self.prog, "int", value=1)) @@ -1215,6 +1252,9 @@ def test_basic(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("int")) self.assertTrue(obj.absent_) + self.assertEqual( + Object(self.prog, "int").absence_reason_, AbsenceReason.OTHER + ) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) From 2d25e2249aa90bd0ca4702a028cd82397f8f770d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Apr 2025 15:47:05 -0700 Subject: [PATCH 126/166] Add OPTIMIZED_OUT absence reason and use it for DWARF The most common ways 
users will run into absent objects are optimized out local variables and inlined functions. Add an absence reason for this and use it for DWARF, then use it to display such objects with more familiar terminology (""). We might want to distinguish between truly optimized out values and unsaved registers, but we can improve that later. See #488. Signed-off-by: Omar Sandoval --- _drgn.pyi | 2 ++ libdrgn/drgn.h | 2 ++ libdrgn/dwarf_info.c | 5 +++-- libdrgn/language_c.c | 14 +++++++++++++- tests/test_dwarf.py | 38 +++++++++++++++++++++++++++++++------- tests/test_language_c.py | 7 +++++++ tests/test_object.py | 7 +++++++ 7 files changed, 65 insertions(+), 10 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 9c0fbe625..28f7c0203 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2679,6 +2679,8 @@ class AbsenceReason(enum.Enum): OTHER = ... """Another reason not listed below.""" + OPTIMIZED_OUT = ... + """Object was optimized out by the compiler.""" def NULL(prog: Program, type: Union[str, Type]) -> Object: """ diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index bfe76ee9c..52c80aa1d 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -2193,6 +2193,8 @@ static inline bool drgn_value_is_inline(uint64_t bits) enum drgn_absence_reason { /** Another reason not listed below. */ DRGN_ABSENCE_REASON_OTHER, + /** Object was optimized out by the compiler. 
*/ + DRGN_ABSENCE_REASON_OPTIMIZED_OUT, }; /** diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 2e04337b4..3a8567afa 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -4657,7 +4657,8 @@ drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, Dwarf_Addr low_pc; if (dwarf_lowpc(die, &low_pc) == -1) { return drgn_object_set_absent(ret, qualified_type, - DRGN_ABSENCE_REASON_OTHER, 0); + DRGN_ABSENCE_REASON_OPTIMIZED_OUT, + 0); } return drgn_object_set_reference(ret, qualified_type, low_pc + file->module->debug_file_bias, @@ -5011,7 +5012,7 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, "DW_AT_template_value_parameter is missing value"); } drgn_object_set_absent_internal(ret, &type, - DRGN_ABSENCE_REASON_OTHER); + DRGN_ABSENCE_REASON_OPTIMIZED_OUT); err = NULL; } else if (bit_offset >= 0) { err = drgn_object_set_reference_internal(ret, &type, address, diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index fb106a006..5cf07667f 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1536,6 +1536,17 @@ c_format_function_object(const struct drgn_object *obj, return NULL; } +static const char *drgn_absence_reason_str(enum drgn_absence_reason reason) +{ + SWITCH_ENUM (reason) { + case DRGN_ABSENCE_REASON_OPTIMIZED_OUT: + return ""; + case DRGN_ABSENCE_REASON_OTHER: + default: + return ""; + } +} + static struct drgn_error * c_format_object_impl(const struct drgn_object *obj, size_t indent, size_t one_line_columns, size_t multi_line_columns, @@ -1574,7 +1585,8 @@ c_format_object_impl(const struct drgn_object *obj, size_t indent, } if (obj->kind == DRGN_OBJECT_ABSENT) { - if (!string_builder_append(sb, "")) + if (!string_builder_append(sb, + drgn_absence_reason_str(obj->absence_reason))) return &drgn_enomem; return NULL; } diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 7ce407ae1..8b82b615e 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -10,6 +10,7 @@ import drgn from drgn import ( 
+ AbsenceReason, FaultError, FindObjectFlags, Language, @@ -4586,7 +4587,12 @@ def test_function_no_address(self): ) ) self.assertIdentical( - prog.object("abort"), Object(prog, prog.function_type(prog.void_type(), ())) + prog.object("abort"), + Object( + prog, + prog.function_type(prog.void_type(), ()), + absence_reason=AbsenceReason.OPTIMIZED_OUT, + ), ) def test_function_concrete_out_of_line_instance(self): @@ -4838,7 +4844,10 @@ def test_variable_no_address(self): ), ) ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_empty(self): prog = dwarf_program( @@ -4854,7 +4863,10 @@ def test_variable_expr_empty(self): ), ) ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_bit_piece(self): prog = dwarf_program( @@ -5091,7 +5103,10 @@ def test_variable_expr_implicit_value_piece_empty(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_stack_value(self): for little_endian in (True, False): @@ -5221,7 +5236,10 @@ def test_variable_expr_stack_value_piece_empty(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_contiguous_piece_addresses(self): prog = dwarf_program( @@ -5507,7 +5525,10 @@ def test_variable_expr_address_empty_piece(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def 
test_variable_expr_absent_empty_piece(self): prog = dwarf_program( @@ -5530,7 +5551,10 @@ def test_variable_expr_absent_empty_piece(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_unknown(self): prog = dwarf_program( diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 448feb326..9af7825ce 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -4,6 +4,7 @@ import operator from drgn import ( + AbsenceReason, Object, Qualifiers, Type, @@ -3059,6 +3060,12 @@ def test_absent(self): type_name = type_ self.assertEqual(str(Object(self.prog, type_)), f"({type_name})") + def test_optimized_out(self): + self.assertEqual( + str(Object(self.prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT)), + "(int)", + ) + def test_bigint(self): segment = bytearray(16) self.add_memory_segment(segment, virt_addr=0xFFFF0000) diff --git a/tests/test_object.py b/tests/test_object.py index 7a111a70d..5b0509482 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -1263,6 +1263,13 @@ def test_basic(self): self.assertRaises(ObjectAbsentError, obj.read_) + def test_reason(self): + obj = Object(self.prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT) + self.assertEqual(obj.absence_reason_, AbsenceReason.OPTIMIZED_OUT) + self.assertEqual( + repr(obj), "Object(prog, 'int', absence_reason=AbsenceReason.OPTIMIZED_OUT)" + ) + def test_bit_field(self): obj = Object(self.prog, "int", bit_field_size=1) self.assertIs(obj.prog_, self.prog) From 2027d0fea84d74b835e77392f7040c2a333180c6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Apr 2025 16:45:20 -0700 Subject: [PATCH 127/166] Add NOT_IMPLEMENTED absence reason and use it for DWARF Rather than failing hard on an unknown DWARF expression opcode, log a warning and return an absent object with a NOT_IMPLEMENTED absence reason. 
Since DW_OP_(GNU_)entry_value is somewhat common and we already know about it, we still classify it as OPTIMIZED_OUT rather than NOT_IMPLEMENTED. Signed-off-by: Omar Sandoval --- _drgn.pyi | 2 + libdrgn/drgn.h | 2 + libdrgn/dwarf_info.c | 87 ++++++++++++++++++++++++++++++++++++-------- libdrgn/language_c.c | 2 + tests/test_dwarf.py | 10 +++-- 5 files changed, 84 insertions(+), 19 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 28f7c0203..579205481 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2681,6 +2681,8 @@ class AbsenceReason(enum.Enum): """Another reason not listed below.""" OPTIMIZED_OUT = ... """Object was optimized out by the compiler.""" + NOT_IMPLEMENTED = ... + """Encountered unknown debugging information.""" def NULL(prog: Program, type: Union[str, Type]) -> Object: """ diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 52c80aa1d..881a44d45 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -2195,6 +2195,8 @@ enum drgn_absence_reason { DRGN_ABSENCE_REASON_OTHER, /** Object was optimized out by the compiler. */ DRGN_ABSENCE_REASON_OPTIMIZED_OUT, + /** Encountered unknown debugging information. 
*/ + DRGN_ABSENCE_REASON_NOT_IMPLEMENTED, }; /** diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 3a8567afa..00684a72c 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -3798,6 +3798,48 @@ drgn_dwarf_frame_base(struct drgn_program *prog, struct drgn_elf_file *file, Dwarf_Die *die, const struct drgn_register_state *regs, int *remaining_ops, uint64_t *ret); +static struct drgn_error drgn_unknown_dwarf_opcode = { + .code = DRGN_ERROR_NOT_IMPLEMENTED, + .message = "unknown DWARF expression opcode", +}; + +static bool drgn_dwarf_opcode_is_known(uint8_t opcode) +{ +#define X(name, _) if (opcode == name) return true; + DW_OP_DEFINITIONS +#undef X + return false; +} + +static struct drgn_error * +drgn_handle_unknown_dwarf_opcode(struct drgn_dwarf_expression_context *ctx, + uint8_t opcode, + bool after_simple_location_description) +{ + // We warn the first time that we see an opcode that appears to be + // valid. + static bool warned; + enum drgn_log_level log_level = DRGN_LOG_DEBUG; + if (drgn_dwarf_opcode_is_known(opcode) + && !__atomic_test_and_set(&warned, __ATOMIC_SEQ_CST)) + log_level = DRGN_LOG_WARNING; + if (drgn_log_is_enabled(ctx->prog, log_level)) { + struct drgn_error *err; + char op_buf[DW_OP_STR_BUF_LEN]; + err = binary_buffer_error(&ctx->bb, + "unknown DWARF expression opcode %s%s; " + "please report this to %s", + dw_op_str(opcode, op_buf), + after_simple_location_description + ? " after simple location description" + : "", + PACKAGE_BUGREPORT); + drgn_error_log(log_level, ctx->prog, err, ""); + drgn_error_destroy(err); + } + return &drgn_unknown_dwarf_opcode; +} + /* * Evaluate a DWARF expression up to the next location description operation or * operation that can't be evaluated in the given context. @@ -4248,6 +4290,15 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, // address and using the DW_AT_(GNU_)call_value of a // DW_TAG_(GNU_)call_parameter with a DW_AT_location // matching that register. 
+ if (drgn_log_is_enabled(ctx->prog, DRGN_LOG_DEBUG)) { + char op_buf[DW_OP_STR_BUF_LEN]; + err = binary_buffer_error(&ctx->bb, + "unimplemented DWARF expression opcode %s; " + "please upvote https://github.com/osandov/drgn/issues/337", + dw_op_str(opcode, op_buf)); + drgn_error_log_debug(ctx->prog, err, ""); + drgn_error_destroy(err); + } return &drgn_not_found; /* Location description operations. */ case DW_OP_reg0 ... DW_OP_reg31: @@ -4272,14 +4323,8 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, * DW_OP_xderef_size, DW_OP_xderef_type. */ default: - { - char op_buf[DW_OP_STR_BUF_LEN]; - return binary_buffer_error(&ctx->bb, - "unknown DWARF expression opcode %s; " - "please report this to %s", - dw_op_str(opcode, op_buf), - PACKAGE_BUGREPORT); - } + return drgn_handle_unknown_dwarf_opcode(ctx, opcode, + false); } } @@ -4760,6 +4805,9 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ int bit_offset = -1; /* -1 means that we don't have an address. 
*/ + enum drgn_absence_reason absence_reason = + DRGN_ABSENCE_REASON_OPTIMIZED_OUT; + uint64_t bit_pos = 0; int remaining_ops = MAX_DWARF_EXPR_OPS; @@ -4772,6 +4820,13 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, do { uint64_vector_clear(&stack); err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); + if (err) { + if (err == &drgn_unknown_dwarf_opcode) + absence_reason = DRGN_ABSENCE_REASON_NOT_IMPLEMENTED; + else if (err != &drgn_not_found) + goto out; + goto absent; + } if (err == &drgn_not_found) goto absent; else if (err) @@ -4873,10 +4928,10 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, piece_bit_size = type.bit_size - bit_pos; break; default: - err = binary_buffer_error(&ctx.bb, - "unknown DWARF expression opcode %#" PRIx8 " after simple location description", - opcode); - goto out; + drgn_handle_unknown_dwarf_opcode(&ctx, opcode, + true); + absence_reason = DRGN_ABSENCE_REASON_NOT_IMPLEMENTED; + goto absent; } } else { piece_bit_size = type.bit_size - bit_pos; @@ -5011,8 +5066,7 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_template_value_parameter is missing value"); } - drgn_object_set_absent_internal(ret, &type, - DRGN_ABSENCE_REASON_OPTIMIZED_OUT); + drgn_object_set_absent_internal(ret, &type, absence_reason); err = NULL; } else if (bit_offset >= 0) { err = drgn_object_set_reference_internal(ret, &type, address, @@ -7742,8 +7796,11 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, drgn_dwarf_expression_context_init(&ctx, prog, file, NULL, NULL, regs, rule->expr, rule->expr_size); err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); - if (err) + if (err) { + if (err == &drgn_unknown_dwarf_opcode) + err = &drgn_not_found; return err; + } if (binary_buffer_has_next(&ctx.bb)) { uint8_t opcode; err = binary_buffer_next_u8(&ctx.bb, &opcode); diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 5cf07667f..448b2e9c8 100644 
--- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1541,6 +1541,8 @@ static const char *drgn_absence_reason_str(enum drgn_absence_reason reason) SWITCH_ENUM (reason) { case DRGN_ABSENCE_REASON_OPTIMIZED_OUT: return ""; + case DRGN_ABSENCE_REASON_NOT_IMPLEMENTED: + return ""; case DRGN_ABSENCE_REASON_OTHER: default: return ""; diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 8b82b615e..23fabe997 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -5570,8 +5570,9 @@ def test_variable_expr_unknown(self): ), ) ) - self.assertRaisesRegex( - Exception, "unknown DWARF expression opcode", prog.object, "x" + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.NOT_IMPLEMENTED), ) def test_variable_expr_unknown_after_location(self): @@ -5597,8 +5598,9 @@ def test_variable_expr_unknown_after_location(self): ), ) ) - self.assertRaisesRegex( - Exception, "unknown DWARF expression opcode", prog.object, "x" + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.NOT_IMPLEMENTED), ) def _eval_dwarf_expr(self, ops, **kwds): From 644a47ba508778a0649d120d69d02c866551a0b8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 14:01:20 -0700 Subject: [PATCH 128/166] drgn.helpers.linux.fs: handle DCACHE_MOUNTED value change in 6.15 Famous last words: "it hasn't changed since v2.6.38, so let's hardcode it for now." Linux 6.15 changed the value of DCACHE_MOUNTED, which broke path_lookup(). I sent a patch converting the DCACHE_* flags to an enum [1], but we need to unbreak 6.15 (with a version check, unfortunately) until that lands. 
[1]: https://lore.kernel.org/linux-fsdevel/177665a082f048cf536b9cd6af467b3be6b6e6ed.1744141838.git.osandov@fb.com/T/#u Signed-off-by: Omar Sandoval --- drgn/helpers/linux/fs.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 7424db96e..9a8eaf6c6 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -40,9 +40,22 @@ def _follow_mount(mnt: Object, dentry: Object) -> Tuple[Object, Object]: - # DCACHE_MOUNTED is a macro, so we can't easily get the value. But, it - # hasn't changed since v2.6.38, so let's hardcode it for now. - DCACHE_MOUNTED = 0x10000 + prog = dentry.prog_ + try: + DCACHE_MOUNTED = prog.cache["DCACHE_MOUNTED"] + except KeyError: + tokens = prog["UTS_RELEASE"].string_().split(b".", 2) + major, minor = int(tokens[0]), int(tokens[1]) + # Linux kernel commit 9748cb2dc393 ("VFS: repack DENTRY_ flags.") (in + # v6.15) changed the value of DCACHE_MOUNTED. Unfortunately, it's a + # macro, so we have to hardcode it based on a version check until it's + # converted to an enum. + if (major, minor) >= (6, 15): + DCACHE_MOUNTED = 1 << 15 + else: + DCACHE_MOUNTED = 1 << 16 + prog.cache["DCACHE_MOUNTED"] = DCACHE_MOUNTED + while dentry.d_flags & DCACHE_MOUNTED: for mounted in list_for_each_entry( "struct mount", mnt.mnt_mounts.address_of_(), "mnt_child" From 36d30653594642d4de557a9881cc345e4337b95e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 14:09:02 -0700 Subject: [PATCH 129/166] drgn.helpers.linux.kernfs: add kernfs_parent() and handle 6.15 Linux 6.15 renamed kernfs_node::parent to __parent, which broke a couple of kernfs helpers. Add a kernfs_parent() helper that handles the change and use it in the existing helpers. 
Signed-off-by: Omar Sandoval --- drgn/helpers/linux/kernfs.py | 25 +++++++++++++++++++---- tests/linux_kernel/helpers/test_kernfs.py | 19 ++++++++++++++++- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index b60c958f3..b0ee7a581 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -16,11 +16,27 @@ __all__ = ( "kernfs_name", + "kernfs_parent", "kernfs_path", "kernfs_walk", ) +def kernfs_parent(kn: Object) -> Object: + """ + Get the parent of the given kernfs node. + + :param kn: ``struct kernfs_node *`` + :return: ``struct kernfs_node *`` + """ + # Linux kernel commit 633488947ef6 ("kernfs: Use RCU to access + # kernfs_node::parent.") (in v6.15) renamed the parent member. + try: + return kn.__parent.read_() + except AttributeError: + return kn.parent.read_() + + def kernfs_name(kn: Object) -> bytes: """ Get the name of the given kernfs node. @@ -29,12 +45,13 @@ def kernfs_name(kn: Object) -> bytes: """ if not kn: return b"(null)" - return kn.name.string_() if kn.parent else b"/" + return kn.name.string_() if kernfs_parent(kn) else b"/" def _kernfs_root(kn: Object) -> Object: - if kn.parent: - kn = kn.parent + knp = kernfs_parent(kn) + if knp: + kn = knp return kn.dir.root @@ -54,7 +71,7 @@ def kernfs_path(kn: Object) -> bytes: names = [] while kn != root_kn: names.append(kn.name.string_()) - kn = kn.parent + kn = kernfs_parent(kn) names.append(root_kn.name.string_()) names.reverse() diff --git a/tests/linux_kernel/helpers/test_kernfs.py b/tests/linux_kernel/helpers/test_kernfs.py index dfa8b4d15..37128f582 100644 --- a/tests/linux_kernel/helpers/test_kernfs.py +++ b/tests/linux_kernel/helpers/test_kernfs.py @@ -1,11 +1,17 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later +import contextlib import os from drgn import NULL, cast from drgn.helpers.linux.fs import fget -from drgn.helpers.linux.kernfs import kernfs_name, kernfs_path, kernfs_walk +from drgn.helpers.linux.kernfs import ( + kernfs_name, + kernfs_parent, + kernfs_path, + kernfs_walk, +) from drgn.helpers.linux.pid import find_task from tests.linux_kernel import LinuxKernelTestCase @@ -16,6 +22,17 @@ def kernfs_node_from_fd(cls, fd): file = fget(find_task(cls.prog, os.getpid()), fd) return cast("struct kernfs_node *", file.f_inode.i_private) + def test_kernfs_parent(self): + with contextlib.ExitStack() as exit_stack: + fd = os.open("/sys/kernel/vmcoreinfo", os.O_RDONLY) + exit_stack.callback(os.close, fd) + dfd = os.open("/sys/kernel", os.O_RDONLY) + exit_stack.callback(os.close, dfd) + self.assertEqual( + kernfs_parent(self.kernfs_node_from_fd(fd)), + self.kernfs_node_from_fd(dfd), + ) + def test_kernfs_name(self): with open("/sys/kernel/vmcoreinfo", "r") as f: kn = self.kernfs_node_from_fd(f.fileno()) From 70dcba2348d5365df10bf05d780af11435cd3735 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 14:17:32 -0700 Subject: [PATCH 130/166] drgn.helpers.linux.kernfs: add kernfs_root() helper We already have this internally, so let's make it public. Signed-off-by: Omar Sandoval --- drgn/helpers/linux/kernfs.py | 23 +++++++++++++++-------- tests/linux_kernel/helpers/test_kernfs.py | 13 +++++++++++++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index b0ee7a581..5a1604b85 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -18,10 +18,24 @@ "kernfs_name", "kernfs_parent", "kernfs_path", + "kernfs_root", "kernfs_walk", ) +def kernfs_root(kn: Object) -> Object: + """ + Get the kernfs root that the given kernfs node belongs to. 
+ + :param kn: ``struct kernfs_node *`` + :return: ``struct kernfs_root *`` + """ + knp = kernfs_parent(kn) + if knp: + kn = knp + return kn.dir.root.read_() + + def kernfs_parent(kn: Object) -> Object: """ Get the parent of the given kernfs node. @@ -48,13 +62,6 @@ def kernfs_name(kn: Object) -> bytes: return kn.name.string_() if kernfs_parent(kn) else b"/" -def _kernfs_root(kn: Object) -> Object: - knp = kernfs_parent(kn) - if knp: - kn = knp - return kn.dir.root - - def kernfs_path(kn: Object) -> bytes: """ Get full path of the given kernfs node. @@ -64,7 +71,7 @@ def kernfs_path(kn: Object) -> bytes: if not kn: return b"(null)" - root_kn = _kernfs_root(kn).kn + root_kn = kernfs_root(kn).kn if kn == root_kn: return b"/" diff --git a/tests/linux_kernel/helpers/test_kernfs.py b/tests/linux_kernel/helpers/test_kernfs.py index 37128f582..746a7e986 100644 --- a/tests/linux_kernel/helpers/test_kernfs.py +++ b/tests/linux_kernel/helpers/test_kernfs.py @@ -10,6 +10,7 @@ kernfs_name, kernfs_parent, kernfs_path, + kernfs_root, kernfs_walk, ) from drgn.helpers.linux.pid import find_task @@ -33,6 +34,18 @@ def test_kernfs_parent(self): self.kernfs_node_from_fd(dfd), ) + def test_kernfs_root(self): + for path in ("/sys", "/sys/kernel", "/sys/kernel/vmcoreinfo"): + with self.subTest(path=path): + fd = os.open(path, os.O_RDONLY) + try: + self.assertEqual( + kernfs_root(self.kernfs_node_from_fd(fd)), + self.prog["sysfs_root"], + ) + finally: + os.close(fd) + def test_kernfs_name(self): with open("/sys/kernel/vmcoreinfo", "r") as f: kn = self.kernfs_node_from_fd(f.fileno()) From cfe3823c64c653bd8eb3ace686d7c9db753b46ab Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 14:20:22 -0700 Subject: [PATCH 131/166] Add 6.15 to supported kernels This one needed updates to the kernfs helpers and path_lookup(). 
Signed-off-by: Omar Sandoval --- docs/support_matrix.rst | 2 +- vmtest/config.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/support_matrix.rst b/docs/support_matrix.rst index bf81f461a..41c7d51ad 100644 --- a/docs/support_matrix.rst +++ b/docs/support_matrix.rst @@ -72,7 +72,7 @@ currently fully supported are: .. Keep this in sync with vmtest/config.py. -- 6.0-6.14 +- 6.0-6.15 - 5.10-5.19 - 5.4 - 4.19 diff --git a/vmtest/config.py b/vmtest/config.py index c302afd65..28447458f 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -14,6 +14,7 @@ # Kernel versions that we run tests on and therefore support. Keep this in sync # with docs/support_matrix.rst. SUPPORTED_KERNEL_VERSIONS = ( + "6.15", "6.14", "6.13", "6.12", From 9b7297dfd2c4d168bad0351250e797d6f2e67f74 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 14:32:35 -0700 Subject: [PATCH 132/166] vmtest.config: enable CONFIG_KPROBES for upcoming kmodify breakpoints I'm experimenting with kprobes-based breakpoint support via kmodify (not coming soon but eventually). Enable CONFIG_KPROBES so I can test it. Signed-off-by: Omar Sandoval --- vmtest/config.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/vmtest/config.py b/vmtest/config.py index 28447458f..699af13a3 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -9,7 +9,6 @@ from typing import Dict, Mapping, NamedTuple, Sequence from _drgn_util.platform import NORMALIZED_MACHINE_NAME -from util import KernelVersion # Kernel versions that we run tests on and therefore support. Keep this in sync # with docs/support_matrix.rst. @@ -110,6 +109,9 @@ # For testing kernel core dumps from QEMU's dump-guest-memory command. CONFIG_FW_CFG_SYSFS=y +# kmodify breakpoints need kprobes. +CONFIG_KPROBES=y + # For BPF tests. 
CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y @@ -429,17 +431,11 @@ def kconfig_localversion(arch: Architecture, flavor: KernelFlavor, version: str) vmtest_kernel_version = [ # Increment the major version to rebuild every # architecture/flavor/version combination. - 34, + 35, # The minor version makes the default flavor the "latest" version. 1 if flavor.name == "default" else 0, ] patch_level = 0 - if ( - arch.name == "aarch64" - and flavor.name == "alternative" - and KernelVersion("5.18.18") <= KernelVersion(version) < KernelVersion("5.19") - ): - patch_level += 1 # If only specific architecture/flavor/version combinations need to be # rebuilt, conditionally increment the patch level here. if patch_level: From 32d29a1f2c34cdf0ba02673e3a066fd62a35c994 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 4 Apr 2025 16:06:39 -0700 Subject: [PATCH 133/166] libdrgn: linux_kernel: don't use drgn_module_find_or_create() I think I only used drgn_module_find_or_create() instead of drgn_module_find_or_create_relocatable() because at some point earlier in development, the latter didn't have new_ret. Change it so we can make drgn_module_find_or_create() static. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 9 ++++----- libdrgn/debug_info.h | 6 ------ libdrgn/linux_kernel.c | 21 ++++++++++++--------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 6d17d104d..74f0d981f 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -276,11 +276,10 @@ struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, return it.entry; } -struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, - const struct drgn_module_key *key, - const char *name, - struct drgn_module **ret, - bool *new_ret) +static struct drgn_error * +drgn_module_find_or_create(struct drgn_program *prog, + const struct drgn_module_key *key, const char *name, + struct drgn_module **ret, bool *new_ret) { struct drgn_error *err; diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index c49b48f66..bb38cf005 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -282,12 +282,6 @@ struct drgn_module { struct drgn_object object; }; -struct drgn_error *drgn_module_find_or_create(struct drgn_program *prog, - const struct drgn_module_key *key, - const char *name, - struct drgn_module **ret, - bool *new_ret); - /** * Delete a partially-initialized module. This can only be called before the * module is returned from public API. 
diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 5147ef057..da37eb4e9 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1833,8 +1833,6 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, struct drgn_error *err; struct drgn_program *prog = drgn_object_program(module_obj); - struct drgn_module_key key; - key.kind = DRGN_MODULE_RELOCATABLE; uint64_t name_offset; err = drgn_type_offsetof(module_obj->type, "name", &name_offset); if (err) @@ -1845,7 +1843,7 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, return drgn_error_create(DRGN_ERROR_OTHER, "couldn't read module name"); } - key.relocatable.name = drgn_object_buffer(module_obj) + name_offset; + const char *name = drgn_object_buffer(module_obj) + name_offset; DRGN_OBJECT(mem, prog); DRGN_OBJECT(val, prog); @@ -1896,16 +1894,22 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, err = drgn_object_member(&val, &mem, "base"); if (err) return err; - err = drgn_object_read_unsigned(&val, &key.relocatable.address); + uint64_t address; + err = drgn_object_read_unsigned(&val, &address); if (err) return err; if (log) { drgn_log_debug(prog, "found loaded kernel module %s@0x%" PRIx64, - key.relocatable.name, key.relocatable.address); + name, address); } if (!create) { + const struct drgn_module_key key = { + .kind = DRGN_MODULE_RELOCATABLE, + .relocatable.name = name, + .relocatable.address = address, + }; *ret = drgn_module_find(prog, &key); if (new_ret) *new_ret = false; @@ -1914,8 +1918,8 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; bool new; - err = drgn_module_find_or_create(prog, &key, key.relocatable.name, - &module, &new); + err = drgn_module_find_or_create_relocatable(prog, name, address, + &module, &new); if (err) return err; if (!new) { @@ -1941,8 +1945,7 @@ 
kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, return err; drgn_log_debug(prog, "module size is %" PRIu64, size); - err = drgn_module_set_address_range(module, key.relocatable.address, - key.relocatable.address + size); + err = drgn_module_set_address_range(module, address, address + size); if (err) return err; From d48a5bb9716fcbb10e3de3b789c994214c488d9a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 4 Apr 2025 21:44:13 -0700 Subject: [PATCH 134/166] libdrgn: replace drgn_module_find() with kind-specific find functions This is more type safe and more consistent with the Python API. (drgn_module_find() still exists as a static function in debug_info.c.) Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 66 ++++++++++++++++++++++++++++++++++-- libdrgn/drgn.h | 36 ++++++++++++++------ libdrgn/linux_kernel.c | 7 +--- libdrgn/python/program.c | 72 +++++++++++++--------------------------- 4 files changed, 113 insertions(+), 68 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 74f0d981f..da121d989 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -251,9 +251,8 @@ static void drgn_module_free_section_addresses(struct drgn_module *module) free(it.entry->key); } -LIBDRGN_PUBLIC -struct drgn_module *drgn_module_find(struct drgn_program *prog, - const struct drgn_module_key *key) +static struct drgn_module *drgn_module_find(struct drgn_program *prog, + const struct drgn_module_key *key) { if (key->kind == DRGN_MODULE_MAIN) { return prog->dbinfo.main_module; @@ -412,6 +411,16 @@ drgn_module_find_or_create(struct drgn_program *prog, return err; } +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_main(struct drgn_program *prog, + const char *name) +{ + if (name && prog->dbinfo.main_module + && strcmp(prog->dbinfo.main_module->name, name) != 0) + return NULL; + return prog->dbinfo.main_module; +} + LIBDRGN_PUBLIC struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, 
const char *name, @@ -422,6 +431,19 @@ struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, return drgn_module_find_or_create(prog, &key, name, ret, new_ret); } +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_SHARED_LIBRARY, + .shared_library.name = name, + .shared_library.dynamic_address = dynamic_address, + }; + return drgn_module_find(prog, &key); +} + LIBDRGN_PUBLIC struct drgn_error * drgn_module_find_or_create_shared_library(struct drgn_program *prog, const char *name, @@ -437,6 +459,19 @@ drgn_module_find_or_create_shared_library(struct drgn_program *prog, return drgn_module_find_or_create(prog, &key, name, ret, new_ret); } +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_VDSO, + .vdso.name = name, + .vdso.dynamic_address = dynamic_address, + }; + return drgn_module_find(prog, &key); +} + LIBDRGN_PUBLIC struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, const char *name, @@ -452,6 +487,19 @@ struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, return drgn_module_find_or_create(prog, &key, name, ret, new_ret); } +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_relocatable(struct drgn_program *prog, + const char *name, + uint64_t address) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_RELOCATABLE, + .relocatable.name = name, + .relocatable.address = address, + }; + return drgn_module_find(prog, &key); +} + LIBDRGN_PUBLIC struct drgn_error * drgn_module_find_or_create_relocatable(struct drgn_program *prog, const char *name, uint64_t address, @@ -465,6 +513,18 @@ drgn_module_find_or_create_relocatable(struct drgn_program *prog, return 
drgn_module_find_or_create(prog, &key, name, ret, new_ret); } +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_extra(struct drgn_program *prog, + const char *name, uint64_t id) +{ + const struct drgn_module_key key = { + .kind = DRGN_MODULE_EXTRA, + .extra.name = name, + .extra.id = id, + }; + return drgn_module_find(prog, &key); +} + LIBDRGN_PUBLIC struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, const char *name, diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 881a44d45..6294c0080 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1251,20 +1251,20 @@ struct drgn_module_key { }; /** - * Find the created @ref drgn_module matching the given @p key. + * Find the created @ref drgn_module containing the given @p address. * * @return Module, or @c NULL if not found. */ -struct drgn_module *drgn_module_find(struct drgn_program *prog, - const struct drgn_module_key *key); +struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, + uint64_t address); /** - * Find the created @ref drgn_module containing the given @p address. + * Find the main module. * - * @return Module, or @c NULL if not found. + * @param[in] name Module name, or @c NULL to match any name. */ -struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, - uint64_t address); +struct drgn_module *drgn_module_find_main(struct drgn_program *prog, + const char *name); /** * Find the main module, creating it if it doesn't already exist. @@ -1277,6 +1277,11 @@ struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret); +/** Find a shared library module. */ +struct drgn_module *drgn_module_find_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address); + /** * Find a shared library module, creating it if it doesn't already exist. 
* @@ -1290,6 +1295,11 @@ drgn_module_find_or_create_shared_library(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret); +/** Find a vDSO module. */ +struct drgn_module *drgn_module_find_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address); + /** * Find a vDSO module, creating it if it doesn't already exist. * @@ -1302,6 +1312,11 @@ struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret); +/** Find a relocatable module. */ +struct drgn_module *drgn_module_find_relocatable(struct drgn_program *prog, + const char *name, + uint64_t address); + /** * Find a relocatable module, creating it if it doesn't already exist. * @@ -1315,9 +1330,6 @@ drgn_module_find_or_create_relocatable(struct drgn_program *prog, /** * Find a created Linux kernel loadable module from a ``struct module *`` object. - * - * @param[out] new_ret @c true if the module was newly created, @c false if it - * was found. */ struct drgn_error * drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_obj, @@ -1335,6 +1347,10 @@ drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *modul struct drgn_module **ret, bool *new_ret); +/** Find an extra module. */ +struct drgn_module *drgn_module_find_extra(struct drgn_program *prog, + const char *name, uint64_t id); + /** * Find an extra module, creating it if it doesn't already exist. 
* diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index da37eb4e9..d7c2da04f 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1905,12 +1905,7 @@ kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, } if (!create) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_RELOCATABLE, - .relocatable.name = name, - .relocatable.address = address, - }; - *ret = drgn_module_find(prog, &key); + *ret = drgn_module_find_relocatable(prog, name, address); if (new_ret) *new_ret = false; return NULL; diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 637a1e549..40d564691 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1054,6 +1054,14 @@ static PyObject *Program_create_loaded_modules(Program *self) Py_RETURN_NONE; } +static inline PyObject *Module_wrap_find(struct drgn_module *module) +{ + if (module) + return Module_wrap(module); + PyErr_SetString(PyExc_LookupError, "module not found"); + return NULL; +} + static PyObject *Program_main_module(Program *self, PyObject *args, PyObject *kwds) { @@ -1082,28 +1090,11 @@ static PyObject *Program_main_module(Program *self, PyObject *args, } return Module_and_bool_wrap(module, new); } else { - struct drgn_module_key key = { .kind = DRGN_MODULE_MAIN }; - struct drgn_module *module = drgn_module_find(&self->prog, &key); - if (!module - || (name.path - && strcmp(drgn_module_name(module), name.path) != 0)) { - PyErr_SetString(PyExc_LookupError, "module not found"); - return NULL; - } - return Module_wrap(module); + return Module_wrap_find(drgn_module_find_main(&self->prog, + name.path)); } } -static PyObject *Program_find_module(Program *self, const struct drgn_module_key *key) -{ - struct drgn_module *module = drgn_module_find(&self->prog, key); - if (!module) { - PyErr_SetString(PyExc_LookupError, "module not found"); - return NULL; - } - return Module_wrap(module); -} - static PyObject *Program_shared_library_module(Program *self, 
PyObject *args, PyObject *kwds) { @@ -1132,13 +1123,9 @@ static PyObject *Program_shared_library_module(Program *self, PyObject *args, } return Module_and_bool_wrap(module, new); } else { - struct drgn_module_key key = { - .kind = DRGN_MODULE_SHARED_LIBRARY, - .shared_library.name = name.path, - .shared_library.dynamic_address = - dynamic_address.uvalue, - }; - return Program_find_module(self, &key); + return Module_wrap_find(drgn_module_find_shared_library(&self->prog, + name.path, + dynamic_address.uvalue)); } } @@ -1168,12 +1155,9 @@ static PyObject *Program_vdso_module(Program *self, PyObject *args, } return Module_and_bool_wrap(module, new); } else { - struct drgn_module_key key = { - .kind = DRGN_MODULE_VDSO, - .vdso.name = name.path, - .vdso.dynamic_address = dynamic_address.uvalue, - }; - return Program_find_module(self, &key); + return Module_wrap_find(drgn_module_find_vdso(&self->prog, + name.path, + dynamic_address.uvalue)); } } @@ -1204,12 +1188,9 @@ static PyObject *Program_relocatable_module(Program *self, PyObject *args, } return Module_and_bool_wrap(module, new); } else { - struct drgn_module_key key = { - .kind = DRGN_MODULE_RELOCATABLE, - .relocatable.name = name.path, - .relocatable.address = address.uvalue, - }; - return Program_find_module(self, &key); + return Module_wrap_find(drgn_module_find_relocatable(&self->prog, + name.path, + address.uvalue)); } } @@ -1251,11 +1232,7 @@ static PyObject *Program_linux_kernel_loadable_module(Program *self, set_drgn_error(err); return NULL; } - if (!module) { - PyErr_SetString(PyExc_LookupError, "module not found"); - return NULL; - } - return Module_wrap(module); + return Module_wrap_find(module); } } @@ -1284,12 +1261,9 @@ static PyObject *Program_extra_module(Program *self, PyObject *args, } return Module_and_bool_wrap(module, new); } else { - struct drgn_module_key key = { - .kind = DRGN_MODULE_EXTRA, - .extra.name = name.path, - .extra.id = id.uvalue, - }; - return Program_find_module(self, &key); + 
return Module_wrap_find(drgn_module_find_extra(&self->prog, + name.path, + id.uvalue)); } } From b9ad36eceaead114faa7021e09ecbcd748d60419 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 15:59:41 -0700 Subject: [PATCH 135/166] libdrgn: hash_table: add delete_entry Just like commit 59b23c88aca4 ("libdrgn: binary_search_tree: add delete_entry") but for hash tables. Signed-off-by: Omar Sandoval --- libdrgn/hash_table.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 4b9ed36c1..4e6b68584 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -291,6 +291,14 @@ hash_table_delete_iterator_hashed(struct hash_table *table, struct hash_table_iterator it, struct hash_pair hp); +/** + * Delete an entry in a @ref hash_table. + * + * @return @c true if the entry was found and deleted, @c false if not. See + * @ref hash_table_delete(). + */ +bool hash_table_delete_entry(struct hash_table *table, const entry_type *entry); + /** * Get an iterator pointing to the first entry in a @ref hash_table.
* @@ -1515,13 +1523,20 @@ static bool table##_delete_hashed(struct table *table, \ return true; \ } \ \ -__attribute__((__unused__)) \ static bool table##_delete(struct table *table, const table##_key_type *key) \ { \ return table##_delete_hashed(table, key, table##_hash(key)); \ } \ \ __attribute__((__unused__)) \ +static inline bool table##_delete_entry(struct table *table, \ + const table##_entry_type *entry) \ +{ \ + const table##_key_type key = table##_entry_to_key(entry); \ + return table##_delete(table, &key); \ +} \ + \ +__attribute__((__unused__)) \ static struct table##_iterator table##_first(struct table *table) \ { \ if (table##_vector_policy) { \ From 5ce2116086204d049ace9cd96cab82fb057479fb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 16:34:22 -0700 Subject: [PATCH 136/166] libdrgn: get rid of struct drgn_module_key The tagged union is overkill; replace it with a single uint64_t info. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 269 ++++++++++++---------------------------- libdrgn/debug_info.h | 19 +-- libdrgn/drgn.h | 59 +++------ libdrgn/python/module.c | 46 ++----- 4 files changed, 110 insertions(+), 283 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index da121d989..075c75f1f 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -143,93 +143,36 @@ DEFINE_HASH_TABLE_FUNCTIONS(drgn_elf_file_dwarf_table, drgn_elf_file_dwarf_key, ptr_key_hash_pair, scalar_key_eq); DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); +struct drgn_module_key { + enum drgn_module_kind kind; + const char *name; + uint64_t info; +}; + static inline struct drgn_module_key drgn_module_entry_key(struct drgn_module * const *entry) { - struct drgn_module_key key; - key.kind = (*entry)->kind; - SWITCH_ENUM(key.kind) { - case DRGN_MODULE_SHARED_LIBRARY: - key.shared_library.name = (*entry)->name; - key.shared_library.dynamic_address = - (*entry)->shared_library.dynamic_address; - break; - case DRGN_MODULE_VDSO: 
- key.vdso.name = (*entry)->name; - key.vdso.dynamic_address = (*entry)->vdso.dynamic_address; - break; - case DRGN_MODULE_RELOCATABLE: - key.relocatable.name = (*entry)->name; - key.relocatable.address = (*entry)->relocatable.address; - break; - case DRGN_MODULE_EXTRA: - key.extra.name = (*entry)->name; - key.extra.id = (*entry)->extra.id; - break; - case DRGN_MODULE_MAIN: - default: - UNREACHABLE(); - } - return key; + return (struct drgn_module_key){ + .kind = (*entry)->kind, + .name = (*entry)->name, + .info = (*entry)->info, + }; } static inline struct hash_pair drgn_module_key_hash_pair(const struct drgn_module_key *key) { - size_t hash = key->kind; - SWITCH_ENUM(key->kind) { - case DRGN_MODULE_SHARED_LIBRARY: - hash = hash_combine(hash, - hash_c_string(key->shared_library.name)); - hash = hash_combine(hash, key->shared_library.dynamic_address); - break; - case DRGN_MODULE_VDSO: - hash = hash_combine(hash, hash_c_string(key->vdso.name)); - hash = hash_combine(hash, key->vdso.dynamic_address); - break; - case DRGN_MODULE_RELOCATABLE: - hash = hash_combine(hash, hash_c_string(key->relocatable.name)); - hash = hash_combine(hash, key->relocatable.address); - break; - case DRGN_MODULE_EXTRA: - hash = hash_combine(hash, hash_c_string(key->extra.name)); - hash = hash_combine(hash, key->extra.id); - break; - case DRGN_MODULE_MAIN: - default: - UNREACHABLE(); - } + size_t hash = hash_combine(key->kind, hash_c_string(key->name)); + hash = hash_combine(hash, key->info); return hash_pair_from_avalanching_hash(hash); } static inline bool drgn_module_key_eq(const struct drgn_module_key *a, const struct drgn_module_key *b) { - if (a->kind != b->kind) - return false; - SWITCH_ENUM(a->kind) { - case DRGN_MODULE_SHARED_LIBRARY: - return (strcmp(a->shared_library.name, - b->shared_library.name) == 0 - && a->shared_library.dynamic_address - == b->shared_library.dynamic_address); - break; - case DRGN_MODULE_VDSO: - return (strcmp(a->vdso.name, b->vdso.name) == 0 - && 
a->vdso.dynamic_address == b->vdso.dynamic_address); - break; - case DRGN_MODULE_RELOCATABLE: - return (strcmp(a->relocatable.name, b->relocatable.name) == 0 - && a->relocatable.address == b->relocatable.address); - break; - case DRGN_MODULE_EXTRA: - return (strcmp(a->extra.name, b->extra.name) == 0 - && a->extra.id == b->extra.id); - break; - case DRGN_MODULE_MAIN: - default: - UNREACHABLE(); - } + return a->kind == b->kind + && strcmp(a->name, b->name) == 0 + && a->info == b->info; } DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_entry_key, @@ -252,15 +195,17 @@ static void drgn_module_free_section_addresses(struct drgn_module *module) } static struct drgn_module *drgn_module_find(struct drgn_program *prog, - const struct drgn_module_key *key) + enum drgn_module_kind kind, + const char *name, uint64_t info) { - if (key->kind == DRGN_MODULE_MAIN) { - return prog->dbinfo.main_module; - } else { - struct drgn_module_table_iterator it = - drgn_module_table_search(&prog->dbinfo.modules, key); - return it.entry ? *it.entry : NULL; - } + const struct drgn_module_key key = { + .kind = kind, + .name = name, + .info = info, + }; + struct drgn_module_table_iterator it = + drgn_module_table_search(&prog->dbinfo.modules, &key); + return it.entry ? 
*it.entry : NULL; } LIBDRGN_PUBLIC @@ -277,13 +222,14 @@ struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, static struct drgn_error * drgn_module_find_or_create(struct drgn_program *prog, - const struct drgn_module_key *key, const char *name, - struct drgn_module **ret, bool *new_ret) + enum drgn_module_kind kind, const char *name, + uint64_t info, struct drgn_module **ret, + bool *new_ret) { struct drgn_error *err; struct hash_pair hp; - if (key->kind == DRGN_MODULE_MAIN) { + if (kind == DRGN_MODULE_MAIN) { if (prog->dbinfo.main_module) { if (strcmp(prog->dbinfo.main_module->name, name) != 0) { return drgn_error_create(DRGN_ERROR_LOOKUP, @@ -295,10 +241,15 @@ drgn_module_find_or_create(struct drgn_program *prog, return NULL; } } else { - hp = drgn_module_table_hash(key); + const struct drgn_module_key key = { + .kind = kind, + .name = name, + .info = info, + }; + hp = drgn_module_table_hash(&key); struct drgn_module_table_iterator it = drgn_module_table_search_hashed(&prog->dbinfo.modules, - key, hp); + &key, hp); if (it.entry) { *ret = *it.entry; if (new_ret) @@ -313,7 +264,8 @@ drgn_module_find_or_create(struct drgn_program *prog, module->start = module->end = UINT64_MAX; module->prog = prog; - module->kind = key->kind; + module->kind = kind; + module->info = info; drgn_object_init(&module->object, prog); // Linux userspace core dumps usually filter out file-backed mappings // (see coredump_filter in core(5)), so we need the loaded file to read @@ -323,31 +275,12 @@ drgn_module_find_or_create(struct drgn_program *prog, // Linux kernel core dumps preserve the main kernel and kernel module // text, and the kernel doesn't use .eh_frame, so we don't need the // loaded file for the kernel. 
- module->loaded_file_status = DRGN_MODULE_FILE_WANT; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL + && (kind == DRGN_MODULE_MAIN || kind == DRGN_MODULE_RELOCATABLE)) + module->loaded_file_status = DRGN_MODULE_FILE_DONT_NEED; + else + module->loaded_file_status = DRGN_MODULE_FILE_WANT; module->debug_file_status = DRGN_MODULE_FILE_WANT; - SWITCH_ENUM(key->kind) { - case DRGN_MODULE_MAIN: - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - module->loaded_file_status = DRGN_MODULE_FILE_DONT_NEED; - break; - case DRGN_MODULE_SHARED_LIBRARY: - module->shared_library.dynamic_address = - key->shared_library.dynamic_address; - break; - case DRGN_MODULE_VDSO: - module->vdso.dynamic_address = key->vdso.dynamic_address; - break; - case DRGN_MODULE_RELOCATABLE: - module->relocatable.address = key->relocatable.address; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - module->loaded_file_status = DRGN_MODULE_FILE_DONT_NEED; - break; - case DRGN_MODULE_EXTRA: - module->extra.id = key->extra.id; - break; - default: - UNREACHABLE(); - } module->name = strdup(name); if (!module->name) { @@ -355,7 +288,7 @@ drgn_module_find_or_create(struct drgn_program *prog, goto err_module; } - if (key->kind == DRGN_MODULE_MAIN) { + if (kind == DRGN_MODULE_MAIN) { prog->dbinfo.main_module = module; } else { if (drgn_module_table_insert_searched(&prog->dbinfo.modules, @@ -376,23 +309,19 @@ drgn_module_find_or_create(struct drgn_program *prog, case DRGN_MODULE_SHARED_LIBRARY: drgn_log_debug(prog, "created shared library module %s@0x%" PRIx64, - module->name, - module->shared_library.dynamic_address); + module->name, module->info); break; case DRGN_MODULE_VDSO: - drgn_log_debug(prog, - "created vDSO module %s@0x%" PRIx64, - module->name, module->vdso.dynamic_address); + drgn_log_debug(prog, "created vDSO module %s@0x%" PRIx64, + module->name, module->info); break; case DRGN_MODULE_RELOCATABLE: - drgn_log_debug(prog, - "created relocatable module %s@0x%" PRIx64, - module->name, 
module->relocatable.address); + drgn_log_debug(prog, "created relocatable module %s@0x%" PRIx64, + module->name, module->info); break; case DRGN_MODULE_EXTRA: - drgn_log_debug(prog, - "created extra module %s 0x%" PRIx64, - module->name, module->extra.id); + drgn_log_debug(prog, "created extra module %s 0x%" PRIx64, + module->name, module->info); break; default: UNREACHABLE(); @@ -427,8 +356,8 @@ struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret) { - struct drgn_module_key key = { .kind = DRGN_MODULE_MAIN }; - return drgn_module_find_or_create(prog, &key, name, ret, new_ret); + return drgn_module_find_or_create(prog, DRGN_MODULE_MAIN, name, 0, ret, + new_ret); } LIBDRGN_PUBLIC @@ -436,12 +365,8 @@ struct drgn_module *drgn_module_find_shared_library(struct drgn_program *prog, const char *name, uint64_t dynamic_address) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_SHARED_LIBRARY, - .shared_library.name = name, - .shared_library.dynamic_address = dynamic_address, - }; - return drgn_module_find(prog, &key); + return drgn_module_find(prog, DRGN_MODULE_SHARED_LIBRARY, name, + dynamic_address); } LIBDRGN_PUBLIC struct drgn_error * @@ -451,12 +376,8 @@ drgn_module_find_or_create_shared_library(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_SHARED_LIBRARY, - .shared_library.name = name, - .shared_library.dynamic_address = dynamic_address, - }; - return drgn_module_find_or_create(prog, &key, name, ret, new_ret); + return drgn_module_find_or_create(prog, DRGN_MODULE_SHARED_LIBRARY, + name, dynamic_address, ret, new_ret); } LIBDRGN_PUBLIC @@ -464,12 +385,7 @@ struct drgn_module *drgn_module_find_vdso(struct drgn_program *prog, const char *name, uint64_t dynamic_address) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_VDSO, - .vdso.name = name, - .vdso.dynamic_address = dynamic_address, - }; - 
return drgn_module_find(prog, &key); + return drgn_module_find(prog, DRGN_MODULE_VDSO, name, dynamic_address); } LIBDRGN_PUBLIC @@ -479,12 +395,8 @@ struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_VDSO, - .vdso.name = name, - .vdso.dynamic_address = dynamic_address, - }; - return drgn_module_find_or_create(prog, &key, name, ret, new_ret); + return drgn_module_find_or_create(prog, DRGN_MODULE_VDSO, name, + dynamic_address, ret, new_ret); } LIBDRGN_PUBLIC @@ -492,12 +404,7 @@ struct drgn_module *drgn_module_find_relocatable(struct drgn_program *prog, const char *name, uint64_t address) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_RELOCATABLE, - .relocatable.name = name, - .relocatable.address = address, - }; - return drgn_module_find(prog, &key); + return drgn_module_find(prog, DRGN_MODULE_RELOCATABLE, name, address); } LIBDRGN_PUBLIC struct drgn_error * @@ -505,24 +412,15 @@ drgn_module_find_or_create_relocatable(struct drgn_program *prog, const char *name, uint64_t address, struct drgn_module **ret, bool *new_ret) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_RELOCATABLE, - .relocatable.name = name, - .relocatable.address = address, - }; - return drgn_module_find_or_create(prog, &key, name, ret, new_ret); + return drgn_module_find_or_create(prog, DRGN_MODULE_RELOCATABLE, name, + address, ret, new_ret); } LIBDRGN_PUBLIC struct drgn_module *drgn_module_find_extra(struct drgn_program *prog, const char *name, uint64_t id) { - const struct drgn_module_key key = { - .kind = DRGN_MODULE_EXTRA, - .extra.name = name, - .extra.id = id, - }; - return drgn_module_find(prog, &key); + return drgn_module_find(prog, DRGN_MODULE_EXTRA, name, id); } LIBDRGN_PUBLIC @@ -532,12 +430,8 @@ struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, struct drgn_module **ret, bool *new_ret) { - const 
struct drgn_module_key key = { - .kind = DRGN_MODULE_EXTRA, - .extra.name = name, - .extra.id = id, - }; - return drgn_module_find_or_create(prog, &key, name, ret, new_ret); + return drgn_module_find_or_create(prog, DRGN_MODULE_EXTRA, name, id, + ret, new_ret); } static void @@ -584,9 +478,8 @@ void drgn_module_delete(struct drgn_module *module) if (module->kind == DRGN_MODULE_MAIN) { module->prog->dbinfo.main_module = NULL; } else { - struct drgn_module_key key = - drgn_module_entry_key((struct drgn_module * const *)&module); - drgn_module_table_delete(&module->prog->dbinfo.modules, &key); + drgn_module_table_delete_entry(&module->prog->dbinfo.modules, + &module); module->prog->dbinfo.modules_generation++; } drgn_module_destroy(module); @@ -598,17 +491,6 @@ struct drgn_program *drgn_module_program(const struct drgn_module *module) return module->prog; } -LIBDRGN_PUBLIC -struct drgn_module_key drgn_module_key(const struct drgn_module *module) -{ - if (module->kind == DRGN_MODULE_MAIN) { - struct drgn_module_key key; - key.kind = DRGN_MODULE_MAIN; - return key; - } - return drgn_module_entry_key((struct drgn_module * const *)&module); -} - LIBDRGN_PUBLIC enum drgn_module_kind drgn_module_kind(const struct drgn_module *module) { @@ -620,6 +502,11 @@ LIBDRGN_PUBLIC const char *drgn_module_name(const struct drgn_module *module) return module->name; } +LIBDRGN_PUBLIC uint64_t drgn_module_info(const struct drgn_module *module) +{ + return module->info; +} + LIBDRGN_PUBLIC bool drgn_module_address_range(const struct drgn_module *module, uint64_t *start_ret, uint64_t *end_ret) @@ -1306,12 +1193,8 @@ static bool drgn_module_elf_file_bias(struct drgn_module *module, return elf_main_bias(prog, file->elf, ret); } case DRGN_MODULE_SHARED_LIBRARY: - return elf_dso_bias(prog, file->elf, - module->shared_library.dynamic_address, - ret); case DRGN_MODULE_VDSO: - return elf_dso_bias(prog, file->elf, - module->vdso.dynamic_address, ret); + return elf_dso_bias(prog, file->elf, 
module->info, ret); case DRGN_MODULE_EXTRA: if (module->start < module->end) { uint64_t elf_start, elf_end; @@ -2005,7 +1888,7 @@ drgn_module_try_proc_files_for_shared_library(struct drgn_module *module, { struct drgn_error *err; struct drgn_program *prog = module->prog; - const uint64_t address = module->shared_library.dynamic_address; + const uint64_t address = module->info; #define DIR_FORMAT "/proc/%ld/map_files" #define ENTRY_FORMAT "/%" PRIx64 "-%" PRIx64 diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index bb38cf005..800da1515 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -84,7 +84,7 @@ struct drgn_debug_info { /** Main module. @c NULL if not created yet. */ struct drgn_module *main_module; - /** Table of non-main modules indexed on @ref drgn_module_key. */ + /** Table of non-main modules indexed on (kind, name, info). */ struct drgn_module_table modules; /** * Counter used to detect when @ref modules is modified during iteration @@ -187,21 +187,8 @@ struct drgn_module { /** Module name. */ char *name; - /** Kind-specific information. */ - union { - struct { - uint64_t dynamic_address; - } shared_library; - struct { - uint64_t dynamic_address; - } vdso; - struct { - uint64_t address; - } relocatable; - struct { - uint64_t id; - } extra; - }; + /** Kind-specific info. */ + uint64_t info; /** * Raw binary build ID. @c NULL if the module does not have a build ID. */ diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 6294c0080..0f4f4a777 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1194,7 +1194,13 @@ struct drgn_error *drgn_program_element_info(struct drgn_program *prog, * @{ */ -/** An executable, library, or other binary file used by a program. */ +/** + * An executable, library, or other binary file used by a program. + * + * Modules are uniquely identified by the combination of their kind (@ref + * drgn_module_kind()), name (@ref drgn_module_name()), and info (@ref + * drgn_module_info()). 
+ */ struct drgn_module; /** Kinds of modules. */ @@ -1214,42 +1220,6 @@ enum drgn_module_kind { DRGN_MODULE_EXTRA, } __attribute__((__packed__)); -/** Unique key for a @ref drgn_module. */ -struct drgn_module_key { - /** Kind of module. */ - enum drgn_module_kind kind; - /** Kind-specific key. */ - union { - struct { - /** Name of module. */ - const char *name; - /** Address of dynamic section. */ - uint64_t dynamic_address; - } shared_library; - struct { - /** Name of module. */ - const char *name; - /** Address of dynamic section. */ - uint64_t dynamic_address; - } vdso; - struct { - /** Name of module. */ - const char *name; - /** - * Address identifying the module (e.g., for Linux - * kernel loadable modules, the base address). - */ - uint64_t address; - } relocatable; - struct { - /** Name of module. */ - const char *name; - /** Arbitrary identification number. */ - uint64_t id; - } extra; - }; -}; - /** * Find the created @ref drgn_module containing the given @p address. * @@ -1366,15 +1336,24 @@ struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, /** Get the program that a module is from. */ struct drgn_program *drgn_module_program(const struct drgn_module *module); -/** Get the unique key for a module. */ -struct drgn_module_key drgn_module_key(const struct drgn_module *module); - /** Get the kind of a module. */ enum drgn_module_kind drgn_module_kind(const struct drgn_module *module); /** Get the name of a module. */ const char *drgn_module_name(const struct drgn_module *module); +/** + * Get the kind-specific info of a module. + * + * - For the main module, it is always 0. + * - For shared library and vDSO modules, it is the address of the dynamic + * section. + * - For relocatable modules, it is an address identifying the module (e.g., for + * Linux kernel loadable modules, it is the base address). + * - For extra modules, it is an arbitrary identification number. 
+ */ +uint64_t drgn_module_info(const struct drgn_module *module); + /** * Get the address range where a module is loaded. * diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 21edb4f4d..34e80df81 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -84,13 +84,11 @@ static int append_module_repr_common(PyObject *parts, Module *self, static PyObject *Module_repr(Module *self) { - struct drgn_module_key key = drgn_module_key(self->module); - _cleanup_pydecref_ PyObject *parts = PyList_New(0); if (!parts) return NULL; - SWITCH_ENUM(key.kind) { + SWITCH_ENUM(drgn_module_kind(self->module)) { case DRGN_MODULE_MAIN: if (append_module_repr_common(parts, self, "main") < 0) return NULL; @@ -99,26 +97,25 @@ static PyObject *Module_repr(Module *self) if (append_module_repr_common(parts, self, "shared_library") || append_string(parts, ", dynamic_address=") - || append_u64_hex(parts, - key.shared_library.dynamic_address)) + || append_u64_hex(parts, drgn_module_info(self->module))) return NULL; break; case DRGN_MODULE_VDSO: if (append_module_repr_common(parts, self, "vdso") || append_string(parts, ", dynamic_address=") - || append_u64_hex(parts, key.vdso.dynamic_address)) + || append_u64_hex(parts, drgn_module_info(self->module))) return NULL; break; case DRGN_MODULE_RELOCATABLE: if (append_module_repr_common(parts, self, "relocatable") || append_string(parts, ", address=") - || append_u64_hex(parts, key.relocatable.address)) + || append_u64_hex(parts, drgn_module_info(self->module))) return NULL; break; case DRGN_MODULE_EXTRA: if (append_module_repr_common(parts, self, "extra") || append_string(parts, ", id=") - || append_u64_hex(parts, key.extra.id)) + || append_u64_hex(parts, drgn_module_info(self->module))) return NULL; break; default: @@ -476,15 +473,14 @@ PyTypeObject MainModule_type = { .tp_base = &Module_type, }; -static PyObject *SharedLibraryModule_get_dynamic_address(Module *self, void *arg) +static PyObject *Module_get_info(Module 
*self, void *arg) { - struct drgn_module_key key = drgn_module_key(self->module); - return PyLong_FromUint64(key.shared_library.dynamic_address); + return PyLong_FromUint64(drgn_module_info(self->module)); } static PyGetSetDef SharedLibraryModule_getset[] = { - {"dynamic_address", (getter)SharedLibraryModule_get_dynamic_address, - NULL, drgn_SharedLibraryModule_dynamic_address_DOC}, + {"dynamic_address", (getter)Module_get_info, NULL, + drgn_SharedLibraryModule_dynamic_address_DOC}, {}, }; @@ -497,14 +493,8 @@ PyTypeObject SharedLibraryModule_type = { .tp_base = &Module_type, }; -static PyObject *VdsoModule_get_dynamic_address(Module *self, void *arg) -{ - struct drgn_module_key key = drgn_module_key(self->module); - return PyLong_FromUint64(key.vdso.dynamic_address); -} - static PyGetSetDef VdsoModule_getset[] = { - {"dynamic_address", (getter)VdsoModule_get_dynamic_address, NULL, + {"dynamic_address", (getter)Module_get_info, NULL, drgn_VdsoModule_dynamic_address_DOC}, {}, }; @@ -518,12 +508,6 @@ PyTypeObject VdsoModule_type = { .tp_base = &Module_type, }; -static PyObject *RelocatableModule_get_address(Module *self, void *arg) -{ - struct drgn_module_key key = drgn_module_key(self->module); - return PyLong_FromUint64(key.relocatable.address); -} - static PyObject *RelocatableModule_get_section_addresses(PyObject *self, void *arg) { @@ -531,7 +515,7 @@ static PyObject *RelocatableModule_get_section_addresses(PyObject *self, } static PyGetSetDef RelocatableModule_getset[] = { - {"address", (getter)RelocatableModule_get_address, NULL, + {"address", (getter)Module_get_info, NULL, drgn_RelocatableModule_address_DOC}, {"section_addresses", RelocatableModule_get_section_addresses, NULL, drgn_RelocatableModule_section_addresses_DOC}, @@ -547,14 +531,8 @@ PyTypeObject RelocatableModule_type = { .tp_base = &Module_type, }; -static PyObject *ExtraModule_get_id(Module *self, void *arg) -{ - struct drgn_module_key key = drgn_module_key(self->module); - return 
PyLong_FromUint64(key.extra.id); -} - static PyGetSetDef ExtraModule_getset[] = { - {"id", (getter)ExtraModule_get_id, NULL, drgn_ExtraModule_id_DOC}, + {"id", (getter)Module_get_info, NULL, drgn_ExtraModule_id_DOC}, {}, }; From dcf392a2921cee136e119188fb80face1fd1a06a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 9 Apr 2025 12:56:00 -0700 Subject: [PATCH 137/166] libdrgn: debug_info: index modules by name We currently index modules by their unique key (kind, name, info). I've wanted to look up modules using only their name, which can't be done efficiently with the current module table. Instead, let's make the table map a name to a list of modules with that name. It's very rare for modules to have the same name, so this is just as efficient for the common case of existing operations and enables us to add a name lookup. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 202 ++++++++++++++++++++++--------------------- libdrgn/debug_info.h | 11 ++- tests/test_module.py | 15 ++++ 3 files changed, 129 insertions(+), 99 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 075c75f1f..9383c1274 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -143,40 +143,13 @@ DEFINE_HASH_TABLE_FUNCTIONS(drgn_elf_file_dwarf_table, drgn_elf_file_dwarf_key, ptr_key_hash_pair, scalar_key_eq); DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); -struct drgn_module_key { - enum drgn_module_kind kind; - const char *name; - uint64_t info; -}; - -static inline -struct drgn_module_key drgn_module_entry_key(struct drgn_module * const *entry) -{ - return (struct drgn_module_key){ - .kind = (*entry)->kind, - .name = (*entry)->name, - .info = (*entry)->info, - }; -} - -static inline struct hash_pair -drgn_module_key_hash_pair(const struct drgn_module_key *key) -{ - size_t hash = hash_combine(key->kind, hash_c_string(key->name)); - hash = hash_combine(hash, key->info); - return hash_pair_from_avalanching_hash(hash); -} - -static inline bool 
drgn_module_key_eq(const struct drgn_module_key *a, - const struct drgn_module_key *b) +static inline const char *drgn_module_entry_name(struct drgn_module * const *entry) { - return a->kind == b->kind - && strcmp(a->name, b->name) == 0 - && a->info == b->info; + return (*entry)->name; } -DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_entry_key, - drgn_module_key_hash_pair, drgn_module_key_eq); +DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_entry_name, + c_string_key_hash_pair, c_string_key_eq); static inline uint64_t drgn_module_address_key(const struct drgn_module *entry) { @@ -194,20 +167,6 @@ static void drgn_module_free_section_addresses(struct drgn_module *module) free(it.entry->key); } -static struct drgn_module *drgn_module_find(struct drgn_program *prog, - enum drgn_module_kind kind, - const char *name, uint64_t info) -{ - const struct drgn_module_key key = { - .kind = kind, - .name = name, - .info = info, - }; - struct drgn_module_table_iterator it = - drgn_module_table_search(&prog->dbinfo.modules, &key); - return it.entry ? 
*it.entry : NULL; -} - LIBDRGN_PUBLIC struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, uint64_t address) @@ -220,6 +179,23 @@ struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, return it.entry; } +static struct drgn_module *drgn_module_find(struct drgn_program *prog, + enum drgn_module_kind kind, + const char *name, uint64_t info) +{ + struct drgn_module_table_iterator it = + drgn_module_table_search(&prog->dbinfo.modules, &name); + if (!it.entry) + return NULL; + struct drgn_module *module = *it.entry; + while (module->kind != kind || module->info != info) { + module = module->next_same_name; + if (!module) + break; + } + return module; +} + static struct drgn_error * drgn_module_find_or_create(struct drgn_program *prog, enum drgn_module_kind kind, const char *name, @@ -229,6 +205,7 @@ drgn_module_find_or_create(struct drgn_program *prog, struct drgn_error *err; struct hash_pair hp; + struct drgn_module_table_iterator it; if (kind == DRGN_MODULE_MAIN) { if (prog->dbinfo.main_module) { if (strcmp(prog->dbinfo.main_module->name, name) != 0) { @@ -240,21 +217,23 @@ drgn_module_find_or_create(struct drgn_program *prog, *new_ret = false; return NULL; } + hp = drgn_module_table_hash(&name); + it.entry = NULL; } else { - const struct drgn_module_key key = { - .kind = kind, - .name = name, - .info = info, - }; - hp = drgn_module_table_hash(&key); - struct drgn_module_table_iterator it = - drgn_module_table_search_hashed(&prog->dbinfo.modules, - &key, hp); + hp = drgn_module_table_hash(&name); + it = drgn_module_table_search_hashed(&prog->dbinfo.modules, + &name, hp); if (it.entry) { - *ret = *it.entry; - if (new_ret) - *new_ret = false; - return NULL; + struct drgn_module *module = *it.entry; + do { + if (module->kind == kind && module->info == info) { + *ret = module; + if (new_ret) + *new_ret = false; + return NULL; + } + module = module->next_same_name; + } while (module); } } @@ -288,16 +267,17 @@ 
drgn_module_find_or_create(struct drgn_program *prog, goto err_module; } - if (kind == DRGN_MODULE_MAIN) { - prog->dbinfo.main_module = module; - } else { - if (drgn_module_table_insert_searched(&prog->dbinfo.modules, - &module, hp, NULL) < 0) { - err = &drgn_enomem; - goto err_name; - } - prog->dbinfo.modules_generation++; + if (it.entry) { + module->next_same_name = *it.entry; + *it.entry = module; + } else if (drgn_module_table_insert_searched(&prog->dbinfo.modules, + &module, hp, NULL) < 0) { + err = &drgn_enomem; + goto err_name; } + if (kind == DRGN_MODULE_MAIN) + prog->dbinfo.main_module = module; + prog->dbinfo.modules_generation++; drgn_elf_file_dwarf_table_init(&module->split_dwarf_files); drgn_module_section_address_map_init(&module->section_addresses); @@ -475,13 +455,23 @@ void drgn_module_delete(struct drgn_module *module) drgn_module_address_tree_delete_entry(&module->prog->dbinfo.modules_by_address, module); } - if (module->kind == DRGN_MODULE_MAIN) { - module->prog->dbinfo.main_module = NULL; + + const char *name = module->name; + struct drgn_module_table_iterator it = + drgn_module_table_search(&module->prog->dbinfo.modules, &name); + if (*it.entry == module && !module->next_same_name) { + drgn_module_table_delete_iterator(&module->prog->dbinfo.modules, + it); } else { - drgn_module_table_delete_entry(&module->prog->dbinfo.modules, - &module); - module->prog->dbinfo.modules_generation++; + struct drgn_module **modulep = it.entry; + while (*modulep != module) + modulep = &(*modulep)->next_same_name; + *modulep = module->next_same_name; } + if (module->kind == DRGN_MODULE_MAIN) + module->prog->dbinfo.main_module = NULL; + module->prog->dbinfo.modules_generation++; + drgn_module_destroy(module); } @@ -3094,6 +3084,7 @@ struct drgn_error *drgn_module_iterator_next(struct drgn_module_iterator *it, struct drgn_created_module_iterator { struct drgn_module_iterator it; struct drgn_module_table_iterator table_it; + struct drgn_module *next_module; uint64_t 
generation; bool yielded_main; }; @@ -3106,6 +3097,7 @@ drgn_created_module_iterator_next(struct drgn_module_iterator *_it, struct drgn_created_module_iterator *it = container_of(_it, struct drgn_created_module_iterator, it); struct drgn_debug_info *dbinfo = &it->it.prog->dbinfo; + if (!it->yielded_main) { it->yielded_main = true; it->table_it = drgn_module_table_first(&dbinfo->modules); @@ -3117,19 +3109,32 @@ drgn_created_module_iterator_next(struct drgn_module_iterator *_it, return NULL; } } + if (it->generation != dbinfo->modules_generation) { return drgn_error_create(DRGN_ERROR_OTHER, "modules changed during iteration"); } - if (it->table_it.entry) { - *ret = *it->table_it.entry; - if (new_ret) - *new_ret = false; - it->table_it = drgn_module_table_next(it->table_it); - } else { - *ret = NULL; + + for (;;) { + if (!it->next_module) { + if (it->table_it.entry) { + it->next_module = *it->table_it.entry; + it->table_it = drgn_module_table_next(it->table_it); + } else { + *ret = NULL; + return NULL; + } + } + if (it->next_module == dbinfo->main_module) { + it->next_module = it->next_module->next_same_name; + } else { + *ret = it->next_module; + if (new_ret) + *new_ret = false; + it->next_module = it->next_module->next_same_name; + return NULL; + } } - return NULL; } LIBDRGN_PUBLIC struct drgn_error * @@ -5410,24 +5415,19 @@ elf_symbols_search(const char *name, uint64_t addr, return drgn_module_elf_symbols_search(module, name, addr, flags, builder); } else { - if (prog->dbinfo.main_module) { - err = drgn_module_elf_symbols_search(prog->dbinfo.main_module, - name, addr, flags, - builder); - if (err == &drgn_stop) - return NULL; - if (err) - return err; - } hash_table_for_each(drgn_module_table, it, &prog->dbinfo.modules) { - err = drgn_module_elf_symbols_search(*it.entry, name, - addr, flags, - builder); - if (err == &drgn_stop) - break; - if (err) - return err; + for (struct drgn_module *module = *it.entry; module; + module = module->next_same_name) { + err = 
drgn_module_elf_symbols_search(module, + name, addr, + flags, + builder); + if (err == &drgn_stop) + break; + if (err) + return err; + } } return NULL; } @@ -5500,8 +5500,14 @@ void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo) finder->ops.destroy(finder->arg); ); drgn_dwarf_info_deinit(dbinfo); - hash_table_for_each(drgn_module_table, it, &dbinfo->modules) - drgn_module_destroy(*it.entry); + hash_table_for_each(drgn_module_table, it, &dbinfo->modules) { + struct drgn_module *module = *it.entry; + do { + struct drgn_module *next = module->next_same_name; + drgn_module_destroy(module); + module = next; + } while (module); + } drgn_module_table_deinit(&dbinfo->modules); } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 800da1515..b5dea865e 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -84,7 +84,12 @@ struct drgn_debug_info { /** Main module. @c NULL if not created yet. */ struct drgn_module *main_module; - /** Table of non-main modules indexed on (kind, name, info). */ + /** + * Table of all modules indexed by name. + * + * Modules with the same name (which should be rare) are on a + * singly-linked list (@ref drgn_module::next_same_name). + */ struct drgn_module_table modules; /** * Counter used to detect when @ref modules is modified during iteration @@ -189,6 +194,10 @@ struct drgn_module { char *name; /** Kind-specific info. */ uint64_t info; + + /** Next module with the same name in @ref drgn_debug_info::modules. */ + struct drgn_module *next_same_name; + /** * Raw binary build ID. @c NULL if the module does not have a build ID. 
*/ diff --git a/tests/test_module.py b/tests/test_module.py index e22f1bd45..b47bc55f4 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -492,6 +492,21 @@ def test_multiple(self): ] self.assertCountEqual(list(prog.modules()), modules) + def test_same_name(self): + prog = Program() + modules = [ + prog.extra_module("foo", id=0, create=True)[0], + prog.main_module("foo", create=True)[0], + ] + actual = list(prog.modules()) + self.assertCountEqual(actual, modules) + self.assertEqual(actual[0], prog.main_module()) + + modules.append(prog.extra_module("foo", id=1, create=True)[0]) + actual = list(prog.modules()) + self.assertCountEqual(actual, modules) + self.assertEqual(actual[0], prog.main_module()) + def test_change_during_iteration(self): prog = Program() prog.extra_module("/foo/bar", create=True) From 21decd3c4526cd314545198f6720ed80ad4180d5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 9 Apr 2025 13:10:29 -0700 Subject: [PATCH 138/166] Add module lookups by name It's rare to know the address of the module you're looking for, so the existing module lookup functions are often not helpful. Add drgn_module_find_by_name() and drgn.Program.module(name) to address this. Userspace still has the issue that you may not know the absolute path of the binary you care about, but this at least helps the kernel. Signed-off-by: Omar Sandoval --- _drgn.pyi | 13 +++++++++---- libdrgn/debug_info.c | 9 +++++++++ libdrgn/drgn.h | 11 +++++++++++ libdrgn/python/program.c | 22 +++++++++++++--------- tests/test_module.py | 12 ++++++++++++ 5 files changed, 54 insertions(+), 13 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 579205481..05422813f 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -914,14 +914,19 @@ class Program: """ ... - def module(self, __address: IntegerLike) -> Module: + def module(self, __address_or_name: Union[IntegerLike, str]) -> Module: """ - Find the module containing the given address. 
+ Find the module containing the given address, or the module with the + given name. Addresses are matched based on :attr:`Module.address_range`. - :param address: Address to search for. - :raises LookupError: if no module contains the given address + If there are multiple modules with the given name, one is returned + arbitrarily. + + :param address_or_name: Address or name to search for. + :raises LookupError: if no module contains the given address or has the + given name """ ... diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 9383c1274..eb6064c62 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -167,6 +167,15 @@ static void drgn_module_free_section_addresses(struct drgn_module *module) free(it.entry->key); } +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_by_name(struct drgn_program *prog, + const char *name) +{ + struct drgn_module_table_iterator it = + drgn_module_table_search(&prog->dbinfo.modules, &name); + return it.entry ? *it.entry : NULL; +} + LIBDRGN_PUBLIC struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, uint64_t address) diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 0f4f4a777..15f4c43da 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -1220,6 +1220,17 @@ enum drgn_module_kind { DRGN_MODULE_EXTRA, } __attribute__((__packed__)); +/** + * Find the created @ref drgn_module with the given @p name. + * + * If there are multiple modules with the given name, one is returned + * arbitrarily. + * + * @return Module, or @c NULL if not found. + */ +struct drgn_module *drgn_module_find_by_name(struct drgn_program *prog, + const char *name); + /** * Find the created @ref drgn_module containing the given @p address. 
* diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 40d564691..873767a60 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1269,16 +1269,20 @@ static PyObject *Program_extra_module(Program *self, PyObject *args, static PyObject *Program_module(Program *self, PyObject *arg) { - struct index_arg address = {}; - if (!index_converter(arg, &address)) - return NULL; - struct drgn_module *module = - drgn_module_find_by_address(&self->prog, address.uvalue); - if (!module) { - PyErr_SetString(PyExc_LookupError, "module not found"); - return NULL; + struct drgn_module *module; + if (PyUnicode_Check(arg)) { + const char *name = PyUnicode_AsUTF8(arg); + if (!name) + return NULL; + module = drgn_module_find_by_name(&self->prog, name); + } else { + struct index_arg address = {}; + if (!index_converter(arg, &address)) + return NULL; + module = drgn_module_find_by_address(&self->prog, + address.uvalue); } - return Module_wrap(module); + return Module_wrap_find(module); } static DebugInfoOptions *Program_get_debug_info_options(Program *self, void *arg) diff --git a/tests/test_module.py b/tests/test_module.py index b47bc55f4..a4e5541b8 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -386,6 +386,18 @@ def test_build_id_del(self): with self.assertRaises(AttributeError): del module.build_id + def test_find_by_name(self): + prog = Program() + self.assertRaises(LookupError, prog.module, "foo") + + module1 = prog.extra_module("foo", create=True)[0] + self.assertEqual(prog.module("foo"), module1) + + module2 = prog.main_module("foo", create=True)[0] + self.assertIn(prog.module("foo"), (module1, module2)) + + self.assertRaises(LookupError, prog.module, "bar") + def test_find_by_address(self): prog = Program() module1 = prog.extra_module("/foo/bar", create=True)[0] From b72fc704b9dbee1a1f5e31fad6c62d1191da9b20 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 9 Apr 2025 13:17:26 -0700 Subject: [PATCH 139/166] docs: fix 
formatting of create_loaded_modules() docstring Fixes: 64dd5c883d41 ("Add Program.create_loaded_modules()") Signed-off-by: Omar Sandoval --- _drgn.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 05422813f..f169aafb2 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -729,8 +729,8 @@ class Program: .. code-block:: python3 - for _ in prog.loaded_modules(): - pass + for _ in prog.loaded_modules(): + pass """ @overload From 4f518e489b6a677970e93dee213c26a3edd19c2a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 9 Apr 2025 13:48:09 -0700 Subject: [PATCH 140/166] Drop "new" return from prog.*_module(create=True) The differing return type between the module lookup functions with create=False and create=True is ugly and not very Pythonic. Drop the "new" part and always just return the module. You can do something like this if you really need to distinguish it: try: module = prog.extra_module("foo") except LookupError: module = prog.extra_module("foo", create=True) ... first-time setup ... 
Suggested-by: Stephen Brennan Signed-off-by: Omar Sandoval --- _drgn.pyi | 134 ++++++------------------ docs/api_reference.rst | 5 +- drgn/cli.py | 6 +- libdrgn/python/drgnpy.h | 1 - libdrgn/python/module.c | 8 +- libdrgn/python/program.c | 30 +++--- tests/linux_kernel/test_debug_info.py | 3 +- tests/linux_kernel/test_stack_trace.py | 2 +- tests/test_debug_info.py | 138 ++++++++++++------------- tests/test_dwarf.py | 2 +- tests/test_module.py | 101 ++++++++---------- tests/test_symbol.py | 2 +- 12 files changed, 167 insertions(+), 265 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index f169aafb2..fee8b2b67 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -31,9 +31,9 @@ from typing import ( ) if sys.version_info < (3, 8): - from typing_extensions import Final, Literal, Protocol + from typing_extensions import Final, Protocol else: - from typing import Final, Literal, Protocol + from typing import Final, Protocol if sys.version_info < (3, 10): from typing_extensions import TypeAlias @@ -734,128 +734,82 @@ class Program: """ @overload - def main_module( - self, name: Optional[Path] = None, *, create: Literal[False] = False - ) -> MainModule: + def main_module(self) -> MainModule: """ Find the main module. - :param name: :attr:`Module.name`, or ``None`` to match any name - :raises LookupError: if main module has not been created or its name - doesn't match + :raises LookupError: if the main module has not been created """ ... @overload - def main_module( - self, name: Path, *, create: Literal[True] - ) -> Tuple[MainModule, bool]: + def main_module(self, name: Path, *, create: bool = False) -> MainModule: """ - Find or create the main module. + Find the main module. :param name: :attr:`Module.name` - :return: Module and ``True`` if it was newly created or ``False`` if it - was found. - :raises LookupError: if main module was already created with a - different name + :param create: Create the module if it doesn't exist. 
+ :raises LookupError: if the main module has not been created and + *create* is ``False``, or if the main module has already been + created with a different name """ ... - @overload def shared_library_module( self, name: Path, dynamic_address: IntegerLike, *, - create: Literal[False] = False, + create: bool = False, ) -> SharedLibraryModule: """ Find a shared library module. :param name: :attr:`Module.name` :param dynamic_address: :attr:`SharedLibraryModule.dynamic_address` + :param create: Create the module if it doesn't exist. :return: Shared library module with the given name and dynamic address. - :raises LookupError: if no matching module has been created + :raises LookupError: if no matching module has been created and + *create* is ``False`` """ ... - @overload - def shared_library_module( - self, name: Path, dynamic_address: IntegerLike, *, create: Literal[True] - ) -> Tuple[SharedLibraryModule, bool]: - """ - Find or create a shared library module. - - :param name: :attr:`Module.name` - :param dynamic_address: :attr:`SharedLibraryModule.dynamic_address` - :return: Module and ``True`` if it was newly created or ``False`` if it - was found. - """ - ... - - @overload def vdso_module( self, name: Path, dynamic_address: IntegerLike, *, - create: Literal[False] = False, + create: bool = False, ) -> VdsoModule: """ Find a vDSO module. :param name: :attr:`Module.name` :param dynamic_address: :attr:`VdsoModule.dynamic_address` + :param create: Create the module if it doesn't exist. :return: vDSO module with the given name and dynamic address. - :raises LookupError: if no matching module has been created - """ - ... - - @overload - def vdso_module( - self, name: Path, dynamic_address: IntegerLike, *, create: Literal[True] - ) -> Tuple[VdsoModule, bool]: - """ - Find or create a vDSO module. 
- - :param name: :attr:`Module.name` - :param dynamic_address: :attr:`VdsoModule.dynamic_address` - :return: Module and ``True`` if it was newly created or ``False`` if it - was found. + :raises LookupError: if no matching module has been created and + *create* is ``False`` """ ... - @overload def relocatable_module( - self, name: Path, address: IntegerLike, *, create: Literal[False] = False + self, name: Path, address: IntegerLike, *, create: bool = False ) -> RelocatableModule: """ Find a relocatable module. :param name: :attr:`Module.name` :param address: :attr:`RelocatableModule.address` + :param create: Create the module if it doesn't exist. :return: Relocatable module with the given name and address. - :raises LookupError: if no matching module has been created + :raises LookupError: if no matching module has been created and + *create* is ``False`` """ ... - @overload - def relocatable_module( - self, name: Path, address: IntegerLike, *, create: Literal[True] - ) -> Tuple[RelocatableModule, bool]: - """ - Find or create a relocatable module. - - :param name: :attr:`Module.name` - :param address: :attr:`RelocatableModule.address` - :return: Module and ``True`` if it was newly created or ``False`` if it - was found. - """ - ... - - @overload def linux_kernel_loadable_module( - self, module_obj: Object, *, create: Literal[False] = False + self, module_obj: Object, *, create: bool = False ) -> RelocatableModule: """ Find a Linux kernel loadable module from a ``struct module *`` object. @@ -863,54 +817,26 @@ class Program: Note that kernel modules are represented as relocatable modules. :param module_obj: ``struct module *`` object for the kernel module. + :param create: Create the module if it doesn't exist. :return: Relocatable module with a name and address matching *module_obj*. - :raises LookupError: if no matching module has been created + :raises LookupError: if no matching module has been created and + *create* is ``False`` """ ... 
- @overload - def linux_kernel_loadable_module( - self, module_obj: Object, *, create: Literal[True] - ) -> Tuple[RelocatableModule, bool]: - """ - Find or create a Linux kernel loadable module from a ``struct module *`` - object. - - If a new module is created, its :attr:`~Module.address_range` and - :attr:`~RelocatableModule.section_addresses` are set from *module_obj*. - - :param module_obj: `struct module *`` object for the kernel module. - :return: Module and ``True`` if it was newly created or ``False`` if it - was found. - """ - ... - - @overload def extra_module( - self, name: Path, id: IntegerLike = 0, *, create: Literal[False] = False + self, name: Path, id: IntegerLike = 0, *, create: bool = False ) -> ExtraModule: """ Find an extra module. :param name: :attr:`Module.name` :param id: :attr:`ExtraModule.id` + :param create: Create the module if it doesn't exist. :return: Extra module with the given name and ID number. - :raises LookupError: if no matching module has been created - """ - ... - - @overload - def extra_module( - self, name: Path, id: IntegerLike = 0, *, create: Literal[True] - ) -> Tuple[ExtraModule, bool]: - """ - Find or create an extra module. - - :param name: :attr:`Module.name` - :param id: :attr:`ExtraModule.id` - :return: Module and ``True`` if it was newly created or ``False`` if it - was found. + :raises LookupError: if no matching module has been created and + *create* is ``False`` """ ... diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 357bbcbc4..ac9feee60 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -189,10 +189,9 @@ that type or find one that was previously created:: ... 
LookupError: module not found >>> prog.extra_module("foo", 1234, create=True) - (prog.extra_module(name='foo', id=0x4d2), True) + prog.extra_module(name='foo', id=0x4d2) >>> prog.extra_module("foo", 1234) - >>> prog.extra_module("foo", 1234, create=True) - (prog.extra_module(name='foo', id=0x4d2), False) + prog.extra_module(name='foo', id=0x4d2) .. drgndoc:: Program.main_module .. drgndoc:: Program.shared_library_module diff --git a/drgn/cli.py b/drgn/cli.py index c36aa1f97..c29e3d259 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -361,9 +361,9 @@ def _load_debugging_symbols(prog: drgn.Program, args: argparse.Namespace) -> Non if args.extra_symbols: for extra_symbol_path in args.extra_symbols: extra_symbol_path = os.path.abspath(extra_symbol_path) - module, new = prog.extra_module(extra_symbol_path, create=True) - if new: - module.try_file(extra_symbol_path) + prog.extra_module(extra_symbol_path, create=True).try_file( + extra_symbol_path + ) def _main() -> None: diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 5eb59975a..2ed6f064a 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -351,7 +351,6 @@ void *set_error_type_name(const char *format, #define call_tp_alloc(type) ((type *)type##_type.tp_alloc(&type##_type, 0)) PyObject *Module_wrap(struct drgn_module *module); -PyObject *Module_and_bool_wrap(struct drgn_module *module, bool b); static inline Program *Module_prog(Module *module) { struct drgn_program *prog = drgn_module_program(module->module); diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 34e80df81..130a62fe9 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -61,11 +61,6 @@ PyObject *Module_wrap(struct drgn_module *module) return (PyObject *)ret; } -PyObject *Module_and_bool_wrap(struct drgn_module *module, bool b) -{ - return Py_BuildValue("NO", Module_wrap(module), b ? 
Py_True : Py_False); -} - static void Module_dealloc(Module *self) { if (self->module) @@ -578,7 +573,8 @@ static PyObject *ModuleIteratorWithNew_next(ModuleIterator *self) return set_drgn_error(err); if (!module) return NULL; - return Module_and_bool_wrap(module, new); + return Py_BuildValue("NO", Module_wrap(module), + new ? Py_True : Py_False); } PyTypeObject ModuleIterator_type = { diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 873767a60..561930ac6 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1081,14 +1081,13 @@ static PyObject *Program_main_module(Program *self, PyObject *args, return NULL; } struct drgn_module *module; - bool new; err = drgn_module_find_or_create_main(&self->prog, name.path, - &module, &new); + &module, NULL); if (err) { set_drgn_error(err); return NULL; } - return Module_and_bool_wrap(module, new); + return Module_wrap(module); } else { return Module_wrap_find(drgn_module_find_main(&self->prog, name.path)); @@ -1112,16 +1111,15 @@ static PyObject *Program_shared_library_module(Program *self, PyObject *args, if (create) { struct drgn_module *module; - bool new; err = drgn_module_find_or_create_shared_library(&self->prog, name.path, dynamic_address.uvalue, - &module, &new); + &module, NULL); if (err) { set_drgn_error(err); return NULL; } - return Module_and_bool_wrap(module, new); + return Module_wrap(module); } else { return Module_wrap_find(drgn_module_find_shared_library(&self->prog, name.path, @@ -1145,15 +1143,14 @@ static PyObject *Program_vdso_module(Program *self, PyObject *args, if (create) { struct drgn_module *module; - bool new; err = drgn_module_find_or_create_vdso(&self->prog, name.path, dynamic_address.uvalue, - &module, &new); + &module, NULL); if (err) { set_drgn_error(err); return NULL; } - return Module_and_bool_wrap(module, new); + return Module_wrap(module); } else { return Module_wrap_find(drgn_module_find_vdso(&self->prog, name.path, @@ -1177,16 +1174,15 @@ static 
PyObject *Program_relocatable_module(Program *self, PyObject *args, if (create) { struct drgn_module *module; - bool new; err = drgn_module_find_or_create_relocatable(&self->prog, name.path, address.uvalue, - &module, &new); + &module, NULL); if (err) { set_drgn_error(err); return NULL; } - return Module_and_bool_wrap(module, new); + return Module_wrap(module); } else { return Module_wrap_find(drgn_module_find_relocatable(&self->prog, name.path, @@ -1216,15 +1212,14 @@ static PyObject *Program_linux_kernel_loadable_module(Program *self, struct drgn_module *module; if (create) { - bool new; err = drgn_module_find_or_create_linux_kernel_loadable(&module_obj->obj, &module, - &new); + NULL); if (err) { set_drgn_error(err); return NULL; } - return Module_and_bool_wrap(module, new); + return Module_wrap(module); } else { err = drgn_module_find_linux_kernel_loadable(&module_obj->obj, &module); @@ -1251,15 +1246,14 @@ static PyObject *Program_extra_module(Program *self, PyObject *args, if (create) { struct drgn_module *module; - bool new; err = drgn_module_find_or_create_extra(&self->prog, name.path, id.uvalue, &module, - &new); + NULL); if (err) { set_drgn_error(err); return NULL; } - return Module_and_bool_wrap(module, new); + return Module_wrap(module); } else { return Module_wrap_find(drgn_module_find_extra(&self->prog, name.path, diff --git a/tests/linux_kernel/test_debug_info.py b/tests/linux_kernel/test_debug_info.py index b0074fe29..fb565a53c 100644 --- a/tests/linux_kernel/test_debug_info.py +++ b/tests/linux_kernel/test_debug_info.py @@ -111,8 +111,7 @@ def test_find_by_obj(self): module_obj = find_module(self.prog, "drgn_test") self.assertEqual(self.prog.linux_kernel_loadable_module(module_obj), module) self.assertEqual( - self.prog.linux_kernel_loadable_module(module_obj, create=True), - (module, False), + self.prog.linux_kernel_loadable_module(module_obj, create=True), module ) def test_no_sys_module(self): diff --git a/tests/linux_kernel/test_stack_trace.py 
b/tests/linux_kernel/test_stack_trace.py index 314b5aa1f..870c75d66 100644 --- a/tests/linux_kernel/test_stack_trace.py +++ b/tests/linux_kernel/test_stack_trace.py @@ -187,7 +187,7 @@ def test_vmlinux_builtin_orc(self): prog.register_symbol_finder( "vmlinux_kallsyms", load_vmlinux_kallsyms(prog), enable_index=0 ) - main, _ = prog.main_module(name="kernel", create=True) + main = prog.main_module(name="kernel", create=True) main.address_range = self.prog.main_module().address_range # Luckily, all drgn cares about for x86_64 pt_regs is that it is a diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index db5ffed08..28d50760c 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -62,7 +62,7 @@ def setUp(self): self.prog.set_enabled_debug_info_finders([]) def test_want_both(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile() as f: module.try_file(f.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -85,7 +85,7 @@ def test_want_both(self): ) def test_want_both_not_loadable(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(loadable=False) as f: module.try_file(f.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -94,7 +94,7 @@ def test_want_both_not_loadable(self): self.assertEqual(module.debug_file_path, f.name) def test_want_both_no_debug(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(debug=False) as f: module.try_file(f.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -103,7 +103,7 @@ def test_want_both_no_debug(self): self.assertIsNone(module.debug_file_path) def test_want_both_is_neither(self): - module = self.prog.extra_module("/foo/bar", 
create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(loadable=False, debug=False) as f: module.try_file(f.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -112,7 +112,7 @@ def test_want_both_is_neither(self): self.assertIsNone(module.debug_file_path) def test_only_want_loaded(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.debug_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile() as f: module.try_file(f.name) @@ -122,7 +122,7 @@ def test_only_want_loaded(self): self.assertIsNone(module.debug_file_path) def test_only_want_loaded_not_loadable(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.debug_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile(loadable=False) as f: module.try_file(f.name) @@ -132,7 +132,7 @@ def test_only_want_loaded_not_loadable(self): self.assertIsNone(module.debug_file_path) def test_only_want_loaded_no_debug(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.debug_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile(debug=False) as f: module.try_file(f.name) @@ -142,7 +142,7 @@ def test_only_want_loaded_no_debug(self): self.assertIsNone(module.debug_file_path) def test_only_want_loaded_is_neither(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.debug_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile(loadable=False, debug=False) as f: module.try_file(f.name) @@ -152,7 +152,7 @@ def test_only_want_loaded_is_neither(self): self.assertIsNone(module.debug_file_path) def test_only_want_debug(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = 
self.prog.extra_module("/foo/bar", create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile() as f: module.try_file(f.name) @@ -162,7 +162,7 @@ def test_only_want_debug(self): self.assertEqual(module.debug_file_path, f.name) def test_only_want_debug_not_loadable(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile(loadable=False) as f: module.try_file(f.name) @@ -172,7 +172,7 @@ def test_only_want_debug_not_loadable(self): self.assertEqual(module.debug_file_path, f.name) def test_only_want_debug_no_debug(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile(debug=False) as f: module.try_file(f.name) @@ -182,7 +182,7 @@ def test_only_want_debug_no_debug(self): self.assertIsNone(module.debug_file_path) def test_only_want_debug_is_neither(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile(loadable=False, debug=False) as f: module.try_file(f.name) @@ -192,7 +192,7 @@ def test_only_want_debug_is_neither(self): self.assertIsNone(module.debug_file_path) def test_want_neither(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT module.debug_file_status = ModuleFileStatus.DONT_WANT with NamedTemporaryElfFile() as f: @@ -203,7 +203,7 @@ def test_want_neither(self): self.assertIsNone(module.debug_file_path) def test_separate_files_loaded_first(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = 
self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(debug=False) as f1: module.try_file(f1.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -219,7 +219,7 @@ def test_separate_files_loaded_first(self): self.assertEqual(module.debug_file_path, f2.name) def test_separate_files_debug_first(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(loadable=False) as f1: module.try_file(f1.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -235,7 +235,7 @@ def test_separate_files_debug_first(self): self.assertEqual(module.debug_file_path, f1.name) def test_loadable_then_both(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(debug=False) as f1: module.try_file(f1.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -251,7 +251,7 @@ def test_loadable_then_both(self): self.assertEqual(module.debug_file_path, f2.name) def test_debug_then_both(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(loadable=False) as f1: module.try_file(f1.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -267,14 +267,14 @@ def test_debug_then_both(self): self.assertEqual(module.debug_file_path, f1.name) def test_no_build_id_force(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile() as f: module.try_file(f.name, force=True) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_path, f.name) def test_no_build_id_file_has_build_id(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = 
self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: module.try_file(f.name) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -282,7 +282,7 @@ def test_no_build_id_file_has_build_id(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_no_build_id_file_has_build_id_force(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: module.try_file(f.name, force=True) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -290,7 +290,7 @@ def test_no_build_id_file_has_build_id_force(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_build_id_match(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: module.try_file(f.name) @@ -299,7 +299,7 @@ def test_build_id_match(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_build_id_match_force(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: module.try_file(f.name, force=True) @@ -308,7 +308,7 @@ def test_build_id_match_force(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_build_id_mismatch(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with 
NamedTemporaryElfFile(build_id=b"\xff\xff\xff\xff") as f: module.try_file(f.name) @@ -317,7 +317,7 @@ def test_build_id_mismatch(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_build_id_mismatch_force(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile(build_id=b"\xff\xff\xff\xff") as f: module.try_file(f.name, force=True) @@ -326,7 +326,7 @@ def test_build_id_mismatch_force(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_build_id_missing(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile() as f: module.try_file(f.name) @@ -335,7 +335,7 @@ def test_build_id_missing(self): self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") def test_build_id_missing_force(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile() as f: module.try_file(f.name, force=True) @@ -366,7 +366,7 @@ def test_gnu_debugaltlink(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -433,7 +433,7 @@ def test_gnu_debugaltlink_build_id_mismatch(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -486,7 +486,7 @@ def test_gnu_debugaltlink_then_both(self): alt_path = 
debug_dir / "alt.debug" alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id with NamedTemporaryElfFile( build_id=build_id, @@ -518,7 +518,7 @@ def test_gnu_debugaltlink_cancel(self): alt_path = debug_dir / "alt.debug" alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id with NamedTemporaryElfFile( build_id=build_id, @@ -535,7 +535,7 @@ def test_gnu_debugaltlink_cancel(self): self.assertRaises(ValueError, module.wanted_supplementary_debug_file) def test_extra_module_no_address_range(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) with NamedTemporaryElfFile() as f: module.try_file(f.name) self.assertIsNone(module.address_range) @@ -543,7 +543,7 @@ def test_extra_module_no_address_range(self): self.assertEqual(module.debug_file_bias, 0) def test_extra_module_address_range(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.address_range = (0x40000000, 0x40001000) with NamedTemporaryElfFile() as f: module.try_file(f.name) @@ -552,7 +552,7 @@ def test_extra_module_address_range(self): self.assertEqual(module.debug_file_bias, 0x30000000) def test_extra_module_empty_address_range(self): - module = self.prog.extra_module("/foo/bar", create=True)[0] + module = self.prog.extra_module("/foo/bar", create=True) module.address_range = (0, 0) with NamedTemporaryElfFile() as f: module.try_file(f.name) @@ -1682,8 +1682,8 @@ def test_empty(self): def test_multiple(self): self.prog.load_module_debug_info( - self.prog.extra_module("/foo/bar", create=True)[0], - 
self.prog.extra_module("/foo/baz", create=True)[0], + self.prog.extra_module("/foo/bar", create=True), + self.prog.extra_module("/foo/baz", create=True), ) self.finder.assert_called_once() self.assertCountEqual( @@ -1699,15 +1699,15 @@ def test_wrong_program(self): ValueError, "module from wrong program", self.prog.load_module_debug_info, - self.prog.extra_module("/foo/bar", create=True)[0], - Program().extra_module("/foo/baz", create=True)[0], + self.prog.extra_module("/foo/bar", create=True), + Program().extra_module("/foo/baz", create=True), ) def test_type_error(self): self.assertRaises( TypeError, self.prog.load_module_debug_info, - self.prog.extra_module("/foo/bar", create=True)[0], + self.prog.extra_module("/foo/bar", create=True), None, ) @@ -1720,7 +1720,7 @@ def setUp(self): def test_by_module_name(self): with NamedTemporaryElfFile() as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -1731,7 +1731,7 @@ def test_by_module_name_with_build_id(self): build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile(build_id=build_id) as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -1743,7 +1743,7 @@ def test_by_module_name_missing_build_id(self): build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile() as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -1753,7 +1753,7 @@ def 
test_by_module_name_build_id_mismatch(self): build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" with NamedTemporaryElfFile(build_id=build_id[::-1]) as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -1761,7 +1761,7 @@ def test_by_module_name_build_id_mismatch(self): def test_reuse_loaded_file(self): with NamedTemporaryElfFile() as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) module.debug_file_status = ModuleFileStatus.DONT_WANT self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -1777,7 +1777,7 @@ def test_reuse_loaded_file(self): def test_reuse_debug_file(self): with NamedTemporaryElfFile() as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) @@ -1797,7 +1797,7 @@ def test_reuse_wanted_supplementary_debug_file(self): with NamedTemporaryElfFile( gnu_debugaltlink=("alt.debug", alt_build_id), ) as f: - module = self.prog.extra_module(f.name, create=True)[0] + module = self.prog.extra_module(f.name, create=True) module.loaded_file_status = ModuleFileStatus.DONT_WANT self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) @@ -1869,7 +1869,7 @@ def test_by_build_id(self): create_dwarf_file((), sections=(ALLOCATED_SECTION,)) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) 
@@ -1897,7 +1897,7 @@ def test_by_build_id_separate(self): debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") debug_path.write_bytes(create_dwarf_file(())) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) @@ -1927,7 +1927,7 @@ def test_by_build_id_from_loaded(self): debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") debug_path.write_bytes(create_dwarf_file(())) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.debug_info_options.directories = ("", ".debug", str(debug_dir)) self.prog.load_module_debug_info(module) @@ -1952,7 +1952,7 @@ def test_by_build_id_method(self): create_dwarf_file((), sections=(ALLOCATED_SECTION,)) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.build_id = build_id self.prog.find_standard_debug_info( @@ -1998,7 +1998,7 @@ def test_by_gnu_debuglink(self): module = self.prog.extra_module( bin_dir / "binary", i, create=True - )[0] + ) self.prog.load_module_debug_info(module) self.assertEqual( @@ -2038,7 +2038,7 @@ def test_by_gnu_debuglink_absolute(self): debug_path.parent.mkdir(parents=True, exist_ok=True) debug_path.write_bytes(debug_file_contents) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -2065,7 +2065,7 @@ def test_by_gnu_debuglink_crc_mismatch(self): debug_path = bin_dir / "binary.debug" debug_path.write_bytes(debug_file_contents) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / 
"binary", create=True) self.prog.debug_info_options.directories = ("",) self.prog.load_module_debug_info(module) self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -2087,7 +2087,7 @@ def test_invalid_gnu_debuglink(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -2114,7 +2114,7 @@ def test_gnu_debugaltlink_absolute(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -2140,7 +2140,7 @@ def test_gnu_debugaltlink_not_found(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual( @@ -2178,7 +2178,7 @@ def test_only_gnu_debugaltlink_absolute(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.try_file(binary_path) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual( @@ -2210,7 +2210,7 @@ def test_only_gnu_debugaltlink_not_found(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) module.try_file(binary_path) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual( @@ -2257,7 +2257,7 @@ def test_gnu_debugaltlink_relative(self): ) ) - module = 
self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -2295,7 +2295,7 @@ def test_gnu_debugaltlink_debug_directories(self): ) ) - module = self.prog.extra_module(binary_path, create=True)[0] + module = self.prog.extra_module(binary_path, create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -2328,7 +2328,7 @@ def test_gnu_debugaltlink_build_id_mismatch(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual( @@ -2367,7 +2367,7 @@ def test_invalid_gnu_debugaltlink(self): ) ) - module = self.prog.extra_module(bin_dir / "binary", create=True)[0] + module = self.prog.extra_module(bin_dir / "binary", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -2451,7 +2451,7 @@ def setUp(self): ) def test_no_build_id(self): - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) @@ -2469,7 +2469,7 @@ def test_separate(self): "debuginfo": debug_file.name, } - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) module.build_id = build_id 
self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -2498,7 +2498,7 @@ def test_no_servers(self): "debuginfo": debug_file.name, } - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) @@ -2513,7 +2513,7 @@ def test_cache_hit(self): self.server.build_ids[build_id] = {"debuginfo": debug_file.name} for i in range(2): - module = self.prog.extra_module("foo", i, create=True)[0] + module = self.prog.extra_module("foo", i, create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) @@ -2542,7 +2542,7 @@ def test_gnu_debugaltlink(self): } self.server.build_ids[alt_build_id] = {"debuginfo": alt_f.name} - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -2577,7 +2577,7 @@ def test_gnu_debugaltlink_not_found(self): "debuginfo": debug_file.name, } - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) module.build_id = build_id self.prog.load_module_debug_info(module) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) @@ -2610,7 +2610,7 @@ def test_only_gnu_debugaltlink(self): ) as alt_f: self.server.build_ids[alt_build_id] = {"debuginfo": alt_f.name} - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) module.try_file(f.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual( @@ -2635,7 +2635,7 @@ def test_only_gnu_debugaltlink_not_found(self): build_id=build_id, 
gnu_debugaltlink=("alt.debug", alt_build_id), ) as f: - module = self.prog.extra_module("foo", create=True)[0] + module = self.prog.extra_module("foo", create=True) module.try_file(f.name) self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) self.assertEqual( diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 23fabe997..fcdb9891a 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -205,7 +205,7 @@ def add_extra_dwarf(prog, path, supplementary_path=None): - module = prog.extra_module(path, create=True)[0] + module = prog.extra_module(path, create=True) module.try_file(path, force=True) if module.debug_file_status == drgn.ModuleFileStatus.WANT_SUPPLEMENTARY: module.try_file(supplementary_path) diff --git a/tests/test_module.py b/tests/test_module.py index a4e5541b8..c2e24a56b 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -42,16 +42,15 @@ def test_main_module(self): self.assertRaises(LookupError, prog.main_module) self.assertRaises(LookupError, prog.main_module, "/foo/bar") - module, new = prog.main_module("/foo/bar", create=True) + module = prog.main_module("/foo/bar", create=True) self.assertIsInstance(module, MainModule) - self.assertEqual(new, True) self.assertEqual(prog.main_module(), module) self.assertEqual(prog.main_module(create=False), module) self.assertEqual(prog.main_module("/foo/bar"), module) self.assertEqual(prog.main_module(b"/foo/bar"), module) self.assertEqual(prog.main_module(Path("/foo/bar")), module) - self.assertEqual(prog.main_module("/foo/bar", create=True), (module, False)) + self.assertEqual(prog.main_module("/foo/bar", create=True), module) self.assertRaises(LookupError, prog.main_module, "/foo/baz") self.assertRaises(LookupError, prog.main_module, "/foo/baz", create=True) @@ -73,9 +72,8 @@ def test_shared_library_module(self): LookupError, prog.shared_library_module, "/foo/bar", 0x10000000 ) - module, new = prog.shared_library_module("/foo/bar", 0x10000000, create=True) + module = 
prog.shared_library_module("/foo/bar", 0x10000000, create=True) self.assertIsInstance(module, SharedLibraryModule) - self.assertEqual(new, True) self.assertEqual(prog.shared_library_module("/foo/bar", 0x10000000), module) self.assertEqual(prog.shared_library_module(b"/foo/bar", 0x10000000), module) @@ -83,8 +81,7 @@ def test_shared_library_module(self): prog.shared_library_module(Path("/foo/bar"), IntWrapper(0x10000000)), module ) self.assertEqual( - prog.shared_library_module("/foo/bar", 0x10000000, create=True), - (module, False), + prog.shared_library_module("/foo/bar", 0x10000000, create=True), module ) self.assertRaises( @@ -95,13 +92,13 @@ def test_shared_library_module(self): ) self.assertNotEqual( - prog.shared_library_module("/foo/bar", 0x20000000, create=True)[0], module + prog.shared_library_module("/foo/bar", 0x20000000, create=True), module ) self.assertNotEqual( - prog.shared_library_module("/foo/baz", 0x10000000, create=True)[0], module + prog.shared_library_module("/foo/baz", 0x10000000, create=True), module ) self.assertNotEqual( - prog.vdso_module("/foo/bar", 0x10000000, create=True)[0], module + prog.vdso_module("/foo/bar", 0x10000000, create=True), module ) self.assertIs(module.prog, prog) @@ -124,30 +121,27 @@ def test_vdso_module(self): self.assertRaises(LookupError, prog.vdso_module, "/foo/bar", 0x10000000) - module, new = prog.vdso_module("/foo/bar", 0x10000000, create=True) + module = prog.vdso_module("/foo/bar", 0x10000000, create=True) self.assertIsInstance(module, VdsoModule) - self.assertEqual(new, True) self.assertEqual(prog.vdso_module("/foo/bar", 0x10000000), module) self.assertEqual(prog.vdso_module(b"/foo/bar", 0x10000000), module) self.assertEqual( prog.vdso_module(Path("/foo/bar"), IntWrapper(0x10000000)), module ) - self.assertEqual( - prog.vdso_module("/foo/bar", 0x10000000, create=True), (module, False) - ) + self.assertEqual(prog.vdso_module("/foo/bar", 0x10000000, create=True), module) self.assertRaises(LookupError, 
prog.vdso_module, "/foo/bar", 0x20000000) self.assertRaises(LookupError, prog.vdso_module, "/foo/baz", 0x10000000) self.assertNotEqual( - prog.vdso_module("/foo/bar", 0x20000000, create=True)[0], module + prog.vdso_module("/foo/bar", 0x20000000, create=True), module ) self.assertNotEqual( - prog.vdso_module("/foo/baz", 0x10000000, create=True)[0], module + prog.vdso_module("/foo/baz", 0x10000000, create=True), module ) self.assertNotEqual( - prog.shared_library_module("/foo/bar", 0x10000000, create=True)[0], module + prog.shared_library_module("/foo/bar", 0x10000000, create=True), module ) self.assertIs(module.prog, prog) @@ -168,9 +162,8 @@ def test_relocatable_module(self): self.assertRaises(LookupError, prog.relocatable_module, "/foo/bar", 0x10000000) - module, new = prog.relocatable_module("/foo/bar", 0x10000000, create=True) + module = prog.relocatable_module("/foo/bar", 0x10000000, create=True) self.assertIsInstance(module, RelocatableModule) - self.assertEqual(new, True) self.assertEqual(prog.relocatable_module("/foo/bar", 0x10000000), module) self.assertEqual(prog.relocatable_module(b"/foo/bar", 0x10000000), module) @@ -178,21 +171,20 @@ def test_relocatable_module(self): prog.relocatable_module(Path("/foo/bar"), IntWrapper(0x10000000)), module ) self.assertEqual( - prog.relocatable_module("/foo/bar", 0x10000000, create=True), - (module, False), + prog.relocatable_module("/foo/bar", 0x10000000, create=True), module ) self.assertRaises(LookupError, prog.relocatable_module, "/foo/bar", 0x20000000) self.assertRaises(LookupError, prog.relocatable_module, "/foo/baz", 0x10000000) self.assertNotEqual( - prog.relocatable_module("/foo/bar", 0x20000000, create=True)[0], module + prog.relocatable_module("/foo/bar", 0x20000000, create=True), module ) self.assertNotEqual( - prog.relocatable_module("/foo/baz", 0x10000000, create=True)[0], module + prog.relocatable_module("/foo/baz", 0x10000000, create=True), module ) self.assertNotEqual( - 
prog.shared_library_module("/foo/bar", 0x10000000, create=True)[0], module + prog.shared_library_module("/foo/bar", 0x10000000, create=True), module ) self.assertIs(module.prog, prog) @@ -202,7 +194,7 @@ def test_relocatable_module(self): def test_section_addresses(self): prog = Program() - module = prog.relocatable_module("/foo/bar", 0x10000000, create=True)[0] + module = prog.relocatable_module("/foo/bar", 0x10000000, create=True) self.assertNotIn(".text", module.section_addresses) self.assertNotIn(1, module.section_addresses) @@ -267,26 +259,23 @@ def test_extra_module(self): self.assertRaises(LookupError, prog.extra_module, "/foo/bar", 1234) - module, new = prog.extra_module("/foo/bar", 1234, create=True) + module = prog.extra_module("/foo/bar", 1234, create=True) self.assertIsInstance(module, ExtraModule) - self.assertEqual(new, True) self.assertEqual(prog.extra_module("/foo/bar", 1234), module) self.assertEqual(prog.extra_module(b"/foo/bar", 1234), module) self.assertEqual(prog.extra_module(Path("/foo/bar"), IntWrapper(1234)), module) - self.assertEqual( - prog.extra_module("/foo/bar", 1234, create=True), (module, False) - ) + self.assertEqual(prog.extra_module("/foo/bar", 1234, create=True), module) self.assertRaises(LookupError, prog.extra_module, "/foo/bar", 5678) self.assertRaises(LookupError, prog.extra_module, "/foo/baz", 1234) - self.assertNotEqual(prog.extra_module("/foo/bar", 5678, create=True)[0], module) - self.assertNotEqual(prog.extra_module("/foo/baz", 1234, create=True)[0], module) + self.assertNotEqual(prog.extra_module("/foo/bar", 5678, create=True), module) + self.assertNotEqual(prog.extra_module("/foo/baz", 1234, create=True), module) self.assertNotEqual( - prog.shared_library_module("/foo/bar", 1234, create=True)[0], module + prog.shared_library_module("/foo/bar", 1234, create=True), module ) - self.assertEqual(prog.extra_module("/foo/bar", create=True)[0].id, 0) + self.assertEqual(prog.extra_module("/foo/bar", create=True).id, 0) 
self.assertIs(module.prog, prog) self.assertEqual(module.name, "/foo/bar") @@ -301,7 +290,7 @@ def test_extra_module_invalid(self): self.assertRaises(TypeError, prog.extra_module, "/foo/bar", 1234, True) def test_address_range(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) module.address_range = (0x10000000, 0x10010000) self.assertEqual(module.address_range, (0x10000000, 0x10010000)) @@ -316,13 +305,13 @@ def test_address_range(self): self.assertIsNone(module.address_range) def test_address_range_empty(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) module.address_range = (0, 0) self.assertEqual(module.address_range, (0, 0)) def test_address_range_type_error(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) with self.assertRaises(TypeError): module.address_range = 1 @@ -337,7 +326,7 @@ def test_address_range_type_error(self): module.address_range = (1, "bar") def test_address_range_invalid(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) with self.assertRaisesRegex(ValueError, "invalid module address range"): module.address_range = (0x10010000, 0x10000000) @@ -352,12 +341,12 @@ def test_address_range_invalid(self): module.address_range = (2**64 - 1, 2**64 - 1) def test_address_range_del(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) with self.assertRaises(AttributeError): del module.address_range def test_build_id(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") @@ -372,17 
+361,17 @@ def test_build_id(self): self.assertIsNone(module.build_id) def test_build_id_type_error(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) with self.assertRaises(TypeError): module.build_id = "abcd" def test_build_id_invalid_empty(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) with self.assertRaisesRegex(ValueError, "build ID cannot be empty"): module.build_id = b"" def test_build_id_del(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) with self.assertRaises(AttributeError): del module.build_id @@ -390,19 +379,19 @@ def test_find_by_name(self): prog = Program() self.assertRaises(LookupError, prog.module, "foo") - module1 = prog.extra_module("foo", create=True)[0] + module1 = prog.extra_module("foo", create=True) self.assertEqual(prog.module("foo"), module1) - module2 = prog.main_module("foo", create=True)[0] + module2 = prog.main_module("foo", create=True) self.assertIn(prog.module("foo"), (module1, module2)) self.assertRaises(LookupError, prog.module, "bar") def test_find_by_address(self): prog = Program() - module1 = prog.extra_module("/foo/bar", create=True)[0] + module1 = prog.extra_module("/foo/bar", create=True) module1.address_range = (0x10000000, 0x10010000) - module2 = prog.extra_module("/asdf/jkl", create=True)[0] + module2 = prog.extra_module("/asdf/jkl", create=True) module2.address_range = (0x20000000, 0x20020000) self.assertRaises(LookupError, prog.module, 0x0FFFFFFF) @@ -419,7 +408,7 @@ def test_find_by_address(self): # Test all of the state transitions that we can without setting a file. 
def _test_file_status(self, which): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) status_attr = which + "_file_status" wants_file = getattr(module, f"wants_{which}_file") @@ -492,29 +481,29 @@ def test_empty(self): self.assertEqual(list(Program().modules()), []) def test_one(self): - module = Program().extra_module("/foo/bar", create=True)[0] + module = Program().extra_module("/foo/bar", create=True) self.assertEqual(list(module.prog.modules()), [module]) def test_multiple(self): prog = Program() modules = [ - prog.extra_module("/foo/bar", create=True)[0], - prog.extra_module("/asdf/jkl", create=True)[0], - prog.extra_module("/123/456", create=True)[0], + prog.extra_module("/foo/bar", create=True), + prog.extra_module("/asdf/jkl", create=True), + prog.extra_module("/123/456", create=True), ] self.assertCountEqual(list(prog.modules()), modules) def test_same_name(self): prog = Program() modules = [ - prog.extra_module("foo", id=0, create=True)[0], - prog.main_module("foo", create=True)[0], + prog.extra_module("foo", id=0, create=True), + prog.main_module("foo", create=True), ] actual = list(prog.modules()) self.assertCountEqual(actual, modules) self.assertEqual(actual[0], prog.main_module()) - modules.append(prog.extra_module("foo", id=1, create=True)[0]) + modules.append(prog.extra_module("foo", id=1, create=True)) actual = list(prog.modules()) self.assertCountEqual(actual, modules) self.assertEqual(actual[0], prog.main_module()) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index aae2b9ee1..04e23e19c 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -51,7 +51,7 @@ def elf_symbol_program(*modules): end <= other_start or start >= other_end ), f"module {len(address_ranges)} overlaps module {i}" address_ranges.append((start, end)) - module = prog.extra_module(f.name, create=True)[0] + module = prog.extra_module(f.name, create=True) module.address_range = (start, end) 
module.try_file(f.name, force=True) return prog From d6576fb31d97c6743b30f517bb8b4acec313d59b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 00:55:30 -0700 Subject: [PATCH 141/166] docs: split up table of contents into sections Signed-off-by: Omar Sandoval --- docs/_static/custom.css | 5 +++++ docs/index.rst | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index b7ad6d98c..3949b818c 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,3 +1,8 @@ +div.sphinxsidebar p.caption { + font-weight: 300; + font-size: 1.4rem; +} + details { margin-block-start: 1em; margin-block-end: 1em; diff --git a/docs/index.rst b/docs/index.rst index f46454c85..b0b79910e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,15 +26,26 @@ Table of Contents ----------------- .. toctree:: + :caption: Using drgn :maxdepth: 3 installation user_guide advanced_usage + getting_debugging_symbols + +.. toctree:: + :caption: Tutorials + :maxdepth: 3 + + tutorials + case_studies + +.. toctree:: + :caption: Reference + :maxdepth: 3 + api_reference helpers support_matrix - tutorials - case_studies - getting_debugging_symbols release_highlights From baa9a14c4993cd4d13a83676066c0ec66407a945 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 00:57:57 -0700 Subject: [PATCH 142/166] docs: add man page drgn --help has a good overview, but there are some things that could use more detail which would be overkill for --help. 
Signed-off-by: Omar Sandoval --- docs/conf.py | 6 ++ docs/index.rst | 1 + docs/man/drgn.rst | 179 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 docs/man/drgn.rst diff --git a/docs/conf.py b/docs/conf.py index 24f630406..8405000b7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,6 +6,12 @@ master_doc = "index" +man_pages = [ + ("man/drgn", "drgn", "programmable debugger", "", "1"), +] + +option_emphasise_placeholders = True + extensions = [ "details", "drgndoc.ext", diff --git a/docs/index.rst b/docs/index.rst index b0b79910e..e67e849f0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,5 +47,6 @@ Table of Contents api_reference helpers + Man Page <man/drgn> support_matrix release_highlights diff --git a/docs/man/drgn.rst b/docs/man/drgn.rst new file mode 100644 index 000000000..0436e6ad3 --- /dev/null +++ b/docs/man/drgn.rst @@ -0,0 +1,179 @@ +drgn +==== + +Synopsis +-------- + +| **drgn** [*OPTION*...] +| **drgn** [*OPTION*...] *SCRIPT* [*ARGUMENT*...] +| **drgn** [*OPTION*...] **-e** *CODE* [*ARGUMENT*...] + +Description +----------- + +:command:`drgn` (pronounced "dragon") is a debugger with an emphasis on +programmability. It provides APIs for using the types, variables, and stack +traces in a program or core dump from Python, allowing for easy, expressive +scripting and more complex debugging. + +Full documentation is available online at https://drgn.readthedocs.io/. + +For in-program documentation, try ``help(drgn)``. + +Options +------- + +.. program:: drgn + +If no positional arguments are given (and :option:`-e` is not given), then drgn +runs in *interactive mode*: commands are read from the terminal. Relevant +helpers are automatically imported. An empty string is prepended to +:py:data:`sys.path`. + +If positional arguments are given (and :option:`-e` is not given), then drgn +runs in *script mode*: *SCRIPT* is executed with the given *ARGUMENT*\ s. +Nothing is imported automatically.
:py:data:`sys.argv[0] <sys.argv>` is set to +*SCRIPT* and the remaining arguments are added to :py:data:`sys.argv`. The +parent directory of *SCRIPT* is prepended to :py:data:`sys.path`. + +.. option:: -e {CODE} + + Evaluate the given code and exit. Relevant helpers are automatically + imported. :py:data:`sys.argv[0] <sys.argv>` is set to *-e* and the + remaining arguments are added to :py:data:`sys.argv`. An empty string is + prepended to :py:data:`sys.path`. + +Program Selection +^^^^^^^^^^^^^^^^^ + +One of these options may be given to specify what program to debug. + +.. option:: -k, --kernel + + Debug the running kernel. This is the default. + +.. option:: -c, --core {PATH} + + Debug the given core dump. + +.. option:: -p, --pid {PID} + + Debug the running process with the given process ID. + +Debugging Symbols +^^^^^^^^^^^^^^^^^ + +.. option:: -s, --symbols {PATH} + + Load debugging symbols from the given file. If the file does not correspond + to a loaded executable, library, or module, then a warning is printed and + it is ignored; see :option:`--extra-symbols` for an alternative. + + This option may be given more than once. + +.. option:: --main-symbols + + Only load debugging symbols for the main executable and those added with + :option:`-s` or :option:`--extra-symbols`. + +.. option:: --no-default-symbols + + Don't load any debugging symbols that were not explicitly added with + :option:`-s` or :option:`--extra-symbols`. + +.. option:: --extra-symbols {PATH} + + Load additional debugging symbols from the given file, which is assumed not + to correspond to a loaded executable, library, or module. + + This option may be given more than once. + +The following options correspond to :py:attr:`drgn.Program.debug_info_options` +in the Python API. + +.. option:: --try-symbols-by {METHOD[,METHOD...]} + + Enable loading debugging symbols using the given methods.
*METHOD* may be: + + * The name of a debugging information finder (``standard``, ``debuginfod``, + or any added by plugins). + * ``module-name``: if the name of a module looks like a filesystem path, try the + file at that path. + * ``build-id``: search by build ID. + * ``debug-link``: search by debug link (e.g., ``.gnu_debuglink``). + * ``procfs``: try :file:`/proc/{pid}/exe` or :file:`/proc/{pid}/map_files`. + * ``embedded-vdso``: try vDSO data saved in a core dump. + * ``reuse``: try reusing a previously used file. + * ``supplementary``: try finding supplementary files (e.g., + ``.gnu_debugaltlink``). + * ``kmod=depmod``: search using *depmod* metadata. + * ``kmod=walk``: search by walking kernel directories. + * ``kmod=depmod-or-walk``: search using *depmod* metadata if it is + available or by walking kernel directories if *depmod* metadata does not + exist. + * ``kmod=depmod-and-walk``: search using *depmod* metadata if it is + available, then by walking kernel directories if *depmod* metadata does + not exist or does not contain the desired module. + + Multiple methods may be enabled by passing a comma-separated list. This + option may be given more than once, in which case the lists will be + combined. + +.. option:: --no-symbols-by {METHOD[,METHOD...]} + + Disable loading debugging symbols using the given methods. *METHOD* may be + the name of a debugging information finder, ``module-name``, ``build-id``, + ``debug-link``, ``procfs``, ``embedded-vdso``, ``reuse``, + ``supplementary``, or ``kmod``. + + Multiple methods may be disabled by passing a comma-separated list. This + option may be given more than once, in which case the lists will be + combined. + +.. option:: --debug-directory {PATH} + + Search for debugging symbols by build ID and debug link in the given + directory. + + This option may be given more than once to search in multiple directories. + +.. 
option:: --no-default-debug-directories + + Don't search for debugging symbols by build ID and debug link in the + standard directories. + +.. option:: --kernel-directory {PATH} + + Search for the kernel image and loadable kernel modules in the given + directory. + + This option may be given more than once to search in multiple directories. + +.. option:: --no-default-kernel-directories + + Don't search for the kernel image and loadable kernel modules in the + standard directories. + +Logging +^^^^^^^ + +.. option:: --log-level {\{debug,info,warning,error,critical,none\}} + + Log messages of at least the given level to standard error. The default is + *warning*. + +.. option:: -q, --quiet + + Don't print any logs or download progress. This is equivalent to + :option:`--log-level none <--log-level>`. + +Generic Information +^^^^^^^^^^^^^^^^^^^ + +.. option:: -h, --help + + Show a help message and exit. + +.. option:: --version + + Show :command:`drgn`'s version information and exit. From de5d4cc396dd826e65b5f91090c570c76b8bd4f1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 01:14:12 -0700 Subject: [PATCH 143/166] docs: update Advanced Usage Reorder things, update stale APIs, and use :option: roles where appropriate. Signed-off-by: Omar Sandoval --- docs/advanced_usage.rst | 174 +++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 84 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index b500b92a8..b78b9a888 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -22,9 +22,11 @@ levels of control and complexity. Loading Debugging Symbols From Non-Standard Locations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. program:: drgn + drgn searches standard locations for debugging symbols. If you have debugging symbols available in a non-standard location, you can provide it to the CLI -with the ``-s``/``--symbols`` option: +with the :option:`-s`/:option:`--symbols` option: .. 
code-block:: console @@ -37,19 +39,19 @@ Or with the :meth:`drgn.Program.load_debug_info()` method:: Loading Debugging Symbols For Specific Modules ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``-s`` and ``load_debug_info()`` try the given files against all of the modules -loaded in the program based on build IDs. You can also :ref:`look up -` a specific module and try a given file for just that -module with :meth:`drgn.Module.try_file()`:: +:option:`-s` and :meth:`~drgn.Program.load_debug_info()` try the given files +against all of the modules loaded in the program based on build IDs. You can +also :ref:`look up ` a specific module and try a given +file for just that module with :meth:`drgn.Module.try_file()`:: >>> prog.main_module().try_file("build/vmlinux") Loading Additional Debugging Symbols ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``-s`` and ``load_debug_info()`` ignore files that don't correspond to a loaded -module. To load debugging symbols from an arbitrary file, pass -``--extra-symbols`` to the CLI: +:option:`-s` and :meth:`~drgn.Program.load_debug_info()` ignore files that +don't correspond to a loaded module. To load debugging symbols from an +arbitrary file, pass :option:`--extra-symbols` to the CLI: .. code-block:: console @@ -57,14 +59,14 @@ module. To load debugging symbols from an arbitrary file, pass Or create a :class:`drgn.ExtraModule`:: - >>> module = prog.extra_module("my_extra_symbols") + >>> module = prog.extra_module("my_extra_symbols", create=True) >>> module.try_file("./my_extra_symbols.debug") Listing Modules ^^^^^^^^^^^^^^^ By default, drgn creates a module for everything loaded in the program. You can -disable this in the CLI with ``-no-default-symbols``. +disable this in the CLI with :option:`--no-default-symbols`. You can find or create the loaded modules programmatically with :meth:`drgn.Program.loaded_modules()`:: @@ -81,8 +83,8 @@ You can create modules with the :ref:`module factory functions `. 
You can also modify various attributes of the :class:`drgn.Module` class. -Debug Info Finders -^^^^^^^^^^^^^^^^^^ +Debugging Information Finders +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A callback for automatically finding debugging symbols for a set of modules can be registered with :meth:`drgn.Program.register_debug_info_finder()`. Here is @@ -127,78 +129,8 @@ an example for getting debugging symbols on Fedora Linux using DNF: prog.register_debug_info_finder("dnf", dnf_debug_info_finder, enable_index=-1) -Currently, debug info finders must be configured explicitly by the user. In the -future, there will be a plugin system for doing so automatically. - -Library -------- - -In addition to the CLI, drgn is also available as a library. -:func:`drgn.program_from_core_dump()`, :func:`drgn.program_from_kernel()`, and -:func:`drgn.program_from_pid()` correspond to the ``-c``, ``-k``, and ``-p`` -command line options, respectively; they return a :class:`drgn.Program` that -can be used just like the one initialized by the CLI:: - - >>> import drgn - >>> prog = drgn.program_from_kernel() - -C Library ---------- - -The core functionality of drgn is implemented in C and is available as a C -library, ``libdrgn``. See |drgn.h|_. - -.. |drgn.h| replace:: ``drgn.h`` -.. _drgn.h: https://github.com/osandov/drgn/blob/main/libdrgn/drgn.h - -Full documentation can be generated by running ``doxygen`` in the ``libdrgn`` -directory of the source code. Note that the API and ABI are not yet stable. - -Custom Programs ---------------- - -The main components of a :class:`drgn.Program` are the program memory, types, -and symbols. The CLI and equivalent library interfaces automatically determine -these. However, it is also possible to create a "blank" ``Program`` and plug in -the main components. The :func:`drgn.cli.run_interactive()` function allows you -to run the same drgn CLI once you've created a :class:`drgn.Program`, so it's -easy to make a custom program which allows interactive debugging. 
- -:meth:`drgn.Program.add_memory_segment()` defines a range of memory and how to -read that memory. The following example uses a Btrfs filesystem image as the -program "memory": - -.. code-block:: python3 - - import drgn - import os - import sys - from drgn.cli import run_interactive - - - def btrfs_debugger(dev): - file = open(dev, 'rb') - size = file.seek(0, 2) - - def read_file(address, count, offset, physical): - file.seek(offset) - return file.read(count) - - platform = drgn.Platform(drgn.Architecture.UNKNOWN, - drgn.PlatformFlags.IS_LITTLE_ENDIAN) - prog = drgn.Program(platform) - prog.add_memory_segment(0, size, read_file) - prog.load_debug_info([f'/lib/modules/{os.uname().release}/kernel/fs/btrfs/btrfs.ko']) - return prog - - - prog = btrfs_debugger(sys.argv[1] if len(sys.argv) >= 2 else '/dev/sda') - print(drgn.Object(prog, 'struct btrfs_super_block', address=65536)) - run_interactive(prog, banner_func=lambda _: "BTRFS debugger") - -:meth:`drgn.Program.register_type_finder()` and -:meth:`drgn.Program.register_object_finder()` are the equivalent methods for -plugging in types and objects. +Custom debugging information finders can even be configured automatically +through the :ref:`plugin system `. .. _writing-plugins: @@ -259,6 +191,80 @@ finders. See :ref:`plugins` for more details. After creating the above files, the plugin can be installed with ``pip install .``. +Library +------- + +In addition to the CLI, drgn is also available as a library. +:func:`drgn.program_from_core_dump()`, :func:`drgn.program_from_kernel()`, and +:func:`drgn.program_from_pid()` correspond to the :option:`-c`, :option:`-k`, +and :option:`-p` command line options, respectively; they return a +:class:`drgn.Program` that can be used just like the one initialized by the +CLI:: + + >>> import drgn + >>> prog = drgn.program_from_kernel() + +C Library +--------- + +The core functionality of drgn is implemented in C and is available as a C +library, ``libdrgn``. See |drgn.h|_. + +.. 
|drgn.h| replace:: ``drgn.h`` +.. _drgn.h: https://github.com/osandov/drgn/blob/main/libdrgn/drgn.h + +Full documentation can be generated by running ``doxygen`` in the ``libdrgn`` +directory of the source code. Note that the API and ABI are not yet stable. + +Custom Programs +--------------- + +The main components of a :class:`drgn.Program` are the program memory, types, +and objects. The CLI and equivalent library interfaces automatically determine +these. However, it is also possible to create a "blank" ``Program`` and plug in +the main components. The :func:`drgn.cli.run_interactive()` function allows you +to run the same drgn CLI once you've created a :class:`drgn.Program`, so it's +easy to make a custom program which allows interactive debugging. + +:meth:`drgn.Program.add_memory_segment()` defines a range of memory and how to +read that memory. The following example uses a Btrfs filesystem image as the +program "memory": + +.. code-block:: python3 + + import os + import sys + + import drgn + from drgn.cli import run_interactive + + + def btrfs_debugger(dev): + file = open(dev, "rb") + size = file.seek(0, 2) + + def read_file(address, count, offset, physical): + file.seek(offset) + return file.read(count) + + platform = drgn.Platform( + drgn.Architecture.UNKNOWN, drgn.PlatformFlags.IS_LITTLE_ENDIAN + ) + prog = drgn.Program(platform) + prog.add_memory_segment(0, size, read_file) + module = prog.extra_module("btrfs", create=True) + module.try_file(f"/lib/modules/{os.uname().release}/kernel/fs/btrfs/btrfs.ko") + return prog + + + prog = btrfs_debugger(sys.argv[1] if len(sys.argv) >= 2 else "/dev/sda") + print(drgn.Object(prog, "struct btrfs_super_block", address=65536)) + run_interactive(prog, banner_func=lambda _: "BTRFS debugger") + +:meth:`drgn.Program.register_type_finder()` and +:meth:`drgn.Program.register_object_finder()` are the equivalent methods for +plugging in types and objects. 
+ Environment Variables --------------------- From 63d9fe30756aa86a8cfb04764add32ada845fecf Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 01:42:36 -0700 Subject: [PATCH 144/166] docs: update User Guide Tweak style, split up some examples for clarification, and a couple more small updates. Signed-off-by: Omar Sandoval --- README.rst | 18 ++++----- docs/user_guide.rst | 99 +++++++++++++++++++++++++++------------------ 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/README.rst b/README.rst index 9ccc576b9..3952bc02f 100644 --- a/README.rst +++ b/README.rst @@ -228,20 +228,20 @@ Quick Start .. start-quick-start -drgn debugs the running kernel by default; run ``sudo drgn``. To debug a -running program, run ``sudo drgn -p $PID``. To debug a core dump (either a -kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. Make sure to -`install debugging symbols +drgn debugs the running kernel by default; simply run ``drgn``. To debug a +running program, run ``drgn -p $PID``. To debug a core dump (either a kernel +vmcore or a userspace core dump), run ``drgn -c $PATH``. Make sure to `install +debugging symbols `_ for whatever you are debugging. -Then, you can access variables in the program with ``prog['name']`` and access +Then, you can access variables in the program with ``prog["name"]`` and access structure members with ``.``: .. code-block:: pycon - $ sudo drgn - >>> prog['init_task'].comm + $ drgn + >>> prog["init_task"].comm (char [16])"swapper/0" You can use various predefined helpers: @@ -255,14 +255,14 @@ You can use various predefined helpers: [b'findmnt', b'-p'] You can get stack traces with ``stack_trace()`` and access parameters or local -variables with ``trace['name']``: +variables with ``trace["name"]``: .. 
code-block:: pycon >>> trace = stack_trace(task) >>> trace[5] #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) - >>> poll_list = trace[5]['list'] + >>> poll_list = trace[5]["list"] >>> file = fget(task, poll_list.entries[0].fd) >>> d_path(file.f_path.address_of_()) b'/proc/115/mountinfo' diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 1040ea8c7..23218e9ef 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -26,10 +26,14 @@ the only ``Program`` you will need. A ``Program`` is used to look up type definitions, access variables, and read arbitrary memory:: - >>> prog.type('unsigned long') - prog.int_type(name='unsigned long', size=8, is_signed=False) - >>> prog['jiffies'] - Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) + + >>> prog.type("struct list_head") + struct list_head { + struct list_head *next; + struct list_head *prev; + } + >>> prog["jiffies"] + (volatile unsigned long)4416739513 >>> prog.read(0xffffffffbe411e10, 16) b'swapper/0\x00\x00\x00\x00\x00\x00\x00' @@ -40,7 +44,7 @@ memory from the program's address space. 
The :meth:`[] ` operator looks up a variable, constant, or function:: - >>> prog['jiffies'] == prog.variable('jiffies') + >>> prog["jiffies"] == prog.variable("jiffies") True It is usually more convenient to use the ``[]`` operator rather than the @@ -67,11 +71,11 @@ members can be accessed with the dot (``.``) operator, arrays can be subscripted with ``[]``, arithmetic can be performed, and objects can be compared:: - >>> print(prog['init_task'].comm[0]) + >>> print(prog["init_task"].comm[0]) (char)115 - >>> print(repr(prog['init_task'].nsproxy.mnt_ns.mounts + 1)) + >>> print(repr(prog["init_task"].nsproxy.mnt_ns.mounts + 1)) Object(prog, 'unsigned int', value=34) - >>> prog['init_task'].nsproxy.mnt_ns.pending_mounts > 0 + >>> prog["init_task"].nsproxy.mnt_ns.pending_mounts > 0 False Python doesn't have all of the operators that C or C++ do, so some @@ -111,16 +115,19 @@ References vs. Values The main difference between reference objects and value objects is how they are evaluated. References are read from the program's memory every time they are -evaluated; values simply return the stored value (:meth:`drgn.Object.read_()` -reads a reference object and returns it as a value object):: +evaluated:: >>> import time - >>> jiffies = prog['jiffies'] + >>> jiffies = prog["jiffies"] >>> jiffies.value_() 4391639989 >>> time.sleep(1) >>> jiffies.value_() 4391640290 + +Values simply return the stored value (:meth:`drgn.Object.read_()` reads a +reference object and returns it as a value object):: + >>> jiffies2 = jiffies.read_() >>> jiffies2.value_() 4391640291 @@ -131,24 +138,28 @@ reads a reference object and returns it as a value object):: 4391640593 References have a :attr:`drgn.Object.address_` attribute, which is the object's -address as a Python ``int``. This is slightly different from the -:meth:`drgn.Object.address_of_()` method, which returns the address as a -``drgn.Object``. 
Of course, both references and values can have a pointer type; -``address_`` refers to the address of the pointer object itself, and -:meth:`drgn.Object.value_()` refers to the value of the pointer (i.e., the -address it points to):: +address as a Python ``int``:: - >>> address = prog['jiffies'].address_ + >>> address = prog["jiffies"].address_ >>> type(address) >>> print(hex(address)) 0xffffffffbe405000 - >>> jiffiesp = prog['jiffies'].address_of_() - >>> jiffiesp + +This is slightly different from the :meth:`drgn.Object.address_of_()` method, +which returns the address as a ``drgn.Object``:: + + >>> jiffiesp = prog["jiffies"].address_of_() + >>> print(repr(jiffiesp)) Object(prog, 'volatile unsigned long *', value=0xffffffffbe405000) >>> print(hex(jiffiesp.value_())) 0xffffffffbe405000 +Of course, both references and values can have a pointer type; +``address_`` refers to the address of the pointer object itself, and +:meth:`drgn.Object.value_()` refers to the value of the pointer (i.e., the +address it points to). + .. _absent-objects: Absent Objects @@ -268,9 +279,7 @@ Stack Traces drgn represents stack traces with the :class:`drgn.StackTrace` and :class:`drgn.StackFrame` classes. :func:`drgn.stack_trace()`, :meth:`drgn.Program.stack_trace()`, and :meth:`drgn.Thread.stack_trace()` -return the call stack for a thread. The :meth:`[] -` operator looks up an object in the scope of a -``StackFrame``:: +return the call stack for a thread:: >>> trace = stack_trace(115) >>> trace @@ -288,11 +297,22 @@ return the call stack for a thread. 
The :meth:`[] #11 do_syscall_64 (./arch/x86/entry/common.c:80:7) #12 entry_SYSCALL_64+0x7c/0x15b (./arch/x86/entry/entry_64.S:113) #13 0x7f3344072af7 + +The :meth:`[] ` operator on a ``StackTrace`` gets +the ``StackFrame`` at the given index:: + >>> trace[5] #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) - >>> prog['do_poll'] - (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time)) - >>> trace[5]['list'] + +The :meth:`[] ` operator on a ``StackFrame`` looks +up an object in the scope of that frame. :meth:`drgn.StackFrame.locals()` +returns a list of the available names:: + + >>> prog["do_poll"] + (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time))0xffffffff905c6e10 + >>> trace[5].locals() + ['list', 'wait', 'end_time', 'pt', 'expire', 'to', 'timed_out', 'count', 'slack', 'busy_flag', 'busy_start', 'walk', 'can_busy_loop'] + >>> trace[5]["list"] *(struct poll_list *)0xffffacca402e3b50 = { .next = (struct poll_list *)0x0, .len = (int)1, @@ -313,7 +333,7 @@ drgn automatically obtains type definitions from the program. 
Types are represented by the :class:`drgn.Type` class and created by various factory functions like :meth:`drgn.Program.int_type()`:: - >>> prog.type('int') + >>> prog.type("int") prog.int_type(name='int', size=4, is_signed=True) You won't usually need to work with types directly, but see @@ -388,7 +408,7 @@ along with any arguments: pid = int(sys.argv[1]) uid = find_task(pid).cred.uid.val.value_() - print(f'PID {pid} is being run by UID {uid}') + print(f"PID {pid} is being run by UID {uid}") $ sudo drgn script.py 601 PID 601 is being run by UID 1000 @@ -398,8 +418,8 @@ It's even possible to run drgn scripts directly with the proper `shebang $ cat script2.py #!/usr/bin/env drgn - mounts = prog['init_task'].nsproxy.mnt_ns.mounts.value_() - print(f'You have {mounts} filesystems mounted') + mounts = prog["init_task"].nsproxy.mnt_ns.mounts.value_() + print(f"You have {mounts} filesystems mounted") $ sudo ./script2.py You have 36 filesystems mounted @@ -422,18 +442,18 @@ The default behavior of the Python `REPL print the output of :func:`repr()`. For :class:`drgn.Object` and :class:`drgn.Type`, this is a raw representation:: - >>> print(repr(prog['jiffies'])) + >>> print(repr(prog["jiffies"])) Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) - >>> print(repr(prog.type('atomic_t'))) + >>> print(repr(prog.type("atomic_t"))) prog.typedef_type(name='atomic_t', type=prog.struct_type(tag=None, size=4, members=(TypeMember(prog.type('int'), name='counter', bit_offset=0),))) The standard :func:`print()` function uses the output of :func:`str()`. 
For drgn objects and types, this is a representation in programming language syntax:: - >>> print(prog['jiffies']) + >>> print(prog["jiffies"]) (volatile unsigned long)4395387628 - >>> print(prog.type('atomic_t')) + >>> print(prog.type("atomic_t")) typedef struct { int counter; } atomic_t @@ -442,10 +462,10 @@ In interactive mode, the drgn CLI automatically uses ``str()`` instead of ``repr()`` for objects and types, so you don't need to call ``print()`` explicitly:: - $ sudo drgn - >>> prog['jiffies'] + $ drgn + >>> prog["jiffies"] (volatile unsigned long)4395387628 - >>> prog.type('atomic_t') + >>> prog.type("atomic_t") typedef struct { int counter; } atomic_t @@ -453,7 +473,8 @@ explicitly:: Next Steps ---------- -Refer to the :doc:`api_reference`. Look through the :doc:`helpers`. Read some -:doc:`case_studies`. Browse through the `tools +Follow along with a :doc:`tutorial ` or :doc:`case study +`. Refer to the :doc:`api_reference` and look through the +:doc:`helpers`. Browse through the `tools `_. Check out the `community contributions `_. From dee6d4d78973aba50b84f7aed6916655e0106ace Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 11 Apr 2025 14:35:50 -0700 Subject: [PATCH 145/166] Fix the URLs of Oracle Linux documentation I'm not really even sure how or why the URLs changed. Again. Signed-off-by: Stephen Brennan --- docs/getting_debugging_symbols.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/getting_debugging_symbols.rst b/docs/getting_debugging_symbols.rst index ad36238da..4aa063aea 100644 --- a/docs/getting_debugging_symbols.rst +++ b/docs/getting_debugging_symbols.rst @@ -120,8 +120,8 @@ request `_. Oracle Linux ------------ -Oracle Linux provides documentation on using installing the necessary debugging +Oracle Linux provides documentation on installing the necessary debugging symbols. See the documentation for `Oracle Linux 9 -`_ +`_ and `Oracle Linux 8 -`_. +`_. 
From d04dee96885e40430a5811cd26690b44ebc45bd3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 10:36:31 -0700 Subject: [PATCH 146/166] libdrgn: Makefile: add plugins.h to SOURCES Fixes: adf64729095b ("Add plugin system") Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 7b0682bfc..4ac365fb8 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -113,6 +113,7 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ path.h \ platform.c \ platform.h \ + plugins.h \ pp.h \ program.c \ program.h \ From 22ac3f37e70cdc838d0ba90b6f5e1e46a0a7393c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 11:26:46 -0700 Subject: [PATCH 147/166] tests: don't shell out for CLI tests In some test environments, like Buck, it's not easy to reexecute the Python interpreter and have it find the drgn module. Instead of using subprocess, fork and call drgn.cli._main() directly (setting up the necessary pipes ourselves). This gives us slightly less realistic end-to-end coverage, but it makes the tests more robust. 
Signed-off-by: Omar Sandoval --- tests/test_cli.py | 87 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index bb98ce531..5e842ca72 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,30 +2,73 @@ # SPDX-License-Identifier: LGPL-2.1-or-later -import subprocess +import os import sys import tempfile +import types +import drgn.cli from tests import TestCase class TestCli(TestCase): + def run_cli(self, args, *, input=None): + stdout_r, stdout_w = os.pipe() + stderr_r, stderr_w = os.pipe() + if input is not None: + stdin_r, stdin_w = os.pipe() - def run_cli(self, *args: str, **kwargs): - try: - return subprocess.run( - [sys.executable, "-m", "drgn"] + list(args), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - **kwargs, + pid = os.fork() + if pid == 0: + os.close(stdout_r) + sys.stdout = open(stdout_w, "w") + os.close(stderr_r) + sys.stderr = open(stderr_w, "w") + + if input is not None: + os.close(stdin_w) + sys.stdin = open(stdin_r, "r") + + sys.argv = ["drgn"] + args + + drgn.cli._main() + + sys.stdout.flush() + sys.stderr.flush() + os._exit(0) + + os.close(stdout_w) + os.close(stderr_w) + + if input is not None: + os.close(stdin_r) + with open(stdin_w, "w") as f: + f.write(input) + + with open(stdout_r, "r") as f: + stdout = f.read() + with open(stderr_r, "r") as f: + stderr = f.read() + + _, wstatus = os.waitpid(pid, 0) + if not os.WIFEXITED(wstatus) or os.WEXITSTATUS(wstatus) != 0: + if os.WIFEXITED(wstatus): + msg = f"Exited with status {os.WEXITSTATUS(wstatus)}" + elif os.WIFSIGNALED(wstatus): + msg = f"Terminated by signal {os.WTERMSIG(wstatus)}" + else: + msg = "Exited abnormally" + self.fail( + f"""\ +{msg} +STDOUT: +{stdout} +STDERR: +{stderr} +""" ) - except subprocess.CalledProcessError as e: - # With captured output, there's nothing left to debug in CI logs. - # Print output on a failure so we can debug.
- print(f"STDOUT:\n{e.stdout.decode()}") - print(f"STDERR:\n{e.stderr.decode()}") - raise + + return types.SimpleNamespace(stdout=stdout, stderr=stderr) def test_e(self): script = r""" @@ -38,9 +81,9 @@ def test_e(self): print(sys.argv) """ proc = self.run_cli( - "--quiet", "--pid", "0", "--no-default-symbols", "-e", script, "pass" + ["--quiet", "--pid", "0", "--no-default-symbols", "-e", script, "pass"] ) - self.assertEqual(proc.stdout, b"['-e', 'pass']\n") + self.assertEqual(proc.stdout, "['-e', 'pass']\n") def test_script(self): with tempfile.NamedTemporaryFile() as f: @@ -61,12 +104,12 @@ def test_script(self): ) f.flush() proc = self.run_cli( - "--quiet", "--pid", "0", "--no-default-symbols", f.name, "pass" + ["--quiet", "--pid", "0", "--no-default-symbols", f.name, "pass"] ) - self.assertEqual(proc.stdout, f"[{f.name!r}, 'pass']\n".encode()) + self.assertEqual(proc.stdout, f"[{f.name!r}, 'pass']\n") def test_pipe(self): - script = rb""" + script = r""" import sys assert drgn.get_default_prog() is prog @@ -78,6 +121,6 @@ def test_pipe(self): print(sys.argv) """ proc = self.run_cli( - "--quiet", "--pid", "0", "--no-default-symbols", input=script + ["--quiet", "--pid", "0", "--no-default-symbols"], input=script ) - self.assertEqual(proc.stdout, b"['']\n") + self.assertEqual(proc.stdout, "['']\n") From 26d6ef8efcddb4469fbc28c812fde38c8b391d0f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 11:41:32 -0700 Subject: [PATCH 148/166] tests: fix socket ResourceWarning in debuginfod tests We need to close the socket. 
Signed-off-by: Omar Sandoval --- tests/test_debug_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py index 28d50760c..180397394 100644 --- a/tests/test_debug_info.py +++ b/tests/test_debug_info.py @@ -2427,6 +2427,7 @@ def tearDownClass(cls): cls.server.socket.shutdown(socket.SHUT_RD) cls.server.shutdown() cls.server_thread.join() + cls.server.server_close() def setUp(self): self.prog = Program() From 82bfff488c64bb2554678aab67899980a2d67412 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Apr 2025 14:28:14 -0700 Subject: [PATCH 149/166] libdrgn: x86_64: fix restoring rbp for ORC_TYPE_REGS Our handling of rbp in ORC_TYPE_REGS is incorrect. First, we set the CFI rule to restore rbp from the on-stack pt_regs. Then, we set it again based on bp_reg, which is always ORC_REG_UNDEFINED, meaning to use the previous rbp. The intention is to keep the value we just got from pt_regs, but we interpret it as the previous frame's rbp, which is incorrect. Fix it by skipping the bp_reg handling for ORC_TYPE_REGS. 
Reported-by: Leo Martins Signed-off-by: Omar Sandoval --- libdrgn/arch_x86_64.c | 48 +++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/libdrgn/arch_x86_64.c b/libdrgn/arch_x86_64.c index 574dfe9ba..cb17abc3e 100644 --- a/libdrgn/arch_x86_64.c +++ b/libdrgn/arch_x86_64.c @@ -175,29 +175,33 @@ drgn_orc_to_cfi_x86_64(const struct drgn_orc_entry *orc, drgn_orc_type(orc)); } - switch (drgn_orc_bp_reg(orc)) { - case DRGN_ORC_REG_UNDEFINED: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - rule.regno = DRGN_REGISTER_NUMBER(rbp); - rule.offset = 0; - break; - case DRGN_ORC_REG_PREV_SP: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - rule.offset = orc->bp_offset; - break; - case DRGN_ORC_REG_BP: - rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET; - rule.regno = DRGN_REGISTER_NUMBER(rbp); - rule.offset = orc->bp_offset; - break; - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "unknown ORC BP base register %d", - drgn_orc_bp_reg(orc)); + // For ORC_TYPE_REGS, rbp is already set. 
+ if (drgn_orc_type(orc) != DRGN_ORC_TYPE_REGS) { + switch (drgn_orc_bp_reg(orc)) { + case DRGN_ORC_REG_UNDEFINED: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + rule.regno = DRGN_REGISTER_NUMBER(rbp); + rule.offset = 0; + break; + case DRGN_ORC_REG_PREV_SP: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + rule.offset = orc->bp_offset; + break; + case DRGN_ORC_REG_BP: + rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET; + rule.regno = DRGN_REGISTER_NUMBER(rbp); + rule.offset = orc->bp_offset; + break; + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown ORC BP base register %d", + drgn_orc_bp_reg(orc)); + } + if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(rbp), + &rule)) + return &drgn_enomem; } - if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(rbp), - &rule)) - return &drgn_enomem; + *interrupted_ret = drgn_orc_signal(orc); *ret_addr_regno_ret = DRGN_REGISTER_NUMBER(rip); return NULL; From 4c90e748c03dd4f2d286a6cab8bd005daad64f32 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 14 Apr 2025 13:56:37 -0700 Subject: [PATCH 150/166] libdrgn: optionally split up libdrgn and _drgn Python extension .so files again Since the fixes commit, the _drgn Python extension module links directly to libpython. However, manylinux extensions are not supposed to link against libpython [1], and the manylinux container images intentionally omit libpython, so building drgn for manylinux fails. Add a few modes of building libdrgn to address this: * --disable-libdrgn --enable-python-extension: build _drgn.so (the Python extension module), which doesn't link against libpython, and don't build libdrgn.so. This is what setup.py uses since Python-only installations don't need libdrgn. It works in manylinux. * --enable-python: build libdrgn.so with Python support, which links against libpython, and don't build _drgn.so. 
In this configuration, libdrgn.so also works as the Python extension module, so _drgn.so can be installed as a symlink to libdrgn.so. * (No options, equivalent to --enable-libdrgn --disable-python --disable-python-extension): build libdrgn.so with no Python support, don't build _drgn.so. For backwards-compatibility, this is the default when building libdrgn directly, but I doubt anyone does that these days. * --enable-libdrgn --enable-python-extension: build libdrgn.so with Python support, which links against libpython, and build _drgn.so, which doesn't link against libpython. I guess this works but I don't see a reason to prefer it over --enable-python + a symlink. This also requires refactoring our libtool convenience libraries. Hopefully this sucks less in Meson. [1]: https://peps.python.org/pep-0513/#libpythonx-y-so-1 Fixes: 3d493604e62d ("libdrgn: combine libdrgn and _drgn Python extension into one .so") Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 314 +++++++++++++++++++--------------- libdrgn/configure.ac | 28 ++- libdrgn/m4/my_python_devel.m4 | 52 ++++-- libdrgn/no_python.c | 40 +++++ libdrgn/plugins.h | 6 - libdrgn/program.c | 24 --- libdrgn/program.h | 11 -- setup.py | 5 +- 8 files changed, 275 insertions(+), 205 deletions(-) create mode 100644 libdrgn/no_python.c diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 4ac365fb8..db59c3342 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -33,158 +33,168 @@ BUILT_SOURCES = $(ARCH_DEFS_INCS) \ drgn_section_name_to_index.inc \ elf_sections.h -noinst_LTLIBRARIES = libdrgnimpl.la - -libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ - $(ARCH_DEFS_INCS) \ - $(STRSWITCH_INCS) \ - accessors.c \ - arch_i386.c \ - arch_riscv.c \ - array.h \ - binary_buffer.c \ - binary_buffer.h \ - binary_search.h \ - binary_search_tree.h \ - bitops.h \ - c_keywords.inc \ - c_lexer.h \ - cfi.c \ - cfi.h \ - cityhash.h \ - cleanup.h \ - crc32.c \ - crc32.h \ - debug_info.c \ - debug_info.h \ - 
debug_info_options.c \ - debug_info_options.h \ - drgn_internal.h \ - drgn_section_name_to_index.inc \ - dwarf_constants.c \ - dwarf_constants.h \ - dwarf_info.c \ - dwarf_info.h \ - elf_file.c \ - elf_file.h \ - elf_notes.c \ - elf_notes.h \ - elf_symtab.c \ - elf_symtab.h \ - elf_sections.h \ - error.c \ - error.h \ - generics.h \ - handler.c \ - handler.h \ - hash_table.c \ - hash_table.h \ - helpers.h \ - hexlify.c \ - hexlify.h \ - io.c \ - io.h \ - kallsyms.c \ - kallsyms.h \ - language.c \ - language.h \ - language_c.c \ - lazy_object.c \ - lazy_object.h \ - lexer.c \ - lexer.h \ - linux_kernel.c \ - linux_kernel.h \ - linux_kernel_helpers.c \ - log.c \ - log.h \ - memory_reader.c \ - memory_reader.h \ - minmax.h \ - nstring.h \ - object.c \ - object.h \ - openmp.c \ - openmp.h \ - orc.h \ - orc_info.c \ - orc_info.h \ - path.c \ - path.h \ - platform.c \ - platform.h \ - plugins.h \ - pp.h \ - program.c \ - program.h \ - register_state.c \ - register_state.h \ - serialize.c \ - serialize.h \ - splay_tree.c \ - stack_trace.c \ - stack_trace.h \ - string_builder.c \ - string_builder.h \ - symbol.c \ - symbol.h \ - type.c \ - type.h \ - util.c \ - util.h \ - vector.h - -libdrgnimpl_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden $(OPENMP_CFLAGS) \ - $(elfutils_CFLAGS) -libdrgnimpl_la_CPPFLAGS = $(AM_CPPFLAGS) -libdrgnimpl_la_LIBADD = $(OPENMP_LIBS) $(elfutils_LIBS) -lm +if ENABLE_PYTHON +BUILT_SOURCES += python/docstrings.h +endif + +noinst_LTLIBRARIES = libdrgn_common.la + +libdrgn_common_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ + $(ARCH_DEFS_INCS) \ + $(STRSWITCH_INCS) \ + accessors.c \ + arch_i386.c \ + arch_riscv.c \ + array.h \ + binary_buffer.c \ + binary_buffer.h \ + binary_search.h \ + binary_search_tree.h \ + bitops.h \ + c_keywords.inc \ + c_lexer.h \ + cfi.c \ + cfi.h \ + cityhash.h \ + cleanup.h \ + crc32.c \ + crc32.h \ + debug_info.c \ + debug_info.h \ + debug_info_options.c \ + debug_info_options.h \ + drgn_internal.h \ + 
drgn_section_name_to_index.inc \ + dwarf_constants.c \ + dwarf_constants.h \ + dwarf_info.c \ + dwarf_info.h \ + elf_file.c \ + elf_file.h \ + elf_notes.c \ + elf_notes.h \ + elf_symtab.c \ + elf_symtab.h \ + elf_sections.h \ + error.c \ + error.h \ + generics.h \ + handler.c \ + handler.h \ + hash_table.c \ + hash_table.h \ + helpers.h \ + hexlify.c \ + hexlify.h \ + io.c \ + io.h \ + kallsyms.c \ + kallsyms.h \ + language.c \ + language.h \ + language_c.c \ + lazy_object.c \ + lazy_object.h \ + lexer.c \ + lexer.h \ + linux_kernel.c \ + linux_kernel.h \ + linux_kernel_helpers.c \ + log.c \ + log.h \ + memory_reader.c \ + memory_reader.h \ + minmax.h \ + nstring.h \ + object.c \ + object.h \ + openmp.c \ + openmp.h \ + orc.h \ + orc_info.c \ + orc_info.h \ + path.c \ + path.h \ + platform.c \ + platform.h \ + plugins.h \ + pp.h \ + program.c \ + program.h \ + register_state.c \ + register_state.h \ + serialize.c \ + serialize.h \ + splay_tree.c \ + stack_trace.c \ + stack_trace.h \ + string_builder.c \ + string_builder.h \ + symbol.c \ + symbol.h \ + type.c \ + type.h \ + util.c \ + util.h \ + vector.h + +libdrgn_common_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden $(OPENMP_CFLAGS) \ + $(elfutils_CFLAGS) +libdrgn_common_la_LIBADD = $(OPENMP_LIBS) $(elfutils_LIBS) -lm if WITH_DEBUGINFOD if ENABLE_DLOPEN_DEBUGINFOD -libdrgnimpl_la_LIBADD += -ldl +libdrgn_common_la_LIBADD += -ldl else -libdrgnimpl_la_CFLAGS += $(libdebuginfod_CFLAGS) -libdrgnimpl_la_LIBADD += $(libdebuginfod_LIBS) +libdrgn_common_la_CFLAGS += $(libdebuginfod_CFLAGS) +libdrgn_common_la_LIBADD += $(libdebuginfod_LIBS) endif endif if WITH_LIBKDUMPFILE -libdrgnimpl_la_SOURCES += kdump.c -libdrgnimpl_la_CFLAGS += $(libkdumpfile_CFLAGS) -libdrgnimpl_la_LIBADD += $(libkdumpfile_LIBS) +libdrgn_common_la_SOURCES += kdump.c +libdrgn_common_la_CFLAGS += $(libkdumpfile_CFLAGS) +libdrgn_common_la_LIBADD += $(libkdumpfile_LIBS) endif if ENABLE_PYTHON -BUILT_SOURCES += python/docstrings.h - -libdrgnimpl_la_SOURCES 
+= python/constants.c \ - python/debug_info_options.c \ - python/docstrings.c \ - python/docstrings.h \ - python/drgnpy.h \ - python/error.c \ - python/helpers.c \ - python/language.c \ - python/main.c \ - python/module.c \ - python/module_section_addresses.c \ - python/object.c \ - python/platform.c \ - python/plugins.c \ - python/program.c \ - python/stack_trace.c \ - python/symbol.c \ - python/symbol_index.c \ - python/test.c \ - python/thread.c \ - python/type.c \ - python/type_kind_set.c \ - python/util.c - -libdrgnimpl_la_CPPFLAGS += $(PYTHON_CPPFLAGS) -libdrgnimpl_la_LIBADD += $(PYTHON_LIBS) +noinst_LTLIBRARIES += libdrgn_common_python.la + +libdrgn_common_python_la_SOURCES = python/constants.c \ + python/debug_info_options.c \ + python/docstrings.c \ + python/docstrings.h \ + python/drgnpy.h \ + python/error.c \ + python/helpers.c \ + python/language.c \ + python/main.c \ + python/module.c \ + python/module_section_addresses.c \ + python/object.c \ + python/platform.c \ + python/plugins.c \ + python/program.c \ + python/stack_trace.c \ + python/symbol.c \ + python/symbol_index.c \ + python/test.c \ + python/thread.c \ + python/type.c \ + python/type_kind_set.c \ + python/util.c + +libdrgn_common_python_la_CFLAGS = $(libdrgn_common_la_CFLAGS) +libdrgn_common_python_la_CPPFLAGS = $(AM_CPPFLAGS) $(PYTHON_CPPFLAGS) +libdrgn_common_python_la_LIBADD = libdrgn_common.la endif +EXTRA_LTLIBRARIES = libdrgn_common_no_python.la + +libdrgn_common_no_python_la_SOURCES = no_python.c +libdrgn_common_no_python_la_CFLAGS = $(libdrgn_common_la_CFLAGS) +libdrgn_common_no_python_la_LIBADD = libdrgn_common.la + %: %.strswitch build-aux/gen_strswitch.py build-aux/codegen_utils.py $(AM_V_GEN)$(PYTHON) $(word 2, $^) -o $@ $< @@ -212,11 +222,31 @@ python/docstrings.c: ../_drgn.pyi $(drgndoc_docstrings_deps) python/docstrings.h: ../_drgn.pyi $(drgndoc_docstrings_deps) $(AM_V_GEN)$(drgndoc_docstrings) -H -m _drgn:drgn $< > $@ +if ENABLE_LIBDRGN +if ENABLE_PYTHON +libdrgn_libs = 
$(PYTHON_LIBS) libdrgn_common_python.la +else +libdrgn_libs = libdrgn_common_no_python.la +endif +else +libdrgn_libs = libdrgn_common_no_python.la +endif + +if ENABLE_LIBDRGN lib_LTLIBRARIES = libdrgn.la libdrgn_la_SOURCES = libdrgn_la_LDFLAGS = $(AM_LDFLAGS) -version-info 0:0:0 -libdrgn_la_LIBADD = libdrgnimpl.la +libdrgn_la_LIBADD = $(libdrgn_libs) +endif + +if ENABLE_PYTHON_EXTENSION +noinst_LTLIBRARIES += _drgn.la + +_drgn_la_SOURCES = +_drgn_la_LDFLAGS = $(AM_LDFLAGS) -avoid-version -module -shared -rpath $(pkgpyexecdir) +_drgn_la_LIBADD = libdrgn_common_python.la +endif EXTRA_DIST = $(ARCH_DEFS_PYS) \ $(STRSWITCH_INCS:.inc=.inc.strswitch) \ @@ -232,7 +262,7 @@ EXTRA_DIST = $(ARCH_DEFS_PYS) \ EXTRA_PROGRAMS = examples/load_debug_info examples_load_debug_info_SOURCES = examples/load_debug_info.c -examples_load_debug_info_LDADD = libdrgnimpl.la +examples_load_debug_info_LDADD = $(libdrgn_libs) # Only test internals here. Anything exposed via Python should be tested in # Python unit tests instead. @@ -260,7 +290,7 @@ tests/%.c: build-aux/checkmk tests/%.c.in test_cflags = $(AM_CFLAGS) $(check_CFLAGS) test_cppflags = $(AM_CPPFLAGS) -iquote $(srcdir)/tests -test_ldadd = $(check_LIBS) libdrgnimpl.la +test_ldadd = $(check_LIBS) $(libdrgn_libs) tests_binary_search_CFLAGS = $(test_cflags) tests_binary_search_CPPFLAGS = $(test_cppflags) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 5e6b34be5..5d53a62a5 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -46,15 +46,35 @@ AC_SUBST(OPENMP_LIBS) dnl We need Python for code generation even if we're not building the bindings. AM_PATH_PYTHON([3.6]) +AC_ARG_ENABLE([libdrgn], + [AS_HELP_STRING([--disable-libdrgn], + [don't build the libdrgn C library. 
Note that + libdrgn's API and ABI are unstable])], + [], [enable_libdrgn=yes]) + AC_ARG_ENABLE([python], [AS_HELP_STRING([--enable-python], - [build Python bindings @<:@default=no@:>@])], + [enable Python support in libdrgn, which allows + it to use Python plugins and double as the + Python extension module])], [], [enable_python=no]) -AM_CONDITIONAL([ENABLE_PYTHON], [test "x$enable_python" != xno]) +AC_ARG_ENABLE([python-extension], + [AS_HELP_STRING([--enable-python-extension], + [build the drgn Python extension module separate + from libdrgn])], + [], [enable_python_extension=no]) + +AM_CONDITIONAL([ENABLE_LIBDRGN], [test "x$enable_libdrgn" != xno]) +AM_CONDITIONAL([ENABLE_PYTHON], + [test "x$enable_python" != xno || test "x$enable_python_extension" != xno]) +AM_CONDITIONAL([ENABLE_PYTHON_EXTENSION], + [test "x$enable_python_extension" != xno]) + AM_COND_IF([ENABLE_PYTHON], - [MY_PYTHON_DEVEL - AC_DEFINE(ENABLE_PYTHON)]) + [AS_IF([test "x$enable_libdrgn" != xno], + [find_libpython=yes], [find_libpython=no]) + MY_PYTHON_DEVEL([$find_libpython])]) PKG_PROG_PKG_CONFIG diff --git a/libdrgn/m4/my_python_devel.m4 b/libdrgn/m4/my_python_devel.m4 index ba34edad6..f69fc030c 100644 --- a/libdrgn/m4/my_python_devel.m4 +++ b/libdrgn/m4/my_python_devel.m4 @@ -1,6 +1,7 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later +# MY_PYTHON_DEVEL([find-libpython=no]) # Check for Python development files and define PYTHON_CPPFLAGS and PYTHON_LIBS # accordingly. 
AC_DEFUN([MY_PYTHON_DEVEL], @@ -14,19 +15,22 @@ if platinclude != include: include_paths.append(plat_include) print(' '.join('-I' + path for path in include_paths))" PYTHON_CPPFLAGS=`"$PYTHON" -c "$prog"`]) -AC_SUBST(PYTHON_CPPFLAGS) -AS_IF([test -z "$PYTHON_LIBS"], - [prog="import sysconfig + +save_CPPFLAGS="$CPPFLAGS" +CPPFLAGS="$CPPFLAGS $PYTHON_CPPFLAGS" + +AS_IF([test "x$1" = xyes], + [AS_IF([test -z "$PYTHON_LIBS"], + [prog="import sysconfig print('-L' + sysconfig.get_config_var('LIBDIR') + ' -lpython' + sysconfig.get_config_var('LDVERSION'))" PYTHON_LIBS=`"$PYTHON" -c "$prog"`]) -AC_SUBST(PYTHON_LIBS) -AC_MSG_CHECKING([for $PYTHON development files]) -save_CPPFLAGS="$CPPFLAGS" -save_LIBS="$LIBS" -CPPFLAGS="$CPPFLAGS $PYTHON_CPPFLAGS" -LIBS="$LIBS $PYTHON_LIBS" -AC_LINK_IFELSE([AC_LANG_SOURCE([[ + + save_LIBS="$LIBS" + LIBS="$LIBS $PYTHON_LIBS" + + AC_MSG_CHECKING([for $PYTHON development headers and library]) + AC_LINK_IFELSE([AC_LANG_SOURCE([[ #include <Python.h> int main(void) @@ -34,14 +38,30 @@ int main(void) Py_Initialize(); } ]])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR( -[Could not compile Python development test program. + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AC_MSG_ERROR( +[Could not compile and link test program with Python headers and library. You may need to install your distribution's Python development package (e.g., -python3-devel or python3-dev) or set the PYTHON_CPPFLAGS and PYTHON_LIBS +python3-devel or python3-dev) or specify the location of the Python development +headers and/or library by setting the PYTHON_CPPFLAGS and PYTHON_LIBS environment variables.])]) + + LIBS="$save_LIBS"], + [AC_MSG_CHECKING([for $PYTHON development headers]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include <Python.h>]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AC_MSG_ERROR( +[Could not compile test program with Python headers. 
+ +You may need to install your distribution's Python development package (e.g., +python3-devel or python3-dev) or specify the location of the Python development +headers by setting the PYTHON_CPPFLAGS environment variable.])])]) + CPPFLAGS="$save_CPPFLAGS" -LIBS="$save_LIBS" + +AC_SUBST(PYTHON_CPPFLAGS) +AC_SUBST(PYTHON_LIBS) ]) diff --git a/libdrgn/no_python.c b/libdrgn/no_python.c new file mode 100644 index 000000000..ea41b180e --- /dev/null +++ b/libdrgn/no_python.c @@ -0,0 +1,40 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +// Fallback implementations for builds without Python support. + +#include "plugins.h" +#include "program.h" + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_create(const struct drgn_platform *platform, + struct drgn_program **ret) +{ + struct drgn_program *prog = malloc(sizeof(*prog)); + if (!prog) + return &drgn_enomem; + drgn_program_init(prog, platform); + *ret = prog; + return NULL; +} + +LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) +{ + if (prog) { + drgn_program_deinit(prog); + free(prog); + } +} + +void drgn_call_plugins_prog(const char *name, struct drgn_program *prog) +{ +} + +void *drgn_begin_blocking(void) +{ + return NULL; +} + +void drgn_end_blocking(void *state) +{ +} diff --git a/libdrgn/plugins.h b/libdrgn/plugins.h index 897ec556f..5fd7f4e80 100644 --- a/libdrgn/plugins.h +++ b/libdrgn/plugins.h @@ -4,14 +4,8 @@ #ifndef DRGN_PLUGINS_H #define DRGN_PLUGINS_H -#include - struct drgn_program; -#if ENABLE_PYTHON void drgn_call_plugins_prog(const char *name, struct drgn_program *prog); -#else -static inline void drgn_call_plugins_prog(const char *name, struct drgn_program *prog) {} -#endif #endif /* DRGN_PLUGINS_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index 9789d384e..c4522c6f2 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -178,30 +178,6 @@ void drgn_program_deinit(struct drgn_program *prog) 
drgn_debug_info_deinit(&prog->dbinfo); } -#if !ENABLE_PYTHON -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_create(const struct drgn_platform *platform, - struct drgn_program **ret) -{ - struct drgn_program *prog; - - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - drgn_program_init(prog, platform); - *ret = prog; - return NULL; -} - -LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) -{ - if (prog) { - drgn_program_deinit(prog); - free(prog); - } -} -#endif - LIBDRGN_PUBLIC struct drgn_error * drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, uint64_t size, drgn_memory_read_fn read_fn, diff --git a/libdrgn/program.h b/libdrgn/program.h index 79cb67e2c..17afe8630 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -462,7 +462,6 @@ drgn_program_register_symbol_finder_impl(struct drgn_program *prog, const struct drgn_symbol_finder_ops *ops, void *arg, size_t enable_index); -#if ENABLE_PYTHON /** * Call before a blocking (I/O or long-running) operation. * @@ -478,16 +477,6 @@ void *drgn_begin_blocking(void); * @param[in] state Return value of @ref drgn_begin_blocking(). 
*/ void drgn_end_blocking(void *state); -#else -static inline void *drgn_begin_blocking(void) -{ - return NULL; -} - -static inline void drgn_end_blocking(void *state) -{ -} -#endif static inline void drgn_blocking_guard_cleanup(void **statep) { diff --git a/setup.py b/setup.py index 19ba12bf0..78c1018f5 100755 --- a/setup.py +++ b/setup.py @@ -92,7 +92,8 @@ def _run_configure(self): args = [ os.path.relpath("libdrgn/configure", self.build_temp), "--disable-static", - "--enable-python", + "--disable-libdrgn", + "--enable-python-extension", ] try: args.extend(shlex.split(os.environ["CONFIGURE_FLAGS"])) @@ -123,7 +124,7 @@ def make(self, *make_args): def run(self): self.make() - so = os.path.join(self.build_temp, ".libs/libdrgn.so") + so = os.path.join(self.build_temp, ".libs/_drgn.so") if self.inplace: self.copy_file(so, self.get_ext_fullpath("_drgn")) old_inplace, self.inplace = self.inplace, 0 From a49bb111d5908d24641e8abb813baed9a90c03e5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 14 Apr 2025 16:56:58 -0700 Subject: [PATCH 151/166] Update libkdumpfile in manylinux wheels to 0.5.5 The new version includes a new kdumpid binary with a dependency on BFD that we don't want, so disable it. 
Signed-off-by: Omar Sandoval --- scripts/build_manylinux_in_docker.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index 40b9025fb..dd2e99fb1 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -44,12 +44,12 @@ curl -L "$elfutils_url" | tar -xj --strip-components=1 make -j$(($(nproc) + 1)) make install -libkdumpfile_version=0.5.4 +libkdumpfile_version=0.5.5 libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/releases/download/v$libkdumpfile_version/libkdumpfile-$libkdumpfile_version.tar.gz mkdir /tmp/libkdumpfile cd /tmp/libkdumpfile curl -L "$libkdumpfile_url" | tar -xz --strip-components=1 -./configure --with-libzstd --with-lzo2 --with-snappy --with-zlib --without-python +./configure --with-libzstd --with-lzo2 --with-snappy --with-zlib --without-python --disable-kdumpid make -j$(($(nproc) + 1)) make install From 4b31a32b2c3e2f90c9eaeee281082a593e780be4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 14 Apr 2025 23:29:38 -0700 Subject: [PATCH 152/166] vmtest.kbuild: add patch to work around ppc64 build failure Some versions of the kernel fail to build for ppc64 with GCC 12 and CONFIG_KPROBES enabled due to -Wdangling-pointer. Backport the commit that disabled that warning. 
Fixes: 9b7297dfd2c4 ("vmtest.config: enable CONFIG_KPROBES for upcoming kmodify breakpoints") Signed-off-by: Omar Sandoval --- vmtest/kbuild.py | 15 ++++ ...le-Wdangling-pointer-warning-for-now.patch | 72 +++++++++++++++++++ ...le-Wdangling-pointer-warning-for-now.patch | 65 +++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch create mode 100644 vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index f20397186..8ffd44e68 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -146,6 +146,21 @@ class _Patch(NamedTuple): name="5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch", versions=((KernelVersion("5.18.18"), KernelVersion("5.19")),), ), + # We could backport this further, but we currently only need it between + # Linux kernel commits 50428fdc53ba ("powerpc: Add a ppc_inst_as_str() + # helper") (in v5.9) and 2a83afe72a2b ("powerpc/64: Drop + # ppc_inst_as_str()") (in v6.0). 
+ _Patch( + name="gcc-12-disable-Wdangling-pointer-warning-for-now.patch", + versions=((KernelVersion("5.16"), KernelVersion("5.18.6")),), + ), + _Patch( + name="5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch", + versions=( + (KernelVersion("5.11"), KernelVersion("5.15.49")), + (KernelVersion("5.9"), KernelVersion("5.10.183")), + ), + ), ) diff --git a/vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch b/vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch new file mode 100644 index 000000000..ad9c67305 --- /dev/null +++ b/vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch @@ -0,0 +1,72 @@ +From 1d8693376aaa10f12bbd67cfecb72a26a83058c5 Mon Sep 17 00:00:00 2001 +Message-ID: <1d8693376aaa10f12bbd67cfecb72a26a83058c5.1744698211.git.osandov@fb.com> +From: Linus Torvalds +Date: Thu, 9 Jun 2022 09:41:42 -0700 +Subject: [PATCH] gcc-12: disable '-Wdangling-pointer' warning for now +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit f7d63b50898172b9eb061b9e2daad61b428792d0 upstream. + +[ Upstream commit 49beadbd47c270a00754c107a837b4f29df4c822 ] + +While the concept of checking for dangling pointers to local variables +at function exit is really interesting, the gcc-12 implementation is not +compatible with reality, and results in false positives. 
+ +For example, gcc sees us putting things on a local list head allocated +on the stack, which involves exactly those kinds of pointers to the +local stack entry: + + In function ‘__list_add’, + inlined from ‘list_add_tail’ at include/linux/list.h:102:2, + inlined from ‘rebuild_snap_realms’ at fs/ceph/snap.c:434:2: + include/linux/list.h:74:19: warning: storing the address of local variable ‘realm_queue’ in ‘*&realm_27(D)->rebuild_item.prev’ [-Wdangling-pointer=] + 74 | new->prev = prev; + | ~~~~~~~~~~^~~~~~ + +But then gcc - understandably - doesn't really understand the big +picture how the doubly linked list works, so doesn't see how we then end +up emptying said list head in a loop and the pointer we added has been +removed. + +Gcc also complains about us (intentionally) using this as a way to store +a kind of fake stack trace, eg + + drivers/acpi/acpica/utdebug.c:40:38: warning: storing the address of local variable ‘current_sp’ in ‘acpi_gbl_entry_stack_pointer’ [-Wdangling-pointer=] + 40 | acpi_gbl_entry_stack_pointer = &current_sp; + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ + +which is entirely reasonable from a compiler standpoint, and we may want +to change those kinds of patterns, but not not. + +So this is one of those "it would be lovely if the compiler were to +complain about us leaving dangling pointers to the stack", but not this +way. 
+ +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/Makefile b/Makefile +index 2f0efde21902..209d5ae9ddb5 100644 +--- a/Makefile ++++ b/Makefile +@@ -808,6 +808,10 @@ endif + KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ++ ++# These result in bogus false positives ++KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer) ++ + ifdef CONFIG_FRAME_POINTER + KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + else +-- +2.49.0 + diff --git a/vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch b/vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch new file mode 100644 index 000000000..2e547c2c9 --- /dev/null +++ b/vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch @@ -0,0 +1,65 @@ +From 49beadbd47c270a00754c107a837b4f29df4c822 Mon Sep 17 00:00:00 2001 +Message-ID: <49beadbd47c270a00754c107a837b4f29df4c822.1744656535.git.osandov@fb.com> +From: Linus Torvalds +Date: Thu, 9 Jun 2022 09:41:42 -0700 +Subject: [PATCH] gcc-12: disable '-Wdangling-pointer' warning for now +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +While the concept of checking for dangling pointers to local variables +at function exit is really interesting, the gcc-12 implementation is not +compatible with reality, and results in false positives. 
+ +For example, gcc sees us putting things on a local list head allocated +on the stack, which involves exactly those kinds of pointers to the +local stack entry: + + In function ‘__list_add’, + inlined from ‘list_add_tail’ at include/linux/list.h:102:2, + inlined from ‘rebuild_snap_realms’ at fs/ceph/snap.c:434:2: + include/linux/list.h:74:19: warning: storing the address of local variable ‘realm_queue’ in ‘*&realm_27(D)->rebuild_item.prev’ [-Wdangling-pointer=] + 74 | new->prev = prev; + | ~~~~~~~~~~^~~~~~ + +But then gcc - understandably - doesn't really understand the big +picture how the doubly linked list works, so doesn't see how we then end +up emptying said list head in a loop and the pointer we added has been +removed. + +Gcc also complains about us (intentionally) using this as a way to store +a kind of fake stack trace, eg + + drivers/acpi/acpica/utdebug.c:40:38: warning: storing the address of local variable ‘current_sp’ in ‘acpi_gbl_entry_stack_pointer’ [-Wdangling-pointer=] + 40 | acpi_gbl_entry_stack_pointer = &current_sp; + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ + +which is entirely reasonable from a compiler standpoint, and we may want +to change those kinds of patterns, but not not. + +So this is one of those "it would be lovely if the compiler were to +complain about us leaving dangling pointers to the stack", but not this +way. 
+ +Signed-off-by: Linus Torvalds +--- + Makefile | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/Makefile b/Makefile +index c43d825a3c4c..09208ffca353 100644 +--- a/Makefile ++++ b/Makefile +@@ -805,6 +805,9 @@ endif + KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) + ++# These result in bogus false positives ++KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer) ++ + ifdef CONFIG_FRAME_POINTER + KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + else +-- +2.49.0 + From 78ef790bb8274a212e742c7e94b97a123a96469a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Apr 2025 09:52:19 -0700 Subject: [PATCH 153/166] CI: update to Ubuntu 22.04, drop Python 3.6 and 3.7 The GitHub Actions Ubuntu 20.04 image has just been shut down: actions/runner-images#11101. Update to Ubuntu 22.04. Since 22.04 doesn't include Python 3.6 or 3.7, also drop those. I've already announced that this upcoming release will be the last one with support for those Python versions (#467). I'll test them manually before cutting the release. Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 17 ++++------------- .github/workflows/vmtest-build.yml | 4 +--- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f03b4eaf8..22d36f82b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,14 +35,12 @@ concurrency: jobs: test: - # We're stuck on Ubuntu 20.04 as long as we want to keep testing on Python - # 3.6 due to actions/setup-python#544. 
- runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: python-version: ${{ (github.event_name == 'push' || inputs.test_all_python_versions) - && fromJSON('["3.13", "3.12", "3.11", "3.10", "3.9", "3.8", "3.7", "3.6"]') - || fromJSON('["3.12", "3.6"]')}} + && fromJSON('["3.13", "3.12", "3.11", "3.10", "3.9", "3.8"]') + || fromJSON('["3.13", "3.8"]')}} cc: [gcc, clang] fail-fast: false env: @@ -55,12 +53,6 @@ jobs: with: python-version: ${{ matrix.python-version }} allow-prereleases: true - - name: Check Python version for pre-commit - # Only run pre-commit / mypy on upstream supported Python versions - run: | - if [[ "${{ matrix.python-version }}" =~ ^3\.([89]|[0-9][0-9])$ ]]; then - echo USE_PRE_COMMIT=1 >> $GITHUB_ENV - fi - name: Install dependencies run: | sudo apt-get update -y @@ -69,11 +61,10 @@ jobs: if [[ "${{ matrix.python-version }}" =~ ^3\.[678]$ ]]; then pyroute2_version="<0.9.1" fi - pip install "pyroute2$pyroute2_version" setuptools ${USE_PRE_COMMIT/1/pre-commit} + pip install "pyroute2$pyroute2_version" setuptools pre-commit - name: Generate version.py run: python setup.py --version - name: Check with mypy - if: ${{ env.USE_PRE_COMMIT == '1' }} run: pre-commit run --all-files mypy - name: Build and test with ${{ matrix.cc }} run: CONFIGURE_FLAGS="--enable-compiler-warnings=error" python setup.py test -K ${{ inputs.test_all_kernel_flavors && '-F' || '' }} diff --git a/.github/workflows/vmtest-build.yml b/.github/workflows/vmtest-build.yml index e442240be..010ec0f15 100644 --- a/.github/workflows/vmtest-build.yml +++ b/.github/workflows/vmtest-build.yml @@ -13,9 +13,7 @@ jobs: arch: [x86_64, aarch64, ppc64, s390x, arm] fail-fast: false max-parallel: 5 - # Build on 20.04 so that we don't get host binaries (e.g., objtool) that - # depend on libraries too new for other distros. 
- runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 permissions: contents: write env: From 235944ed2448f5072e0ae317009779581899d9da Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Apr 2025 14:55:47 -0700 Subject: [PATCH 154/166] libdrgn: linux_kernel: don't log every module section address I've found that these flood the logs and often make me lose more interesting stuff in the scrollback. The values are available in Module.section_addresses, so just log where we got them. Signed-off-by: Omar Sandoval --- libdrgn/linux_kernel.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index d7c2da04f..1b889a016 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1601,6 +1601,7 @@ kernel_module_set_section_addresses_live(struct drgn_module *module) { struct drgn_error *err; struct drgn_program *prog = module->prog; + bool logged = false; _cleanup_free_ char *path; if (asprintf(&path, "/sys/module/%s/sections", module->name) < 0) { @@ -1633,8 +1634,12 @@ kernel_module_set_section_addresses_live(struct drgn_module *module) path, ent->d_name); } - drgn_log_debug(prog, "found section %s@0x%" PRIx64 " in %s", - ent->d_name, address, path); + if (!logged) { + drgn_log_debug(prog, + "getting section addresses from %s", + path); + logged = true; + } err = drgn_module_set_section_address(module, ent->d_name, address); if (err) @@ -1693,8 +1698,12 @@ kernel_module_set_section_addresses(struct drgn_module *module, // to the non-live path. 
if (!err || err->code != DRGN_ERROR_OS || err->errnum != EACCES) return err; - drgn_error_log_debug(prog, err, "falling back to sect_attrs: "); + drgn_error_log_debug(prog, err, + "falling back to section addresses from sect_attrs: "); drgn_error_destroy(err); + } else { + drgn_log_debug(prog, + "getting section addresses from sect_attrs"); } DRGN_OBJECT(attrs, prog); @@ -1814,9 +1823,6 @@ kernel_module_set_section_addresses(struct drgn_module *module, if (err) return err; - drgn_log_debug(prog, - "found section %s@0x%" PRIx64 " in sect_attrs", - name, address); err = drgn_module_set_section_address(module, name, address); if (err) return err; From 97a3c5a56a41c242be6ef40f51f7ab54702a1507 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Apr 2025 15:01:01 -0700 Subject: [PATCH 155/166] cli: document --no-default-{debug,kernel}-directories behavior with plugins These options also disable directories added by plugins, so document that. Signed-off-by: Omar Sandoval --- docs/man/drgn.rst | 4 ++-- drgn/cli.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/man/drgn.rst b/docs/man/drgn.rst index 0436e6ad3..58a3a5f07 100644 --- a/docs/man/drgn.rst +++ b/docs/man/drgn.rst @@ -140,7 +140,7 @@ in the Python API. .. option:: --no-default-debug-directories Don't search for debugging symbols by build ID and debug link in the - standard directories. + standard directories or those added by plugins. .. option:: --kernel-directory {PATH} @@ -152,7 +152,7 @@ in the Python API. .. option:: --no-default-kernel-directories Don't search for the kernel image and loadable kernel modules in the - standard directories. + standard directories or those added by plugins. 
Logging ^^^^^^^ diff --git a/drgn/cli.py b/drgn/cli.py index c29e3d259..58d3db16a 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -465,7 +465,8 @@ def _main() -> None: symbol_group.add_argument( "--no-default-debug-directories", action="store_true", - help="don't search for debugging symbols by build ID and debug link in the standard directories", + help="don't search for debugging symbols by build ID and debug link " + "in the standard directories or those added by plugins", ) symbol_group.add_argument( "--kernel-directory", @@ -479,7 +480,8 @@ def _main() -> None: symbol_group.add_argument( "--no-default-kernel-directories", action="store_true", - help="don't search for the kernel image and loadable kernel modules in the standard directories", + help="don't search for the kernel image and loadable kernel modules " + "in the standard directories or those added by plugins", ) advanced_group = parser.add_argument_group("advanced") From ef3f73bab3f00db500567c8b8b72597d48307c0c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 15 Apr 2025 15:57:59 -0700 Subject: [PATCH 156/166] docs: improve debugging information finder examples dnf_debug_info_finder() needs a couple more comments, and example_debug_info_finder() wouldn't actually run as is. Signed-off-by: Omar Sandoval --- docs/advanced_usage.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index b78b9a888..5ac8c7b92 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -100,6 +100,7 @@ an example for getting debugging symbols on Fedora Linux using DNF: # this is mainly for demonstration purposes; debuginfod, which drgn supports # out of the box, is more reliable. def dnf_debug_info_finder(modules: list[drgn.Module]) -> None: + # Determine all of the packages for the given modules. 
packages = set() for module in modules: if not module.wants_debug_file(): @@ -123,7 +124,9 @@ an example for getting debugging symbols on Fedora Linux using DNF: + sorted(packages) ) - # Now that it's installed, try the standard locations. + # Now that it's installed, try the standard locations. Other finders may + # need to try specific files for specific modules with module.try_file() + # instead. modules[0].prog.find_standard_debug_info(modules) @@ -174,8 +177,9 @@ Create ``drgn_plugin_example.py`` with the following contents: import drgn def example_debug_info_finder(modules: list[drgn.Module]) -> None: - if isinstance(module, drgn.MainModule): - module.try_file("/my/vmlinux") + for module in modules: + if isinstance(module, drgn.MainModule): + module.try_file("/my/vmlinux") def drgn_prog_set(prog: drgn.Program) -> None: if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: From 4834a956e912ac767724892b2d6abbaccca3688c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 16 Apr 2025 00:39:42 -0700 Subject: [PATCH 157/166] drgn.helpers.linux.mm: fix in_direct_map() docstring It's missing a blank line, which messes up the formatting. Also reword it a tiny bit. Fixes: 545aa52fe1ce ("mm, slab: Fix test failures on kernels with SLOB") Signed-off-by: Omar Sandoval --- drgn/helpers/linux/mm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index a7a02ed5d..401150fdb 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -1424,7 +1424,8 @@ def totalram_pages(prog: Program) -> int: @takes_program_or_default def in_direct_map(prog: Program, addr: IntegerLike) -> bool: """ - Return True if an address is within the kernel's direct memory mapping + Return whether an address is within the kernel's direct memory mapping. 
+ :param addr: address to check """ addr = operator.index(addr) From a938677d1cdfae931ab8510595a90e3997172da9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 16 Apr 2025 09:56:22 -0700 Subject: [PATCH 158/166] docs: revamp Getting Debugging Symbols page The major reworks for the module/debug info finder APIs and proper debuginfod support substantially change the story for getting debugging symbols. Revamp our documentation: * Document how to use debuginfod on different distributions. * Add flow charts with our recommendations on each distribution. * Add openSUSE documentation. * Also document how to build with debugging symbols on different build systems. Closes #380. Signed-off-by: Omar Sandoval --- .readthedocs.yaml | 2 + README.rst | 2 + docs/conf.py | 1 + docs/getting_debugging_symbols.rst | 572 ++++++++++++++++++++++++++--- 4 files changed, 525 insertions(+), 52 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 03853730e..14fff4e2e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,6 +3,8 @@ build: os: ubuntu-22.04 tools: python: "3" + apt_packages: + - graphviz sphinx: configuration: docs/conf.py python: diff --git a/README.rst b/README.rst index 3952bc02f..cc1c11a83 100644 --- a/README.rst +++ b/README.rst @@ -150,6 +150,8 @@ Note that RHEL/CentOS 6, Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and older) ship Python versions which are too old. Python 3.6 or newer must be installed. +.. 
_installation-from-source: + From Source ^^^^^^^^^^^ diff --git a/docs/conf.py b/docs/conf.py index 8405000b7..ded0634c4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,6 +18,7 @@ "linuxsrc", "setuptools_config", "sphinx.ext.extlinks", + "sphinx.ext.graphviz", "sphinx.ext.intersphinx", ] diff --git a/docs/getting_debugging_symbols.rst b/docs/getting_debugging_symbols.rst index 4aa063aea..841c67565 100644 --- a/docs/getting_debugging_symbols.rst +++ b/docs/getting_debugging_symbols.rst @@ -3,44 +3,324 @@ Getting Debugging Symbols .. highlight:: console -Most Linux distributions don't install debugging symbols for installed packages -by default. This page documents how to install debugging symbols on common -distributions. If drgn prints an error like:: +drgn needs debugging symbols in order to interpret the target program. If drgn +prints a warning like:: - $ sudo drgn - could not get debugging information for: - kernel (could not find vmlinux for 5.14.14-200.fc34.x86_64) + $ drgn + warning: missing debugging symbols for kernel 6.13.8-200.fc41.x86_64 + critical: missing some debugging symbols; see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html ... -Then you need to install debugging symbols. +then you need to get debugging symbols. The method depends on whether the +binary that is missing debugging symbols was built manually or is provided by +your Linux distribution. + +Note that you only need debugging symbols for the binaries you're actually +debugging. If the warnings are for modules, shared libraries, etc. that you +don't care about, feel free to ignore them. + +Since drgn 0.0.31, you can run drgn with ``--log-level debug`` to get logs of +where drgn looked for debugging symbols. + +Building With Debugging Symbols +------------------------------- + +If the binary that drgn warns about is one that you built yourself, then you +need to rebuild it with debugging symbols. 
Here is a quick overview of how to +do that in different build systems: + +.. list-table:: + :header-rows: 1 + + * - Build System + - Instructions + * - Linux Kernel + - Since Linux 5.18: In ``menuconfig``, set ``Kernel hacking -> + Compile-time checks and compiler options -> Debug information`` to + ``Rely on the toolchain's implicit default DWARF version``. Or, add + ``CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y`` to :file:`.config`. + + Before Linux 5.18: In ``menuconfig``, enable ``Kernel hacking -> + Compile-time checks and compiler options -> Compile the kernel with + debug info``. Or, add ``CONFIG_DEBUG_INFO=y`` to :file:`.config`. + * - `Meson `_ + - Run ``meson setup --buildtype=debugoptimized $builddir`` or + ``meson setup --buildtype=debug $builddir``. + * - `CMake `_ + - Run ``cmake -B $builddir -DCMAKE_BUILD_TYPE=RelWithDebInfo`` or + ``cmake -B $builddir -DCMAKE_BUILD_TYPE=Debug``. + + Or, add ``set(CMAKE_BUILD_TYPE RelWithDebInfo)`` or + ``set(CMAKE_BUILD_TYPE Debug)`` to :file:`CMakeLists.txt`. + * - Autotools + - Depends on the project, but usually ``CFLAGS="-Og -g" ./configure``. + * - Make + - Depends on the project, but usually ``CFLAGS="-Og -g" make``. + * - None (GCC or Clang directly) + - Pass ``-Og -g`` options. + +Consult your build system's documentation for details. + +Debugging Symbols for Linux Distribution Packages +------------------------------------------------- + +Most Linux distributions don't install debugging symbols for installed packages +by default. If the binary that drgn warns about is part of your Linux +distribution, then you have two options: manual installation through the +package manager or automatic downloads using debuginfod. This section documents +how to do both on common Linux distributions, including flow charts for +recommended practices. + +.. 
contents:: Contents + :depth: 1 + :local: + :backlinks: none + +Debuginfod +^^^^^^^^^^ + +`debuginfod `_ is a service +providing debugging symbols via an HTTP API. Many Linux distributions run a +debuginfod server for their packages, and some automatically enable it. + +Debugging symbols can be downloaded via debuginfod automatically, so it +typically provides the best user experience. However, there are a few caveats, +especially when debugging the Linux kernel: + +1. Before drgn 0.0.31, drgn did not support using debuginfod for the Linux kernel. +2. Except on Fedora's debuginfod server, downloading debugging symbols for the + Linux kernel is extremely slow due to `technical limitations that have been + fixed upstream + `_ + but not yet deployed on other distributions. As a result, since drgn 0.0.31, + when debugging the Linux kernel, drgn only uses debuginfod on Fedora. +3. Before drgn 0.0.31, while drgn is downloading from debuginfod, it can't be + interrupted with :kbd:`Ctrl-C`, and it doesn't print a progress bar. + +.. _debuginfod-support: + +Since drgn 0.0.31, drgn includes whether it was built with debuginfod support +in its version string (look for "with debuginfod"):: + + $ drgn --version + drgn 0.0.31 (using Python 3.13.2, elfutils 0.192, with debuginfod (dlopen), with libkdumpfile) + +If you built drgn from source and the version string includes "without +debuginfod", make sure you installed the :ref:`necessary dependencies +` and rebuild drgn. Before drgn 0.0.31, drgn doesn't +need to be built specifically with debuginfod support. Fedora ------- +^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging\nsymbols on Fedora" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + use_dnf [ + label = "Manually install with\ndnf debuginfo-install" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> use_debuginfod [ label = ">= 0.0.31" ] + drgn_version -> use_dnf [ label = "< 0.0.31" ] + } + +Debuginfod +"""""""""" + +Fedora automatically enables debuginfod by default. Since drgn 0.0.31, drgn can +even use debuginfod for Linux kernel debugging symbols. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo dnf install elfutils-debuginfod-client + $ source /etc/profile.d/debuginfod.sh + +Also see the `Fedora debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +Debugging symbols can also be installed manually on Fedora with ``sudo dnf +debuginfo-install $package``. -Fedora makes it very easy to install debugging symbols with the `DNF -debuginfo-install plugin -`_, -which is installed by default. Simply run ``sudo dnf debuginfo-install $package``:: +To install symbols for the running kernel:: - $ sudo dnf debuginfo-install python3 + $ sudo dnf debuginfo-install kernel-$(uname -r) To find out what package owns a binary, use ``rpm -qf``:: - $ rpm -qf $(command -v python3) - python3-3.9.7-1.fc34.x86_64 + $ rpm -qf "$(command -v python3)" + python3-3.13.2-1.fc41.x86_64 + $ sudo dnf debuginfo-install python3 + +Also see the `Fedora documentation +`_. + +CentOS Stream +^^^^^^^^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging symbols\non CentOS Stream" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + use_dnf [ + label = "Manually install with\ndnf debuginfo-install" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> use_dnf [ label = "< 0.0.31" ] + kernel -> use_dnf [ label = "Yes" ] + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +CentOS Stream automatically enables debuginfod by default since CentOS Stream +9. drgn will not use it for Linux kernel debugging symbols by default. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo dnf install elfutils-debuginfod-client + $ source /etc/profile.d/debuginfod.sh + +Manual Installation +""""""""""""""""""" + +Debugging symbols can be installed manually on CentOS Stream with ``sudo dnf +debuginfo-install $package``. To install symbols for the running kernel:: $ sudo dnf debuginfo-install kernel-$(uname -r) -Also see the `Fedora documentation -`_. - -Debian ------- +To find out what package owns a binary, use ``rpm -qf``:: -Debian requires you to manually add the debugging symbol repositories:: + $ rpm -qf "$(command -v python3)" + python3-3.12.9-1.el10.x86_64 + $ sudo dnf debuginfo-install python3 +Debian +^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging\nsymbols on Debian" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + enable_debug_repos [ + label = "Enable debug\nrepositories" + shape = rectangle + style = filled + fillcolor = bisque + ] + use_apt [ + label = "Manually install\nwith apt" + style = filled + fillcolor = palegreen + ] + enable_debuginfod [ + label = "Enable debuginfod" + shape = rectangle + style = filled + fillcolor = bisque + ] + use_debuginfod [ + label = "Use debuginfod" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> enable_debug_repos [ label = "< 0.0.31" ] + kernel -> enable_debug_repos [ label = "Yes" ] + enable_debug_repos -> use_apt + kernel -> enable_debuginfod [ label = "No" ] + enable_debuginfod -> use_debuginfod + } + +Debuginfod +"""""""""" + +On Debian, debuginfod must be enabled manually:: + + $ sudo apt install libdebuginfod-common + $ sudo ln -s /usr/share/libdebuginfod-common/debuginfod.sh /usr/share/libdebuginfod-common/debuginfod.csh /etc/profile.d + $ source /etc/profile.d/debuginfod.sh + +drgn will not use it for Linux kernel debugging symbols by default. + +Also see the `Debian debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +On Debian, the debugging symbol repositories must be added manually:: + + $ sudo apt install lsb-release $ sudo tee /etc/apt/sources.list.d/debug.list << EOF deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-debug main deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-proposed-updates-debug main @@ -48,12 +328,15 @@ Debian requires you to manually add the debugging symbol repositories:: $ sudo apt update Then, debugging symbol packages can be installed with ``sudo apt install``. 
-Some debugging symbol packages are named with a ``-dbg`` suffix:: - $ sudo apt install python3-dbg +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbg -And some are named with a ``-dbgsym`` suffix:: +Some debugging symbol packages are named with a ``-dbg`` suffix and some are +named with a ``-dbgsym`` suffix:: + $ sudo apt install python3-dbg $ sudo apt install coreutils-dbgsym You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` @@ -61,25 +344,83 @@ package to find the correct name:: $ sudo apt install debian-goodies $ find-dbgsym-packages $(command -v python3) - libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym + libc6-dbg libexpat1-dbgsym python3.11-dbg zlib1g-dbgsym $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg -To install symbols for the running kernel:: - - $ sudo apt install linux-image-$(uname -r)-dbg - Also see the `Debian documentation `_. Ubuntu ------- - -On Ubuntu, you must install the debugging symbol archive signing key and -manually add the debugging symbol repositories:: - - $ sudo apt update - $ sudo apt install ubuntu-dbgsym-keyring +^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging\nsymbols on Ubuntu" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + enable_debug_repos [ + label = "Enable debug\nrepositories" + shape = rectangle + style = filled + fillcolor = bisque + ] + use_apt [ + label = "Manually install\nwith apt" + style = filled + fillcolor = palegreen + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> enable_debug_repos [ label = "< 0.0.31" ] + kernel -> enable_debug_repos [ label = "Yes" ] + enable_debug_repos -> use_apt + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +Ubuntu automatically enables debuginfod by default since Ubuntu 22.04 (Jammy +Jellyfish). drgn will not use it for Linux kernel debugging symbols by default. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo apt install libdebuginfod-common + $ source /etc/profile.d/debuginfod.sh + +Also see the `Ubuntu debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +On Ubuntu, the debugging symbol archive signing key must be installed and the +debugging symbol repositories must be added manually:: + + $ sudo apt install lsb-release ubuntu-dbgsym-keyring $ sudo tee /etc/apt/sources.list.d/debug.list << EOF deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse @@ -87,8 +428,14 @@ manually add the debugging symbol repositories:: EOF $ sudo apt update -Like Debian, some debugging symbol packages are named with a ``-dbg`` suffix -and some are named with a ``-dbgsym`` suffix:: +Then, debugging symbol packages can be installed with ``sudo apt install``. 
+ +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbgsym + +Some debugging symbol packages are named with a ``-dbg`` suffix and some are +named with a ``-dbgsym`` suffix:: $ sudo apt install python3-dbg $ sudo apt install coreutils-dbgsym @@ -98,30 +445,151 @@ package to find the correct name:: $ sudo apt install debian-goodies $ find-dbgsym-packages $(command -v python3) - libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym + libc6-dbg libexpat1-dbgsym python3.12-dbg zlib1g-dbgsym $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg -To install symbols for the running kernel:: - - $ sudo apt install linux-image-$(uname -r)-dbgsym - Also see the `Ubuntu documentation -`_. +`_. Arch Linux ----------- +^^^^^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging symbols\non Arch Linux" + style = filled + fillcolor = lightpink + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + not_available [ + label = "Debugging symbols\nare not available" + style = filled + fillcolor = lightpink + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + + start -> kernel + kernel -> not_available [ label = "Yes" ] + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +Arch Linux automatically enables debuginfod by default. However, debugging +symbols are not available for the Linux kernel. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo pacman -S --needed libelf + $ source /etc/profile.d/debuginfod.sh + +Also see the `Arch Linux debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +Arch Linux does not provide debugging symbol packages. + +openSUSE +^^^^^^^^ + +.. 
graphviz:: + + digraph { + start [ + label = "Need debugging symbols\non openSUSE" + style = filled + fillcolor = lightpink + ] + distribution [ + label = "Which\ndistribution?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + use_zypper [ + label = "Manually install\nwith zypper" + style = filled + fillcolor = palegreen + ] + + start -> distribution + distribution -> drgn_version [ label = "Tumbleweed" ] + distribution -> use_zypper [ label = "Leap" ] + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> use_zypper [ label = "< 0.0.31" ] + kernel -> use_zypper [ label = "Yes" ] + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +openSUSE Tumbleweed automatically enables debuginfod by default. drgn will not +use it for Linux kernel debugging symbols by default. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo zypper install debuginfod-client + $ source /etc/profile.d/debuginfod.sh + +openSUSE Leap does not support debuginfod. + +Manual Installation +""""""""""""""""""" + +Debugging symbols can be installed manually on openSUSE with:: + + $ sudo zypper --plus-content debug install "${package}-debuginfo" + +To install symbols for the running kernel:: + + $ sudo zypper --plus-content debug install "$(rpm --qf '%{NAME}-debuginfo-%{VERSION}-%{RELEASE}.%{ARCH}' -qf /boot/vmlinuz-"$(uname -r)")" + +To find out what package owns a binary, use ``rpm -qf``:: -Arch Linux unfortunately does not make debugging symbols available. Packages -must be manually rebuilt with debugging symbols enabled.
See the `ArchWiki -`_ and the `feature -request `_. + $ rpm -qf "$(command -v python3)" + python313-base-3.13.2-3.1.x86_64 + $ sudo zypper --plus-content debug install python313-base-debuginfo Oracle Linux ------------- +^^^^^^^^^^^^ -Oracle Linux provides documentation on installing the necessary debugging -symbols. See the documentation for `Oracle Linux 9 +Oracle Linux provides documentation on installing debugging symbols for the +Linux kernel. See the documentation for `Oracle Linux 9 `_ and `Oracle Linux 8 `_. From 8449bd7044b80cf16326eddcff4d008f550276ec Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 16 Apr 2025 11:25:03 -0700 Subject: [PATCH 159/166] setup.py: depend on setuptools (for pkg_resources) on Python < 3.8 Fixes: adf64729095b ("Add plugin system") Signed-off-by: Omar Sandoval --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 78c1018f5..a1f530073 100755 --- a/setup.py +++ b/setup.py @@ -467,6 +467,8 @@ def get_version(): }, entry_points={"console_scripts": ["drgn=drgn.cli:_main"]}, python_requires=">=3.6", + # We use pkg_resources on Python < 3.8. + install_requires=['setuptools;python_version<"3.8"'], author="Omar Sandoval", author_email="osandov@osandov.com", description="Programmable debugger", From 0824ffe9eb1ff49575094c87dc59935fb6df2336 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 16 Apr 2025 11:39:43 -0700 Subject: [PATCH 160/166] docs: add 0.0.31 release highlights Signed-off-by: Omar Sandoval --- docs/advanced_usage.rst | 2 + docs/release_highlights.rst | 1 + docs/release_highlights/0.0.31.rst | 192 +++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100644 docs/release_highlights/0.0.31.rst diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 5ac8c7b92..5baa4a473 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -83,6 +83,8 @@ You can create modules with the :ref:`module factory functions `. 
You can also modify various attributes of the :class:`drgn.Module` class. +.. _debugging-information-finders-example: + Debugging Information Finders ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/release_highlights.rst b/docs/release_highlights.rst index d5142f08d..cf393661d 100644 --- a/docs/release_highlights.rst +++ b/docs/release_highlights.rst @@ -6,6 +6,7 @@ from the full `release notes `_. .. toctree:: + release_highlights/0.0.31.rst release_highlights/0.0.30.rst release_highlights/0.0.28.rst release_highlights/0.0.27.rst diff --git a/docs/release_highlights/0.0.31.rst b/docs/release_highlights/0.0.31.rst new file mode 100644 index 000000000..3f4b11657 --- /dev/null +++ b/docs/release_highlights/0.0.31.rst @@ -0,0 +1,192 @@ +0.0.31 (Released April 16th, 2025) +================================== + +These are some of the highlights of drgn 0.0.31. See the `GitHub release +`_ for the full release +notes, including more improvements and bug fixes. + +Fun fact: this is the largest release of drgn since the first ever release, +both in terms of number of commits and changed lines of code. + +.. highlight:: pycon +.. program:: drgn + +Module API +---------- + +One of the first things drgn does when it starts up is figure out what binaries +are loaded in your program: executables, shared libraries, Linux kernel image, +Linux kernel modules, etc. Until this release, this all happened internally to +drgn with no way to inspect or override it. This release adds new APIs to +address this. + +First, the :class:`drgn.Module` class and its subclasses were added to +represent binaries used by a program. + +:class:`drgn.Program` gained a couple of methods for querying what modules were +created for a program, :meth:`drgn.Program.modules()` and +:meth:`drgn.Program.module()`:: + + >>> for module in prog.modules(): + ... print(module) + ... 
+ prog.main_module(name='kernel') + prog.relocatable_module(name='scsi_dh_rdac', address=0xffffffffc02fb000) + prog.relocatable_module(name='nvme', address=0xffffffffc051f000) + prog.relocatable_module(name='spi_intel', address=0xffffffffc0fa3000) + ... + >>> prog.module("kernel") + prog.main_module(name='kernel') + >>> prog.module(0xffffffff92811100) + prog.main_module(name='kernel') + +Modules are normally created automatically for all loaded binaries when +debugging symbols are loaded. This can also be done manually with +:meth:`drgn.Program.loaded_modules()` or +:meth:`drgn.Program.create_loaded_modules()`. Arbitrary modules can also be +:ref:`created manually `. This enables more +:ref:`advanced use cases `. + +Options for Finding Debugging Symbols +------------------------------------- + +drgn now provides much more control over how debugging symbols are found. + +The :option:`--try-symbols-by` and :option:`--no-symbols-by` command line +options allow enabling or disabling methods of searching for debugging symbols. +The :option:`--debug-directory` and :option:`--no-default-debug-directories` +options allow controlling the directories that are searched for debugging +symbols. The :option:`--kernel-directory` and +:option:`--no-default-kernel-directories` options allow controlling the +directories that are searched for Linux kernel files. + +For example, if you have a kernel core dump and a directory containing kernel +debugging symbols: + +.. code-block:: console + + $ ls + kernel-6.15.0-rc1-debuginfo vmcore + $ drgn -c vmcore --kernel-directory kernel-6.15.0-rc1-debuginfo + +These options are also available programmatically as +:attr:`drgn.Program.debug_info_options`. + +Stricter Debugging Symbol File Matching +--------------------------------------- + +A common pitfall for users is passing the wrong debugging symbol file to +:option:`-s` (for example, the vmlinux from a different kernel build, or a +kernel module or library that wasn't loaded at the time). 
Before this release, +drgn was quite permissive and would use the file anyway, usually with +confusing results. + +Starting in this release, drgn now always checks that files passed to +:option:`-s` or :meth:`drgn.Program.load_debug_info()` correspond to a loaded +module (based on build IDs). If not, it logs a warning and ignores them. + +However, there are valid use cases for adding unloaded files, like corrupted +core dumps or reading debugging symbols from arbitrary files. If you really +want to use a file for a specific module, then you can find the module with +:meth:`drgn.Program.modules()` or :meth:`drgn.Program.module()` and add the +file with :meth:`drgn.Module.try_file(path, force=True) +`. If you really want to load debugging symbols from a +file without associating it with a loaded module, you can use +:option:`--extra-symbols` or +:meth:`drgn.Program.extra_module(...).try_file(path) +`. + +Debuginfod Integration +---------------------- + +`debuginfod `_ is a service +for automatically downloading debugging symbols. drgn has had partial +debuginfod support for a long time (via the libdwfl library), with a few +important limitations: + +1. It couldn't use debuginfod for the Linux kernel. +2. Downloads couldn't be interrupted with Ctrl-C. +3. The download progress bar wasn't very pretty. + +This release improves drgn's integration with debuginfod and fixes these +issues. + +There's still one caveat for the Linux kernel: drgn only enables debuginfod for +the Linux kernel on Fedora, because other distributions haven't yet deployed +the `fix for extremely slow downloads of kernel debugging symbols +`_ +on their debuginfod servers. Contact your distribution to request that they +update their debuginfod server to at least elfutils 0.192 and compress their +kernel debug info packages with parallel xz.
+ +Custom Debugging Information Finders +------------------------------------ + +If the above options for finding debugging symbols don't provide enough +flexibility, you can define totally custom ways of finding debugging symbols by +registering a debugging information finder. See :ref:`here +` for an example. + +Plugins +------- + +drgn now has a basic plugin system. Currently, the main use case is +automatically setting system- or user-specific configuration when drgn starts +up. For example, system administrators may install a plugin that registers a +debugging information finder for their specific system. See :ref:`here +` for an overview and :ref:`here ` for an example. + +Running Code Snippets on the Command Line +----------------------------------------- + +Sometimes, you don't want an interactive drgn session or a full drgn script; +you just want to run a short snippet of code. In this release, Stephen Brennan +added the :option:`-e` option, which takes a string of code to evaluate: + +.. code-block:: console + + $ python3 -m drgn -e 'print(kaslr_offset())' + 251658240 + +(We would have used ``-c`` like the Python CLI, but that is already used to +specify a core dump.) + +Kernel Stack Unwinding Without Debugging Symbols +------------------------------------------------ + +drgn has had support for the Linux kernel's `ORC unwinder +`_ for a long time. +However, although ORC data is typically saved in kernel core dumps, drgn +previously only supported reading ORC data from the kernel debugging symbol +files. + +In this release, Stephen Brennan expanded drgn's ORC support to be able to read +ORC data directly from the core dump. This enables reliable stack unwinding +even through unknown or out-of-tree kernel modules. This is the latest step +towards support for `debugging the Linux kernel without full DWARF debugging +information `_. 
+ +Linux 6.14 and 6.15 Support +--------------------------- + +A change in Linux 6.14 broke how drgn determines module section addresses. This +error on startup is fixed in this release:: + + /lib/modules/6.14.2/kernel/fs/binfmt_misc.ko (could not get section addresses: 'struct module_sect_attrs' has no member 'nsections') + +A change in Linux 6.15 broke the :mod:`~drgn.helpers.linux.kernfs` helpers. +This error is fixed in this release:: + + AttributeError: 'struct kernfs_node' has no member 'parent' + +Another change in Linux 6.15 broke the +:func:`~drgn.helpers.linux.fs.path_lookup()` helper's handling of mount points. +This is fixed in this release. + +Last Release With Python 3.6 & 3.7 Support +------------------------------------------ + +This will be the last release of drgn with support for Python 3.6 and 3.7. Both +versions have been EOL for a while, and the maintenance burden has become +unsustainable. See `here `_ for the +announcement. Python 3.8 support will probably follow suit soon. From 8209a147fb61deed38ca376d063bbff0343ca234 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 16 Apr 2025 11:40:23 -0700 Subject: [PATCH 161/166] drgn 0.0.31 Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 +- libdrgn/drgn.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 5d53a62a5..295e70aca 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Meta Platforms, Inc. and affiliates. dnl SPDX-License-Identifier: LGPL-2.1-or-later -AC_INIT([libdrgn], [0.0.30], +AC_INIT([libdrgn], [0.0.31], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index 15f4c43da..56df73a7f 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -44,7 +44,7 @@ /** Minor version of drgn. */ #define DRGN_VERSION_MINOR 0 /** Patch level of drgn.
*/ -#define DRGN_VERSION_PATCH 30 +#define DRGN_VERSION_PATCH 31 /** * @defgroup ErrorHandling Error handling From 3977bdc75cdd3941effdf7522166600c00505fc1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 19 Apr 2025 02:10:56 -0700 Subject: [PATCH 162/166] tests: fix deserialize_struct64 tests on i386 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct foo64 is only 20 bytes with i386's alignment requirements, so we get the following compiler warning on i386: ‘memcpy’ forming offset [20, 23] is out of the bounds [0, 20] of object ‘foo’ with type ‘struct foo64’ and a crash when running the tests. Fix it by adding padding manually. I verified that this only affects the test cases. All of the structs we actually use deserialize_struct64 for are properly defined. Fixes #493. Signed-off-by: Omar Sandoval --- libdrgn/tests/serialize.c.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libdrgn/tests/serialize.c.in b/libdrgn/tests/serialize.c.in index 1940a0371..96143c812 100644 --- a/libdrgn/tests/serialize.c.in +++ b/libdrgn/tests/serialize.c.in @@ -11,6 +11,8 @@ struct foo64 { uint16_t small; uint8_t tiny; uint8_t array[3]; + // Add padding so size is consistent on all architectures. + uint8_t pad[6]; }; struct foo32 { @@ -19,6 +21,8 @@ struct foo32 { uint8_t small; uint8_t tiny; uint8_t array[3]; + // Add padding so size is consistent on all architectures. + uint8_t pad; }; #define visit_foo_members(visit_scalar_member, visit_raw_member) do { \ From 20b0ff71f58e2449610e9e72622c36dc0d84c2ec Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 25 Apr 2025 13:07:41 -0700 Subject: [PATCH 163/166] tests: fix issues with CLI test error handling We need to catch and print exceptions in the forked process. stdout and stderr are also str, so we shouldn't be calling .decode(). 
Fixes: 22ac3f37e70c ("tests: don't shell out for CLI tests") Signed-off-by: Omar Sandoval --- tests/test_cli.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 5e842ca72..91414f166 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -5,6 +5,7 @@ import os import sys import tempfile +import traceback import types import drgn.cli @@ -20,22 +21,26 @@ def run_cli(self, args, *, input=None): pid = os.fork() if pid == 0: - os.close(stdout_r) - sys.stdout = open(stdout_w, "w") - os.close(stderr_r) - sys.stderr = open(stderr_w, "w") - - if input is not None: - os.close(stdin_w) - sys.stdin = open(stdin_r, "r") - - sys.argv = ["drgn"] + args - - drgn.cli._main() - - sys.stdout.flush() - sys.stderr.flush() - os._exit(0) + try: + os.close(stdout_r) + sys.stdout = open(stdout_w, "w") + os.close(stderr_r) + sys.stderr = open(stderr_w, "w") + + if input is not None: + os.close(stdin_w) + sys.stdin = open(stdin_r, "r") + + sys.argv = ["drgn"] + args + + drgn.cli._main() + finally: + exception = sys.exc_info()[1] is not None + if exception: + traceback.print_exc() + sys.stdout.flush() + sys.stderr.flush() + os._exit(1 if exception else 0) os.close(stdout_w) os.close(stderr_w) @@ -62,9 +67,9 @@ def run_cli(self, args, *, input=None): f"""\ {msg} STDOUT: -{stdout.decode()} +{stdout} STDERR: -{stderr.decode()} +{stderr} """ ) From 7237106c5e1b70598626e0288d4d3de4e73b105f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 25 Apr 2025 13:14:32 -0700 Subject: [PATCH 164/166] cli: don't enter interactive mode for empty -e argument drgn -e '' should start drgn, execute the empty statement, and exit, not enter interactive mode. Fix it by checking whether args.exec is None instead of bool(args.exec). 
Fixes: 150ee760dcd3 ("cli: add -e option to exec() code directly") Signed-off-by: Omar Sandoval --- drgn/cli.py | 12 ++++++------ tests/test_cli.py | 7 +++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drgn/cli.py b/drgn/cli.py index 58d3db16a..3b0502cd7 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -530,8 +530,8 @@ def _main() -> None: args = parser.parse_args() - script = bool(not args.exec and args.args) - interactive = bool(not args.exec and not args.args and _is_tty(sys.stdin)) + script = bool(args.exec is None and args.args) + interactive = bool(args.exec is None and not args.args and _is_tty(sys.stdin)) if script: # A common mistake users make is running drgn $core_dump, which tries # to run $core_dump as a Python script. Rather than failing later with @@ -609,13 +609,13 @@ def _main() -> None: else: sys.path.insert(0, "") exec_globals = default_globals(prog) - if args.exec: - sys.argv = ["-e"] + args.args - exec(args.exec, exec_globals) - else: + if args.exec is None: sys.argv = [""] exec_globals["__file__"] = "" exec(compile(sys.stdin.read(), "", "exec"), exec_globals) + else: + sys.argv = ["-e"] + args.args + exec(args.exec, exec_globals) def run_interactive( diff --git a/tests/test_cli.py b/tests/test_cli.py index 91414f166..0660a754c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -90,6 +90,13 @@ def test_e(self): ) self.assertEqual(proc.stdout, "['-e', 'pass']\n") + def test_e_empty(self): + self.run_cli( + ["--quiet", "--pid", "0", "--no-default-symbols", "-e", ""], + # This shouldn't be executed. 
+ input="raise Exception('-e was ignored')", + ) + def test_script(self): with tempfile.NamedTemporaryFile() as f: f.write( From 89260b18b9fa01ffc7d5bfaedeb34aeec0198557 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 25 Apr 2025 13:18:35 -0700 Subject: [PATCH 165/166] vmtest.rootfsbuild: install check We don't have a nice automated way to run the C unit tests from the main vmtest CLI yet (I'm probably going to wait until we convert to Meson to see how that will work), but at least install check so it can be done manually. Signed-off-by: Omar Sandoval --- vmtest/rootfsbuild.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vmtest/rootfsbuild.py b/vmtest/rootfsbuild.py index e7eaefaac..42d05af80 100644 --- a/vmtest/rootfsbuild.py +++ b/vmtest/rootfsbuild.py @@ -37,6 +37,7 @@ "python3-setuptools", # Test dependencies. "btrfs-progs", + "check", "iproute2", "kexec-tools", "kmod", From d61671ed3436f47d3152933bc44382176001d2c0 Mon Sep 17 00:00:00 2001 From: Sebastien Roy Date: Thu, 1 May 2025 09:56:44 -0400 Subject: [PATCH 166/166] add back .pre-commit-config.yaml --- .pre-commit-config.yaml | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..2c4383f56 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +exclude: ^contrib/ +repos: +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) +- repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + exclude: ^docs/exts/details\.py$ +- repo: https://github.com/pycqa/flake8 + rev: 7.1.1 + hooks: + - id: flake8 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.11.2 + hooks: + - id: mypy + args: [--show-error-codes, --strict, --no-warn-return-any, --no-warn-unused-ignores] + files: ^(drgn/.*\.py|_drgn.pyi|_drgn_util/.*\.py|tools/.*\.py)$ +- repo: 
https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + exclude_types: [diff] + - id: end-of-file-fixer + exclude_types: [diff] + - id: check-yaml + - id: check-added-large-files + - id: debug-statements + - id: check-merge-conflict +- repo: https://github.com/netromdk/vermin + rev: v1.6.0 + hooks: + - id: vermin + # The vmtest package in general should adhere to the same version + # requirements as drgn, with the following exceptions: The manage & + # kbuild scripts are used by Github Actions and need not be broadly + # compatible. + exclude: "^vmtest/(manage|kbuild).py$" + args: ['-t=3.6-', '--violations', '--eval-annotations']