From 31cbc187ce842f0c9bf455267579820bda11eb22 Mon Sep 17 00:00:00 2001 From: Davide Cavalca Date: Tue, 6 Apr 2021 12:43:20 -0700 Subject: [PATCH 01/56] add COPYING to sdist Signed-off-by: Davide Cavalca --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index f4ea67285..ef05b5490 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ recursive-include examples *.py recursive-include tests *.py recursive-include tools *.py recursive-include vmtest *.c *.py *.rst -include util.py vmtest/config +include COPYING util.py vmtest/config From 6332f9846a20c40e7714af72485d917c4f901a0c Mon Sep 17 00:00:00 2001 From: Davide Cavalca Date: Mon, 5 Apr 2021 14:35:39 -0700 Subject: [PATCH 02/56] examples: add missing shebangs Signed-off-by: Davide Cavalca --- examples/linux/cgroup.py | 1 + examples/linux/fs_inodes.py | 1 + examples/linux/lsmod.py | 1 + examples/linux/ps.py | 1 + examples/linux/tcp_sock.py | 1 + 5 files changed, 5 insertions(+) diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index eab17d4f4..e215136a2 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 6bc32de23..856d13f76 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/lsmod.py b/examples/linux/lsmod.py index a60917eb6..1585e2c22 100755 --- a/examples/linux/lsmod.py +++ b/examples/linux/lsmod.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/ps.py b/examples/linux/ps.py index 3f1b8d230..0eda3be28 100755 --- a/examples/linux/ps.py +++ b/examples/linux/ps.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index 1df33cf6d..ad628974c 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later From 08498967f7f7401c184ae561fadc2bbc7f6a3e1a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 21 Apr 2021 13:19:49 -0700 Subject: [PATCH 03/56] libdrgn: configure with large file support /proc/pid/mem is indexed by address. On 32-bit systems, addresses may be out of the range of a 32-bit signed off_t. This results in pread() returning EINVAL in drgn_read_memory_file(). Use AC_SYS_LARGEFILE in configure.ac so that we use 64-bit off_t by default. Closes #98. Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 35fee53cb..ba5cfb3bb 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -15,6 +15,8 @@ AM_PROG_AR LT_INIT +AC_SYS_LARGEFILE + AC_ARG_ENABLE([openmp], [AS_HELP_STRING([--enable-openmp@<:@=ARG@:>@], [use OpenMP. ARG may be yes, no, or the name of From c768e97394f4af75db7b903e4d8282441b8f39fc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 22 Apr 2021 00:46:41 -0700 Subject: [PATCH 04/56] libdrgn: python: use _Thread_local instead of PyThreadState for drgn_in_python Using a Python dictionary for this is much more heavyweight than just using a thread-local variable (with no benefit as far as I can tell). This also gets rid of a call to _PyDict_GetItem(). 
Signed-off-by: Omar Sandoval --- libdrgn/python/error.c | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index 1fb2b1364..5e674ca23 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -66,47 +66,24 @@ static struct drgn_error drgn_error_python = { .message = "error in Python callback", }; -_Py_IDENTIFIER(drgn_in_python); +static _Thread_local bool drgn_in_python = false; bool set_drgn_in_python(void) { - PyObject *dict, *key, *value; - - dict = PyThreadState_GetDict(); - if (!dict) - return false; - key = _PyUnicode_FromId(&PyId_drgn_in_python); - if (!key) { - PyErr_Clear(); - return false; - } - value = PyDict_GetItemWithError(dict, key); - if (value == Py_True) + if (drgn_in_python) return false; - if ((!value && PyErr_Occurred()) || - PyDict_SetItem(dict, key, Py_True) == -1) { - PyErr_Clear(); - return false; - } + drgn_in_python = true; return true; } void clear_drgn_in_python(void) { - PyObject *exc_type, *exc_value, *exc_traceback; - PyObject *dict; - - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); - dict = PyThreadState_GetDict(); - if (dict) - _PyDict_SetItemId(dict, &PyId_drgn_in_python, Py_False); - PyErr_Restore(exc_type, exc_value, exc_traceback); + drgn_in_python = false; } struct drgn_error *drgn_error_from_python(void) { PyObject *exc_type, *exc_value, *exc_traceback, *exc_message; - PyObject *dict; const char *type, *message; struct drgn_error *err; @@ -114,8 +91,7 @@ struct drgn_error *drgn_error_from_python(void) if (!exc_type) return NULL; - dict = PyThreadState_GetDict(); - if (dict && _PyDict_GetItemId(dict, &PyId_drgn_in_python) == Py_True) { + if (drgn_in_python) { PyErr_Restore(exc_type, exc_value, exc_traceback); return &drgn_error_python; } From 0e2703dd4ed7843d272cc6d160e6afbdc29c6ec6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 22 Apr 2021 00:53:56 -0700 Subject: [PATCH 05/56] libdrgn: python: use 
_PyDict_GetItemIdWithError() CPython commit fb5db7ec5862 ("bpo-42006: Stop using PyDict_GetItem, PyDict_GetItemString and _PyDict_GetItemId. (GH-22648)") (in v3.10) removed _PyDict_GetItemId() because it suppresses errors. Use _PyDict_GetItemIdWithError() instead (which we should've been using anyways). Closes #101. Signed-off-by: Omar Sandoval --- libdrgn/python/type.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 817f36bc9..c0f6afbc0 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -422,13 +422,14 @@ DrgnType_ATTR(template_parameters); static PyObject *DrgnType_getter(DrgnType *self, struct DrgnType_Attr *attr) { - PyObject *value; - - value = _PyDict_GetItemId(self->attr_cache, &attr->id); + PyObject *value = _PyDict_GetItemIdWithError(self->attr_cache, + &attr->id); if (value) { Py_INCREF(value); return value; } + if (PyErr_Occurred()) + return NULL; value = attr->getter(self); if (!value) From 6b79b21ab5682a1270f07e0d84b4952cec7b314a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 22 Apr 2021 01:10:33 -0700 Subject: [PATCH 06/56] tests: fix test depending on repr(enum.Flag) format CPython commit b775106d940e ("bpo-40066: Enum: modify `repr()` and `str()` (GH-22392)") changed repr(enum.Flag) from, e.g., <Qualifiers.VOLATILE|CONST: 3> to Qualifiers.CONST|Qualifiers.VOLATILE. Fix tests.test_type.TestType.test_qualifiers to not assume the format. 
Signed-off-by: Omar Sandoval --- tests/test_type.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_type.py b/tests/test_type.py index 2ebfa1ad6..7ac393e8c 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -945,7 +945,8 @@ def test_qualifiers(self): t = self.prog.void_type(qualifiers=Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual(t.qualifiers, Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual( - repr(t), "prog.void_type(qualifiers=<Qualifiers.VOLATILE|CONST: 3>)" + repr(t), + f"prog.void_type(qualifiers={repr(Qualifiers.CONST | Qualifiers.VOLATILE)})", ) self.assertIdentical( From 155ec92ef2b2bd71b1ed7cce74441ba86062fe74 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 22 Apr 2021 09:45:41 -0700 Subject: [PATCH 07/56] libdrgn: fix reading 32-bit float object values on big-endian Closes #99. Signed-off-by: Omar Sandoval --- libdrgn/object.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libdrgn/object.c b/libdrgn/object.c index 011f64f0d..60deb2f94 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -233,7 +233,12 @@ static void drgn_value_deserialize(union drgn_value *value, const void *buf, int64_t svalue; uint64_t uvalue; double fvalue64; - float fvalue32; + struct { +#if !HOST_LITTLE_ENDIAN + float pad; +#endif + float fvalue32; + }; } tmp; tmp.uvalue = deserialize_bits(buf, bit_offset, bit_size, little_endian); From 33300d426e0ec9ed7bb8a993cc9eb91d07d4c899 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 22 Apr 2021 16:31:31 -0700 Subject: [PATCH 08/56] libdrgn: debug_info: don't overwrite Dwarf_Die passed to drgn_type_from_dwarf_internal() If the DIE passed to drgn_type_from_dwarf_internal() is a declaration, then we overwrite it with dwarf_offdie(). As far as I can tell, this doesn't break anything at the moment, but it's sketchy to overwrite an input parameter and may cause issues in the future. Use a temporary DIE on the stack in this case instead. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 051b44c09..bd7f9065a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2986,6 +2986,7 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, } /* If we got a declaration, try to find the definition. */ + Dwarf_Die definition_die; bool declaration; if (dwarf_flag(die, DW_AT_declaration, &declaration)) return drgn_error_libdw(); @@ -3001,8 +3002,10 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, return drgn_error_libdwfl(); uintptr_t start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - if (!dwarf_offdie(dwarf, die_addr - start, die)) + if (!dwarf_offdie(dwarf, die_addr - start, + &definition_die)) return drgn_error_libdw(); + die = &definition_die; } } From 9dabec12641f1033b8dc6f2049205a982e0043a1 Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Fri, 6 Nov 2020 16:09:27 -0800 Subject: [PATCH 09/56] libdrgn: add support for parsing type units Adds support for parsing of type units as enabled by -fdebug-types-section. If a module has both a debug info section and type unit section, both are read. 
Signed-off-by: Jay Kamat --- libdrgn/debug_info.c | 17 ++++++++++++++--- libdrgn/debug_info.h | 1 + libdrgn/dwarf_index.c | 39 +++++++++++++++++++++++++++++++++------ 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index bd7f9065a..58c3aee60 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -97,6 +97,7 @@ static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_DEBUG_INFO] = ".debug_info", + [DRGN_SCN_DEBUG_TYPES] = ".debug_types", [DRGN_SCN_DEBUG_ABBREV] = ".debug_abbrev", [DRGN_SCN_DEBUG_STR] = ".debug_str", [DRGN_SCN_DEBUG_LINE] = ".debug_line", @@ -3002,9 +3003,19 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, return drgn_error_libdwfl(); uintptr_t start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - if (!dwarf_offdie(dwarf, die_addr - start, - &definition_die)) - return drgn_error_libdw(); + size_t size = + module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die_addr >= start && die_addr < start + size) { + if (!dwarf_offdie(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + /* Assume .debug_types */ + if (!dwarf_offdie_types(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } die = &definition_die; } } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index ef56c9739..15800b95c 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -52,6 +52,7 @@ enum drgn_debug_info_module_state { enum drgn_debug_info_scn { /* Sections whose data we should cache when loading the module. 
*/ DRGN_SCN_DEBUG_INFO, + DRGN_SCN_DEBUG_TYPES, DRGN_SCN_DEBUG_ABBREV, DRGN_SCN_DEBUG_STR, DRGN_SCN_DEBUG_LINE, diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 8575fe9e1..5b80fefdf 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -82,6 +82,7 @@ struct drgn_dwarf_index_cu { uint8_t version; uint8_t address_size; bool is_64_bit; + bool is_type_unit; /* * This is indexed on the DWARF abbreviation code minus one. It maps the * abbreviation code to an index in abbrev_insns where the instruction @@ -585,6 +586,12 @@ static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) &buffer->cu->address_size))) return err; + /* Skip type_signature and type_offset for type units. */ + if (buffer->cu->is_type_unit && + (err = binary_buffer_skip(&buffer->bb, + buffer->cu->is_64_bit ? 16 : 12))) + return err; + return read_abbrev_table(buffer->cu, debug_abbrev_offset); } @@ -776,7 +783,8 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, { struct drgn_error *err; struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_info = cu->module->scn_data[DRGN_SCN_DEBUG_INFO]; + Elf_Data *debug_info = cu->module->scn_data[ + cu->is_type_unit ? 
DRGN_SCN_DEBUG_TYPES : DRGN_SCN_DEBUG_INFO]; const char *debug_info_buffer = debug_info->d_buf; unsigned int depth = 0; for (;;) { @@ -997,12 +1005,13 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, return NULL; } -void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module) +static void drgn_dwarf_index_read_cus(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) { struct drgn_error *err; struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_INFO); + drgn_debug_info_buffer_init(&buffer, module, scn); while (binary_buffer_has_next(&buffer.bb)) { const char *cu_buf = buffer.bb.pos; uint32_t unit_length32; @@ -1036,6 +1045,7 @@ void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, .buf = cu_buf, .len = cu_len, .is_64_bit = is_64_bit, + .is_type_unit = scn == DRGN_SCN_DEBUG_TYPES, }; struct drgn_dwarf_index_cu_buffer cu_buffer; drgn_dwarf_index_cu_buffer_init(&cu_buffer, &cu); @@ -1064,6 +1074,14 @@ void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, drgn_dwarf_index_update_cancel(state, err); } +void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module) +{ + drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); + if (module->scn_data[DRGN_SCN_DEBUG_TYPES]) + drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_TYPES); +} + bool drgn_dwarf_index_find_definition(struct drgn_dwarf_index *dindex, uintptr_t die_addr, @@ -1532,6 +1550,8 @@ drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state) struct drgn_dwarf_index_cu_buffer buffer; drgn_dwarf_index_cu_buffer_init(&buffer, cu); buffer.bb.pos += cu->is_64_bit ? 23 : 11; + if (cu->is_type_unit) + buffer.bb.pos += cu->is_64_bit ? 
16 : 12; struct drgn_error *cu_err = index_cu_second_pass(&dindex->global, &buffer); if (cu_err) @@ -1692,7 +1712,14 @@ struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, return drgn_error_libdwfl(); uintptr_t start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) - return drgn_error_libdw(); + size_t size = die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die->addr >= start && die->addr < start + size) { + if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + if (!dwarf_offdie_types(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } return NULL; } From 6be21f674af3319a135f946c3bd46fd954631401 Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Wed, 11 Nov 2020 16:19:40 -0800 Subject: [PATCH 10/56] libdrgn: follow DW_AT_signature when parsing DWARF types When using type units, skeleton declarations are made instead of concrete ones. However, these declarations have signature tags attached that point to the type unit with the definition, so we can simply follow the signature to get the concrete type. Signed-off-by: Jay Kamat --- libdrgn/debug_info.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 58c3aee60..23266dab8 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2986,8 +2986,19 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, "maximum DWARF type parsing depth exceeded"); } - /* If we got a declaration, try to find the definition. */ + /* If the DIE has a type unit signature, follow it. 
*/ Dwarf_Die definition_die; + { + Dwarf_Attribute attr_mem, *attr; + if ((attr = dwarf_attr_integrate(die, DW_AT_signature, + &attr_mem))) { + if (!dwarf_formref_die(attr, &definition_die)) + return drgn_error_libdw(); + die = &definition_die; + } + } + + /* If we got a declaration, try to find the definition. */ bool declaration; if (dwarf_flag(die, DW_AT_declaration, &declaration)) return drgn_error_libdw(); From c108f9a24cf166afd80a9ad8bf9038c5611baf26 Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Wed, 17 Feb 2021 16:27:04 -0800 Subject: [PATCH 11/56] tests: add basic tests for type units Signed-off-by: Jay Kamat --- tests/dwarfwriter.py | 141 +++++++++++++++++++++++++++---------------- tests/test_dwarf.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 52 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index c0bacca53..2ae056d4e 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -35,7 +35,7 @@ def _append_sleb128(buf, value): buf.append(byte | 0x80) -def _compile_debug_abbrev(cu_die): +def _compile_debug_abbrev(unit_dies): buf = bytearray() code = 1 @@ -54,26 +54,20 @@ def aux(die): for child in die.children: aux(child) - aux(cu_die) + for die in unit_dies: + aux(die) buf.append(0) return buf -def _compile_debug_info(cu_die, little_endian, bits): - buf = bytearray() +def _compile_debug_info(unit_dies, little_endian, bits): byteorder = "little" if little_endian else "big" - - buf.extend(b"\0\0\0\0") # unit_length - buf.extend((4).to_bytes(2, byteorder)) # version - buf.extend((0).to_bytes(4, byteorder)) # debug_abbrev_offset - buf.append(bits // 8) # address_size - die_offsets = [] relocations = [] code = 1 decl_file = 1 - def aux(die, depth): + def aux(buf, die, depth): nonlocal code, decl_file if depth == 1: die_offsets.append(len(buf)) @@ -108,6 +102,8 @@ def aux(die, depth): elif attrib.form == DW_FORM.ref4: relocations.append((len(buf), value)) buf.extend(b"\0\0\0\0") + elif attrib.form == 
DW_FORM.ref_sig8: + buf.extend((value + 1).to_bytes(8, byteorder)) elif attrib.form == DW_FORM.sec_offset: buf.extend(b"\0\0\0\0") elif attrib.form == DW_FORM.flag_present: @@ -119,20 +115,41 @@ def aux(die, depth): assert False, attrib.form if die.children: for child in die.children: - aux(child, depth + 1) + aux(buf, child, depth + 1) buf.append(0) - aux(cu_die, 0) + debug_info = bytearray() + debug_types = bytearray() + tu_id = 1 + for die in unit_dies: + relocations.clear() + die_offsets.clear() + buf = debug_info if die.tag == DW_TAG.compile_unit else debug_types + orig_len = len(buf) + buf.extend(b"\0\0\0\0") # unit_length + buf.extend((4).to_bytes(2, byteorder)) # version + buf.extend((0).to_bytes(4, byteorder)) # debug_abbrev_offset + buf.append(bits // 8) # address_size - unit_length = len(buf) - 4 - buf[:4] = unit_length.to_bytes(4, byteorder) + if die.tag == DW_TAG.type_unit: + buf.extend(tu_id.to_bytes(8, byteorder)) # type_signature + tu_id += 1 + # For now, we assume that the first child is the type. 
+ relocations.append((len(buf), 0)) + buf.extend(b"\0\0\0\0") # type_offset - for offset, index in relocations: - buf[offset : offset + 4] = die_offsets[index].to_bytes(4, byteorder) - return buf + aux(buf, die, 0) + + unit_length = len(buf) - orig_len - 4 + buf[orig_len : orig_len + 4] = unit_length.to_bytes(4, byteorder) + for offset, index in relocations: + die_offset = die_offsets[index] - orig_len + buf[offset : offset + 4] = die_offset.to_bytes(4, byteorder) + return debug_info, debug_types -def _compile_debug_line(cu_die, little_endian): + +def _compile_debug_line(unit_dies, little_endian): buf = bytearray() byteorder = "little" if little_endian else "big" @@ -159,7 +176,8 @@ def compile_include_directories(die): for child in die.children: compile_include_directories(child) - compile_include_directories(cu_die) + for die in unit_dies: + compile_include_directories(die) buf.append(0) decl_file = 1 @@ -185,7 +203,8 @@ def compile_file_names(die): for child in die.children: compile_file_names(child) - compile_file_names(cu_die) + for die in unit_dies: + compile_file_names(die) buf.append(0) unit_length = len(buf) - 4 @@ -195,39 +214,57 @@ def compile_file_names(die): return buf +UNIT_HEADER_TYPES = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) + + def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): if isinstance(dies, DwarfDie): dies = (dies,) assert all(isinstance(die, DwarfDie) for die in dies) - cu_attribs = [ - DwarfAttrib(DW_AT.comp_dir, DW_FORM.string, "/usr/src"), - DwarfAttrib(DW_AT.stmt_list, DW_FORM.sec_offset, 0), - ] + + if dies and dies[0].tag in UNIT_HEADER_TYPES: + unit_dies = dies + else: + unit_dies = (DwarfDie(DW_TAG.compile_unit, (), dies),) + assert all(die.tag in UNIT_HEADER_TYPES for die in unit_dies) + + unit_attribs = [DwarfAttrib(DW_AT.stmt_list, DW_FORM.sec_offset, 0)] if lang is not None: - cu_attribs.append(DwarfAttrib(DW_AT.language, DW_FORM.data1, lang)) - cu_die = DwarfDie(DW_TAG.compile_unit, cu_attribs, 
dies) - - return create_elf_file( - ET.EXEC, - [ - ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), - ElfSection( - name=".debug_abbrev", - sh_type=SHT.PROGBITS, - data=_compile_debug_abbrev(cu_die), - ), - ElfSection( - name=".debug_info", - sh_type=SHT.PROGBITS, - data=_compile_debug_info(cu_die, little_endian, bits), - ), - ElfSection( - name=".debug_line", - sh_type=SHT.PROGBITS, - data=_compile_debug_line(cu_die, little_endian), - ), - ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0"), - ], - little_endian=little_endian, - bits=bits, - ) + unit_attribs.append(DwarfAttrib(DW_AT.language, DW_FORM.data1, lang)) + cu_attribs = unit_attribs + [ + DwarfAttrib(DW_AT.comp_dir, DW_FORM.string, "/usr/src") + ] + + unit_dies = [ + DwarfDie( + die.tag, + list(die.attribs) + + (cu_attribs if die.tag == DW_TAG.compile_unit else unit_attribs), + die.children, + ) + for die in unit_dies + ] + + debug_info, debug_types = _compile_debug_info(unit_dies, little_endian, bits) + + sections = [ + ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), + ElfSection( + name=".debug_abbrev", + sh_type=SHT.PROGBITS, + data=_compile_debug_abbrev(unit_dies), + ), + ElfSection(name=".debug_info", sh_type=SHT.PROGBITS, data=debug_info), + ElfSection( + name=".debug_line", + sh_type=SHT.PROGBITS, + data=_compile_debug_line(unit_dies, little_endian), + ), + ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0"), + ] + if debug_types: + sections.append( + ElfSection(name=".debug_types", sh_type=SHT.PROGBITS, data=debug_types) + ) + + return create_elf_file(ET.EXEC, sections, little_endian=little_endian, bits=bits) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 8a63ab575..f0de6df5c 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -3469,6 +3469,104 @@ def test_language(self): prog.int_type("int", 4, True, language=DEFAULT_LANGUAGE), ) + def test_base_type_unit(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.compile_unit, + (), + ( 
+ DwarfDie( + DW_TAG.base_type, + (DwarfAttrib(DW_AT.signature, DW_FORM.ref_sig8, 0),), + ), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ), + ), + DwarfDie(DW_TAG.type_unit, (), ((int_die,))), + ) + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + self.assertIdentical(prog.type("int"), prog.type("TEST").type) + + def test_struct_type_unit(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.signature, DW_FORM.ref_sig8, 0),), + ), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ), + ), + DwarfDie( + DW_TAG.type_unit, + (), + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ), + ), + ) + ) + + self.assertIdentical( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ), + ) + self.assertIdentical(prog.type("struct point"), prog.type("TEST").type) + class TestObjects(TestCase): def test_constant_signed_enum(self): From 2d40d6e146f49784f6b5ae1d3e0e2e1190074260 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 23 Apr 2021 09:18:16 -0700 Subject: [PATCH 12/56] libdrgn: add configure~ to .gitignore Signed-off-by: Omar 
Sandoval --- libdrgn/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/libdrgn/.gitignore b/libdrgn/.gitignore index cf9d307a9..88373b6a1 100644 --- a/libdrgn/.gitignore +++ b/libdrgn/.gitignore @@ -11,6 +11,7 @@ /config.log /config.status /configure +/configure~ /html /libtool /python/constants.c From 2ad52cb5f40368c691271612e0621338f4679b4e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 23 Apr 2021 09:28:26 -0700 Subject: [PATCH 13/56] libdrgn: add option to time load_debug_info example program I often use examples/load_debug_info to benchmark loading/DWARF indexing, so add a -T option that prints the time it takes to load debug info. Signed-off-by: Omar Sandoval --- libdrgn/examples/load_debug_info.c | 34 +++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/libdrgn/examples/load_debug_info.c b/libdrgn/examples/load_debug_info.c index 247c1ebae..3868af1f0 100644 --- a/libdrgn/examples/load_debug_info.c +++ b/libdrgn/examples/load_debug_info.c @@ -1,10 +1,27 @@ +#include #include #include #include +#include #include #include "drgn.h" +static inline struct timespec timespec_sub(struct timespec a, struct timespec b) +{ + if (a.tv_nsec < b.tv_nsec) { + return (struct timespec){ + .tv_sec = a.tv_sec - 1 - b.tv_sec, + .tv_nsec = a.tv_nsec + 1000000000L - b.tv_nsec, + }; + } else { + return (struct timespec){ + .tv_sec = a.tv_sec - b.tv_sec, + .tv_nsec = a.tv_nsec - b.tv_nsec, + }; + } +} + static void usage(bool error) { fprintf(error ? stderr : stdout, @@ -16,6 +33,7 @@ static void usage(bool error) " -k, --kernel debug the running kernel (default)\n" " -c PATH, --core PATH debug the given core dump\n" " -p PID, --pid PID debug the running process with the given PID\n" + " -T, --time print how long loading debug info took in seconds\n" " -h, --help display this help message and exit\n"); exit(error ? 
EXIT_FAILURE : EXIT_SUCCESS); } @@ -26,14 +44,16 @@ int main(int argc, char **argv) {"kernel", no_argument, NULL, 'k'}, {"core", required_argument, NULL, 'c'}, {"pid", required_argument, NULL, 'p'}, + {"time", no_argument, NULL, 'T'}, {"help", no_argument, NULL, 'h'}, {}, }; bool kernel = false; const char *core = NULL; const char *pid = NULL; + bool print_time = false; for (;;) { - int c = getopt_long(argc, argv, "kc:p:h", long_options, NULL); + int c = getopt_long(argc, argv, "kc:p:Th", long_options, NULL); if (c == -1) break; switch (c) { @@ -46,6 +66,9 @@ int main(int argc, char **argv) case 'p': pid = optarg; break; + case 'T': + print_time = true; + break; case 'h': usage(false); default: @@ -71,7 +94,16 @@ int main(int argc, char **argv) if (err) goto out; + struct timespec start, end; + if (print_time && clock_gettime(CLOCK_MONOTONIC, &start)) + abort(); err = drgn_program_load_debug_info(prog, NULL, 0, true, true); + if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO) && print_time) { + if (clock_gettime(CLOCK_MONOTONIC, &end)) + abort(); + struct timespec diff = timespec_sub(end, start); + printf("%lld.%09ld\n", (long long)diff.tv_sec, diff.tv_nsec); + } out:; int status; From 037a510ff2c87fd5c38ded727d048a7dd5e57347 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 30 Apr 2021 17:03:16 -0700 Subject: [PATCH 14/56] Fix drgn.FaultError type annotations FaultError() also takes an error message. Fixes: 80c9fb35ff96 ("Add type hint stubs and generate documentation from them") Signed-off-by: Omar Sandoval --- _drgn.pyi | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/_drgn.pyi b/_drgn.pyi index 0fcc55cb1..f879ecceb 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1952,11 +1952,14 @@ class FaultError(Exception): accessing a memory address which is not valid in a program). 
""" - def __init__(self, address: int) -> None: + def __init__(self, message: str, address: int) -> None: """ + :param message: :attr:`FaultError.message` :param address: :attr:`FaultError.address` """ ... + message: str + """Error message.""" address: int """Address that couldn't be accessed.""" From 85c367bf79c3678d8ba15da153e9e93bdbfd27ea Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 30 Apr 2021 17:06:47 -0700 Subject: [PATCH 15/56] Reformat empty docstrings Black 21.4b2 now replaces empty docstrings with a docstring containing a single space. Apply that formatting. Signed-off-by: Omar Sandoval --- _drgn.pyi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index f879ecceb..56d947783 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -258,16 +258,16 @@ class Program: """ ... def read_u8(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_u16(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_u32(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_u64(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_word(self, address: IntegerLike, physical: bool = False) -> int: """ From 609a1cafc648437103276d172769f2e7c37475fc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 23 Apr 2021 11:06:59 -0700 Subject: [PATCH 16/56] libdrgn: dwarf_index: check for attribute forms more strictly Rather than silently ignoring attributes whose form we don't recognize, return an error. This way, we won't mysteriously skip indexing DIEs. While we're doing this, split the form -> instruction mapping to its own functions. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 473 ++++++++++++++++++++++++------------------ tests/test_dwarf.py | 39 ---- 2 files changed, 269 insertions(+), 243 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 5b80fefdf..9084b151c 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -231,6 +231,244 @@ void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, state->err = err; } +static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_addr: + *insn_ret = cu->address_size; + return NULL; + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_flag: + *insn_ret = 1; + return NULL; + case DW_FORM_data2: + case DW_FORM_ref2: + *insn_ret = 2; + return NULL; + case DW_FORM_data4: + case DW_FORM_ref4: + *insn_ret = 4; + return NULL; + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + *insn_ret = 8; + return NULL; + case DW_FORM_block1: + *insn_ret = ATTRIB_BLOCK1; + return NULL; + case DW_FORM_block2: + *insn_ret = ATTRIB_BLOCK2; + return NULL; + case DW_FORM_block4: + *insn_ret = ATTRIB_BLOCK4; + return NULL; + case DW_FORM_exprloc: + *insn_ret = ATTRIB_EXPRLOC; + return NULL; + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + *insn_ret = ATTRIB_LEB128; + return NULL; + case DW_FORM_ref_addr: + case DW_FORM_sec_offset: + case DW_FORM_strp: + *insn_ret = cu->is_64_bit ? 
8 : 4; + return NULL; + case DW_FORM_string: + *insn_ret = ATTRIB_STRING; + return NULL; + case DW_FORM_flag_present: + *insn_ret = 0; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64, + form); + } +} + +static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = ATTRIB_SIBLING_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = ATTRIB_SIBLING_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = ATTRIB_SIBLING_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = ATTRIB_SIBLING_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = ATTRIB_SIBLING_REF_UDATA; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_sibling", + form); + } +} + +static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { + return binary_buffer_error(bb, + "DW_FORM_strp without .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = ATTRIB_NAME_STRP8; + else + *insn_ret = ATTRIB_NAME_STRP4; + return NULL; + case DW_FORM_string: + *insn_ret = ATTRIB_NAME_STRING; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_name", + form); + } +} + +static struct drgn_error * +dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_data4: + *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_data8: + *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; + return NULL; + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; + else + *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; + return NULL; + default: + return 
binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_stmt_list", + form); + } +} + +static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_data1: + *insn_ret = ATTRIB_DECL_FILE_DATA1; + return NULL; + case DW_FORM_data2: + *insn_ret = ATTRIB_DECL_FILE_DATA2; + return NULL; + case DW_FORM_data4: + *insn_ret = ATTRIB_DECL_FILE_DATA4; + return NULL; + case DW_FORM_data8: + *insn_ret = ATTRIB_DECL_FILE_DATA8; + return NULL; + /* + * decl_file must be positive, so if the compiler uses + * DW_FORM_sdata for some reason, just treat it as udata. + */ + case DW_FORM_sdata: + case DW_FORM_udata: + *insn_ret = ATTRIB_DECL_FILE_UDATA; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_decl_file", + form); + } +} + +static struct drgn_error * +dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret, uint8_t *die_flags) +{ + switch (form) { + case DW_FORM_flag: + *insn_ret = ATTRIB_DECLARATION_FLAG; + return NULL; + case DW_FORM_flag_present: + /* + * This could be an instruction, but as long as we have a free + * DIE flag bit, we might as well use it. 
+ */ + *insn_ret = 0; + *die_flags |= DIE_FLAG_DECLARATION; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_declaration", + form); + } +} + +static struct drgn_error * +dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = ATTRIB_SPECIFICATION_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = ATTRIB_SPECIFICATION_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = ATTRIB_SPECIFICATION_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = ATTRIB_SPECIFICATION_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = ATTRIB_SPECIFICATION_REF_UDATA; + return NULL; + case DW_FORM_ref_addr: + if (cu->version >= 3) { + if (cu->is_64_bit) + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; + else + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; + } else { + if (cu->address_size == 8) + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; + else if (cu->address_size == 4) + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; + else + return binary_buffer_error(bb, + "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", + cu->address_size); + } + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_specification", + form); + } +} + static struct drgn_error * read_abbrev_decl(struct drgn_debug_info_buffer *buffer, struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, @@ -303,221 +541,48 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, break; if (name == DW_AT_sibling) { - switch (form) { - case DW_FORM_ref1: - insn = ATTRIB_SIBLING_REF1; - goto append_insn; - case DW_FORM_ref2: - insn = ATTRIB_SIBLING_REF2; - goto append_insn; - case DW_FORM_ref4: - insn = ATTRIB_SIBLING_REF4; - goto append_insn; - case DW_FORM_ref8: - insn = ATTRIB_SIBLING_REF8; - goto append_insn; - case DW_FORM_ref_udata: - insn = ATTRIB_SIBLING_REF_UDATA; - 
goto append_insn; - default: - break; - } + err = dw_at_sibling_to_insn(&buffer->bb, form, &insn); } else if (name == DW_AT_name && should_index) { - switch (form) { - case DW_FORM_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { - return binary_buffer_error(&buffer->bb, - "DW_FORM_strp without .debug_str section"); - } - if (cu->is_64_bit) - insn = ATTRIB_NAME_STRP8; - else - insn = ATTRIB_NAME_STRP4; - goto append_insn; - case DW_FORM_string: - insn = ATTRIB_NAME_STRING; - goto append_insn; - default: - break; - } - } else if (name == DW_AT_stmt_list && - cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { - switch (form) { - case DW_FORM_data4: - insn = ATTRIB_STMT_LIST_LINEPTR4; - goto append_insn; - case DW_FORM_data8: - insn = ATTRIB_STMT_LIST_LINEPTR8; - goto append_insn; - case DW_FORM_sec_offset: - if (cu->is_64_bit) - insn = ATTRIB_STMT_LIST_LINEPTR8; - else - insn = ATTRIB_STMT_LIST_LINEPTR4; - goto append_insn; - default: - break; + err = dw_at_name_to_insn(cu, &buffer->bb, form, &insn); + } else if (name == DW_AT_stmt_list) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_stmt_list without .debug_line section"); } + err = dw_at_stmt_list_to_insn(cu, &buffer->bb, form, + &insn); } else if (name == DW_AT_decl_file && should_index && /* Namespaces are merged, so we ignore their file. */ tag != DW_TAG_namespace) { - switch (form) { - case DW_FORM_data1: - insn = ATTRIB_DECL_FILE_DATA1; - goto append_insn; - case DW_FORM_data2: - insn = ATTRIB_DECL_FILE_DATA2; - goto append_insn; - case DW_FORM_data4: - insn = ATTRIB_DECL_FILE_DATA4; - goto append_insn; - case DW_FORM_data8: - insn = ATTRIB_DECL_FILE_DATA8; - goto append_insn; - /* - * decl_file must be positive, so if the compiler uses - * DW_FORM_sdata for some reason, just treat it as - * udata. 
- */ - case DW_FORM_sdata: - case DW_FORM_udata: - insn = ATTRIB_DECL_FILE_UDATA; - goto append_insn; - default: - break; - } + err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn); } else if (name == DW_AT_declaration && should_index) { - switch (form) { - case DW_FORM_flag: - insn = ATTRIB_DECLARATION_FLAG; - goto append_insn; - case DW_FORM_flag_present: - /* - * This could be an instruction, but as long as - * we have a free DIE flag bit, we might as well - * use it. - */ - die_flags |= DIE_FLAG_DECLARATION; - break; - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64 " for DW_AT_declaration", - form); - } + err = dw_at_declaration_to_insn(&buffer->bb, form, + &insn, &die_flags); } else if (name == DW_AT_specification && should_index) { - switch (form) { - case DW_FORM_ref1: - insn = ATTRIB_SPECIFICATION_REF1; - goto append_insn; - case DW_FORM_ref2: - insn = ATTRIB_SPECIFICATION_REF2; - goto append_insn; - case DW_FORM_ref4: - insn = ATTRIB_SPECIFICATION_REF4; - goto append_insn; - case DW_FORM_ref8: - insn = ATTRIB_SPECIFICATION_REF8; - goto append_insn; - case DW_FORM_ref_udata: - insn = ATTRIB_SPECIFICATION_REF_UDATA; - goto append_insn; - case DW_FORM_ref_addr: - if (cu->version >= 3) { - if (cu->is_64_bit) - insn = ATTRIB_SPECIFICATION_REF_ADDR8; - else - insn = ATTRIB_SPECIFICATION_REF_ADDR4; - } else { - if (cu->address_size == 8) - insn = ATTRIB_SPECIFICATION_REF_ADDR8; - else if (cu->address_size == 4) - insn = ATTRIB_SPECIFICATION_REF_ADDR4; - else - return binary_buffer_error(&buffer->bb, - "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", - cu->address_size); - } - goto append_insn; - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64 " for DW_AT_specification", - form); - } - } - - switch (form) { - case DW_FORM_addr: - insn = cu->address_size; - break; - case DW_FORM_data1: - case DW_FORM_ref1: - case DW_FORM_flag: - insn = 1; - break; - case DW_FORM_data2: - 
case DW_FORM_ref2: - insn = 2; - break; - case DW_FORM_data4: - case DW_FORM_ref4: - insn = 4; - break; - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - insn = 8; - break; - case DW_FORM_block1: - insn = ATTRIB_BLOCK1; - goto append_insn; - case DW_FORM_block2: - insn = ATTRIB_BLOCK2; - goto append_insn; - case DW_FORM_block4: - insn = ATTRIB_BLOCK4; - goto append_insn; - case DW_FORM_exprloc: - insn = ATTRIB_EXPRLOC; - goto append_insn; - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - insn = ATTRIB_LEB128; - goto append_insn; - case DW_FORM_ref_addr: - case DW_FORM_sec_offset: - case DW_FORM_strp: - insn = cu->is_64_bit ? 8 : 4; - break; - case DW_FORM_string: - insn = ATTRIB_STRING; - goto append_insn; - case DW_FORM_flag_present: - continue; - case DW_FORM_indirect: - return binary_buffer_error(&buffer->bb, - "DW_FORM_indirect is not implemented"); - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64, - form); + err = dw_at_specification_to_insn(cu, &buffer->bb, form, + &insn); + } else { + err = dw_form_to_insn(cu, &buffer->bb, form, &insn); } + if (err) + return err; - if (!first) { - uint8_t last_insn = insns->data[insns->size - 1]; - if (last_insn + insn <= INSN_MAX_SKIP) { - insns->data[insns->size - 1] += insn; - continue; - } else if (last_insn < INSN_MAX_SKIP) { - insn = last_insn + insn - INSN_MAX_SKIP; - insns->data[insns->size - 1] = INSN_MAX_SKIP; + if (insn != 0) { + if (!first && insn <= INSN_MAX_SKIP) { + uint8_t last_insn = insns->data[insns->size - 1]; + if (last_insn + insn <= INSN_MAX_SKIP) { + insns->data[insns->size - 1] += insn; + continue; + } else if (last_insn < INSN_MAX_SKIP) { + insn = last_insn + insn - INSN_MAX_SKIP; + insns->data[insns->size - 1] = INSN_MAX_SKIP; + } } - } -append_insn: - first = false; - if (!uint8_vector_append(insns, &insn)) - return &drgn_enomem; + if (!uint8_vector_append(insns, &insn)) + return &drgn_enomem; + first = false; + } } 
insn = 0; if (!uint8_vector_append(insns, &insn) || diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index f0de6df5c..4e02855bb 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -711,22 +711,6 @@ def test_struct_missing_size(self): "TEST", ) - def test_struct_invalid_name(self): - prog = dwarf_program( - wrap_test_type_dies( - DwarfDie( - DW_TAG.structure_type, - ( - DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), - ), - ) - ) - ) - self.assertRaisesRegex( - Exception, "DW_TAG_structure_type has invalid DW_AT_name", prog.type, "TEST" - ) - def test_incomplete_to_complete(self): prog = dwarf_program( wrap_test_type_dies( @@ -1919,29 +1903,6 @@ def test_enum_missing_compatible_type_and_byte_size(self): "TEST", ) - def test_enum_invalid_name(self): - prog = dwarf_program( - wrap_test_type_dies( - ( - DwarfDie( - DW_TAG.enumeration_type, - ( - DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ), - ), - unsigned_int_die, - ) - ) - ) - self.assertRaisesRegex( - Exception, - "DW_TAG_enumeration_type has invalid DW_AT_name", - prog.type, - "TEST", - ) - def test_enum_enumerator_missing_name(self): prog = dwarf_program( wrap_test_type_dies( From 95646b47c955db4a32d80985b86888f2eba90678 Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Thu, 28 Jan 2021 16:37:57 -0800 Subject: [PATCH 17/56] libdrgn: dwarf_index: add support for DW_FORM_indirect First, add instructions for DW_FORM_indirect. Then, we can call the function to convert a form to an instruction whenever we see an indirect instruction. Note that without elfutils commit d63b26b8d21f ("libdw: handle DW_FORM_indirect when reading attributes") (queued for elfutils 0.184), DW_FORM_indirect will cause errors later when parsing with libdw. 
Signed-off-by: Jay Kamat --- libdrgn/dwarf_index.c | 99 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 4 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 9084b151c..3d52630df 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -30,7 +30,7 @@ * set to zero if the tag is not of interest); see DIE_FLAG_*. */ enum { - INSN_MAX_SKIP = 226, + INSN_MAX_SKIP = 219, ATTRIB_BLOCK1, ATTRIB_BLOCK2, ATTRIB_BLOCK4, @@ -60,7 +60,14 @@ enum { ATTRIB_SPECIFICATION_REF_UDATA, ATTRIB_SPECIFICATION_REF_ADDR4, ATTRIB_SPECIFICATION_REF_ADDR8, - ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_ADDR8, + ATTRIB_INDIRECT, + ATTRIB_SIBLING_INDIRECT, + ATTRIB_NAME_INDIRECT, + ATTRIB_STMT_LIST_INDIRECT, + ATTRIB_DECL_FILE_INDIRECT, + ATTRIB_DECLARATION_INDIRECT, + ATTRIB_SPECIFICATION_INDIRECT, + ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_INDIRECT, }; enum { @@ -285,6 +292,9 @@ static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_flag_present: *insn_ret = 0; return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64, @@ -312,6 +322,9 @@ static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, case DW_FORM_ref_udata: *insn_ret = ATTRIB_SIBLING_REF_UDATA; return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_SIBLING_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64 " for DW_AT_sibling", @@ -337,6 +350,9 @@ static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_string: *insn_ret = ATTRIB_NAME_STRING; return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_NAME_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64 " for DW_AT_name", @@ -362,6 +378,9 @@ dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, else *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; 
return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_STMT_LIST_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64 " for DW_AT_stmt_list", @@ -394,6 +413,9 @@ static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, case DW_FORM_udata: *insn_ret = ATTRIB_DECL_FILE_UDATA; return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_DECL_FILE_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64 " for DW_AT_decl_file", @@ -417,6 +439,9 @@ dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, *insn_ret = 0; *die_flags |= DIE_FLAG_DECLARATION; return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_DECLARATION_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64 " for DW_AT_declaration", @@ -462,6 +487,9 @@ dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, cu->address_size); } return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_SPECIFICATION_INDIRECT; + return NULL; default: return binary_buffer_error(bb, "unknown attribute form %" PRIu64 " for DW_AT_specification", @@ -838,6 +866,35 @@ index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, return ret == -1 ? 
&drgn_enomem : NULL; } +static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint8_t insn, uint8_t *insn_ret, + uint8_t *die_flags) +{ + struct drgn_error *err; + uint64_t form; + if ((err = binary_buffer_next_uleb128(bb, &form))) + return err; + switch (insn) { + case ATTRIB_INDIRECT: + return dw_form_to_insn(cu, bb, form, insn_ret); + case ATTRIB_SIBLING_INDIRECT: + return dw_at_sibling_to_insn(bb, form, insn_ret); + case ATTRIB_NAME_INDIRECT: + return dw_at_name_to_insn(cu, bb, form, insn_ret); + case ATTRIB_STMT_LIST_INDIRECT: + return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret); + case ATTRIB_DECL_FILE_INDIRECT: + return dw_at_decl_file_to_insn(bb, form, insn_ret); + case ATTRIB_DECLARATION_INDIRECT: + return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags); + case ATTRIB_SPECIFICATION_INDIRECT: + return dw_at_specification_to_insn(cu, bb, form, insn_ret); + default: + UNREACHABLE(); + } +} + /* * First pass: read the file name tables and index DIEs with * DW_AT_specification. This recurses into namespaces. 
@@ -876,7 +933,9 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, uint64_t stmt_list; const char *sibling = NULL; uint8_t insn; + uint8_t extra_die_flags = 0; while ((insn = *insnp++)) { +indirect_insn:; uint64_t skip, tmp; switch (insn) { case ATTRIB_BLOCK1: @@ -1019,6 +1078,21 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, specification_ref_addr: specification = (uintptr_t)debug_info_buffer + tmp; break; + case ATTRIB_INDIRECT: + case ATTRIB_SIBLING_INDIRECT: + case ATTRIB_NAME_INDIRECT: + case ATTRIB_STMT_LIST_INDIRECT: + case ATTRIB_DECL_FILE_INDIRECT: + case ATTRIB_DECLARATION_INDIRECT: + case ATTRIB_SPECIFICATION_INDIRECT: + if ((err = read_indirect_insn(cu, &buffer->bb, + insn, &insn, + &extra_die_flags))) + return err; + if (insn) + goto indirect_insn; + else + continue; default: skip = insn; skip: @@ -1028,7 +1102,7 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, break; } } - insn = *insnp; + insn = *insnp | extra_die_flags; if (depth == 0) { if (stmt_list_ptr) { @@ -1308,7 +1382,9 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, bool specification = false; const char *sibling = NULL; uint8_t insn; + uint8_t extra_die_flags = 0; while ((insn = *insnp++)) { +indirect_insn:; uint64_t skip, tmp; switch (insn) { case ATTRIB_BLOCK1: @@ -1460,6 +1536,21 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, specification = true; skip = 8; goto skip; + case ATTRIB_INDIRECT: + case ATTRIB_SIBLING_INDIRECT: + case ATTRIB_NAME_INDIRECT: + case ATTRIB_STMT_LIST_INDIRECT: + case ATTRIB_DECL_FILE_INDIRECT: + case ATTRIB_DECLARATION_INDIRECT: + case ATTRIB_SPECIFICATION_INDIRECT: + if ((err = read_indirect_insn(cu, &buffer->bb, + insn, &insn, + &extra_die_flags))) + return err; + if (insn) + goto indirect_insn; + else + continue; default: skip = insn; skip: @@ -1469,7 +1560,7 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, break; } } - insn = *insnp; + insn = *insnp | extra_die_flags; uint8_t tag = insn & 
DIE_FLAG_TAG_MASK; if (depth == 1) { From cf371594f3126c0fdd887bbd9d870eab7aa4739a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 4 May 2021 16:51:40 -0700 Subject: [PATCH 18/56] tests: run a few test cases with DW_FORM_indirect Pick a few DWARF parsing test cases that exercise the interesting cases for DW_FORM_indirect and run them with and without DW_FORM_indirect. We only test DW_FORM_indirect if libdw is new enough to support it. Signed-off-by: Omar Sandoval --- tests/dwarfwriter.py | 20 ++++++--- tests/test_dwarf.py | 96 +++++++++++++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 39 deletions(-) diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 2ae056d4e..bc1320dc5 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -35,7 +35,7 @@ def _append_sleb128(buf, value): buf.append(byte | 0x80) -def _compile_debug_abbrev(unit_dies): +def _compile_debug_abbrev(unit_dies, use_dw_form_indirect): buf = bytearray() code = 1 @@ -47,7 +47,9 @@ def aux(die): buf.append(bool(die.children)) for attrib in die.attribs: _append_uleb128(buf, attrib.name) - _append_uleb128(buf, attrib.form) + _append_uleb128( + buf, DW_FORM.indirect if use_dw_form_indirect else attrib.form + ) buf.append(0) buf.append(0) if die.children: @@ -60,7 +62,7 @@ def aux(die): return buf -def _compile_debug_info(unit_dies, little_endian, bits): +def _compile_debug_info(unit_dies, little_endian, bits, use_dw_form_indirect): byteorder = "little" if little_endian else "big" die_offsets = [] relocations = [] @@ -74,6 +76,8 @@ def aux(buf, die, depth): _append_uleb128(buf, code) code += 1 for attrib in die.attribs: + if use_dw_form_indirect: + _append_uleb128(buf, attrib.form) if attrib.name == DW_AT.decl_file: value = decl_file decl_file += 1 @@ -217,7 +221,9 @@ def compile_file_names(die): UNIT_HEADER_TYPES = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) -def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): +def compile_dwarf( + dies, 
little_endian=True, bits=64, *, lang=None, use_dw_form_indirect=False +): if isinstance(dies, DwarfDie): dies = (dies,) assert all(isinstance(die, DwarfDie) for die in dies) @@ -245,14 +251,16 @@ def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): for die in unit_dies ] - debug_info, debug_types = _compile_debug_info(unit_dies, little_endian, bits) + debug_info, debug_types = _compile_debug_info( + unit_dies, little_endian, bits, use_dw_form_indirect + ) sections = [ ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), ElfSection( name=".debug_abbrev", sh_type=SHT.PROGBITS, - data=_compile_debug_abbrev(unit_dies), + data=_compile_debug_abbrev(unit_dies, use_dw_form_indirect), ), ElfSection(name=".debug_info", sh_type=SHT.PROGBITS, data=debug_info), ElfSection( diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 4e02855bb..018409b8c 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1,6 +1,8 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later +import ctypes +import functools import os.path import re import tempfile @@ -22,6 +24,12 @@ from tests.dwarf import DW_AT, DW_ATE, DW_END, DW_FORM, DW_LANG, DW_TAG from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf +libdw = ctypes.CDLL("libdw.so") +libdw.dwfl_version.argtypes = [ctypes.c_void_p] +libdw.dwfl_version.restype = ctypes.c_char_p +libdw_version = tuple(int(x) for x in libdw.dwfl_version(None).split(b".")[:2]) + + bool_die = DwarfDie( DW_TAG.base_type, ( @@ -204,6 +212,21 @@ def wrap_test_type_dies(dies): ) +def with_and_without_dw_form_indirect(f): + @functools.wraps(f) + def wrapper(self): + with self.subTest(): + f(self, False) + # elfutils does not support DW_FORM_indirect properly before commit + # d63b26b8d21f ("libdw: handle DW_FORM_indirect when reading + # attributes"). 
+ if libdw_version >= (0, 184): + with self.subTest(msg="with DW_FORM_indirect"): + f(self, True) + + return wrapper + + class TestTypes(TestCase): def test_unknown_tag(self): prog = dwarf_program(wrap_test_type_dies(DwarfDie(0x9999, ()))) @@ -371,7 +394,8 @@ def test_byteorder_by_name(self): ) self.assertIdentical(prog.type("int"), prog.int_type("int", 4, True, "little")) - def test_qualifier(self): + @with_and_without_dw_form_indirect + def test_qualifier(self, use_dw_form_indirect): prog = dwarf_program( wrap_test_type_dies( ( @@ -380,7 +404,8 @@ def test_qualifier(self): ), int_die, ) - ) + ), + use_dw_form_indirect=use_dw_form_indirect, ) self.assertIdentical( prog.type("TEST").type, @@ -3874,7 +3899,8 @@ def test_variable_const_block_too_small(self): ) self.assertRaisesRegex(Exception, "too small", prog.variable, "p") - def test_specification(self): + @with_and_without_dw_form_indirect + def test_specification(self, use_dw_form_indirect): prog = dwarf_program( wrap_test_type_dies( ( @@ -3899,7 +3925,8 @@ def test_specification(self): ), ), ) - ) + ), + use_dw_form_indirect=use_dw_form_indirect, ) self.assertIdentical( @@ -3907,41 +3934,44 @@ def test_specification(self): Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), ) - def test_namespace_reverse_specification(self): + @with_and_without_dw_form_indirect + def test_namespace_reverse_specification(self, use_dw_form_indirect): """Test specification inside namespace while declaration is outside of it.""" - dies = ( - int_die, - DwarfDie( - DW_TAG.namespace, - [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"), - DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 2), - ], - [ - DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 2), - DwarfAttrib( - DW_AT.location, - DW_FORM.exprloc, - b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + [ + DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"), + 
DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 2), + ], + [ + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 2), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), ), - ), - ) - ], - ), - DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ) + ], + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), ), + use_dw_form_indirect=use_dw_form_indirect, ) - prog = dwarf_program(dies) self.assertIdentical( prog["x"], Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), From 8f7e524b6b206c7bef906ff6188e16d22e3c1696 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 4 May 2021 17:25:33 -0700 Subject: [PATCH 19/56] docs: update links to main branch The master branch was renamed to main. GitHub redirects links to the old branch, but we might as well update them explicitly. Signed-off-by: Omar Sandoval --- README.rst | 2 +- docs/advanced_usage.rst | 2 +- docs/user_guide.rst | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 087950b1c..8e8bd4951 100644 --- a/README.rst +++ b/README.rst @@ -40,7 +40,7 @@ scripting support, drgn aims to make scripting as natural as possible so that debugging feels like coding. This makes it well-suited for introspecting the complex, inter-connected state in large programs. It is also designed as a library that can be used to build debugging and introspection tools; see the -official `tools `_. +official `tools `_. 
drgn was developed for debugging the Linux kernel (as an alternative to the `crash `_ utility), but it can also debug diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 8252801cc..f951d3056 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -35,7 +35,7 @@ The core functionality of drgn is implemented in C and is available as a C library, ``libdrgn``. See |drgn.h|_. .. |drgn.h| replace:: ``drgn.h`` -.. _drgn.h: https://github.com/osandov/drgn/blob/master/libdrgn/drgn.h.in +.. _drgn.h: https://github.com/osandov/drgn/blob/main/libdrgn/drgn.h.in Full documentation can be generated by running ``doxygen`` in the ``libdrgn`` directory of the source code. Note that the API and ABI are not yet stable. diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 97efe150d..66694a5fe 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -317,5 +317,5 @@ Next Steps Refer to the :doc:`api_reference`. Look through the :doc:`helpers`. Browse through the official `examples -`_ and `tools -`_. +`_ and `tools +`_. From 841a3dae88805d8aec8a58502cde7ef9daddc986 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 00:28:56 -0700 Subject: [PATCH 20/56] Move vmtest assets to GitHub releases As noted by commit 738261290faf ("CI: temporarily disable vmtest"), vmtest was generating too much traffic to the Dropbox shared folder that hosted vmtest kernels. Instead, we can store kernel packages as GitHub release assets. Update the code for downloading and uploading vmtest assets, and also add a scheduled GitHub action to build new kernels every Monday so I don't have to remember to do it manually. This also drops vmtest support for 5.6-5.9, which now fail to build with newer binutils due to the issue fixed in Linux kernel commit 1d489151e9f9 ("objtool: Don't fail on missing symbol table"). 
Signed-off-by: Omar Sandoval --- setup.py | 32 +- util.py | 4 + vmtest/README.rst | 16 +- vmtest/asynciosubprocess.py | 46 ++ vmtest/config | 55 --- vmtest/download.py | 300 +++++++------ vmtest/githubapi.py | 174 ++++++++ vmtest/kbuild.py | 340 +++++++++++++++ vmtest/manage.py | 840 ++++++++---------------------------- vmtest/vm.py | 13 +- 10 files changed, 956 insertions(+), 864 deletions(-) create mode 100644 vmtest/asynciosubprocess.py delete mode 100644 vmtest/config create mode 100644 vmtest/githubapi.py create mode 100644 vmtest/kbuild.py diff --git a/setup.py b/setup.py index 240fe1c61..3b610ae5b 100755 --- a/setup.py +++ b/setup.py @@ -125,20 +125,7 @@ def make_release_tree(self, base_dir, files): class test(Command): description = "run unit tests after in-place build" - KERNELS = [ - "5.12", - "5.11", - "5.10", - "5.9", - "5.8", - "5.7", - "5.6", - "5.4", - "4.19", - "4.14", - "4.9", - "4.4", - ] + KERNELS = ["5.12", "5.11", "5.10", "5.4", "4.19", "4.14", "4.9", "4.4"] user_options = [ ( @@ -203,11 +190,13 @@ def _run_vm(self, kernel_dir): def run(self): from pathlib import Path - from vmtest.download import KernelDownloader + from vmtest.download import download_kernels_in_thread # Start downloads ASAP so that they're hopefully done by the time we # need them. 
- with KernelDownloader(self.kernels, Path(self.vmtest_dir)) as downloader: + with download_kernels_in_thread( + Path(self.vmtest_dir), "x86_64", self.kernels + ) as kernel_downloads: if self.kernels: self.announce("downloading kernels in the background", log.INFO) self.run_command("egg_info") @@ -225,14 +214,17 @@ def run(self): failed.append("local") if self.kernels: - for kernel in downloader: + for kernel in kernel_downloads: + kernel_release = kernel.name + if kernel_release.startswith("kernel-"): + kernel_release = kernel_release[len("kernel-") :] self.announce( - f"running tests in VM on Linux {kernel.name}", log.INFO + f"running tests in VM on Linux {kernel_release}", log.INFO ) if self._run_vm(kernel): - passed.append(kernel.name) + passed.append(kernel_release) else: - failed.append(kernel.name) + failed.append(kernel_release) if passed: self.announce(f'Passed: {", ".join(passed)}', log.INFO) diff --git a/util.py b/util.py index 6a3070ab1..23f178692 100644 --- a/util.py +++ b/util.py @@ -97,6 +97,7 @@ class KernelVersion: """ def __init__(self, release: str) -> None: + self._release = release # ~ sorts before anything, including the end of the version. self._key = re.sub(r"-(rc[0-9])", r"~\1", release) @@ -109,3 +110,6 @@ def __lt__(self, other: object) -> bool: if not isinstance(other, KernelVersion): return NotImplemented return verrevcmp(self._key, other._key) < 0 + + def __str__(self) -> str: + return self._release diff --git a/vmtest/README.rst b/vmtest/README.rst index cff9b3681..88bd4f5be 100644 --- a/vmtest/README.rst +++ b/vmtest/README.rst @@ -9,7 +9,7 @@ zstd to be installed. Tests can also be run on specific kernels with ``-k``. This takes a comma-separated list of kernels which are wildcard patterns (e.g., ``5.6.*``) -matching a kernel release hosted on Dropbox (see below). +matching a kernel release hosted on GitHub (see below). 
Architecture ------------ @@ -32,13 +32,13 @@ the exit status via `virtio-serial This infrastructure is all generic. The drgn-specific parts are: -1. The kernel builds. The `kernel configuration `_ includes everything - required to run drgn and the Linux kernel helper tests. Each build is - packaged as a tarball containing ``vmlinux``, ``vmlinuz``, and kernel - modules. These packages are hosted on `Dropbox - `_. - They are managed via the Dropbox API by the `vmtest.manage `_ CLI - and downloaded by the `vmtest.download `_ module. +1. The kernel builds. These are configured with a minimal configuration + including everything required to run drgn and the Linux kernel helper tests. + Each build is packaged as a tarball containing ``vmlinux``, ``vmlinuz``, and + kernel modules. These packages are hosted in a `GitHub release + `_. They are + managed via the GitHub API by the `vmtest.manage `_ CLI and + downloaded by the `vmtest.download `_ module. 2. The test command itself. This is just some ``setup.py`` glue and the proper invocation of the Python `unittest command line interface `_. diff --git a/vmtest/asynciosubprocess.py b/vmtest/asynciosubprocess.py new file mode 100644 index 000000000..b4b90d0a5 --- /dev/null +++ b/vmtest/asynciosubprocess.py @@ -0,0 +1,46 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +import asyncio +from contextlib import contextmanager +import os +from subprocess import CalledProcessError as CalledProcessError +from typing import Any, Iterator, Tuple + + +async def check_call(*args: Any, **kwds: Any) -> None: + proc = await asyncio.create_subprocess_exec(*args, **kwds) + returncode = await proc.wait() + if returncode != 0: + raise CalledProcessError(returncode, args) + + +async def check_output(*args: Any, **kwds: Any) -> bytes: + kwds["stdout"] = asyncio.subprocess.PIPE + proc = await asyncio.create_subprocess_exec(*args, **kwds) + stdout = (await proc.communicate())[0] + if proc.returncode: + raise CalledProcessError(proc.returncode, args) + return stdout + + +async def check_output_shell(cmd: str, **kwds: Any) -> bytes: + kwds["stdout"] = asyncio.subprocess.PIPE + proc = await asyncio.create_subprocess_shell(cmd, **kwds) + stdout = (await proc.communicate())[0] + if proc.returncode: + raise CalledProcessError(proc.returncode, cmd) + return stdout + + +@contextmanager +def pipe_context() -> Iterator[Tuple[int, int]]: + pipe_r = pipe_w = None + try: + pipe_r, pipe_w = os.pipe() + yield pipe_r, pipe_w + finally: + if pipe_r is not None: + os.close(pipe_r) + if pipe_w is not None: + os.close(pipe_w) diff --git a/vmtest/config b/vmtest/config deleted file mode 100644 index c625242c6..000000000 --- a/vmtest/config +++ /dev/null @@ -1,55 +0,0 @@ -# Minimal Linux kernel configuration for booting into vmtest and running drgn -# tests. - -CONFIG_LOCALVERSION="-vmtest2" - -CONFIG_SMP=y -CONFIG_MODULES=y - -# We run the tests in KVM. -CONFIG_HYPERVISOR_GUEST=y -CONFIG_KVM_GUEST=y -CONFIG_PARAVIRT=y -CONFIG_PARAVIRT_SPINLOCKS=y - -# Minimum requirements for vmtest. 
-CONFIG_9P_FS=y -CONFIG_DEVTMPFS=y -CONFIG_INET=y -CONFIG_NET=y -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_OVERLAY_FS=y -CONFIG_PCI=y -CONFIG_PROC_FS=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_PCI=y - -# drgn needs /proc/kcore for live debugging. -CONFIG_PROC_KCORE=y -# In some cases, it also needs /proc/kallsyms. -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y - -# drgn needs debug info. -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y - -# Before Linux kernel commit 8757dc970f55 ("x86/crash: Define -# arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y") (in v5.6), some -# important information in VMCOREINFO is initialized by the kexec code. -CONFIG_KEXEC=y - -# For block tests. -CONFIG_BLK_DEV_LOOP=m - -# For kconfig tests. -CONFIG_IKCONFIG=m -CONFIG_IKCONFIG_PROC=y diff --git a/vmtest/download.py b/vmtest/download.py index bfaad391c..3514408f5 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -1,146 +1,204 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later +import argparse +from contextlib import contextmanager import fnmatch import glob -import http.client +import logging +import os from pathlib import Path import queue import re import shutil import subprocess +import tempfile import threading -from typing import Any, Dict, Iterator, Optional, Sequence, Union -import urllib.request +from typing import Any, Dict, Iterator, Optional, Sequence from util import KernelVersion - -# This URL contains a mapping from file names to URLs where those files can be -# downloaded. This is needed because the files under a Dropbox shared folder -# have randomly-generated links. 
-_INDEX_URL = "https://www.dropbox.com/sh/2mcf2xvg319qdaw/AAC_AbpvQPRrHF-99B2REpXja/x86_64/INDEX?dl=1" - - -class KernelDownloader: - def __init__(self, kernels: Sequence[str], download_dir: Path) -> None: - self._kernels = kernels - self._arch_download_dir = download_dir / "x86_64" - self._cached_index: Optional[Dict[str, str]] = None - self._index_lock = threading.Lock() - self._queue: queue.Queue[Union[Path, Exception, None]] = queue.Queue() - self._thread: Optional[threading.Thread] - # Don't create the thread if we don't have anything to do. - if kernels: - self._thread = threading.Thread(target=self._download_all, daemon=True) - self._thread.start() +from vmtest.githubapi import GitHubApi + +logger = logging.getLogger(__name__) + +VMTEST_GITHUB_RELEASE = ("osandov", "drgn", "vmtest-assets") + + +def available_kernel_releases( + github_release: Dict[str, Any], arch: str +) -> Dict[str, Dict[str, Any]]: + pattern = re.compile(r"kernel-(.*)\." + re.escape(arch) + "\.tar\.zst") + releases = {} + for asset in github_release["assets"]: + match = pattern.fullmatch(asset["name"]) + if match: + releases[match.group(1)] = asset + return releases + + +def _download_kernel(gh: GitHubApi, url: str, dir: Path) -> None: + dir.parent.mkdir(parents=True, exist_ok=True) + with tempfile.TemporaryDirectory(dir=dir.parent) as tmp_name: + tmp_dir = Path(tmp_name) + # Don't assume that the available version of tar has zstd support or + # the non-standard -I/--use-compress-program option. 
+ with subprocess.Popen( + ["zstd", "-d", "-", "--stdout"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) as zstd_proc, subprocess.Popen( + ["tar", "-C", str(tmp_dir), "-x"], + stdin=zstd_proc.stdout, + ) as tar_proc, gh.download( + url + ) as resp: + assert zstd_proc.stdin is not None + shutil.copyfileobj(resp, zstd_proc.stdin) + zstd_proc.stdin.close() + if zstd_proc.returncode != 0: + raise subprocess.CalledProcessError(zstd_proc.returncode, zstd_proc.args) + if tar_proc.returncode != 0: + raise subprocess.CalledProcessError(tar_proc.returncode, tar_proc.args) + + tmp_dir.rename(dir) + + +def download_kernels( + download_dir: Path, arch: str, kernels: Sequence[str] +) -> Iterator[Path]: + gh = GitHubApi(os.getenv("GITHUB_TOKEN")) + + # We don't want to make any API requests if we don't have to, so we don't + # fetch this until we need it. + cached_kernel_releases = None + + def get_available_kernel_releases() -> Dict[str, Dict[str, Any]]: + nonlocal cached_kernel_releases + if cached_kernel_releases is None: + logger.info("getting available kernel releases") + download_dir.mkdir(parents=True, exist_ok=True) + cached_kernel_releases = available_kernel_releases( + gh.get_release_by_tag( + *VMTEST_GITHUB_RELEASE, cache=download_dir / "github_release.json" + ), + arch, + ) + return cached_kernel_releases + + arch_download_dir = download_dir / arch + + # Make sure all of the given kernels exist first. 
+ to_download = [] + for kernel in kernels: + if kernel != glob.escape(kernel): + try: + match = max( + ( + available + for available in get_available_kernel_releases() + if fnmatch.fnmatch(available, kernel) + ), + key=KernelVersion, + ) + except ValueError: + raise Exception(f"no available kernel release matches {kernel!r}") + else: + logger.info("kernel release pattern %s matches %s", kernel, match) + kernel = match + kernel_dir = arch_download_dir / ("kernel-" + kernel) + if kernel_dir.exists(): + # As a policy, vmtest assets will never be updated with the same + # name. Therefore, if the kernel was previously downloaded, we + # don't need to download it again. + url = None else: - self._thread = None - self._queue.put(None) - - def __enter__(self) -> "KernelDownloader": - return self - - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: - if self._thread: - self._thread.join() - - @property - def _index(self) -> Dict[str, str]: - if self._cached_index is None: - with self._index_lock: - if self._cached_index is None: - index = {} - with urllib.request.urlopen(_INDEX_URL) as u: - for line in u: - name, url = line.decode().rstrip("\n").split("\t", 1) - index[name] = url - self._cached_index = index - return self._cached_index - - def _find_kernel(self, pattern: str) -> str: - matches = [] - for name, url in self._index.items(): - match = re.fullmatch(r"kernel-(.*)\.tar\.zst", name) - if match and fnmatch.fnmatch(match.group(1), pattern): - matches.append(match.group(1)) - if not matches: - raise Exception(f"no kernel release matches {pattern!r}") - return max(matches, key=KernelVersion) - - def _download(self, release: str) -> Path: - # Only do the wildcard lookup if the release is a wildcard - # pattern. 
- if release != glob.escape(release): - release = self._find_kernel(release) - path = self._arch_download_dir / release - if not path.exists(): - name = f"kernel-{release}.tar.zst" - tmp = path.with_name(path.name + ".tmp") - tmp.mkdir(parents=True) - remove_tmp = True try: - # Don't assume that the available version of tar has zstd - # support or the non-standard -I/--use-compress-program option. - with subprocess.Popen( - ["zstd", "-d", "-", "--stdout"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - ) as zstd_proc, subprocess.Popen( - ["tar", "-C", str(tmp), "-x"], stdin=zstd_proc.stdout - ) as tar_proc, urllib.request.urlopen( - self._index[name] - ) as u: - assert zstd_proc.stdin is not None - shutil.copyfileobj(u, zstd_proc.stdin) - zstd_proc.stdin.close() - if u.length: - raise http.client.IncompleteRead(b"", u.length) - if zstd_proc.returncode != 0: - raise subprocess.CalledProcessError( - zstd_proc.returncode, zstd_proc.args - ) - if tar_proc.returncode != 0: - raise subprocess.CalledProcessError( - tar_proc.returncode, tar_proc.args - ) - tmp.rename(path) - remove_tmp = False - finally: - if remove_tmp: - shutil.rmtree(tmp) - return path - - def _download_all(self) -> None: - try: - for kernel in self._kernels: - self._queue.put(self._download(kernel)) - self._queue.put(None) - except Exception as e: - self._queue.put(e) - - def __iter__(self) -> Iterator[Path]: + asset = get_available_kernel_releases()[kernel] + except KeyError: + raise Exception(f"kernel release {kernel} not found") + url = asset["url"] + to_download.append((kernel, kernel_dir, url)) + + for release, kernel_dir, url in to_download: + if url is None: + logger.info( + "kernel release %s already downloaded to %s", release, kernel_dir + ) + else: + logger.info( + "downloading kernel release %s to %s from %s", release, kernel_dir, url + ) + _download_kernel(gh, url, kernel_dir) + yield kernel_dir + + +def _download_kernels_thread( + download_dir: Path, + arch: str, + kernels: 
Sequence[str], + q: "queue.Queue[Optional[Path]]", +) -> None: + for kernel in download_kernels(download_dir, arch, kernels): + q.put(kernel) + q.put(None) + + +@contextmanager +def download_kernels_in_thread( + download_dir: Path, arch: str, kernels: Sequence[str] +) -> Iterator[Iterator[Path]]: + q: "queue.Queue[Optional[Path]]" = queue.Queue() + + def aux() -> Iterator[Path]: while True: - result = self._queue.get() - if isinstance(result, Exception): - raise result - elif result is None: + kernel = q.get() + if kernel is None: break - yield result - - -if __name__ == "__main__": - import argparse + yield kernel + + thread = None + try: + thread = threading.Thread( + target=_download_kernels_thread, + args=(download_dir, arch, kernels, q), + daemon=True, + ) + thread.start() + yield aux() + finally: + if thread: + thread.join() + + +def main() -> None: + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO + ) parser = argparse.ArgumentParser( - description="download vmtest kernels", + description="Download drgn vmtest assets", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - "-d", "--directory", default="build/vmtest", help="directory to download to" + "-k", + "--kernel", + action="append", + dest="kernels", + help="download latest kernel matching glob pattern; may be given multiple times", + ) + parser.add_argument( + "-d", + "--download-directory", + metavar="DIR", + type=Path, + default="build/vmtest", + help="directory to download assets to", ) - parser.add_argument("kernels", metavar="KERNEL", nargs="*") args = parser.parse_args() - with KernelDownloader(args.kernels, Path(args.directory)) as downloader: - for kernel in downloader: - print(kernel) + for path in download_kernels(args.download_directory, "x86_64", args.kernels or ()): + print(path) + + +if __name__ == "__main__": + main() diff --git a/vmtest/githubapi.py b/vmtest/githubapi.py new file mode 100644 index 
000000000..d37dd73bc --- /dev/null +++ b/vmtest/githubapi.py @@ -0,0 +1,174 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import json +from pathlib import Path +import typing +from typing import Any, Dict, Mapping, Optional, Union +import urllib.error +import urllib.parse +import urllib.request + +if typing.TYPE_CHECKING: + import aiohttp + + +_CACHE = Optional[Union[str, bytes, Path]] + + +# Hacky base class because we want the GitHub API from async and non-async +# code. +# +# This provides a slapdash interface for caching a response in a file so that +# we can do conditional requests +# (https://docs.github.com/en/rest/overview/resources-in-the-rest-api#conditional-requests). +# A more complete implementation would be something like a SQLite database +# indexed by endpoint, but this is simpler and good enough for now. +class _GitHubApiBase: + _HOST = "https://api.github.com" + + def __init__(self, token: Optional[str]) -> None: + self._headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "osandov/drgn vmtest", + } + if token is not None: + self._headers["Authorization"] = "token " + token + + def _request( + self, + method: str, + url: str, + *, + params: Optional[Mapping[str, str]] = None, + headers: Optional[Dict[str, str]] = None, + data: Any = None, + ) -> Any: + raise NotImplementedError() + + def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: + raise NotImplementedError() + + def _read_cache(self, cache: _CACHE) -> Optional[Mapping[str, Any]]: + if not cache: + return None + try: + with open(cache, "r") as f: + return json.load(f) # type: ignore[no-any-return] + except FileNotFoundError: + return None + + def _cached_get_headers( + self, cached: Optional[Mapping[str, Any]] + ) -> Dict[str, str]: + if cached is not None: + if "etag" in cached: + return {**self._headers, "If-None-Match": cached["etag"]} + elif "last_modified" in cached: + return {**self._headers, 
"If-Modified-Since": cached["last_modified"]} + return self._headers + + def _write_cache( + self, cache: _CACHE, body: Any, headers: Mapping[str, str] + ) -> None: + if cache is not None and ("ETag" in headers or "Last-Modified" in headers): + to_cache = {"body": body} + if "ETag" in headers: + to_cache["etag"] = headers["ETag"] + if "Last-Modified" in headers: + to_cache["last_modified"] = headers["Last-Modified"] + with open(cache, "w") as f: + json.dump(to_cache, f) + + def get_release_by_tag( + self, owner: str, repo: str, tag: str, *, cache: _CACHE = None + ) -> Any: + return self._cached_get_json(f"repos/{owner}/{repo}/releases/tags/{tag}", cache) + + def download(self, url: str) -> Any: + return self._request( + "GET", url, headers={**self._headers, "Accept": "application/octet-stream"} + ) + + def upload(self, url: str, data: Any, content_type: str) -> Any: + return self._request( + "POST", + url, + headers={**self._headers, "Content-Type": content_type}, + data=data, + ) + + +class GitHubApi(_GitHubApiBase): + def _request( + self, + method: str, + url: str, + *, + params: Optional[Mapping[str, str]] = None, + headers: Optional[Dict[str, str]] = None, + data: Any = None, + ) -> Any: + if params: + url += "?" 
+ urllib.parse.urlencode(params) + return urllib.request.urlopen( + urllib.request.Request( + url, + data=data, + headers={} if headers is None else headers, + method=method, + ) + ) + + def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: + cached = self._read_cache(cache) + try: + with urllib.request.urlopen( + urllib.request.Request( + self._HOST + "/" + endpoint, + headers=self._cached_get_headers(cached), + ) + ) as resp: + body = json.load(resp) + self._write_cache(cache, body, resp.headers) + return body + except urllib.error.HTTPError as e: + if e.code == 304 and cached is not None: + return cached["body"] + else: + raise + + +class AioGitHubApi(_GitHubApiBase): + def __init__(self, session: "aiohttp.ClientSession", token: Optional[str]) -> None: + super().__init__(token) + self._session = session + + def _request( + self, + method: str, + url: str, + *, + params: Optional[Mapping[str, str]] = None, + headers: Optional[Dict[str, str]] = None, + data: Any = None, + ) -> Any: + return self._session.request( + method, url, params=params, headers=headers, data=data + ) + + async def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: + cached = self._read_cache(cache) + async with self._session.get( + self._HOST + "/" + endpoint, + headers=self._cached_get_headers(cached), + raise_for_status=True, + ) as resp: + if resp.status == 304: + if cached is None: + raise Exception("got HTTP 304 but response was not cached") + return cached["body"] + else: + body = await resp.json() + self._write_cache(cache, body, resp.headers) + return body diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py new file mode 100644 index 000000000..f41c85265 --- /dev/null +++ b/vmtest/kbuild.py @@ -0,0 +1,340 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +import argparse +import asyncio +import filecmp +import logging +from pathlib import Path +import shlex +import shutil +import sys +import tempfile +from typing import IO, Any, Optional, Tuple, Union + +from util import nproc +from vmtest.asynciosubprocess import ( + CalledProcessError, + check_call, + check_output, + check_output_shell, + pipe_context, +) + +logger = logging.getLogger(__name__) + +KERNEL_LOCALVERSION = "-vmtest3" + + +def kconfig() -> str: + return rf"""# Minimal Linux kernel configuration for booting into vmtest and running drgn +# tests. + +CONFIG_LOCALVERSION="{KERNEL_LOCALVERSION}" + +CONFIG_SMP=y +CONFIG_MODULES=y + +# We run the tests in KVM. +CONFIG_HYPERVISOR_GUEST=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y + +# Minimum requirements for vmtest. +CONFIG_9P_FS=y +CONFIG_DEVTMPFS=y +CONFIG_INET=y +CONFIG_NET=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_OVERLAY_FS=y +CONFIG_PCI=y +CONFIG_PROC_FS=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_PCI=y + +# drgn needs /proc/kcore for live debugging. +CONFIG_PROC_KCORE=y +# In some cases, it also needs /proc/kallsyms. +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y + +# drgn needs debug info. +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y + +# Before Linux kernel commit 8757dc970f55 ("x86/crash: Define +# arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y") (in v5.6), some +# important information in VMCOREINFO is initialized by the kexec code. +CONFIG_KEXEC=y + +# For block tests. +CONFIG_BLK_DEV_LOOP=m + +# For kconfig tests. 
+CONFIG_IKCONFIG=m +CONFIG_IKCONFIG_PROC=y +""" + + +class KBuild: + def __init__( + self, + kernel_dir: Path, + build_dir: Path, + arch: str, + build_log_file: Union[int, IO[Any], None] = None, + ) -> None: + self._build_dir = build_dir + self._kernel_dir = kernel_dir + self._arch = arch + self._build_stdout = build_log_file + self._build_stderr = ( + None if build_log_file is None else asyncio.subprocess.STDOUT + ) + self._cached_make_args: Optional[Tuple[str, ...]] = None + self._cached_kernel_release: Optional[str] = None + + async def _prepare_make(self) -> Tuple[str, ...]: + if self._cached_make_args is None: + self._build_dir.mkdir(parents=True, exist_ok=True) + + debug_prefix_map = [] + # GCC uses the "logical" working directory, i.e., the PWD + # environment variable, when it can. See + # https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libiberty/getpwd.c;hb=HEAD. + # Map both the canonical and logical paths. + build_dir_real = self._build_dir.resolve() + debug_prefix_map.append(str(build_dir_real) + "=.") + build_dir_logical = ( + await check_output_shell( + f"cd {shlex.quote(str(self._build_dir))}; pwd -L", + ) + ).decode()[:-1] + if build_dir_logical != str(build_dir_real): + debug_prefix_map.append(build_dir_logical + "=.") + + # Before Linux kernel commit 25b146c5b8ce ("kbuild: allow Kbuild to + # start from any directory") (in v5.2), O= forces the source + # directory to be absolute. Since Linux kernel commit 95fd3f87bfbe + # ("kbuild: add a flag to force absolute path for srctree") (in + # v5.3), KBUILD_ABS_SRCTREE=1 does the same. This means that except + # for v5.2, which we don't support, the source directory will + # always be absolute, and we don't need to worry about mapping it + # from a relative path. 
+ kernel_dir_real = self._kernel_dir.resolve() + if kernel_dir_real != build_dir_real: + debug_prefix_map.append(str(kernel_dir_real) + "/=./") + + cflags = " ".join(["-fdebug-prefix-map=" + map for map in debug_prefix_map]) + + self._cached_make_args = ( + "-C", + str(self._kernel_dir), + "ARCH=" + str(self._arch), + "O=" + str(build_dir_real), + "KBUILD_ABS_SRCTREE=1", + "KBUILD_BUILD_USER=drgn", + "KBUILD_BUILD_HOST=drgn", + "KAFLAGS=" + cflags, + "KCFLAGS=" + cflags, + "-j", + str(nproc()), + ) + return self._cached_make_args + + async def _kernel_release(self) -> str: + if self._cached_kernel_release is None: + # Must call _prepare_make() first. + assert self._cached_make_args is not None + self._cached_kernel_release = ( + ( + await check_output( + "make", *self._cached_make_args, "-s", "kernelrelease" + ) + ) + .decode() + .strip() + ) + return self._cached_kernel_release + + async def build(self) -> None: + logger.info("building kernel in %s", self._build_dir) + build_log_file_name = getattr(self._build_stdout, "name", None) + if build_log_file_name is not None: + logger.info("build logs in %s", build_log_file_name) + + make_args = await self._prepare_make() + + config = self._build_dir / ".config" + tmp_config = self._build_dir / ".config.vmtest.tmp" + + tmp_config.write_text(kconfig()) + await check_call( + "make", + *make_args, + "KCONFIG_CONFIG=" + tmp_config.name, + "olddefconfig", + stdout=self._build_stdout, + stderr=self._build_stderr, + ) + try: + equal = filecmp.cmp(config, tmp_config) + if not equal: + logger.info("kernel configuration changed") + except FileNotFoundError: + equal = False + logger.info("no previous kernel configuration") + if equal: + logger.info("kernel configuration did not change") + tmp_config.unlink() + else: + tmp_config.rename(config) + + kernel_release = await self._kernel_release() + logger.info("kernel release is %s", kernel_release) + await check_call( + "make", + *make_args, + "all", + stdout=self._build_stdout, + 
stderr=self._build_stderr, + ) + logger.info("built kernel %s in %s", kernel_release, self._build_dir) + + async def package(self, output_dir: Path) -> Path: + make_args = await self._prepare_make() + kernel_release = await self._kernel_release() + + tarball = output_dir / f"kernel-{kernel_release}.{self._arch}.tar.zst" + + logger.info( + "packaging kernel %s from %s to %s", + kernel_release, + self._build_dir, + tarball, + ) + + image_name = ( + (await check_output("make", *make_args, "-s", "image_name")) + .decode() + .strip() + ) + + with tempfile.TemporaryDirectory( + prefix="install.", dir=self._build_dir + ) as tmp_name: + install_dir = Path(tmp_name) + modules_dir = install_dir / "lib" / "modules" / kernel_release + + logger.info("installing modules") + await check_call( + "make", + *make_args, + "INSTALL_MOD_PATH=" + str(install_dir.resolve()), + "modules_install", + stdout=self._build_stdout, + stderr=self._build_stderr, + ) + # Don't want these symlinks. + (modules_dir / "build").unlink() + (modules_dir / "source").unlink() + + logger.info("copying vmlinux") + vmlinux = modules_dir / "vmlinux" + await check_call( + "objcopy", + "--remove-relocations=*", + self._build_dir / "vmlinux", + str(vmlinux), + ) + vmlinux.chmod(0o644) + + logger.info("copying vmlinuz") + vmlinuz = modules_dir / "vmlinuz" + shutil.copy(self._build_dir / image_name, vmlinuz) + vmlinuz.chmod(0o644) + + logger.info("creating tarball") + tarball.parent.mkdir(parents=True, exist_ok=True) + tar_cmd = ("tar", "-C", str(modules_dir), "-c", ".") + zstd_cmd = ("zstd", "-T0", "-19", "-q", "-", "-o", str(tarball), "-f") + with pipe_context() as (pipe_r, pipe_w): + tar_proc, zstd_proc = await asyncio.gather( + asyncio.create_subprocess_exec(*tar_cmd, stdout=pipe_w), + asyncio.create_subprocess_exec(*zstd_cmd, stdin=pipe_r), + ) + tar_returncode, zstd_returncode = await asyncio.gather( + tar_proc.wait(), zstd_proc.wait() + ) + if tar_returncode != 0: + raise CalledProcessError(tar_returncode, 
tar_cmd) + if zstd_returncode != 0: + raise CalledProcessError(zstd_returncode, zstd_cmd) + + logger.info( + "packaged kernel %s from %s to %s", kernel_release, self._build_dir, tarball + ) + return tarball + + +async def main() -> None: + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO + ) + + parser = argparse.ArgumentParser( + description="Build a drgn vmtest kernel", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "-k", + "--kernel-directory", + metavar="DIR", + type=Path, + help="kernel source tree directory", + default=".", + ) + parser.add_argument( + "-b", + "--build-directory", + metavar="DIR", + type=Path, + help="build output directory", + default=".", + ) + parser.add_argument( + "-p", + "--package", + metavar="DIR", + type=Path, + help="also package the built kernel and place it in DIR", + default=argparse.SUPPRESS, + ) + parser.add_argument( + "--dump-kconfig", + action="store_true", + help="dump kernel configuration file to standard output instead of building", + ) + args = parser.parse_args() + + if args.dump_kconfig: + sys.stdout.write(kconfig()) + return + + kbuild = KBuild(args.kernel_directory, args.build_directory, "x86_64") + await kbuild.build() + if hasattr(args, "package"): + await kbuild.package(args.package) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/vmtest/manage.py b/vmtest/manage.py index 0f53a123e..ed7ff1cb0 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -3,576 +3,136 @@ import argparse import asyncio -import difflib -import getpass -import io -import json import logging import os from pathlib import Path import re -import shlex -import shutil import sys -import time -from typing import ( - Any, - AsyncGenerator, - BinaryIO, - Dict, - List, - Optional, - Sequence, - Set, - SupportsFloat, - SupportsRound, - TextIO, - Tuple, -) -import urllib.parse +from typing import AsyncIterator, Dict, List, NamedTuple, Optional, 
Sequence, Union import aiohttp -from yarl import URL +import uritemplate -from util import nproc +from util import KernelVersion +from vmtest.asynciosubprocess import check_call, check_output +from vmtest.download import VMTEST_GITHUB_RELEASE, available_kernel_releases +from vmtest.githubapi import AioGitHubApi +from vmtest.kbuild import KERNEL_LOCALVERSION, KBuild logger = logging.getLogger(__name__) +# [inclusive, exclusive) ranges of kernel versions to ignore when building +# latest releases of each version. +IGNORE_KERNEL_RANGES = ( + (KernelVersion("~"), KernelVersion("4.4")), + (KernelVersion("4.5~"), KernelVersion("4.9")), + (KernelVersion("4.10~"), KernelVersion("4.14")), + (KernelVersion("4.15~"), KernelVersion("4.19")), + (KernelVersion("4.20~"), KernelVersion("5.4")), + (KernelVersion("5.5~"), KernelVersion("5.10")), +) -KERNEL_CONFIG_PATH = Path(__file__).parent / "config" - -KERNEL_ORG_JSON = "https://www.kernel.org/releases.json" - -DROPBOX_API_URL = "https://api.dropboxapi.com" -CONTENT_API_URL = "https://content.dropboxapi.com" - - -def humanize_size(n: SupportsFloat, precision: int = 1) -> str: - n = float(n) - for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: - if abs(n) < 1024: - break - n /= 1024 - else: - unit = "Yi" - if n.is_integer(): - precision = 0 - return f"{n:.{precision}f}{unit}B" - - -def humanize_duration(seconds: SupportsRound[Any]) -> str: - seconds = round(seconds) - return f"{seconds // 60}m{seconds % 60}s" - - -async def raise_for_status_body(resp: aiohttp.ClientResponse) -> None: - """ - Like aiohttp.ClientResponse.raise_for_status(), but includes the response - body. 
- """ - if resp.status >= 400: - message = resp.reason or "" - body = await resp.text() - if body: - if message: - message += ": " - message += body - raise aiohttp.ClientResponseError( - resp.request_info, - resp.history, - status=resp.status, - message=message, - headers=resp.headers, - ) - - -def get_current_localversion() -> str: - with KERNEL_CONFIG_PATH.open("r") as f: - match = re.search(r'^CONFIG_LOCALVERSION="([^"]*)"', f.read(), re.MULTILINE) - return match.group(1) if match else "" - - -async def get_kernel_org_versions(http_client: aiohttp.ClientSession) -> List[str]: - async with http_client.get(KERNEL_ORG_JSON, raise_for_status=True) as resp: - releases = (await resp.json())["releases"] - return [ - release["version"] - for release in releases - if release["moniker"] in {"mainline", "stable", "longterm"} - ] +# Use the GitHub mirror rather than the official kernel.org repository since +# this script usually runs in GitHub Actions. +STABLE_LINUX_GIT_URL = "https://github.com/gregkh/linux.git" -async def get_available_kernel_releases( - http_client: aiohttp.ClientSession, token: str -) -> Set[str]: - headers = {"Authorization": "Bearer " + token} - params = {"path": "/Public/x86_64"} - url = DROPBOX_API_URL + "/2/files/list_folder" - available = set() - while True: - async with http_client.post(url, headers=headers, json=params) as resp: - if resp.status == 409 and (await resp.json())["error_summary"].startswith( - "path/not_found/" - ): +async def get_latest_kernel_tags() -> List[str]: + ls_remote = ( + await check_output("git", "ls-remote", "--tags", "--refs", STABLE_LINUX_GIT_URL) + ).decode() + latest: Dict[str, KernelVersion] = {} + for match in re.finditer( + r"^[a-f0-9]+\s+refs/tags/v([0-9]+\.[0-9]+)(-rc[0-9]+|\.[0-9]+)?$", + ls_remote, + re.M, + ): + version = KernelVersion(match.group(1) + (match.group(2) or "")) + for start_version, end_version in IGNORE_KERNEL_RANGES: + if start_version <= version < end_version: break - await 
raise_for_status_body(resp) - obj = await resp.json() - for entry in obj["entries"]: - if entry[".tag"] != "file": - continue - match = re.fullmatch(r"kernel-(.*)\.tar\.zst", entry["name"]) - if match: - available.add(match.group(1)) - if not obj["has_more"]: - break - url = DROPBOX_API_URL + "/2/files/list_folder/continue" - params = {"cursor": obj["cursor"]} - return available - - -class CalledProcessError(Exception): - def __init__(self, returncode: int, cmd: Sequence[str]) -> None: - self.returncode = returncode - self.cmd = cmd - - def __str__(self) -> str: - command = " ".join(shlex.quote(arg) for arg in self.cmd) - raise Exception( - f"Command {command!r} returned non-zero exit status {self.returncode}" - ) - - -async def check_call(*args: Any, **kwds: Any) -> None: - proc = await asyncio.create_subprocess_exec(*args, **kwds) - returncode = await proc.wait() - if returncode != 0: - raise CalledProcessError(returncode, args) - - -async def check_output(*args: Any, **kwds: Any) -> bytes: - kwds["stdout"] = asyncio.subprocess.PIPE - proc = await asyncio.create_subprocess_exec(*args, **kwds) - stdout = (await proc.communicate())[0] - if proc.returncode: - raise CalledProcessError(proc.returncode, args) - return stdout - - -def getpwd() -> str: - """ - Get the current working directory in the same way that GCC does. See - https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libiberty/getpwd.c;hb=HEAD. - """ - try: - pwd = os.environ["PWD"] - if pwd.startswith("/"): - pwdstat = os.stat(pwd) - dotstat = os.stat(".") - if dotstat.st_ino == pwdstat.st_ino and dotstat.st_dev == pwdstat.st_dev: - return pwd - except (KeyError, OSError): - pass - return os.getcwd() - - -async def build_kernel( - commit: str, build_dir: Path, log_file: TextIO -) -> Tuple[str, Path]: - """ - Returns built kernel release (i.e., `uname -r`) and image name (e.g., - `arch/x86/boot/bzImage`). 
- """ - await check_call( - "git", "checkout", commit, stdout=log_file, stderr=asyncio.subprocess.STDOUT - ) - - shutil.copy(KERNEL_CONFIG_PATH, build_dir / ".config") - - logger.info("building %s", commit) - start = time.monotonic() - cflags = f"-fdebug-prefix-map={getpwd() / build_dir}=" - kbuild_args = [ - "KBUILD_BUILD_USER=drgn", - "KBUILD_BUILD_HOST=drgn", - "KAFLAGS=" + cflags, - "KCFLAGS=" + cflags, - "O=" + str(build_dir), - "-j", - str(nproc()), - ] - await check_call( - "make", - *kbuild_args, - "olddefconfig", - "all", - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, + else: + latest[match.group(1)] = max(version, latest.get(match.group(1), version)) + return ["v" + str(version) for version in sorted(latest.values(), reverse=True)] + + +def kernel_tag_to_release(tag: str) -> str: + match = re.fullmatch(r"v([0-9]+\.[0-9]+)(\.[0-9]+)?(-rc\d+)?", tag) + assert match + return "".join( + [ + match.group(1), + match.group(2) or ".0", + match.group(3) or "", + KERNEL_LOCALVERSION, + ] ) - elapsed = time.monotonic() - start - logger.info("built %s in %s", commit, humanize_duration(elapsed)) - - logger.info("packaging %s", commit) - start = time.monotonic() - release = ( - ( - await check_output( - "make", *kbuild_args, "-s", "kernelrelease", stderr=log_file - ) - ) - .decode() - .strip() - ) - image_name = ( - (await check_output("make", *kbuild_args, "-s", "image_name", stderr=log_file)) - .decode() - .strip() - ) - install_dir = build_dir / "install" - modules_dir = install_dir / "lib" / "modules" / release +async def fetch_kernel_tags(kernel_dir: Path, tags: Sequence[str]) -> None: + if not kernel_dir.exists(): + logger.info("creating kernel repository in %s", kernel_dir) + await check_call("git", "init", "-q", str(kernel_dir)) + logger.info("fetching kernel tags: %s", ", ".join(tags)) await check_call( - "make", - *kbuild_args, - "INSTALL_MOD_PATH=install", - "modules_install", - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, + "git", + "-C", + 
str(kernel_dir), + "fetch", + "--depth", + "1", + STABLE_LINUX_GIT_URL, + *(f"refs/tags/{tag}:refs/tags/{tag}" for tag in tags), ) - # Don't want these symlinks. - (modules_dir / "build").unlink() - (modules_dir / "source").unlink() - vmlinux = modules_dir / "vmlinux" - await check_call( - "objcopy", - "--remove-relocations=*", - str(build_dir / "vmlinux"), - str(vmlinux), - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, - ) - vmlinux.chmod(0o644) - - vmlinuz = modules_dir / "vmlinuz" - shutil.copy(build_dir / image_name, vmlinuz) - vmlinuz.chmod(0o644) - - tarball = build_dir / "kernel.tar.zst" - tar_command = ("tar", "-C", str(modules_dir), "-c", ".") - zstd_command = ("zstd", "-T0", "-19", "-q", "-", "-o", str(tarball)) - pipe_r, pipe_w = os.pipe() - try: - tar_proc, zstd_proc = await asyncio.gather( - asyncio.create_subprocess_exec( - *tar_command, stdout=pipe_w, stderr=log_file - ), - asyncio.create_subprocess_exec( - *zstd_command, - stdin=pipe_r, - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, - ), - ) - finally: - os.close(pipe_r) - os.close(pipe_w) - tar_returncode, zstd_returncode = await asyncio.gather( - tar_proc.wait(), zstd_proc.wait() - ) - if tar_returncode != 0: - raise CalledProcessError(tar_returncode, tar_command) - if zstd_returncode != 0: - raise CalledProcessError(zstd_returncode, zstd_command) - shutil.rmtree(install_dir) - elapsed = time.monotonic() - start - logger.info("packaged %s in %s", commit, humanize_duration(elapsed)) - return release, tarball +async def build_kernels( + kernel_dir: Path, build_dir: Path, arch: str, kernel_revs: Sequence[str] +) -> AsyncIterator[Path]: + build_dir.mkdir(parents=True, exist_ok=True) + for rev in kernel_revs: + rev_build_dir = build_dir / ("build-" + rev) + logger.info("checking out %s in %s", rev, rev_build_dir) + await check_call("git", "-C", str(kernel_dir), "checkout", "-q", rev) + with open(build_dir / f"build-{rev}.log", "w") as build_log_file: + kbuild = KBuild(kernel_dir, 
rev_build_dir, arch, build_log_file) + await kbuild.build() + yield await kbuild.package(build_dir) -async def try_build_kernel(commit: str) -> Optional[Tuple[str, Path]]: - """Returns (kernel release, tarball path) on success, None on error.""" - proc = await asyncio.create_subprocess_exec( - "git", - "rev-parse", - "--verify", - "-q", - commit + "^{commit}", - stdout=asyncio.subprocess.DEVNULL, - ) - if (await proc.wait()) != 0: - logger.error("unknown revision: %s", commit) - return None - - build_dir = Path("build-" + commit) - try: - log_path = build_dir / "build.log" - logger.info("preparing %r; logs in %r", str(build_dir), str(log_path)) - build_dir.mkdir(0o755) - with log_path.open("w") as log_file: - try: - return await build_kernel(commit, build_dir, log_file) - except Exception: - logger.exception("building %s failed; see %r", commit, str(log_path)) - return None - except Exception: - logger.exception("preparing %r failed", str(build_dir)) - return None - - -class Uploader: - CHUNK_SIZE = 8 * 1024 * 1024 - - def __init__(self, http_client: aiohttp.ClientSession, token: str) -> None: - self._http_client = http_client - self._token = token - self._pending: List[Tuple[str, asyncio.Task[bool]]] = [] - - async def _upload_file_obj(self, file: BinaryIO, commit: Dict[str, Any]) -> None: - headers = { - "Authorization": "Bearer " + self._token, - "Content-Type": "application/octet-stream", - } - offset = 0 - session_id = None - while True: - data = file.read(Uploader.CHUNK_SIZE) - last = len(data) < Uploader.CHUNK_SIZE - if session_id is None: - if last: - endpoint = "upload" - params = commit - else: - endpoint = "upload_session/start" - params = {} - else: - params = { - "cursor": {"offset": offset, "session_id": session_id}, - } - if last: - endpoint = "upload_session/finish" - params["commit"] = commit - else: - endpoint = "upload_session/append_v2" - offset += len(data) - headers["Dropbox-API-Arg"] = json.dumps(params) - url = CONTENT_API_URL + "/2/files/" 
+ endpoint - async with self._http_client.post(url, headers=headers, data=data) as resp: - await raise_for_status_body(resp) - if endpoint == "upload_session/start": - session_id = (await resp.json())["session_id"] - if last: - break +class AssetUploadWork(NamedTuple): + upload_url: str + path: Union[str, bytes, Path] + name: str + content_type: str - async def _try_upload_file_obj( - self, file: BinaryIO, commit: Dict[str, Any] - ) -> bool: - try: - logger.info("uploading %r", commit["path"]) - start = time.monotonic() - await self._upload_file_obj(file, commit) - elapsed = time.monotonic() - start - logger.info("uploaded %r in %s", commit["path"], humanize_duration(elapsed)) - return True - except Exception: - logger.exception("uploading %r failed", commit["path"]) - return False - async def _try_upload_file(self, path: str, commit: Dict[str, Any]) -> bool: - try: - logger.info("uploading %r to %r", path, commit["path"]) - start = time.monotonic() - with open(path, "rb") as f: - await self._upload_file_obj(f, commit) - elapsed = time.monotonic() - start - logger.info( - "uploaded %r to %r in %s", - path, - commit["path"], - humanize_duration(elapsed), - ) - return True - except Exception: - logger.exception("uploading %r to %r failed", path, commit["path"]) - return False - - @staticmethod - def _make_commit( - dst_path: str, *, mode: Optional[str] = None, autorename: Optional[bool] = None - ) -> Dict[str, Any]: - commit: Dict[str, Any] = {"path": dst_path} - if mode is not None: - commit["mode"] = mode - if autorename is not None: - commit["autorename"] = autorename - return commit - - def queue_file_obj(self, file: BinaryIO, *args: Any, **kwds: Any) -> None: - commit = self._make_commit(*args, **kwds) - task = asyncio.create_task(self._try_upload_file_obj(file, commit)) - self._pending.append((commit["path"], task)) - - def queue_file(self, src_path: str, *args: Any, **kwds: Any) -> None: - commit = self._make_commit(*args, **kwds) - task = 
asyncio.create_task(self._try_upload_file(src_path, commit)) - self._pending.append((commit["path"], task)) - - async def wait(self) -> Tuple[List[str], List[str]]: - """ - Returns list of successfully uploaded paths and list of paths that - failed to upload. - """ - succeeded = [] - failed = [] - for path, task in self._pending: - if await task: - succeeded.append(path) - else: - failed.append(path) - self._pending.clear() - return succeeded, failed - - -async def list_shared_folder( - http_client: aiohttp.ClientSession, url: str -) -> AsyncGenerator[Tuple[str, bool, str], None]: - """ - List a Dropbox shared folder. The Dropbox API doesn't provide a way to get - the links for entries inside of a shared folder, so we're forced to scrape - them from the webpage and XHR endpoint. - - Generates filename, whether it is a directory, and its shared link. - """ - method = "GET" - data = None +async def asset_uploader( + gh: AioGitHubApi, + queue: "asyncio.Queue[Optional[AssetUploadWork]]", +) -> bool: + success = True while True: - async with http_client.request(method, url, data=data) as resp: - if method == "GET": - resp.raise_for_status() - match = re.search( - r'"\{\\"shared_link_infos\\".*[^\\]\}"', (await resp.text()) + work = await queue.get() + if not work: + queue.task_done() + return success + logger.info("uploading %s", work.name) + try: + with open(work.path, "rb") as f: + await gh.upload( + uritemplate.expand(work.upload_url, name=work.name), + f, + work.content_type, ) - assert match - obj = json.loads(json.loads(match.group())) - else: - await raise_for_status_body(resp) - obj = await resp.json() - for entry in obj["entries"]: - yield entry["filename"], entry["is_dir"], entry["href"] - if not obj["has_more_entries"]: - break - if method == "GET": - method = "POST" - url = "https://www.dropbox.com/list_shared_link_folder_entries" - data = { - "t": http_client.cookie_jar.filter_cookies(URL(url))["t"].value, - "link_key": obj["folder_share_token"]["linkKey"], 
- "link_type": obj["folder_share_token"]["linkType"], - "secure_hash": obj["folder_share_token"]["secureHash"], - "sub_path": obj["folder_share_token"]["subPath"], - } - assert data is not None - data["voucher"] = obj["next_request_voucher"] - - -async def walk_shared_folder( - http_client: aiohttp.ClientSession, url: str -) -> AsyncGenerator[Tuple[str, List[Tuple[str, str]], List[Tuple[str, str]]], None]: - """ - Walk a Dropbox shared folder, similar to os.walk(). Generates path, list of - files and their shared links, and list of folders and their shared links. - """ - stack = [("", url)] - while stack: - path, url = stack.pop() - dirs = [] - files = [] - async for filename, is_dir, href in list_shared_folder(http_client, url): - if is_dir: - dirs.append((filename, href)) - else: - files.append((filename, href)) - yield path, files, dirs - if path: - path += "/" - stack.extend((path + filename, href) for filename, href in dirs) - - -def make_download_url(url: str) -> str: - parsed = urllib.parse.urlsplit(url) - query = [ - (name, value) - for name, value in urllib.parse.parse_qsl(parsed.query) - if name != "dl" - ] - query.append(("dl", "1")) - return urllib.parse.urlunsplit(parsed._replace(query=urllib.parse.urlencode(query))) - - -async def update_index( - http_client: aiohttp.ClientSession, token: str, uploader: Uploader -) -> bool: - try: - logger.info("finding shared folder link") - headers = {"Authorization": "Bearer " + token} - params = { - "path": "/Public", - "direct_only": True, - } - async with http_client.post( - DROPBOX_API_URL + "/2/sharing/list_shared_links", - headers=headers, - json=params, - ) as resp: - await raise_for_status_body(resp) - for link in (await resp.json())["links"]: - if link[".tag"] != "folder": - continue - try: - visibility = link["link_permissions"]["resolved_visibility"][".tag"] - except KeyError: - continue - if visibility == "public": - break - else: - raise Exception("shared folder link not found") - - 
logger.info("walking shared folder") - async for path, files, dirs in walk_shared_folder(http_client, link["url"]): - lines = [] - old_lines = [] - for name, href in files: - href = make_download_url(href) - lines.append(name + "\t" + href + "\n") - if name == "INDEX": - async with http_client.get(href, raise_for_status=True) as resp: - old_lines = (await resp.text()).splitlines(keepends=True) - lines.extend(name + "/\t" + href + "\n" for name, href in dirs) - lines.sort() - - index_path = (path + "/" if path else "") + "INDEX" - if lines == old_lines: - logger.info("%s is up to date", index_path) - continue - diff = difflib.unified_diff( - old_lines, lines, fromfile="a/" + index_path, tofile="b/" + index_path - ) - logger.info("updating %s:\n%s", index_path, "".join(diff).rstrip("\n")) - uploader.queue_file_obj( - io.BytesIO("".join(lines).encode()), - "/Public/" + index_path, - mode="overwrite", - ) - succeeded, failed = await uploader.wait() - if failed: - logger.info("updates failed: %s", ", ".join(failed)) - return False - return True - except Exception: - logger.exception("updating INDEX files failed") - return False + except Exception: + logger.exception("uploading %s failed", work.name) + success = False + else: + logger.info("uploaded %s", work.name) + finally: + queue.task_done() async def main() -> None: @@ -581,138 +141,110 @@ async def main() -> None: ) parser = argparse.ArgumentParser( - description="Tool for managing drgn vmtest kernel builds and files" + description="Build and upload drgn vmtest assets", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - "-b", - "--build", - type=str, - action="append", - metavar="COMMIT", - help="build the given kernel release; may be given multiple times. 
" - "Must be run from a Linux kernel git repository", + "-K", + "--latest-kernels", + action="store_true", + help="build and upload latest supported kernel releases", ) parser.add_argument( - "-k", - "--build-kernel-org", + "--dry-run", action="store_true", - help="build new kernels listed on kernel.org", + help="build but don't upload anything to GitHub", ) parser.add_argument( - "-u", "--upload", action="store_true", help="upload built kernels" + "--kernel-directory", + metavar="DIR", + type=Path, + help="kernel Git repository directory (created if needed)", + default=".", ) parser.add_argument( - "-U", - "--upload-file", - type=str, - action="append", - dest="upload_files", - metavar=("SRC_PATH", "DST_PATH"), - nargs=2, - help="upload the given file; may be given multiple times", + "--build-directory", + metavar="DIR", + type=Path, + help="directory for build artifacts", + default=".", ) parser.add_argument( - "-i", "--index", action="store_true", help="update the INDEX files" + "--cache-directory", + metavar="DIR", + type=Path, + default="build/vmtest", + help="directory to cache API calls in", ) args = parser.parse_args() - if (args.build or args.build_kernel_org) and ( - not Path(".git").exists() or not Path("kernel").exists() - ): - sys.exit("-b/-k must be run from linux.git") + arch = "x86_64" - if args.build_kernel_org or args.upload or args.upload_files or args.index: - if os.isatty(sys.stdin.fileno()): - dropbox_token = getpass.getpass("Enter Dropbox app API token: ") + async with aiohttp.ClientSession(trust_env=True) as session: + GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") + if GITHUB_TOKEN is None and not args.dry_run: + sys.exit("GITHUB_TOKEN environment variable is not set") + gh = AioGitHubApi(session, GITHUB_TOKEN) + + args.cache_directory.mkdir(parents=True, exist_ok=True) + github_release_coro = gh.get_release_by_tag( + *VMTEST_GITHUB_RELEASE, cache=args.cache_directory / "github_release.json" + ) + if args.latest_kernels: + github_release, 
latest_kernel_tags = await asyncio.gather( + github_release_coro, get_latest_kernel_tags() + ) else: - dropbox_token = input() - - builds_succeeded = [] - builds_failed = [] - uploads_succeeded = [] - uploads_failed = [] - - async with aiohttp.ClientSession(trust_env=True) as http_client: - # dict rather than set to preserve insertion order. - to_build = dict.fromkeys(args.build or ()) - if args.build_kernel_org: - localversion = get_current_localversion() - logger.info("current localversion: %s", localversion) - try: - # In this context, "version" is a tag name without the "v" - # prefix and "release" is a uname release string. - logger.info( - "getting list of kernel.org versions and available releases" - ) - kernel_org, available = await asyncio.gather( - get_kernel_org_versions(http_client), - get_available_kernel_releases(http_client, dropbox_token), + github_release = await github_release_coro + + kernel_releases = available_kernel_releases(github_release, arch) + logger.info( + "available %s kernel releases: %s", + arch, + ", ".join(sorted(kernel_releases, key=KernelVersion, reverse=True)), + ) + + if args.latest_kernels: + logger.info("latest kernel versions: %s", ", ".join(latest_kernel_tags)) + kernel_tags = [ + tag + for tag in latest_kernel_tags + if kernel_tag_to_release(tag) not in kernel_releases + ] + else: + kernel_tags = [] + + if kernel_tags: + logger.info("kernel versions to build: %s", ", ".join(kernel_tags)) + + if not args.dry_run: + upload_queue: "asyncio.Queue[Optional[AssetUploadWork]]" = ( + asyncio.Queue() ) - logger.info("kernel.org versions: %s", ", ".join(kernel_org)) - logger.info("available releases: %s", ", ".join(sorted(available))) - for version in kernel_org: - match = re.fullmatch(r"(\d+\.\d+)(\.\d+)?(-rc\d+)?", version) - if not match: - logger.error("couldn't parse kernel.org version %r", version) - sys.exit(1) - release = "".join( - [ - match.group(1), - match.group(2) or ".0", - match.group(3) or "", - localversion, - ] + 
uploader = asyncio.create_task(asset_uploader(gh, upload_queue)) + + await fetch_kernel_tags(args.kernel_directory, kernel_tags) + + async for kernel_package in build_kernels( + args.kernel_directory, args.build_directory, arch, kernel_tags + ): + if args.dry_run: + logger.info("would upload %s", kernel_package) + else: + await upload_queue.put( + AssetUploadWork( + upload_url=github_release["upload_url"], + path=kernel_package, + name=kernel_package.name, + content_type="application/zstd", + ) ) - if release not in available: - to_build["v" + version] = None - except Exception: - logger.exception( - "failed to get kernel.org releases and/or available releases" - ) - sys.exit(1) - - if args.upload or args.upload_files or args.index: - uploader = Uploader(http_client, dropbox_token) - - for src_path, dst_path in args.upload_files or (): - uploader.queue_file(src_path, dst_path, autorename=False) - - if to_build: - logger.info("releases to build: %s", ", ".join(to_build)) - for kernel in to_build: - result = await try_build_kernel(kernel) - if result is None: - builds_failed.append(kernel) - continue - builds_succeeded.append(kernel) - release, tarball = result - if args.upload: - uploader.queue_file( - str(tarball), - f"/Public/x86_64/kernel-{release}.tar.zst", - autorename=False, - ) - if args.upload or args.upload_files: - succeeded, failed = await uploader.wait() - uploads_succeeded.extend(succeeded) - uploads_failed.extend(failed) - - if builds_succeeded: - logger.info("successfully built: %s", ", ".join(builds_succeeded)) - if builds_failed: - logger.error("builds failed: %s", ", ".join(builds_failed)) - if uploads_succeeded: - logger.info("successfully uploaded: %s", ", ".join(uploads_succeeded)) - if uploads_failed: - logger.info("uploads failed: %s", ", ".join(uploads_failed)) - - if builds_failed or uploads_failed: - logger.error("builds and/or uploads failed; exiting") - sys.exit(1) - - if args.index and not await update_index(http_client, dropbox_token, 
uploader): - sys.exit(1) + if not args.dry_run: + await upload_queue.put(None) + await upload_queue.join() + if not await uploader: + sys.exit("some uploads failed") if __name__ == "__main__": diff --git a/vmtest/vm.py b/vmtest/vm.py index eae3c4e7c..20a61d673 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -275,6 +275,8 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: parser.add_argument( "-d", "--directory", + metavar="DIR", + type=Path, default="build/vmtest", help="directory for build artifacts and downloaded kernels", ) @@ -303,16 +305,15 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: if kernel.startswith(".") or kernel.startswith("/"): kernel_dir = Path(kernel) else: - from vmtest.download import KernelDownloader + from vmtest.download import download_kernels - with KernelDownloader( - [getattr(args, "kernel", "*")], download_dir=Path(args.directory) - ) as downloader: - kernel_dir = next(iter(downloader)) + kernel_dir = next( + download_kernels(args.directory, "x86_64", getattr(args, "kernel", "*")) + ) try: command = " ".join(args.command) if args.command else '"$BUSYBOX" sh -i' - sys.exit(run_in_vm(command, kernel_dir, Path(args.directory))) + sys.exit(run_in_vm(command, kernel_dir, args.directory)) except LostVMError as e: print("error:", e, file=sys.stderr) sys.exit(args.lost_status) From ebca2d2f8a6b9531737eb68f993b7f54733cb552 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 00:30:34 -0700 Subject: [PATCH 21/56] CI: add action to build vmtest kernels weekly The previous commit mentioned this action but forgot to add it. 
Signed-off-by: Omar Sandoval --- .github/workflows/vmtest-build.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/vmtest-build.yml diff --git a/.github/workflows/vmtest-build.yml b/.github/workflows/vmtest-build.yml new file mode 100644 index 000000000..9b2bb62e1 --- /dev/null +++ b/.github/workflows/vmtest-build.yml @@ -0,0 +1,28 @@ +name: vmtest Build + +on: + schedule: + - cron: '16 6 * * MON' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install libelf-dev + pip install aiohttp uritemplate + - name: Build and upload assets + run: python3 -m manage --kernel-directory build/vmtest/linux.git --build-directory build/vmtest/kbuild -K + - name: Upload kernel build logs + if: always() + uses: actions/upload-artifact@v2 + with: + name: kernel-build-logs + path: build/vmtest/kbuild/*.log + if-no-files-found: ignore From 47c8e0e53e43ce5b24be342ecb1dfdc79420fab4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 00:34:11 -0700 Subject: [PATCH 22/56] vmtest: fix vmtest kernel build command Signed-off-by: Omar Sandoval --- .github/workflows/vmtest-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vmtest-build.yml b/.github/workflows/vmtest-build.yml index 9b2bb62e1..449f3f6fd 100644 --- a/.github/workflows/vmtest-build.yml +++ b/.github/workflows/vmtest-build.yml @@ -18,7 +18,7 @@ jobs: sudo apt-get install libelf-dev pip install aiohttp uritemplate - name: Build and upload assets - run: python3 -m manage --kernel-directory build/vmtest/linux.git --build-directory build/vmtest/kbuild -K + run: python3 -m vmtest.manage --kernel-directory build/vmtest/linux.git --build-directory build/vmtest/kbuild -K - name: Upload kernel build logs if: always() uses: actions/upload-artifact@v2 
From a1dc80bc381d5d18f49fbb6dba5944969b825a46 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 01:15:40 -0700 Subject: [PATCH 23/56] CI: reenable vmtest Now that the vmtest kernels have a new home on GitHub, reenable vmtest for the CI workflow. Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9eb2fb625..017060bd7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: - name: Check with mypy run: mypy --strict --no-warn-return-any drgn _drgn.pyi - name: Build and test with ${{ matrix.cc }} - run: python setup.py test + run: python setup.py test -K lint: runs-on: ubuntu-latest From a15de13a7c091be7314359deb18c11d61af8f519 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 13:19:44 -0700 Subject: [PATCH 24/56] vmtest: handle exceptions when downloading in thread Otherwise, an exception causes setup.py test -K to hang. 
Signed-off-by: Omar Sandoval --- vmtest/download.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/vmtest/download.py b/vmtest/download.py index 3514408f5..4531fd886 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -14,7 +14,7 @@ import subprocess import tempfile import threading -from typing import Any, Dict, Iterator, Optional, Sequence +from typing import Any, Dict, Iterator, Sequence, Union from util import KernelVersion from vmtest.githubapi import GitHubApi @@ -136,25 +136,30 @@ def _download_kernels_thread( download_dir: Path, arch: str, kernels: Sequence[str], - q: "queue.Queue[Optional[Path]]", + q: "queue.Queue[Union[Path, Exception]]", ) -> None: - for kernel in download_kernels(download_dir, arch, kernels): - q.put(kernel) - q.put(None) + try: + it = download_kernels(download_dir, arch, kernels) + while True: + q.put(next(it)) + except Exception as e: + q.put(e) @contextmanager def download_kernels_in_thread( download_dir: Path, arch: str, kernels: Sequence[str] ) -> Iterator[Iterator[Path]]: - q: "queue.Queue[Optional[Path]]" = queue.Queue() + q: "queue.Queue[Union[Path, Exception]]" = queue.Queue() def aux() -> Iterator[Path]: while True: - kernel = q.get() - if kernel is None: + obj = q.get() + if isinstance(obj, StopIteration): break - yield kernel + elif isinstance(obj, Exception): + raise obj + yield obj thread = None try: From abeea40b8a5281ede0e68e2c8d38138e0cf0f31a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 13:40:25 -0700 Subject: [PATCH 25/56] vmtest: fix cleanup of temporary directory on Python < 3.8 In Python before 3.8, tempfile.TemporaryDirectory.cleanup() fails when the directory doesn't exist. Since we rename the temporary download directory to its final name, this always fails. Switch to using tempfile.mkdtemp() directly instead. 
Signed-off-by: Omar Sandoval --- vmtest/download.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vmtest/download.py b/vmtest/download.py index 4531fd886..8565d7619 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -38,8 +38,8 @@ def available_kernel_releases( def _download_kernel(gh: GitHubApi, url: str, dir: Path) -> None: dir.parent.mkdir(parents=True, exist_ok=True) - with tempfile.TemporaryDirectory(dir=dir.parent) as tmp_name: - tmp_dir = Path(tmp_name) + tmp_dir = Path(tempfile.mkdtemp(dir=dir.parent)) + try: # Don't assume that the available version of tar has zstd support or # the non-standard -I/--use-compress-program option. with subprocess.Popen( @@ -59,7 +59,10 @@ def _download_kernel(gh: GitHubApi, url: str, dir: Path) -> None: raise subprocess.CalledProcessError(zstd_proc.returncode, zstd_proc.args) if tar_proc.returncode != 0: raise subprocess.CalledProcessError(tar_proc.returncode, tar_proc.args) - + except: + shutil.rmtree(tmp_dir, ignore_errors=True) + raise + else: tmp_dir.rename(dir) From 47ab6142d54e43311dd6d556be8a293996eea7f2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 May 2021 01:35:27 -0700 Subject: [PATCH 26/56] vmtest: fix vmtest.vm -k The argument to download_kernels() is an iterable of kernels, but we're passing it a single kernel string. 
Signed-off-by: Omar Sandoval --- vmtest/vm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vmtest/vm.py b/vmtest/vm.py index 20a61d673..c3fe15c78 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -307,9 +307,7 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: else: from vmtest.download import download_kernels - kernel_dir = next( - download_kernels(args.directory, "x86_64", getattr(args, "kernel", "*")) - ) + kernel_dir = next(download_kernels(args.directory, "x86_64", (kernel,))) try: command = " ".join(args.command) if args.command else '"$BUSYBOX" sh -i' From 1cc3868955b07c8a99426660466113b4dc55d5e4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 May 2021 01:43:46 -0700 Subject: [PATCH 27/56] CI: temporarily disable Clang Clang tests are hitting this assertion in libomp (libomp5-10 1:10.0.0-4ubuntu1): Assertion failure at kmp_tasking.cpp(501): taskdata->td_flags.tasktype == 1. OMP: Error #13: Assertion failure at kmp_tasking.cpp(501). OMP: Hint Please submit a bug report with this message, compile and run commands used, and machine configuration info including native compiler and operating system versions. Faster response will be obtained by including all program sources. For information on submitting this issue, please see https://bugs.llvm.org/. Disable Clang builds until we figure out what's going on. 
Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 017060bd7..f4627b7c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: python-version: [3.9, 3.8, 3.7, 3.6] - cc: [gcc, clang] + cc: [gcc] fail-fast: false env: CC: ${{ matrix.cc }} From e0921c5bdbef661aab6dc06f3d6740af78d27c43 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 May 2021 14:52:24 -0700 Subject: [PATCH 28/56] libdrgn: don't use OpenMP tasking libomp (at least in LLVM 9 and 10) seems to have buggy OpenMP tasking support. See commit 1cc3868955b0 ("CI: temporarily disable Clang") for one example. OpenMP tasks aren't buying us much; they simplify DWARF index updates in some places but complicate it in others. Let's ditch tasks and go back to building an array of CUs to index similar to what we did before commit f83bb7c71bb8 ("libdrgn: move debugging information tracking into drgn_debug_info"). There is no significant performance difference. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 39 +++++---- libdrgn/dwarf_index.c | 195 +++++++++++++++++++++++++----------------- libdrgn/dwarf_index.h | 87 +++++++------------ 3 files changed, 167 insertions(+), 154 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 23266dab8..161baa536 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -981,8 +981,8 @@ drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, continue; } module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; - drgn_dwarf_index_read_module(dindex_state, module); - return NULL; + return drgn_dwarf_index_read_module(dindex_state, + module); } } /* @@ -1023,29 +1023,32 @@ drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) c_string_set_size(&dbinfo->module_names) + load->new_modules.size)) return &drgn_enomem; + struct drgn_dwarf_index_update_state dindex_state; - drgn_dwarf_index_update_begin(&dindex_state, &dbinfo->dindex); - /* - * In OpenMP 5.0, this could be "#pragma omp parallel master taskloop" - * (added in GCC 9 and Clang 10). 
- */ - #pragma omp parallel - #pragma omp master - #pragma omp taskloop + if (!drgn_dwarf_index_update_state_init(&dindex_state, &dbinfo->dindex)) + return &drgn_enomem; + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) for (size_t i = 0; i < load->new_modules.size; i++) { - if (drgn_dwarf_index_update_cancelled(&dindex_state)) + if (err) continue; struct drgn_error *module_err = drgn_debug_info_read_module(load, &dindex_state, load->new_modules.data[i]); - if (module_err) - drgn_dwarf_index_update_cancel(&dindex_state, module_err); + if (module_err) { + #pragma omp critical(drgn_debug_info_update_index_error) + if (err) + drgn_error_destroy(module_err); + else + err = module_err; + } } - struct drgn_error *err = drgn_dwarf_index_update_end(&dindex_state); - if (err) - return err; - drgn_debug_info_free_modules(dbinfo, true, false); - return NULL; + if (!err) + err = drgn_dwarf_index_update(&dindex_state); + drgn_dwarf_index_update_state_deinit(&dindex_state); + if (!err) + drgn_debug_info_free_modules(dbinfo, true, false); + return err; } struct drgn_error * diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 3d52630df..5308f6ca6 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -19,6 +19,16 @@ #include "siphash.h" #include "util.h" +struct drgn_dwarf_index_pending_cu { + struct drgn_debug_info_module *module; + const char *buf; + size_t len; + bool is_64_bit; + enum drgn_debug_info_scn scn; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) + /* * The DWARF abbreviation table gets translated into a series of instructions. 
* An instruction <= INSN_MAX_SKIP indicates a number of bytes to be skipped @@ -220,22 +230,26 @@ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) drgn_dwarf_index_namespace_deinit(&dindex->global); } -void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, +bool +drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, struct drgn_dwarf_index *dindex) { state->dindex = dindex; - state->old_cus_size = dindex->cus.size; - state->err = NULL; + state->max_threads = omp_get_max_threads(); + state->cus = malloc_array(state->max_threads, sizeof(*state->cus)); + if (!state->cus) + return false; + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_init(&state->cus[i]); + return true; } -void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, - struct drgn_error *err) +void +drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state) { - #pragma omp critical(drgn_dwarf_index_update_cancel) - if (state->err) - drgn_error_destroy(err); - else - state->err = err; + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_deinit(&state->cus[i]); + free(state->cus); } static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, @@ -1144,81 +1158,62 @@ indirect_insn:; return NULL; } -static void drgn_dwarf_index_read_cus(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn) +static struct drgn_error * +drgn_dwarf_index_read_cus(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) { + struct drgn_dwarf_index_pending_cu_vector *cus = + &state->cus[omp_get_thread_num()]; + struct drgn_error *err; struct drgn_debug_info_buffer buffer; drgn_debug_info_buffer_init(&buffer, module, scn); while (binary_buffer_has_next(&buffer.bb)) { - const char *cu_buf = buffer.bb.pos; + struct 
drgn_dwarf_index_pending_cu *cu = + drgn_dwarf_index_pending_cu_vector_append_entry(cus); + if (!cu) + return &drgn_enomem; + cu->module = module; + cu->buf = buffer.bb.pos; uint32_t unit_length32; if ((err = binary_buffer_next_u32(&buffer.bb, &unit_length32))) - goto err; - bool is_64_bit = unit_length32 == UINT32_C(0xffffffff); - if (is_64_bit) { + return err; + cu->is_64_bit = unit_length32 == UINT32_C(0xffffffff); + if (cu->is_64_bit) { uint64_t unit_length64; if ((err = binary_buffer_next_u64(&buffer.bb, &unit_length64))) - goto err; + return err; if (unit_length64 > SIZE_MAX) { - err = binary_buffer_error(&buffer.bb, - "unit length is too large"); - goto err; + return binary_buffer_error(&buffer.bb, + "unit length is too large"); } if ((err = binary_buffer_skip(&buffer.bb, unit_length64))) - goto err; + return err; } else { if ((err = binary_buffer_skip(&buffer.bb, unit_length32))) - goto err; - } - size_t cu_len = buffer.bb.pos - cu_buf; - - #pragma omp task - { - struct drgn_dwarf_index_cu cu = { - .module = module, - .buf = cu_buf, - .len = cu_len, - .is_64_bit = is_64_bit, - .is_type_unit = scn == DRGN_SCN_DEBUG_TYPES, - }; - struct drgn_dwarf_index_cu_buffer cu_buffer; - drgn_dwarf_index_cu_buffer_init(&cu_buffer, &cu); - struct drgn_error *cu_err = read_cu(&cu_buffer); - if (cu_err) - goto cu_err; - - cu_err = index_cu_first_pass(state->dindex, &cu_buffer); - if (cu_err) - goto cu_err; - - #pragma omp critical(drgn_dwarf_index_cus) - if (!drgn_dwarf_index_cu_vector_append(&state->dindex->cus, - &cu)) - cu_err = &drgn_enomem; - if (cu_err) { -cu_err: - drgn_dwarf_index_cu_deinit(&cu); - drgn_dwarf_index_update_cancel(state, cu_err); - } + return err; } + cu->len = buffer.bb.pos - cu->buf; + cu->scn = scn; } - return; - -err: - drgn_dwarf_index_update_cancel(state, err); + return NULL; } -void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module) +struct drgn_error * 
+drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module) { - drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); - if (module->scn_data[DRGN_SCN_DEBUG_TYPES]) - drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_TYPES); + struct drgn_error *err; + err = drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); + if (!err && module->scn_data[DRGN_SCN_DEBUG_TYPES]) { + err = drgn_dwarf_index_read_cus(state, module, + DRGN_SCN_DEBUG_TYPES); + } + return err; } bool @@ -1691,16 +1686,56 @@ static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) } struct drgn_error * -drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state) +drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) { struct drgn_dwarf_index *dindex = state->dindex; - if (state->err) + size_t old_cus_size = dindex->cus.size; + size_t new_cus_size = old_cus_size; + for (size_t i = 0; i < state->max_threads; i++) + new_cus_size += state->cus[i].size; + if (!drgn_dwarf_index_cu_vector_reserve(&dindex->cus, new_cus_size)) + return &drgn_enomem; + for (size_t i = 0; i < state->max_threads; i++) { + for (size_t j = 0; j < state->cus[i].size; j++) { + struct drgn_dwarf_index_pending_cu *pending_cu = + &state->cus[i].data[j]; + dindex->cus.data[dindex->cus.size++] = (struct drgn_dwarf_index_cu){ + .module = pending_cu->module, + .buf = pending_cu->buf, + .len = pending_cu->len, + .is_64_bit = pending_cu->is_64_bit, + .is_type_unit = + pending_cu->scn == DRGN_SCN_DEBUG_TYPES, + }; + } + } + + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) + for (size_t i = old_cus_size; i < dindex->cus.size; i++) { + if (err) + continue; + struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; + struct drgn_dwarf_index_cu_buffer cu_buffer; + drgn_dwarf_index_cu_buffer_init(&cu_buffer, cu); + struct drgn_error *cu_err = read_cu(&cu_buffer); + if (!cu_err) + cu_err = 
index_cu_first_pass(state->dindex, &cu_buffer); + if (cu_err) { + #pragma omp critical(drgn_dwarf_index_update_end_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + if (err) goto err; #pragma omp parallel for schedule(dynamic) - for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) { - if (drgn_dwarf_index_update_cancelled(state)) + for (size_t i = old_cus_size; i < dindex->cus.size; i++) { + if (err) continue; struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; struct drgn_dwarf_index_cu_buffer buffer; @@ -1710,20 +1745,22 @@ drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state) buffer.bb.pos += cu->is_64_bit ? 16 : 12; struct drgn_error *cu_err = index_cu_second_pass(&dindex->global, &buffer); - if (cu_err) - drgn_dwarf_index_update_cancel(state, cu_err); - } - if (state->err) { - drgn_dwarf_index_rollback(state->dindex); - goto err; + if (cu_err) { + #pragma omp critical(drgn_dwarf_index_update_end_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } } - return NULL; - + if (err) { + drgn_dwarf_index_rollback(dindex); err: - for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) - drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - dindex->cus.size = state->old_cus_size; - return state->err; + for (size_t i = old_cus_size; i < dindex->cus.size; i++) + drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); + dindex->cus.size = old_cus_size; + } + return err; } static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 116c8273a..ce3f65193 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -25,6 +25,14 @@ typedef struct {} omp_lock_t; #define omp_destroy_lock(lock) do {} while (0) #define omp_set_lock(lock) do {} while (0) #define omp_unset_lock(lock) do {} while (0) +static inline int omp_get_thread_num(void) +{ + return 0; +} +static inline int omp_get_max_threads(void) +{ + 
return 1; +} #endif #include "hash_table.h" @@ -183,78 +191,43 @@ void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex); */ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex); +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_cu_vector, + struct drgn_dwarf_index_pending_cu) + /** State tracked while updating a @ref drgn_dwarf_index. */ struct drgn_dwarf_index_update_state { struct drgn_dwarf_index *dindex; - size_t old_cus_size; - struct drgn_error *err; + /** Per-thread arrays of CUs to be indexed. */ + struct drgn_dwarf_index_pending_cu_vector *cus; + size_t max_threads; }; /** - * Prepare to update a @ref drgn_dwarf_index. + * Initialize state for updating a @ref drgn_dwarf_index. * - * @param[out] state Initialized update state. Must be passed to @ref - * drgn_dwarf_index_update_end(). + * @return @c true on success, @c false on failure to allocate memory. */ -void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, +bool +drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, struct drgn_dwarf_index *dindex); -/** - * Finish updating a @ref drgn_dwarf_index. - * - * This should be called once all of the tasks created by @ref - * drgn_dwarf_index_read_module() have completed (even if the update was - * cancelled). - * - * If the update was not cancelled, this finishes indexing all modules reported - * by @ref drgn_dwarf_index_read_module(). If it was cancelled or there is an - * error while indexing, this rolls back the index and removes the newly - * reported modules. - * - * @return @c NULL on success, non-@c NULL if the update was cancelled or there - * was another error. - */ -struct drgn_error * -drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state); - -/** - * Cancel an update of a @ref drgn_dwarf_index. - * - * This should be called if there is a fatal error and the update must be - * aborted. - * - * @param[in] err Error to report. 
This will be returned from @ref - * drgn_dwarf_index_update_end(). If an error has already been reported, this - * error is destroyed. - */ -void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, - struct drgn_error *err); +/** Deinitialize state for updating a @ref drgn_dwarf_index. */ +void +drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state); -/** - * Return whether an update of a @ref drgn_dwarf_index has been cancelled by - * @ref drgn_dwarf_index_update_cancel(). - * - * Because updating is parallelized, this allows tasks other than the one that - * encountered the error to "fail fast". - */ -static inline bool -drgn_dwarf_index_update_cancelled(struct drgn_dwarf_index_update_state *state) -{ - /* - * No need for omp critical/omp atomic since this is a best-effort - * optimization. - */ - return state->err != NULL; -} +/** Read a module for updating a @ref drgn_dwarf_index. */ +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module); /** - * Read a module for updating a @ref drgn_dwarf_index. + * Update a @ref drgn_dwarf_index. * - * This creates OpenMP tasks to begin indexing the module. It may cancel the - * update. + * This should be called once all modules have been read with @ref + * drgn_dwarf_index_read_module() to finish indexing those modules. */ -void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module); +struct drgn_error * +drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state); /** * Iterator over DWARF debugging information. From fcb46d5ab80571186c0f80b6bd9e64c7995caf9d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 May 2021 16:56:42 -0700 Subject: [PATCH 29/56] Revert "CI: temporarily disable Clang" This reverts commit 1cc3868955b07c8a99426660466113b4dc55d5e4. 
The previous commit (hopefully) works around the libomp bugs that caused Clang builds to fail. Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4627b7c6..017060bd7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: python-version: [3.9, 3.8, 3.7, 3.6] - cc: [gcc] + cc: [gcc, clang] fail-fast: false env: CC: ${{ matrix.cc }} From 6a8d335a1fbe4fe135f153e62d30e3a5d191a58e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 May 2021 09:50:13 -0700 Subject: [PATCH 30/56] Add scripts to build manylinux2010 wheels One nice side effect of commit e0921c5bdbef ("libdrgn: don't use OpenMP tasking") is that drgn now works with older versions of libgomp that don't implement taskloop, including version 4.4 in manylinux2010. So, we can finally build manylinux2010 wheels. These scripts are based on scripts from Stephen Brennan, with some cleanups and updates for changes in drgn's build requirements. Closes #69. 
Signed-off-by: Omar Sandoval --- scripts/build_dists.sh | 21 +++++++ scripts/build_manylinux_in_docker.sh | 83 ++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100755 scripts/build_dists.sh create mode 100755 scripts/build_manylinux_in_docker.sh diff --git a/scripts/build_dists.sh b/scripts/build_dists.sh new file mode 100755 index 000000000..54d660356 --- /dev/null +++ b/scripts/build_dists.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +set -eux + +: "${PYTHON=python3}" +"$PYTHON" setup.py sdist +SDIST=dist/drgn-"$("$PYTHON" setup.py --version)".tar.gz + +: "${DOCKER=docker}" +$DOCKER pull quay.io/pypa/manylinux2010_x86_64 +$DOCKER run -it \ + --env PLAT=manylinux2010_x86_64 \ + --env SDIST="$SDIST" \ + --env OWNER="$(id -u):$(id -g)" \ + --volume "$(pwd)":/io:ro \ + --volume "$(pwd)/dist":/io/dist \ + --workdir /io \ + --hostname drgn \ + --rm \ + quay.io/pypa/manylinux2010_x86_64 \ + ./scripts/build_manylinux_in_docker.sh diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh new file mode 100755 index 000000000..e001b6c55 --- /dev/null +++ b/scripts/build_manylinux_in_docker.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +set -eux + +# Drop into a shell if something fails. +trap 'if [ $? -ne 0 ]; then exec bash -i; fi' EXIT + +yum install -y \ + bzip2-devel \ + libzstd-devel \ + lzo-devel \ + snappy-devel \ + xz-devel \ + zlib-devel + +# The manylinux image contains an upgraded autotools in /usr/local, but the +# pkg-config macros are not present for this upgraded package. See +# https://github.com/pypa/manylinux/issues/731. +ln -s /usr/share/aclocal/pkg.m4 /usr/local/share/aclocal/ + +# Install a recent version of elfutils instead of whatever is in the manylinux +# image. 
+elfutils_version=0.183 +elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elfutils_version.tar.bz2 +mkdir /tmp/elfutils +cd /tmp/elfutils +curl -L "$elfutils_url" | tar -xj --strip-components=1 +# We don't bother with debuginfod support for a few reasons: +# +# 1. It depends on libcurl, which would pull in a bunch of transitive +# dependencies. +# 2. libdw loads libdebuginfod with dlopen(), which auditwheel misses. +# 3. drgn hasn't been tested with debuginfod. +./configure --disable-libdebuginfod --disable-debuginfod +make -j$(($(nproc) + 1)) +make install + +libkdumpfile_commit=v0.4.0 +libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/archive/$libkdumpfile_commit/libkdumpfile-$libkdumpfile_commit.tar.gz +mkdir /tmp/libkdumpfile +cd /tmp/libkdumpfile +curl -L "$libkdumpfile_url" | tar -xz --strip-components=1 +autoreconf -fiv +# z_const was added in zlib 1.2.5.2, but CentOS 6 has 1.2.3. +CPPFLAGS="-Dz_const=const" ./configure --with-lzo --with-snappy --with-zlib --without-python +make -j$(($(nproc) + 1)) +make install + +ldconfig + +mkdir /tmp/drgn +cd /tmp/drgn +tar -xf "/io/$SDIST" --strip-components=1 + +python_supported() { + "$1" -c 'import sys; sys.exit(sys.version_info < (3, 6))' +} + +for pybin in /opt/python/*/bin; do + if python_supported "$pybin/python"; then + # static_assert was added to assert.h in glibc 2.16, but CentOS + # 6 has 2.12. + CPPFLAGS="-Dstatic_assert=_Static_assert" "$pybin/pip" wheel . 
--no-deps -w /tmp/wheels/ + fi +done + +for wheel in /tmp/wheels/*.whl; do + if auditwheel show "$wheel"; then + auditwheel repair "$wheel" --plat "$PLAT" -w /tmp/manylinux_wheels/ + else + echo "Skipping non-platform wheel $wheel" + fi +done + +for pybin in /opt/python/*/bin; do + if python_supported "$pybin/python"; then + "$pybin/pip" install drgn --no-index -f /tmp/manylinux_wheels/ + "$pybin/drgn" --version + fi +done + +chown "$OWNER" /tmp/manylinux_wheels/* +mv /tmp/manylinux_wheels/* /io/dist/ From 92fd967a3ab61f15a0d25e3825644e5ac2e69bd6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 May 2021 15:32:15 -0700 Subject: [PATCH 31/56] libdrgn: print uint8_t as hex with PRIx8 format, not x In practice, they're probably always the same, but PRIx8 is more correct. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 161baa536..b3cccf4cf 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1604,7 +1604,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, */ default: return binary_buffer_error(&expr->bb, - "unknown DWARF expression opcode %#x", + "unknown DWARF expression opcode %#" PRIx8, opcode); } } @@ -3355,7 +3355,8 @@ drgn_dwarf_cfi_next_encoded(struct drgn_debug_info_buffer *buffer, if (encoding & DW_EH_PE_indirect) { unknown_fde_encoding: return binary_buffer_error(&buffer->bb, - "unknown EH encoding %#x", encoding); + "unknown EH encoding %#" PRIx8, + encoding); } size_t pos = (buffer->bb.pos - @@ -4201,7 +4202,7 @@ drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, if (!initial_row) { invalid_for_initial: err = binary_buffer_error(&buffer.bb, - "invalid initial DWARF CFI opcode %#x", + "invalid initial DWARF CFI opcode %#" PRIx8, opcode); goto out; } @@ -4246,7 +4247,7 @@ drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, break; default: err = binary_buffer_error(&buffer.bb, - "unknown 
DWARF CFI opcode %#x", + "unknown DWARF CFI opcode %#" PRIx8, opcode); goto out; } From 68fae425b619faceb047035fff4b4e7e42f7d6ce Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 May 2021 10:50:59 -0700 Subject: [PATCH 32/56] vmtest: manage: get mainline releases from mainline linux repository I incorrectly assumed that the stable repo would have the mainline tags soon after they were released, but this is not the case. We also need to check the mainline repo for tags and fetch them from the mainline repo. Signed-off-by: Omar Sandoval --- vmtest/manage.py | 63 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/vmtest/manage.py b/vmtest/manage.py index ed7ff1cb0..daddf592b 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -3,6 +3,7 @@ import argparse import asyncio +import itertools import logging import os from pathlib import Path @@ -32,20 +33,29 @@ (KernelVersion("5.5~"), KernelVersion("5.10")), ) -# Use the GitHub mirror rather than the official kernel.org repository since +# Use the GitHub mirrors rather than the official kernel.org repositories since # this script usually runs in GitHub Actions. 
+LINUX_GIT_URL = "https://github.com/torvalds/linux.git" STABLE_LINUX_GIT_URL = "https://github.com/gregkh/linux.git" async def get_latest_kernel_tags() -> List[str]: - ls_remote = ( - await check_output("git", "ls-remote", "--tags", "--refs", STABLE_LINUX_GIT_URL) - ).decode() + mainline_refs, stable_refs = await asyncio.gather( + check_output("git", "ls-remote", "--tags", "--refs", LINUX_GIT_URL), + check_output("git", "ls-remote", "--tags", "--refs", STABLE_LINUX_GIT_URL), + ) latest: Dict[str, KernelVersion] = {} - for match in re.finditer( - r"^[a-f0-9]+\s+refs/tags/v([0-9]+\.[0-9]+)(-rc[0-9]+|\.[0-9]+)?$", - ls_remote, - re.M, + for match in itertools.chain( + re.finditer( + r"^[a-f0-9]+\s+refs/tags/v([0-9]+\.[0-9]+)(-rc[0-9]+)?$", + mainline_refs.decode(), + re.M, + ), + re.finditer( + r"^[a-f0-9]+\s+refs/tags/v([0-9]+\.[0-9]+)(\.[0-9]+)$", + stable_refs.decode(), + re.M, + ), ): version = KernelVersion(match.group(1) + (match.group(2) or "")) for start_version, end_version in IGNORE_KERNEL_RANGES: @@ -69,22 +79,35 @@ def kernel_tag_to_release(tag: str) -> str: ) -async def fetch_kernel_tags(kernel_dir: Path, tags: Sequence[str]) -> None: +async def fetch_kernel_tags(kernel_dir: Path, kernel_tags: Sequence[str]) -> None: if not kernel_dir.exists(): logger.info("creating kernel repository in %s", kernel_dir) await check_call("git", "init", "-q", str(kernel_dir)) - logger.info("fetching kernel tags: %s", ", ".join(tags)) - await check_call( - "git", - "-C", - str(kernel_dir), - "fetch", - "--depth", - "1", - STABLE_LINUX_GIT_URL, - *(f"refs/tags/{tag}:refs/tags/{tag}" for tag in tags), - ) + mainline_tags = [] + stable_tags = [] + for tag in kernel_tags: + if re.fullmatch("v[0-9]+\.[0-9]+\.[0-9]+", tag): + stable_tags.append(tag) + else: + mainline_tags.append(tag) + + for (name, url, tags) in ( + ("mainline", LINUX_GIT_URL, mainline_tags), + ("stable", STABLE_LINUX_GIT_URL, stable_tags), + ): + if tags: + logger.info("fetching %s kernel tags: %s", name, ", 
".join(tags)) + await check_call( + "git", + "-C", + str(kernel_dir), + "fetch", + "--depth", + "1", + url, + *(f"refs/tags/{tag}:refs/tags/{tag}" for tag in tags), + ) async def build_kernels( From e7865e2b09b15c070b520e16da093a2d58bd3454 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 May 2021 11:15:46 -0700 Subject: [PATCH 33/56] setup.py: add 5.13 to vmtest kernels Signed-off-by: Omar Sandoval --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3b610ae5b..d60253092 100755 --- a/setup.py +++ b/setup.py @@ -125,7 +125,7 @@ def make_release_tree(self, base_dir, files): class test(Command): description = "run unit tests after in-place build" - KERNELS = ["5.12", "5.11", "5.10", "5.4", "4.19", "4.14", "4.9", "4.4"] + KERNELS = ["5.13", "5.12", "5.11", "5.10", "5.4", "4.19", "4.14", "4.9", "4.4"] user_options = [ ( From 179b33c76f82d0b6109c3eb3299b5e9d85b96d04 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 11 May 2021 00:33:25 -0700 Subject: [PATCH 34/56] python: fix comment in execscript() The runpy.run_code() function mentioned in the comment doesn't exist; execscript() is based on runpy.run_path(). Signed-off-by: Omar Sandoval --- drgn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drgn/__init__.py b/drgn/__init__.py index f3998cc0c..48cad2650 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -195,7 +195,7 @@ def task_exe_path(task): :param args: Zero or more additional arguments to pass to the script. This is a :ref:`variable argument list `. """ - # This is based on runpy.run_code, which we can't use because we want to + # This is based on runpy.run_path(), which we can't use because we want to # update globals even if the script throws an exception. 
saved_module = [] try: From ad37c79cba4f7bbf06316faf2dd930e0453ffabe Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 12 May 2021 16:19:42 -0700 Subject: [PATCH 35/56] libdrgn: python: add documentation and type annotation for Program.__contains__() drgn.Program has supported the "in" operator since commit 25e7a9d3b802 ("libdrgn/python: implement Program.__contains__"), but it's undocumented and unannotated. Add a type annotation with a docstring along with a METH_COEXIST method. Signed-off-by: Omar Sandoval --- _drgn.pyi | 10 +++++++++- libdrgn/python/program.c | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 56d947783..3b4b516cf 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -105,7 +105,15 @@ class Program: >>> prog['jiffies'] Object(prog, 'volatile unsigned long', address=0xffffffff94c05000) - :param name: The object name. + :param name: Object name. + """ + ... + def __contains__(self, name: str) -> bool: + """ + Implement ``name in self``. Return whether an object (variable, + constant, or function) with the given name exists in the program. + + :param name: Object name. """ ... 
def variable(self, name: str, filename: Optional[str] = None) -> Object: diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index c28d32f8a..453edf628 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -879,6 +879,8 @@ static PyMethodDef Program_methods[] = { drgn_Program_load_default_debug_info_DOC}, {"__getitem__", (PyCFunction)Program_subscript, METH_O | METH_COEXIST, drgn_Program___getitem___DOC}, + {"__contains__", (PyCFunction)Program_contains, METH_O | METH_COEXIST, + drgn_Program___contains___DOC}, {"read", (PyCFunction)Program_read, METH_VARARGS | METH_KEYWORDS, drgn_Program_read_DOC}, #define METHOD_DEF_READ(x) \ @@ -950,7 +952,6 @@ static PyMappingMethods Program_as_mapping = { .mp_subscript = (binaryfunc)Program_subscript, }; - static PySequenceMethods Program_as_sequence = { .sq_contains = (objobjproc)Program_contains, }; From 43b90ffb1b2ec50b9b00cdec4df9e5eb11b4d547 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 23 May 2021 11:45:03 -0700 Subject: [PATCH 36/56] libdrgn: debug_info: add missing stack size check for DW_OP_deref Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index b3cccf4cf..2bd55ba58 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1387,7 +1387,6 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, deref_size = address_size; goto deref; case DW_OP_deref_size: - CHECK(1); if ((err = binary_buffer_next_u8(&expr->bb, &deref_size))) return err; @@ -1397,6 +1396,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, } deref: { + CHECK(1); char deref_buf[8]; err = drgn_program_read_memory(prog, deref_buf, ELEM(0), deref_size, false); From dd0885bacd2dad20ccf889f4af0ecb437db4bfeb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 1 Jun 2021 16:34:02 -0700 Subject: [PATCH 37/56] CI: install correct version of libomp 
actions/virtual-environments@15a610677be406d250c1f6732b03c8b87e693a0a changed the default version of Clang from 10 to 11, but `apt-get install libomp-dev` still installs libomp-10-dev. Ideally, the correct version of libomp would already be pre-installed (see actions/virtual-environments#3506), but for now make sure we install the correct version. Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 017060bd7..041ab32a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install busybox-static libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-dev' || '' }} + sudo apt-get install busybox-static libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} pip install mypy - name: Generate version.py run: python setup.py --version From e5ff1ea7ac4e1d7086eebf0c9f1d53c78455feec Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 3 Jun 2021 16:58:45 -0700 Subject: [PATCH 38/56] libdrgn: program: use preset platform in drgn_program_set_core_dump() If the program already had a platform set, we should use its callbacks instead of the ones from the ELF file's platform. 
Signed-off-by: Omar Sandoval --- libdrgn/program.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/libdrgn/program.c b/libdrgn/program.c index 2694428d4..0a09ceadc 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -191,7 +191,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) { struct drgn_error *err; GElf_Ehdr ehdr_mem, *ehdr; - struct drgn_platform platform; + bool had_platform; bool is_64_bit, is_kdump; size_t phnum, i; size_t num_file_segments, j; @@ -232,13 +232,17 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) "not an ELF core file"); goto out_elf; } - - drgn_platform_from_elf(ehdr, &platform); + had_platform = prog->has_platform; + if (!had_platform) { + struct drgn_platform platform; + drgn_platform_from_elf(ehdr, &platform); + drgn_program_set_platform(prog, &platform); + } is_64_bit = ehdr->e_ident[EI_CLASS] == ELFCLASS64; if (elf_getphdrnum(prog->core, &phnum) != 0) { err = drgn_error_libelf(); - goto out_elf; + goto out_platform; } /* @@ -252,7 +256,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) phdr = gelf_getphdr(prog->core, i, &phdr_mem); if (!phdr) { err = drgn_error_libelf(); - goto out_elf; + goto out_platform; } if (phdr->p_type == PT_LOAD) { @@ -270,7 +274,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) note_header_type(phdr)); if (!data) { err = drgn_error_libelf(); - goto out_elf; + goto out_platform; } offset = 0; @@ -310,7 +314,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) if (fstatfs(prog->core_fd, &fs) == -1) { err = drgn_error_create_os("fstatfs", errno, path); if (err) - goto out_elf; + goto out_platform; } is_proc_kcore = fs.f_type == 0x9fa0; /* PROC_SUPER_MAGIC */ } else { @@ -325,7 +329,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) if (env && atoi(env)) { err = drgn_program_set_kdump(prog); if (err) 
- goto out_elf; + goto out_platform; return NULL; } } @@ -334,11 +338,11 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) sizeof(*prog->file_segments)); if (!prog->file_segments) { err = &drgn_enomem; - goto out_elf; + goto out_platform; } if ((is_proc_kcore || vmcoreinfo_note) && - platform.arch->linux_kernel_pgtable_iterator_next) { + prog->platform.arch->linux_kernel_pgtable_iterator_next) { /* * Try to read any memory that isn't in the core dump via the * page table. @@ -397,12 +401,11 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) * pass, as we may need to read virtual memory to determine the mapping. */ if (is_proc_kcore && !have_phys_addrs && - platform.arch->linux_kernel_live_direct_mapping_fallback) { + prog->platform.arch->linux_kernel_live_direct_mapping_fallback) { uint64_t direct_mapping, direct_mapping_size; - - err = platform.arch->linux_kernel_live_direct_mapping_fallback(prog, - &direct_mapping, - &direct_mapping_size); + err = prog->platform.arch->linux_kernel_live_direct_mapping_fallback(prog, + &direct_mapping, + &direct_mapping_size); if (err) goto out_segments; @@ -467,7 +470,6 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) prog->lang = &drgn_language_c; } - drgn_program_set_platform(prog, &platform); return NULL; out_segments: @@ -475,6 +477,8 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) drgn_memory_reader_init(&prog->reader); free(prog->file_segments); prog->file_segments = NULL; +out_platform: + prog->has_platform = had_platform; out_elf: elf_end(prog->core); prog->core = NULL; From 0e3054a0ba2c14bcbd5026ada965700801619939 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 3 Jun 2021 17:43:27 -0700 Subject: [PATCH 39/56] libdrgn: make addresses wrap around when reading memory Define that addresses for memory reads wrap around after the maximum address rather than the current unpredictable behavior. This is done by: 1. 
Reworking drgn_memory_reader to work with an inclusive address range so that a segment can contain UINT64_MAX. drgn_memory_reader remains agnostic to the maximum address and requires that address ranges do not overflow a uint64_t. 2. Adding the overflow/wrap-around logic to drgn_program_add_memory_segment() and drgn_program_read_memory(). 3. Changing direct uses of drgn_memory_reader_read() to drgn_program_read_memory() now that they are no longer equivalent. (For some platforms, a fault might be more appropriate than wrapping around, but this is a step in the right direction.) Signed-off-by: Omar Sandoval --- libdrgn/drgn.h.in | 3 +- libdrgn/language_c.c | 12 +++-- libdrgn/linux_kernel.c | 8 ++-- libdrgn/linux_kernel.h | 4 +- libdrgn/memory_reader.c | 97 +++++++++++++++++------------------------ libdrgn/memory_reader.h | 34 ++++++++++----- libdrgn/object.c | 10 ++--- libdrgn/program.c | 68 ++++++++++++++++++++--------- tests/test_program.py | 35 +++++++++++---- 9 files changed, 153 insertions(+), 118 deletions(-) diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 89f642051..db19a2251 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -497,7 +497,8 @@ typedef struct drgn_error *(*drgn_memory_read_fn)(void *buf, uint64_t address, * Register a segment of memory in a @ref drgn_program. * * If the segment overlaps a previously registered segment, the new segment - * takes precedence. + * takes precedence. If any part of the segment is beyond the maximum address, + * that part is ignored. * * @param[in] address Address of the segment. * @param[in] size Size of the segment in bytes. 
diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 732b31f10..0a29715bc 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -15,7 +15,6 @@ #include "hash_table.h" #include "language.h" // IWYU pragma: associated #include "lexer.h" -#include "memory_reader.h" #include "minmax.h" #include "object.h" #include "program.h" @@ -653,8 +652,8 @@ c_format_character(unsigned char c, bool escape_single_quote, } static struct drgn_error * -c_format_string(struct drgn_memory_reader *reader, uint64_t address, - uint64_t length, struct string_builder *sb) +c_format_string(struct drgn_program *prog, uint64_t address, uint64_t length, + struct string_builder *sb) { struct drgn_error *err; @@ -662,8 +661,7 @@ c_format_string(struct drgn_memory_reader *reader, uint64_t address, return &drgn_enomem; while (length) { unsigned char c; - - err = drgn_memory_reader_read(reader, &c, address++, 1, false); + err = drgn_program_read_memory(prog, &c, address++, 1, false); if (err) return err; @@ -1318,7 +1316,7 @@ c_format_pointer_object(const struct drgn_object *obj, return &drgn_enomem; if (c_string) { - err = c_format_string(&drgn_object_program(obj)->reader, uvalue, + err = c_format_string(drgn_object_program(obj), uvalue, UINT64_MAX, sb); } else { struct drgn_object dereferenced; @@ -1474,7 +1472,7 @@ c_format_array_object(const struct drgn_object *obj, return NULL; } case DRGN_OBJECT_REFERENCE: - return c_format_string(&drgn_object_program(obj)->reader, + return c_format_string(drgn_object_program(obj), obj->address, iter.length, sb); case DRGN_OBJECT_ABSENT: ) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index e353b695c..769c26b5c 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -24,7 +24,6 @@ #include "helpers.h" #include "language.h" #include "linux_kernel.h" -#include "memory_reader.h" #include "platform.h" #include "program.h" #include "type.h" @@ -195,8 +194,7 @@ struct drgn_error *proc_kallsyms_symbol_addr(const char 
*name, * we can read from the physical address of the vmcoreinfo note exported in * sysfs. */ -struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, - struct vmcoreinfo *ret) +struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog) { struct drgn_error *err; FILE *file; @@ -221,7 +219,7 @@ struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, if (!buf) return &drgn_enomem; - err = drgn_memory_reader_read(reader, buf, address, size, true); + err = drgn_program_read_memory(prog, buf, address, size, true); if (err) goto out; @@ -239,7 +237,7 @@ struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, goto out; } - err = parse_vmcoreinfo(buf + 24, nhdr->n_descsz, ret); + err = parse_vmcoreinfo(buf + 24, nhdr->n_descsz, &prog->vmcoreinfo); out: free(buf); return err; diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 936155fc3..62bead4c4 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -7,7 +7,6 @@ #include "drgn.h" struct drgn_debug_info_load_state; -struct drgn_memory_reader; struct vmcoreinfo; struct drgn_error *read_memory_via_pgtable(void *buf, uint64_t address, @@ -20,8 +19,7 @@ struct drgn_error *parse_vmcoreinfo(const char *desc, size_t descsz, struct drgn_error *proc_kallsyms_symbol_addr(const char *name, unsigned long *ret); -struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, - struct vmcoreinfo *ret); +struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog); struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, const char *filename, diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 8ac6e09f8..18ad93b62 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later +#include #include #include #include @@ -45,25 +46,15 @@ bool drgn_memory_reader_empty(struct drgn_memory_reader *reader) struct drgn_error * drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, - uint64_t address, uint64_t size, + uint64_t min_address, uint64_t max_address, drgn_memory_read_fn read_fn, void *arg, bool physical) { + assert(min_address <= max_address); + struct drgn_memory_segment_tree *tree = (physical ? &reader->physical_segments : &reader->virtual_segments); - struct drgn_memory_segment_tree_iterator it; - struct drgn_memory_segment *stolen = NULL, *segment; - struct drgn_memory_segment *truncate_head = NULL, *truncate_tail = NULL; - uint64_t end, existing_end; - - if (size == 0) - return NULL; - - if (__builtin_add_overflow(address, size, &end)) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "memory segment end is too large"); - } /* * This is split into two steps: the first step handles an overlapping @@ -72,22 +63,23 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * can steal an existing segment instead of allocating a new one. */ - it = drgn_memory_segment_tree_search_le(tree, &address); + struct drgn_memory_segment *stolen = NULL, *segment; + struct drgn_memory_segment *truncate_head = NULL, *truncate_tail = NULL; + struct drgn_memory_segment_tree_iterator it = + drgn_memory_segment_tree_search_le(tree, &min_address); if (it.entry) { - existing_end = it.entry->address + it.entry->size; - if (end < existing_end) { + if (max_address < it.entry->max_address) { /* * The new segment lies entirely within an existing * segment, and part of the existing segment extends * after the new segment (a "tail"). 
*/ - struct drgn_memory_segment *tail; - - tail = malloc(sizeof(*tail)); + struct drgn_memory_segment *tail = + malloc(sizeof(*tail)); if (!tail) return &drgn_enomem; - if (it.entry->address == address) { + if (it.entry->min_address == min_address) { /* * The new segment starts at the same address as * the existing segment, so we can steal the @@ -108,23 +100,22 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, truncate_tail = it.entry; } - tail->address = end; - tail->size = existing_end - end; - tail->orig_address = it.entry->orig_address; + tail->min_address = max_address + 1; + tail->max_address = it.entry->max_address; + tail->orig_min_address = it.entry->orig_min_address; tail->read_fn = it.entry->read_fn; tail->arg = it.entry->arg; - drgn_memory_segment_tree_insert(tree, tail, - NULL); + drgn_memory_segment_tree_insert(tree, tail, NULL); goto insert; } - if (it.entry->address == address) { + if (it.entry->min_address == min_address) { /* * The new segment subsumes an existing segment at the * same address. We can steal the existing segment. */ stolen = it.entry; - } else if (address < existing_end) { + } else if (min_address <= it.entry->max_address) { /* * The new segment overlaps an existing segment before * it, and part of the existing segment extends before @@ -145,8 +136,7 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, } while (it.entry) { - existing_end = it.entry->address + it.entry->size; - if (end >= existing_end) { + if (max_address >= it.entry->max_address) { /* * The new segment subsumes an existing segment after * it. @@ -158,9 +148,7 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * allocate a new segment later, it's safe to * modify the tree now. 
*/ - struct drgn_memory_segment *existing_segment; - - existing_segment = it.entry; + struct drgn_memory_segment *existing_segment = it.entry; it = drgn_memory_segment_tree_delete_iterator(tree, it); free(existing_segment); } else { @@ -180,7 +168,7 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, } continue; } - if (end > it.entry->address) { + if (max_address >= it.entry->min_address) { /* * The new segment overlaps an existing segment after * it, and part of the existing segment extends after @@ -208,14 +196,12 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * Now that we've allocated the new segment if necessary, we can safely * modify the tree. */ - if (truncate_head) { - truncate_head->size -= end - truncate_head->address; - truncate_head->address = end; - } + if (truncate_head) + truncate_head->min_address = max_address + 1; if (truncate_tail) - truncate_tail->size = address - truncate_tail->address; - segment->address = segment->orig_address = address; - segment->size = size; + truncate_tail->max_address = min_address - 1; + segment->min_address = segment->orig_min_address = min_address; + segment->max_address = max_address; segment->read_fn = read_fn; segment->arg = arg; /* If the segment is stolen, then it's already in the tree. */ @@ -228,33 +214,32 @@ struct drgn_error *drgn_memory_reader_read(struct drgn_memory_reader *reader, void *buf, uint64_t address, size_t count, bool physical) { + assert(count == 0 || count - 1 <= UINT64_MAX - address); + + struct drgn_error *err; struct drgn_memory_segment_tree *tree = (physical ? 
&reader->physical_segments : &reader->virtual_segments); - struct drgn_error *err; - size_t read = 0; - - while (read < count) { - struct drgn_memory_segment *segment; - size_t n; - - segment = drgn_memory_segment_tree_search_le(tree, - &address).entry; - if (!segment || segment->address + segment->size <= address) { + char *p = buf; + while (count > 0) { + struct drgn_memory_segment *segment = + drgn_memory_segment_tree_search_le(tree, + &address).entry; + if (!segment || segment->max_address < address) { return drgn_error_create_fault("could not find memory segment", address); } - n = min(segment->address + segment->size - address, - (uint64_t)(count - read)); - err = segment->read_fn((char *)buf + read, address, n, - address - segment->orig_address, + size_t n = min((uint64_t)(count - 1), + segment->max_address - address) + 1; + err = segment->read_fn(p, address, n, + address - segment->orig_min_address, segment->arg, physical); if (err) return err; - - read += n; + p += n; address += n; + count -= n; } return NULL; } diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index 8d4eaff3d..9e608995a 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -25,23 +25,24 @@ * @ref drgn_memory_reader provides a common interface for registering regions * of memory in a program and reading from memory. * + * @ref drgn_memory_reader does not have a notion of the maximum address or + * address overflow/wrap-around. Those must be handled at a higher layer. + * * @{ */ /** Memory segment in a @ref drgn_memory_reader. */ struct drgn_memory_segment { struct binary_tree_node node; - /** Address of the segment in memory. */ - uint64_t address; - /** Size of the segment in bytes; */ - uint64_t size; + /** Address range of the segment in memory (inclusive). */ + uint64_t min_address, max_address; /** * The address of the segment when it was added, before any truncations. * - * This is always greater than or equal to @ref - * drgn_memory_segment::address. 
+ * This is always less than or equal to @ref + * drgn_memory_segment::min_address. */ - uint64_t orig_address; + uint64_t orig_min_address; /** Read callback. */ drgn_memory_read_fn read_fn; /** Argument to pass to @ref drgn_memory_segment::read_fn. */ @@ -51,7 +52,7 @@ struct drgn_memory_segment { static inline uint64_t drgn_memory_segment_to_key(const struct drgn_memory_segment *entry) { - return entry->address; + return entry->min_address; } DEFINE_BINARY_SEARCH_TREE_TYPE(drgn_memory_segment_tree, @@ -84,10 +85,20 @@ void drgn_memory_reader_deinit(struct drgn_memory_reader *reader); /** Return whether a @ref drgn_memory_reader has no segments. */ bool drgn_memory_reader_empty(struct drgn_memory_reader *reader); -/** @sa drgn_program_add_memory_segment() */ +/** + * Add a segment to a @ref drgn_memory_reader. + * + * @param[in] reader Memory reader. + * @param[in] min_address Start address (inclusive). + * @param[in] max_address End address (inclusive). Must be `>= min_address`. + * @param[in] read_fn Callback to read from segment. + * @param[in] arg Argument to pass to @p read_fn. + * @param[in] physical Whether to add a physical memory segment. + * @return @c NULL on success, non-@c NULL on error. + */ struct drgn_error * drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, - uint64_t address, uint64_t size, + uint64_t min_address, uint64_t max_address, drgn_memory_read_fn read_fn, void *arg, bool physical); @@ -97,7 +108,8 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * @param[in] reader Memory reader. * @param[out] buf Buffer to read into. * @param[in] address Starting address in memory to read. - * @param[in] count Number of bytes to read. + * @param[in] count Number of bytes to read. `address + count - 1` must be + * `<= UINT64_MAX` * @param[in] physical Whether @c address is physical. * @return @c NULL on success, non-@c NULL on error. 
*/ diff --git a/libdrgn/object.c b/libdrgn/object.c index 60deb2f94..efd862b99 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -10,7 +10,6 @@ #include "drgn.h" #include "error.h" #include "language.h" -#include "memory_reader.h" #include "minmax.h" #include "object.h" #include "program.h" @@ -537,8 +536,8 @@ drgn_object_read_reference(const struct drgn_object *obj, if (!dst) return &drgn_enomem; } - err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, - dst, obj->address, size, false); + err = drgn_program_read_memory(drgn_object_program(obj), dst, + obj->address, size, false); if (err) { if (dst != value->ibuf) free(dst); @@ -553,9 +552,8 @@ drgn_object_read_reference(const struct drgn_object *obj, uint64_t read_size = drgn_value_size(bit_offset + bit_size); char buf[9]; assert(read_size <= sizeof(buf)); - err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, - buf, obj->address, read_size, - false); + err = drgn_program_read_memory(drgn_object_program(obj), buf, + obj->address, read_size, false); if (err) return err; drgn_value_deserialize(value, buf, bit_offset, obj->encoding, diff --git a/libdrgn/program.c b/libdrgn/program.c index 0a09ceadc..e4b5c6cdd 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -23,6 +23,7 @@ #include "language.h" #include "linux_kernel.h" #include "memory_reader.h" +#include "minmax.h" #include "object_index.h" #include "program.h" #include "symbol.h" @@ -141,8 +142,16 @@ drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, uint64_t size, drgn_memory_read_fn read_fn, void *arg, bool physical) { - return drgn_memory_reader_add_segment(&prog->reader, address, size, - read_fn, arg, physical); + uint64_t address_mask; + struct drgn_error *err = drgn_program_address_mask(prog, &address_mask); + if (err) + return err; + if (size == 0 || address > address_mask) + return NULL; + uint64_t max_address = address + min(size - 1, address_mask - address); + return 
drgn_memory_reader_add_segment(&prog->reader, address, + max_address, read_fn, arg, + physical); } LIBDRGN_PUBLIC struct drgn_error * @@ -347,9 +356,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) * Try to read any memory that isn't in the core dump via the * page table. */ - err = drgn_program_add_memory_segment(prog, 0, - is_64_bit ? - UINT64_MAX : UINT32_MAX, + err = drgn_program_add_memory_segment(prog, 0, UINT64_MAX, read_memory_via_pgtable, prog, false); if (err) @@ -448,8 +455,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) if (is_proc_kcore) { if (!vmcoreinfo_note) { - err = read_vmcoreinfo_fallback(&prog->reader, - &prog->vmcoreinfo); + err = read_vmcoreinfo_fallback(prog); if (err) goto out_segments; } @@ -498,17 +504,20 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_set_pid(struct drgn_program *prog, pid_t pid) { struct drgn_error *err; - char buf[64]; err = drgn_program_check_initialized(prog); if (err) return err; + char buf[64]; sprintf(buf, "/proc/%ld/mem", (long)pid); prog->core_fd = open(buf, O_RDONLY); if (prog->core_fd == -1) return drgn_error_create_os("open", errno, buf); + bool had_platform = prog->has_platform; + drgn_program_set_platform(prog, &drgn_host_platform); + prog->file_segments = malloc(sizeof(*prog->file_segments)); if (!prog->file_segments) { err = &drgn_enomem; @@ -526,7 +535,6 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) prog->pid = pid; prog->flags |= DRGN_PROGRAM_IS_LIVE; - drgn_program_set_platform(prog, &drgn_host_platform); return NULL; out_segments: @@ -535,6 +543,7 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) free(prog->file_segments); prog->file_segments = NULL; out_fd: + prog->has_platform = had_platform; close(prog->core_fd); prog->core_fd = -1; return err; @@ -943,8 +952,23 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_memory(struct drgn_program *prog, void *buf, uint64_t address, size_t count, bool physical) { - 
return drgn_memory_reader_read(&prog->reader, buf, address, count, - physical); + uint64_t address_mask; + struct drgn_error *err = drgn_program_address_mask(prog, &address_mask); + if (err) + return err; + char *p = buf; + address &= address_mask; + while (count > 0) { + size_t n = min((uint64_t)(count - 1), address_mask - address) + 1; + err = drgn_memory_reader_read(&prog->reader, p, address, n, + physical); + if (err) + return err; + p += n; + address = 0; + count -= n; + } + return NULL; } DEFINE_VECTOR(char_vector, char) @@ -953,9 +977,13 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_c_string(struct drgn_program *prog, uint64_t address, bool physical, size_t max_size, char **ret) { - struct drgn_error *err; + uint64_t address_mask; + struct drgn_error *err = drgn_program_address_mask(prog, &address_mask); + if (err) + return err; struct char_vector str = VECTOR_INIT; for (;;) { + address &= address_mask; char *c = char_vector_append_entry(&str); if (!c) { char_vector_deinit(&str); @@ -985,8 +1013,8 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_u8(struct drgn_program *prog, uint64_t address, bool physical, uint8_t *ret) { - return drgn_memory_reader_read(&prog->reader, ret, address, - sizeof(*ret), physical); + return drgn_program_read_memory(prog, ret, address, sizeof(*ret), + physical); } #define DEFINE_PROGRAM_READ_U(n) \ @@ -999,8 +1027,8 @@ drgn_program_read_u##n(struct drgn_program *prog, uint64_t address, \ if (err) \ return err; \ uint##n##_t tmp; \ - err = drgn_memory_reader_read(&prog->reader, &tmp, address, \ - sizeof(tmp), physical); \ + err = drgn_program_read_memory(prog, &tmp, address, sizeof(tmp), \ + physical); \ if (err) \ return err; \ if (bswap) \ @@ -1027,8 +1055,8 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, return err; if (is_64_bit) { uint64_t tmp; - err = drgn_memory_reader_read(&prog->reader, &tmp, address, - sizeof(tmp), physical); + err = drgn_program_read_memory(prog, &tmp, address, 
sizeof(tmp), + physical); if (err) return err; if (bswap) @@ -1036,8 +1064,8 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, *ret = tmp; } else { uint32_t tmp; - err = drgn_memory_reader_read(&prog->reader, &tmp, address, - sizeof(tmp), physical); + err = drgn_program_read_memory(prog, &tmp, address, sizeof(tmp), + physical); if (err) return err; if (bswap) diff --git a/tests/test_program.py b/tests/test_program.py index 87d2f0b1e..d3a9c94c4 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -173,10 +173,27 @@ def test_adjacent_segments(self): ) self.assertEqual(prog.read(0xFFFF0000, 14), data[:14]) + def test_address_overflow(self): + for bits in (64, 32): + with self.subTest(bits=bits): + prog = mock_program( + segments=[ + MockMemorySegment(b"cd", 0x0), + MockMemorySegment(b"abyz", 2 ** bits - 2), + ], + platform=MOCK_PLATFORM if bits == 64 else MOCK_32BIT_PLATFORM, + ) + for start in range(3): + for size in range(4 - start): + self.assertEqual( + prog.read((2 ** bits - 2 + start) % 2 ** 64, size), + b"abcd"[start : start + size], + ) + def test_overlap_same_address_smaller_size(self): # Existing segment: |_______| # New segment: |___| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -188,7 +205,7 @@ def test_overlap_same_address_smaller_size(self): def test_overlap_within_segment(self): # Existing segment: |_______| # New segment: |___| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -205,7 +222,7 @@ def test_overlap_within_segment(self): def test_overlap_same_segment(self): # Existing segment: |_______| # New segment: |_______| - prog = Program() + prog = 
Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -217,7 +234,7 @@ def test_overlap_same_segment(self): def test_overlap_same_address_larger_size(self): # Existing segment: |___| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 64, segment1) @@ -229,7 +246,7 @@ def test_overlap_same_address_larger_size(self): def test_overlap_segment_tail(self): # Existing segment: |_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -241,7 +258,7 @@ def test_overlap_segment_tail(self): def test_overlap_subsume_after(self): # Existing segments: |_|_|_|_| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) segment3 = unittest.mock.Mock(side_effect=zero_memory_read) @@ -258,7 +275,7 @@ def test_overlap_subsume_after(self): def test_overlap_segment_head(self): # Existing segment: |_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0040, 128, segment1) @@ -270,7 +287,7 @@ def test_overlap_segment_head(self): def test_overlap_segment_head_and_tail(self): # Existing segment: |_______||_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = 
unittest.mock.Mock(side_effect=zero_memory_read) segment3 = unittest.mock.Mock(side_effect=zero_memory_read) @@ -285,7 +302,7 @@ def test_overlap_segment_head_and_tail(self): def test_overlap_subsume_at_and_after(self): # Existing segments: |_|_|_|_| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 32, segment1) From 5fc879ef3e1b05c4d79ce3c827375394ce2b1d5d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 23 May 2021 13:30:08 -0700 Subject: [PATCH 40/56] libdrgn: debug_info: limit number of DWARF expression operations executed A malformed DWARF expression can easily get us into an infinite loop. Avoid this by capping the number of operations that we'll execute. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 2bd55ba58..b51eca175 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1242,7 +1242,14 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define PUSH_MASK(x) PUSH((x) & address_mask) + /* Arbitrary limit so we don't go into an infinite loop. */ + int remaining_ops = 10000; while (binary_buffer_has_next(&expr->bb)) { + if (remaining_ops <= 0) { + return binary_buffer_error(&expr->bb, + "DWARF expression executed too many operations"); + } + remaining_ops--; uint8_t opcode; if ((err = binary_buffer_next_u8(&expr->bb, &opcode))) return err; From dcda688c9ac7496ca82accd4c46d0c414928797e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Jun 2021 10:03:40 -0700 Subject: [PATCH 41/56] libdrgn: debug_info: parenthesize PUSH() macro argument It doesn't make a difference anywhere it's currently used, but let's do it just in case that changes in the future. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index b51eca175..4272f52bf 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1235,7 +1235,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define ELEM(i) stack->data[stack->size - 1 - (i)] #define PUSH(x) do { \ - uint64_t push = x; \ + uint64_t push = (x); \ if (!uint64_vector_append(stack, &push)) \ return &drgn_enomem; \ } while (0) From e105be6c18bd8386c3ed9a13f281fa71a16c9be1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Jun 2021 09:43:39 -0700 Subject: [PATCH 42/56] libdrgn: debug_info: add helper to cache module section Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 4272f52bf..52bc53324 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -960,6 +960,15 @@ drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) return NULL; } +static struct drgn_error * +drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) +{ + if (module->scn_data[scn]) + return NULL; + return read_elf_section(module->scns[scn], &module->scn_data[scn]); +} + static struct drgn_error * drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, struct drgn_dwarf_index_update_state *dindex_state, @@ -3640,7 +3649,7 @@ drgn_parse_dwarf_frames(struct drgn_debug_info_module *module, if (!module->scns[scn]) return NULL; - err = read_elf_section(module->scns[scn], &module->scn_data[scn]); + err = drgn_debug_info_module_cache_section(module, scn); if (err) return err; Elf_Data *data = module->scn_data[scn]; @@ -4480,20 +4489,14 @@ drgn_debug_info_parse_orc(struct drgn_debug_info_module *module) return drgn_error_libelf(); module->orc_pc_base = 
shdr->sh_addr; - if (!module->scn_data[DRGN_SCN_ORC_UNWIND_IP]) { - err = read_elf_section(module->scns[DRGN_SCN_ORC_UNWIND_IP], - &module->scn_data[DRGN_SCN_ORC_UNWIND_IP]); - if (err) - return err; - } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_ORC_UNWIND_IP); + if (err) + return err; + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_ORC_UNWIND); + if (err) + return err; Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]; - - if (!module->scn_data[DRGN_SCN_ORC_UNWIND]) { - err = read_elf_section(module->scns[DRGN_SCN_ORC_UNWIND], - &module->scn_data[DRGN_SCN_ORC_UNWIND]); - if (err) - return err; - } Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND]; size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t); From d5b68455b89b7af1d731a83c76576ae6873fca4c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 May 2021 01:26:49 -0700 Subject: [PATCH 43/56] libdrgn: debug_info: save .debug_loc .debug_loc will be used for variable resolution. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 1 + libdrgn/debug_info.h | 1 + 2 files changed, 2 insertions(+) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 52bc53324..d3546f03f 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -105,6 +105,7 @@ static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_EH_FRAME] = ".eh_frame", [DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip", [DRGN_SCN_ORC_UNWIND] = ".orc_unwind", + [DRGN_SCN_DEBUG_LOC] = ".debug_loc", [DRGN_SCN_TEXT] = ".text", [DRGN_SCN_GOT] = ".got", }; diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 15800b95c..ab04365cb 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -64,6 +64,7 @@ enum drgn_debug_info_scn { DRGN_SCN_EH_FRAME, DRGN_SCN_ORC_UNWIND_IP, DRGN_SCN_ORC_UNWIND, + DRGN_SCN_DEBUG_LOC, DRGN_NUM_DEBUG_SCN_DATA, From 8335450ecba886df2337f42eafbc274af462e408 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Jun 2021 09:49:47 -0700 Subject: [PATCH 44/56] libdrgn: debug_info: implement DW_OP_fbreg Implement looking up location descriptions and evaluating DW_OP_fbreg. This isn't actually used yet since CFI expressions don't have a current function DIE, but it will be used for parameters/local variables in stack traces. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 241 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 235 insertions(+), 6 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index d3546f03f..b2f5eef9a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -34,6 +34,12 @@ #include "type.h" #include "util.h" +/** + * Arbitrary limit for number of operations to execute in a DWARF expression to + * avoid infinite loops. + */ +static const int MAX_DWARF_EXPR_OPS = 10000; + struct drgn_dwarf_cie { /* Whether this CIE is from .eh_frame. 
*/ bool is_eh; @@ -1157,6 +1163,114 @@ bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } +static struct drgn_error * +drgn_dwarf_location(struct drgn_debug_info_module *module, + Dwarf_Attribute *attr, + const struct drgn_register_state *regs, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + switch (attr->form) { + case DW_FORM_sec_offset: { + if (!module->scns[DRGN_SCN_DEBUG_LOC]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr without .debug_loc section"); + } + + struct optional_uint64 pc; + if (!regs || + !(pc = drgn_register_state_get_pc(regs)).has_value) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } + + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOC); + if (err) + return err; + + Dwarf_Addr bias; + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, + NULL, NULL, NULL); + pc.value = pc.value - !regs->interrupted - bias; + + Dwarf_Word offset; + if (dwarf_formudata(attr, &offset)) + return drgn_error_libdw(); + + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, + DRGN_SCN_DEBUG_LOC); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr is out of bounds"); + } + buffer.bb.pos += offset; + + uint8_t address_size = + drgn_platform_address_size(&module->platform); + uint64_t address_max = uint_max(address_size); + uint64_t base; + bool base_valid = false; + for (;;) { + uint64_t start, end; + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, + address_size, &end))) + return err; + if (start == 0 && end == 0) { + break; + } else if (start == address_max) { + base = end; + base_valid = true; + } else { + if (!base_valid) { + Dwarf_Die cu_die; + if (!dwarf_cu_die(attr->cu, &cu_die, + NULL, NULL, NULL, + NULL, NULL, NULL)) + 
return drgn_error_libdw(); + Dwarf_Addr low_pc; + if (dwarf_lowpc(&cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + uint16_t expr_size; + if ((err = binary_buffer_next_u16(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (base + start <= pc.value && + pc.value < base + end) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + } + } + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } + default: { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + *expr_ret = (char *)block.data; + *expr_size_ret = block.length; + return NULL; + } + } +} + struct drgn_dwarf_expression_buffer { struct binary_buffer bb; const char *start; @@ -1218,11 +1332,19 @@ drgn_dwarf_expression_buffer_init(struct drgn_dwarf_expression_buffer *buffer, buffer->module = module; } +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + int *remaining_ops, + const struct drgn_register_state *regs, uint64_t *ret); + /* Returns &drgn_not_found if it tried to use an unknown register value. */ static struct drgn_error * drgn_eval_dwarf_expression(struct drgn_program *prog, struct drgn_dwarf_expression_buffer *expr, struct uint64_vector *stack, + int *remaining_ops, + Dwarf_Die *function_die, const struct drgn_register_state *regs) { struct drgn_error *err; @@ -1252,14 +1374,12 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define PUSH_MASK(x) PUSH((x) & address_mask) - /* Arbitrary limit so we don't go into an infinite loop. 
*/ - int remaining_ops = 10000; while (binary_buffer_has_next(&expr->bb)) { - if (remaining_ops <= 0) { + if (*remaining_ops <= 0) { return binary_buffer_error(&expr->bb, "DWARF expression executed too many operations"); } - remaining_ops--; + (*remaining_ops)--; uint8_t opcode; if ((err = binary_buffer_next_u8(&expr->bb, &opcode))) return err; @@ -1338,6 +1458,19 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, PUSH_MASK(uvalue); break; /* Register values. */ + case DW_OP_fbreg: { + err = drgn_dwarf_frame_base(prog, expr->module, + function_die, remaining_ops, + regs, &uvalue); + if (err) + return err; + int64_t svalue; + if ((err = binary_buffer_next_sleb128(&expr->bb, + &svalue))) + return err; + PUSH_MASK(uvalue + svalue); + break; + } case DW_OP_breg0 ... DW_OP_breg31: dwarf_regno = opcode - DW_OP_breg0; goto breg; @@ -1604,7 +1737,6 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, /* * We don't yet support: * - * - DW_OP_fbreg * - DW_OP_push_object_address * - DW_OP_form_tls_address * - DW_OP_entry_value @@ -1634,6 +1766,101 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, return NULL; } +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + int *remaining_ops, + const struct drgn_register_state *regs, uint64_t *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + + if (!die) + return &drgn_not_found; + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_frame_base, &attr_mem))) + return &drgn_not_found; + const char *expr; + size_t expr_size; + err = drgn_dwarf_location(module, attr, regs, &expr, &expr_size); + if (err) + return err; + + struct uint64_vector stack = VECTOR_INIT; + struct drgn_dwarf_expression_buffer buffer; + 
drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); + for (;;) { + err = drgn_eval_dwarf_expression(prog, &buffer, &stack, + remaining_ops, NULL, regs); + if (err) + goto out; + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + uint64_t dwarf_regno; + switch (opcode) { + case DW_OP_reg0 ... DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; +reg: + { + if (!regs) { + err = &drgn_not_found; + goto out; + } + drgn_register_number regno = + dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) { + err = &drgn_not_found; + goto out; + } + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + /* + * Note that this doesn't mask the address since + * the caller does that. + */ + copy_lsbytes(ret, sizeof(*ret), + HOST_LITTLE_ENDIAN, + &regs->buf[layout->offset], + layout->size, little_endian); + if (binary_buffer_has_next(&buffer.bb)) { + err = binary_buffer_error(&buffer.bb, + "stray operations in DW_AT_frame_base expression"); + } else { + err = NULL; + } + goto out; + } + default: + err = binary_buffer_error(&buffer.bb, + "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", + opcode); + goto out; + } + } else if (stack.size) { + *ret = stack.data[stack.size - 1]; + err = NULL; + break; + } else { + err = &drgn_not_found; + break; + } + } +out: + uint64_vector_deinit(&stack); + return err; +} + DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) @@ -4674,10 +4901,12 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, } } + int remaining_ops = MAX_DWARF_EXPR_OPS; struct drgn_dwarf_expression_buffer buffer; drgn_dwarf_expression_buffer_init(&buffer, regs->module, rule->expr, rule->expr_size); - err = drgn_eval_dwarf_expression(prog,
&buffer, &stack, regs); + err = drgn_eval_dwarf_expression(prog, &buffer, &stack, &remaining_ops, + NULL, regs); if (err) goto out; if (stack.size == 0) { From ffcb9ccb19e747494f12d24d45fc609baa290543 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Jun 2021 13:17:22 -0700 Subject: [PATCH 45/56] libdrgn: debug_info: implement creating objects from DWARF location descriptions Add support for evaluating a DWARF location description and translating it into a drgn object. In this commit, this is just used for global variables, but an upcoming commit will wire this up to stack traces for parameters and local variables. There are a few locations that drgn's object model can't represent yet. DW_OP_piece/DW_OP_bit_piece can describe objects that are only partially known or partially in memory; we approximate these where we can. We don't have a good way to support DW_OP_implicit_pointer at all yet. This also adds test cases for DWARF expressions, which we couldn't easily test before. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 491 ++++++++++++-- libdrgn/object.c | 2 +- libdrgn/object.h | 30 + libdrgn/serialize.c | 80 +++ libdrgn/serialize.h | 39 ++ tests/__init__.py | 32 +- tests/assembler.py | 62 ++ tests/dwarfwriter.py | 23 +- tests/test_dwarf.py | 1537 +++++++++++++++++++++++++++++++++++++++++- 9 files changed, 2200 insertions(+), 96 deletions(-) create mode 100644 tests/assembler.py diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index b2f5eef9a..4adf1d9e1 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -25,6 +25,7 @@ #include "language.h" #include "lazy_object.h" #include "linux_kernel.h" +#include "minmax.h" #include "object.h" #include "orc.h" #include "path.h" @@ -1338,7 +1339,11 @@ drgn_dwarf_frame_base(struct drgn_program *prog, int *remaining_ops, const struct drgn_register_state *regs, uint64_t *ret); -/* Returns &drgn_not_found if it tried to use an unknown register value. 
*/ +/* + * Evaluate a DWARF expression up to the next location description operation. + * + * Returns &drgn_not_found if it tried to use an unknown register value. + */ static struct drgn_error * drgn_eval_dwarf_expression(struct drgn_program *prog, struct drgn_dwarf_expression_buffer *expr, @@ -1734,16 +1739,24 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, /* Special operations. */ case DW_OP_nop: break; + /* Location description operations. */ + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + case DW_OP_implicit_value: + case DW_OP_stack_value: + case DW_OP_piece: + case DW_OP_bit_piece: + /* The caller must handle it. */ + expr->bb.pos = expr->bb.prev; + return NULL; /* * We don't yet support: * * - DW_OP_push_object_address * - DW_OP_form_tls_address * - DW_OP_entry_value + * - DW_OP_implicit_pointer * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. - * - Location description operations: DW_OP_reg0-DW_OP_reg31, - * DW_OP_regx, DW_OP_implicit_value, DW_OP_stack_value, - * DW_OP_implicit_pointer, DW_OP_piece, DW_OP_bit_piece. * - Operations that use .debug_addr: DW_OP_addrx, * DW_OP_constx. * - Typed operations: DW_OP_const_type, DW_OP_regval_type, @@ -2119,6 +2132,380 @@ drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, 0); } +static struct drgn_error *read_bits(struct drgn_program *prog, void *dst, + unsigned int dst_bit_offset, uint64_t src, + unsigned int src_bit_offset, + uint64_t bit_size, bool lsb0) +{ + struct drgn_error *err; + + assert(dst_bit_offset < 8); + assert(src_bit_offset < 8); + + if (bit_size == 0) + return NULL; + + if (dst_bit_offset == src_bit_offset) { + /* + * We can read directly into the destination buffer, but we + * may have to preserve some bits at the start and/or end.
+ */ + uint8_t *d = dst; + uint64_t last_bit = dst_bit_offset + bit_size - 1; + uint8_t first_byte = d[0]; + uint8_t last_byte = d[last_bit / 8]; + err = drgn_program_read_memory(prog, d, src, last_bit / 8 + 1, + false); + if (err) + return err; + if (dst_bit_offset != 0) { + uint8_t mask = + copy_bits_first_mask(dst_bit_offset, lsb0); + d[0] = (first_byte & ~mask) | (d[0] & mask); + } + if (last_bit % 8 != 7) { + uint8_t mask = copy_bits_last_mask(last_bit, lsb0); + d[last_bit / 8] = ((last_byte & ~mask) + | (d[last_bit / 8] & mask)); + } + return NULL; + } else { + /* + * If the source and destination have different offsets, then + * depending on the size and source offset, we may have to read + * one more byte than is available in the destination. To keep + * things simple, we always read into a temporary buffer (rather + * than adding a special case for reading directly into the + * destination and shifting bits around). + */ + uint64_t src_bytes = (src_bit_offset + bit_size - 1) / 8 + 1; + char stack_tmp[16], *tmp; + if (src_bytes <= sizeof(stack_tmp)) { + tmp = stack_tmp; + } else { + tmp = malloc64(src_bytes); + if (!tmp) + return &drgn_enomem; + } + err = drgn_program_read_memory(prog, tmp, src, src_bytes, + false); + if (!err) { + copy_bits(dst, dst_bit_offset, tmp, src_bit_offset, + bit_size, lsb0); + } + if (src_bytes > sizeof(stack_tmp)) + free(tmp); + return err; + } +} + +static struct drgn_error * +drgn_object_from_dwarf_location(struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + const char *expr, size_t expr_size, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + uint64_t address_mask = drgn_platform_address_mask(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + 
module->platform.arch->dwarf_regno_to_internal; + + struct drgn_object_type type; + err = drgn_object_type(qualified_type, 0, &type); + if (err) + return err; + + union drgn_value value; + char *value_buf = NULL; + + uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ + int bit_offset = -1; /* -1 means that we don't have an address. */ + + uint64_t bit_pos = 0; + + struct uint64_vector stack = VECTOR_INIT; + int remaining_ops = MAX_DWARF_EXPR_OPS; + struct drgn_dwarf_expression_buffer buffer; + drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); + do { + stack.size = 0; + err = drgn_eval_dwarf_expression(prog, &buffer, &stack, + &remaining_ops, function_die, + regs); + if (err == &drgn_not_found) + goto absent; + else if (err) + goto out; + + const void *src = NULL; + size_t src_size; + + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + uint64_t uvalue; + uint64_t dwarf_regno; + drgn_register_number regno; + switch (opcode) { + case DW_OP_reg0 ... 
DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; +reg: + if (!regs) + goto absent; + regno = dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) + goto absent; + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + src = &regs->buf[layout->offset]; + src_size = layout->size; + break; + case DW_OP_implicit_value: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &uvalue))) + goto out; + if (uvalue > buffer.bb.end - buffer.bb.pos) { + err = binary_buffer_error(&buffer.bb, + "DW_OP_implicit_value size is out of bounds"); + goto out; + } + src = buffer.bb.pos; + src_size = uvalue; + buffer.bb.pos += uvalue; + break; + case DW_OP_stack_value: + if (!stack.size) + goto absent; + if (little_endian != HOST_LITTLE_ENDIAN) { + stack.data[stack.size - 1] = + bswap_64(stack.data[stack.size - 1]); + } + src = &stack.data[stack.size - 1]; + src_size = sizeof(stack.data[0]); + break; + default: + buffer.bb.pos = buffer.bb.prev; + break; + } + } + + uint64_t piece_bit_size; + uint64_t piece_bit_offset; + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + switch (opcode) { + case DW_OP_piece: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &piece_bit_size))) + goto out; + /* + * It's probably bogus for the piece size to be + * larger than the remaining value size, but + * that's not explicitly stated in the DWARF 5 + * specification, so clamp it instead.
+ */ + if (__builtin_mul_overflow(piece_bit_size, 8U, + &piece_bit_size) || + piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + break; + case DW_OP_bit_piece: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &piece_bit_size)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &piece_bit_offset))) + goto out; + if (piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + break; + default: + err = binary_buffer_error(&buffer.bb, + "unknown DWARF expression opcode %#" PRIx8 " after simple location description", + opcode); + goto out; + } + } else { + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + } + + if (piece_bit_size == 0) + continue; + + /* + * TODO: there are a few cases that a DWARF location can + * describe that can't be represented in drgn's object model: + * + * 1. An object that is partially known and partially unknown. + * 2. An object that is partially in memory and partially a + * value. + * 3. An object that is in memory at non-contiguous addresses. + * 4. A pointer object whose pointer value is not known but + * whose referenced value is known (DW_OP_implicit_pointer). + * + * For case 1, we consider the whole object as absent. For cases + * 2 and 3, we convert the whole object to a value. Case 4 is + * not supported at all. We should add a way to represent all of + * these situations precisely. + */ + if (src) { + if (!value_buf && + !drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, &value_buf)) { + err = &drgn_enomem; + goto out; + } + if (bit_offset >= 0) { + /* + * We previously had an address. Read it into + * the value. + */ + err = read_bits(prog, value_buf, 0, address, + bit_offset, bit_pos, + little_endian); + if (err) + goto out; + bit_offset = -1; + } + /* + * It's probably safe to assume that we don't have an + * implicit value larger than 2 exabytes. 
+ */ + assert(src_size <= UINT64_MAX / 8); + uint64_t src_bit_size = UINT64_C(8) * src_size; + if (piece_bit_offset > src_bit_size) + piece_bit_offset = src_bit_size; + uint64_t copy_bit_size = + min(piece_bit_size, + src_bit_size - piece_bit_offset); + uint64_t copy_bit_offset = bit_pos; + if (!little_endian) { + copy_bit_offset += piece_bit_size - copy_bit_size; + piece_bit_offset = (src_bit_size + - copy_bit_size + - piece_bit_offset); + } + copy_bits(&value_buf[copy_bit_offset / 8], + copy_bit_offset % 8, + (const char *)src + (piece_bit_offset / 8), + piece_bit_offset % 8, copy_bit_size, + little_endian); + } else if (stack.size) { + uint64_t piece_address = + ((stack.data[stack.size - 1] + piece_bit_offset / 8) + & address_mask); + piece_bit_offset %= 8; + if (bit_offset >= 0) { + /* + * We already had an address. Merge the pieces + * if the addresses are contiguous, otherwise + * convert to a value. + * + * The obvious way to write this is + * (address + (bit_pos + bit_offset) / 8), but + * (bit_pos + bit_offset) can overflow uint64_t. + */ + uint64_t end_address = + ((address + + bit_pos / 8 + + (bit_pos % 8 + bit_offset) / 8) + & address_mask); + unsigned int end_bit_offset = + (bit_offset + bit_pos) % 8; + if (piece_address == end_address && + piece_bit_offset == end_bit_offset) { + /* Piece is contiguous. */ + piece_address = address; + piece_bit_offset = bit_offset; + } else { + if (!drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, + &value_buf)) { + err = &drgn_enomem; + goto out; + } + err = read_bits(prog, value_buf, 0, + address, bit_offset, + bit_pos, little_endian); + if (err) + goto out; + bit_offset = -1; + } + } + if (value_buf) { + /* We already have a value. Read into it. 
*/ + err = read_bits(prog, &value_buf[bit_pos / 8], + bit_pos % 8, piece_address, + piece_bit_offset, + piece_bit_size, little_endian); + if (err) + goto out; + } else { + address = piece_address; + bit_offset = piece_bit_offset; + } + } else { + goto absent; + } + bit_pos += piece_bit_size; + } while (binary_buffer_has_next(&buffer.bb)); + + if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { +absent: + if (dwarf_tag(die) == DW_TAG_template_value_parameter) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_template_value_parameter is missing value"); + } + drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); + err = NULL; + } else if (bit_offset >= 0) { + Dwarf_Addr start, end, bias; + dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, + NULL, NULL, NULL); + /* + * If the address is not in the module's address range, then + * it's probably something special like a Linux per-CPU variable + * (which isn't actually a variable address but an offset). + * Don't apply the bias in that case. 
+ */ + if (start <= address + bias && address + bias < end) + address += bias; + err = drgn_object_set_reference_internal(ret, &type, address, + bit_offset); + } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { + drgn_object_reinit(ret, &type, DRGN_OBJECT_VALUE); + ret->value = value; + value_buf = NULL; + err = NULL; + } else { + err = drgn_object_set_from_buffer_internal(ret, &type, + value_buf, 0); + } + +out: + if (value_buf != value.ibuf) + free(value_buf); + uint64_vector_deinit(&stack); + return err; +} + static struct drgn_error * drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, struct drgn_qualified_type qualified_type, @@ -2159,58 +2546,53 @@ drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, } static struct drgn_error * -drgn_object_from_dwarf_variable(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, struct drgn_object *ret) +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret) { + struct drgn_error *err; + if (dwarf_tag(die) == DW_TAG_subprogram) { + return drgn_object_from_dwarf_subprogram(dbinfo, module, die, + ret); + } /* * The DWARF 5 specifications mentions that data object entries can have * DW_AT_endianity, but that doesn't seem to be used in practice. It * would be inconvenient to support, so ignore it for now. 
*/ struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, - die, NULL, true, - true, NULL, - &qualified_type); + if (type_die) { + err = drgn_type_from_dwarf(dbinfo, module, type_die, + &qualified_type); + } else { + err = drgn_type_from_dwarf_attr(dbinfo, module, die, NULL, true, + true, NULL, &qualified_type); + } if (err) return err; Dwarf_Attribute attr_mem, *attr; + const char *expr; + size_t expr_size; if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { - Dwarf_Op *loc; - size_t nloc; - if (dwarf_getlocation(attr, &loc, &nloc)) - return drgn_error_libdw(); - if (nloc != 1 || loc[0].atom != DW_OP_addr) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_location has unimplemented operation"); - } - uint64_t address = loc[0].number; - Dwarf_Addr start, end, bias; - dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, - NULL, NULL, NULL); - /* - * If the address is not in the module's address range, then - * it's probably something special like a Linux per-CPU variable - * (which isn't actually a variable address but an offset). - * Don't apply the bias in that case. 
- */ - if (start <= address + bias && address + bias < end) - address += bias; - return drgn_object_set_reference(ret, qualified_type, address, - 0, 0); + err = drgn_dwarf_location(module, attr, regs, &expr, + &expr_size); + if (err) + return err; } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { return drgn_object_from_dwarf_constant(dbinfo, die, qualified_type, attr, ret); } else { - if (dwarf_tag(die) == DW_TAG_template_value_parameter) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_template_value_parameter is missing value"); - } - return drgn_object_set_absent(ret, qualified_type, 0); + expr = NULL; + expr_size = 0; } + return drgn_object_from_dwarf_location(dbinfo->prog, module, die, + qualified_type, expr, expr_size, + function_die, regs, ret); } static struct drgn_error * @@ -2545,9 +2927,9 @@ drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, struct drgn_error *err; struct drgn_dwarf_die_thunk_arg *arg = arg_; if (res) { - err = drgn_object_from_dwarf_variable(drgn_object_program(res)->_dbinfo, - arg->module, &arg->die, - res); + err = drgn_object_from_dwarf(drgn_object_program(res)->_dbinfo, + arg->module, &arg->die, NULL, NULL, + NULL, res); if (err) return err; } @@ -3523,22 +3905,15 @@ drgn_debug_info_find_object(const char *name, size_t name_len, return err; if (!die_matches_filename(&die, filename)) continue; - switch (dwarf_tag(&die)) { - case DW_TAG_enumeration_type: + if (dwarf_tag(&die) == DW_TAG_enumeration_type) { return drgn_object_from_dwarf_enumerator(dbinfo, index_die->module, &die, name, ret); - case DW_TAG_subprogram: - return drgn_object_from_dwarf_subprogram(dbinfo, - index_die->module, - &die, ret); - case DW_TAG_variable: - return drgn_object_from_dwarf_variable(dbinfo, - index_die->module, - &die, ret); - default: - UNREACHABLE(); + } else { + return drgn_object_from_dwarf(dbinfo, index_die->module, + &die, NULL, NULL, NULL, + ret); } } return &drgn_not_found; @@ -4909,6 
+5284,16 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, NULL, regs); if (err) goto out; + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + err = binary_buffer_next_u8(&buffer.bb, &opcode); + if (!err) { + err = binary_buffer_error(&buffer.bb, + "invalid opcode %#" PRIx8 " for CFI expression", + opcode); + } + goto out; + } if (stack.size == 0) { err = &drgn_not_found; } else if (rule->kind == DRGN_CFI_RULE_AT_DWARF_EXPRESSION) { diff --git a/libdrgn/object.c b/libdrgn/object.c index efd862b99..9e2fa4d2a 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -318,7 +318,7 @@ drgn_object_set_from_buffer(struct drgn_object *res, bit_offset); } -static struct drgn_error * +struct drgn_error * drgn_object_set_reference_internal(struct drgn_object *res, const struct drgn_object_type *type, uint64_t address, uint64_t bit_offset) diff --git a/libdrgn/object.h b/libdrgn/object.h index 923ebfd5a..d008cdcc2 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -12,6 +12,9 @@ #ifndef DRGN_OBJECT_H #define DRGN_OBJECT_H +#include <stdlib.h> +#include <string.h> + #include "drgn.h" #include "type.h" @@ -31,6 +34,24 @@ * @{ */ +/** Allocate a zero-initialized @ref drgn_value. */ +static inline bool drgn_value_zalloc(uint64_t size, union drgn_value *value_ret, + char **buf_ret) +{ + if (size <= sizeof(value_ret->ibuf)) { + memset(value_ret->ibuf, 0, sizeof(value_ret->ibuf)); + *buf_ret = value_ret->ibuf; + } else { + if (size > SIZE_MAX) + return false; + char *buf = calloc(1, size); + if (!buf) + return false; + value_ret->bufp = *buf_ret = buf; + } + return true; +} + /** * Get whether an object is zero. * @@ -139,6 +160,15 @@ drgn_object_set_from_buffer_internal(struct drgn_object *res, const struct drgn_object_type *type, const void *buf, uint64_t bit_offset); +/** + * Like @ref drgn_object_set_reference() but @ref drgn_object_type() was already + * called.
+ */ +struct drgn_error * +drgn_object_set_reference_internal(struct drgn_object *res, + const struct drgn_object_type *type, + uint64_t address, uint64_t bit_offset); + /** * Binary operator implementation. * diff --git a/libdrgn/serialize.c b/libdrgn/serialize.c index b50d80194..3904f4fb5 100644 --- a/libdrgn/serialize.c +++ b/libdrgn/serialize.c @@ -6,6 +6,86 @@ #include "serialize.h" +static inline uint8_t copy_bits_step(const uint8_t *s, unsigned int src_bit_offset, + unsigned int bit_size, + unsigned int dst_bit_offset, bool lsb0) +{ + uint8_t result; + if (lsb0) { + result = s[0] >> src_bit_offset; + if (bit_size > 8 - src_bit_offset) + result |= s[1] << (8 - src_bit_offset); + result <<= dst_bit_offset; + } else { + result = s[0] << src_bit_offset; + if (bit_size > 8 - src_bit_offset) + result |= s[1] >> (8 - src_bit_offset); + result >>= dst_bit_offset; + } + return result; +} + +void copy_bits(void *dst, unsigned int dst_bit_offset, const void *src, + unsigned int src_bit_offset, uint64_t bit_size, bool lsb0) +{ + assert(dst_bit_offset < 8); + assert(src_bit_offset < 8); + + if (bit_size == 0) + return; + + uint8_t *d = dst; + const uint8_t *s = src; + uint64_t dst_last_bit = dst_bit_offset + bit_size - 1; + uint8_t dst_first_mask = copy_bits_first_mask(dst_bit_offset, lsb0); + uint8_t dst_last_mask = copy_bits_last_mask(dst_last_bit, lsb0); + + if (dst_bit_offset == src_bit_offset) { + /* + * In the common case that the source and destination have the + * same offset, we can use memcpy(), preserving bits at the + * start and/or end if necessary. 
+ */ + uint8_t first_byte = d[0]; + uint8_t last_byte = d[dst_last_bit / 8]; + memcpy(d, s, dst_last_bit / 8 + 1); + if (dst_bit_offset != 0) { + d[0] = ((first_byte & ~dst_first_mask) + | (d[0] & dst_first_mask)); + } + if (dst_last_bit % 8 != 7) { + d[dst_last_bit / 8] = ((last_byte & ~dst_last_mask) + | (d[dst_last_bit / 8] & dst_last_mask)); + } + } else if (bit_size <= 8 - dst_bit_offset) { + /* Destination is only one byte. */ + uint8_t dst_mask = dst_first_mask & dst_last_mask; + d[0] = ((d[0] & ~dst_mask) + | (copy_bits_step(&s[0], src_bit_offset, bit_size, + dst_bit_offset, lsb0) & dst_mask)); + } else { + /* Destination is two or more bytes. */ + d[0] = ((d[0] & ~dst_first_mask) + | (copy_bits_step(&s[0], src_bit_offset, + 8 - dst_bit_offset, dst_bit_offset, + lsb0) & dst_first_mask)); + src_bit_offset += 8 - dst_bit_offset; + size_t si = src_bit_offset / 8; + src_bit_offset %= 8; + size_t di = 1; + while (di < dst_last_bit / 8) { + d[di] = copy_bits_step(&s[si], src_bit_offset, 8, 0, + lsb0); + di++; + si++; + } + d[di] = ((d[di] & ~dst_last_mask) + | (copy_bits_step(&s[si], src_bit_offset, + dst_last_bit % 8 + 1, 0, lsb0) + & dst_last_mask)); + } +} + void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, uint8_t bit_size, bool little_endian) { diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 281c54a1e..da3ba91a0 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -73,6 +73,45 @@ static inline void copy_lsbytes(void *dst, size_t dst_size, } } +/** + * Return a bit mask with bits `[bit_offset, 7]` set. + * + * @param[in] lsb0 See @ref copy_bits(). + */ +static inline uint8_t copy_bits_first_mask(unsigned int bit_offset, bool lsb0) +{ + return lsb0 ? 0xff << bit_offset : 0xff >> bit_offset; +} + +/** + * Return a bit mask with bits `[0, last_bit % 8]` set. + * + * @param[in] lsb0 See @ref copy_bits(). + */ +static inline uint8_t copy_bits_last_mask(uint64_t last_bit, bool lsb0) +{ + return lsb0 ? 
0xff >> (7 - last_bit % 8) : 0x7f80 >> (last_bit % 8); +} + +/** + * Copy @p bit_size bits from @p src at bit offset @p src_bit_offset to @p dst + * at bit offset @p dst_bit_offset. + * + * @param[in] dst Destination buffer. + * @param[in] dst_bit_offset Offset in bits from the beginning of @p dst to copy + * to. Must be < 8. + * @param[in] src Source buffer. + * @param[in] src_bit_offset Offset in bits from the beginning of @p src to copy + * from. Must be < 8. + * @param[in] bit_size Number of bits to copy. + * @param[in] lsb0 If @c true, bits within a byte are numbered from least + * significant (0) to most significant (7); if @c false, they are numbered from + * most significant (0) to least significant (7). This determines the + * interpretation of @p dst_bit_offset and @p src_bit_offset. + */ +void copy_bits(void *dst, unsigned int dst_bit_offset, const void *src, + unsigned int src_bit_offset, uint64_t bit_size, bool lsb0); + /** * Serialize bits to a memory buffer. * diff --git a/tests/__init__.py b/tests/__init__.py index 028062aec..43367aec4 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -41,6 +41,23 @@ def mock_memory_read(data, address, count, offset, physical): return data[offset : offset + count] +def add_mock_memory_segments(prog, segments): + for segment in segments: + if segment.virt_addr is not None: + prog.add_memory_segment( + segment.virt_addr, + len(segment.buf), + functools.partial(mock_memory_read, segment.buf), + ) + if segment.phys_addr is not None: + prog.add_memory_segment( + segment.phys_addr, + len(segment.buf), + functools.partial(mock_memory_read, segment.buf), + True, + ) + + class MockObject(NamedTuple): name: str type: Type @@ -84,20 +101,7 @@ def mock_object_find(prog, name, flags, filename): prog = Program(platform) if segments is not None: - for segment in segments: - if segment.virt_addr is not None: - prog.add_memory_segment( - segment.virt_addr, - len(segment.buf), - functools.partial(mock_memory_read, 
segment.buf), - ) - if segment.phys_addr is not None: - prog.add_memory_segment( - segment.phys_addr, - len(segment.buf), - functools.partial(mock_memory_read, segment.buf), - True, - ) + add_mock_memory_segments(prog, segments) if types is not None: prog.add_type_finder(mock_find_type) if objects is not None: diff --git a/tests/assembler.py b/tests/assembler.py new file mode 100644 index 000000000..b4aa7aaf5 --- /dev/null +++ b/tests/assembler.py @@ -0,0 +1,62 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +from collections import namedtuple + + +def _append_uleb128(buf, value): + while True: + byte = value & 0x7F + value >>= 7 + if value: + buf.append(byte | 0x80) + else: + buf.append(byte) + break + + +def _append_sleb128(buf, value): + while True: + byte = value & 0x7F + value >>= 7 + if (not value and not (byte & 0x40)) or (value == -1 and (byte & 0x40)): + buf.append(byte) + break + else: + buf.append(byte | 0x80) + + +U8 = namedtuple("U8", ["value"]) +U8._append = lambda self, buf, byteorder: buf.append(self.value) +S8 = namedtuple("S8", ["value"]) +S8._append = lambda self, buf, byteorder: buf.append(self.value & 0xFF) +U16 = namedtuple("U16", ["value"]) +U16._append = lambda self, buf, byteorder: buf.extend(self.value.to_bytes(2, byteorder)) +S16 = namedtuple("S16", ["value"]) +S16._append = lambda self, buf, byteorder: buf.extend( + self.value.to_bytes(2, byteorder, signed=True) +) +U32 = namedtuple("U32", ["value"]) +U32._append = lambda self, buf, byteorder: buf.extend(self.value.to_bytes(4, byteorder)) +S32 = namedtuple("S32", ["value"]) +S32._append = lambda self, buf, byteorder: buf.extend( + self.value.to_bytes(4, byteorder, signed=True) +) +U64 = namedtuple("U64", ["value"]) +U64._append = lambda self, buf, byteorder: buf.extend(self.value.to_bytes(8, byteorder)) +S64 = namedtuple("S64", ["value"]) +S64._append = lambda self, buf, byteorder: buf.extend( + self.value.to_bytes(8, byteorder, 
signed=True) +) +ULEB128 = namedtuple("ULEB128", ["value"]) +ULEB128._append = lambda self, buf, byteorder: _append_uleb128(buf, self.value) +SLEB128 = namedtuple("SLEB128", ["value"]) +SLEB128._append = lambda self, buf, byteorder: _append_sleb128(buf, self.value) + + +def assemble(*args, little_endian=True): + byteorder = "little" if little_endian else "big" + buf = bytearray() + for arg in args: + arg._append(buf, byteorder) + return buf diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index bc1320dc5..9e82f9df1 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -4,6 +4,7 @@ from collections import namedtuple import os.path +from tests.assembler import _append_sleb128, _append_uleb128 from tests.dwarf import DW_AT, DW_FORM, DW_TAG from tests.elf import ET, PT, SHT from tests.elfwriter import ElfSection, create_elf_file @@ -13,28 +14,6 @@ DwarfDie.__new__.__defaults__ = (None,) -def _append_uleb128(buf, value): - while True: - byte = value & 0x7F - value >>= 7 - if value: - buf.append(byte | 0x80) - else: - buf.append(byte) - break - - -def _append_sleb128(buf, value): - while True: - byte = value & 0x7F - value >>= 7 - if (not value and not (byte & 0x40)) or (value == -1 and (byte & 0x40)): - buf.append(byte) - break - else: - buf.append(byte | 0x80) - - def _compile_debug_abbrev(unit_dies, use_dw_form_indirect): buf = bytearray() code = 1 diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 018409b8c..beec5662c 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -3,12 +3,14 @@ import ctypes import functools +import operator import os.path import re import tempfile import unittest from drgn import ( + FaultError, FindObjectFlags, Language, Object, @@ -20,8 +22,15 @@ TypeParameter, TypeTemplateParameter, ) -from tests import DEFAULT_LANGUAGE, TestCase, identical -from tests.dwarf import DW_AT, DW_ATE, DW_END, DW_FORM, DW_LANG, DW_TAG +from tests import ( + DEFAULT_LANGUAGE, + MockMemorySegment, + TestCase, + 
add_mock_memory_segments, + identical, +) +import tests.assembler as assembler +from tests.dwarf import DW_AT, DW_ATE, DW_END, DW_FORM, DW_LANG, DW_OP, DW_TAG from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf libdw = ctypes.CDLL("libdw.so") @@ -189,12 +198,14 @@ ) -def dwarf_program(*args, **kwds): +def dwarf_program(*args, segments=None, **kwds): prog = Program() with tempfile.NamedTemporaryFile() as f: f.write(compile_dwarf(*args, **kwds)) f.flush() prog.load_debug_info([f.name]) + if segments is not None: + add_mock_memory_segments(prog, segments) return prog @@ -3759,7 +3770,7 @@ def test_variable_no_address(self): ) self.assertIdentical(prog.object("x"), Object(prog, "int")) - def test_variable_unimplemented_location(self): + def test_variable_expr_empty(self): prog = dwarf_program( wrap_test_type_dies( ( @@ -3769,13 +3780,1527 @@ def test_variable_unimplemented_location(self): ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xe0"), + DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b""), ), ), ) ) ) - self.assertRaisesRegex(Exception, "unimplemented operation", prog.object, "x") + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_bit_piece(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFFFFFF01020304), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(32), + assembler.ULEB128(4), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", address=0xFFFFFFFF01020304, bit_offset=4), + ) + + def test_variable_expr_implicit_value(self): + for little_endian in (True, False): + with 
self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32(0x12345678), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16( + 0x5678 if little_endian else 0x1234 + ), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16( + 0x1234 if little_endian else 0x5678 + ), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_pieces_too_large(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16( + 0x5678 if 
little_endian else 0x1234 + ), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32( + 0x1234 if little_endian else 0x5678 + ), + assembler.U8(DW_OP.piece), + # Piece size is larger than remaining size of object. + assembler.ULEB128(4), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32(0), + # There is nothing remaining in the object. + assembler.U8(DW_OP.piece), + assembler.ULEB128(4), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_too_small(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(1), + assembler.U8(0x99), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x99)) + + def test_variable_expr_implicit_value_bit_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(1), + assembler.U8( + 0x8F if little_endian else 0x1F + ), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(4), + assembler.ULEB128(4), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + 
assembler.U32( + 0x1234567 + if little_endian + else 0x2345678 + ), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(28), + assembler.ULEB128(0), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_piece_empty(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16(0), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_stack_value(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.lit31), + assembler.U8(DW_OP.stack_value), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 31)) + + def test_variable_expr_stack_value_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8( + DW_OP.lit2 + if little_endian 
+ else DW_OP.lit1 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.piece), + assembler.ULEB128( + 3 if little_endian else 1 + ), + assembler.U8( + DW_OP.lit1 + if little_endian + else DW_OP.lit2 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.piece), + assembler.ULEB128( + 1 if little_endian else 3 + ), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x1000002)) + + def test_variable_expr_stack_value_bit_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8( + DW_OP.lit2 + if little_endian + else DW_OP.lit31 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128( + 4 if little_endian else 28 + ), + assembler.ULEB128( + 0 if little_endian else 4 + ), + assembler.U8( + DW_OP.lit31 + if little_endian + else DW_OP.lit2 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128( + 28 if little_endian else 4 + ), + assembler.ULEB128( + 4 if little_endian else 0 + ), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12)) + + def test_variable_expr_stack_value_piece_empty(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.piece), + 
assembler.ULEB128(2), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_contiguous_piece_addresses(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", address=0xFFFF0000)) + + def test_variable_expr_contiguous_bit_piece_addresses(self): + for bit_offset in (0, 1): + with self.subTest(bit_offset=bit_offset): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(10), + assembler.ULEB128(bit_offset), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0001), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(22), + assembler.ULEB128(bit_offset + 2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", address=0xFFFF0000, bit_offset=bit_offset), + ) + + def test_variable_expr_non_contiguous_piece_addresses(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, 
DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x12345678).to_bytes( + 4, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x56781234)) + + def test_variable_expr_non_contiguous_piece_addresses_too_large(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(256), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x12345678).to_bytes( + 4, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x56781234)) + + def test_variable_expr_non_contiguous_bit_piece_addresses(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + 
assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(4), + assembler.ULEB128(0), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(28), + assembler.ULEB128(5), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + segments=[ + MockMemorySegment( + ( + (0x2468ACE8).to_bytes(5, "little") + if little_endian + else (0x111A2B3C00).to_bytes(5, "big") + ), + 0xFFFF0000, + ) + ], + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_unknown(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xdf"), + ), + ), + ) + ) + ) + self.assertRaisesRegex( + Exception, "unknown DWARF expression opcode", prog.object, "x" + ) + + def test_variable_expr_unknown_after_location(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32(0), + assembler.U8(0xDF), + ), + ), + ), + ), + ) + ) + ) + self.assertRaisesRegex( + Exception, "unknown DWARF expression opcode", prog.object, "x" + ) + + def _eval_dwarf_expr(self, ops, **kwds): + assemble_kwds = { + key: value for key, value in kwds.items() if key == "little_endian" + } + return dwarf_program( + wrap_test_type_dies( + ( + unsigned_long_long_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + *ops, + 
assembler.U8(DW_OP.stack_value), + **assemble_kwds, + ), + ), + ), + ), + ) + ), + **kwds, + )["x"].value_() + + def _assert_dwarf_expr_eval(self, ops, expected, **kwds): + self.assertEqual(self._eval_dwarf_expr(ops, **kwds), expected) + + def _assert_dwarf_expr_stack_underflow(self, ops, **kwds): + with self.assertRaisesRegex(Exception, "stack underflow"): + self._eval_dwarf_expr(ops, **kwds) + + def test_variable_expr_op_lit(self): + for i in range(32): + with self.subTest(i=i): + self._assert_dwarf_expr_eval([assembler.U8(DW_OP.lit0 + i)], i) + + def test_variable_expr_op_addr(self): + with self.subTest(bits=64): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.addr), assembler.U64(2 ** 64 - 1)], + 2 ** 64 - 1, + bits=64, + ) + with self.subTest(bits=32): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.addr), assembler.U32(2 ** 32 - 1)], + 2 ** 32 - 1, + bits=32, + ) + + def test_variable_expr_op_constu(self): + for bits in (64, 32): + for size in (1, 2, 4, 8): + op_name = f"const{size}u" + with self.subTest(bits=bits, op=op_name): + op = getattr(DW_OP, op_name) + type_ = getattr(assembler, f"U{size * 8}") + self._assert_dwarf_expr_eval( + [assembler.U8(op), type_(2 ** (size * 8) - 1)], + (2 ** (size * 8) - 1) & (2 ** bits - 1), + bits=bits, + ) + with self.subTest(bits=bits, op="constu"): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.constu), assembler.ULEB128(0x123456789)], + 0x123456789 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_consts(self): + for bits in (64, 32): + for size in (1, 2, 4, 8): + op_name = f"const{size}s" + with self.subTest(bits=bits, op=op_name): + op = getattr(DW_OP, op_name) + type_ = getattr(assembler, f"S{size * 8}") + self._assert_dwarf_expr_eval( + [assembler.U8(op), type_(-1)], + -1 & (2 ** bits - 1), + bits=bits, + ) + with self.subTest(bits=bits, op="consts"): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.consts), assembler.SLEB128(-0x123456789)], + -0x123456789 & (2 ** bits - 1), + 
bits=bits, + ) + + def test_variable_expr_op_dup(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.dup), + assembler.U8(DW_OP.plus), + ], + 2, + ) + + def test_variable_expr_op_drop(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.drop), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.plus), + ], + 4, + ) + + def test_variable_expr_op_pick(self): + for i, value in enumerate((30, 20, 10)): + with self.subTest(i=i): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.lit30), + assembler.U8(DW_OP.pick), + assembler.U8(i), + ], + value, + ) + + def test_variable_expr_op_pick_underflow(self): + for i in (3, 255): + with self.subTest(i=i): + self._assert_dwarf_expr_stack_underflow( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.lit30), + assembler.U8(DW_OP.pick), + assembler.U8(i), + ] + ) + + def test_variable_expr_op_over(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.over), + ], + 10, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.lit30), + assembler.U8(DW_OP.over), + ], + 20, + ) + + def test_variable_expr_op_swap(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.swap), + assembler.U8(DW_OP.minus), + ], + 2, + ) + + def test_variable_expr_op_rot(self): + for i, value in enumerate((5, 3, 7, 1)): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit7), + assembler.U8(DW_OP.rot), + assembler.U8(DW_OP.pick), + assembler.U8(i), + ], + value, + ) + + def test_variable_expr_op_deref(self): + for bits in (64, 32): + for little_endian in (True, False): + with 
self.subTest(bits=bits, little_endian=little_endian): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.addr), + (assembler.U64 if bits == 64 else assembler.U32)( + 0xFFFF0000 + ), + assembler.U8(DW_OP.deref), + ], + 0x12345678, + bits=bits, + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x12345678).to_bytes( + bits // 8, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + + def test_variable_expr_op_deref_fault(self): + with self.assertRaises(FaultError): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.deref), + ] + ) + + def test_variable_expr_op_deref_size(self): + for bits in (64, 32): + for little_endian in (True, False): + with self.subTest(bits=bits, little_endian=little_endian): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.addr), + (assembler.U64 if bits == 64 else assembler.U32)( + 0xFFFF0000 + ), + assembler.U8(DW_OP.deref_size), + assembler.U8(2), + ], + 0x1337, + bits=bits, + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x1337).to_bytes( + 2, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + + def test_variable_expr_op_deref_size_fault(self): + with self.assertRaises(FaultError): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.deref_size), + assembler.U8(1), + ] + ) + + def test_variable_expr_stack_underflow(self): + for case in [ + (DW_OP.dup, 1), + (DW_OP.drop, 1), + (DW_OP.over, 2), + (DW_OP.swap, 2), + (DW_OP.rot, 3), + (DW_OP.deref, 1), + (DW_OP.deref_size, 1, assembler.U8(1)), + (DW_OP.abs, 1), + (DW_OP.and_, 2), + (DW_OP.div, 2), + (DW_OP.minus, 2), + (DW_OP.mod, 2), + (DW_OP.mul, 2), + (DW_OP.neg, 1), + (DW_OP.not_, 1), + (DW_OP.or_, 2), + (DW_OP.plus, 2), + (DW_OP.plus_uconst, 1, assembler.ULEB128(1)), + (DW_OP.shl, 2), + (DW_OP.shr, 2), + (DW_OP.shra, 2), + (DW_OP.xor, 2), + (DW_OP.le, 2), + (DW_OP.ge, 2), + (DW_OP.eq, 2), + 
(DW_OP.lt, 2), + (DW_OP.gt, 2), + (DW_OP.ne, 2), + (DW_OP.bra, 1, assembler.S16(1)), + ]: + op = case[0] + min_entries = case[1] + extra_args = case[2:] + with self.subTest(op=op): + for i in range(min_entries): + self._assert_dwarf_expr_stack_underflow( + [assembler.U8(DW_OP.lit1)] * i + [assembler.U8(op), *extra_args] + ) + + def test_variable_expr_op_abs(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-9), + assembler.U8(DW_OP.abs), + ], + 9, + bits=bits, + ) + + def test_variable_expr_op_and(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.and_), + ], + 1, + bits=bits, + ) + + def test_variable_expr_op_div(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.div), + ], + 2, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.div), + ], + 0, + bits=bits, + ) + # The DWARF 5 specification doesn't specify how signed division + # should be rounded. We assume truncation towards zero like C. 
+ self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.div), + ], + -2 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_div_by_zero(self): + with self.assertRaisesRegex(Exception, "division by zero"): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.div), + ] + ) + + def test_variable_expr_op_minus(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.minus), + ], + 3, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.minus), + ], + -3 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_mod(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mod), + ], + 1, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.mod), + ], + 0, + bits=bits, + ) + # Although DW_OP_div is signed, DW_OP_mod is unsigned. 
+ self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mod), + ], + 1, + bits=bits, + ) + + def test_variable_expr_op_mod_by_zero(self): + with self.assertRaisesRegex(Exception, "modulo by zero"): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.mod), + ] + ) + + def test_variable_expr_op_mul(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mul), + ], + 10, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mul), + ], + ((-5 & (2 ** bits - 1)) * 2) & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_neg(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit7), + assembler.U8(DW_OP.neg), + ], + -7 & (2 ** bits - 1), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-7), + assembler.U8(DW_OP.neg), + ], + 7, + bits=bits, + ) + + def test_variable_expr_op_not(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.not_), + ], + 2 ** bits - 1, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit31), + assembler.U8(DW_OP.not_), + ], + ~31 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_or(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.or_), + ], + 7, + bits=bits, + ) + + def test_variable_expr_op_plus(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + 
assembler.U8(DW_OP.lit6), + assembler.U8(DW_OP.lit7), + assembler.U8(DW_OP.plus), + ], + 13, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.S8(DW_OP.const1s), + assembler.S8(-3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.plus), + ], + 2, + bits=bits, + ) + + def test_variable_expr_op_plus_uconst(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit6), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(7), + ], + 13, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.S8(DW_OP.const1s), + assembler.S8(-3), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(5), + ], + 2, + bits=bits, + ) + + def test_variable_expr_op_shl(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shl), + ], + 48, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.constu), + assembler.ULEB128(2 ** (bits - 2)), + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.shl), + ], + 2 ** (bits - 1), + bits=bits, + ) + # The DWARF specification doesn't define the behavior of + # shifting by a number of bits larger than the width of the + # type. We evaluate it to zero. 
+ self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.const1u), + assembler.U8(bits), + assembler.U8(DW_OP.shl), + ], + 0, + bits=bits, + ) + + def test_variable_expr_op_shr(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1u), + assembler.U8(48), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shr), + ], + 3, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.constu), + assembler.ULEB128(2 ** (bits - 1)), + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.shr), + ], + 2 ** (bits - 2), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-1), + assembler.U8(DW_OP.const1u), + assembler.U8(bits), + assembler.U8(DW_OP.shr), + ], + 0, + bits=bits, + ) + + def test_variable_expr_op_shra(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1u), + assembler.U8(48), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shra), + ], + 3, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-48), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shra), + ], + -3 & (2 ** bits - 1), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.constu), + assembler.ULEB128(2 ** (bits - 1)), + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.shra), + ], + 2 ** (bits - 2) + 2 ** (bits - 1), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-2), + assembler.U8(DW_OP.const1u), + assembler.U8(bits), + assembler.U8(DW_OP.shra), + ], + -1 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_xor(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.xor), + ], + 6, + bits=bits, + ) + + def 
test_variable_expr_relational(self): + for op, py_op in [ + (DW_OP.le, operator.le), + (DW_OP.ge, operator.ge), + (DW_OP.eq, operator.eq), + (DW_OP.lt, operator.lt), + (DW_OP.gt, operator.gt), + (DW_OP.ne, operator.ne), + ]: + for bits in (64, 32): + for val1, val2 in [ + (3, 5), + (3, -5), + (-3, 5), + (-3, -5), + (5, 5), + (5, -5), + (-5, 5), + (-5, -5), + (6, 5), + (6, -5), + (-6, 5), + (-6, -5), + ]: + with self.subTest(bits=bits, val1=val1, val2=val2): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(val1), + assembler.U8(DW_OP.const1s), + assembler.S8(val2), + assembler.U8(op), + ], + int(py_op(val1, val2)), + ) + + def test_variable_expr_op_skip(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.skip), + assembler.S16(3), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.div), + assembler.U8(DW_OP.lit20), + ], + 20, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.skip), + assembler.S16(4), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.skip), + assembler.S16(4), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.skip), + assembler.S16(-8), + ], + 3, + ) + + def test_variable_expr_op_skip_infinite(self): + with self.assertRaisesRegex(Exception, "too many operations"): + self._eval_dwarf_expr([assembler.U8(DW_OP.skip), assembler.S16(-3)]) + + def test_variable_expr_op_skip_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "out of bounds"): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.skip), + # 1 extra for the DW_OP_stack_value added by + # _eval_dwarf_expr().
+ assembler.U16(3), + assembler.U8(DW_OP.nop), + ], + ) + + def test_variable_expr_op_bra(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit31), + assembler.U8(DW_OP.bra), + assembler.S16(3), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.div), + assembler.U8(DW_OP.lit20), + ], + 20, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.bra), + assembler.S16(1), + assembler.U8(DW_OP.lit2), + ], + 2, + ) + # More complicated expression implementing something like this: + # i = 0 + # x = 0 + # do { + # x += 2; + # i += 1; + # } while (i < 5); + # return x; + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(2), + assembler.U8(DW_OP.swap), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(1), + assembler.U8(DW_OP.swap), + assembler.U8(DW_OP.over), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lt), + assembler.U8(DW_OP.bra), + assembler.S16(-12), + ], + 10, + ) + + def test_variable_expr_op_bra_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "out of bounds"): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.bra), + # 1 extra for the DW_OP_stack_value added by + # _eval_dwarf_expr().
+ assembler.U16(3), + assembler.U8(DW_OP.nop), + ], + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.bra), + assembler.U16(3), + assembler.U8(DW_OP.lit2), + ], + 2, + ) + + def test_variable_expr_op_nop(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.nop), + assembler.U8(DW_OP.nop), + assembler.U8(DW_OP.lit25), + assembler.U8(DW_OP.nop), + assembler.U8(DW_OP.nop), + ], + 25, + ) def test_variable_const_signed(self): for form in ( From b6d810b344e38d1c6085f3d296e8653dfbce2441 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 20 Jan 2021 16:28:45 -0800 Subject: [PATCH 46/56] libdrgn: debug_info: add DWARF DIE iterator We have a couple of upcoming use cases for iterating through all of the DIEs in a module: searching for scopes and searching for a DIE's ancestors. Add a DIE iterator interface to abstract away the details of walking DIEs and allows us to efficiently track ancestors. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 199 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 4adf1d9e1..616787af7 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -75,6 +75,8 @@ DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, DEFINE_VECTOR(drgn_cfi_row_vector, struct drgn_cfi_row *) DEFINE_VECTOR(uint64_vector, uint64_t) +DEFINE_VECTOR(dwarf_die_vector, Dwarf_Die) + #define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" #define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) @@ -140,6 +142,203 @@ struct drgn_error *drgn_debug_info_buffer_error(struct binary_buffer *bb, message); } + +/** Iterator over DWARF DIEs in a @ref drgn_debug_info_module. */ +struct drgn_dwarf_die_iterator { + /** Stack of current DIE and its ancestors. */ + struct dwarf_die_vector dies; + Dwarf *dwarf; + /** End of current CU (for bounds checking). 
*/ + const char *cu_end; + /** Offset of next CU. */ + Dwarf_Off next_cu_off; + /** Whether current CU is from .debug_types. */ + bool debug_types; +}; + +__attribute__((__unused__)) +static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, + Dwarf *dwarf) +{ + dwarf_die_vector_init(&it->dies); + it->dwarf = dwarf; + it->next_cu_off = 0; + it->debug_types = false; +} + +__attribute__((__unused__)) +static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) +{ + dwarf_die_vector_deinit(&it->dies); +} + +/** + * Return the next DWARF DIE in a @ref drgn_dwarf_die_iterator. + * + * The first call returns the top-level DIE for the first unit in the module. + * Subsequent calls return children, siblings, and unit DIEs. + * + * This includes the .debug_types section. + * + * @param[in] children If @c true and the last returned DIE has children, return + * its first child (this is a pre-order traversal). Otherwise, return the next + * DIE at the level less than or equal to the last returned DIE, i.e., the last + * returned DIE's sibling, or its ancestor's sibling, or the next top-level unit + * DIE. + * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, + * stop after returning all DIEs in the subtree rooted at the DIE that was + * returned in the last call as `(*dies_ret)[subtree - 1]`. + * @param[out] dies_ret Returned array containing DIE and its ancestors. + * `(*dies_ret)[*length_ret - 1]` is the DIE itself, + * `(*dies_ret)[*length_ret - 2]` is its parent, `(*dies_ret)[*length_ret - 3]` + * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. + * This is valid until the next call to @ref drgn_dwarf_die_iterator_next() or + * @ref drgn_dwarf_die_iterator_deinit(). + * @param[out] length_ret Returned length of @p dies_ret. 
+ * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which + * case `*length_ret` equals @p subtree and @p dies_ret refers to the root of + * the iterated subtree, non-@c NULL on error, in which case this should not be + * called again. + */ +__attribute__((__unused__)) +static struct drgn_error * +drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, + size_t subtree, Dwarf_Die **dies_ret, + size_t *length_ret) +{ +#define TOP() (&it->dies.data[it->dies.size - 1]) + struct drgn_error *err = NULL; + int r; + Dwarf_Die die; + assert(subtree <= it->dies.size); + if (it->dies.size == 0) { + /* This is the first call. Get the first unit DIE. */ + if (!dwarf_die_vector_append_entry(&it->dies)) { + err = &drgn_enomem; + goto out; + } + } else { + if (children) { + r = dwarf_child(TOP(), &die); + if (r == 0) { + /* The previous DIE has a child. Return it. */ + if (!dwarf_die_vector_append(&it->dies, &die)) + err = &drgn_enomem; + goto out; + } else if (r < 0) { + err = drgn_error_libdw(); + goto out; + } + /* The previous DIE has no children. */ + } + + if (it->dies.size == subtree) { + /* + * The previous DIE is the root of the subtree. We're + * done. + */ + err = &drgn_stop; + goto out; + } + + if (it->dies.size > 1) { + r = dwarf_siblingof(TOP(), &die); + if (r == 0) { + /* The previous DIE has a sibling. Return it. */ + *TOP() = die; + goto out; + } else if (r > 0) { + if (!die.addr) + goto next_unit; + /* + * The previous DIE is the last child of its + * parent. + */ + char *addr = die.addr; + do { + /* + * addr points to the null terminator + * for the list of siblings. Go back up + * to its parent. The next byte is + * either the parent's sibling or + * another null terminator. + */ + it->dies.size--; + addr++; + if (it->dies.size == subtree) { + /* + * We're back to the root of the + * subtree. We're done. 
+ */ + err = &drgn_stop; + goto out; + } + if (it->dies.size == 1 || + addr >= it->cu_end) + goto next_unit; + } while (*addr == '\0'); + /* + * addr now points to the next DIE. Return it. + */ + *TOP() = (Dwarf_Die){ + .cu = it->dies.data[0].cu, + .addr = addr, + }; + goto out; + } else { + err = drgn_error_libdw(); + goto out; + } + } + } + +next_unit:; + /* There are no more DIEs in the current unit. */ + Dwarf_Off cu_off = it->next_cu_off; + size_t cu_header_size; + uint64_t type_signature; + r = dwarf_next_unit(it->dwarf, cu_off, &it->next_cu_off, + &cu_header_size, NULL, NULL, NULL, NULL, + it->debug_types ? &type_signature : NULL, NULL); + if (r == 0) { + /* Got the next unit. Return the unit DIE. */ + if (it->debug_types) { + r = !dwarf_offdie_types(it->dwarf, + cu_off + cu_header_size, TOP()); + } else { + r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, + TOP()); + } + if (r) { + err = drgn_error_libdw(); + goto out; + } + it->cu_end = ((const char *)TOP()->addr + - dwarf_dieoffset(TOP()) + + it->next_cu_off); + } else if (r > 0) { + if (!it->debug_types) { + it->next_cu_off = 0; + it->debug_types = true; + goto next_unit; + } + /* There are no more units. */ + err = &drgn_stop; + } else { + err = drgn_error_libdw(); + } + +out: + /* + * Return these even in the error case to avoid maybe uninitialized + * warnings in the caller. + */ + *dies_ret = it->dies.data; + *length_ret = it->dies.size; + return err; +#undef TOP +} + DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) static inline struct hash_pair From d8d4157346873d47883565cfdf38a6ee9560ff96 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 20 Jan 2021 16:58:55 -0800 Subject: [PATCH 47/56] libdrgn: debug_info: add drgn_debug_info_module_find_dwarf_scopes() This will be used for finding functions, inlined functions, and blocks containing a PC for stack unwinding and variable lookups. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 94 ++++++++++++++++++++++++++++++++++++++++++-- libdrgn/debug_info.h | 22 +++++++++++ 2 files changed, 113 insertions(+), 3 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 616787af7..5be6546a8 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -156,7 +156,6 @@ struct drgn_dwarf_die_iterator { bool debug_types; }; -__attribute__((__unused__)) static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, Dwarf *dwarf) { @@ -166,7 +165,6 @@ static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, it->debug_types = false; } -__attribute__((__unused__)) static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) { dwarf_die_vector_deinit(&it->dies); @@ -200,7 +198,6 @@ static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) * the iterated subtree, non-@c NULL on error, in which case this should not be * called again. */ -__attribute__((__unused__)) static struct drgn_error * drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, size_t subtree, Dwarf_Die **dies_ret, @@ -339,6 +336,97 @@ next_unit:; #undef TOP } +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdw(); + *bias_ret = bias; + pc -= bias; + + /* First, try to get the CU containing the PC. 
*/ + Dwarf_Aranges *aranges; + size_t naranges; + if (dwarf_getaranges(dwarf, &aranges, &naranges) < 0) + return drgn_error_libdw(); + + struct drgn_dwarf_die_iterator it; + bool children; + size_t subtree; + if (naranges > 0) { + Dwarf_Off offset; + if (dwarf_getarangeinfo(dwarf_getarange_addr(aranges, pc), NULL, + NULL, &offset) < 0) { + /* No ranges match the PC. */ + *dies_ret = NULL; + *length_ret = 0; + return NULL; + } + + drgn_dwarf_die_iterator_init(&it, dwarf); + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + if (!dwarf_offdie(dwarf, offset, cu_die)) { + err = drgn_error_libdw(); + goto err; + } + if (dwarf_next_unit(dwarf, offset - dwarf_cuoffset(cu_die), + &it.next_cu_off, NULL, NULL, NULL, NULL, + NULL, NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + it.cu_end = ((const char *)cu_die->addr + - dwarf_dieoffset(cu_die) + + it.next_cu_off); + children = true; + subtree = 1; + } else { + /* + * .debug_aranges is empty or missing. Fall back to checking + * each CU. + */ + drgn_dwarf_die_iterator_init(&it, dwarf); + children = false; + subtree = 0; + } + + /* Now find DIEs containing the PC. 
*/ + Dwarf_Die *dies; + size_t length; + while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree, + &dies, &length))) { + int r = dwarf_haspc(&dies[length - 1], pc); + if (r > 0) { + children = true; + subtree = length; + } else if (r < 0) { + err = drgn_error_libdw(); + goto err; + } + } + if (err != &drgn_stop) + goto err; + + *dies_ret = dies; + *length_ret = length; + return NULL; + +err: + drgn_dwarf_die_iterator_deinit(&it); + return err; +} + DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) static inline struct hash_pair diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index ab04365cb..bd9499fcd 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -12,6 +12,7 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H +#include #include #include @@ -202,6 +203,27 @@ drgn_debug_info_buffer_init(struct drgn_debug_info_buffer *buffer, buffer->scn = scn; } +/** + * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing + * a given program counter. + * + * @param[in] module Module containing @p pc. + * @param[in] pc Program counter. + * @param[out] bias_ret Returned difference between addresses in the loaded + * module and addresses in the returned DIEs. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the + * innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent + * (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its + * grandparent, etc. Must be freed with @c free(). + * @param[out] length_ret Returned length of @p dies_ret. 
+ */ +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(1, 3, 4, 5))); + struct drgn_debug_info_module_key { const void *build_id; size_t build_id_len; From 0e113ecc8d99aa01d72666b75bcd16df2bb77d8f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 21 May 2021 17:35:48 -0700 Subject: [PATCH 48/56] libdrgn: debug_info: add drgn_find_die_ancestors() This will be used for finding the ancestors of the abstract instance root corresponding to a concrete inlined instance root for variable lookups in inlined functions. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ libdrgn/debug_info.h | 16 +++++++++++++ 2 files changed, 69 insertions(+) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 5be6546a8..9dcf25558 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -427,6 +427,59 @@ drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, return err; } +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf *dwarf = dwarf_cu_getdwarf(die->cu); + if (!dwarf) + return drgn_error_libdw(); + + struct drgn_dwarf_die_iterator it; + drgn_dwarf_die_iterator_init(&it, dwarf); + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + Dwarf_Half cu_version; + Dwarf_Off type_offset; + if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, + &type_offset)) { + err = drgn_error_libdw(); + goto err; + } + it.debug_types = cu_version == 4 && type_offset != 0; + uint64_t type_signature; + Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); + if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), + &it.next_cu_off, NULL, NULL, NULL, NULL, NULL, + 
it.debug_types ? &type_signature : NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + it.cu_end = (const char *)cu_die->addr - cu_die_offset + it.next_cu_off; + + Dwarf_Die *dies; + size_t length; + while (!(err = drgn_dwarf_die_iterator_next(&it, true, 1, &dies, + &length))) { + if (dies[length - 1].addr == die->addr) { + *dies_ret = dies; + *length_ret = length - 1; + return NULL; + } + } + if (err == &drgn_stop) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "could not find DWARF DIE ancestors"); + } +err: + drgn_dwarf_die_iterator_deinit(&it); + return err; +} + DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) static inline struct hash_pair diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index bd9499fcd..c8241aabb 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -224,6 +224,22 @@ drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, size_t *length_ret) __attribute__((__nonnull__(1, 3, 4, 5))); +/** + * Find the ancestors of a DWARF DIE. + * + * This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs. + * + * @param[in] module Module containing @p die. + * @param[in] die DIE to find. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret]` is the DIE, + * `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]` + * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. + * @param[out] length_ret Returned number of ancestors in @p dies_ret. 
+ */ +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(2, 3))); + struct drgn_debug_info_module_key { const void *build_id; size_t build_id_len; From 38573cfdde8065cc2cc900f1b6b4b4d0b83b308b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Dec 2020 11:24:05 -0800 Subject: [PATCH 49/56] libdrgn: stack_trace: pretty print frames and add frames for inline functions If we want to access a parameter or local variable in an inlined function, then we need a stack frame for that function. It's also much more useful to see inlined functions in the stack trace in general. So, when we've unwound the registers for a stack frame, walk the debugging information to find all of the (possibly inlined) functions at the program counter, and add a drgn stack frame for each of those. Also add StackFrame.name and StackFrame.is_inline so that we can distinguish inline frames. Also add StackFrame.source() to get the filename and line and column numbers. Finally, add the source code location to pretty-printed stack traces and add pretty-printing for individual stack frames that includes extra information. Signed-off-by: Omar Sandoval --- _drgn.pyi | 59 +++++-- drgn/internal/cli.py | 2 +- libdrgn/drgn.h.in | 36 ++++ libdrgn/python/stack_trace.c | 46 +++++ libdrgn/stack_trace.c | 333 +++++++++++++++++++++++++++++++++-- libdrgn/stack_trace.h | 3 + libdrgn/util.h | 8 + 7 files changed, 459 insertions(+), 28 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 3b4b516cf..ccc86c721 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -19,6 +19,7 @@ from typing import ( Mapping, Optional, Sequence, + Tuple, Union, overload, ) @@ -1382,23 +1383,24 @@ class StackTrace: .. 
code-block:: python3 for frame in trace: - if frame.symbol().name == 'io_schedule': + if frame.name == 'io_schedule': print('Thread is doing I/O') :class:`str() <str>` returns a pretty-printed stack trace: - >>> print(prog.stack_trace(1)) - #0 __schedule+0x25c/0x8ba - #1 schedule+0x3c/0x7e - #2 schedule_hrtimeout_range_clock+0x10c/0x118 - #3 ep_poll+0x3ca/0x40a - #4 do_epoll_wait+0xb0/0xc6 - #5 __x64_sys_epoll_wait+0x1a/0x1d - #6 do_syscall_64+0x55/0x17c - #7 entry_SYSCALL_64+0x7c/0x156 + >>> prog.stack_trace(1) + #0 context_switch (kernel/sched/core.c:4339:2) + #1 __schedule (kernel/sched/core.c:5147:8) + #2 schedule (kernel/sched/core.c:5226:3) + #3 do_wait (kernel/exit.c:1534:4) + #4 kernel_wait4 (kernel/exit.c:1678:8) + #5 __do_sys_wait4 (kernel/exit.c:1706:13) + #6 do_syscall_64 (arch/x86/entry/common.c:47:14) + #7 entry_SYSCALL_64+0x7c/0x15b (arch/x86/entry/entry_64.S:112) + #8 0x4d49dd - The drgn CLI is set up so that stack traces are displayed with ``str()`` by - default. + The format is subject to change. The drgn CLI is set up so that stack + traces are displayed with ``str()`` by default. """ def __getitem__(self, idx: IntegerLike) -> StackFrame: ... @@ -1406,6 +1408,30 @@ class StackTrace: class StackFrame: """ A ``StackFrame`` represents a single *frame* in a thread's call stack. + + :class:`str() <str>` returns a pretty-printed stack frame: + + >>> prog.stack_trace(1)[0] + #0 at 0xffffffffb64ac287 (__schedule+0x227/0x606) in context_switch at kernel/sched/core.c:4339:2 (inlined) + + This includes more information than when printing the full stack trace. The + format is subject to change. The drgn CLI is set up so that stack frames + are displayed with ``str()`` by default. + """ + + name: Optional[str] + """ + Name of the function at this frame, or ``None`` if it could not be + determined. + """ + + is_inline: bool + """ + Whether this frame is for an inlined call. + + An inline frame shares the same stack frame in memory as its caller.
+ Therefore, it has the same registers (including program counter and thus + symbol). """ interrupted: bool @@ -1421,8 +1447,17 @@ class StackFrame: particular, the program counter is the return address, which is typically the instruction after the call instruction. """ + pc: int """Program counter at this stack frame.""" + def source(self) -> Tuple[str, int, int]: + """ + Get the source code location of this frame. + + :return: Location as a ``(filename, line, column)`` triple. + :raises LookupError: if the source code location is not available + """ + ... def symbol(self) -> Symbol: """ Get the function symbol at this stack frame. diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index ac2f9acd6..77b280de7 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -23,7 +23,7 @@ def displayhook(value: Any) -> None: setattr(builtins, "_", None) if isinstance(value, drgn.Object): text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns) - elif isinstance(value, (drgn.StackTrace, drgn.Type)): + elif isinstance(value, (drgn.StackFrame, drgn.StackTrace, drgn.Type)): text = str(value) else: text = repr(value) diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index db19a2251..477e81ae3 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -2608,6 +2608,42 @@ struct drgn_error *drgn_format_stack_trace(struct drgn_stack_trace *trace, /** Return whether a stack frame was interrupted (e.g., by a signal). */ bool drgn_stack_frame_interrupted(struct drgn_stack_trace *trace, size_t frame); +/** + * Format a stack frame as a string. + * + * @param[out] ret Returned string. On success, it must be freed with @c free(). + * On error, its contents are undefined. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error *drgn_format_stack_frame(struct drgn_stack_trace *trace, + size_t frame, char **ret); + +/** + * Get the name of the function at a stack frame. + * + * @return Function name. 
This is valid until the stack trace is destroyed; it + * should not be freed. @c NULL if the name could not be determined. + */ +const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, size_t frame); + +/** Return whether a stack frame is for an inlined call. */ +bool drgn_stack_frame_is_inline(struct drgn_stack_trace *trace, size_t frame); + +/** + * Get the source code location of a stack frame. + * + * @param[out] line_ret Returned line number. Returned as 0 if unknown. May be + * @c NULL if not needed. + * @param[out] column_ret Returned column number. Returned as 0 if unknown. May + * be @c NULL if not needed. + * @return Filename. This is valid until the stack trace is destroyed; it should + * not be freed. @c NULL if the location could not be determined (in which case + * `*line_ret` and `*column_ret` are undefined). + */ +const char *drgn_stack_frame_source(struct drgn_stack_trace *trace, + size_t frame, int *line_ret, + int *column_ret); + /** * Get the program counter at a stack frame. 
* diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index be54db9c0..270b1ce66 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -72,6 +72,32 @@ static void StackFrame_dealloc(StackFrame *self) Py_TYPE(self)->tp_free((PyObject *)self); } +static PyObject *StackFrame_str(StackFrame *self) +{ + struct drgn_error *err; + char *str; + err = drgn_format_stack_frame(self->trace->trace, self->i, &str); + if (err) + return set_drgn_error(err); + PyObject *ret = PyUnicode_FromString(str); + free(str); + return ret; +} + +static PyObject *StackFrame_source(StackFrame *self) +{ + int line; + int column; + const char *filename = drgn_stack_frame_source(self->trace->trace, + self->i, &line, &column); + if (!filename) { + PyErr_SetString(PyExc_LookupError, + "source code location not available"); + return NULL; + } + return Py_BuildValue("sii", filename, line, column); +} + static PyObject *StackFrame_symbol(StackFrame *self) { struct drgn_error *err; @@ -148,6 +174,20 @@ static PyObject *StackFrame_registers(StackFrame *self) return dict; } +static PyObject *StackFrame_get_name(StackFrame *self, void *arg) +{ + const char *name = drgn_stack_frame_name(self->trace->trace, self->i); + if (name) + return PyUnicode_FromString(name); + else + Py_RETURN_NONE; +} + +static PyObject *StackFrame_get_is_inline(StackFrame *self, void *arg) +{ + Py_RETURN_BOOL(drgn_stack_frame_is_inline(self->trace->trace, self->i)); +} + static PyObject *StackFrame_get_interrupted(StackFrame *self, void *arg) { Py_RETURN_BOOL(drgn_stack_frame_interrupted(self->trace->trace, @@ -167,6 +207,8 @@ static PyObject *StackFrame_get_pc(StackFrame *self, void *arg) } static PyMethodDef StackFrame_methods[] = { + {"source", (PyCFunction)StackFrame_source, METH_NOARGS, + drgn_StackFrame_source_DOC}, {"symbol", (PyCFunction)StackFrame_symbol, METH_NOARGS, drgn_StackFrame_symbol_DOC}, {"register", (PyCFunction)StackFrame_register, @@ -177,6 +219,9 @@ static 
PyMethodDef StackFrame_methods[] = { }; static PyGetSetDef StackFrame_getset[] = { + {"name", (getter)StackFrame_get_name, NULL, drgn_StackFrame_name_DOC}, + {"is_inline", (getter)StackFrame_get_is_inline, NULL, + drgn_StackFrame_is_inline_DOC}, {"interrupted", (getter)StackFrame_get_interrupted, NULL, drgn_StackFrame_interrupted_DOC}, {"pc", (getter)StackFrame_get_pc, NULL, drgn_StackFrame_pc_DOC}, @@ -188,6 +233,7 @@ PyTypeObject StackFrame_type = { .tp_name = "_drgn.StackFrame", .tp_basicsize = sizeof(StackFrame), .tp_dealloc = (destructor)StackFrame_dealloc, + .tp_str = (reprfunc)StackFrame_str, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_StackFrame_DOC, .tp_methods = StackFrame_methods, diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 9a4f4cdd2..50d24d196 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -25,16 +27,18 @@ #include "type.h" #include "util.h" -static bool drgn_stack_trace_append_frame(struct drgn_stack_trace **trace, - size_t *capacity, - struct drgn_register_state *regs) +static struct drgn_error * +drgn_stack_trace_append_frame(struct drgn_stack_trace **trace, size_t *capacity, + struct drgn_register_state *regs, + Dwarf_Die *scopes, size_t num_scopes, + size_t function_scope) { if ((*trace)->num_frames == *capacity) { static const size_t max_capacity = (SIZE_MAX - sizeof(struct drgn_stack_trace)) / sizeof(struct drgn_stack_frame); if (*capacity == max_capacity) - return false; + return &drgn_enomem; size_t new_capacity; if (*capacity > max_capacity / 2) new_capacity = max_capacity; @@ -45,14 +49,17 @@ static bool drgn_stack_trace_append_frame(struct drgn_stack_trace **trace, offsetof(struct drgn_stack_trace, frames[new_capacity])); if (!new_trace) - return false; + return &drgn_enomem; *trace = new_trace; *capacity = new_capacity; } struct drgn_stack_frame *frame = &(*trace)->frames[(*trace)->num_frames++]; frame->regs = regs; - 
return true; + frame->scopes = scopes; + frame->num_scopes = num_scopes; + frame->function_scope = function_scope; + return NULL; } static void drgn_stack_trace_shrink_to_fit(struct drgn_stack_trace **trace, @@ -71,8 +78,15 @@ static void drgn_stack_trace_shrink_to_fit(struct drgn_stack_trace **trace, LIBDRGN_PUBLIC void drgn_stack_trace_destroy(struct drgn_stack_trace *trace) { - for (size_t i = 0; i < trace->num_frames; i++) - drgn_register_state_destroy(trace->frames[i].regs); + struct drgn_register_state *regs = NULL; + for (size_t i = 0; i < trace->num_frames; i++) { + if (trace->frames[i].regs != regs) { + drgn_register_state_destroy(regs); + regs = trace->frames[i].regs; + } + free(trace->frames[i].scopes); + } + drgn_register_state_destroy(regs); free(trace); } @@ -91,8 +105,12 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) goto enomem; struct drgn_register_state *regs = trace->frames[frame].regs; - struct optional_uint64 pc = drgn_register_state_get_pc(regs); - if (pc.has_value) { + struct optional_uint64 pc; + const char *name = drgn_stack_frame_name(trace, frame); + if (name) { + if (!string_builder_append(&str, name)) + goto enomem; + } else if ((pc = drgn_register_state_get_pc(regs)).has_value) { Dwfl_Module *dwfl_module = regs->module ? 
regs->module->dwfl_module : NULL; struct drgn_symbol sym; @@ -117,6 +135,19 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) goto enomem; } + int line, column; + const char *filename = drgn_stack_frame_source(trace, frame, + &line, &column); + if (filename && column) { + if (!string_builder_appendf(&str, " (%s:%d:%d)", + filename, line, column)) + goto enomem; + } else if (filename) { + if (!string_builder_appendf(&str, " (%s:%d)", filename, + line)) + goto enomem; + } + if (frame != trace->num_frames - 1 && !string_builder_appendc(&str, '\n')) goto enomem; @@ -130,6 +161,154 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) return &drgn_enomem; } +LIBDRGN_PUBLIC struct drgn_error * +drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret) +{ + struct string_builder str = {}; + struct drgn_register_state *regs = trace->frames[frame].regs; + if (!string_builder_appendf(&str, "#%zu at ", frame)) + goto enomem; + + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + if (pc.has_value) { + if (!string_builder_appendf(&str, "%#" PRIx64, pc.value)) + goto enomem; + + Dwfl_Module *dwfl_module = + regs->module ? 
regs->module->dwfl_module : NULL; + struct drgn_symbol sym; + if (dwfl_module && + drgn_program_find_symbol_by_address_internal(trace->prog, + pc.value - !regs->interrupted, + dwfl_module, + &sym) && + !string_builder_appendf(&str, " (%s+0x%" PRIx64 "/0x%" PRIx64 ")", + sym.name, pc.value - sym.address, + sym.size)) + goto enomem; + } else { + if (!string_builder_append(&str, "???")) + goto enomem; + } + + const char *name = drgn_stack_frame_name(trace, frame); + if (name && !string_builder_appendf(&str, " in %s", name)) + goto enomem; + + int line, column; + const char *filename = drgn_stack_frame_source(trace, frame, &line, + &column); + if (filename && column) { + if (!string_builder_appendf(&str, " at %s:%d:%d", filename, + line, column)) + goto enomem; + } else if (filename) { + if (!string_builder_appendf(&str, " at %s:%d", filename, line)) + goto enomem; + } + + if (drgn_stack_frame_is_inline(trace, frame) && + !string_builder_append(&str, " (inlined)")) + goto enomem; + + if (!string_builder_finalize(&str, ret)) + goto enomem; + return NULL; + +enomem: + free(str.str); + return &drgn_enomem; +} + +LIBDRGN_PUBLIC const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, + size_t frame) +{ + Dwarf_Die *scopes = trace->frames[frame].scopes; + size_t num_scopes = trace->frames[frame].num_scopes; + size_t function_scope = trace->frames[frame].function_scope; + if (function_scope >= num_scopes) + return NULL; + return dwarf_diename(&scopes[function_scope]); +} + +LIBDRGN_PUBLIC bool drgn_stack_frame_is_inline(struct drgn_stack_trace *trace, + size_t frame) +{ + Dwarf_Die *scopes = trace->frames[frame].scopes; + size_t num_scopes = trace->frames[frame].num_scopes; + size_t function_scope = trace->frames[frame].function_scope; + return (function_scope < num_scopes && + dwarf_tag(&scopes[function_scope]) == + DW_TAG_inlined_subroutine); +} + +LIBDRGN_PUBLIC const char * +drgn_stack_frame_source(struct drgn_stack_trace *trace, size_t frame, + int *line_ret, 
int *column_ret) +{ + if (frame > 0 && + trace->frames[frame].regs == trace->frames[frame - 1].regs) { + /* + * This frame is the caller of an inline frame. Get the call + * location from the inlined_subroutine of the callee. + */ + Dwarf_Die *inlined_scopes = trace->frames[frame - 1].scopes; + size_t inlined_num_scopes = trace->frames[frame - 1].num_scopes; + size_t inlined_function_scope = + trace->frames[frame - 1].function_scope; + if (inlined_function_scope >= inlined_num_scopes) + return NULL; + Dwarf_Die *inlined = &inlined_scopes[inlined_function_scope]; + + Dwarf_Die inlined_cu; + Dwarf_Files *files; + if (!dwarf_diecu(inlined, &inlined_cu, NULL, NULL) || + dwarf_getsrcfiles(&inlined_cu, &files, NULL)) + return NULL; + + Dwarf_Attribute attr; + Dwarf_Word value; + if (dwarf_formudata(dwarf_attr(inlined, DW_AT_call_file, &attr), + &value)) + return NULL; + + const char *filename = dwarf_filesrc(files, value, NULL, NULL); + if (!filename) + return NULL; + if (line_ret) { + if (dwarf_formudata(dwarf_attr(inlined, DW_AT_call_line, + &attr), &value)) + *line_ret = 0; + else + *line_ret = value; + } + if (column_ret) { + if (dwarf_formudata(dwarf_attr(inlined, + DW_AT_call_column, + &attr), &value)) + *column_ret = 0; + else + *column_ret = value; + } + return filename; + } else { + struct drgn_register_state *regs = trace->frames[frame].regs; + Dwfl_Module *dwfl_module = + regs->module ? 
regs->module->dwfl_module : NULL; + if (!dwfl_module) + return NULL; + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + if (!pc.has_value) + return NULL; + pc.value -= !regs->interrupted; + Dwfl_Line *line = dwfl_module_getsrc(dwfl_module, pc.value); + if (!line) + return NULL; + return dwfl_lineinfo(line, NULL, line_ret, column_ret, NULL, + NULL); + } +} + LIBDRGN_PUBLIC bool drgn_stack_frame_interrupted(struct drgn_stack_trace *trace, size_t frame) { @@ -440,6 +619,132 @@ static void drgn_add_to_register(void *dst, size_t dst_size, const void *src, } +static struct drgn_error * +drgn_stack_trace_add_frames(struct drgn_stack_trace **trace, + size_t *trace_capacity, + struct drgn_register_state *regs) +{ + struct drgn_error *err; + + if (!regs->module) { + err = drgn_stack_trace_append_frame(trace, trace_capacity, regs, + NULL, 0, 0); + goto out; + } + + uint64_t pc = regs->_pc - !regs->interrupted; + uint64_t bias; + Dwarf_Die *scopes; + size_t num_scopes; + err = drgn_debug_info_module_find_dwarf_scopes(regs->module, pc, &bias, + &scopes, &num_scopes); + if (err) + goto out; + pc -= bias; + + size_t orig_num_frames = (*trace)->num_frames; + /* + * Walk backwards through scopes, splitting into frames. Stop at index 1 + * because 0 must be a unit DIE. + */ + size_t frame_end = num_scopes; + for (size_t i = num_scopes; i-- > 1;) { + bool has_pc; + if (i == num_scopes - 1) { + /* + * The last scope is guaranteed to contain PC, so avoid + * a call to dwarf_haspc(). + */ + has_pc = true; + } else { + int r = dwarf_haspc(&scopes[i], pc); + if (r < 0) { + err = drgn_error_libdw(); + goto out_scopes; + } + has_pc = r > 0; + } + if (has_pc) { + Dwarf_Die *frame_scopes; + switch (dwarf_tag(&scopes[i])) { + case DW_TAG_subprogram: + /* + * Reuse the original scopes array (shrinking it + * if necessary). 
+ */ + if (frame_end == num_scopes || + !(frame_scopes = realloc(scopes, + frame_end * + sizeof(scopes[i])))) + frame_scopes = scopes; + err = drgn_stack_trace_append_frame(trace, + trace_capacity, + regs, + frame_scopes, + frame_end, + i); + if (err) { + free(frame_scopes); + /* + * We stole scopes for frame_scopes, so + * not out_scopes. + */ + goto out; + } + /* + * Added the DW_TAG_subprogram frame. We're + * done. + */ + return NULL; + case DW_TAG_inlined_subroutine: + frame_scopes = memdup(&scopes[i], + (frame_end - i) * + sizeof(scopes[i])); + if (!frame_scopes) { + err = &drgn_enomem; + goto out_scopes; + } + err = drgn_stack_trace_append_frame(trace, + trace_capacity, + regs, + frame_scopes, + frame_end - i, + 0); + if (err) { + free(frame_scopes); + goto out_scopes; + } + frame_end = i; + break; + default: + break; + } + } else { + /* + * This DIE doesn't contain PC. Ignore it and everything + * after it. + */ + frame_end = i; + } + } + + /* + * We didn't find a matching DW_TAG_subprogram. Free any matching + * DW_TAG_inlined_subroutine frames we found and add a scopeless frame. + */ + for (size_t i = orig_num_frames; i < (*trace)->num_frames; i++) + free((*trace)->frames[i].scopes); + (*trace)->num_frames = orig_num_frames; + err = drgn_stack_trace_append_frame(trace, trace_capacity, regs, NULL, + 0, 0); +out_scopes: + free(scopes); +out: + if (err) + drgn_register_state_destroy(regs); + return err; +} + static struct drgn_error * drgn_unwind_one_register(struct drgn_program *prog, const struct drgn_cfi_rule *rule, @@ -640,12 +945,10 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, /* Limit iterations so we don't get caught in a loop. 
*/ for (int i = 0; i < 1024; i++) { - if (!drgn_stack_trace_append_frame(&trace, &trace_capacity, - regs)) { - err = &drgn_enomem; - drgn_register_state_destroy(regs); + err = drgn_stack_trace_add_frames(&trace, &trace_capacity, + regs); + if (err) goto out; - } err = drgn_unwind_with_cfi(prog, &row, regs, &regs); if (err == &drgn_not_found) { diff --git a/libdrgn/stack_trace.h b/libdrgn/stack_trace.h index 20f5eb6c0..1a932a1a8 100644 --- a/libdrgn/stack_trace.h +++ b/libdrgn/stack_trace.h @@ -28,6 +28,9 @@ struct drgn_stack_frame { struct drgn_register_state *regs; + Dwarf_Die *scopes; + size_t num_scopes; + size_t function_scope; }; struct drgn_stack_trace { diff --git a/libdrgn/util.h b/libdrgn/util.h index 166c38a1f..9129a2b8b 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -118,6 +118,14 @@ static inline void *malloc64(uint64_t size) return malloc(size); } +static inline void *memdup(void *ptr, size_t size) +{ + void *copy = malloc(size); + if (copy) + memcpy(copy, ptr, size); + return copy; +} + /** Return the maximum value of an @p n-byte unsigned integer. */ static inline uint64_t uint_max(int n) { From bc85767e5fde24ceefb45b6b123092363c10a8e0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 2 Jun 2021 13:17:41 -0700 Subject: [PATCH 50/56] libdrgn: support looking up parameters and variables in stack traces After all of the preparatory work, the last two missing pieces are a way to find a variable by name in the list of scopes that we saved while unwinding, and a way to find the containing scopes of an inlined function. With that, we can finally look up parameters and variables in stack traces. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 27 +++++++++ docs/user_guide.rst | 2 + libdrgn/debug_info.c | 78 +++++++++++++++++++++++- libdrgn/debug_info.h | 38 ++++++++++++ libdrgn/drgn.h.in | 13 ++++ libdrgn/python/stack_trace.c | 71 ++++++++++++++++++++++ libdrgn/stack_trace.c | 81 +++++++++++++++++++++++++ tests/helpers/linux/test_stack_trace.py | 17 ++++++ 8 files changed, 326 insertions(+), 1 deletion(-) diff --git a/_drgn.pyi b/_drgn.pyi index ccc86c721..9c2669a0e 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1417,6 +1417,14 @@ class StackFrame: This includes more information than when printing the full stack trace. The format is subject to change. The drgn CLI is set up so that stack frames are displayed with ``str()`` by default. + + The :meth:`[] <.__getitem__>` operator can look up function parameters, + local variables, and global variables in the scope of the stack frame: + + >>> prog.stack_trace(1)[0]['prev'].pid + (pid_t)1 + >>> prog.stack_trace(1)[0]['scheduler_running'] + (int)1 """ name: Optional[str] @@ -1450,6 +1458,25 @@ class StackFrame: pc: int """Program counter at this stack frame.""" + def __getitem__(self, name: str) -> Object: + """ + Implement ``self[name]``. Get the object (variable, function parameter, + constant, or function) with the given name in the scope of this frame. + + If the object exists but has been optimized out, this returns an + :ref:`absent object <absent-objects>`. + + :param name: Object name. + """ + ... + def __contains__(self, name: str) -> bool: + """ + Implement ``name in self``. Return whether an object with the given + name exists in the scope of this frame. + + :param name: Object name. + """ + ... def source(self) -> Tuple[str, int, int]: """ Get the source code location of this frame. diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 66694a5fe..c56eccff7 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -139,6 +139,8 @@ address it points to):: >>> print(hex(jiffiesp.value_())) 0xffffffffbe405000 +.. 
_absent-objects: + Absent Objects """""""""""""" diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 9dcf25558..30c5bed62 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2885,7 +2885,7 @@ drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, } } -static struct drgn_error * +struct drgn_error * drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, struct drgn_debug_info_module *module, Dwarf_Die *die, Dwarf_Die *type_die, @@ -2935,6 +2935,82 @@ drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, function_die, regs, ret); } +static struct drgn_error *find_dwarf_enumerator(Dwarf_Die *enumeration_type, + const char *name, + Dwarf_Die *ret) +{ + int r = dwarf_child(enumeration_type, ret); + while (r == 0) { + if (dwarf_tag(ret) == DW_TAG_enumerator && + strcmp(dwarf_diename(ret), name) == 0) + return NULL; + r = dwarf_siblingof(ret, ret); + } + if (r < 0) + return drgn_error_libdw(); + ret->addr = NULL; + return NULL; +} + +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret) +{ + struct drgn_error *err; + Dwarf_Die die; + for (size_t scope = num_scopes; scope--;) { + bool have_declaration = false; + if (dwarf_child(&scopes[scope], &die) != 0) + continue; + do { + switch (dwarf_tag(&die)) { + case DW_TAG_variable: + case DW_TAG_formal_parameter: + case DW_TAG_subprogram: + if (strcmp(dwarf_diename(&die), name) == 0) { + *die_ret = die; + bool declaration; + if (dwarf_flag(&die, DW_AT_declaration, + &declaration)) + return drgn_error_libdw(); + if (declaration) + have_declaration = true; + else + return NULL; + } + break; + case DW_TAG_enumeration_type: { + bool enum_class; + if (dwarf_flag_integrate(&die, DW_AT_enum_class, + &enum_class)) + return drgn_error_libdw(); + if (!enum_class) { + Dwarf_Die enumerator; + err = find_dwarf_enumerator(&die, name, + &enumerator); + if (err) + return err; + if (enumerator.addr) { 
+ *die_ret = enumerator; + *type_ret = die; + return NULL; + } + } + break; + } + default: + continue; + } + } while (dwarf_siblingof(&die, &die) == 0); + if (have_declaration) + return NULL; + } + die_ret->addr = NULL; + return NULL; +} + static struct drgn_error * drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, struct drgn_debug_info_module *module, Dwarf_Die *die, diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index c8241aabb..b7f313ea5 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -418,6 +418,44 @@ drgn_debug_info_find_object(const char *name, size_t name_len, enum drgn_find_object_flags flags, void *arg, struct drgn_object *ret); +/** + * Find an object DIE in an array of DWARF scopes. + * + * @param[in] scopes Array of scopes, from outermost to innermost. + * @param[in] num_scopes Number of scopes in @p scopes. + * @param[out] die_ret Returned object DIE. + * @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent. + * Otherwise, undefined. + */ +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret); + +/** + * Create a @ref drgn_object from a `Dwarf_Die`. + * + * @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`, + * `DW_TAG_formal_parameter`, `DW_TAG_enumerator`, + * `DW_TAG_template_value_parameter`). + * @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type` + * attribute of @p die. If @p die is a `DW_TAG_enumerator` DIE, this should be + * its parent. + * @param[in] function_die DIE of current function. @c NULL if not in function + * context. + * @param[in] regs Registers of current stack frame. @c NULL if not in stack + * frame context. + * @param[out] ret Returned object. 
+ */ +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret); + /** * Get the Call Frame Information in a @ref drgn_debug_info_module at a given * program counter. diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 477e81ae3..b6b0827a5 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -2663,6 +2663,19 @@ bool drgn_stack_frame_pc(struct drgn_stack_trace *trace, size_t frame, struct drgn_error *drgn_stack_frame_symbol(struct drgn_stack_trace *trace, size_t frame, struct drgn_symbol **ret); + +/** + * Find an object in the scope of a stack frame. + * + * @param[in] name Object name. + * @param[out] ret Returned object. This must have already been initialized with + * @ref drgn_object_init(). + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error * +drgn_stack_frame_find_object(struct drgn_stack_trace *trace, size_t frame, + const char *name, struct drgn_object *ret); + /** * Get the value of a register in a stack frame. 
* diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index 270b1ce66..5fe9fdf6e 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -84,6 +84,63 @@ static PyObject *StackFrame_str(StackFrame *self) return ret; } +static DrgnObject *StackFrame_subscript(StackFrame *self, PyObject *key) +{ + struct drgn_error *err; + Program *prog = container_of(self->trace->trace->prog, Program, prog); + if (!PyUnicode_Check(key)) { + PyErr_SetObject(PyExc_KeyError, key); + return NULL; + } + const char *name = PyUnicode_AsUTF8(key); + if (!name) + return NULL; + DrgnObject *ret = DrgnObject_alloc(prog); + if (!ret) + return NULL; + bool clear = set_drgn_in_python(); + err = drgn_stack_frame_find_object(self->trace->trace, self->i, name, + &ret->obj); + if (clear) + clear_drgn_in_python(); + if (err) { + if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + PyErr_SetObject(PyExc_KeyError, key); + } else { + set_drgn_error(err); + } + Py_DECREF(ret); + return NULL; + } + return ret; +} + +static int StackFrame_contains(StackFrame *self, PyObject *key) +{ + struct drgn_error *err; + if (!PyUnicode_Check(key)) { + PyErr_SetObject(PyExc_KeyError, key); + return -1; + } + const char *name = PyUnicode_AsUTF8(key); + if (!name) + return -1; + struct drgn_object tmp; + drgn_object_init(&tmp, self->trace->trace->prog); + err = drgn_stack_frame_find_object(self->trace->trace, self->i, name, + &tmp); + drgn_object_deinit(&tmp); + if (!err) { + return 1; + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + return 0; + } else { + return -1; + } +} + static PyObject *StackFrame_source(StackFrame *self) { int line; @@ -207,6 +264,10 @@ static PyObject *StackFrame_get_pc(StackFrame *self, void *arg) } static PyMethodDef StackFrame_methods[] = { + {"__getitem__", (PyCFunction)StackFrame_subscript, + METH_O | METH_COEXIST, drgn_StackFrame___getitem___DOC}, + {"__contains__", (PyCFunction)StackFrame_contains, + 
METH_O | METH_COEXIST, drgn_StackFrame___contains___DOC}, {"source", (PyCFunction)StackFrame_source, METH_NOARGS, drgn_StackFrame_source_DOC}, {"symbol", (PyCFunction)StackFrame_symbol, METH_NOARGS, @@ -228,11 +289,21 @@ static PyGetSetDef StackFrame_getset[] = { {}, }; +static PyMappingMethods StackFrame_as_mapping = { + .mp_subscript = (binaryfunc)StackFrame_subscript, +}; + +static PySequenceMethods StackFrame_as_sequence = { + .sq_contains = (objobjproc)StackFrame_contains, +}; + PyTypeObject StackFrame_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn.StackFrame", .tp_basicsize = sizeof(StackFrame), .tp_dealloc = (destructor)StackFrame_dealloc, + .tp_as_sequence = &StackFrame_as_sequence, + .tp_as_mapping = &StackFrame_as_mapping, .tp_str = (reprfunc)StackFrame_str, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_StackFrame_DOC, diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 50d24d196..8dc05e294 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -352,6 +352,87 @@ drgn_stack_frame_symbol(struct drgn_stack_trace *trace, size_t frame, return NULL; } +LIBDRGN_PUBLIC struct drgn_error * +drgn_stack_frame_find_object(struct drgn_stack_trace *trace, size_t frame_i, + const char *name, struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_stack_frame *frame = &trace->frames[frame_i]; + + if (frame->num_scopes == 0) + goto not_found; + + Dwarf_Die die, type_die; + err = drgn_find_in_dwarf_scopes(frame->scopes, frame->num_scopes, name, + &die, &type_die); + if (err) + return err; + if (!die.addr && frame->function_scope == 0) { + /* + * Scope 0 must be a DW_TAG_inlined_subroutine, and we didn't + * find the name in the concrete inlined instance tree. We need + * to find the scopes that contain the abstract instance + * root (i.e., the DW_TAG_subprogram definition). (We could do + * this ahead of time when unwinding the stack, but for + * efficiency we do it lazily.) 
+ */ + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(frame->scopes, DW_AT_abstract_origin, + &attr_mem))) + goto not_found; + Dwarf_Die abstract_origin; + if (!dwarf_formref_die(attr, &abstract_origin)) + return drgn_error_libdw(); + + Dwarf_Die *ancestors; + size_t num_ancestors; + err = drgn_find_die_ancestors(&abstract_origin, &ancestors, + &num_ancestors); + if (err) + return err; + + size_t new_num_scopes = num_ancestors + frame->num_scopes; + Dwarf_Die *new_scopes = realloc(ancestors, + new_num_scopes * + sizeof(*new_scopes)); + if (!new_scopes) { + free(ancestors); + return &drgn_enomem; + } + memcpy(&new_scopes[num_ancestors], frame->scopes, + frame->num_scopes * sizeof(*new_scopes)); + free(frame->scopes); + frame->scopes = new_scopes; + frame->num_scopes = new_num_scopes; + frame->function_scope = num_ancestors; + + /* Look for the name in the new scopes. */ + err = drgn_find_in_dwarf_scopes(frame->scopes, num_ancestors, + name, &die, &type_die); + if (err) + return err; + } + if (!die.addr) { +not_found:; + const char *frame_name = drgn_stack_frame_name(trace, frame_i); + if (frame_name) { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s' in '%s'", + name, frame_name); + } else { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s'", name); + } + } + + Dwarf_Die function_die = frame->scopes[frame->function_scope]; + return drgn_object_from_dwarf(trace->prog->_dbinfo, frame->regs->module, + &die, + dwarf_tag(&die) == DW_TAG_enumerator ? 
+ &type_die : NULL, + &function_die, frame->regs, ret); +} + LIBDRGN_PUBLIC bool drgn_stack_frame_register(struct drgn_stack_trace *trace, size_t frame, const struct drgn_register *reg, diff --git a/tests/helpers/linux/test_stack_trace.py b/tests/helpers/linux/test_stack_trace.py index 1420813c2..0716439b0 100644 --- a/tests/helpers/linux/test_stack_trace.py +++ b/tests/helpers/linux/test_stack_trace.py @@ -44,6 +44,23 @@ def test_by_pid_dwarf(self): def test_by_pid_orc(self): self._test_by_pid(True) + def test_local_variable(self): + pid = fork_and_pause() + wait_until(lambda: proc_state(pid) == "S") + for frame in self.prog.stack_trace(pid): + if frame.name in ("context_switch", "__schedule"): + try: + prev = frame["prev"] + except KeyError: + continue + if not prev.absent_: + self.assertEqual(prev.pid, pid) + break + else: + self.skipTest("prev not found in context_switch or __schedule") + os.kill(pid, signal.SIGKILL) + os.waitpid(pid, 0) + def test_pt_regs(self): # This won't unwind anything useful, but at least make sure it accepts # a struct pt_regs. From dbe1d4539b680a1d0b9a6154e15faf7534ec5b6e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Jun 2021 01:10:59 -0700 Subject: [PATCH 51/56] scripts: only build manylinux wheels for CPython. The manylinux image apparently added a PyPy interpreter, which drgn doesn't support. Signed-off-by: Omar Sandoval --- scripts/build_manylinux_in_docker.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index e001b6c55..dc556945a 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -56,7 +56,7 @@ python_supported() { "$1" -c 'import sys; sys.exit(sys.version_info < (3, 6))' } -for pybin in /opt/python/*/bin; do +for pybin in /opt/python/cp*/bin; do if python_supported "$pybin/python"; then # static_assert was added to assert.h in glibc 2.16, but CentOS # 6 has 2.12. 
@@ -72,7 +72,7 @@ for wheel in /tmp/wheels/*.whl; do fi done -for pybin in /opt/python/*/bin; do +for pybin in /opt/python/cp*/bin; do if python_supported "$pybin/python"; then "$pybin/pip" install drgn --no-index -f /tmp/manylinux_wheels/ "$pybin/drgn" --version From 6357cea46b15c61414a6f080d1a2160151fc200b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Jun 2021 01:13:28 -0700 Subject: [PATCH 52/56] drgn 0.0.12 Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index ba5cfb3bb..c49ba4193 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Facebook, Inc. and its affiliates. dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.11], +AC_INIT([libdrgn], [0.0.12], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) From f7fe93e57370261a92dd1ba7c155802a818a28fd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Jun 2021 10:56:15 -0700 Subject: [PATCH 53/56] cli: show elfutils version in use drgn depends heavily on libelf and libdw, so it's useful to know what version we're using. Add drgn._elfutils_version and use that in the CLI and in the test cases where we currently check the libdw version. Signed-off-by: Omar Sandoval --- _drgn.pyi | 1 + drgn/__init__.py | 1 + drgn/internal/cli.py | 2 +- libdrgn/python/module.c | 5 +++++ tests/test_dwarf.py | 13 +++++-------- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 9c2669a0e..7774ab84b 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2054,6 +2054,7 @@ class OutOfBoundsError(Exception): ... 
+_elfutils_version: str _with_libkdumpfile: bool def _linux_helper_read_vm( diff --git a/drgn/__init__.py b/drgn/__init__.py index 48cad2650..893c36dce 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -73,6 +73,7 @@ TypeMember, TypeParameter, TypeTemplateParameter, + _elfutils_version as _elfutils_version, _with_libkdumpfile as _with_libkdumpfile, cast, container_of, diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index 77b280de7..77a25d1a2 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -43,7 +43,7 @@ def displayhook(value: Any) -> None: def main() -> None: python_version = ".".join(str(v) for v in sys.version_info[:3]) libkdumpfile = f'with{"" if drgn._with_libkdumpfile else "out"} libkdumpfile' - version = f"drgn {drgn.__version__} (using Python {python_version}, {libkdumpfile})" + version = f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {libkdumpfile})" parser = argparse.ArgumentParser(prog="drgn", description="Scriptable debugger") program_group = parser.add_argument_group( diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 1e55e7aa8..a0657a38f 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0-or-later +#include <elfutils/libdwfl.h> #ifdef WITH_KDUMPFILE #include <libkdumpfile/kdumpfile.h> #endif @@ -280,6 +281,10 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) goto err; } + if (PyModule_AddStringConstant(m, "_elfutils_version", + dwfl_version(NULL))) + goto err; + PyObject *with_libkdumpfile; #ifdef WITH_LIBKDUMPFILE with_libkdumpfile = Py_True; diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index beec5662c..8bc6406e6 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1,7 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later -import ctypes import functools import operator import os.path @@ -9,6 +8,7 @@ import tempfile import unittest +import drgn from drgn import ( FaultError, FindObjectFlags, @@ -33,12 +33,6 @@ from tests.dwarf import DW_AT, DW_ATE, DW_END, DW_FORM, DW_LANG, DW_OP, DW_TAG from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf -libdw = ctypes.CDLL("libdw.so") -libdw.dwfl_version.argtypes = [ctypes.c_void_p] -libdw.dwfl_version.restype = ctypes.c_char_p -libdw_version = tuple(int(x) for x in libdw.dwfl_version(None).split(b".")[:2]) - - bool_die = DwarfDie( DW_TAG.base_type, ( @@ -223,6 +217,9 @@ def wrap_test_type_dies(dies): ) +elfutils_version = tuple(int(x) for x in drgn._elfutils_version.split(".")[:2]) + + def with_and_without_dw_form_indirect(f): @functools.wraps(f) def wrapper(self): @@ -231,7 +228,7 @@ def wrapper(self): # elfutils does not support DW_FORM_indirect properly before commit # d63b26b8d21f ("libdw: handle DW_FORM_indirect when reading # attributes"). - if libdw_version >= (0, 184): + if elfutils_version >= (0, 184): with self.subTest(msg="with DW_FORM_indirect"): f(self, True) From faad25d7b26a5cdaed510b399914ef9cf2d354a5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Jun 2021 14:37:22 -0700 Subject: [PATCH 54/56] libdrgn: debug_info: fix address of objects with size zero The stack trace variable work introduced a regression that causes objects with size zero to always be marked absent even if they have an address. This matters because GCC sometimes seems to omit the complete array type for arrays declared without a length, so an array variable can end up with an incomplete array type. I saw this with the "swapper_spaces" variable in mm/swap_state.c from the Linux kernel. Make sure to use the address of an empty piece if the variable is also empty. 
Fixes: ffcb9ccb19e7 ("libdrgn: debug_info: implement creating objects from DWARF location descriptions") Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 16 ++--- tests/test_dwarf.py | 158 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 8 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 30c5bed62..f7095280d 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -2686,9 +2686,6 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, piece_bit_offset = 0; } - if (piece_bit_size == 0) - continue; - /* * TODO: there are a few cases that a DWARF location can * describe that can't be represented in drgn's object model: @@ -2705,7 +2702,9 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, * not supported at all. We should add a way to represent all of * these situations precisely. */ - if (src) { + if (src && piece_bit_size == 0) { + /* Ignore empty value. */ + } else if (src) { if (!value_buf && !drgn_value_zalloc(drgn_value_size(type.bit_size), &value, &value_buf)) { @@ -2752,7 +2751,7 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, ((stack.data[stack.size - 1] + piece_bit_offset / 8) & address_mask); piece_bit_offset %= 8; - if (bit_offset >= 0) { + if (bit_pos > 0 && bit_offset >= 0) { /* * We already had an address. Merge the pieces * if the addresses are contiguous, otherwise @@ -2769,8 +2768,9 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, & address_mask); unsigned int end_bit_offset = (bit_offset + bit_pos) % 8; - if (piece_address == end_address && - piece_bit_offset == end_bit_offset) { + if (piece_bit_size == 0 || + (piece_address == end_address && + piece_bit_offset == end_bit_offset)) { /* Piece is contiguous. 
*/ piece_address = address; piece_bit_offset = bit_offset; @@ -2801,7 +2801,7 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, address = piece_address; bit_offset = piece_bit_offset; } - } else { + } else if (piece_bit_size > 0) { goto absent; } bit_pos += piece_bit_size; diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 8bc6406e6..fd1b6f3bc 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -3750,6 +3750,39 @@ def test_variable(self): FindObjectFlags.CONSTANT, ) + def test_zero_size_variable(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), + ), + ) + ) + ) + self.assertIdentical( + prog["x"], + Object( + prog, + prog.array_type(prog.int_type("int", 4, True)), + address=0xFFFFFFFF01020304, + ), + ) + def test_variable_no_address(self): prog = dwarf_program( wrap_test_type_dies( @@ -4400,6 +4433,131 @@ def test_variable_expr_non_contiguous_bit_piece_addresses(self): ) self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + def test_variable_expr_empty_piece_non_contiguous_address(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + # This piece is not contiguous with + # the previous one, but it is zero + # bits so it should be ignored. 
+ assembler.U8(DW_OP.addr), + assembler.U64(0xEEEE0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", address=0xFFFF0000)) + + def test_variable_expr_previous_empty_piece_non_contiguous_address(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xEEEE0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + # This piece is not contiguous with + # the previous one, but the + # previous one was zero bits so it + # should be ignored. + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(4), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", address=0xFFFF0000)) + + def test_variable_expr_address_empty_piece(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xEEEE0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_absent_empty_piece(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + 
assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + def test_variable_expr_unknown(self): prog = dwarf_program( wrap_test_type_dies( From 5a03d6b13fb2ba2f4208dbdc4512e96449f13baa Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 7 Jun 2021 16:17:11 -0700 Subject: [PATCH 55/56] drgn 0.0.13 Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index c49ba4193..88378aefe 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Facebook, Inc. and its affiliates. dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.12], +AC_INIT([libdrgn], [0.0.13], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) From 82ca5634b513cca4bf40fd99e97fef6652dcc75f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Jun 2021 12:05:42 -0700 Subject: [PATCH 56/56] libdrgn: fix copying value to big-endian from little-endian copy_lsbytes() doesn't copy enough bytes when copying from a smaller little-endian value to a larger big-endian value. This was caught by the test cases for DW_OP_deref{,_size}, but it can affect other places when debugging a little-endian target from a big-endian host or vice-versa. Closes #105. Signed-off-by: Omar Sandoval --- libdrgn/serialize.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index da3ba91a0..0f4783f35 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -65,7 +65,7 @@ static inline void copy_lsbytes(void *dst, size_t dst_size, } else { memset(d, 0, dst_size - size); if (src_little_endian) { - for (size_t i = dst_size - size; i < size; i++) + for (size_t i = dst_size - size; i < dst_size; i++) d[i] = s[dst_size - 1 - i]; } else { memcpy(d + dst_size - size, s + src_size - size, size);