From 81053a1c57e80d3be7d426bd0118b68a23c7997e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 14 Jun 2021 17:40:52 -0700 Subject: [PATCH 001/139] libdrgn: dwarf_index: support DWARF 5 The main changes are: 1. Skipping the new attribute forms. 2. Handling DW_FORM_strx*, DW_FORM_line_strp, and DW_FORM_implicit_const for the attributes that we care about. 3. Parsing the new unit header format. 4. Parsing the new line number program header format. Note that Clang currently produces an incorrect DWARF 5 line number program header for the Linux kernel (https://reviews.llvm.org/D105662), so some types are not properly deduplicated in that case. Closes #104. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 27 +- libdrgn/debug_info.h | 2 + libdrgn/dwarf_index.c | 789 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 733 insertions(+), 85 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index f79635231..124829760 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -109,7 +109,9 @@ static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_DEBUG_TYPES] = ".debug_types", [DRGN_SCN_DEBUG_ABBREV] = ".debug_abbrev", [DRGN_SCN_DEBUG_STR] = ".debug_str", + [DRGN_SCN_DEBUG_STR_OFFSETS] = ".debug_str_offsets", [DRGN_SCN_DEBUG_LINE] = ".debug_line", + [DRGN_SCN_DEBUG_LINE_STR] = ".debug_line_str", [DRGN_SCN_DEBUG_FRAME] = ".debug_frame", [DRGN_SCN_EH_FRAME] = ".eh_frame", [DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip", @@ -1295,6 +1297,18 @@ drgn_debug_info_find_sections(struct drgn_debug_info_module *module) return NULL; } +static void truncate_null_terminated_section(Elf_Data *data) +{ + if (data) { + const char *buf = data->d_buf; + const char *nul = memrchr(buf, '\0', data->d_size); + if (nul) + data->d_size = nul - buf + 1; + else + data->d_size = 0; + } +} + static struct drgn_error * drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) { @@ -1311,17 +1325,10 @@ 
drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) /* * Truncate any extraneous bytes so that we can assume that a pointer - * within .debug_str is always null-terminated. + * within .debug_{,line_}str is always null-terminated. */ - Elf_Data *debug_str = module->scn_data[DRGN_SCN_DEBUG_STR]; - if (debug_str) { - const char *buf = debug_str->d_buf; - const char *nul = memrchr(buf, '\0', debug_str->d_size); - if (nul) - debug_str->d_size = nul - buf + 1; - else - debug_str->d_size = 0; - } + truncate_null_terminated_section(module->scn_data[DRGN_SCN_DEBUG_STR]); + truncate_null_terminated_section(module->scn_data[DRGN_SCN_DEBUG_LINE_STR]); return NULL; } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index b7f313ea5..8440da54c 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -56,7 +56,9 @@ enum drgn_debug_info_scn { DRGN_SCN_DEBUG_TYPES, DRGN_SCN_DEBUG_ABBREV, DRGN_SCN_DEBUG_STR, + DRGN_SCN_DEBUG_STR_OFFSETS, DRGN_SCN_DEBUG_LINE, + DRGN_SCN_DEBUG_LINE_STR, DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 6ca1b4f2e..3fd9c9153 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -39,7 +39,7 @@ DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) * set to zero if the tag is not of interest); see DIE_FLAG_*. 
*/ enum { - INSN_MAX_SKIP = 215, + INSN_MAX_SKIP = 199, ATTRIB_BLOCK, ATTRIB_BLOCK1, ATTRIB_BLOCK2, @@ -54,9 +54,23 @@ enum { ATTRIB_NAME_STRP4, ATTRIB_NAME_STRP8, ATTRIB_NAME_STRING, + ATTRIB_NAME_STRX, + ATTRIB_NAME_STRX1, + ATTRIB_NAME_STRX2, + ATTRIB_NAME_STRX3, + ATTRIB_NAME_STRX4, ATTRIB_COMP_DIR_STRP4, ATTRIB_COMP_DIR_STRP8, + ATTRIB_COMP_DIR_LINE_STRP4, + ATTRIB_COMP_DIR_LINE_STRP8, ATTRIB_COMP_DIR_STRING, + ATTRIB_COMP_DIR_STRX, + ATTRIB_COMP_DIR_STRX1, + ATTRIB_COMP_DIR_STRX2, + ATTRIB_COMP_DIR_STRX3, + ATTRIB_COMP_DIR_STRX4, + ATTRIB_STR_OFFSETS_BASE4, + ATTRIB_STR_OFFSETS_BASE8, ATTRIB_STMT_LIST_LINEPTR4, ATTRIB_STMT_LIST_LINEPTR8, ATTRIB_DECL_FILE_DATA1, @@ -64,6 +78,11 @@ enum { ATTRIB_DECL_FILE_DATA4, ATTRIB_DECL_FILE_DATA8, ATTRIB_DECL_FILE_UDATA, + /* + * This instruction is the only one with an operand: the ULEB128 + * implicit constant. + */ + ATTRIB_DECL_FILE_IMPLICIT, ATTRIB_DECLARATION_FLAG, ATTRIB_SPECIFICATION_REF1, ATTRIB_SPECIFICATION_REF2, @@ -76,6 +95,7 @@ enum { ATTRIB_SIBLING_INDIRECT, ATTRIB_NAME_INDIRECT, ATTRIB_COMP_DIR_INDIRECT, + ATTRIB_STR_OFFSETS_BASE_INDIRECT, ATTRIB_STMT_LIST_INDIRECT, ATTRIB_DECL_FILE_INDIRECT, ATTRIB_DECLARATION_INDIRECT, @@ -95,14 +115,21 @@ DEFINE_VECTOR(uint8_vector, uint8_t) DEFINE_VECTOR(uint32_vector, uint32_t) DEFINE_VECTOR(uint64_vector, uint64_t) +/* + * Placeholder for drgn_dwarf_index_cu::file_name_hashes if the CU has no + * filenames. + */ +static const uint64_t no_file_name_hashes[1] = { 0 }; + struct drgn_dwarf_index_cu { struct drgn_debug_info_module *module; const char *buf; size_t len; uint8_t version; + uint8_t unit_type; uint8_t address_size; bool is_64_bit; - bool is_type_unit; + enum drgn_debug_info_scn scn; /* * This is indexed on the DWARF abbreviation code minus one. 
It maps the * abbreviation code to an index in abbrev_insns where the instruction @@ -117,6 +144,7 @@ struct drgn_dwarf_index_cu { uint8_t *abbrev_insns; uint64_t *file_name_hashes; size_t num_file_names; + const char *str_offsets; }; struct drgn_dwarf_index_cu_buffer { @@ -197,7 +225,8 @@ void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex) static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) { - free(cu->file_name_hashes); + if (cu->file_name_hashes != no_file_name_hashes) + free(cu->file_name_hashes); free(cu->abbrev_insns); free(cu->abbrev_decls); } @@ -259,6 +288,7 @@ static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, struct binary_buffer *bb, uint64_t form, uint8_t *insn_ret) { + struct drgn_error *err; switch (form) { case DW_FORM_addr: *insn_ret = cu->address_size; @@ -266,21 +296,36 @@ static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_data1: case DW_FORM_ref1: case DW_FORM_flag: + case DW_FORM_strx1: + case DW_FORM_addrx1: *insn_ret = 1; return NULL; case DW_FORM_data2: case DW_FORM_ref2: + case DW_FORM_strx2: + case DW_FORM_addrx2: *insn_ret = 2; return NULL; + case DW_FORM_strx3: + case DW_FORM_addrx3: + *insn_ret = 3; + return NULL; case DW_FORM_data4: case DW_FORM_ref4: + case DW_FORM_ref_sup4: + case DW_FORM_strx4: + case DW_FORM_addrx4: *insn_ret = 4; return NULL; case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sig8: + case DW_FORM_ref_sup8: *insn_ret = 8; return NULL; + case DW_FORM_data16: + *insn_ret = 16; + return NULL; case DW_FORM_block: case DW_FORM_exprloc: *insn_ret = ATTRIB_BLOCK; @@ -297,6 +342,10 @@ static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_sdata: case DW_FORM_udata: case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: *insn_ret = ATTRIB_LEB128; return NULL; case DW_FORM_ref_addr: @@ -307,11 +356,17 @@ static struct drgn_error 
*dw_form_to_insn(struct drgn_dwarf_index_cu *cu, /* fallthrough */ case DW_FORM_sec_offset: case DW_FORM_strp: + case DW_FORM_strp_sup: + case DW_FORM_line_strp: *insn_ret = cu->is_64_bit ? 8 : 4; return NULL; case DW_FORM_string: *insn_ret = ATTRIB_STRING; return NULL; + case DW_FORM_implicit_const: + if ((err = binary_buffer_skip_leb128(bb))) + return err; + /* fallthrough */ case DW_FORM_flag_present: *insn_ret = 0; return NULL; @@ -373,6 +428,21 @@ static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_string: *insn_ret = ATTRIB_NAME_STRING; return NULL; + case DW_FORM_strx: + *insn_ret = ATTRIB_NAME_STRX; + return NULL; + case DW_FORM_strx1: + *insn_ret = ATTRIB_NAME_STRX1; + return NULL; + case DW_FORM_strx2: + *insn_ret = ATTRIB_NAME_STRX2; + return NULL; + case DW_FORM_strx3: + *insn_ret = ATTRIB_NAME_STRX3; + return NULL; + case DW_FORM_strx4: + *insn_ret = ATTRIB_NAME_STRX4; + return NULL; case DW_FORM_indirect: *insn_ret = ATTRIB_NAME_INDIRECT; return NULL; @@ -399,9 +469,34 @@ static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, else *insn_ret = ATTRIB_COMP_DIR_STRP4; return NULL; + case DW_FORM_line_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]) { + return binary_buffer_error(bb, + "DW_FORM_line_strp without .debug_line_str section"); + } + if (cu->is_64_bit) + *insn_ret = ATTRIB_COMP_DIR_LINE_STRP8; + else + *insn_ret = ATTRIB_COMP_DIR_LINE_STRP4; + return NULL; case DW_FORM_string: *insn_ret = ATTRIB_COMP_DIR_STRING; return NULL; + case DW_FORM_strx: + *insn_ret = ATTRIB_COMP_DIR_STRX; + return NULL; + case DW_FORM_strx1: + *insn_ret = ATTRIB_COMP_DIR_STRX1; + return NULL; + case DW_FORM_strx2: + *insn_ret = ATTRIB_COMP_DIR_STRX2; + return NULL; + case DW_FORM_strx3: + *insn_ret = ATTRIB_COMP_DIR_STRX3; + return NULL; + case DW_FORM_strx4: + *insn_ret = ATTRIB_COMP_DIR_STRX4; + return NULL; case DW_FORM_indirect: *insn_ret = ATTRIB_COMP_DIR_INDIRECT; return NULL; @@ -412,6 
+507,28 @@ static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, } } +static struct drgn_error * +dw_at_str_offsets_base_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = ATTRIB_STR_OFFSETS_BASE8; + else + *insn_ret = ATTRIB_STR_OFFSETS_BASE4; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_STR_OFFSETS_BASE_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_str_offsets_base", + form); + } +} + static struct drgn_error * dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, struct binary_buffer *bb, uint64_t form, @@ -442,7 +559,8 @@ dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret) + uint8_t *insn_ret, + uint64_t *implicit_const_ret) { switch (form) { case DW_FORM_data1: @@ -465,6 +583,9 @@ static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, case DW_FORM_udata: *insn_ret = ATTRIB_DECL_FILE_UDATA; return NULL; + case DW_FORM_implicit_const: + *insn_ret = ATTRIB_DECL_FILE_IMPLICIT; + return binary_buffer_next_uleb128(bb, implicit_const_ret); case DW_FORM_indirect: *insn_ret = ATTRIB_DECL_FILE_INDIRECT; return NULL; @@ -549,6 +670,19 @@ dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, } } +static bool append_uleb128(struct uint8_vector *insns, uint64_t value) +{ + do { + uint8_t byte = value & 0x7f; + value >>= 7; + if (value != 0) + byte |= 0x80; + if (!uint8_vector_append(insns, &byte)) + return false; + } while (value != 0); + return true; +} + static struct drgn_error * read_abbrev_decl(struct drgn_debug_info_buffer *buffer, struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, @@ -609,10 +743,10 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, if 
(children) die_flags |= DIE_FLAG_CHILDREN; - bool first = true; - uint8_t insn; + uint8_t insn, last_insn = UINT8_MAX; for (;;) { uint64_t name, form; + uint64_t implicit_const; if ((err = binary_buffer_next_uleb128(&buffer->bb, &name))) return err; if ((err = binary_buffer_next_uleb128(&buffer->bb, &form))) @@ -627,6 +761,13 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, } else if (name == DW_AT_comp_dir) { err = dw_at_comp_dir_to_insn(cu, &buffer->bb, form, &insn); + } else if (name == DW_AT_str_offsets_base) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_str_offsets_base without .debug_str_offsets section"); + } + err = dw_at_str_offsets_base_to_insn(cu, &buffer->bb, + form, &insn); } else if (name == DW_AT_stmt_list) { if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { return binary_buffer_error(&buffer->bb, @@ -637,7 +778,8 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, } else if (name == DW_AT_decl_file && should_index && /* Namespaces are merged, so we ignore their file. 
*/ tag != DW_TAG_namespace) { - err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn); + err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn, + &implicit_const); } else if (name == DW_AT_declaration && should_index) { err = dw_at_declaration_to_insn(&buffer->bb, form, &insn, &die_flags); @@ -651,8 +793,7 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, return err; if (insn != 0) { - if (!first && insn <= INSN_MAX_SKIP) { - uint8_t last_insn = insns->data[insns->size - 1]; + if (insn <= INSN_MAX_SKIP) { if (last_insn + insn <= INSN_MAX_SKIP) { insns->data[insns->size - 1] += insn; continue; @@ -661,10 +802,14 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, insns->data[insns->size - 1] = INSN_MAX_SKIP; } } + last_insn = insn; if (!uint8_vector_append(insns, &insn)) return &drgn_enomem; - first = false; + + if (insn == ATTRIB_DECL_FILE_IMPLICIT && + !append_uleb128(insns, implicit_const)) + return &drgn_enomem; } } insn = 0; @@ -700,6 +845,35 @@ static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, return NULL; } +/* Get the size of a unit header beyond that of a normal compilation unit. */ +static size_t cu_header_extra_size(struct drgn_dwarf_index_cu *cu) +{ + switch (cu->unit_type) { + case DW_UT_compile: + case DW_UT_partial: + return 0; + case DW_UT_skeleton: + case DW_UT_split_compile: + /* dwo_id */ + return 8; + case DW_UT_type: + case DW_UT_split_type: + /* type_signature and type_offset */ + return cu->is_64_bit ? 16 : 12; + default: + UNREACHABLE(); + } +} + +static size_t cu_header_size(struct drgn_dwarf_index_cu *cu) +{ + size_t size = cu->is_64_bit ? 
23 : 11; + if (cu->version >= 5) + size++; + size += cu_header_extra_size(cu); + return size; +} + static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) { struct drgn_error *err; @@ -707,13 +881,33 @@ static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) uint16_t version; if ((err = binary_buffer_next_u16(&buffer->bb, &version))) return err; - if (version < 2 || version > 4) { + if (version < 2 || version > 5) { return binary_buffer_error(&buffer->bb, "unknown DWARF CU version %" PRIu16, version); } buffer->cu->version = version; + if (version >= 5) { + if ((err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->unit_type))) + return err; + if (buffer->cu->unit_type < DW_UT_compile || + buffer->cu->unit_type > DW_UT_split_type) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF unit type"); + } + } else if (buffer->cu->scn == DRGN_SCN_DEBUG_TYPES) { + buffer->cu->unit_type = DW_UT_type; + } else { + buffer->cu->unit_type = DW_UT_compile; + } + + if (version >= 5 && + (err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->address_size))) + return err; + uint64_t debug_abbrev_offset; if (buffer->cu->is_64_bit) { if ((err = binary_buffer_next_u64(&buffer->bb, @@ -730,7 +924,8 @@ static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) "debug_abbrev_offset is out of bounds"); } - if ((err = binary_buffer_next_u8(&buffer->bb, + if (version < 5 && + (err = binary_buffer_next_u8(&buffer->bb, &buffer->cu->address_size))) return err; if (buffer->cu->address_size > 8) { @@ -739,16 +934,54 @@ static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) buffer->cu->address_size); } - /* Skip type_signature and type_offset for type units. */ - if (buffer->cu->is_type_unit && - (err = binary_buffer_skip(&buffer->bb, - buffer->cu->is_64_bit ? 
16 : 12))) + if ((err = binary_buffer_skip(&buffer->bb, + cu_header_extra_size(buffer->cu)))) return err; return read_abbrev_table(buffer->cu, debug_abbrev_offset); } -static struct drgn_error *skip_lnp_header(struct drgn_debug_info_buffer *buffer) +static struct drgn_error *read_strx(struct drgn_dwarf_index_cu_buffer *buffer, + uint64_t strx, const char **ret) +{ + if (!buffer->cu->str_offsets) { + return binary_buffer_error(&buffer->bb, + "string index without DW_AT_str_offsets_base"); + } + Elf_Data *debug_str_offsets = + buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]; + size_t offset_size = buffer->cu->is_64_bit ? 8 : 4; + if (((char *)debug_str_offsets->d_buf + debug_str_offsets->d_size + - buffer->cu->str_offsets) + / offset_size <= strx) { + return binary_buffer_error(&buffer->bb, + "string index out of bounds"); + } + uint64_t strp; + if (buffer->cu->is_64_bit) { + memcpy(&strp, (uint64_t *)buffer->cu->str_offsets + strx, + sizeof(strp)); + if (buffer->bb.bswap) + strp = bswap_64(strp); + } else { + uint32_t strp32; + memcpy(&strp32, (uint32_t *)buffer->cu->str_offsets + strx, + sizeof(strp32)); + if (buffer->bb.bswap) + strp32 = bswap_32(strp32); + strp = strp32; + } + if (strp >= buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR]->d_size) { + return binary_buffer_error(&buffer->bb, + "indirect string is out of bounds"); + } + *ret = ((char *)buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR]->d_buf + + strp); + return NULL; +} + +static struct drgn_error *read_lnp_header(struct drgn_debug_info_buffer *buffer, + bool *is_64_bit_ret, int *version_ret) { struct drgn_error *err; uint32_t tmp; @@ -758,29 +991,28 @@ static struct drgn_error *skip_lnp_header(struct drgn_debug_info_buffer *buffer) if (is_64_bit && (err = binary_buffer_skip(&buffer->bb, sizeof(uint64_t)))) return err; + *is_64_bit_ret = is_64_bit; uint16_t version; if ((err = binary_buffer_next_u16(&buffer->bb, &version))) return err; - if (version < 2 || version > 4) { + if (version < 2 || 
version > 5) { return binary_buffer_error(&buffer->bb, "unknown DWARF LNP version %" PRIu16, version); } + *version_ret = version; - /* - * Skip: - * header_length - * minimum_instruction_length - * maximum_operations_per_instruction (DWARF 4 only) - * default_is_stmt - * line_base - * line_range - * standard_opcode_lengths - */ uint8_t opcode_base; if ((err = binary_buffer_skip(&buffer->bb, - (is_64_bit ? 8 : 4) + 4 + (version >= 4))) || + /* address_size + segment_selector_size */ + + (version >= 5 ? 2 : 0) + + (is_64_bit ? 8 : 4) /* header_length */ + + 1 /* minimum_instruction_length */ + + (version >= 4) /* maximum_operations_per_instruction */ + + 1 /* default_is_stmt */ + + 1 /* line_base */ + + 1 /* line_range */)) || (err = binary_buffer_next_u8(&buffer->bb, &opcode_base)) || (err = binary_buffer_skip(&buffer->bb, opcode_base - 1))) return err; @@ -874,6 +1106,21 @@ struct path_hash_chunk { DEFINE_VECTOR(path_hash_vector, const struct path_hash *) +struct lnp_entry_format { + uint64_t content_type; + uint64_t form; +}; + +static const struct lnp_entry_format dwarf4_directory_entry_formats[] = { + { DW_LNCT_path, DW_FORM_string }, +}; +static const struct lnp_entry_format dwarf4_file_name_entry_formats[] = { + { DW_LNCT_path, DW_FORM_string }, + { DW_LNCT_directory_index, DW_FORM_udata }, + { DW_LNCT_timestamp, DW_FORM_udata }, + { DW_LNCT_size, DW_FORM_udata }, +}; + /** * Cache of hashed file paths. * @@ -882,6 +1129,9 @@ DEFINE_VECTOR(path_hash_vector, const struct path_hash *) * exhausted, a new @ref path_hash_chunk is allocated from the heap. The * allocated chunks are kept and reused for each DWARF line number program; they * are freed at the end of the first indexing pass. + * + * This also caches the allocations for directory hashes and line number program + * header entry formats. */ struct path_hash_cache { /** Next @ref path_hash object to be allocated. 
*/ @@ -892,6 +1142,10 @@ struct path_hash_cache { struct path_hash_chunk *first_chunk; /** Hashed directory paths. */ struct path_hash_vector directories; + /** Line number program header entry formats. */ + struct lnp_entry_format *entry_formats; + /** Allocated size of @ref path_hash_cache::entry_formats. */ + size_t entry_formats_capacity; }; static struct path_hash *path_hash_alloc(struct path_hash_cache *cache) @@ -957,6 +1211,152 @@ static const struct path_hash *hash_path(struct path_hash_cache *cache, return path_hash; } +static struct drgn_error * +read_lnp_entry_formats(struct drgn_debug_info_buffer *buffer, + struct path_hash_cache *cache, int *count_ret) +{ + struct drgn_error *err; + uint8_t count; + if ((err = binary_buffer_next_u8(&buffer->bb, &count))) + return err; + if (count > cache->entry_formats_capacity) { + free(cache->entry_formats); + cache->entry_formats = malloc_array(count, + sizeof(cache->entry_formats[0])); + if (!cache->entry_formats) { + cache->entry_formats_capacity = 0; + return &drgn_enomem; + } + cache->entry_formats_capacity = count; + } + bool have_path = false; + for (int i = 0; i < count; i++) { + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &cache->entry_formats[i].content_type))) + return err; + if (cache->entry_formats[i].content_type == DW_LNCT_path) + have_path = true; + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &cache->entry_formats[i].form))) + return err; + } + if (!have_path) { + return binary_buffer_error(&buffer->bb, + "DWARF line number program header entry does not include DW_LNCT_path"); + } + *count_ret = count; + return NULL; +} + +static struct drgn_error *skip_lnp_form(struct binary_buffer *bb, + bool is_64_bit, uint64_t form) +{ + struct drgn_error *err; + uint64_t skip; + switch (form) { + case DW_FORM_block: + if ((err = binary_buffer_next_uleb128(bb, &skip))) + return err; +block: + return binary_buffer_skip(bb, skip); + case DW_FORM_block1: + if ((err = 
binary_buffer_next_u8_into_u64(bb, &skip))) + return err; + goto block; + case DW_FORM_block2: + if ((err = binary_buffer_next_u16_into_u64(bb, &skip))) + return err; + goto block; + case DW_FORM_block4: + if ((err = binary_buffer_next_u32_into_u64(bb, &skip))) + return err; + goto block; + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_strx1: + return binary_buffer_skip(bb, 1); + case DW_FORM_data2: + case DW_FORM_strx2: + return binary_buffer_skip(bb, 2); + case DW_FORM_strx3: + return binary_buffer_skip(bb, 3); + case DW_FORM_data4: + case DW_FORM_strx4: + return binary_buffer_skip(bb, 4); + case DW_FORM_data8: + return binary_buffer_skip(bb, 8); + case DW_FORM_data16: + return binary_buffer_skip(bb, 16); + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp: + return binary_buffer_skip(bb, is_64_bit ? 8 : 4); + case DW_FORM_sdata: + case DW_FORM_strx: + case DW_FORM_udata: + return binary_buffer_skip_leb128(bb); + case DW_FORM_string: + return binary_buffer_skip_string(bb); + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for line number program", + form); + } +} + +static struct drgn_error *read_lnp_string(struct drgn_debug_info_buffer *buffer, + bool is_64_bit, uint64_t form, + const char **ret) +{ + struct drgn_error *err; + uint64_t strp; + Elf_Data *data; + switch (form) { + case DW_FORM_string: + *ret = buffer->bb.pos; + return binary_buffer_skip_string(&buffer->bb); + case DW_FORM_line_strp: + case DW_FORM_strp: + if (is_64_bit) + err = binary_buffer_next_u64(&buffer->bb, &strp); + else + err = binary_buffer_next_u32_into_u64(&buffer->bb, &strp); + if (err) + return err; + data = buffer->module->scn_data[ + form == DW_FORM_line_strp ? 
+ DRGN_SCN_DEBUG_LINE_STR : DRGN_SCN_DEBUG_STR]; + if (!data || strp >= data->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_LNCT_path is out of bounds"); + } + *ret = (const char *)data->d_buf + strp; + return NULL; + default: + return binary_buffer_error(&buffer->bb, + "unknown attribute form %" PRIu64 " for DW_LNCT_path", + form); + } +} + +static struct drgn_error * +read_lnp_directory_index(struct drgn_debug_info_buffer *buffer, uint64_t form, + uint64_t *ret) +{ + switch (form) { + case DW_FORM_data1: + return binary_buffer_next_u8_into_u64(&buffer->bb, ret); + case DW_FORM_data2: + return binary_buffer_next_u16_into_u64(&buffer->bb, ret); + case DW_FORM_udata: + return binary_buffer_next_uleb128(&buffer->bb, ret); + default: + return binary_buffer_error(&buffer->bb, + "unknown attribute form %" PRIu64 " for DW_LNCT_directory_index", + form); + } +} + static struct drgn_error *read_file_name_table(struct path_hash_cache *cache, struct drgn_dwarf_index_cu *cu, const char *comp_dir, @@ -969,58 +1369,147 @@ static struct drgn_error *read_file_name_table(struct path_hash_cache *cache, /* Checked in index_cu_first_pass(). */ buffer.bb.pos += stmt_list; - if ((err = skip_lnp_header(&buffer))) + bool is_64_bit; + int version; + if ((err = read_lnp_header(&buffer, &is_64_bit, &version))) return err; cache->current_chunk = cache->first_chunk; cache->next_object = cache->first_chunk->objects; cache->directories.size = 0; - const struct path_hash *path_hash = hash_path(cache, comp_dir, - &empty_path_hash); - if (!path_hash || - !path_hash_vector_append(&cache->directories, &path_hash)) - return &drgn_enomem; - for (;;) { - const char *path; - size_t path_len; - if ((err = binary_buffer_next_string(&buffer.bb, &path, - &path_len))) + const struct lnp_entry_format *entry_formats; + int entry_format_count; + uint64_t entry_count = 0; /* For -Wmaybe-uninitialized. 
*/ + const struct path_hash *path_hash, *parent; + if (version >= 5) { + if ((err = read_lnp_entry_formats(&buffer, cache, + &entry_format_count))) return err; - if (!path_len) - break; - path_hash = hash_path(cache, path, cache->directories.data[0]); + entry_formats = cache->entry_formats; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &entry_count))) + return err; + if (entry_count > SIZE_MAX || + !path_hash_vector_reserve(&cache->directories, entry_count)) + return err; + parent = &empty_path_hash; + } else { + entry_formats = dwarf4_directory_entry_formats; + entry_format_count = ARRAY_SIZE(dwarf4_directory_entry_formats); + path_hash = hash_path(cache, comp_dir, &empty_path_hash); if (!path_hash || !path_hash_vector_append(&cache->directories, &path_hash)) return &drgn_enomem; + parent = path_hash; } - struct uint64_vector file_name_hashes = VECTOR_INIT; - for (;;) { + while (version < 5 || entry_count-- > 0) { const char *path; - size_t path_len; - if ((err = binary_buffer_next_string(&buffer.bb, &path, - &path_len))) - goto err; - if (!path_len) - break; + for (int j = 0; j < entry_format_count; j++) { + if (entry_formats[j].content_type == DW_LNCT_path) { + err = read_lnp_string(&buffer, is_64_bit, + entry_formats[j].form, + &path); + if (version < 5 && path[0] == '\0') + goto file_name_entries; + } else { + err = skip_lnp_form(&buffer.bb, is_64_bit, + entry_formats[j].form); + } + if (err) + return err; + } + path_hash = hash_path(cache, path, parent); + if (!path_hash || + !path_hash_vector_append(&cache->directories, &path_hash)) + return &drgn_enomem; + parent = cache->directories.data[0]; + } - uint64_t directory_index; +file_name_entries:; + /* + * File name 0 needs special treatment. In DWARF 2-4, file name entries + * are numbered starting at 1, and a DW_AT_decl_file of 0 indicates that + * no file was specified. In DWARF 5, file name entries are numbered + * starting at 0, and entry 0 is the current compilation file name. 
The + * DWARF 5 specification still states that a DW_AT_decl_file of 0 + * indicates that no file was specified, but some producers (including + * Clang) and consumers (including elfutils and GDB) treat a + * DW_AT_decl_file of 0 as specifying the current compilation file name, + * so we do the same. + * + * So, for DWARF 5, we hash entry 0 as usual, and for DWARF 4, we insert + * a placeholder for entry 0. If there are no file names at all, we keep + * the no_file_name_hashes placeholder. + */ + struct uint64_vector file_name_hashes; + if (version >= 5) { + if ((err = read_lnp_entry_formats(&buffer, cache, + &entry_format_count))) + return err; + entry_formats = cache->entry_formats; if ((err = binary_buffer_next_uleb128(&buffer.bb, - &directory_index))) + &entry_count))) + return err; + if (entry_count == 0) + return NULL; + if (entry_count > SIZE_MAX) + return &drgn_enomem; + uint64_vector_init(&file_name_hashes); + if (!uint64_vector_reserve(&file_name_hashes, entry_count)) { + err = &drgn_enomem; goto err; + } + } else { + entry_formats = dwarf4_file_name_entry_formats; + entry_format_count = ARRAY_SIZE(dwarf4_file_name_entry_formats); + uint64_vector_init(&file_name_hashes); + } + + while (version < 5 || entry_count-- > 0) { + const char *path; + uint64_t directory_index = 0; + for (int j = 0; j < entry_format_count; j++) { + if (entry_formats[j].content_type == DW_LNCT_path) { + err = read_lnp_string(&buffer, is_64_bit, + entry_formats[j].form, + &path); + if (!err && version < 5) { + if (path[0] == '\0') { + if (file_name_hashes.size == 0) { + uint64_vector_deinit(&file_name_hashes); + return NULL; + } + goto done; + } else if (file_name_hashes.size == 0) { + uint64_t zero = 0; + if (!uint64_vector_append(&file_name_hashes, + &zero)) { + err = &drgn_enomem; + goto err; + } + } + } + } else if (entry_formats[j].content_type == + DW_LNCT_directory_index) { + err = read_lnp_directory_index(&buffer, + entry_formats[j].form, + &directory_index); + } else { + err 
= skip_lnp_form(&buffer.bb, is_64_bit, + entry_formats[j].form); + } + if (err) + goto err; + } + if (directory_index >= cache->directories.size) { err = binary_buffer_error(&buffer.bb, "directory index %" PRIu64 " is invalid", directory_index); goto err; } - - /* mtime, size */ - if ((err = binary_buffer_skip_leb128(&buffer.bb)) || - (err = binary_buffer_skip_leb128(&buffer.bb))) - goto err; - struct path_hash *prev_object = cache->next_object; struct path_hash_chunk *prev_chunk = cache->current_chunk; path_hash = hash_path(cache, path, @@ -1036,6 +1525,7 @@ static struct drgn_error *read_file_name_table(struct path_hash_cache *cache, cache->current_chunk = prev_chunk; } +done: uint64_vector_shrink_to_fit(&file_name_hashes); cu->file_name_hashes = file_name_hashes.data; cu->num_file_names = file_name_hashes.size; @@ -1078,6 +1568,10 @@ static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, uint64_t form; if ((err = binary_buffer_next_uleb128(bb, &form))) return err; + if (form == DW_FORM_implicit_const) { + return binary_buffer_error(bb, + "DW_FORM_implicit_const in DW_FORM_indirect"); + } switch (insn) { case ATTRIB_INDIRECT: return dw_form_to_insn(cu, bb, form, insn_ret); @@ -1087,10 +1581,12 @@ static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, return dw_at_name_to_insn(cu, bb, form, insn_ret); case ATTRIB_COMP_DIR_INDIRECT: return dw_at_comp_dir_to_insn(cu, bb, form, insn_ret); + case ATTRIB_STR_OFFSETS_BASE_INDIRECT: + return dw_at_str_offsets_base_to_insn(cu, bb, form, insn_ret); case ATTRIB_STMT_LIST_INDIRECT: return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret); case ATTRIB_DECL_FILE_INDIRECT: - return dw_at_decl_file_to_insn(bb, form, insn_ret); + return dw_at_decl_file_to_insn(bb, form, insn_ret, NULL); case ATTRIB_DECLARATION_INDIRECT: return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags); case ATTRIB_SPECIFICATION_INDIRECT: @@ -1109,12 +1605,17 @@ index_cu_first_pass(struct drgn_dwarf_index 
*dindex, struct drgn_dwarf_index_cu_buffer *buffer, struct path_hash_cache *path_hash_cache) { + /* + * If DW_AT_comp_dir uses a strx* form, we can't read it right away + * because we might not have seen DW_AT_str_offsets_base yet. Rather + * than adding an extra flag to indicate that we need to read it later, + * we set comp_dir to this sentinel value. + */ + static const char comp_dir_is_strx; + struct drgn_error *err; struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_str = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; - Elf_Data *debug_info = cu->module->scn_data[ - cu->is_type_unit ? DRGN_SCN_DEBUG_TYPES : DRGN_SCN_DEBUG_INFO]; - const char *debug_info_buffer = debug_info->d_buf; + const char *debug_info_buffer = cu->module->scn_data[cu->scn]->d_buf; unsigned int depth = 0; for (;;) { size_t die_addr = (uintptr_t)buffer->bb.pos; @@ -1137,6 +1638,7 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, bool declaration = false; uintptr_t specification = 0; const char *comp_dir = ""; + uint64_t comp_dir_strx; const char *stmt_list_ptr = NULL; uint64_t stmt_list; const char *sibling = NULL; @@ -1145,6 +1647,7 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, while ((insn = *insnp++)) { indirect_insn:; uint64_t skip, tmp; + Elf_Data *strp_scn; switch (insn) { case ATTRIB_BLOCK: if ((err = binary_buffer_next_uleb128(&buffer->bb, @@ -1167,6 +1670,7 @@ indirect_insn:; return err; goto skip; case ATTRIB_LEB128: + case ATTRIB_NAME_STRX: case ATTRIB_DECL_FILE_UDATA: if ((err = binary_buffer_skip_leb128(&buffer->bb))) return err; @@ -1219,16 +1723,78 @@ indirect_insn:; if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, &tmp))) return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; goto comp_dir_strp; case ATTRIB_COMP_DIR_STRP8: if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; + goto comp_dir_strp; + case ATTRIB_COMP_DIR_LINE_STRP4: + if ((err = 
binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]; + goto comp_dir_strp; + case ATTRIB_COMP_DIR_LINE_STRP8: + if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) + return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]; comp_dir_strp: - if (tmp >= debug_str->d_size) { + if (tmp >= strp_scn->d_size) { return binary_buffer_error(&buffer->bb, "DW_AT_comp_dir is out of bounds"); } - comp_dir = (const char *)debug_str->d_buf + tmp; + comp_dir = (const char *)strp_scn->d_buf + tmp; + break; + case ATTRIB_COMP_DIR_STRX: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case ATTRIB_COMP_DIR_STRX1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case ATTRIB_COMP_DIR_STRX2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case ATTRIB_COMP_DIR_STRX3: + if ((err = binary_buffer_next_uint(&buffer->bb, + 3, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case ATTRIB_COMP_DIR_STRX4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case ATTRIB_STR_OFFSETS_BASE4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto str_offsets_base; + case ATTRIB_STR_OFFSETS_BASE8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +str_offsets_base: + if (tmp > cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_AT_str_offsets_base is out of bounds"); + } + cu->str_offsets = + (char *)cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_buf + + tmp; break; case ATTRIB_STMT_LIST_LINEPTR4: stmt_list_ptr = buffer->bb.pos; @@ -1242,13 
+1808,19 @@ indirect_insn:; &stmt_list))) return err; break; + case ATTRIB_NAME_STRX1: case ATTRIB_DECL_FILE_DATA1: skip = 1; goto skip; + case ATTRIB_NAME_STRX2: case ATTRIB_DECL_FILE_DATA2: skip = 2; goto skip; + case ATTRIB_NAME_STRX3: + skip = 3; + goto skip; case ATTRIB_NAME_STRP4: + case ATTRIB_NAME_STRX4: case ATTRIB_DECL_FILE_DATA4: skip = 4; goto skip; @@ -1256,6 +1828,10 @@ indirect_insn:; case ATTRIB_DECL_FILE_DATA8: skip = 8; goto skip; + case ATTRIB_DECL_FILE_IMPLICIT: + while (*insnp++ & 0x80) + ; + break; case ATTRIB_DECLARATION_FLAG: { uint8_t flag; if ((err = binary_buffer_next_u8(&buffer->bb, @@ -1308,6 +1884,7 @@ indirect_insn:; case ATTRIB_SIBLING_INDIRECT: case ATTRIB_NAME_INDIRECT: case ATTRIB_COMP_DIR_INDIRECT: + case ATTRIB_STR_OFFSETS_BASE_INDIRECT: case ATTRIB_STMT_LIST_INDIRECT: case ATTRIB_DECL_FILE_INDIRECT: case ATTRIB_DECLARATION_INDIRECT: @@ -1339,6 +1916,10 @@ indirect_insn:; stmt_list_ptr, "DW_AT_stmt_list is out of bounds"); } + if (comp_dir == &comp_dir_is_strx && + (err = read_strx(buffer, comp_dir_strx, + &comp_dir))) + return err; if ((err = read_file_name_table(path_hash_cache, cu, comp_dir, stmt_list))) @@ -1581,7 +2162,7 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; const char *name = NULL; const char *decl_file_ptr = NULL; - uint64_t decl_file = 0; + uint64_t decl_file = 0; /* For -Wmaybe-uninitialized. 
*/ bool declaration = false; bool specification = false; const char *sibling = NULL; @@ -1615,6 +2196,7 @@ indirect_insn:; specification = true; /* fallthrough */ case ATTRIB_LEB128: + case ATTRIB_COMP_DIR_STRX: if ((err = binary_buffer_skip_leb128(&buffer->bb))) return err; break; @@ -1678,11 +2260,44 @@ indirect_insn:; name = (const char *)debug_str->d_buf + tmp; __builtin_prefetch(name); break; + case ATTRIB_NAME_STRX: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; + goto name_strx; + case ATTRIB_NAME_STRX1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto name_strx; + case ATTRIB_NAME_STRX2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto name_strx; + case ATTRIB_NAME_STRX3: + if ((err = binary_buffer_next_uint(&buffer->bb, + 3, &tmp))) + return err; + goto name_strx; + case ATTRIB_NAME_STRX4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; +name_strx: + if ((err = read_strx(buffer, tmp, &name))) + return err; + __builtin_prefetch(name); + break; case ATTRIB_COMP_DIR_STRP4: + case ATTRIB_COMP_DIR_LINE_STRP4: + case ATTRIB_STR_OFFSETS_BASE4: case ATTRIB_STMT_LIST_LINEPTR4: skip = 4; goto skip; case ATTRIB_COMP_DIR_STRP8: + case ATTRIB_COMP_DIR_LINE_STRP8: + case ATTRIB_STR_OFFSETS_BASE8: case ATTRIB_STMT_LIST_LINEPTR8: skip = 8; goto skip; @@ -1716,6 +2331,16 @@ indirect_insn:; &decl_file))) return err; break; + case ATTRIB_DECL_FILE_IMPLICIT: + decl_file_ptr = buffer->bb.pos; + decl_file = 0; + for (int shift = 0; ; shift += 7) { + uint8_t byte = *insnp++; + decl_file |= (uint64_t)(byte & 0x7f) << shift; + if (!(byte & 0x80)) + break; + } + break; case ATTRIB_DECLARATION_FLAG: { uint8_t flag; if ((err = binary_buffer_next_u8(&buffer->bb, @@ -1727,15 +2352,24 @@ indirect_insn:; } case ATTRIB_SPECIFICATION_REF1: specification = true; + /* fallthrough */ + case ATTRIB_COMP_DIR_STRX1: skip = 1; goto skip; case 
ATTRIB_SPECIFICATION_REF2: specification = true; + /* fallthrough */ + case ATTRIB_COMP_DIR_STRX2: skip = 2; goto skip; + case ATTRIB_COMP_DIR_STRX3: + skip = 3; + goto skip; case ATTRIB_SPECIFICATION_REF4: case ATTRIB_SPECIFICATION_REF_ADDR4: specification = true; + /* fallthrough */ + case ATTRIB_COMP_DIR_STRX4: skip = 4; goto skip; case ATTRIB_SPECIFICATION_REF8: @@ -1747,6 +2381,7 @@ indirect_insn:; case ATTRIB_SIBLING_INDIRECT: case ATTRIB_NAME_INDIRECT: case ATTRIB_COMP_DIR_INDIRECT: + case ATTRIB_STR_OFFSETS_BASE_INDIRECT: case ATTRIB_STMT_LIST_INDIRECT: case ATTRIB_DECL_FILE_INDIRECT: case ATTRIB_DECLARATION_INDIRECT: @@ -1799,14 +2434,14 @@ indirect_insn:; } uint64_t file_name_hash; - if (decl_file) { - if (decl_file > cu->num_file_names) { + if (decl_file_ptr) { + if (decl_file >= cu->num_file_names) { return binary_buffer_error_at(&buffer->bb, decl_file_ptr, "invalid DW_AT_decl_file %" PRIu64, decl_file); } - file_name_hash = cu->file_name_hashes[decl_file - 1]; + file_name_hash = cu->file_name_hashes[decl_file]; } else { file_name_hash = 0; } @@ -1918,8 +2553,11 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) .buf = pending_cu->buf, .len = pending_cu->len, .is_64_bit = pending_cu->is_64_bit, - .is_type_unit = - pending_cu->scn == DRGN_SCN_DEBUG_TYPES, + .scn = pending_cu->scn, + .file_name_hashes = + (uint64_t *)no_file_name_hashes, + .num_file_names = + ARRAY_SIZE(no_file_name_hashes), }; } } @@ -1929,6 +2567,8 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) { struct path_hash_cache path_hash_cache; path_hash_vector_init(&path_hash_cache.directories); + path_hash_cache.entry_formats = NULL; + path_hash_cache.entry_formats_capacity = 0; path_hash_cache.first_chunk = malloc(sizeof(struct path_hash_chunk)); if (path_hash_cache.first_chunk) { @@ -1958,6 +2598,7 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) err = cu_err; } } + free(path_hash_cache.entry_formats); 
path_hash_vector_deinit(&path_hash_cache.directories); struct path_hash_chunk *chunk = path_hash_cache.first_chunk; while (chunk) { @@ -1976,9 +2617,7 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; struct drgn_dwarf_index_cu_buffer buffer; drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu->is_64_bit ? 23 : 11; - if (cu->is_type_unit) - buffer.bb.pos += cu->is_64_bit ? 16 : 12; + buffer.bb.pos += cu_header_size(cu); struct drgn_error *cu_err = index_cu_second_pass(&dindex->global, &buffer); if (cu_err) { From 215f7d79d7f43fea7a84af5e407ffe9893b9abf1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 6 Jul 2021 16:26:31 -0700 Subject: [PATCH 002/139] libdrgn: debug_info: implement DW_OP_{addr,const}x These were added in DWARF 5. They need to know the CU that they're being evaluated in, but the parameters for drgn_eval_dwarf_expression() were already getting unwieldy. Wrap the evaluation context in a new struct drgn_dwarf_expression_context, add the additional CU information, and implement the operations. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 335 ++++++++++++++++++++++++++++--------------- libdrgn/debug_info.h | 3 +- 2 files changed, 224 insertions(+), 114 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 124829760..4da099b12 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -112,6 +112,7 @@ static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_DEBUG_STR_OFFSETS] = ".debug_str_offsets", [DRGN_SCN_DEBUG_LINE] = ".debug_line", [DRGN_SCN_DEBUG_LINE_STR] = ".debug_line_str", + [DRGN_SCN_DEBUG_ADDR] = ".debug_addr", [DRGN_SCN_DEBUG_FRAME] = ".debug_frame", [DRGN_SCN_EH_FRAME] = ".eh_frame", [DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip", @@ -1528,6 +1529,74 @@ bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } +static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) +{ + if (address_size < 1 || address_size > 8) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported address size %" PRIu8, + address_size); + } + return NULL; +} + +static struct drgn_error * +drgn_dwarf_next_addrx(struct binary_buffer *bb, + struct drgn_debug_info_module *module, Dwarf_Die *cu_die, + uint8_t address_size, const char **addr_base, + uint64_t *ret) +{ + struct drgn_error *err; + + if (!*addr_base) { + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(cu_die, DW_AT_addr_base, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "indirect address without DW_AT_addr_base"); + } + Dwarf_Word base; + if (dwarf_formudata(attr, &base)) + return drgn_error_libdw(); + + if (!module->scns[DRGN_SCN_DEBUG_ADDR]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "indirect address without .debug_addr section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_ADDR); + if (err) + return err; + + if (base > module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_size || + base == 0) { + return 
drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_addr_base is out of bounds"); + } + + *addr_base = (char *)module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_buf + base; + uint8_t segment_selector_size = ((uint8_t *)*addr_base)[-1]; + if (segment_selector_size != 0) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported segment selector size %" PRIu8, + segment_selector_size); + } + } + + uint64_t index; + if ((err = binary_buffer_next_uleb128(bb, &index))) + return err; + + Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_ADDR]; + if (index >= + ((char *)data->d_buf + data->d_size - *addr_base) / address_size) { + return binary_buffer_error(bb, + "address index is out of bounds"); + } + copy_lsbytes(ret, sizeof(*ret), HOST_LITTLE_ENDIAN, + *addr_base + index * address_size, address_size, + drgn_platform_is_little_endian(&module->platform)); + return NULL; +} + static struct drgn_error * drgn_dwarf_location(struct drgn_debug_info_module *module, Dwarf_Attribute *attr, @@ -1636,12 +1705,6 @@ drgn_dwarf_location(struct drgn_debug_info_module *module, } } -struct drgn_dwarf_expression_buffer { - struct binary_buffer bb; - const char *start; - struct drgn_debug_info_module *module; -}; - static struct drgn_error * drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, const char *message) @@ -1676,50 +1739,81 @@ drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); } +/* A DWARF expression and the context it is being evaluated in. 
*/ +struct drgn_dwarf_expression_context { + struct binary_buffer bb; + const char *start; + struct drgn_program *prog; + struct drgn_debug_info_module *module; + uint8_t address_size; + Dwarf_Die cu_die; + const char *cu_addr_base; + Dwarf_Die *function; + const struct drgn_register_state *regs; +}; + static struct drgn_error * drgn_dwarf_expression_buffer_error(struct binary_buffer *bb, const char *pos, const char *message) { - struct drgn_dwarf_expression_buffer *buffer = - container_of(bb, struct drgn_dwarf_expression_buffer, bb); - return drgn_error_debug_info(buffer->module, pos, message); + struct drgn_dwarf_expression_context *ctx = + container_of(bb, struct drgn_dwarf_expression_context, bb); + return drgn_error_debug_info(ctx->module, pos, message); } -static void -drgn_dwarf_expression_buffer_init(struct drgn_dwarf_expression_buffer *buffer, - struct drgn_debug_info_module *module, - const char *expr, size_t expr_size) +static inline struct drgn_error * +drgn_dwarf_expression_context_init(struct drgn_dwarf_expression_context *ctx, + struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_CU *cu, Dwarf_Die *function, + const struct drgn_register_state *regs, + const char *expr, size_t expr_size) { - binary_buffer_init(&buffer->bb, expr, expr_size, + struct drgn_error *err; + binary_buffer_init(&ctx->bb, expr, expr_size, drgn_platform_is_little_endian(&module->platform), drgn_dwarf_expression_buffer_error); - buffer->start = expr; - buffer->module = module; + ctx->start = expr; + ctx->prog = prog; + ctx->module = module; + if (cu) { + if (!dwarf_cu_die(cu, &ctx->cu_die, NULL, NULL, + &ctx->address_size, NULL, NULL, NULL)) + return drgn_error_libdw(); + if ((err = drgn_check_address_size(ctx->address_size))) + return err; + } else { + ctx->cu_die.addr = NULL; + ctx->address_size = + drgn_platform_address_size(&module->platform); + } + ctx->cu_addr_base = NULL; + ctx->function = function; + ctx->regs = regs; + return NULL; } static struct 
drgn_error * drgn_dwarf_frame_base(struct drgn_program *prog, struct drgn_debug_info_module *module, Dwarf_Die *die, - int *remaining_ops, - const struct drgn_register_state *regs, uint64_t *ret); + const struct drgn_register_state *regs, + int *remaining_ops, uint64_t *ret); /* - * Evaluate a DWARF expression up to the next location description operation. + * Evaluate a DWARF expression up to the next location description operation or + * operation that can't be evaluated in the given context. * * Returns &drgn_not_found if it tried to use an unknown register value. */ static struct drgn_error * -drgn_eval_dwarf_expression(struct drgn_program *prog, - struct drgn_dwarf_expression_buffer *expr, +drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, struct uint64_vector *stack, - int *remaining_ops, - Dwarf_Die *function_die, - const struct drgn_register_state *regs) + int *remaining_ops) { struct drgn_error *err; - const struct drgn_platform *platform = &expr->module->platform; + const struct drgn_platform *platform = &ctx->module->platform; bool little_endian = drgn_platform_is_little_endian(platform); - uint8_t address_size = drgn_platform_address_size(platform); + uint8_t address_size = ctx->address_size; uint8_t address_bits = address_size * CHAR_BIT; uint64_t address_mask = uint_max(address_size); drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = @@ -1728,7 +1822,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define CHECK(n) do { \ size_t _n = (n); \ if (stack->size < _n) { \ - return binary_buffer_error(&expr->bb, \ + return binary_buffer_error(&ctx->bb, \ "DWARF expression stack underflow"); \ } \ } while (0) @@ -1743,14 +1837,14 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define PUSH_MASK(x) PUSH((x) & address_mask) - while (binary_buffer_has_next(&expr->bb)) { + while (binary_buffer_has_next(&ctx->bb)) { if (*remaining_ops <= 0) { - return binary_buffer_error(&expr->bb, + return 
binary_buffer_error(&ctx->bb, "DWARF expression executed too many operations"); } (*remaining_ops)--; uint8_t opcode; - if ((err = binary_buffer_next_u8(&expr->bb, &opcode))) + if ((err = binary_buffer_next_u8(&ctx->bb, &opcode))) return err; uint64_t uvalue; uint64_t dwarf_regno; @@ -1761,80 +1855,94 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, PUSH(opcode - DW_OP_lit0); break; case DW_OP_addr: - if ((err = binary_buffer_next_uint(&expr->bb, + if ((err = binary_buffer_next_uint(&ctx->bb, address_size, &uvalue))) return err; PUSH(uvalue); break; case DW_OP_const1u: - if ((err = binary_buffer_next_u8_into_u64(&expr->bb, + if ((err = binary_buffer_next_u8_into_u64(&ctx->bb, &uvalue))) return err; PUSH(uvalue); break; case DW_OP_const2u: - if ((err = binary_buffer_next_u16_into_u64(&expr->bb, + if ((err = binary_buffer_next_u16_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_const4u: - if ((err = binary_buffer_next_u32_into_u64(&expr->bb, + if ((err = binary_buffer_next_u32_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_const8u: - if ((err = binary_buffer_next_u64(&expr->bb, &uvalue))) + if ((err = binary_buffer_next_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_const1s: - if ((err = binary_buffer_next_s8_into_u64(&expr->bb, + if ((err = binary_buffer_next_s8_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_const2s: - if ((err = binary_buffer_next_s16_into_u64(&expr->bb, + if ((err = binary_buffer_next_s16_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_const4s: - if ((err = binary_buffer_next_s32_into_u64(&expr->bb, + if ((err = binary_buffer_next_s32_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_const8s: - if ((err = binary_buffer_next_s64_into_u64(&expr->bb, + if ((err = binary_buffer_next_s64_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case 
DW_OP_constu: - if ((err = binary_buffer_next_uleb128(&expr->bb, + if ((err = binary_buffer_next_uleb128(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; case DW_OP_consts: - if ((err = binary_buffer_next_sleb128_into_u64(&expr->bb, + if ((err = binary_buffer_next_sleb128_into_u64(&ctx->bb, &uvalue))) return err; PUSH_MASK(uvalue); break; + case DW_OP_addrx: + case DW_OP_constx: + if (!ctx->cu_die.addr) { + ctx->bb.pos = ctx->bb.prev; + return NULL; + } + if ((err = drgn_dwarf_next_addrx(&ctx->bb, ctx->module, + &ctx->cu_die, + address_size, + &ctx->cu_addr_base, + &uvalue))) + return err; + PUSH(uvalue); + break; /* Register values. */ case DW_OP_fbreg: { - err = drgn_dwarf_frame_base(prog, expr->module, - function_die, remaining_ops, - regs, &uvalue); + err = drgn_dwarf_frame_base(ctx->prog, ctx->module, + ctx->function, ctx->regs, + remaining_ops, &uvalue); if (err) return err; int64_t svalue; - if ((err = binary_buffer_next_sleb128(&expr->bb, + if ((err = binary_buffer_next_sleb128(&ctx->bb, &svalue))) return err; PUSH_MASK(uvalue + svalue); @@ -1844,25 +1952,25 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, dwarf_regno = opcode - DW_OP_breg0; goto breg; case DW_OP_bregx: - if ((err = binary_buffer_next_uleb128(&expr->bb, + if ((err = binary_buffer_next_uleb128(&ctx->bb, &dwarf_regno))) return err; breg: { - if (!regs) + if (!ctx->regs) return &drgn_not_found; drgn_register_number regno = dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(regs, regno)) + if (!drgn_register_state_has_register(ctx->regs, regno)) return &drgn_not_found; const struct drgn_register_layout *layout = &platform->arch->register_layout[regno]; copy_lsbytes(&uvalue, sizeof(uvalue), HOST_LITTLE_ENDIAN, - &regs->buf[layout->offset], layout->size, - little_endian); + &ctx->regs->buf[layout->offset], + layout->size, little_endian); int64_t svalue; - if ((err = binary_buffer_next_sleb128(&expr->bb, + if ((err = binary_buffer_next_sleb128(&ctx->bb, 
&svalue))) return err; PUSH_MASK(uvalue + svalue); @@ -1879,7 +1987,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, break; case DW_OP_pick: { uint8_t index; - if ((err = binary_buffer_next_u8(&expr->bb, &index))) + if ((err = binary_buffer_next_u8(&ctx->bb, &index))) return err; CHECK(index + 1); PUSH(ELEM(index)); @@ -1906,19 +2014,20 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, deref_size = address_size; goto deref; case DW_OP_deref_size: - if ((err = binary_buffer_next_u8(&expr->bb, + if ((err = binary_buffer_next_u8(&ctx->bb, &deref_size))) return err; if (deref_size > address_size) { - return binary_buffer_error(&expr->bb, + return binary_buffer_error(&ctx->bb, "DW_OP_deref_size has invalid size"); } deref: { CHECK(1); char deref_buf[8]; - err = drgn_program_read_memory(prog, deref_buf, ELEM(0), - deref_size, false); + err = drgn_program_read_memory(ctx->prog, deref_buf, + ELEM(0), deref_size, + false); if (err) return err; copy_lsbytes(&ELEM(0), sizeof(ELEM(0)), @@ -1927,7 +2036,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, break; } case DW_OP_call_frame_cfa: { - if (!regs) + if (!ctx->regs) return &drgn_not_found; /* * The DWARF 5 specification says that @@ -1940,7 +2049,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, * we don't bother enforcing it. 
*/ struct optional_uint64 cfa = - drgn_register_state_get_cfa(regs); + drgn_register_state_get_cfa(ctx->regs); if (!cfa.has_value) return &drgn_not_found; PUSH(cfa.value); @@ -1972,7 +2081,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, case DW_OP_div: CHECK(2); if (ELEM(0) == 0) { - return binary_buffer_error(&expr->bb, + return binary_buffer_error(&ctx->bb, "division by zero in DWARF expression"); } ELEM(1) = ((truncate_signed(ELEM(1), address_bits) @@ -1986,7 +2095,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, case DW_OP_mod: CHECK(2); if (ELEM(0) == 0) { - return binary_buffer_error(&expr->bb, + return binary_buffer_error(&ctx->bb, "modulo by zero in DWARF expression"); } ELEM(1) = ELEM(1) % ELEM(0); @@ -2009,7 +2118,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, break; case DW_OP_plus_uconst: CHECK(1); - if ((err = binary_buffer_next_uleb128(&expr->bb, + if ((err = binary_buffer_next_uleb128(&ctx->bb, &uvalue))) return err; ELEM(0) = (ELEM(0) + uvalue) & address_mask; @@ -2079,14 +2188,14 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, branch: { int16_t skip; - if ((err = binary_buffer_next_s16(&expr->bb, &skip))) + if ((err = binary_buffer_next_s16(&ctx->bb, &skip))) return err; - if ((skip >= 0 && skip > expr->bb.end - expr->bb.pos) || - (skip < 0 && -skip > expr->bb.pos - expr->start)) { - return binary_buffer_error(&expr->bb, + if ((skip >= 0 && skip > ctx->bb.end - ctx->bb.pos) || + (skip < 0 && -skip > ctx->bb.pos - ctx->start)) { + return binary_buffer_error(&ctx->bb, "DWARF expression branch is out of bounds"); } - expr->bb.pos += skip; + ctx->bb.pos += skip; break; } case DW_OP_bra: @@ -2096,7 +2205,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, goto branch; } else { stack->size--; - if ((err = binary_buffer_skip(&expr->bb, 2))) + if ((err = binary_buffer_skip(&ctx->bb, 2))) return err; } break; @@ -2111,7 +2220,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, case DW_OP_piece: case 
DW_OP_bit_piece: /* The caller must handle it. */ - expr->bb.pos = expr->bb.prev; + ctx->bb.pos = ctx->bb.prev; return NULL; /* * We don't yet support: @@ -2121,15 +2230,13 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, * - DW_OP_entry_value * DW_OP_implicit_pointer * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. - * - Operations that use .debug_addr: DW_OP_addrx, - * DW_OP_constx. * - Typed operations: DW_OP_const_type, DW_OP_regval_type, * DW_OP_deref_type, DW_OP_convert, DW_OP_reinterpret. * - Operations for multiple address spaces: DW_OP_xderef, * DW_OP_xderef_size, DW_OP_xderef_type. */ default: - return binary_buffer_error(&expr->bb, + return binary_buffer_error(&ctx->bb, "unknown DWARF expression opcode %#" PRIx8, opcode); } @@ -2146,8 +2253,8 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, static struct drgn_error * drgn_dwarf_frame_base(struct drgn_program *prog, struct drgn_debug_info_module *module, Dwarf_Die *die, - int *remaining_ops, - const struct drgn_register_state *regs, uint64_t *ret) + const struct drgn_register_state *regs, + int *remaining_ops, uint64_t *ret) { struct drgn_error *err; bool little_endian = drgn_platform_is_little_endian(&module->platform); @@ -2165,17 +2272,19 @@ drgn_dwarf_frame_base(struct drgn_program *prog, if (err) return err; + struct drgn_dwarf_expression_context ctx; + if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, + die->cu, NULL, regs, expr, + expr_size))) + return err; struct uint64_vector stack = VECTOR_INIT; - struct drgn_dwarf_expression_buffer buffer; - drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); for (;;) { - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, - remaining_ops, NULL, regs); + err = drgn_eval_dwarf_expression(&ctx, &stack, remaining_ops); if (err) goto out; - if (binary_buffer_has_next(&buffer.bb)) { + if (binary_buffer_has_next(&ctx.bb)) { uint8_t opcode; - if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + if 
((err = binary_buffer_next_u8(&ctx.bb, &opcode))) goto out; uint64_t dwarf_regno; @@ -2184,7 +2293,7 @@ drgn_dwarf_frame_base(struct drgn_program *prog, dwarf_regno = opcode - DW_OP_reg0; goto reg; case DW_OP_regx: - if ((err = binary_buffer_next_uleb128(&buffer.bb, + if ((err = binary_buffer_next_uleb128(&ctx.bb, &dwarf_regno))) goto out; reg: @@ -2210,8 +2319,8 @@ drgn_dwarf_frame_base(struct drgn_program *prog, HOST_LITTLE_ENDIAN, &regs->buf[layout->offset], layout->size, little_endian); - if (binary_buffer_has_next(&buffer.bb)) { - err = binary_buffer_error(&buffer.bb, + if (binary_buffer_has_next(&ctx.bb)) { + err = binary_buffer_error(&ctx.bb, "stray operations in DW_AT_frame_base expression"); } else { err = NULL; @@ -2219,7 +2328,7 @@ drgn_dwarf_frame_base(struct drgn_program *prog, goto out; } default: - err = binary_buffer_error(&buffer.bb, + err = binary_buffer_error(&ctx.bb, "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", opcode); goto out; @@ -2592,15 +2701,16 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, uint64_t bit_pos = 0; - struct uint64_vector stack = VECTOR_INIT; int remaining_ops = MAX_DWARF_EXPR_OPS; - struct drgn_dwarf_expression_buffer buffer; - drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); + struct drgn_dwarf_expression_context ctx; + if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, + die->cu, function_die, + regs, expr, expr_size))) + return err; + struct uint64_vector stack = VECTOR_INIT; do { stack.size = 0; - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, - &remaining_ops, function_die, - regs); + err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); if (err == &drgn_not_found) goto absent; else if (err) @@ -2609,9 +2719,9 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, const void *src = NULL; size_t src_size; - if (binary_buffer_has_next(&buffer.bb)) { + if (binary_buffer_has_next(&ctx.bb)) { uint8_t opcode; - if ((err = 
binary_buffer_next_u8(&buffer.bb, &opcode))) + if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) goto out; uint64_t uvalue; @@ -2622,7 +2732,7 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, dwarf_regno = opcode - DW_OP_reg0; goto reg; case DW_OP_regx: - if ((err = binary_buffer_next_uleb128(&buffer.bb, + if ((err = binary_buffer_next_uleb128(&ctx.bb, &dwarf_regno))) goto out; reg: @@ -2638,17 +2748,17 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, src_size = layout->size; break; case DW_OP_implicit_value: - if ((err = binary_buffer_next_uleb128(&buffer.bb, + if ((err = binary_buffer_next_uleb128(&ctx.bb, &uvalue))) goto out; - if (uvalue > buffer.bb.end - buffer.bb.pos) { - err = binary_buffer_error(&buffer.bb, + if (uvalue > ctx.bb.end - ctx.bb.pos) { + err = binary_buffer_error(&ctx.bb, "DW_OP_implicit_value size is out of bounds"); goto out; } - src = buffer.bb.pos; + src = ctx.bb.pos; src_size = uvalue; - buffer.bb.pos += uvalue; + ctx.bb.pos += uvalue; break; case DW_OP_stack_value: if (!stack.size) @@ -2661,21 +2771,21 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, src_size = sizeof(stack.data[0]); break; default: - buffer.bb.pos = buffer.bb.prev; + ctx.bb.pos = ctx.bb.prev; break; } } uint64_t piece_bit_size; uint64_t piece_bit_offset; - if (binary_buffer_has_next(&buffer.bb)) { + if (binary_buffer_has_next(&ctx.bb)) { uint8_t opcode; - if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) goto out; switch (opcode) { case DW_OP_piece: - if ((err = binary_buffer_next_uleb128(&buffer.bb, + if ((err = binary_buffer_next_uleb128(&ctx.bb, &piece_bit_size))) goto out; /* @@ -2691,16 +2801,16 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, piece_bit_offset = 0; break; case DW_OP_bit_piece: - if ((err = binary_buffer_next_uleb128(&buffer.bb, + if ((err = binary_buffer_next_uleb128(&ctx.bb, &piece_bit_size)) || - (err = 
binary_buffer_next_uleb128(&buffer.bb, + (err = binary_buffer_next_uleb128(&ctx.bb, &piece_bit_offset))) goto out; if (piece_bit_size > type.bit_size - bit_pos) piece_bit_size = type.bit_size - bit_pos; break; default: - err = binary_buffer_error(&buffer.bb, + err = binary_buffer_error(&ctx.bb, "unknown DWARF expression opcode %#" PRIx8 " after simple location description", opcode); goto out; @@ -2829,7 +2939,7 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, goto absent; } bit_pos += piece_bit_size; - } while (binary_buffer_has_next(&buffer.bb)); + } while (binary_buffer_has_next(&ctx.bb)); if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { absent: @@ -5715,18 +5825,17 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, } int remaining_ops = MAX_DWARF_EXPR_OPS; - struct drgn_dwarf_expression_buffer buffer; - drgn_dwarf_expression_buffer_init(&buffer, regs->module, rule->expr, - rule->expr_size); - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, &remaining_ops, - NULL, regs); + struct drgn_dwarf_expression_context ctx; + drgn_dwarf_expression_context_init(&ctx, prog, regs->module, NULL, NULL, + regs, rule->expr, rule->expr_size); + err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); if (err) goto out; - if (binary_buffer_has_next(&buffer.bb)) { + if (binary_buffer_has_next(&ctx.bb)) { uint8_t opcode; - err = binary_buffer_next_u8(&buffer.bb, &opcode); + err = binary_buffer_next_u8(&ctx.bb, &opcode); if (!err) { - err = binary_buffer_error(&buffer.bb, + err = binary_buffer_error(&ctx.bb, "invalid opcode %#" PRIx8 " for CFI expression", opcode); } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 8440da54c..c4e350520 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -63,7 +63,8 @@ enum drgn_debug_info_scn { DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, /* Sections whose data we should cache when it is first used. 
*/ - DRGN_SCN_DEBUG_FRAME = DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, + DRGN_SCN_DEBUG_ADDR = DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, + DRGN_SCN_DEBUG_FRAME, DRGN_SCN_EH_FRAME, DRGN_SCN_ORC_UNWIND_IP, DRGN_SCN_ORC_UNWIND, From 26001733f60e57dd5d5f7c7143fdbd23dc237ca7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 1 Jul 2021 14:02:38 -0700 Subject: [PATCH 003/139] libdrgn: debug_info: support DWARF 5 location lists The DWARF 5 format is a little more complicated than DWARF 2-4 but functionally very similar. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 361 ++++++++++++++++++++++++++++++++++--------- libdrgn/debug_info.h | 1 + 2 files changed, 288 insertions(+), 74 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 4da099b12..556df8193 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -118,6 +118,7 @@ static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip", [DRGN_SCN_ORC_UNWIND] = ".orc_unwind", [DRGN_SCN_DEBUG_LOC] = ".debug_loc", + [DRGN_SCN_DEBUG_LOCLISTS] = ".debug_loclists", [DRGN_SCN_TEXT] = ".text", [DRGN_SCN_GOT] = ".got", }; @@ -1597,6 +1598,262 @@ drgn_dwarf_next_addrx(struct binary_buffer *bb, return NULL; } +static struct drgn_error * +drgn_dwarf_read_loclistx(struct drgn_debug_info_module *module, + Dwarf_Die *cu_die, uint8_t offset_size, + Dwarf_Word index, Dwarf_Word *ret) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(cu_die, DW_AT_loclists_base, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx without DW_AT_loclists_base"); + } + Dwarf_Word base; + if (dwarf_formudata(attr, &base)) + return drgn_error_libdw(); + + if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx without .debug_loclists section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; + Elf_Data *data = 
module->scn_data[DRGN_SCN_DEBUG_LOCLISTS]; + + if (base > data->d_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_loclists_base is out of bounds"); + } + assert(offset_size == 4 || offset_size == 8); + if (index >= (data->d_size - base) / offset_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx is out of bounds"); + } + const char *basep = (char *)data->d_buf + base; + if (offset_size == 8) { + uint64_t offset; + memcpy(&offset, (uint64_t *)basep + index, sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_64(offset); + *ret = base + offset; + } else { + uint32_t offset; + memcpy(&offset, (uint32_t *)basep + index, sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_32(offset); + *ret = base + offset; + } + return NULL; +} + +static struct drgn_error * +drgn_dwarf5_location_list(struct drgn_debug_info_module *module, + Dwarf_Word offset, Dwarf_Die *cu_die, + uint8_t address_size, uint64_t pc, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + + if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclist without .debug_loclists section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOCLISTS); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclist is out of bounds"); + } + buffer.bb.pos += offset; + + const char *addr_base = NULL; + uint64_t base; + bool base_valid = false; + /* Default is unknown. May be overridden by DW_LLE_default_location. 
*/ + *expr_ret = NULL; + *expr_size_ret = 0; + for (;;) { + uint8_t kind; + if ((err = binary_buffer_next_u8(&buffer.bb, &kind))) + return err; + uint64_t start, length, expr_size; + switch (kind) { + case DW_LLE_end_of_list: + return NULL; + case DW_LLE_base_addressx: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &base))) + return err; + base_valid = true; + break; + case DW_LLE_startx_endx: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &start)) || + (err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &length))) + return err; + length -= start; +counted_location_description: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (pc >= start && pc - start < length) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + break; + case DW_LLE_startx_length: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + goto counted_location_description; + case DW_LLE_offset_pair: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + length -= start; + if (!base_valid) { + Dwarf_Addr low_pc; + if (dwarf_lowpc(cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + start += base; + goto counted_location_description; + case DW_LLE_default_location: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + 
*expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + buffer.bb.pos += expr_size; + break; + case DW_LLE_base_address: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &base))) + return err; + base_valid = true; + break; + case DW_LLE_start_end: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, + address_size, + &length))) + return err; + length -= start; + goto counted_location_description; + case DW_LLE_start_length: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + goto counted_location_description; + default: + return binary_buffer_error(&buffer.bb, + "unknown location list entry kind %#" PRIx8, + kind); + } + } +} + +static struct drgn_error * +drgn_dwarf4_location_list(struct drgn_debug_info_module *module, + Dwarf_Word offset, Dwarf_Die *cu_die, + uint8_t address_size, uint64_t pc, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + + if (!module->scns[DRGN_SCN_DEBUG_LOC]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr without .debug_loc section"); + } + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_DEBUG_LOC); + if (err) + return err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOC); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr is out of bounds"); + } + buffer.bb.pos += offset; + + uint64_t address_max = uint_max(address_size); + uint64_t base; + bool base_valid = false; + for (;;) { + uint64_t start, end; + if ((err = binary_buffer_next_uint(&buffer.bb, address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, address_size, + &end))) + return err; + if (start == 0 && end == 0) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } else if (start == address_max) { 
+ base = end; + base_valid = true; + } else { + if (!base_valid) { + Dwarf_Addr low_pc; + if (dwarf_lowpc(cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + uint16_t expr_size; + if ((err = binary_buffer_next_u16(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (base + start <= pc && pc < base + end) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + } + } +} + static struct drgn_error * drgn_dwarf_location(struct drgn_debug_info_module *module, Dwarf_Attribute *attr, @@ -1605,11 +1862,26 @@ drgn_dwarf_location(struct drgn_debug_info_module *module, { struct drgn_error *err; switch (attr->form) { - case DW_FORM_sec_offset: { - if (!module->scns[DRGN_SCN_DEBUG_LOC]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclistptr without .debug_loc section"); - } + case DW_FORM_sec_offset: + case DW_FORM_loclistx: { + Dwarf_Die cu_die; + Dwarf_Half cu_version; + uint8_t address_size; + uint8_t offset_size; + if (!dwarf_cu_die(attr->cu, &cu_die, &cu_version, NULL, + &address_size, &offset_size, NULL, NULL)) + return drgn_error_libdw(); + if ((err = drgn_check_address_size(address_size))) + return err; + + Dwarf_Word offset; + if (dwarf_formudata(attr, &offset)) + return drgn_error_libdw(); + if (attr->form == DW_FORM_loclistx && + ((err = drgn_dwarf_read_loclistx(module, &cu_die, + offset_size, offset, + &offset)))) + return err; struct optional_uint64 pc; if (!regs || @@ -1618,81 +1890,22 @@ drgn_dwarf_location(struct drgn_debug_info_module *module, *expr_size_ret = 0; return NULL; } - - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_DEBUG_LOC); - if (err) - return err; - Dwarf_Addr bias; dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, NULL, NULL); pc.value = pc.value - !regs->interrupted - 
bias; - Dwarf_Word offset; - if (dwarf_formudata(attr, &offset)) - return drgn_error_libdw(); - - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, - DRGN_SCN_DEBUG_LOC); - if (offset > buffer.bb.end - buffer.bb.pos) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclistptr is out of bounds"); - } - buffer.bb.pos += offset; - - uint8_t address_size = - drgn_platform_address_size(&module->platform); - uint64_t address_max = uint_max(address_size); - uint64_t base; - bool base_valid = false; - for (;;) { - uint64_t start, end; - if ((err = binary_buffer_next_uint(&buffer.bb, - address_size, - &start)) || - (err = binary_buffer_next_uint(&buffer.bb, - address_size, &end))) - return err; - if (start == 0 && end == 0) { - break; - } else if (start == address_max) { - base = end; - base_valid = true; - } else { - if (!base_valid) { - Dwarf_Die cu_die; - if (!dwarf_cu_die(attr->cu, &cu_die, - NULL, NULL, NULL, - NULL, NULL, NULL)) - return drgn_error_libdw(); - Dwarf_Addr low_pc; - if (dwarf_lowpc(&cu_die, &low_pc)) - return drgn_error_libdw(); - base = low_pc; - base_valid = true; - } - uint16_t expr_size; - if ((err = binary_buffer_next_u16(&buffer.bb, - &expr_size))) - return err; - if (expr_size > buffer.bb.end - buffer.bb.pos) { - return binary_buffer_error(&buffer.bb, - "location description size is out of bounds"); - } - if (base + start <= pc.value && - pc.value < base + end) { - *expr_ret = buffer.bb.pos; - *expr_size_ret = expr_size; - return NULL; - } - buffer.bb.pos += expr_size; - } + if (cu_version >= 5) { + return drgn_dwarf5_location_list(module, offset, + &cu_die, address_size, + pc.value, expr_ret, + expr_size_ret); + } else { + return drgn_dwarf4_location_list(module, offset, + &cu_die, address_size, + pc.value, expr_ret, + expr_size_ret); } - *expr_ret = NULL; - *expr_size_ret = 0; - return NULL; } default: { Dwarf_Block block; diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index c4e350520..f8476c599 
100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -69,6 +69,7 @@ enum drgn_debug_info_scn { DRGN_SCN_ORC_UNWIND_IP, DRGN_SCN_ORC_UNWIND, DRGN_SCN_DEBUG_LOC, + DRGN_SCN_DEBUG_LOCLISTS, DRGN_NUM_DEBUG_SCN_DATA, From a863f1e439e55034cd2980d14793e6d5453c31d0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 9 Jul 2021 01:50:34 -0700 Subject: [PATCH 004/139] libdrgn: dwarf_index: print unknown forms in hexadecimal The DWARF spec and dwarf.h list them in hexadecimal, so make it easier to cross reference. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 3fd9c9153..e8cd85d61 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -375,7 +375,7 @@ static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64, + "unknown attribute form %#" PRIx64, form); } } @@ -405,7 +405,7 @@ static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_sibling", + "unknown attribute form %#" PRIx64 " for DW_AT_sibling", form); } } @@ -448,7 +448,7 @@ static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_name", + "unknown attribute form %#" PRIx64 " for DW_AT_name", form); } } @@ -502,7 +502,7 @@ static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_comp_dir", + "unknown attribute form %#" PRIx64 " for DW_AT_comp_dir", form); } } @@ -524,7 +524,7 @@ dw_at_str_offsets_base_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return 
binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_str_offsets_base", + "unknown attribute form %#" PRIx64 " for DW_AT_str_offsets_base", form); } } @@ -552,7 +552,7 @@ dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_stmt_list", + "unknown attribute form %#" PRIx64 " for DW_AT_stmt_list", form); } } @@ -591,7 +591,7 @@ static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_decl_file", + "unknown attribute form %#" PRIx64 " for DW_AT_decl_file", form); } } @@ -617,7 +617,7 @@ dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_declaration", + "unknown attribute form %#" PRIx64 " for DW_AT_declaration", form); } } @@ -665,7 +665,7 @@ dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_specification", + "unknown attribute form %#" PRIx64 " for DW_AT_specification", form); } } @@ -1299,7 +1299,7 @@ static struct drgn_error *skip_lnp_form(struct binary_buffer *bb, return binary_buffer_skip_string(bb); default: return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for line number program", + "unknown attribute form %#" PRIx64 " for line number program", form); } } @@ -1334,7 +1334,7 @@ static struct drgn_error *read_lnp_string(struct drgn_debug_info_buffer *buffer, return NULL; default: return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64 " for DW_LNCT_path", + "unknown attribute form %#" PRIx64 " for DW_LNCT_path", form); } } @@ -1352,7 +1352,7 @@ read_lnp_directory_index(struct drgn_debug_info_buffer *buffer, uint64_t form, return 
binary_buffer_next_uleb128(&buffer->bb, ret); default: return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64 " for DW_LNCT_directory_index", + "unknown attribute form %#" PRIx64 " for DW_LNCT_directory_index", form); } } From aa76e0ff1fb44916d7db825253b551e38b5bb55b Mon Sep 17 00:00:00 2001 From: Pavel Zakharov Date: Tue, 13 Jul 2021 12:10:29 -0400 Subject: [PATCH 005/139] TOOL-11725 drgn: fix python build dependencies (#28) --- debian/control | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/control b/debian/control index e01adad2f..25e0cab76 100644 --- a/debian/control +++ b/debian/control @@ -6,6 +6,7 @@ Standards-Version: 4.1.2 Build-Depends: autoconf, automake, bison, + dh-python, elfutils, flex, gawk, @@ -24,7 +25,7 @@ Build-Depends: autoconf, pkg-config, python3, python3-distutils, - python3.6-dev, + python3-dev, zlib1g-dev Package: drgn From a74716828dea68e0ed109d4c07daf7ce00fae270 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 21 Jul 2021 19:38:53 -0700 Subject: [PATCH 006/139] libdrgn: fix comment typo in serialize.h s/grather/greater/g Signed-off-by: Omar Sandoval --- libdrgn/serialize.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 0f4783f35..9e9604209 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -122,7 +122,7 @@ void copy_bits(void *dst, unsigned int dst_bit_offset, const void *src, * @param[in] bit_offset Offset in bits from the beginning of @p buf to where to * write. This is interpreted differently based on @p little_endian. * @param[in] uvalue Bits to write, in host order. - * @param[in] bit_size Number of bits in @p uvalue. This must be grather than + * @param[in] bit_size Number of bits in @p uvalue. This must be greater than * zero and no more than 64. Note that this is not checked or truncated, so if * @p uvalue has more than this many bits, the results will likely be incorrect. 
* @param[in] little_endian Whether the bits should be written out in @@ -140,7 +140,7 @@ void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, * @param[in] buf Memory buffer to read from. * @param[in] bit_offset Offset in bits from the beginning of @p buf to where to * read from. This is interpreted differently based on @p little_endian. - * @param[in] bit_size Number of bits to read. This must be grather than zero + * @param[in] bit_size Number of bits to read. This must be greater than zero * and no more than 64. * @param[in] little_endian Whether the bits should be interpreted in * little-endian order. From ee0b8efc303329bdb52f25c9acfef108f5c40ddc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 26 Jul 2021 13:53:34 -0700 Subject: [PATCH 007/139] helpers: use correct size for for_each_cpu() If the kernel is compiled with CONFIG_CPUMASK_OFFSTACK, then the full struct cpumask::bits array may not be allocated. Use nr_cpu_ids as the limit instead of the length of the array. 
Signed-off-by: Omar Sandoval --- drgn/helpers/linux/cpumask.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index 84e572098..6821075ef 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -27,11 +27,15 @@ def for_each_cpu(mask: Object) -> Iterator[int]: :param mask: ``struct cpumask`` """ + try: + nr_cpu_ids = mask.prog_["nr_cpu_ids"].value_() + except KeyError: + nr_cpu_ids = 1 bits = mask.bits word_bits = 8 * sizeof(bits.type_.type) - for i in range(bits.type_.length): # type: ignore + for i in range((nr_cpu_ids + word_bits - 1) // word_bits): word = bits[i].value_() - for j in range(word_bits): + for j in range(min(word_bits, nr_cpu_ids - word_bits * i)): if word & (1 << j): yield (word_bits * i) + j From 9c00552007dac335e543fcad9e5e3117bf47fdc9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 21 Jul 2021 19:00:43 -0700 Subject: [PATCH 008/139] libdrgn: python: add Object.from_bytes_() Add a way to create an object from raw bytes. One example where I've wanted this is creating a struct pt_regs from a PRSTATUS note or other source. Signed-off-by: Omar Sandoval --- _drgn.pyi | 22 +++++++ libdrgn/python/object.c | 52 ++++++++++++++++ tests/test_object.py | 128 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) diff --git a/_drgn.pyi b/_drgn.pyi index 7774ab84b..fb5b86ecf 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1177,6 +1177,28 @@ class Object: ``void``) """ ... + @classmethod + def from_bytes_( + cls, + prog: Program, + type: Union[str, Type], + bytes: bytes, + *, + bit_offset: IntegerLike = 0, + bit_field_size: Optional[IntegerLike] = None, + ) -> Object: + """ + Return a value object from its binary representation. + + :param prog: Program to create the object in. + :param type: Type of the object. + :param bytes: Buffer containing value of the object. 
+ :param bit_offset: Offset in bits from the beginning of *bytes* to the + beginning of the object. + :param bit_field_size: Size in bits of the object if it is a bit field. + The default is ``None``, which means the object is not a bit field. + """ + ... def format_( self, *, diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index e8bd59031..a4255a537 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -746,6 +746,55 @@ static DrgnObject *DrgnObject_read(DrgnObject *self) ) } +static DrgnObject *DrgnObject_from_bytes(PyTypeObject *type, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = { + "prog", "type", "bytes", "bit_offset", "bit_field_size", NULL + }; + struct drgn_error *err; + Program *prog; + PyObject *type_obj = Py_None; + Py_buffer bytes; + struct index_arg bit_offset = {}; + struct index_arg bit_field_size = { .allow_none = true, .is_none = true }; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!Oy*|O&O&:from_bytes_", + keywords, &Program_type, &prog, + &type_obj, &bytes, index_converter, + &bit_offset, index_converter, + &bit_field_size)) + return NULL; + + DrgnObject *res = NULL; + struct drgn_qualified_type qualified_type; + if (Program_type_arg(prog, type_obj, false, &qualified_type) == -1) + goto out; + + if (!bit_field_size.is_none && bit_field_size.uvalue == 0) { + PyErr_SetString(PyExc_ValueError, + "bit field size cannot be zero"); + goto out; + } + + res = DrgnObject_alloc(prog); + if (!res) + goto out; + + err = drgn_object_set_from_buffer(&res->obj, qualified_type, bytes.buf, + bytes.len, bit_offset.uvalue, + bit_field_size.uvalue); + if (err) { + set_drgn_error(err); + Py_DECREF(res); + res = NULL; + goto out; + } + +out: + PyBuffer_Release(&bytes); + return res; +} + static int append_bit_offset(PyObject *parts, uint8_t bit_offset) { if (bit_offset == 0) @@ -1582,6 +1631,9 @@ static PyMethodDef DrgnObject_methods[] = { drgn_Object_address_of__DOC}, {"read_", (PyCFunction)DrgnObject_read, 
METH_NOARGS, drgn_Object_read__DOC}, + {"from_bytes_", (PyCFunction)DrgnObject_from_bytes, + METH_CLASS | METH_VARARGS | METH_KEYWORDS, + drgn_Object_from_bytes__DOC}, {"format_", (PyCFunction)DrgnObject_format, METH_VARARGS | METH_KEYWORDS, drgn_Object_format__DOC}, {"__round__", (PyCFunction)DrgnObject_round, diff --git a/tests/test_object.py b/tests/test_object.py index 0d6994bfc..e3fb67156 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -930,6 +930,134 @@ def test_index(self): Object(self.prog, "int []", address=0), ) + def test_int_from_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + type_ = self.prog.int_type("int", 4, True, byteorder) + self.assertIdentical( + Object.from_bytes_( + self.prog, type_, (0x12345678).to_bytes(4, byteorder) + ), + Object(self.prog, type_, 0x12345678), + ) + + def test_int_from_bytes_bit_offset(self): + self.assertIdentical( + Object.from_bytes_(self.prog, "int", b"\xe0Y\xd1H\x00", bit_offset=2), + Object(self.prog, "int", 0x12345678), + ) + + def test_int_from_bytes_big_endian_bit_offset(self): + self.assertIdentical( + Object.from_bytes_( + self.prog, + self.prog.int_type("int", 4, True, "big"), + b"\x04\x8d\x15\x9e\x00", + bit_offset=2, + ), + Object(self.prog, self.prog.int_type("int", 4, True, "big"), 0x12345678), + ) + + def test_int_from_bytes_bit_field(self): + self.assertIdentical( + Object.from_bytes_(self.prog, "int", b"\xcc", bit_field_size=8), + Object(self.prog, "int", 0xCC, bit_field_size=8), + ) + + def test_float64_from_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + type_ = self.prog.float_type("double", 8, byteorder) + self.assertIdentical( + Object.from_bytes_( + self.prog, + type_, + struct.pack( + ("<" if byteorder == "little" else ">") + "d", math.e + ), + ), + Object(self.prog, type_, math.e), + ) + + def test_float32_from_bytes(self): + for byteorder in ("little", "big"): + with 
self.subTest(byteorder=byteorder): + type_ = self.prog.float_type("float", 4, byteorder) + self.assertIdentical( + Object.from_bytes_( + self.prog, + type_, + struct.pack( + ("<" if byteorder == "little" else ">") + "f", math.e + ), + ), + Object(self.prog, type_, math.e), + ) + + def test_struct_from_bytes(self): + self.assertIdentical( + Object.from_bytes_( + self.prog, self.point_type, b"\x01\x00\x00\x00\x02\x00\x00\x00" + ), + Object(self.prog, self.point_type, {"x": 1, "y": 2}), + ) + + def test_struct_from_bytes_bit_offset(self): + self.assertIdentical( + Object.from_bytes_( + self.prog, + self.point_type, + b"\xff\x01\x00\x00\x00\x02\x00\x00\x00", + bit_offset=8, + ), + Object(self.prog, self.point_type, {"x": 1, "y": 2}), + ) + + def test_struct_from_bytes_invalid_bit_offset(self): + self.assertRaisesRegex( + ValueError, + "non-scalar must be byte-aligned", + Object.from_bytes_, + self.prog, + self.point_type, + b"\xff\x01\x00\x00\x00\x02\x00\x00\x00", + bit_offset=2, + ) + + def test_from_bytes_invalid_bit_field_size(self): + self.assertRaisesRegex( + ValueError, + "bit field size cannot be zero", + Object.from_bytes_, + self.prog, + "int", + b"", + bit_field_size=0, + ) + + def test_from_bytes_buffer_too_small(self): + self.assertRaisesRegex( + ValueError, + "buffer is too small", + Object.from_bytes_, + self.prog, + "int", + bytes(3), + ) + + def test_from_bytes_incomplete_type(self): + self.assertRaisesRegex( + TypeError, + "cannot create object with void type", + Object.from_bytes_, + self.prog, + "void", + b"", + ) + + def test_from_bytes_bad_type(self): + self.assertRaises(TypeError, Object.from_bytes_, self.prog, None, b"") + class TestInvalidBitField(MockProgramTestCase): def test_integer(self): From 7335df114c3d167e0ee3283f1ddcaf0f4cca203f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 22 Jul 2021 02:02:20 -0700 Subject: [PATCH 009/139] libdrgn: python: add Object.to_bytes_() And the libdrgn implementation, drgn_object_read_bytes(). 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 3 ++ libdrgn/drgn.h.in | 9 ++++ libdrgn/object.c | 68 ++++++++++++++++++++++++++++ libdrgn/python/object.c | 17 +++++++ tests/test_object.py | 99 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 196 insertions(+) diff --git a/_drgn.pyi b/_drgn.pyi index fb5b86ecf..0630d3736 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1177,6 +1177,9 @@ class Object: ``void``) """ ... + def to_bytes_(self) -> bytes: + """Return the binary representation of this object's value.""" + ... @classmethod def from_bytes_( cls, diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index b6b0827a5..ca96f3008 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -1403,6 +1403,15 @@ struct drgn_error *drgn_object_read_value(const struct drgn_object *obj, void drgn_object_deinit_value(const struct drgn_object *obj, const union drgn_value *value); +/** + * Get the binary representation of the value of a @ref drgn_object. + * + * @param[out] buf Buffer to read into. Size must be at least + * `drgn_object_size(obj)`. + */ +struct drgn_error *drgn_object_read_bytes(const struct drgn_object *obj, + void *buf); + /** * Get the value of an object encoded with @ref * drgn_object_encoding::DRGN_OBJECT_ENCODING_SIGNED. 
diff --git a/libdrgn/object.c b/libdrgn/object.c index 9e2fa4d2a..e5ecc47f7 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -609,6 +609,74 @@ drgn_object_read_value(const struct drgn_object *obj, union drgn_value *value, ) } +LIBDRGN_PUBLIC struct drgn_error * +drgn_object_read_bytes(const struct drgn_object *obj, void *buf) +{ + struct drgn_error *err; + + if (!drgn_object_encoding_is_complete(obj->encoding)) { + return drgn_error_incomplete_type("cannot read object with %s type", + obj->type); + } + + SWITCH_ENUM(obj->kind, + case DRGN_OBJECT_VALUE: + if (obj->encoding == DRGN_OBJECT_ENCODING_BUFFER) { + memcpy(buf, drgn_object_buffer(obj), + drgn_object_size(obj)); + } else { + union { + uint64_t uvalue; + struct { +#if !HOST_LITTLE_ENDIAN + uint32_t pad; +#endif + float fvalue32; +#if HOST_LITTLE_ENDIAN + uint32_t pad; +#endif + }; + } tmp; + ((uint8_t *)buf)[drgn_object_size(obj) - 1] = 0; + if (obj->encoding == DRGN_OBJECT_ENCODING_FLOAT && + obj->bit_size == 32) { + tmp.fvalue32 = (float)obj->value.fvalue; + tmp.pad = 0; + } else { + tmp.uvalue = obj->value.uvalue; + } + serialize_bits(buf, 0, + truncate_unsigned(tmp.uvalue, obj->bit_size), + obj->bit_size, obj->little_endian); + } + return NULL; + case DRGN_OBJECT_REFERENCE: { + uint8_t bit_offset = obj->bit_offset; + uint64_t bit_size = obj->bit_size; + uint64_t read_size = drgn_value_size(bit_offset + bit_size); + if (bit_offset == 0) { + return drgn_program_read_memory(drgn_object_program(obj), + buf, obj->address, + read_size, false); + } else { + char tmp[9]; + assert(read_size <= sizeof(tmp)); + err = drgn_program_read_memory(drgn_object_program(obj), + tmp, obj->address, + read_size, false); + if (err) + return err; + ((uint8_t *)buf)[drgn_value_size(bit_size) - 1] = 0; + copy_bits(buf, 0, tmp, bit_offset, obj->bit_size, + obj->little_endian); + return NULL; + } + } + case DRGN_OBJECT_ABSENT: + return &drgn_error_object_absent; + ) +} + static struct drgn_error * 
drgn_object_value_signed(const struct drgn_object *obj, int64_t *ret) { diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index a4255a537..f2a1d695b 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -746,6 +746,21 @@ static DrgnObject *DrgnObject_read(DrgnObject *self) ) } +static PyObject *DrgnObject_to_bytes(DrgnObject *self) +{ + struct drgn_error *err; + PyObject *buf = PyBytes_FromStringAndSize(NULL, + drgn_object_size(&self->obj)); + if (!buf) + return NULL; + err = drgn_object_read_bytes(&self->obj, PyBytes_AS_STRING(buf)); + if (err) { + Py_DECREF(buf); + return set_drgn_error(err); + } + return buf; +} + static DrgnObject *DrgnObject_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds) { @@ -1631,6 +1646,8 @@ static PyMethodDef DrgnObject_methods[] = { drgn_Object_address_of__DOC}, {"read_", (PyCFunction)DrgnObject_read, METH_NOARGS, drgn_Object_read__DOC}, + {"to_bytes_", (PyCFunction)DrgnObject_to_bytes, METH_NOARGS, + drgn_Object_to_bytes__DOC}, {"from_bytes_", (PyCFunction)DrgnObject_from_bytes, METH_CLASS | METH_VARARGS | METH_KEYWORDS, drgn_Object_from_bytes__DOC}, diff --git a/tests/test_object.py b/tests/test_object.py index e3fb67156..4ed895da6 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -930,6 +930,105 @@ def test_index(self): Object(self.prog, "int []", address=0), ) + def test_signed_int_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, self.prog.int_type("int", 4, True, byteorder), -100 + ).to_bytes_(), + (-100).to_bytes(4, byteorder, signed=True), + ) + self.assertEqual( + Object( + self.prog, + self.prog.int_type("long", 8, True, byteorder), + -(2 ** 32), + ).to_bytes_(), + (-(2 ** 32)).to_bytes(8, byteorder, signed=True), + ) + + def test_unsigned_int_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + 
self.prog, + self.prog.int_type("unsigned int", 4, False, byteorder), + 2 ** 31, + ).to_bytes_(), + (2 ** 31).to_bytes(4, byteorder), + ) + self.assertEqual( + Object( + self.prog, + self.prog.int_type("unsigned long", 8, False, byteorder), + 2 ** 60, + ).to_bytes_(), + (2 ** 60).to_bytes(8, byteorder), + ) + + def test_float64_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, self.prog.float_type("double", 8, byteorder), math.e + ).to_bytes_(), + struct.pack(("<" if byteorder == "little" else ">") + "d", math.e), + ) + + def test_float32_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, self.prog.float_type("float", 4, byteorder), math.e + ).to_bytes_(), + struct.pack(("<" if byteorder == "little" else ">") + "f", math.e), + ) + + def test_struct_value_to_bytes(self): + self.assertEqual( + Object(self.prog, self.point_type, {"x": 1, "y": 2}).to_bytes_(), + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ) + + def test_int_reference_to_bytes(self): + self.add_memory_segment(b"\x78\x56\x34\x12", virt_addr=0xFFFF0000) + self.assertEqual( + Object(self.prog, "int", address=0xFFFF0000).to_bytes_(), + b"\x78\x56\x34\x12", + ) + + def test_int_reference_bit_offset_to_bytes(self): + self.add_memory_segment(b"\xe0Y\xd1H\x00", virt_addr=0xFFFF0000) + self.assertEqual( + Object(self.prog, "int", address=0xFFFF0000, bit_offset=2).to_bytes_(), + b"\x78\x56\x34\x12", + ) + + def test_int_reference_big_endian_bit_offset_to_bytes(self): + self.add_memory_segment(b"\x04\x8d\x15\x9e\x00", virt_addr=0xFFFF0000) + self.assertEqual( + Object( + self.prog, + self.prog.int_type("int", 4, True, "big"), + address=0xFFFF0000, + bit_offset=2, + ).to_bytes_(), + b"\x12\x34\x56\x78", + ) + + def test_struct_reference_to_bytes(self): + self.add_memory_segment( + b"\x01\x00\x00\x00\x02\x00\x00\x00", 
virt_addr=0xFFFF0000 + ) + self.assertEqual( + Object(self.prog, self.point_type, address=0xFFFF0000).to_bytes_(), + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ) + def test_int_from_bytes(self): for byteorder in ("little", "big"): with self.subTest(byteorder=byteorder): From 50e3cf936a4e45f353cd0cf900e1d8f293125885 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 29 Jul 2021 17:04:52 -0700 Subject: [PATCH 010/139] vmtest: add CONFIG_NUMA=y We're adding NUMA node mask helpers in #107, so make sure we can run them. Signed-off-by: Omar Sandoval --- vmtest/kbuild.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index f41c85265..2c9e4fc8c 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -KERNEL_LOCALVERSION = "-vmtest3" +KERNEL_LOCALVERSION = "-vmtest4" def kconfig() -> str: @@ -82,6 +82,9 @@ def kconfig() -> str: # For kconfig tests. CONFIG_IKCONFIG=m CONFIG_IKCONFIG_PROC=y + +# For nodemask tests. +CONFIG_NUMA=y """ From df8da55a5724585a0d45a74a95ad233a3c73e62f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 29 Jul 2021 17:40:49 -0700 Subject: [PATCH 011/139] helpers: update task_state_to_char() for v5.14 Linux v5.14 renamed task_struct::state to task_struct::__state. 
Signed-off-by: Omar Sandoval --- drgn/helpers/linux/sched.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index f6abf9f06..dce340ead 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -29,7 +29,9 @@ def task_state_to_char(task: Object) -> str: task_state_chars: str TASK_REPORT: int try: - task_state_chars, TASK_REPORT = prog.cache["task_state_to_char"] + task_state_chars, TASK_REPORT, task_state_name = prog.cache[ + "task_state_to_char" + ] except KeyError: task_state_array = prog["task_state_array"] # Walk through task_state_array backwards looking for the largest state @@ -45,13 +47,30 @@ def task_state_to_char(task: Object) -> str: if chars is None: raise Exception("could not parse task_state_array") task_state_chars = chars.decode("ascii") - prog.cache["task_state_to_char"] = task_state_chars, TASK_REPORT - task_state = task.state.value_() + + # Since Linux kernel commit 2f064a59a11f ("sched: Change + # task_struct::state") (in v5.14), the task state is named "__state". + # Before that, it is named "state". + try: + task_state = task.__state + task_state_name = "__state" + except AttributeError: + task_state = task.state + task_state_name = "state" + + prog.cache["task_state_to_char"] = ( + task_state_chars, + TASK_REPORT, + task_state_name, + ) + else: + task_state = getattr(task, task_state_name) + task_state = task_state.value_() exit_state = task.exit_state.value_() state = (task_state | exit_state) & TASK_REPORT char = task_state_chars[state.bit_length()] - # States beyond TASK_REPORT are special. As of Linux v5.8, TASK_IDLE is the - # only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. + # States beyond TASK_REPORT are special. As of Linux v5.14, TASK_IDLE is + # the only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. 
if char == "D" and (task_state & ~state) == _TASK_NOLOAD: return "I" else: From dc0c0e05f8c312b4fd158ddf9db1b315af993768 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 29 Jul 2021 17:52:58 -0700 Subject: [PATCH 012/139] setup.py: add 5.14 to vmtest kernels Signed-off-by: Omar Sandoval --- setup.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d60253092..802207e4e 100755 --- a/setup.py +++ b/setup.py @@ -125,7 +125,18 @@ def make_release_tree(self, base_dir, files): class test(Command): description = "run unit tests after in-place build" - KERNELS = ["5.13", "5.12", "5.11", "5.10", "5.4", "4.19", "4.14", "4.9", "4.4"] + KERNELS = [ + "5.14", + "5.13", + "5.12", + "5.11", + "5.10", + "5.4", + "4.19", + "4.14", + "4.9", + "4.4", + ] user_options = [ ( From 2f97cb4f75d71978cd174168c3788d5de068e00d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 15 Jul 2021 21:09:53 +0800 Subject: [PATCH 013/139] helpers: add kernel nodemask helpers Sometimes we want to traverse numa nodes in the system, so add kernel nodemask helpers to support this. 
Signed-off-by: Qi Zheng --- drgn/helpers/linux/cpumask.py | 17 +++++---- drgn/helpers/linux/nodemask.py | 55 ++++++++++++++++++++++++++++ tests/helpers/linux/__init__.py | 11 ++++++ tests/helpers/linux/test_cpumask.py | 15 +------- tests/helpers/linux/test_nodemask.py | 25 +++++++++++++ 5 files changed, 103 insertions(+), 20 deletions(-) create mode 100644 drgn/helpers/linux/nodemask.py create mode 100644 tests/helpers/linux/test_nodemask.py diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index 6821075ef..e26a6c9f9 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -21,6 +21,15 @@ ) +def _for_each_set_bit(bitmap: Object, size: int) -> Iterator[int]: + word_bits = 8 * sizeof(bitmap.type_.type) + for i in range((size + word_bits - 1) // word_bits): + word = bitmap[i].value_() + for j in range(min(word_bits, size - word_bits * i)): + if word & (1 << j): + yield (word_bits * i) + j + + def for_each_cpu(mask: Object) -> Iterator[int]: """ Iterate over all of the CPUs in the given mask. @@ -31,13 +40,7 @@ def for_each_cpu(mask: Object) -> Iterator[int]: nr_cpu_ids = mask.prog_["nr_cpu_ids"].value_() except KeyError: nr_cpu_ids = 1 - bits = mask.bits - word_bits = 8 * sizeof(bits.type_.type) - for i in range((nr_cpu_ids + word_bits - 1) // word_bits): - word = bits[i].value_() - for j in range(min(word_bits, nr_cpu_ids - word_bits * i)): - if word & (1 << j): - yield (word_bits * i) + j + return _for_each_set_bit(mask.bits, nr_cpu_ids) def _for_each_cpu_mask(prog: Program, name: str) -> Iterator[int]: diff --git a/drgn/helpers/linux/nodemask.py b/drgn/helpers/linux/nodemask.py new file mode 100644 index 000000000..aeefe251e --- /dev/null +++ b/drgn/helpers/linux/nodemask.py @@ -0,0 +1,55 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +""" +NUMA Node Masks +--------------- + +The ``drgn.helpers.linux.nodemask`` module provides helpers for working with +NUMA node masks from :linux:`include/linux/nodemask.h`. +""" + +from typing import Iterator + +from drgn import IntegerLike, Object, Program +from drgn.helpers.linux.cpumask import _for_each_set_bit + +__all__ = ( + "for_each_node", + "for_each_node_mask", + "for_each_node_state", + "for_each_online_node", +) + + +def for_each_node_mask(mask: Object) -> Iterator[int]: + """ + Iterate over all of the NUMA nodes in the given mask. + + :param mask: ``nodemask_t`` + """ + try: + nr_node_ids = mask.prog_["nr_node_ids"].value_() + except KeyError: + nr_node_ids = 1 + return _for_each_set_bit(mask.bits, nr_node_ids) + + +def for_each_node_state(prog: Program, state: IntegerLike) -> Iterator[int]: + """ + Iterate over all NUMA nodes in the given state. + + :param state: ``enum node_states`` (e.g., ``N_NORMAL_MEMORY``) + """ + mask = prog["node_states"][state] + return for_each_node_mask(mask) + + +def for_each_node(prog: Program) -> Iterator[int]: + """Iterate over all possible NUMA nodes.""" + return for_each_node_state(prog, prog["N_POSSIBLE"]) + + +def for_each_online_node(prog: Program) -> Iterator[int]: + """Iterate over all online NUMA nodes.""" + return for_each_node_state(prog, prog["N_ONLINE"]) diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index 5b5754a6c..82704ee87 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -93,6 +93,17 @@ def proc_state(pid): return re.search(r"State:\s*(\S)", f.read(), re.M).group(1) +def parse_range_list(s): + values = set() + for range_str in s.split(","): + first, sep, last = range_str.partition("-") + if sep: + values.update(range(int(first), int(last) + 1)) + else: + values.add(int(first)) + return values + + _c = ctypes.CDLL(None, use_errno=True) _mount = _c.mount diff --git 
a/tests/helpers/linux/test_cpumask.py b/tests/helpers/linux/test_cpumask.py index 17dd4378c..7979456cd 100644 --- a/tests/helpers/linux/test_cpumask.py +++ b/tests/helpers/linux/test_cpumask.py @@ -8,27 +8,16 @@ for_each_possible_cpu, for_each_present_cpu, ) -from tests.helpers.linux import LinuxHelperTestCase +from tests.helpers.linux import LinuxHelperTestCase, parse_range_list CPU_PATH = Path("/sys/devices/system/cpu") -def parse_cpulist(cpulist): - cpus = set() - for cpu_range in cpulist.split(","): - first, sep, last = cpu_range.partition("-") - if sep: - cpus.update(range(int(first), int(last) + 1)) - else: - cpus.add(int(first)) - return cpus - - class TestCpuMask(LinuxHelperTestCase): def _test_for_each_cpu(self, func, name): self.assertEqual( list(func(self.prog)), - sorted(parse_cpulist((CPU_PATH / name).read_text())), + sorted(parse_range_list((CPU_PATH / name).read_text())), ) def test_for_each_online_cpu(self): diff --git a/tests/helpers/linux/test_nodemask.py b/tests/helpers/linux/test_nodemask.py new file mode 100644 index 000000000..8b38f0180 --- /dev/null +++ b/tests/helpers/linux/test_nodemask.py @@ -0,0 +1,25 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +from pathlib import Path +import unittest + +from drgn.helpers.linux.nodemask import for_each_node, for_each_online_node +from tests.helpers.linux import LinuxHelperTestCase, parse_range_list + +NODE_PATH = Path("/sys/devices/system/node") + + +@unittest.skipUnless(NODE_PATH.exists(), "kernel does not support NUMA") +class TestNodeMask(LinuxHelperTestCase): + def _test_for_each_node(self, func, name): + self.assertEqual( + list(func(self.prog)), + sorted(parse_range_list((NODE_PATH / name).read_text())), + ) + + def test_for_each_node(self): + self._test_for_each_node(for_each_node, "possible") + + def test_for_each_online_node(self): + self._test_for_each_node(for_each_online_node, "online") From 39b76e8486130f570fc88df98358884f898aa0e1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 2 Aug 2021 15:49:29 -0700 Subject: [PATCH 014/139] docs: update repr(drgn.Type) and type constructors in documentation Commit a97f6c4fa2bb ("Associate types with program") changed repr() for drgn.Type to include a "prog." prefix, but it didn't update the documentation to reflect that. It also forgot to update a global type constructor to the new Program methods. Signed-off-by: Omar Sandoval --- _drgn.pyi | 4 ++-- docs/user_guide.rst | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 0630d3736..4701ee559 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -235,7 +235,7 @@ class Program: Get the type with the given name. >>> prog.type('long') - int_type(name='long', size=8, is_signed=True) + prog.int_type(name='long', size=8, is_signed=True) :param name: The type name. :param filename: The source code file that contains the definition. 
See @@ -1547,7 +1547,7 @@ class Type: :func:`repr()` of a ``Type`` returns a Python representation of the type: >>> print(repr(prog.type('sector_t'))) - typedef_type(name='sector_t', type=int_type(name='unsigned long', size=8, is_signed=False)) + prog.typedef_type(name='sector_t', type=prog.int_type(name='unsigned long', size=8, is_signed=False)) :class:`str() ` returns a representation of the type in programming language syntax: diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 633d3249f..2b82f34c6 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -27,7 +27,7 @@ A ``Program`` is used to look up type definitions, access variables, and read arbitrary memory:: >>> prog.type('unsigned long') - int_type(name='unsigned long', size=8, is_signed=False) + prog.int_type(name='unsigned long', size=8, is_signed=False) >>> prog['jiffies'] Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) >>> prog.read(0xffffffffbe411e10, 16) @@ -218,10 +218,10 @@ Types drgn automatically obtains type definitions from the program. Types are represented by the :class:`drgn.Type` class and created by various factory -functions like :func:`drgn.int_type()`:: +functions like :meth:`drgn.Program.int_type()`:: >>> prog.type('int') - int_type(name='int', size=4, is_signed=True) + prog.int_type(name='int', size=4, is_signed=True) You won't usually need to work with types directly, but see :ref:`api-reference-types` if you do. @@ -289,7 +289,7 @@ print the output of :func:`repr()`. 
For :class:`drgn.Object` and >>> print(repr(prog['jiffies'])) Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) >>> print(repr(prog.type('atomic_t'))) - typedef_type(name='atomic_t', type=struct_type(tag=None, size=4, members=((int_type(name='int', size=4, is_signed=True), 'counter', 0, 0),))) + prog.typedef_type(name='atomic_t', type=prog.struct_type(tag=None, size=4, members=(TypeMember(prog.type('int'), name='counter', bit_offset=0),))) The standard :func:`print()` function uses the output of :func:`str()`. For drgn objects and types, this is a representation in programming language From 7382856a4169aa9f8c42eae7d0b5c1b15dde4855 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 31 Jul 2021 12:16:38 -0700 Subject: [PATCH 015/139] docs: improve quick start documentation Add an example of stack traces and parameters/local variables and use some more interesting helpers. Signed-off-by: Omar Sandoval --- README.rst | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 8e8bd4951..2315994e4 100644 --- a/README.rst +++ b/README.rst @@ -106,25 +106,42 @@ running program, run ``sudo drgn -p $PID``. To debug a core dump (either a kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. The program must have debugging symbols available. -Then, you can access variables in the program with ``prog['name']``, access -structure members with ``.``, use various predefined helpers, and more: +Then, you can access variables in the program with ``prog['name']`` and access +structure members with ``.``: .. code-block:: pycon $ sudo drgn >>> prog['init_task'].comm (char [16])"swapper/0" - >>> d_path(fget(find_task(prog, 1), 0).f_path.address_of_()) - b'/dev/null' - >>> max(task.stime for task in for_each_task(prog)) - (u64)4192109975952 - >>> sum(disk.gendisk.part0.nr_sects for disk in for_each_disk(prog)) - (sector_t)999705952 + +You can use various predefined helpers: + +.. 
code-block:: pycon + + >>> len(list(bpf_prog_for_each(prog))) + 11 + >>> task = find_task(prog, 115) + >>> cmdline(task) + [b'findmnt', b'-p'] + +You can get stack traces with ``prog.stack_trace()`` and access parameters or +local variables with ``stack_trace['name']``: + +.. code-block:: pycon + + >>> trace = prog.stack_trace(task) + >>> trace[5] + #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) + >>> poll_list = trace[5]['list'] + >>> file = fget(task, poll_list.entries[0].fd) + >>> d_path(file.f_path.address_of_()) + b'/proc/115/mountinfo' .. end-quick-start See the `user guide `_ -for more information. +for more details and features. License ------- From d6a47f86981c736f7e299eddb4594097174c32f8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 2 Aug 2021 16:08:00 -0700 Subject: [PATCH 016/139] docs: improve stack trace documentation in user guide The API reference has all of the details, but add a short example to the user guide (and move it before symbols, as stack traces are probably more interesting/important). Signed-off-by: Omar Sandoval --- docs/user_guide.rst | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 2b82f34c6..6bcf32cac 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -199,6 +199,41 @@ Other Concepts In addition to the core concepts above, drgn provides a few additional abstractions. +Stack Traces +^^^^^^^^^^^^ + +drgn represents stack traces with the :class:`drgn.StackTrace` and +:class:`drgn.StackFrame` classes. :meth:`drgn.Program.stack_trace()` returns +the call stack for a thread. 
The :meth:`[] ` +operator looks up an object in the scope of a ``StackFrame``:: + + >>> trace = prog.stack_trace(115) + >>> trace + #0 context_switch (./kernel/sched/core.c:4683:2) + #1 __schedule (./kernel/sched/core.c:5940:8) + #2 schedule (./kernel/sched/core.c:6019:3) + #3 schedule_hrtimeout_range_clock (./kernel/time/hrtimer.c:2148:3) + #4 poll_schedule_timeout (./fs/select.c:243:8) + #5 do_poll (./fs/select.c:961:8) + #6 do_sys_poll (./fs/select.c:1011:12) + #7 __do_sys_poll (./fs/select.c:1076:8) + #8 __se_sys_poll (./fs/select.c:1064:1) + #9 __x64_sys_poll (./fs/select.c:1064:1) + #10 do_syscall_x64 (./arch/x86/entry/common.c:50:14) + #11 do_syscall_64 (./arch/x86/entry/common.c:80:7) + #12 entry_SYSCALL_64+0x7c/0x15b (./arch/x86/entry/entry_64.S:113) + #13 0x7f3344072af7 + >>> trace[5] + #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) + >>> prog['do_poll'] + (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time)) + >>> trace[5]['list'] + *(struct poll_list *)0xffffacca402e3b50 = { + .next = (struct poll_list *)0x0, + .len = (int)1, + .entries = (struct pollfd []){}, + } + Symbols ^^^^^^^ @@ -206,13 +241,6 @@ The symbol table of a program is a list of identifiers along with their address and size. drgn represents symbols with the :class:`drgn.Symbol` class, which is returned by :meth:`drgn.Program.symbol()`. -Stack Traces -^^^^^^^^^^^^ - -drgn represents stack traces with the :class:`drgn.StackTrace` and -:class:`drgn.StackFrame` classes. :meth:`drgn.Program.stack_trace()` returns -the call stack for a thread. - Types ^^^^^ From 1213eb8f4909c65b67ee03a4a57e296438c444a0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 3 Aug 2021 14:54:36 -0700 Subject: [PATCH 017/139] helpers: add bit operation helpers Extract for_each_set_bit() that was added internally for the cpumask and nodemask helpers, and add for_each_clear_bit() and test_bit() to go with it. 
Signed-off-by: Omar Sandoval --- drgn/helpers/linux/bitops.py | 64 ++++++++++++++++++++++++++++++ drgn/helpers/linux/cpumask.py | 14 ++----- drgn/helpers/linux/nodemask.py | 4 +- tests/helpers/linux/test_bitops.py | 48 ++++++++++++++++++++++ 4 files changed, 117 insertions(+), 13 deletions(-) create mode 100644 drgn/helpers/linux/bitops.py create mode 100644 tests/helpers/linux/test_bitops.py diff --git a/drgn/helpers/linux/bitops.py b/drgn/helpers/linux/bitops.py new file mode 100644 index 000000000..114ee3d49 --- /dev/null +++ b/drgn/helpers/linux/bitops.py @@ -0,0 +1,64 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Bit Operations +-------------- + +The ``drgn.helpers.linux.bitops`` module provides helpers for common bit +operations in the Linux kernel. +""" + +from typing import Iterator + +from drgn import IntegerLike, Object, sizeof + +__all__ = ( + "for_each_clear_bit", + "for_each_set_bit", + "test_bit", +) + + +def for_each_set_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]: + """ + Iterate over all set (one) bits in a bitmap. + + :param bitmap: ``unsigned long *`` + :param size: Size of *bitmap* in bits. + """ + size = int(size) + word_bits = 8 * sizeof(bitmap.type_.type) + for i in range((size + word_bits - 1) // word_bits): + word = bitmap[i].value_() + for j in range(min(word_bits, size - word_bits * i)): + if word & (1 << j): + yield (word_bits * i) + j + + +def for_each_clear_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]: + """ + Iterate over all clear (zero) bits in a bitmap. + + :param bitmap: ``unsigned long *`` + :param size: Size of *bitmap* in bits. 
+ """ + size = int(size) + word_bits = 8 * sizeof(bitmap.type_.type) + for i in range((size + word_bits - 1) // word_bits): + word = bitmap[i].value_() + for j in range(min(word_bits, size - word_bits * i)): + if not (word & (1 << j)): + yield (word_bits * i) + j + + +def test_bit(nr: IntegerLike, bitmap: Object) -> bool: + """ + Return whether a bit in a bitmap is set. + + :param nr: Bit number. + :param bitmap: ``unsigned long *`` + """ + nr = int(nr) + word_bits = 8 * sizeof(bitmap.type_.type) + return ((bitmap[nr // word_bits].value_() >> (nr & (word_bits - 1))) & 1) != 0 diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index e26a6c9f9..e3f45ebb6 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -11,7 +11,8 @@ from typing import Iterator -from drgn import Object, Program, sizeof +from drgn import Object, Program +from drgn.helpers.linux.bitops import for_each_set_bit __all__ = ( "for_each_cpu", @@ -21,15 +22,6 @@ ) -def _for_each_set_bit(bitmap: Object, size: int) -> Iterator[int]: - word_bits = 8 * sizeof(bitmap.type_.type) - for i in range((size + word_bits - 1) // word_bits): - word = bitmap[i].value_() - for j in range(min(word_bits, size - word_bits * i)): - if word & (1 << j): - yield (word_bits * i) + j - - def for_each_cpu(mask: Object) -> Iterator[int]: """ Iterate over all of the CPUs in the given mask. 
@@ -40,7 +32,7 @@ def for_each_cpu(mask: Object) -> Iterator[int]: nr_cpu_ids = mask.prog_["nr_cpu_ids"].value_() except KeyError: nr_cpu_ids = 1 - return _for_each_set_bit(mask.bits, nr_cpu_ids) + return for_each_set_bit(mask.bits, nr_cpu_ids) def _for_each_cpu_mask(prog: Program, name: str) -> Iterator[int]: diff --git a/drgn/helpers/linux/nodemask.py b/drgn/helpers/linux/nodemask.py index aeefe251e..dc4589a5e 100644 --- a/drgn/helpers/linux/nodemask.py +++ b/drgn/helpers/linux/nodemask.py @@ -12,7 +12,7 @@ from typing import Iterator from drgn import IntegerLike, Object, Program -from drgn.helpers.linux.cpumask import _for_each_set_bit +from drgn.helpers.linux.bitops import for_each_set_bit __all__ = ( "for_each_node", @@ -32,7 +32,7 @@ def for_each_node_mask(mask: Object) -> Iterator[int]: nr_node_ids = mask.prog_["nr_node_ids"].value_() except KeyError: nr_node_ids = 1 - return _for_each_set_bit(mask.bits, nr_node_ids) + return for_each_set_bit(mask.bits, nr_node_ids) def for_each_node_state(prog: Program, state: IntegerLike) -> Iterator[int]: diff --git a/tests/helpers/linux/test_bitops.py b/tests/helpers/linux/test_bitops.py new file mode 100644 index 000000000..0e5eb29af --- /dev/null +++ b/tests/helpers/linux/test_bitops.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +from drgn import Object +from drgn.helpers.linux.bitops import for_each_clear_bit, for_each_set_bit, test_bit +from tests import MockProgramTestCase + + +class TestBitOps(MockProgramTestCase): + BITMAP = [0xB351BC986648A680, 0x80DDB6615A80BC63] + # fmt: off + SET_BITS = [ + 7, 9, 10, 13, 15, 19, 22, 25, 26, 29, 30, 35, 36, 39, 42, 43, 44, 45, + 47, 48, 52, 54, 56, 57, 60, 61, 63, 64, 65, 69, 70, 74, 75, 76, 77, 79, + 87, 89, 91, 92, 94, 96, 101, 102, 105, 106, 108, 109, 111, 112, 114, + 115, 116, 118, 119, 127, + ] + CLEAR_BITS = [ + 0, 1, 2, 3, 4, 5, 6, 8, 11, 12, 14, 16, 17, 18, 20, 21, 23, 24, 27, 28, + 31, 32, 33, 34, 37, 38, 40, 41, 46, 49, 50, 51, 53, 55, 58, 59, 62, 66, + 67, 68, 71, 72, 73, 78, 80, 81, 82, 83, 84, 85, 86, 88, 90, 93, 95, 97, + 98, 99, 100, 103, 104, 107, 110, 113, 117, 120, 121, 122, 123, 124, + 125, 126, + ] + # fmt: on + + def test_for_each_set_bit(self): + bitmap = Object(self.prog, "unsigned long [2]", self.BITMAP) + self.assertEqual(list(for_each_set_bit(bitmap, 128)), self.SET_BITS) + self.assertEqual( + list(for_each_set_bit(bitmap, 101)), + [bit for bit in self.SET_BITS if bit < 101], + ) + + def test_for_each_clear_bit(self): + bitmap = Object(self.prog, "unsigned long [2]", self.BITMAP) + self.assertEqual(list(for_each_clear_bit(bitmap, 128)), self.CLEAR_BITS) + self.assertEqual( + list(for_each_clear_bit(bitmap, 100)), + [bit for bit in self.CLEAR_BITS if bit < 100], + ) + + def test_test_bit(self): + bitmap = Object(self.prog, "unsigned long [2]", self.BITMAP) + for bit in self.SET_BITS: + self.assertTrue(test_bit(bit, bitmap)) + for bit in self.CLEAR_BITS: + self.assertFalse(test_bit(bit, bitmap)) From 51f63bb53b2fe8adb5b685f9215773aaaf7fb55b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 3 Aug 2021 17:06:36 -0700 Subject: [PATCH 018/139] helpers: add node_state() to nodemask helpers Signed-off-by: Omar Sandoval --- drgn/helpers/linux/nodemask.py | 13 ++++++++++++- 
tests/helpers/linux/__init__.py | 14 ++++++++------ tests/helpers/linux/test_nodemask.py | 27 ++++++++++++++++++++++----- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/drgn/helpers/linux/nodemask.py b/drgn/helpers/linux/nodemask.py index dc4589a5e..5afe1b42e 100644 --- a/drgn/helpers/linux/nodemask.py +++ b/drgn/helpers/linux/nodemask.py @@ -12,13 +12,14 @@ from typing import Iterator from drgn import IntegerLike, Object, Program -from drgn.helpers.linux.bitops import for_each_set_bit +from drgn.helpers.linux.bitops import for_each_set_bit, test_bit __all__ = ( "for_each_node", "for_each_node_mask", "for_each_node_state", "for_each_online_node", + "node_state", ) @@ -53,3 +54,13 @@ def for_each_node(prog: Program) -> Iterator[int]: def for_each_online_node(prog: Program) -> Iterator[int]: """Iterate over all online NUMA nodes.""" return for_each_node_state(prog, prog["N_ONLINE"]) + + +def node_state(node: IntegerLike, state: Object) -> bool: + """ + Return whether the given NUMA node has the given state. + + :param node: NUMA node number. 
+ :param state: ``enum node_states`` (e.g., ``N_NORMAL_MEMORY``) + """ + return test_bit(node, state.prog_["node_states"][state].bits) diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index 82704ee87..1df529536 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -95,12 +95,14 @@ def proc_state(pid): def parse_range_list(s): values = set() - for range_str in s.split(","): - first, sep, last = range_str.partition("-") - if sep: - values.update(range(int(first), int(last) + 1)) - else: - values.add(int(first)) + s = s.strip() + if s: + for range_str in s.split(","): + first, sep, last = range_str.partition("-") + if sep: + values.update(range(int(first), int(last) + 1)) + else: + values.add(int(first)) return values diff --git a/tests/helpers/linux/test_nodemask.py b/tests/helpers/linux/test_nodemask.py index 8b38f0180..f7506fc7d 100644 --- a/tests/helpers/linux/test_nodemask.py +++ b/tests/helpers/linux/test_nodemask.py @@ -4,7 +4,7 @@ from pathlib import Path import unittest -from drgn.helpers.linux.nodemask import for_each_node, for_each_online_node +from drgn.helpers.linux.nodemask import for_each_node, for_each_online_node, node_state from tests.helpers.linux import LinuxHelperTestCase, parse_range_list NODE_PATH = Path("/sys/devices/system/node") @@ -12,14 +12,31 @@ @unittest.skipUnless(NODE_PATH.exists(), "kernel does not support NUMA") class TestNodeMask(LinuxHelperTestCase): + @staticmethod + def _parse_node_list(name): + return parse_range_list((NODE_PATH / name).read_text()) + def _test_for_each_node(self, func, name): - self.assertEqual( - list(func(self.prog)), - sorted(parse_range_list((NODE_PATH / name).read_text())), - ) + self.assertEqual(list(func(self.prog)), sorted(self._parse_node_list(name))) def test_for_each_node(self): self._test_for_each_node(for_each_node, "possible") def test_for_each_online_node(self): self._test_for_each_node(for_each_online_node, "online") + + def 
_test_node_state(self, state_name, file_name): + possible = self._parse_node_list("possible") + expected = self._parse_node_list(file_name) + state = self.prog[state_name] + for node in possible: + self.assertEqual(node_state(node, state), node in expected) + + def test_node_state(self): + self._test_node_state("N_NORMAL_MEMORY", "has_normal_memory") + # N_GENERIC_INITIATOR was added in Linux kernel commit 894c26a1c274 + # ("ACPI: Support Generic Initiator only domains") (in v5.10). Most of + # the time it is unset, so if it exists we can use it to test the unset + # case. + if (NODE_PATH / "has_generic_initiator").exists(): + self._test_node_state("N_GENERIC_INITIATOR", "has_generic_initiator") From e9915886f65c2f644a8a32a9083e5d1e78d66877 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 11 Aug 2021 08:40:37 +0800 Subject: [PATCH 019/139] helpers: Add netdev_get_by_index() Add a helper to find the corresponding "struct net_device *" object given an interface index number. As an example: >>> netdev = netdev_get_by_index(prog["init_net"], 1) >>> netdev.name.string_().decode() 'lo' Or pass a "Program" as the first argument, and let the helper look in the initial network namespace (i.e. "init_net"): >>> netdev = netdev_get_by_index(prog, 3) >>> netdev.name.string_().decode() 'enp0s3' Also add a test for this new helper to tests/helpers/linux/test_net.py. For now, a user may combine this new helper with socket.if_nametoindex() to look up by interface name: >>> netdev = netdev_get_by_index(prog, socket.if_nametoindex("dummy0")) >>> netdev.name.string_().decode() 'dummy0' However, as mentioned by Cong, one should keep in mind that socket.if_nametoindex() is based on the system's current name-to-index mapping, which may be different from that of e.g. a kdump. Thus, as suggested by Omar, a better way to do name lookups would be simply linear-searching the name hash table, which is slower, but less error-prone.
Signed-off-by: Peilin Ye --- drgn/helpers/linux/net.py | 37 +++++++++++++++++++++++++++++++-- tests/helpers/linux/test_net.py | 8 ++++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 621f7652f..a153cb535 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -9,18 +9,51 @@ Linux kernel networking subsystem. """ -from typing import Iterator +import operator +from typing import Iterator, Union -from drgn import Object +from drgn import NULL, IntegerLike, Object, Program +from drgn.helpers.linux.list import hlist_for_each_entry from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry from drgn.helpers.linux.tcp import sk_tcpstate __all__ = ( + "netdev_get_by_index", "sk_fullsock", "sk_nulls_for_each", ) +_NETDEV_HASHBITS = 8 +_NETDEV_HASHENTRIES = 1 << _NETDEV_HASHBITS + + +def netdev_get_by_index( + prog_or_net: Union[Program, Object], ifindex: IntegerLike +) -> Object: + """ + Get the network device with the given interface index number. + + :param prog_or_net: ``struct net *`` containing the device, or + :class:`Program` to use the initial network namespace. + :param ifindex: Network interface index number. 
+ :return: ``struct net_device *`` (``NULL`` if not found) + """ + if isinstance(prog_or_net, Program): + prog_or_net = prog_or_net["init_net"] + if isinstance(ifindex, Object): + ifindex = ifindex.read_() + + head = prog_or_net.dev_index_head[ + operator.index(ifindex) & (_NETDEV_HASHENTRIES - 1) + ] + for netdev in hlist_for_each_entry("struct net_device", head, "index_hlist"): + if netdev.ifindex == ifindex: + return netdev + + return NULL(prog_or_net.prog_, "struct net_device *") + + def sk_fullsock(sk: Object) -> bool: """ Check whether a socket is a full socket, i.e., not a time-wait or request diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index b0373fcfc..50908c437 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -2,10 +2,11 @@ # SPDX-License-Identifier: GPL-3.0-or-later import os +import socket from drgn import cast from drgn.helpers.linux.fs import fget -from drgn.helpers.linux.net import sk_fullsock +from drgn.helpers.linux.net import netdev_get_by_index, sk_fullsock from drgn.helpers.linux.pid import find_task from tests.helpers.linux import LinuxHelperTestCase, create_socket @@ -16,3 +17,8 @@ def test_sk_fullsock(self): file = fget(find_task(self.prog, os.getpid()), sock.fileno()) sk = cast("struct socket *", file.private_data).sk.read_() self.assertTrue(sk_fullsock(sk)) + + def test_netdev_get_by_index(self): + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_index(self.prog, index) + self.assertEqual(netdev.name.string_().decode(), name) From 65b65b27b09347542792678f0e0106fa3e70d560 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 11 Aug 2021 13:43:11 -0700 Subject: [PATCH 020/139] helpers: fix drgn.helpers.linux.user.find_user() documented return type And document that it returns NULL if the UID is not found. 
Signed-off-by: Omar Sandoval --- drgn/helpers/linux/user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drgn/helpers/linux/user.py b/drgn/helpers/linux/user.py index d9fd7475c..7eeca5bb8 100644 --- a/drgn/helpers/linux/user.py +++ b/drgn/helpers/linux/user.py @@ -32,7 +32,7 @@ def find_user(prog: Program, uid: Union[Object, IntegerLike]) -> Object: Return the user structure with the given UID. :param uid: ``kuid_t`` object or integer. - :return: ``struct user_state *`` + :return: ``struct user_struct *`` (``NULL`` if not found) """ try: uidhashentry = prog.cache["uidhashentry"] From 5541fad063aa2298a5d349ab98320c80fe25acf8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 11 Aug 2021 14:22:39 -0700 Subject: [PATCH 021/139] Fix some flake8 errors Mainly unused imports, unused variables, unnecessary f-strings, and regex literals missing the r prefix. I'm not adding it to the CI linter because it's too noisy, though. Signed-off-by: Omar Sandoval --- docs/exts/drgndoc/docstrings.py | 6 ++--- docs/exts/drgndoc/ext.py | 2 +- docs/exts/drgndoc/namespace.py | 3 +-- docs/exts/drgndoc/visitor.py | 1 - drgn/helpers/__init__.py | 2 +- drgn/helpers/linux/idr.py | 2 +- drgn/helpers/linux/mm.py | 2 +- drgn/helpers/linux/net.py | 1 - drgn/helpers/linux/pid.py | 4 ++-- examples/linux/fs_inodes.py | 1 - libdrgn/build-aux/gen_constants.py | 3 +-- scripts/iwyu.py | 2 +- setup.py | 2 +- tests/__init__.py | 1 - tests/helpers/linux/test_fs.py | 4 ++-- tests/libdrgn.py | 1 - tests/test_dwarf.py | 36 ++++++++++++++++-------------- tests/test_object.py | 1 - tests/test_type.py | 2 -- tools/bpf_inspect.py | 8 +------ vmtest/download.py | 2 +- vmtest/manage.py | 2 +- vmtest/vm.py | 4 ++-- 23 files changed, 39 insertions(+), 53 deletions(-) diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index e6bab5369..a07994fc7 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -5,7 +5,7 @@ import argparse import 
functools import sys -from typing import Union, cast +from typing import cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode @@ -74,7 +74,7 @@ def escape_string(s: str) -> str: if args.header: output_file.write( - f"""\ + """\ /* * Generated by drgndoc.docstrings -H. * @@ -86,7 +86,7 @@ def escape_string(s: str) -> str: """ ) else: - output_file.write(f"/* Generated by drgndoc.docstrings. */\n\n") + output_file.write("/* Generated by drgndoc.docstrings. */\n\n") def aux(resolved: ResolvedNode[Node], name: str) -> None: node = resolved.node diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 6ca33b9d8..2d252f8ed 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -42,7 +42,7 @@ import os.path import re -from typing import Any, Dict, List, cast +from typing import Any, Dict, cast import docutils.nodes import docutils.parsers.rst.directives diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index 808c1cb09..9b19f72ea 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later import itertools -from typing import Generic, Iterator, List, Mapping, Optional, Sequence, TypeVar, Union +from typing import Generic, Iterator, List, Mapping, Sequence, TypeVar, Union from drgndoc.parse import ( Class, @@ -14,7 +14,6 @@ Node, Variable, ) -from drgndoc.util import dot_join NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True) diff --git a/docs/exts/drgndoc/visitor.py b/docs/exts/drgndoc/visitor.py index 6cf132ffd..b527b22e6 100644 --- a/docs/exts/drgndoc/visitor.py +++ b/docs/exts/drgndoc/visitor.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-3.0-or-later import ast -import sys from typing import Any, Optional diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index 552451c95..b52f7e47f 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -23,7 +23,7 @@ 
import enum import typing -from typing import Container, Iterable, List, Tuple +from typing import Container, Iterable from drgn import Type diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index b9497cd83..623fc0ca7 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -15,7 +15,7 @@ from _drgn import _linux_helper_idr_find as idr_find from drgn import Object -from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup +from drgn.helpers.linux.radixtree import radix_tree_for_each __all__ = ( "idr_find", diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 8a5b9d9e2..927f0caab 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -11,7 +11,7 @@ """ import operator -from typing import Any, Iterator, List, Optional, Union, overload +from typing import Iterator, List, Optional, Union, overload from _drgn import _linux_helper_read_vm from drgn import IntegerLike, Object, Program, cast diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index a153cb535..3b23694df 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -15,7 +15,6 @@ from drgn import NULL, IntegerLike, Object, Program from drgn.helpers.linux.list import hlist_for_each_entry from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry -from drgn.helpers.linux.tcp import sk_tcpstate __all__ = ( "netdev_get_by_index", diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index 9f4d7cab4..d2e786ed0 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -16,8 +16,8 @@ _linux_helper_find_task as find_task, _linux_helper_pid_task as pid_task, ) -from drgn import NULL, Object, Program, cast, container_of -from drgn.helpers.linux.idr import idr_find, idr_for_each +from drgn import Object, Program, cast, container_of +from drgn.helpers.linux.idr import idr_for_each from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( diff --git 
a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 856d13f76..6e1a4c190 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -6,7 +6,6 @@ import os import sys -import time from drgn.helpers.linux.fs import for_each_mount, inode_path from drgn.helpers.linux.list import list_for_each_entry diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index b859dfd84..441a559c2 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -1,7 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later -import os.path import re import sys @@ -63,7 +62,7 @@ def gen_constant_class(drgn_h, output_file, class_name, enum_class, constants, r def gen_constants(input_file, output_file): drgn_h = input_file.read() output_file.write( - f"""\ + """\ /* Generated by libdrgn/build-aux/gen_constants.py. */ #include "drgnpy.h" diff --git a/scripts/iwyu.py b/scripts/iwyu.py index 42eb2a067..85c684d51 100755 --- a/scripts/iwyu.py +++ b/scripts/iwyu.py @@ -122,7 +122,7 @@ def gen_python_mapping_file(mapping_path): # For some reason, include-what-you-mean wants struct _typeobject, but # find-all-symbols only reports PyTypeObject. Add it manually. imp.write( - f' {{"symbol": ["_typeobject", "private", "", "public"]}}, # From cpython/object.h\n' + ' {{"symbol": ["_typeobject", "private", "", "public"]}}, # From cpython/object.h\n' ) imp.write("]\n") diff --git a/setup.py b/setup.py index 802207e4e..1f0051fe0 100755 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later # setuptools must be imported before distutils (see pypa/setuptools#2230). 
-import setuptools # isort: skip +import setuptools # isort: skip # noqa: F401 import contextlib from distutils import log diff --git a/tests/__init__.py b/tests/__init__.py index 43367aec4..07d307df8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-3.0-or-later import functools -import types from typing import Any, NamedTuple, Optional import unittest diff --git a/tests/helpers/linux/test_fs.py b/tests/helpers/linux/test_fs.py index aff6e1a9d..d127c3515 100644 --- a/tests/helpers/linux/test_fs.py +++ b/tests/helpers/linux/test_fs.py @@ -51,9 +51,9 @@ def test_inode_paths(self): with tempfile.TemporaryDirectory(prefix="drgn-tests-") as dir: path1 = os.fsencode(os.path.abspath(os.path.join(dir, "a"))) path2 = os.fsencode(os.path.abspath(os.path.join(dir, "b"))) - with open(path1, "w") as f: + with open(path1, "w"): os.link(path1, path2) - with open(path2, "r") as f: + with open(path2, "r"): inode = path_lookup(self.prog, path1).dentry.d_inode paths = list(inode_paths(inode)) self.assertEqual(len(paths), 2) diff --git a/tests/libdrgn.py b/tests/libdrgn.py index 900ecccae..bdf3b6932 100644 --- a/tests/libdrgn.py +++ b/tests/libdrgn.py @@ -7,7 +7,6 @@ import os import _drgn -import drgn _drgn_pydll = ctypes.PyDLL(_drgn.__file__) _drgn_cdll = ctypes.CDLL(_drgn.__file__) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index cffc8d7f4..d266bd55f 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -6,7 +6,6 @@ import os.path import re import tempfile -import unittest import drgn from drgn import ( @@ -1025,22 +1024,25 @@ def test_filename(self): ), ] - point_type = lambda prog: prog.struct_type( - "point", - 8, - ( - TypeMember(prog.int_type("int", 4, True), "x"), - TypeMember(prog.int_type("int", 4, True), "y", 32), - ), - ) - other_point_type = lambda prog: prog.struct_type( - "point", - 8, - ( - TypeMember(prog.int_type("int", 4, True), "a"), - TypeMember(prog.int_type("int", 4, True), "b", 32), - ), 
- ) + def point_type(prog): + return prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + + def other_point_type(prog): + return prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "a"), + TypeMember(prog.int_type("int", 4, True), "b", 32), + ), + ) prog = dwarf_program(dies) for dir in ["", "src", "usr/src", "/usr/src"]: diff --git a/tests/test_object.py b/tests/test_object.py index 4ed895da6..811d78388 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -11,7 +11,6 @@ ObjectAbsentError, OutOfBoundsError, Qualifiers, - Type, TypeMember, cast, reinterpret, diff --git a/tests/test_type.py b/tests/test_type.py index 7ac393e8c..435db974d 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -1,8 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later -import operator - from drgn import ( Language, Object, diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py index 177494050..0c51044c2 100755 --- a/tools/bpf_inspect.py +++ b/tools/bpf_inspect.py @@ -2,13 +2,7 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later -DESCRIPTION = """ -drgn script to list BPF programs or maps and their properties -unavailable via kernel API. 
-""" - import argparse -import sys from drgn.helpers import enum_type_to_class from drgn.helpers.linux import bpf_map_for_each, bpf_prog_for_each, hlist_for_each_entry @@ -116,7 +110,7 @@ def list_bpf_maps(args): def main(): parser = argparse.ArgumentParser( - description=DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter + description="drgn script to list BPF programs or maps and their properties unavailable via kernel API" ) subparsers = parser.add_subparsers(title="subcommands", dest="subcommand") diff --git a/vmtest/download.py b/vmtest/download.py index 8565d7619..b5abf1f5a 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -27,7 +27,7 @@ def available_kernel_releases( github_release: Dict[str, Any], arch: str ) -> Dict[str, Dict[str, Any]]: - pattern = re.compile(r"kernel-(.*)\." + re.escape(arch) + "\.tar\.zst") + pattern = re.compile(r"kernel-(.*)\." + re.escape(arch) + r"\.tar\.zst") releases = {} for asset in github_release["assets"]: match = pattern.fullmatch(asset["name"]) diff --git a/vmtest/manage.py b/vmtest/manage.py index daddf592b..0bbff359f 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -87,7 +87,7 @@ async def fetch_kernel_tags(kernel_dir: Path, kernel_tags: Sequence[str]) -> Non mainline_tags = [] stable_tags = [] for tag in kernel_tags: - if re.fullmatch("v[0-9]+\.[0-9]+\.[0-9]+", tag): + if re.fullmatch(r"v[0-9]+\.[0-9]+\.[0-9]+", tag): stable_tags.append(tag) else: mainline_tags.append(tag) diff --git a/vmtest/vm.py b/vmtest/vm.py index c3fe15c78..1d9971c1b 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -162,7 +162,7 @@ class LostVMError(Exception): def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: match = re.search( - "QEMU emulator version ([0-9]+(?:\.[0-9]+)*)", + r"QEMU emulator version ([0-9]+(?:\.[0-9]+)*)", subprocess.check_output( ["qemu-system-x86_64", "-version"], universal_newlines=True ), @@ -238,7 +238,7 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: # 
fmt: on ], env=env, - ) as qemu: + ): server_sock.settimeout(5) try: sock = server_sock.accept()[0] From 980d1c64188211b7b4ca57c440637aa345a15921 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 11 Aug 2021 17:19:16 -0700 Subject: [PATCH 022/139] helpers: fix annotation for type of entry helpers All of these take a type which can also be a drgn.Type, not just a str. Signed-off-by: Omar Sandoval --- drgn/helpers/linux/list.py | 10 +++++++--- drgn/helpers/linux/list_nulls.py | 6 +++--- drgn/helpers/linux/rbtree.py | 8 ++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index b72d1e0e0..6e557b267 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -156,7 +156,9 @@ def list_for_each_reverse(head: Object) -> Iterator[Object]: pos = pos.prev.read_() -def list_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: +def list_for_each_entry( + type: Union[str, Type], head: Object, member: str +) -> Iterator[Object]: """ Iterate over all of the entries in a list. @@ -170,7 +172,7 @@ def list_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object def list_for_each_entry_reverse( - type: str, head: Object, member: str + type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a list in reverse order. @@ -206,7 +208,9 @@ def hlist_for_each(head: Object) -> Iterator[Object]: pos = pos.next.read_() -def hlist_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: +def hlist_for_each_entry( + type: Union[str, Type], head: Object, member: str +) -> Iterator[Object]: """ Iterate over all of the entries in a hash list. 
diff --git a/drgn/helpers/linux/list_nulls.py b/drgn/helpers/linux/list_nulls.py index f007c68c8..a3b9536bb 100644 --- a/drgn/helpers/linux/list_nulls.py +++ b/drgn/helpers/linux/list_nulls.py @@ -11,9 +11,9 @@ list is not a ``NULL`` pointer, but a "nulls" marker. """ -from typing import Iterator +from typing import Iterator, Union -from drgn import Object, container_of +from drgn import Object, Type, container_of __all__ = ( "hlist_nulls_empty", @@ -41,7 +41,7 @@ def hlist_nulls_empty(head: Object) -> bool: def hlist_nulls_for_each_entry( - type: str, head: Object, member: str + type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all the entries in a nulls hash list. diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index c81e2bb96..32d025674 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -9,9 +9,9 @@ red-black trees from :linux:`include/linux/rbtree.h`. """ -from typing import Callable, Iterator, TypeVar +from typing import Callable, Iterator, TypeVar, Union -from drgn import NULL, Object, container_of +from drgn import NULL, Object, Type, container_of __all__ = ( "RB_EMPTY_NODE", @@ -158,7 +158,7 @@ def aux(node: Object) -> Iterator[Object]: def rbtree_inorder_for_each_entry( - type: str, root: Object, member: str + type: Union[str, Type], root: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a red-black tree in sorted order. @@ -176,7 +176,7 @@ def rbtree_inorder_for_each_entry( def rb_find( - type: str, + type: Union[str, Type], root: Object, member: str, key: KeyType, From 557b8152cca01006c26063374e4a4b9f27e2bc9f Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 12 Aug 2021 07:27:31 +0800 Subject: [PATCH 023/139] helpers: Add netdev_get_by_name() Add a helper to get the network device ("struct net_device *") given an interface name. 
As an example: >>> netdev = netdev_get_by_name(prog["init_net"], "lo") >>> netdev.ifindex.value_() 1 Or pass a "Program" as the first argument, and let the helper search the initial network namespace (i.e. "init_net"): >>> netdev = netdev_get_by_name(prog, "dummy0") >>> netdev.ifindex.value_() 2 Also add a test for this new helper to tests/helpers/linux/test_net.py. This helper simply does a linear search over the name hash table of the network namespace, since implementing hashing in drgn is non-trivial. It is obviously slower than net/core/dev.c:netdev_name_node_lookup() in the kernel, but still useful. Linux kernel commit ff92741270bf ("net: introduce name_node struct to be used in hashlist") introduced struct netdev_name_node for name lookups. Start by assuming that the kernel has this commit, and fall back to the old path if that fails. Signed-off-by: Peilin Ye --- drgn/helpers/linux/net.py | 42 +++++++++++++++++++++++++++++++++ tests/helpers/linux/test_net.py | 7 +++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 3b23694df..fbfed2f5b 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -18,6 +18,7 @@ __all__ = ( "netdev_get_by_index", + "netdev_get_by_name", "sk_fullsock", "sk_nulls_for_each", ) @@ -53,6 +54,47 @@ def netdev_get_by_index( return NULL(prog_or_net.prog_, "struct net_device *") +def netdev_get_by_name( + prog_or_net: Union[Program, Object], name: Union[str, bytes] +) -> Object: + """ + Get the network device with the given interface name. + + :param prog_or_net: ``struct net *`` containing the device, or + :class:`Program` to use the initial network namespace. + :param name: Network interface name. 
+ :return: ``struct net_device *`` (``NULL`` if not found) + """ + if isinstance(prog_or_net, Program): + prog_or_net = prog_or_net["init_net"] + if isinstance(name, str): + name = name.encode() + + # Since Linux kernel commit ff92741270bf ("net: introduce name_node struct + # to be used in hashlist") (in v5.5), the device name hash table contains + # struct netdev_name_node entries. Before that, it contained the struct + # net_device directly. + try: + entry_type = prog_or_net.prog_.type("struct netdev_name_node") + member = "hlist" + entry_is_name_node = True + except LookupError: + entry_type = prog_or_net.prog_.type("struct net_device") + member = "name_hlist" + entry_is_name_node = False + + for i in range(_NETDEV_HASHENTRIES): + head = prog_or_net.dev_name_head[i] + for entry in hlist_for_each_entry(entry_type, head, member): + if entry.name.string_() == name: + if entry_is_name_node: + return entry.dev + else: + return entry + + return NULL(prog_or_net.prog_, "struct net_device *") + + def sk_fullsock(sk: Object) -> bool: """ Check whether a socket is a full socket, i.e., not a time-wait or request diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index 50908c437..c15a20b92 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -6,7 +6,7 @@ from drgn import cast from drgn.helpers.linux.fs import fget -from drgn.helpers.linux.net import netdev_get_by_index, sk_fullsock +from drgn.helpers.linux.net import netdev_get_by_index, netdev_get_by_name, sk_fullsock from drgn.helpers.linux.pid import find_task from tests.helpers.linux import LinuxHelperTestCase, create_socket @@ -22,3 +22,8 @@ def test_netdev_get_by_index(self): for index, name in socket.if_nameindex(): netdev = netdev_get_by_index(self.prog, index) self.assertEqual(netdev.name.string_().decode(), name) + + def test_netdev_get_by_name(self): + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_name(self.prog, name) + 
self.assertEqual(netdev.ifindex, index) From 861c61eda0e51ba574ed63236cfc32062ac1336b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 12 Aug 2021 11:53:22 -0700 Subject: [PATCH 024/139] Update elfutils in manylinux wheels Use the latest version of elfutils (0.185) and apply the fix "libdwfl: fix potential NULL pointer dereference when reading link map" since that hasn't been released yet and is needed to avoid crashing when debugging userspace core dumps. Signed-off-by: Omar Sandoval --- scripts/build_manylinux_in_docker.sh | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index dc556945a..c742fa5b4 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -20,11 +20,28 @@ ln -s /usr/share/aclocal/pkg.m4 /usr/local/share/aclocal/ # Install a recent version of elfutils instead of whatever is in the manylinux # image. -elfutils_version=0.183 +elfutils_version=0.185 elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elfutils_version.tar.bz2 mkdir /tmp/elfutils cd /tmp/elfutils curl -L "$elfutils_url" | tar -xj --strip-components=1 +# Apply "libdwfl: fix potential NULL pointer dereference when reading link map" +# manually since it isn't in a release yet. +patch -p1 << "EOF" +diff --git a/libdwfl/link_map.c b/libdwfl/link_map.c +index 0d8d1c17..1e7d4502 100644 +--- a/libdwfl/link_map.c ++++ b/libdwfl/link_map.c +@@ -254,7 +254,7 @@ read_addrs (struct memory_closure *closure, + Dwfl *dwfl = closure->dwfl; + + /* Read a new buffer if the old one doesn't cover these words. */ +- if (buffer == NULL ++ if (*buffer == NULL + || vaddr < *read_vaddr + || vaddr - (*read_vaddr) + nb > *buffer_available) + { +EOF # We don't bother with debuginfod support for a few reasons: # # 1. 
It depends on libcurl, which would pull in a bunch of transitive From ec3cb15bad52427d9fddaade46c57e0e918a42a9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 12 Aug 2021 11:57:07 -0700 Subject: [PATCH 025/139] drgn 0.0.14 Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 88378aefe..17b466570 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Facebook, Inc. and its affiliates. dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.13], +AC_INIT([libdrgn], [0.0.14], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) From ad2119aaa3a08896a94c0ae0ad5c12a3af96197a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 12 Aug 2021 14:39:29 -0700 Subject: [PATCH 026/139] Tell pytest not to match classes/functions starting with "test" I run tests with setup.py or with the unittest module, but Fedora uses pytest. pytest assumes that any class or function starting with "test" is a test case, which is not always the case (e.g., drgn.helpers.linux.bitops.test_bit()). We've hit this at least twice, in #94 and #112. All of our tests are unittest.TestCase cases, so we can tell pytest to not match anything else. I'm using pytest.ini instead of pyproject.toml because pytest only started supporting the latter relatively recently. Closes #112. 
Signed-off-by: Omar Sandoval --- pytest.ini | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..976c84825 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +python_classes = +python_functions = From 611e4d90b229da7afb5d0524c5d42751884632b5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 13 Aug 2021 17:12:18 -0700 Subject: [PATCH 027/139] libdrgn: debug_info: support DWARF 3 forms for loclistptr DWARF 3 uses DW_FORM_data4 or DW_FORM_data8 for DW_AT_location loclistptrs. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 556df8193..dfd49d13f 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1862,7 +1862,12 @@ drgn_dwarf_location(struct drgn_debug_info_module *module, { struct drgn_error *err; switch (attr->form) { + /* DWARF 3 */ + case DW_FORM_data4: + case DW_FORM_data8: + /* DWARF 4-5 */ case DW_FORM_sec_offset: + /* DWARF 5 */ case DW_FORM_loclistx: { Dwarf_Die cu_die; Dwarf_Half cu_version; From 8b4532ca0aa74ab0ea7472b513aec24cd6140b40 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 11:25:33 -0700 Subject: [PATCH 028/139] libdrgn: debug_info: improve handling of DW_AT_data_member_location There are a couple of issues with how we interpret DW_AT_data_member_location: 1. DW_AT_data_member_location can be a location list, and we shouldn't interpret the section offset as the member offset. 2. DW_AT_data_member_location can be a location description block, and in DWARF 2, it cannot be a constant. We should handle constant offset expressions as generated by GCC and Clang. Closes #13. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 102 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 6 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index dfd49d13f..f0a8c560f 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -3510,6 +3510,98 @@ drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) return NULL; } +static inline bool drgn_dwarf_attribute_is_block(Dwarf_Attribute *attr) +{ + switch (attr->form) { + case DW_FORM_block1: + case DW_FORM_block2: + case DW_FORM_block4: + case DW_FORM_block: + return true; + default: + return false; + } +} + +static inline bool drgn_dwarf_attribute_is_ptr(Dwarf_Attribute *attr) +{ + switch (attr->form) { + case DW_FORM_sec_offset: + return true; + case DW_FORM_data4: + case DW_FORM_data8: { + /* + * dwarf_cu_die() always returns the DIE. We should use + * dwarf_cu_info(), but that requires elfutils >= 0.171. + */ + Dwarf_Die unused; + Dwarf_Half cu_version; + dwarf_cu_die(attr->cu, &unused, &cu_version, NULL, NULL, NULL, + NULL, NULL); + return cu_version <= 3; + } + default: + return false; + } +} + +static struct drgn_error *invalid_data_member_location(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_member_location"); +} + +static struct drgn_error * +drgn_parse_dwarf_data_member_location(Dwarf_Attribute *attr, uint64_t *ret) +{ + struct drgn_error *err; + + if (drgn_dwarf_attribute_is_block(attr)) { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + /* + * In DWARF 2, DW_AT_data_member_location is always a location + * description. We can translate a DW_OP_plus_uconst expression + * into a constant offset; other expressions aren't supported + * yet. + */ + struct binary_buffer bb; + /* + * Right now we only parse u8 and ULEB128, so the byte order + * doesn't matter. 
+ */ + binary_buffer_init(&bb, block.data, block.length, + HOST_LITTLE_ENDIAN, + invalid_data_member_location); + uint8_t opcode; + err = binary_buffer_next_u8(&bb, &opcode); + if (err) + return err; + if (opcode != DW_OP_plus_uconst) { +unsupported: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has unsupported DW_AT_data_member_location"); + } + err = binary_buffer_next_uleb128(&bb, ret); + if (err) + return err; + if (binary_buffer_has_next(&bb)) + goto unsupported; + } else if (drgn_dwarf_attribute_is_ptr(attr)) { + goto unsupported; + } else { + + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) + return invalid_data_member_location(NULL, NULL, NULL); + *ret = word; + } + return NULL; +} + static struct drgn_error * parse_member_offset(Dwarf_Die *die, union drgn_lazy_object *member_object, bool little_endian, uint64_t *ret) @@ -3540,12 +3632,10 @@ parse_member_offset(Dwarf_Die *die, union drgn_lazy_object *member_object, */ attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); if (attr) { - Dwarf_Word byte_offset; - if (dwarf_formudata(attr, &byte_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_member_location"); - } - *ret = 8 * byte_offset; + err = drgn_parse_dwarf_data_member_location(attr, ret); + if (err) + return err; + *ret *= 8; } else { *ret = 0; } From 333652dba3e02fb50082666b120cb075f91843e9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 12:43:14 -0700 Subject: [PATCH 029/139] vmtest: fix deprecated QEMU option warnings QEMU 6.0 deprecated boolean options without an explicit =on or =off (https://wiki.qemu.org/ChangeLog/6.0#New_deprecated_options_and_features). Change readonly to readonly=on. 
Signed-off-by: Omar Sandoval --- vmtest/vm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vmtest/vm.py b/vmtest/vm.py index 1d9971c1b..b0680db36 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -222,10 +222,10 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: "-no-reboot", "-virtfs", - f"local,id=root,path=/,mount_tag=/dev/root,security_model=none,readonly{multidevs}", + f"local,id=root,path=/,mount_tag=/dev/root,security_model=none,readonly=on{multidevs}", "-virtfs", - f"local,path={kernel_dir},mount_tag=modules,security_model=none,readonly", + f"local,path={kernel_dir},mount_tag=modules,security_model=none,readonly=on", "-device", "virtio-serial", "-chardev", f"socket,id=vmtest,path={socket_path}", From 5977dcc1e8b75e412e8513769fefcc2817f495ab Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 12:44:00 -0700 Subject: [PATCH 030/139] vmtest: use larger msize for 9pfs mounts QEMU warns about the default 8k msize (https://wiki.qemu.org/Documentation/9psetup#msize). I wasn't able to measure any performance difference, but bump it to 1MiB to silence the warning. Signed-off-by: Omar Sandoval --- vmtest/vm.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/vmtest/vm.py b/vmtest/vm.py index b0680db36..93d629255 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -14,6 +14,8 @@ from util import nproc, out_of_date +_9PFS_MSIZE = 1024 * 1024 + # Script run as init in the virtual machine. This only depends on busybox. We # don't assume that any regular commands are built in (not even echo or test), # so we always explicitly run busybox. @@ -65,7 +67,7 @@ # Load kernel modules. "$BUSYBOX" mkdir -p "/lib/modules/$RELEASE" -"$BUSYBOX" mount -t 9p -o trans=virtio,cache=loose,ro modules "/lib/modules/$RELEASE" +"$BUSYBOX" mount -t 9p -o trans=virtio,cache=loose,ro,msize={_9PFS_MSIZE} modules "/lib/modules/$RELEASE" "$BUSYBOX" modprobe configs # Create static device nodes. 
@@ -204,7 +206,9 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: with open(init, "w") as init_file: init_file.write( _INIT_TEMPLATE.format( - busybox=shlex.quote(busybox), command=shlex.quote(command) + _9PFS_MSIZE=_9PFS_MSIZE, + busybox=shlex.quote(busybox), + command=shlex.quote(command), ) ) os.chmod(init, 0o755) @@ -234,7 +238,7 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: "-kernel", str(kernel_dir / "vmlinuz"), "-append", - f"rootfstype=9p rootflags=trans=virtio,cache=loose ro console=0,115200 panic=-1 init={init}", + f"rootfstype=9p rootflags=trans=virtio,cache=loose,msize={_9PFS_MSIZE} ro console=0,115200 panic=-1 init={init}", # fmt: on ], env=env, From cf06be1813fc0805397f3d762ccc16ec2b9f3635 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 17 Aug 2021 05:58:58 +0800 Subject: [PATCH 031/139] helpers: Add for_each_net() Add a helper to iterate over all network namespaces in the system. As an example: >>> for net in for_each_net(prog): ... if netdev_get_by_name(net, "enp0s3"): ... print(net.ipv4.sysctl_ip_early_demux.value_()) ... 1 Also add a test for this new helper to tests/helpers/linux/test_net.py. 
Suggested-by: Cong Wang Signed-off-by: Peilin Ye --- drgn/helpers/linux/net.py | 15 ++++++++++++++- tests/helpers/linux/test_net.py | 10 +++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index fbfed2f5b..89cc9c9e5 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -13,10 +13,11 @@ from typing import Iterator, Union from drgn import NULL, IntegerLike, Object, Program -from drgn.helpers.linux.list import hlist_for_each_entry +from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry __all__ = ( + "for_each_net", "netdev_get_by_index", "netdev_get_by_name", "sk_fullsock", @@ -24,6 +25,18 @@ ) +def for_each_net(prog: Program) -> Iterator[Object]: + """ + Iterate over all network namespaces in the system. + + :return: Iterator of ``struct net *`` objects. + """ + for net in list_for_each_entry( + "struct net", prog["net_namespace_list"].address_of_(), "list" + ): + yield net + + _NETDEV_HASHBITS = 8 _NETDEV_HASHENTRIES = 1 << _NETDEV_HASHBITS diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index c15a20b92..b75474953 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -6,7 +6,12 @@ from drgn import cast from drgn.helpers.linux.fs import fget -from drgn.helpers.linux.net import netdev_get_by_index, netdev_get_by_name, sk_fullsock +from drgn.helpers.linux.net import ( + for_each_net, + netdev_get_by_index, + netdev_get_by_name, + sk_fullsock, +) from drgn.helpers.linux.pid import find_task from tests.helpers.linux import LinuxHelperTestCase, create_socket @@ -27,3 +32,6 @@ def test_netdev_get_by_name(self): for index, name in socket.if_nameindex(): netdev = netdev_get_by_name(self.prog, name) self.assertEqual(netdev.ifindex, index) + + def test_for_each_net(self): + self.assertIn(self.prog["init_net"].address_of_(), 
for_each_net(self.prog)) From c9cb28b6491951c59c3e3e32b402e2458a2cbff2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Aug 2021 14:50:17 -0700 Subject: [PATCH 032/139] docs: set required Sphinx version for Read the Docs Read the Docs defaults to Sphinx 1.8.5. This version was released in 2019 and doesn't know about the :classmethod: option, so the documentation for Object.from_bytes_() is missing from drgn.readthedocs.io. Set the required version to the current latest version as recommended by Read the Docs: https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html#pinning-dependencies. Signed-off-by: Omar Sandoval --- .readthedocs.yml | 3 +++ docs/requirements.txt | 1 + 2 files changed, 4 insertions(+) create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yml b/.readthedocs.yml index 4e368ddf7..fe579b18e 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,3 +1,6 @@ version: 2 sphinx: configuration: docs/conf.py +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..f2bba1b52 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +sphinx==4.1.2 From 242d1484f90f0f662542c7c1d6a2b3c03b6b0a06 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 20 Aug 2021 05:21:43 +0800 Subject: [PATCH 033/139] helpers: Add get_net_ns_by_{inode,fd}() Add a helper, get_net_ns_by_inode(), to get the network namespace ("netns") descriptor (struct net *) given a netns NSFS pseudo-file inode (struct inode *), e.g. "/proc/$PID/ns/net" or "/run/netns/$NAME". As an example: >>> inode = path_lookup(prog, "/run/netns/foo").dentry.d_inode >>> net = get_net_ns_by_inode(inode) >>> netdev = netdev_get_by_name(net, "eth3") >>> netdev.ifindex.value_() 5 Conventionally ip netns files can be found under "/var/run/netns/", while Docker netns files can be found under "/var/run/docker/netns". 
However, as pointed out by Omar, path_lookup() doesn't know how to deal with symlinks; resolve it using something like "pwd -P" before passing it to path_lookup(). Also add a get_net_ns_by_fd() wrapper around it as suggested by Omar. Example: >>> import os >>> pid = os.getpid() >>> task = find_task(prog, pid) >>> file = open(f"/proc/{pid}/ns/net") >>> net = get_net_ns_by_fd(task, file.fileno()) Add a test for get_net_ns_by_inode(). Signed-off-by: Peilin Ye --- drgn/helpers/linux/net.py | 41 ++++++++++++++++++++++++++++++++- tests/helpers/linux/test_net.py | 29 +++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 89cc9c9e5..8f04a516e 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -12,12 +12,15 @@ import operator from typing import Iterator, Union -from drgn import NULL, IntegerLike, Object, Program +from drgn import NULL, IntegerLike, Object, Program, cast, container_of +from drgn.helpers.linux.fs import fget from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry __all__ = ( "for_each_net", + "get_net_ns_by_inode", + "get_net_ns_by_fd", "netdev_get_by_index", "netdev_get_by_name", "sk_fullsock", @@ -37,6 +40,42 @@ def for_each_net(prog: Program) -> Iterator[Object]: yield net +_CLONE_NEWNET = 0x40000000 + + +def get_net_ns_by_inode(inode: Object) -> Object: + """ + Get a network namespace from a network namespace NSFS inode, e.g. + ``/proc/$PID/ns/net`` or ``/var/run/netns/$NAME``. 
+ + :param inode: ``struct inode *`` + :return: ``struct net *`` + :raises ValueError: if *inode* is not a network namespace inode + """ + if inode.i_fop != inode.prog_["ns_file_operations"].address_of_(): + raise ValueError("not a namespace inode") + + ns = cast("struct ns_common *", inode.i_private) + if ns.ops.type != _CLONE_NEWNET: + raise ValueError("not a network namespace inode") + + return container_of(ns, "struct net", "ns") + + +def get_net_ns_by_fd(task: Object, fd: IntegerLike) -> Object: + """ + Get a network namespace from a task and a file descriptor referring to a + network namespace NSFS inode, e.g. ``/proc/$PID/ns/net`` or + ``/var/run/netns/$NAME``. + + :param task: ``struct task_struct *`` + :param fd: File descriptor. + :return: ``struct net *`` + :raises ValueError: If *fd* does not refer to a network namespace inode + """ + return get_net_ns_by_inode(fget(task, fd).f_inode) + + _NETDEV_HASHBITS = 8 _NETDEV_HASHENTRIES = 1 << _NETDEV_HASHBITS diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index b75474953..fe8f73ecc 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -3,11 +3,13 @@ import os import socket +import tempfile from drgn import cast from drgn.helpers.linux.fs import fget from drgn.helpers.linux.net import ( for_each_net, + get_net_ns_by_fd, netdev_get_by_index, netdev_get_by_name, sk_fullsock, @@ -35,3 +37,30 @@ def test_netdev_get_by_name(self): def test_for_each_net(self): self.assertIn(self.prog["init_net"].address_of_(), for_each_net(self.prog)) + + def test_get_net_ns_by_fd(self): + pid = os.getpid() + task = find_task(self.prog, pid) + with open(f"/proc/{pid}/ns/net") as file: + net = get_net_ns_by_fd(task, file.fileno()) + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_index(net, index) + self.assertEqual(netdev.name.string_().decode(), name) + + with tempfile.TemporaryFile("rb") as file: + self.assertRaisesRegex( + ValueError, + "not a 
namespace inode", + get_net_ns_by_fd, + task, + file.fileno(), + ) + + with open(f"/proc/{pid}/ns/mnt") as file: + self.assertRaisesRegex( + ValueError, + "not a network namespace inode", + get_net_ns_by_fd, + task, + file.fileno(), + ) From 27906d0cf174c2b346dfb8c140f072078cef2157 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 20 Aug 2021 14:58:33 -0700 Subject: [PATCH 034/139] libdrgn: python: cast enums when wrapping with Python call The "k" format expects an unsigned long, so make sure we cast C enums to the proper type. This probably doesn't matter for x86 in practice, but it's better to be safe. Signed-off-by: Omar Sandoval --- libdrgn/python/program.c | 3 ++- libdrgn/python/type.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 453edf628..70a20f24a 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -217,7 +217,8 @@ static struct drgn_error *py_type_find_fn(enum drgn_type_kind kind, PyObject *type_obj; gstate = PyGILState_Ensure(); - kind_obj = PyObject_CallFunction(TypeKind_class, "k", kind); + kind_obj = PyObject_CallFunction(TypeKind_class, "k", + (unsigned long)kind); if (!kind_obj) { err = drgn_error_from_python(); goto out_gstate; diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index c0f6afbc0..1e5f4175b 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -60,7 +60,7 @@ static Program *DrgnType_get_prog(DrgnType *self, void *arg) static PyObject *DrgnType_get_kind(DrgnType *self) { return PyObject_CallFunction(TypeKind_class, "k", - drgn_type_kind(self->type)); + (unsigned long)drgn_type_kind(self->type)); } static PyObject *DrgnType_get_primitive(DrgnType *self) @@ -68,7 +68,7 @@ static PyObject *DrgnType_get_primitive(DrgnType *self) if (drgn_type_primitive(self->type) == DRGN_NOT_PRIMITIVE_TYPE) Py_RETURN_NONE; return PyObject_CallFunction(PrimitiveType_class, "k", - drgn_type_primitive(self->type)); + (unsigned 
long)drgn_type_primitive(self->type)); } static PyObject *DrgnType_get_qualifiers(DrgnType *self) From 8d383fb89a137311a6778addb95e92d77f3f61a3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 20 Aug 2021 15:02:31 -0700 Subject: [PATCH 035/139] libdrgn: fix alphabetization in gen_constants.py PlatformFlags obviously comes before PrimitiveType. Signed-off-by: Omar Sandoval --- libdrgn/build-aux/gen_constants.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 441a559c2..25d9f3f1c 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -69,8 +69,8 @@ def gen_constants(input_file, output_file): PyObject *Architecture_class; PyObject *FindObjectFlags_class; -PyObject *PrimitiveType_class; PyObject *PlatformFlags_class; +PyObject *PrimitiveType_class; PyObject *ProgramFlags_class; PyObject *Qualifiers_class; PyObject *TypeKind_class; @@ -90,18 +90,18 @@ def gen_constants(input_file, output_file): gen_constant_class( drgn_h, output_file, - "PrimitiveType", - "Enum", + "PlatformFlags", + "Flag", (), - r"DRGN_(C)_TYPE_([a-zA-Z0-9_]+)", + r"DRGN_PLATFORM_([a-zA-Z0-9_]+)(? Date: Thu, 19 Aug 2021 14:12:44 -0700 Subject: [PATCH 036/139] libdrgn: Add kind and binding fields to drgn_symbol Signed-off-by: Stephen Brennan --- libdrgn/drgn.h.in | 36 ++++++++++++++++++++++++++++++++++++ libdrgn/program.c | 9 +++------ libdrgn/symbol.c | 34 +++++++++++++++++++++++++++++++++- libdrgn/symbol.h | 10 +++++++++- 4 files changed, 81 insertions(+), 8 deletions(-) diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index ca96f3008..d161513dd 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -2566,6 +2566,36 @@ struct drgn_error *drgn_format_type(struct drgn_qualified_type qualified_type, * @{ */ +/** Symbol linkage behavior and visibility. 
*/ +enum drgn_symbol_binding { + DRGN_SYMBOL_BINDING_UNKNOWN, + /* + * These values match the ELF STB_* definitions (offset by 1). This is + * an implementation detail; future values may not correspond 1:1 with + * ELF definitions. + */ + DRGN_SYMBOL_BINDING_LOCAL, + DRGN_SYMBOL_BINDING_GLOBAL, + DRGN_SYMBOL_BINDING_WEAK, + DRGN_SYMBOL_BINDING_UNIQUE = 11, /* STB_GNU_UNIQUE + 1 */ +}; + +/** Kind of entity represented by a symbol. */ +enum drgn_symbol_kind { + /* + * Like enum drgn_symbol_binding, these values match the ELF STT_* + * definitions, but this will not necessarily be true for future values. + */ + DRGN_SYMBOL_KIND_UNKNOWN, + DRGN_SYMBOL_KIND_OBJECT, + DRGN_SYMBOL_KIND_FUNC, + DRGN_SYMBOL_KIND_SECTION, + DRGN_SYMBOL_KIND_FILE, + DRGN_SYMBOL_KIND_COMMON, + DRGN_SYMBOL_KIND_TLS, + DRGN_SYMBOL_KIND_IFUNC = 10, /* STT_GNU_IFUNC */ +}; + /** Destroy a @ref drgn_symbol. */ void drgn_symbol_destroy(struct drgn_symbol *sym); @@ -2583,6 +2613,12 @@ uint64_t drgn_symbol_address(struct drgn_symbol *sym); /** Get the size in bytes of a @ref drgn_symbol. */ uint64_t drgn_symbol_size(struct drgn_symbol *sym); +/** Get the binding of a @ref drgn_symbol. */ +enum drgn_symbol_binding drgn_symbol_binding(struct drgn_symbol *sym); + +/** Get the kind of a @ref drgn_symbol. */ +enum drgn_symbol_kind drgn_symbol_kind(struct drgn_symbol *sym); + /** Return whether two symbols are identical. 
*/ bool drgn_symbol_eq(struct drgn_symbol *a, struct drgn_symbol *b); diff --git a/libdrgn/program.c b/libdrgn/program.c index e4b5c6cdd..4a3d7cf86 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1110,9 +1110,7 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, &elf_sym, NULL, NULL, NULL); if (!name) return false; - ret->name = name; - ret->address = address - offset; - ret->size = elf_sym.st_size; + drgn_symbol_from_elf(name, address - offset, &elf_sym, ret); return true; } @@ -1173,9 +1171,8 @@ static int find_symbol_by_name_cb(Dwfl_Module *dwfl_module, void **userdatap, sym = malloc(sizeof(*sym)); if (sym) { - sym->name = name; - sym->address = elf_addr; - sym->size = elf_sym.st_size; + drgn_symbol_from_elf(name, elf_addr, &elf_sym, + sym); *arg->ret = sym; } else { arg->err = &drgn_enomem; diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index e9ef9a2d0..f48925391 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -5,6 +5,7 @@ #include #include +#include "drgn.h" #include "symbol.h" #include "util.h" @@ -13,6 +14,24 @@ LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) free(sym); } +void drgn_symbol_from_elf(const char *name, uint64_t address, + const GElf_Sym *elf_sym, struct drgn_symbol *ret) +{ + ret->name = name; + ret->address = address; + ret->size = elf_sym->st_size; + int binding = GELF_ST_BIND(elf_sym->st_info); + if (binding <= STB_WEAK || binding == STB_GNU_UNIQUE) + ret->binding = binding + 1; + else + ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; + int type = GELF_ST_TYPE(elf_sym->st_info); + if (type <= STT_TLS || type == STT_GNU_IFUNC) + ret->kind = type; + else + ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; +} + LIBDRGN_PUBLIC const char *drgn_symbol_name(struct drgn_symbol *sym) { return sym->name; @@ -28,8 +47,21 @@ LIBDRGN_PUBLIC uint64_t drgn_symbol_size(struct drgn_symbol *sym) return sym->size; } + +LIBDRGN_PUBLIC enum drgn_symbol_binding +drgn_symbol_binding(struct drgn_symbol *sym) +{ + 
return sym->binding; +} + +LIBDRGN_PUBLIC enum drgn_symbol_kind drgn_symbol_kind(struct drgn_symbol *sym) +{ + return sym->kind; +} + LIBDRGN_PUBLIC bool drgn_symbol_eq(struct drgn_symbol *a, struct drgn_symbol *b) { return (strcmp(a->name, b->name) == 0 && a->address == b->address && - a->size == b->size); + a->size == b->size && a->binding == b->binding && + a->kind == b->kind); } diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index 508a35ba2..b4f1d3ef2 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -4,12 +4,20 @@ #ifndef DRGN_SYMBOL_H #define DRGN_SYMBOL_H -#include +#include + +#include "drgn.h" struct drgn_symbol { const char *name; uint64_t address; uint64_t size; + enum drgn_symbol_binding binding; + enum drgn_symbol_kind kind; }; +/** Initialize a @ref drgn_symbol from an ELF symbol. */ +void drgn_symbol_from_elf(const char *name, uint64_t address, + const GElf_Sym *elf_sym, struct drgn_symbol *ret); + #endif /* DRGN_SYMBOL_H */ From 1744d8d93c5c4df2a420684de1b2a71504c43ba4 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 19 Aug 2021 14:12:48 -0700 Subject: [PATCH 037/139] libdrgn: python: Add binding, kind to drgn.Symbol Signed-off-by: Stephen Brennan --- _drgn.pyi | 60 ++++++++++++++++++++++++++++++ docs/api_reference.rst | 2 + drgn/__init__.py | 4 ++ libdrgn/build-aux/gen_constants.py | 20 ++++++++++ libdrgn/python/drgnpy.h | 2 + libdrgn/python/symbol.c | 60 ++++++++++++++++++++++-------- 6 files changed, 132 insertions(+), 16 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 4701ee559..56a3fe6d5 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1394,6 +1394,66 @@ class Symbol: size: int """Size of this symbol in bytes.""" + binding: SymbolBinding + """Linkage behavior and visibility of this symbol.""" + + kind: SymbolKind + """Kind of entity represented by this symbol.""" + +class SymbolBinding(enum.Enum): + """ + A ``SymbolBinding`` describes the linkage behavior and visibility of a + symbol. + """ + + UNKNOWN = ... 
+ """Unknown.""" + + LOCAL = ... + """Not visible outside of the object file containing its definition.""" + + GLOBAL = ... + """Globally visible.""" + + WEAK = ... + """Globally visible but may be overridden by a non-weak global symbol.""" + + UNIQUE = ... + """ + Globally visible even if dynamic shared object is loaded locally. See GCC's + ``-fno-gnu-unique`` `option + `_. + """ + +class SymbolKind(enum.Enum): + """ + A ``SymbolKind`` describes the kind of entity that a symbol represents. + """ + + UNKNOWN = ... + """Unknown or not defined.""" + + OBJECT = ... + """Data object (e.g., variable or array).""" + + FUNC = ... + """Function or other executable code.""" + + SECTION = ... + """Object file section.""" + + FILE = ... + """Source file.""" + + COMMON = ... + """Data object in common block.""" + + TLS = ... + """Thread-local storage entity.""" + + IFUNC = ... + """`Indirect function `_.""" + class StackTrace: """ A ``StackTrace`` is a :ref:`sequence ` of diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 15fbdd8cf..79f2190f2 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -66,6 +66,8 @@ Symbols ------- .. drgndoc:: Symbol +.. drgndoc:: SymbolBinding +.. 
drgndoc:: SymbolKind Stack Traces ------------ diff --git a/drgn/__init__.py b/drgn/__init__.py index 893c36dce..85bd2c713 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -67,6 +67,8 @@ StackFrame, StackTrace, Symbol, + SymbolBinding, + SymbolKind, Type, TypeEnumerator, TypeKind, @@ -110,6 +112,8 @@ "StackFrame", "StackTrace", "Symbol", + "SymbolBinding", + "SymbolKind", "Type", "TypeEnumerator", "TypeKind", diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 25d9f3f1c..4719c4cfc 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -73,6 +73,8 @@ def gen_constants(input_file, output_file): PyObject *PrimitiveType_class; PyObject *ProgramFlags_class; PyObject *Qualifiers_class; +PyObject *SymbolBinding_class; +PyObject *SymbolKind_class; PyObject *TypeKind_class; """ ) @@ -119,6 +121,22 @@ def gen_constants(input_file, output_file): [("NONE", "0")], r"DRGN_QUALIFIER_([a-zA-Z0-9_]+)", ) + gen_constant_class( + drgn_h, + output_file, + "SymbolBinding", + "Enum", + (), + "DRGN_SYMBOL_BINDING_([a-z-A-Z0-9_]+)" + ) + gen_constant_class( + drgn_h, + output_file, + "SymbolKind", + "Enum", + (), + "DRGN_SYMBOL_KIND_([a-z-A-Z0-9_]+)" + ) gen_constant_class( drgn_h, output_file, "TypeKind", "Enum", (), r"DRGN_TYPE_([a-zA-Z0-9_]+)" ) @@ -139,6 +157,8 @@ def gen_constants(input_file, output_file): add_PrimitiveType(m, enum_module) == -1 || add_ProgramFlags(m, enum_module) == -1 || add_Qualifiers(m, enum_module) == -1 || + add_SymbolBinding(m, enum_module) == -1 || + add_SymbolKind(m, enum_module) == -1 || add_TypeKind(m, enum_module) == -1) ret = -1; else diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 2094e15d8..29502538c 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -161,6 +161,8 @@ extern PyObject *PlatformFlags_class; extern PyObject *PrimitiveType_class; extern PyObject *ProgramFlags_class; extern PyObject *Qualifiers_class; +extern PyObject 
*SymbolBinding_class; +extern PyObject *SymbolKind_class; extern PyObject *TypeKind_class; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index 42140ea7f..f47fd8f40 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -25,22 +25,6 @@ static void Symbol_dealloc(Symbol *self) Py_TYPE(self)->tp_free((PyObject *)self); } -static PyObject *Symbol_repr(Symbol *self) -{ - PyObject *tmp, *ret; - char address[19], size[19]; - - tmp = PyUnicode_FromString(drgn_symbol_name(self->sym)); - if (!tmp) - return NULL; - sprintf(address, "0x%" PRIx64, drgn_symbol_address(self->sym)); - sprintf(size, "0x%" PRIx64, drgn_symbol_size(self->sym)); - ret = PyUnicode_FromFormat("Symbol(name=%R, address=%s, size=%s)", tmp, - address, size); - Py_DECREF(tmp); - return ret; -} - static PyObject *Symbol_richcompare(Symbol *self, PyObject *other, int op) { if (!PyObject_TypeCheck(other, &Symbol_type) || @@ -67,10 +51,54 @@ static PyObject *Symbol_get_size(Symbol *self, void *arg) return PyLong_FromUnsignedLongLong(drgn_symbol_size(self->sym)); } +static PyObject *Symbol_get_binding(Symbol *self, void *arg) +{ + return PyObject_CallFunction(SymbolBinding_class, "k", + (unsigned long)drgn_symbol_binding(self->sym)); +} + +static PyObject *Symbol_get_kind(Symbol *self, void *arg) +{ + return PyObject_CallFunction(SymbolKind_class, "k", + (unsigned long)drgn_symbol_kind(self->sym)); +} + +static PyObject *Symbol_repr(Symbol *self) +{ + PyObject *ret = NULL; + PyObject *tmp = PyUnicode_FromString(drgn_symbol_name(self->sym)); + if (!tmp) + return NULL; + + PyObject *binding = Symbol_get_binding(self, NULL); + if (!binding) + goto out_tmp; + + PyObject *kind = Symbol_get_kind(self, NULL); + if (!kind) + goto out_binding; + + char address[19], size[19]; + sprintf(address, "0x%" PRIx64, drgn_symbol_address(self->sym)); + sprintf(size, "0x%" PRIx64, drgn_symbol_size(self->sym)); + ret = 
PyUnicode_FromFormat("Symbol(name=%R, address=%s, size=%s, binding=%R, kind=%R)", + tmp, address, size, binding, kind); + + Py_DECREF(kind); +out_binding: + Py_DECREF(binding); +out_tmp: + Py_DECREF(tmp); + return ret; + +} + static PyGetSetDef Symbol_getset[] = { {"name", (getter)Symbol_get_name, NULL, drgn_Symbol_name_DOC}, {"address", (getter)Symbol_get_address, NULL, drgn_Symbol_address_DOC}, {"size", (getter)Symbol_get_size, NULL, drgn_Symbol_size_DOC}, + {"binding", (getter)Symbol_get_binding, NULL, drgn_Symbol_binding_DOC}, + {"kind", (getter)Symbol_get_kind, NULL, drgn_Symbol_kind_DOC}, {}, }; From 207ca0e16bc93163d039c7b27de3cbc957ad06d2 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 20 Aug 2021 16:09:06 -0700 Subject: [PATCH 038/139] tests: Add Symbol test Signed-off-by: Stephen Brennan --- tests/helpers/linux/test_symbol.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/helpers/linux/test_symbol.py diff --git a/tests/helpers/linux/test_symbol.py b/tests/helpers/linux/test_symbol.py new file mode 100644 index 000000000..a33ffb870 --- /dev/null +++ b/tests/helpers/linux/test_symbol.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, Oracle and/or its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +from drgn import SymbolBinding, SymbolKind +from tests.helpers.linux import LinuxHelperTestCase + + +class TestSymbol(LinuxHelperTestCase): + def test_global_symbol(self): + symbol = self.prog.symbol("jiffies") + self.assertEqual(symbol.name, "jiffies") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.OBJECT) From 84f6142879713032108074ed54f1e0b4725066c7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 23 Aug 2021 16:37:44 -0700 Subject: [PATCH 039/139] libdrgn: dwarf_index: remove any_name functionality from dwarf_index_iterator This hasn't been used since commit 06960f591c57 ("libdrgn: look up primitive types on demand"). 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 74 +++++++++++-------------------------------- libdrgn/dwarf_index.h | 3 +- 2 files changed, 19 insertions(+), 58 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index e8cd85d61..666eb6583 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2683,31 +2683,16 @@ drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, if (err) return err; it->ns = ns; - if (name) { - struct string key = { - .str = name, - .len = name_len, - }; - struct hash_pair hp; - struct drgn_dwarf_index_shard *shard; - struct drgn_dwarf_index_die_map_iterator map_it; - - hp = drgn_dwarf_index_die_map_hash(&key); - it->shard = hash_pair_to_shard(hp); - shard = &ns->shards[it->shard]; - map_it = drgn_dwarf_index_die_map_search_hashed(&shard->map, - &key, hp); - it->index = map_it.entry ? map_it.entry->value : UINT32_MAX; - it->any_name = false; - } else { - it->index = 0; - for (it->shard = 0; it->shard < ARRAY_SIZE(ns->shards); - it->shard++) { - if (ns->shards[it->shard].dies.size) - break; - } - it->any_name = true; - } + struct string key = { + .str = name, + .len = name_len, + }; + struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); + it->shard = hash_pair_to_shard(hp); + struct drgn_dwarf_index_shard *shard = &ns->shards[it->shard]; + struct drgn_dwarf_index_die_map_iterator map_it = + drgn_dwarf_index_die_map_search_hashed(&shard->map, &key, hp); + it->index = map_it.entry ? 
map_it.entry->value : UINT32_MAX; it->tags = tags; it->num_tags = num_tags; return NULL; @@ -2733,40 +2718,17 @@ drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) { struct drgn_dwarf_index_namespace *ns = it->ns; struct drgn_dwarf_index_die *die; - if (it->any_name) { - for (;;) { - if (it->shard >= ARRAY_SIZE(ns->shards)) - return NULL; - - struct drgn_dwarf_index_shard *shard = - &ns->shards[it->shard]; - die = &shard->dies.data[it->index]; - - if (++it->index >= shard->dies.size) { - it->index = 0; - while (++it->shard < ARRAY_SIZE(ns->shards)) { - if (ns->shards[it->shard].dies.size) - break; - } - } - - if (drgn_dwarf_index_iterator_matches_tag(it, die)) - break; - } - } else { - for (;;) { - if (it->index == UINT32_MAX) - return NULL; + for (;;) { + if (it->index == UINT32_MAX) + return NULL; - struct drgn_dwarf_index_shard *shard = - &ns->shards[it->shard]; - die = &shard->dies.data[it->index]; + struct drgn_dwarf_index_shard *shard = &ns->shards[it->shard]; + die = &shard->dies.data[it->index]; - it->index = die->next; + it->index = die->next; - if (drgn_dwarf_index_iterator_matches_tag(it, die)) - break; - } + if (drgn_dwarf_index_iterator_matches_tag(it, die)) + break; } return die; } diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index ce3f65193..7551111da 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -242,7 +242,6 @@ struct drgn_dwarf_index_iterator { size_t num_tags; size_t shard; uint32_t index; - bool any_name; }; /** @@ -250,7 +249,7 @@ struct drgn_dwarf_index_iterator { * * @param[out] it DWARF index iterator to initialize. * @param[in] ns DWARF index namespace. - * @param[in] name Name of DIE to search for, or @c NULL for any name. + * @param[in] name Name of DIE to search for. * @param[in] name_len Length of @c name. * @param[in] tags List of DIE tags to search for. * @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. 
From 4755cfac7c1ea8a91325d5197658eee71151e0cf Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 23 Aug 2021 16:52:54 -0700 Subject: [PATCH 040/139] libdrgn: dwarf_index: increment correct variable when rolling back We need to increment to the next DIE, not the next shard here. Fixes: 1c9ab2e7d168 ("libdrgn: dwarf_index: fix leak of DWARF index entries on failure") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 666eb6583..d14429a39 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2500,7 +2500,7 @@ static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) * entries must also be new, so there's no need to preserve * them. */ - for (size_t index = 0; index < shard->dies.size; i++) { + for (size_t index = 0; index < shard->dies.size; index++) { struct drgn_dwarf_index_die *die = &shard->dies.data[index]; if (die->next != UINT32_MAX && From fba5947fec221fb3dd232af8f4d6f0f954c128e9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 23 Aug 2021 17:07:26 -0700 Subject: [PATCH 041/139] libdrgn: add array_for_each() And use it in a few appropriate places. This should hopefully make it harder to make iteration mistakes like the one fixed by commit 4755cfac7c1e ("libdrgn: dwarf_index: increment correct variable when rolling back"). While we're doing this, move ARRAY_SIZE() into a new header file with array_for_each() and make it lowercase. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 1 + libdrgn/arch_x86_64.c | 3 ++- libdrgn/array.h | 42 ++++++++++++++++++++++++++++++++++++++++++ libdrgn/debug_info.c | 3 ++- libdrgn/dwarf_index.c | 20 ++++++++------------ libdrgn/language_c.c | 15 +++++++-------- libdrgn/program.c | 3 ++- libdrgn/type.c | 10 +++++----- libdrgn/util.h | 3 --- 9 files changed, 69 insertions(+), 31 deletions(-) create mode 100644 libdrgn/array.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 3eede7f47..83830cf47 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -25,6 +25,7 @@ noinst_LTLIBRARIES = libdrgnimpl.la libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ $(ARCH_INCS) \ arch_register_layout.h \ + array.h \ binary_buffer.c \ binary_buffer.h \ binary_search_tree.h \ diff --git a/libdrgn/arch_x86_64.c b/libdrgn/arch_x86_64.c index 2368b422c..00cb3a669 100644 --- a/libdrgn/arch_x86_64.c +++ b/libdrgn/arch_x86_64.c @@ -6,6 +6,7 @@ #include #include +#include "array.h" #include "drgn.h" #include "error.h" #include "linux_kernel.h" @@ -683,7 +684,7 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it, /* Find the lowest level with cached entries. */ for (level = 0; level < levels; level++) { - if (arch->index[level] < ARRAY_SIZE(arch->table[level])) + if (arch->index[level] < array_size(arch->table[level])) break; } /* For every level below that, refill the cache/return pages. */ diff --git a/libdrgn/array.h b/libdrgn/array.h new file mode 100644 index 000000000..60837d260 --- /dev/null +++ b/libdrgn/array.h @@ -0,0 +1,42 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * Helpers for C arrays. 
+ */ + +#ifndef DRGN_ARRAY_H +#define DRGN_ARRAY_H + +#include "pp.h" +#include "util.h" + +/** @cond */ +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) + +#define array_for_each_impl(var, arr, unique_end) \ + for (typeof((arr)[0]) *var = (arr), \ + *unique_end = var + array_size(arr); \ + var < unique_end; var++) +/** @endcond */ + +/** + * Return the number of elements in an array. + * + * @hideinitializer + */ +#define array_size(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +/** + * Iterate over every element in an array. + * + * The element is declared as `element_type *var` in the scope of the loop. + * + * @hideinitializer + */ +#define array_for_each(var, arr) \ + array_for_each_impl(var, arr, PP_UNIQUE(end)) + +#endif /* DRGN_ARRAY_H */ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index f0a8c560f..8d371e4fe 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -20,6 +20,7 @@ #include #include +#include "array.h" #include "debug_info.h" #include "error.h" #include "language.h" @@ -1929,7 +1930,7 @@ drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, { uintptr_t p = (uintptr_t)ptr; int end_match = -1; - for (int i = 0; i < ARRAY_SIZE(module->scn_data); i++) { + for (int i = 0; i < array_size(module->scn_data); i++) { if (!module->scn_data[i]) continue; uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index d14429a39..b347f2211 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -9,6 +9,7 @@ #include #include +#include "array.h" #include "binary_buffer.h" #include "debug_info.h" #include "drgn.h" @@ -205,8 +206,7 @@ static void drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, struct drgn_dwarf_index *dindex) { - for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { - struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + array_for_each(shard, ns->shards) { 
omp_init_lock(&shard->lock); drgn_dwarf_index_die_map_init(&shard->map); drgn_dwarf_index_die_vector_init(&shard->dies); @@ -236,8 +236,7 @@ drgn_dwarf_index_namespace_deinit(struct drgn_dwarf_index_namespace *ns) { drgn_error_destroy(ns->saved_err); drgn_dwarf_index_pending_die_vector_deinit(&ns->pending_dies); - for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { - struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + array_for_each(shard, ns->shards) { for (size_t j = 0; j < shard->dies.size; j++) { struct drgn_dwarf_index_die *die = &shard->dies.data[j]; if (die->tag == DW_TAG_namespace) { @@ -1152,7 +1151,7 @@ static struct path_hash *path_hash_alloc(struct path_hash_cache *cache) { struct path_hash_chunk *current_chunk = cache->current_chunk; if (cache->next_object < - &current_chunk->objects[ARRAY_SIZE(current_chunk->objects)]) + &current_chunk->objects[array_size(current_chunk->objects)]) return cache->next_object++; struct path_hash_chunk *next_chunk = current_chunk->next; if (!next_chunk) { @@ -1396,7 +1395,7 @@ static struct drgn_error *read_file_name_table(struct path_hash_cache *cache, parent = &empty_path_hash; } else { entry_formats = dwarf4_directory_entry_formats; - entry_format_count = ARRAY_SIZE(dwarf4_directory_entry_formats); + entry_format_count = array_size(dwarf4_directory_entry_formats); path_hash = hash_path(cache, comp_dir, &empty_path_hash); if (!path_hash || !path_hash_vector_append(&cache->directories, &path_hash)) @@ -1463,7 +1462,7 @@ file_name_entries:; } } else { entry_formats = dwarf4_file_name_entry_formats; - entry_format_count = ARRAY_SIZE(dwarf4_file_name_entry_formats); + entry_format_count = array_size(dwarf4_file_name_entry_formats); uint64_vector_init(&file_name_hashes); } @@ -2472,10 +2471,7 @@ indirect_insn:; static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) { - for (size_t i = 0; i < ARRAY_SIZE(dindex->global.shards); i++) { - struct drgn_dwarf_index_shard *shard = - &dindex->global.shards[i]; - + 
array_for_each(shard, dindex->global.shards) { /* * Because we're deleting everything that was added since the * last update, we can just shrink the dies array to the first @@ -2557,7 +2553,7 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) .file_name_hashes = (uint64_t *)no_file_name_hashes, .num_file_names = - ARRAY_SIZE(no_file_name_hashes), + array_size(no_file_name_hashes), }; } } diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 0a29715bc..674360fb0 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -10,6 +10,7 @@ #include #include +#include "array.h" #include "bitops.h" #include "error.h" #include "hash_table.h" @@ -59,7 +60,7 @@ static struct drgn_error *c_append_qualifiers(enum drgn_qualifiers qualifiers, bool first = true; unsigned int i; - static_assert((1 << ARRAY_SIZE(qualifier_names)) - 1 == + static_assert((1 << array_size(qualifier_names)) - 1 == DRGN_ALL_QUALIFIERS, "missing C qualifier name"); for (i = 0; (1U << i) & DRGN_ALL_QUALIFIERS; i++) { @@ -2708,15 +2709,13 @@ struct drgn_error *c_integer_literal(struct drgn_object *res, uint64_t uvalue) DRGN_C_TYPE_UNSIGNED_LONG_LONG, }; struct drgn_error *err; - unsigned int bits; - struct drgn_qualified_type qualified_type; - size_t i; - bits = fls(uvalue); + unsigned int bits = fls(uvalue); + struct drgn_qualified_type qualified_type; qualified_type.qualifiers = 0; - for (i = 0; i < ARRAY_SIZE(types); i++) { + array_for_each(type, types) { err = drgn_program_find_primitive_type(drgn_object_program(res), - types[i], + *type, &qualified_type.type); if (err) return err; @@ -2860,7 +2859,7 @@ static struct drgn_error *c_integer_promotions(struct drgn_program *prog, * promotes it to the full width, but GCC does not. We implement the GCC * behavior of preserving the width. 
*/ - if (primitive >= ARRAY_SIZE(c_integer_conversion_rank) || + if (primitive >= array_size(c_integer_conversion_rank) || type->bit_field_size) { err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT, &int_type); diff --git a/libdrgn/program.c b/libdrgn/program.c index 4a3d7cf86..493388f37 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -17,6 +17,7 @@ #include #include +#include "array.h" #include "debug_info.h" #include "dwarf_index.h" #include "error.h" @@ -588,7 +589,7 @@ static void drgn_program_set_language_from_main(struct drgn_debug_info *dbinfo) static const uint64_t tags[] = { DW_TAG_subprogram }; err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, "main", strlen("main"), tags, - ARRAY_SIZE(tags)); + array_size(tags)); if (err) { drgn_error_destroy(err); return; diff --git a/libdrgn/type.c b/libdrgn/type.c index 25000b123..7492c42a6 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -5,6 +5,7 @@ #include #include +#include "array.h" #include "error.h" #include "hash_table.h" #include "language.h" @@ -1294,7 +1295,7 @@ struct drgn_error *drgn_error_incomplete_type(const char *format, void drgn_program_init_types(struct drgn_program *prog) { - for (size_t i = 0; i < ARRAY_SIZE(prog->void_types); i++) { + for (size_t i = 0; i < array_size(prog->void_types); i++) { struct drgn_type *type = &prog->void_types[i]; type->_private.kind = DRGN_TYPE_VOID; type->_private.is_complete = false; @@ -1439,11 +1440,10 @@ default_size_t_or_ptrdiff_t(struct drgn_program *prog, err = drgn_program_address_size(prog, &address_size); if (err) return err; - for (size_t i = 0; i < ARRAY_SIZE(integer_types[0]); i++) { - enum drgn_primitive_type integer_type = - integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; + array_for_each(integer_type, + integer_types[type == DRGN_C_TYPE_PTRDIFF_T]) { struct drgn_qualified_type qualified_type; - err = drgn_program_find_primitive_type(prog, integer_type, + err = drgn_program_find_primitive_type(prog, 
*integer_type, &qualified_type.type); if (err) return err; diff --git a/libdrgn/util.h b/libdrgn/util.h index fd09f912b..7f4c84a11 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -85,9 +85,6 @@ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ From 77b9d3ad9819617698c5aa619ac5b73fba6921b3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 31 Aug 2021 17:43:18 -0700 Subject: [PATCH 042/139] tests: change LinuxHelperTestCase.setUp to setUpClass This already caches class variables, and it's shared across all Linux helper test cases, so it makes more sense as setUpClass. This will also allow subclasses to use cls.prog in their own setUpClass. Signed-off-by: Omar Sandoval --- tests/helpers/linux/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index 1df529536..df590db42 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -18,10 +18,11 @@ class LinuxHelperTestCase(unittest.TestCase): prog = None skip_reason = None - def setUp(self): - # We only want to create the Program once, so it's cached as a class - # variable. If we can't run these tests for whatever reason, we also - # cache that. + @classmethod + def setUpClass(cls): + # We only want to create the Program once for all tests, so it's cached + # as a class variable (in the base class). If we can't run these tests + # for whatever reason, we also cache that. 
if LinuxHelperTestCase.prog is not None: return if LinuxHelperTestCase.skip_reason is None: @@ -37,7 +38,7 @@ def setUp(self): elif not force_run and os.geteuid() != 0: LinuxHelperTestCase.skip_reason = ( "Linux helper tests must be run as root " - "(run with env DRGN_RUN_LINUX_HELPER_TESTS=1 to force" + "(run with env DRGN_RUN_LINUX_HELPER_TESTS=1 to force)" ) else: # Some of the tests use the loop module. Open loop-control so @@ -58,7 +59,7 @@ def setUp(self): if force_run: raise LinuxHelperTestCase.skip_reason = str(e) - self.skipTest(LinuxHelperTestCase.skip_reason) + raise unittest.SkipTest(LinuxHelperTestCase.skip_reason) def wait_until(fn, *args, **kwds): From 6ee7ba4cb15ef7fd20e11adb86352276ea686610 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Sep 2021 14:13:34 -0700 Subject: [PATCH 043/139] vmtest: add some Traffic Control config options We need these to test #117. Signed-off-by: Omar Sandoval --- vmtest/kbuild.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index 2c9e4fc8c..a646c1580 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -KERNEL_LOCALVERSION = "-vmtest4" +KERNEL_LOCALVERSION = "-vmtest5" def kconfig() -> str: @@ -85,6 +85,16 @@ def kconfig() -> str: # For nodemask tests. CONFIG_NUMA=y + +# For Traffic Control tests. +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_ACT=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m """ From 3c68b2521527c300d8429af50607b9b9cfb2b75d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Sep 2021 15:08:32 -0700 Subject: [PATCH 044/139] CI: install pyroute2 This is needed to test #117. 
Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 041ab32a2..f7601e71c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: run: | sudo apt-get update sudo apt-get install busybox-static libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} - pip install mypy + pip install mypy pyroute2 - name: Generate version.py run: python setup.py --version - name: Check with mypy From ce845ad340bc0a3aa2283fb64d564a9fa1b91f1e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 2 Sep 2021 09:12:19 -0700 Subject: [PATCH 045/139] vmtest: add virtio-rng and cgroup Kconfig options We're seeing some hangs waiting for entropy when running tests, so let's enable the virtio-rng module. While we're doing a rebuild, we might as well enable cgroups so that we can finally run those tests. Signed-off-by: Omar Sandoval --- vmtest/kbuild.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index a646c1580..b02cb61d6 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -KERNEL_LOCALVERSION = "-vmtest5" +KERNEL_LOCALVERSION = "-vmtest6" def kconfig() -> str: @@ -59,6 +59,8 @@ def kconfig() -> str: CONFIG_TMPFS_XATTR=y CONFIG_VIRTIO_CONSOLE=y CONFIG_VIRTIO_PCI=y +CONFIG_HW_RANDOM=m +CONFIG_HW_RANDOM_VIRTIO=m # drgn needs /proc/kcore for live debugging. CONFIG_PROC_KCORE=y @@ -79,6 +81,9 @@ def kconfig() -> str: # For block tests. CONFIG_BLK_DEV_LOOP=m +# For cgroup tests. +CONFIG_CGROUPS=y + # For kconfig tests. 
CONFIG_IKCONFIG=m CONFIG_IKCONFIG_PROC=y From 62efd2aab3c8091831cdb46bf632b4a21ef3c4b5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 2 Sep 2021 09:31:09 -0700 Subject: [PATCH 046/139] vmtest: use virtio-rng Now that the kernel module is enabled, let's enable the device and load the module. Signed-off-by: Omar Sandoval --- vmtest/vm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vmtest/vm.py b/vmtest/vm.py index 93d629255..d77ebff9a 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -68,7 +68,9 @@ # Load kernel modules. "$BUSYBOX" mkdir -p "/lib/modules/$RELEASE" "$BUSYBOX" mount -t 9p -o trans=virtio,cache=loose,ro,msize={_9PFS_MSIZE} modules "/lib/modules/$RELEASE" -"$BUSYBOX" modprobe configs +for module in configs rng_core virtio_rng; do + "$BUSYBOX" modprobe "$module" +done # Create static device nodes. "$BUSYBOX" grep -v '^#' "/lib/modules/$RELEASE/modules.devname" | @@ -231,6 +233,8 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: "-virtfs", f"local,path={kernel_dir},mount_tag=modules,security_model=none,readonly=on", + "-device", "virtio-rng-pci", + "-device", "virtio-serial", "-chardev", f"socket,id=vmtest,path={socket_path}", "-device", From a01131483d2b19d0ba150bd3e15251cb26d9612e Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 24 Aug 2021 03:40:24 +0800 Subject: [PATCH 047/139] helpers: Add netdev_for_each_tx_queue() Add a helper, netdev_for_each_tx_queue(), to iterate over all TX queues of a network device. As an example: >>> eth0 = netdev_get_by_name(prog, "eth0") >>> for txq in netdev_for_each_tx_queue(eth0): ... print(txq.qdisc.ops.id.string_().decode()) ... sfq tbf prio pfifo_fast Set up `net` in setUpClass(), since now several tests use it. Also use it in test_netdev_get_by_{index,name}(), instead of assuming `init_net`. 
Signed-off-by: Peilin Ye --- drgn/helpers/linux/net.py | 12 +++++++++++ tests/helpers/linux/test_net.py | 37 +++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 8f04a516e..66db17ef0 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -21,6 +21,7 @@ "for_each_net", "get_net_ns_by_inode", "get_net_ns_by_fd", + "netdev_for_each_tx_queue", "netdev_get_by_index", "netdev_get_by_name", "sk_fullsock", @@ -76,6 +77,17 @@ def get_net_ns_by_fd(task: Object, fd: IntegerLike) -> Object: return get_net_ns_by_inode(fget(task, fd).f_inode) +def netdev_for_each_tx_queue(dev: Object) -> Iterator[Object]: + """ + Iterate over all TX queues for a network device. + + :param dev: ``struct net_device *`` + :return: Iterator of ``struct netdev_queue *`` objects. + """ + for i in range(dev.num_tx_queues): + yield dev._tx + i + + _NETDEV_HASHBITS = 8 _NETDEV_HASHENTRIES = 1 << _NETDEV_HASHBITS diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index fe8f73ecc..364b32493 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -10,6 +10,7 @@ from drgn.helpers.linux.net import ( for_each_net, get_net_ns_by_fd, + netdev_for_each_tx_queue, netdev_get_by_index, netdev_get_by_name, sk_fullsock, @@ -19,48 +20,58 @@ class TestNet(LinuxHelperTestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.pid = os.getpid() + cls.task = find_task(cls.prog, cls.pid) + with open(f"/proc/{cls.pid}/ns/net") as file: + cls.net = get_net_ns_by_fd(cls.task, file.fileno()) + def test_sk_fullsock(self): with create_socket() as sock: - file = fget(find_task(self.prog, os.getpid()), sock.fileno()) + file = fget(self.task, sock.fileno()) sk = cast("struct socket *", file.private_data).sk.read_() self.assertTrue(sk_fullsock(sk)) def test_netdev_get_by_index(self): for index, name in socket.if_nameindex(): - netdev = 
netdev_get_by_index(self.prog, index) + netdev = netdev_get_by_index(self.net, index) self.assertEqual(netdev.name.string_().decode(), name) def test_netdev_get_by_name(self): for index, name in socket.if_nameindex(): - netdev = netdev_get_by_name(self.prog, name) + netdev = netdev_get_by_name(self.net, name) self.assertEqual(netdev.ifindex, index) def test_for_each_net(self): self.assertIn(self.prog["init_net"].address_of_(), for_each_net(self.prog)) def test_get_net_ns_by_fd(self): - pid = os.getpid() - task = find_task(self.prog, pid) - with open(f"/proc/{pid}/ns/net") as file: - net = get_net_ns_by_fd(task, file.fileno()) - for index, name in socket.if_nameindex(): - netdev = netdev_get_by_index(net, index) - self.assertEqual(netdev.name.string_().decode(), name) + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_index(self.net, index) + self.assertEqual(netdev.name.string_().decode(), name) with tempfile.TemporaryFile("rb") as file: self.assertRaisesRegex( ValueError, "not a namespace inode", get_net_ns_by_fd, - task, + self.task, file.fileno(), ) - with open(f"/proc/{pid}/ns/mnt") as file: + with open(f"/proc/{self.pid}/ns/mnt") as file: self.assertRaisesRegex( ValueError, "not a network namespace inode", get_net_ns_by_fd, - task, + self.task, file.fileno(), ) + + def test_netdev_for_each_tx_queue(self): + for index, _ in socket.if_nameindex(): + netdev = netdev_get_by_index(self.net, index) + for queue in netdev_for_each_tx_queue(netdev): + self.assertEqual(queue.dev, netdev) From f82273749dc1adb07844c0f5ab911c3034312787 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 26 Aug 2021 06:45:09 +0800 Subject: [PATCH 048/139] helpers: Add qdisc_lookup() Add a helper, qdisc_lookup(), to get a Qdisc (struct Qdisc *) from a network device and a major handle number. 
As an example: >>> eth0 = netdev_get_by_name(prog, "eth0") >>> tbf = qdisc_lookup(eth0, 0x20) >>> tbf.ops.id.string_().decode() tbf >>> ingress = qdisc_lookup(eth0, 0xffff) >>> ingress.ops.id.string_().decode() ingress Testing depends on pyroute2. `TestTc` is skipped if pyroute2 is not found; test_qdisc_lookup() is skipped if the kernel is not built with the following options: CONFIG_DUMMY CONFIG_NET_SCH_PRIO CONFIG_NET_SCH_SFQ CONFIG_NET_SCH_TBF CONFIG_NET_SCH_INGRESS Suggested-by: Cong Wang Signed-off-by: Peilin Ye --- drgn/helpers/linux/tc.py | 60 +++++++++++++++++ tests/helpers/linux/test_tc.py | 114 +++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 drgn/helpers/linux/tc.py create mode 100644 tests/helpers/linux/test_tc.py diff --git a/drgn/helpers/linux/tc.py b/drgn/helpers/linux/tc.py new file mode 100644 index 000000000..42022453c --- /dev/null +++ b/drgn/helpers/linux/tc.py @@ -0,0 +1,60 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Traffic Control (TC) +-------------------- + +The ``drgn.helpers.linux.tc`` module provides helpers for working with the +Linux kernel Traffic Control (TC) subsystem. +""" + +import operator + +from drgn import NULL, IntegerLike, Object +from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry + +__all__ = ("qdisc_lookup",) + + +def qdisc_lookup(dev: Object, major: IntegerLike) -> Object: + """ + Get a Qdisc from a device and a major handle number. It is worth noting + that conventionally handles are hexadecimal, e.g. ``10:`` in a ``tc`` + command means major handle 0x10. + + :param dev: ``struct net_device *`` + :param major: Qdisc major handle number. 
+ :return: ``struct Qdisc *`` (``NULL`` if not found) + """ + major = operator.index(major) << 16 + + roots = [dev.qdisc] + if dev.ingress_queue: + roots.append(dev.ingress_queue.qdisc_sleeping) + + # Since Linux kernel commit 59cc1f61f09c ("net: sched: convert qdisc linked + # list to hashtable") (in v4.7), a device's child Qdiscs are maintained in + # a hashtable in its struct net_device. Before that, they are maintained in + # a linked list in their root Qdisc. + use_hashtable = dev.prog_.type("struct net_device").has_member("qdisc_hash") + + for root in roots: + if root.handle == major: + return root + + if use_hashtable: + for head in root.dev_queue.dev.qdisc_hash: + for qdisc in hlist_for_each_entry( + "struct Qdisc", head.address_of_(), "hash" + ): + if qdisc.handle == major: + return qdisc + else: + for qdisc in list_for_each_entry( + "struct Qdisc", root.list.address_of_(), "list" + ): + if qdisc.handle == major: + return qdisc + + return NULL(dev.prog_, "struct Qdisc *") diff --git a/tests/helpers/linux/test_tc.py b/tests/helpers/linux/test_tc.py new file mode 100644 index 000000000..6f16205c4 --- /dev/null +++ b/tests/helpers/linux/test_tc.py @@ -0,0 +1,114 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +import os +import random +import string +import unittest + +from drgn.helpers.linux.fs import path_lookup +from drgn.helpers.linux.net import get_net_ns_by_inode, netdev_get_by_name +from drgn.helpers.linux.tc import qdisc_lookup +from tests.helpers.linux import LinuxHelperTestCase + +try: + from pyroute2 import NetNS + from pyroute2.netlink.exceptions import NetlinkError + + have_pyroute2 = True +except ImportError: + have_pyroute2 = False + + +@unittest.skipUnless(have_pyroute2, "pyroute2 not found") +class TestTc(LinuxHelperTestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.ns = None + while cls.ns is None: + try: + cls.name = "".join( + random.choice(string.ascii_letters) for _ in range(16) + ) + cls.ns = NetNS(cls.name, flags=os.O_CREAT | os.O_EXCL) + except FileExistsError: + pass + + @classmethod + def tearDownClass(cls): + cls.ns.remove() + super().tearDownClass() + + def test_qdisc_lookup(self): + try: + self.ns.link("add", ifname="dummy0", kind="dummy") + except NetlinkError: + self.skipTest("kernel does not support dummy interface (CONFIG_DUMMY)") + + dummy = self.ns.link_lookup(ifname="dummy0")[0] + + # tc qdisc add dev dummy0 root handle 1: prio + try: + self.ns.tc( + "add", + kind="prio", + index=dummy, + handle="1:", + # default TCA_OPTIONS for sch_prio, see [iproute2] tc/q_prio.c:prio_parse_opt() + bands=3, + priomap=[1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + ) + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support Multi Band Priority Queueing (CONFIG_NET_SCH_PRIO)" + ) + # tc qdisc add dev dummy0 parent 1:1 handle 10: sfq + try: + self.ns.tc("add", kind="sfq", index=dummy, parent="1:1", handle="10:") + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support Stochastic Fairness Queueing (CONFIG_NET_SCH_SFQ)" + ) + # tc qdisc add dev dummy0 parent 1:2 handle 20: 
tbf rate 20kbit buffer 1600 limit 3000 + try: + self.ns.tc( + "add", + kind="tbf", + index=dummy, + parent="1:2", + handle="20:", + rate=2500, + burst=1600, + limit=3000, + ) + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support Token Bucket Filter (CONFIG_NET_SCH_TBF)" + ) + # tc qdisc add dev dummy0 parent 1:3 handle 30: sfq + self.ns.tc("add", kind="sfq", index=dummy, parent="1:3", handle="30:") + # tc qdisc add dev dummy0 ingress + try: + self.ns.tc("add", kind="ingress", index=dummy) + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support ingress Qdisc (CONFIG_NET_SCH_INGRESS)" + ) + + inode = path_lookup( + self.prog, os.path.realpath(f"/var/run/netns/{self.name}") + ).dentry.d_inode + netdev = netdev_get_by_name(get_net_ns_by_inode(inode), "dummy0") + + self.assertEqual(qdisc_lookup(netdev, 0x1).ops.id.string_(), b"prio") + self.assertEqual(qdisc_lookup(netdev, 0x10).ops.id.string_(), b"sfq") + self.assertEqual(qdisc_lookup(netdev, 0x20).ops.id.string_(), b"tbf") + self.assertEqual(qdisc_lookup(netdev, 0x30).ops.id.string_(), b"sfq") + self.assertEqual(qdisc_lookup(netdev, 0xFFFF).ops.id.string_(), b"ingress") + + self.ns.link("delete", ifname="dummy0") From 801f9d645cfefc62a83597576760ef0ec05c13b8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 2 Sep 2021 16:05:46 -0700 Subject: [PATCH 049/139] tests: improve cgroup helper tests These haven't been running in vmtest since they were added. Enable cgroup2 in vmtest and rework the cgroup tests to create cgroups that we can test with. 
Signed-off-by: Omar Sandoval --- tests/helpers/linux/__init__.py | 32 +++++- tests/helpers/linux/test_cgroup.py | 158 +++++++++++++++++++++++------ tests/helpers/linux/test_fs.py | 2 +- vmtest/vm.py | 2 + 4 files changed, 158 insertions(+), 36 deletions(-) diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index df590db42..f4866be85 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -118,17 +118,41 @@ def parse_range_list(s): ctypes.c_ulong, ctypes.c_void_p, ] +MS_RDONLY = 1 +MS_NOSUID = 2 +MS_NODEV = 4 +MS_NOEXEC = 8 +MS_SYNCHRONOUS = 16 +MS_REMOUNT = 32 +MS_MANDLOCK = 64 +MS_DIRSYNC = 128 +MS_NOSYMFOLLOW = 256 +MS_NOATIME = 1024 +MS_NODIRATIME = 2048 MS_BIND = 4096 - - -def mount(source, target, fstype, flags, data): +MS_MOVE = 8192 +MS_REC = 16384 +MS_SILENT = 32768 +MS_POSIXACL = 1 << 16 +MS_UNBINDABLE = 1 << 17 +MS_PRIVATE = 1 << 18 +MS_SLAVE = 1 << 19 +MS_SHARED = 1 << 20 +MS_RELATIME = 1 << 21 +MS_KERNMOUNT = 1 << 22 +MS_I_VERSION = 1 << 23 +MS_STRICTATIME = 1 << 24 +MS_LAZYTIME = 1 << 25 + + +def mount(source, target, fstype, flags=0, data=None): if ( _mount( os.fsencode(source), os.fsencode(target), fstype.encode(), flags, - data.encode(), + None if data is None else data.encode(), ) == -1 ): diff --git a/tests/helpers/linux/test_cgroup.py b/tests/helpers/linux/test_cgroup.py index fa10d226e..7324d56e6 100644 --- a/tests/helpers/linux/test_cgroup.py +++ b/tests/helpers/linux/test_cgroup.py @@ -2,57 +2,153 @@ # SPDX-License-Identifier: GPL-3.0-or-later import os +from pathlib import Path +import signal +import tempfile +import unittest +from drgn import NULL from drgn.helpers.linux.cgroup import ( cgroup_name, + cgroup_parent, cgroup_path, css_for_each_child, css_for_each_descendant_pre, ) from drgn.helpers.linux.pid import find_task -from tests.helpers.linux import LinuxHelperTestCase +from tests.helpers.linux import ( + MS_NODEV, + MS_NOEXEC, + MS_NOSUID, + LinuxHelperTestCase, + 
fork_and_pause, + mount, + umount, +) class TestCgroup(LinuxHelperTestCase): - def setUp(self): - super().setUp() + @classmethod + def setUpClass(cls): + # It'd be nice to just use addClassCleanup(), but that was added in + # Python 3.8. + cls.__cleanups = [] try: - with open("/proc/self/cgroup", "rb") as f: - for line in f: - if line.startswith(b"0::"): - self.cgroup = line[3:].rstrip(b"\n") - break - else: - self.skipTest("process is not using cgroup v2") - except FileNotFoundError: - self.skipTest("kernel does not support cgroup") + super().setUpClass() - def test_cgroup_name(self): - task = find_task(self.prog, os.getpid()) + # Don't enable cgroup2 on systems that aren't already using it (or + # don't support it). + cgroup2_enabled = False + try: + with open("/proc/self/cgroup", "rb") as f: + for line in f: + if line.startswith(b"0::"): + cgroup2_enabled = True + break + except FileNotFoundError: + pass + if not cgroup2_enabled: + raise unittest.SkipTest("cgroup2 not enabled") + + # It's easier to mount the cgroup2 filesystem than to find it. 
+ cgroup2_mount = Path(tempfile.mkdtemp(prefix="drgn-tests-")) + cls.__cleanups.append((cgroup2_mount.rmdir,)) + mount("cgroup2", cgroup2_mount, "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC) + cls.__cleanups.append((umount, cgroup2_mount)) + + cls.root_cgroup = cls.prog["cgrp_dfl_root"].cgrp.address_of_() + + pid = fork_and_pause() + try: + task = find_task(cls.prog, pid) + + parent_cgroup_dir = Path( + tempfile.mkdtemp(prefix="drgn-tests-", dir=cgroup2_mount) + ) + cls.__cleanups.append((parent_cgroup_dir.rmdir,)) + cls.parent_cgroup_name = os.fsencode(parent_cgroup_dir.name) + cls.parent_cgroup_path = b"/" + cls.parent_cgroup_name + + (parent_cgroup_dir / "cgroup.procs").write_text(str(pid)) + cls.parent_cgroup = task.cgroups.dfl_cgrp.read_() + + child_cgroup_dir = parent_cgroup_dir / "child" + child_cgroup_dir.mkdir() + cls.__cleanups.append((child_cgroup_dir.rmdir,)) + cls.child_cgroup_name = os.fsencode(child_cgroup_dir.name) + cls.child_cgroup_path = ( + cls.parent_cgroup_path + b"/" + cls.child_cgroup_name + ) + + (child_cgroup_dir / "cgroup.procs").write_text(str(pid)) + cls.child_cgroup = task.cgroups.dfl_cgrp.read_() + finally: + os.kill(pid, signal.SIGKILL) + os.waitpid(pid, 0) + except: + for cleanup in reversed(cls.__cleanups): + cleanup[0](*cleanup[1:]) + raise + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + for cleanup in reversed(cls.__cleanups): + cleanup[0](*cleanup[1:]) + + def test_cgroup_parent(self): + self.assertEqual(cgroup_parent(self.child_cgroup), self.parent_cgroup) + self.assertEqual(cgroup_parent(self.parent_cgroup), self.root_cgroup) self.assertEqual( - cgroup_name(task.cgroups.dfl_cgrp), os.path.basename(self.cgroup) + cgroup_parent(self.root_cgroup), NULL(self.prog, "struct cgroup *") ) + def test_cgroup_name(self): + self.assertEqual(cgroup_name(self.root_cgroup), b"/") + self.assertEqual(cgroup_name(self.parent_cgroup), self.parent_cgroup_name) + self.assertEqual(cgroup_name(self.child_cgroup), 
self.child_cgroup_name) + def test_cgroup_path(self): - task = find_task(self.prog, os.getpid()) - self.assertEqual(cgroup_path(task.cgroups.dfl_cgrp), self.cgroup) + self.assertEqual(cgroup_path(self.root_cgroup), b"/") + self.assertEqual(cgroup_path(self.parent_cgroup), self.parent_cgroup_path) + self.assertEqual(cgroup_path(self.child_cgroup), self.child_cgroup_path) + + @staticmethod + def _cgroup_iter_paths(fn, cgroup): + return [cgroup_path(css.cgroup) for css in fn(cgroup.self.address_of_())] def test_css_for_each_child(self): - self.assertTrue( - any( - self.cgroup.startswith(cgroup_path(css.cgroup)) - for css in css_for_each_child( - self.prog["cgrp_dfl_root"].cgrp.self.address_of_() - ) - ) + children = self._cgroup_iter_paths(css_for_each_child, self.root_cgroup) + self.assertIn(self.parent_cgroup_path, children) + self.assertNotIn(self.child_cgroup_path, children) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_child, self.parent_cgroup), + [self.child_cgroup_path], + ) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_child, self.child_cgroup), [] ) def test_css_for_each_descendant_pre(self): - self.assertTrue( - any( - cgroup_path(css.cgroup) == self.cgroup - for css in css_for_each_descendant_pre( - self.prog["cgrp_dfl_root"].cgrp.self.address_of_() - ) - ) + descendants = self._cgroup_iter_paths( + css_for_each_descendant_pre, self.root_cgroup + ) + self.assertEqual(descendants[0], b"/") + self.assertIn(self.parent_cgroup_path, descendants) + self.assertIn(self.child_cgroup_path, descendants) + self.assertLess( + descendants.index(self.parent_cgroup_path), + descendants.index(self.child_cgroup_path), + ) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_descendant_pre, self.parent_cgroup), + [self.parent_cgroup_path, self.child_cgroup_path], + ) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_descendant_pre, self.child_cgroup), + [self.child_cgroup_path], ) diff --git a/tests/helpers/linux/test_fs.py 
b/tests/helpers/linux/test_fs.py index d127c3515..336a037a1 100644 --- a/tests/helpers/linux/test_fs.py +++ b/tests/helpers/linux/test_fs.py @@ -31,7 +31,7 @@ def test_path_lookup(self): def test_path_lookup_bind_mount(self): with tempfile.NamedTemporaryFile(prefix="drgn-tests-") as f: old_mnt = path_lookup(self.prog, os.path.abspath(f.name)).mnt - mount(f.name, f.name, "", MS_BIND, "") + mount(f.name, f.name, "", MS_BIND) try: new_mnt = path_lookup(self.prog, os.path.abspath(f.name)).mnt self.assertNotEqual(old_mnt, new_mnt) diff --git a/vmtest/vm.py b/vmtest/vm.py index d77ebff9a..62186850e 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -59,6 +59,8 @@ "$BUSYBOX" mount -t devtmpfs -o nosuid,noexec dev /dev "$BUSYBOX" mount -t proc -o nosuid,nodev,noexec proc /proc "$BUSYBOX" mount -t sysfs -o nosuid,nodev,noexec sys /sys +# cgroup2 was added in Linux v4.5. +"$BUSYBOX" mount -t cgroup2 -o nosuid,nodev,noexec cgroup2 /sys/fs/cgroup || "$BUSYBOX" true # Ideally we'd just be able to create an opaque directory for /tmp on the upper # layer. 
However, before Linux kernel commit 51f7e52dc943 ("ovl: share inode # for hard link") (in v4.8), overlayfs doesn't handle hard links correctly, From c64d87e41ae1d57460c99de538bc586e792510c6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Sep 2021 09:59:22 -0700 Subject: [PATCH 050/139] setup.py: add 5.15 to vmtest kernels Signed-off-by: Omar Sandoval --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1f0051fe0..d6fbfee10 100755 --- a/setup.py +++ b/setup.py @@ -126,6 +126,7 @@ class test(Command): description = "run unit tests after in-place build" KERNELS = [ + "5.15", "5.14", "5.13", "5.12", From 2baee6fe162e5dfbdd828148eb0e9111109ff414 Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Fri, 24 Sep 2021 14:36:41 -0700 Subject: [PATCH 051/139] dwarf_index.c: Shrink abbrev tables before saving them in CUs In larger binaries, there can be a large number of CUs, and since we store an abbrev table for each CU the extra space starts to add up. The simplest way to mitigate this is to shrink the vectors before saving them. On a large binary, I noticed a memory reduction from 20.4G RES to 18.6G RES (on initial load-in). 
--- libdrgn/dwarf_index.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index b347f2211..734fbe65b 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -838,6 +838,8 @@ static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, return err; } } + uint8_vector_shrink_to_fit(&insns); + uint32_vector_shrink_to_fit(&decls); cu->abbrev_decls = decls.data; cu->num_abbrev_decls = decls.size; cu->abbrev_insns = insns.data; From b0ae2867d51e0e47462ec1a2a96b03d327d7cf58 Mon Sep 17 00:00:00 2001 From: Jake Hillion Date: Tue, 24 Aug 2021 15:42:21 +0100 Subject: [PATCH 052/139] splay_tree.c: Rename splay_tree to avoid conflicts with splay-tree.h When linking libdrgn as a static library, the name 'splay_tree' can conflict with splay-tree.h in libiberty (namely splay_tree_splay). Rename relevant functions to have a 'drgn_' prefix Signed-off-by: Jake Hillion --- libdrgn/binary_search_tree.h | 53 ++++++++++++++++++------------------ libdrgn/splay_tree.c | 26 +++++++++--------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index ba6fb4f57..dc9214aba 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -234,41 +234,41 @@ struct binary_tree_search_result { /* * Binary search tree variants need to define three functions: * - * variant##_tree_insert_fixup(root, node, parent) is called after a node is - * inserted (as *root, parent->left, or parent->right). It must set the node's - * parent pointer and rebalance the tree. + * drgn_##variant##_tree_insert_fixup(root, node, parent) is called after a node + * is inserted (as *root, parent->left, or parent->right). It must set the + * node's parent pointer and rebalance the tree. 
* - * variant##_tree_found(root, node) is called when a duplicate node is found for - * an insert operation or when a node is found for a search operation (but not - * for a delete operation). It may rebalance the tree or do nothing. + * drgn_##variant##_tree_found(root, node) is called when a duplicate node is + * found for an insert operation or when a node is found for a search operation + * (but not for a delete operation). It may rebalance the tree or do nothing. * - * variant##_tree_delete(root, node) must delete the node and rebalance the - * tree. + * drgn_##variant##_tree_delete(root, node) must delete the node and rebalance + * the tree. */ -void splay_tree_splay(struct binary_tree_node **root, - struct binary_tree_node *node, - struct binary_tree_node *parent); +void drgn_splay_tree_splay(struct binary_tree_node **root, + struct binary_tree_node *node, + struct binary_tree_node *parent); -static inline void splay_tree_insert_fixup(struct binary_tree_node **root, - struct binary_tree_node *node, - struct binary_tree_node *parent) +static inline void drgn_splay_tree_insert_fixup(struct binary_tree_node **root, + struct binary_tree_node *node, + struct binary_tree_node *parent) { if (parent) - splay_tree_splay(root, node, parent); + drgn_splay_tree_splay(root, node, parent); else node->parent = NULL; } -static inline void splay_tree_found(struct binary_tree_node **root, - struct binary_tree_node *node) +static inline void drgn_splay_tree_found(struct binary_tree_node **root, + struct binary_tree_node *node) { if (node->parent) - splay_tree_splay(root, node, node->parent); + drgn_splay_tree_splay(root, node, node->parent); } -void splay_tree_delete(struct binary_tree_node **root, - struct binary_tree_node *node); +void drgn_splay_tree_delete(struct binary_tree_node **root, + struct binary_tree_node *node); /** * Define a binary search tree type without defining its functions. 
@@ -372,14 +372,14 @@ static int tree##_insert(struct tree *tree, tree##_entry_type *entry, \ if (*res.nodep) { \ if (it_ret) \ it_ret->entry = tree##_node_to_entry(*res.nodep); \ - variant##_tree_found(&tree->root, *res.nodep); \ + drgn_##variant##_tree_found(&tree->root, *res.nodep); \ return 0; \ } \ \ node = tree##_entry_to_node(entry); \ node->left = node->right = NULL; \ *res.nodep = node; \ - variant##_tree_insert_fixup(&tree->root, node, res.parent); \ + drgn_##variant##_tree_insert_fixup(&tree->root, node, res.parent); \ return 1; \ } \ \ @@ -392,7 +392,7 @@ static struct tree##_iterator tree##_search(struct tree *tree, \ node = *tree##_search_internal(tree, key).nodep; \ if (!node) \ return (struct tree##_iterator){}; \ - variant##_tree_found(&tree->root, node); \ + drgn_##variant##_tree_found(&tree->root, node); \ return (struct tree##_iterator){ tree##_node_to_entry(node), }; \ } \ \ @@ -422,7 +422,8 @@ static struct tree##_iterator tree##_search_le(struct tree *tree, \ } \ } \ if (entry) \ - variant##_tree_found(&tree->root, tree##_entry_to_node(entry)); \ + drgn_##variant##_tree_found(&tree->root, \ + tree##_entry_to_node(entry)); \ return (struct tree##_iterator){ entry, }; \ } \ \ @@ -434,7 +435,7 @@ static bool tree##_delete(struct tree *tree, const tree##_key_type *key) \ node = *tree##_search_internal(tree, key).nodep; \ if (!node) \ return false; \ - variant##_tree_delete(&tree->root, node); \ + drgn_##variant##_tree_delete(&tree->root, node); \ return true; \ } \ \ @@ -486,7 +487,7 @@ tree##_delete_iterator(struct tree *tree, struct tree##_iterator it) \ \ node = tree##_entry_to_node(it.entry); \ it = tree##_next_impl(it); \ - variant##_tree_delete(&tree->root, node); \ + drgn_##variant##_tree_delete(&tree->root, node); \ return it; \ } \ \ diff --git a/libdrgn/splay_tree.c b/libdrgn/splay_tree.c index a0bfa6964..6021db001 100644 --- a/libdrgn/splay_tree.c +++ b/libdrgn/splay_tree.c @@ -15,9 +15,9 @@ * 1: "Self-Adjusting Binary Search Trees" 
(Sleator & Tarjan, 1985): * http://www.cs.cmu.edu/~sleator/papers/self-adjusting.pdf */ -void splay_tree_splay(struct binary_tree_node **root, - struct binary_tree_node *node, - struct binary_tree_node *parent) +void drgn_splay_tree_splay(struct binary_tree_node **root, + struct binary_tree_node *node, + struct binary_tree_node *parent) { for (;;) { struct binary_tree_node *grandparent, *great_grandparent; @@ -146,9 +146,9 @@ void splay_tree_splay(struct binary_tree_node **root, node->parent = NULL; } -static inline void transplant(struct binary_tree_node **root, - struct binary_tree_node *old, - struct binary_tree_node *new) +static inline void drgn_splay_tree_transplant(struct binary_tree_node **root, + struct binary_tree_node *old, + struct binary_tree_node *new) { if (!old->parent) *root = new; @@ -160,13 +160,13 @@ static inline void transplant(struct binary_tree_node **root, new->parent = old->parent; } -void splay_tree_delete(struct binary_tree_node **root, - struct binary_tree_node *node) +void drgn_splay_tree_delete(struct binary_tree_node **root, + struct binary_tree_node *node) { if (node->left == NULL) { - transplant(root, node, node->right); + drgn_splay_tree_transplant(root, node, node->right); } else if (node->right == NULL) { - transplant(root, node, node->left); + drgn_splay_tree_transplant(root, node, node->left); } else { struct binary_tree_node *successor; @@ -175,14 +175,14 @@ void splay_tree_delete(struct binary_tree_node **root, do { successor = successor->left; } while (successor->left); - transplant(root, successor, successor->right); + drgn_splay_tree_transplant(root, successor, successor->right); successor->right = node->right; successor->right->parent = successor; } - transplant(root, node, successor); + drgn_splay_tree_transplant(root, node, successor); successor->left = node->left; successor->left->parent = successor; } if (node->parent && node->parent->parent) - splay_tree_splay(root, node->parent, node->parent->parent); + 
drgn_splay_tree_splay(root, node->parent, node->parent->parent); } From b4a82c30abf9abec00b667b10c829b1cd4b04a47 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 28 Sep 2021 12:32:15 -0700 Subject: [PATCH 053/139] Add GitHub action to check for DCO sign-off on pull requests Signed-off-by: Omar Sandoval --- .github/workflows/dco-check.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/dco-check.yml diff --git a/.github/workflows/dco-check.yml b/.github/workflows/dco-check.yml new file mode 100644 index 000000000..0b6bcf8ec --- /dev/null +++ b/.github/workflows/dco-check.yml @@ -0,0 +1,24 @@ +name: DCO Check + +on: pull_request + +jobs: + check: + runs-on: ubuntu-latest + steps: + - name: Checkout commit logs + run: | + git init + git fetch --filter=blob:none "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" "$GITHUB_BASE_REF" "$GITHUB_REF" + - name: Check for DCO sign-offs + run: | + no_sign_off="$(git log --no-merges --grep=Signed-off-by --invert-grep "FETCH_HEAD..$GITHUB_SHA")" + if [ -z "$no_sign_off" ]; then + echo "All commits have a Developer Certificate of Origin sign-off" + else + echo "The following commits are missing a Developer Certificate of Origin sign-off;" + echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" + echo + echo "$no_sign_off" + exit 1 + fi From 734cbe5c7b787fea7b9f719ff7526a6e99fd5241 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 24 Aug 2021 13:57:28 -0700 Subject: [PATCH 054/139] libdrgn: dwarf_index: free pending DIEs after indexing namespace Once we've cleared the pending DIEs vector, we won't use the vector again (unless we load more debugging information), so we can free it. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 734fbe65b..bc200ac40 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2668,6 +2668,7 @@ static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) return drgn_error_copy(ns->saved_err); } ns->pending_dies.size = 0; + drgn_dwarf_index_pending_die_vector_shrink_to_fit(&ns->pending_dies); return err; } From 1d4dbc2b693b6608a75086ebcc4c8a2f4d69ae97 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 12 Oct 2021 18:01:42 -0700 Subject: [PATCH 055/139] libdrgn: python: remove unused declaration drgnpy_linux_helper_task_state_to_char() was removed by commit ff96c75da05c ("helpers: translate task_state_to_char() to Python"), but I left behind the declaration. Signed-off-by: Omar Sandoval --- libdrgn/python/drgnpy.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 29502538c..ad5218aee 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -293,8 +293,6 @@ DrgnObject *drgnpy_linux_helper_pid_task(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *drgnpy_linux_helper_find_task(PyObject *self, PyObject *args, PyObject *kwds); -PyObject *drgnpy_linux_helper_task_state_to_char(PyObject *self, PyObject *args, - PyObject *kwds); PyObject *drgnpy_linux_helper_kaslr_offset(PyObject *self, PyObject *args, PyObject *kwds); PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *args, From c1e16ae3ec29de4c1db458be1ed8cf267d345225 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 23 Oct 2021 00:40:57 -0700 Subject: [PATCH 056/139] libdrgn: fold drgn_program_get_dbinfo() into only caller The only time that we want to create the drgn_debug_info is when we're loading debugging information. Everywhere else, we fail fast if there is no debugging information. 
Signed-off-by: Omar Sandoval --- libdrgn/program.c | 57 ++++++++++++++++++----------------------------- libdrgn/program.h | 3 --- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/libdrgn/program.c b/libdrgn/program.c index 493388f37..5069bda05 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -550,37 +550,6 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) return err; } -struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, - struct drgn_debug_info **ret) -{ - struct drgn_error *err; - - if (!prog->_dbinfo) { - struct drgn_debug_info *dbinfo; - err = drgn_debug_info_create(prog, &dbinfo); - if (err) - return err; - err = drgn_program_add_object_finder(prog, - drgn_debug_info_find_object, - dbinfo); - if (err) { - drgn_debug_info_destroy(dbinfo); - return err; - } - err = drgn_program_add_type_finder(prog, - drgn_debug_info_find_type, - dbinfo); - if (err) { - drgn_object_index_remove_finder(&prog->oindex); - drgn_debug_info_destroy(dbinfo); - return err; - } - prog->_dbinfo = dbinfo; - } - *ret = prog->_dbinfo; - return NULL; -} - /* Set the default language from the language of "main". 
*/ static void drgn_program_set_language_from_main(struct drgn_debug_info *dbinfo) { @@ -645,10 +614,28 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, if (!n && !load_default && !load_main) return NULL; - struct drgn_debug_info *dbinfo; - err = drgn_program_get_dbinfo(prog, &dbinfo); - if (err) - return err; + struct drgn_debug_info *dbinfo = prog->_dbinfo; + if (!dbinfo) { + err = drgn_debug_info_create(prog, &dbinfo); + if (err) + return err; + err = drgn_program_add_object_finder(prog, + drgn_debug_info_find_object, + dbinfo); + if (err) { + drgn_debug_info_destroy(dbinfo); + return err; + } + err = drgn_program_add_type_finder(prog, + drgn_debug_info_find_type, + dbinfo); + if (err) { + drgn_object_index_remove_finder(&prog->oindex); + drgn_debug_info_destroy(dbinfo); + return err; + } + prog->_dbinfo = dbinfo; + } err = drgn_debug_info_load(dbinfo, paths, n, load_default, load_main); if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { diff --git a/libdrgn/program.h b/libdrgn/program.h index e58434d80..a6fb8dca7 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -257,9 +257,6 @@ drgn_program_address_mask(const struct drgn_program *prog, uint64_t *ret) return NULL; } -struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, - struct drgn_debug_info **ret); - /** * Find the @c NT_PRSTATUS note for the given CPU. * From 802d6cc9ff99b801e5a5fb75f236e47baefbb0b6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 23 Oct 2021 00:50:39 -0700 Subject: [PATCH 057/139] libdrgn: rename drgn_program::_dbinfo to dbinfo The underscore was meant to discourage direct access in favor of using drgn_program_get_dbinfo(), but it turns out that it's more normal to access it directly. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 8 ++++---- libdrgn/program.c | 16 ++++++++-------- libdrgn/program.h | 2 +- libdrgn/register_state.c | 4 ++-- libdrgn/stack_trace.c | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 8d371e4fe..e5f193a82 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -3480,7 +3480,7 @@ drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) struct drgn_dwarf_member_thunk_arg *arg = arg_; if (res) { struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->_dbinfo, + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, arg->module, &arg->die, NULL, false, arg->can_be_incomplete_array, @@ -3764,7 +3764,7 @@ drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) struct drgn_dwarf_die_thunk_arg *arg = arg_; if (res) { struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->_dbinfo, + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, arg->module, &arg->die, NULL, true, true, NULL, &qualified_type); @@ -3786,7 +3786,7 @@ drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, struct drgn_error *err; struct drgn_dwarf_die_thunk_arg *arg = arg_; if (res) { - err = drgn_object_from_dwarf(drgn_object_program(res)->_dbinfo, + err = drgn_object_from_dwarf(drgn_object_program(res)->dbinfo, arg->module, &arg->die, NULL, NULL, NULL, res); if (err) @@ -4328,7 +4328,7 @@ drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_) struct drgn_dwarf_die_thunk_arg *arg = arg_; if (res) { struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->_dbinfo, + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, arg->module, &arg->die, NULL, false, true, NULL, &qualified_type); diff --git a/libdrgn/program.c b/libdrgn/program.c 
index 5069bda05..b17dec1a7 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -113,7 +113,7 @@ void drgn_program_deinit(struct drgn_program *prog) if (prog->core_fd != -1) close(prog->core_fd); - drgn_debug_info_destroy(prog->_dbinfo); + drgn_debug_info_destroy(prog->dbinfo); } LIBDRGN_PUBLIC struct drgn_error * @@ -614,7 +614,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, if (!n && !load_default && !load_main) return NULL; - struct drgn_debug_info *dbinfo = prog->_dbinfo; + struct drgn_debug_info *dbinfo = prog->dbinfo; if (!dbinfo) { err = drgn_debug_info_create(prog, &dbinfo); if (err) @@ -634,7 +634,7 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, drgn_debug_info_destroy(dbinfo); return err; } - prog->_dbinfo = dbinfo; + prog->dbinfo = dbinfo; } err = drgn_debug_info_load(dbinfo, paths, n, load_default, load_main); @@ -1083,8 +1083,8 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, struct drgn_symbol *ret) { if (!module) { - if (prog->_dbinfo) { - module = dwfl_addrmodule(prog->_dbinfo->dwfl, address); + if (prog->dbinfo) { + module = dwfl_addrmodule(prog->dbinfo->dwfl, address); if (!module) return false; } else { @@ -1180,9 +1180,9 @@ drgn_program_find_symbol_by_name(struct drgn_program *prog, .ret = ret, }; - if (prog->_dbinfo && - dwfl_getmodules(prog->_dbinfo->dwfl, find_symbol_by_name_cb, - &arg, 0)) + if (prog->dbinfo && + dwfl_getmodules(prog->dbinfo->dwfl, find_symbol_by_name_cb, &arg, + 0)) return arg.err; return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find symbol with name '%s'%s", name, diff --git a/libdrgn/program.h b/libdrgn/program.h index a6fb8dca7..ed1217bcf 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -122,7 +122,7 @@ struct drgn_program { * Debugging information. */ struct drgn_object_index oindex; - struct drgn_debug_info *_dbinfo; + struct drgn_debug_info *dbinfo; /* * Program information. 
diff --git a/libdrgn/register_state.c b/libdrgn/register_state.c index 3fd8f8db7..5c5364f19 100644 --- a/libdrgn/register_state.c +++ b/libdrgn/register_state.c @@ -86,8 +86,8 @@ void drgn_register_state_set_pc(struct drgn_program *prog, pc &= drgn_platform_address_mask(&prog->platform); regs->_pc = pc; drgn_register_state_set_known(regs, 0); - if (prog->_dbinfo) { - Dwfl_Module *dwfl_module = dwfl_addrmodule(prog->_dbinfo->dwfl, + if (prog->dbinfo) { + Dwfl_Module *dwfl_module = dwfl_addrmodule(prog->dbinfo->dwfl, pc - !regs->interrupted); if (dwfl_module) { void **userdatap; diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index ee330d380..4304dba54 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -443,7 +443,7 @@ not_found:; } Dwarf_Die function_die = frame->scopes[frame->function_scope]; - return drgn_object_from_dwarf(trace->prog->_dbinfo, frame->regs->module, + return drgn_object_from_dwarf(trace->prog->dbinfo, frame->regs->module, &die, dwarf_tag(&die) == DW_TAG_enumerator ? &type_die : NULL, From 1339dc6a2f534872c7082ee497210bef079b26e6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 11 Oct 2021 13:01:07 -0700 Subject: [PATCH 058/139] libdrgn: hash_table: move entry_to_key to DEFINE_HASH_TABLE_FUNCTIONS() DEFINE_HASH_TABLE_TYPE() doesn't actually need to know the key type. Move that argument (and some of the derived constants) to DEFINE_HASH_TABLE_FUNCTIONS(). This will allow recursive hash table types. As a nice side effect, it also reduces the size of common header files. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 24 ++++- libdrgn/debug_info.h | 19 +--- libdrgn/dwarf_index.c | 11 +- libdrgn/dwarf_index.h | 9 +- libdrgn/hash_table.h | 212 ++++++++++++++++++++++----------------- libdrgn/program.c | 2 +- libdrgn/python/program.c | 2 +- libdrgn/type.c | 10 +- 8 files changed, 157 insertions(+), 132 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index e5f193a82..007b68302 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -480,6 +480,23 @@ struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) +struct drgn_debug_info_module_key { + const void *build_id; + size_t build_id_len; + uint64_t start, end; +}; + +static inline struct drgn_debug_info_module_key +drgn_debug_info_module_key(struct drgn_debug_info_module * const *entry) +{ + return (struct drgn_debug_info_module_key){ + .build_id = (*entry)->build_id, + .build_id_len = (*entry)->build_id_len, + .start = (*entry)->start, + .end = (*entry)->end, + }; +} + static inline struct hash_pair drgn_debug_info_module_key_hash_pair(const struct drgn_debug_info_module_key *key) { @@ -497,11 +514,11 @@ drgn_debug_info_module_key_eq(const struct drgn_debug_info_module_key *a, a->start == b->start && a->end == b->end); } DEFINE_HASH_TABLE_FUNCTIONS(drgn_debug_info_module_table, + drgn_debug_info_module_key, drgn_debug_info_module_key_hash_pair, drgn_debug_info_module_key_eq) -DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_key_hash_pair, - c_string_key_eq) +DEFINE_HASH_SET_FUNCTIONS(c_string_set, c_string_key_hash_pair, c_string_key_eq) /** * @c Dwfl_Callbacks::find_elf() implementation. 
@@ -2566,8 +2583,7 @@ drgn_dwarf_frame_base(struct drgn_program *prog, return err; } -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, - scalar_key_eq) +DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) /** * Return whether a DWARF DIE is little-endian. diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index f8476c599..b53ac8ae3 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -244,25 +244,8 @@ struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, size_t *length_ret) __attribute__((__nonnull__(2, 3))); -struct drgn_debug_info_module_key { - const void *build_id; - size_t build_id_len; - uint64_t start, end; -}; - -static inline struct drgn_debug_info_module_key -drgn_debug_info_module_key(struct drgn_debug_info_module * const *entry) -{ - return (struct drgn_debug_info_module_key){ - .build_id = (*entry)->build_id, - .build_id_len = (*entry)->build_id_len, - .start = (*entry)->start, - .end = (*entry)->end, - }; -} DEFINE_HASH_TABLE_TYPE(drgn_debug_info_module_table, - struct drgn_debug_info_module *, - drgn_debug_info_module_key) + struct drgn_debug_info_module *) DEFINE_HASH_SET_TYPE(c_string_set, const char *) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index bc200ac40..3e213ce12 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -185,10 +185,17 @@ struct drgn_dwarf_index_pending_die { DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash_pair, - string_eq) +DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_index_die_map, string_hash_pair, string_eq) DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) + +static inline uintptr_t +drgn_dwarf_index_specification_to_key(const struct drgn_dwarf_index_specification *entry) +{ + return entry->declaration; +} + DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_specification_map, + drgn_dwarf_index_specification_to_key, int_key_hash_pair, 
scalar_key_eq) static inline size_t hash_pair_to_shard(struct hash_pair hp) diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 7551111da..b82f94123 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -123,15 +123,8 @@ struct drgn_dwarf_index_specification { uintptr_t addr; }; -static inline uintptr_t -drgn_dwarf_index_specification_to_key(const struct drgn_dwarf_index_specification *entry) -{ - return entry->declaration; -} - DEFINE_HASH_TABLE_TYPE(drgn_dwarf_index_specification_map, - struct drgn_dwarf_index_specification, - drgn_dwarf_index_specification_to_key) + struct drgn_dwarf_index_specification) DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 0b9368a9e..428a78067 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -375,15 +375,8 @@ static inline unsigned int table##_chunk_occupied(struct table##_chunk *chunk) \ * * @sa DEFINE_HASH_TABLE() */ -#define DEFINE_HASH_TABLE_TYPE(table, entry_type, entry_to_key) \ +#define DEFINE_HASH_TABLE_TYPE(table, entry_type) \ typedef typeof(entry_type) table##_entry_type; \ -typedef typeof(entry_to_key((table##_entry_type *)0)) table##_key_type; \ - \ -static inline table##_key_type \ -table##_entry_to_key(const table##_entry_type *entry) \ -{ \ - return entry_to_key(entry); \ -} \ \ enum { \ /* \ @@ -395,6 +388,79 @@ enum { \ table##_vector_policy = sizeof(table##_entry_type) >= 24, \ }; \ \ +struct table { \ + struct table##_chunk *chunks; \ + struct { \ + /* \ + * The vector storage policy stores 32-bit indices, so we only \ + * need 32-bit sizes. \ + */ \ + uint32_t chunk_mask; \ + uint32_t size; \ + /* Allocated together with chunks. 
*/ \ + table##_entry_type *entries; \ + } vector[table##_vector_policy]; \ + struct { \ + size_t chunk_mask; \ + size_t size; \ + uintptr_t first_packed; \ + } basic[!table##_vector_policy]; \ +}; + +/* + * Common search function implementation returning an item iterator. This is + * shared by key lookups and index lookups. + */ +#define HASH_TABLE_SEARCH_IMPL(table, func, key_type, item_to_key, eq_func) \ +static struct table##_iterator table##_##func(struct table *table, \ + const key_type *key, \ + struct hash_pair hp) \ +{ \ + const size_t delta = hash_table_probe_delta(hp); \ + size_t index = hp.first; \ + for (size_t tries = 0; tries <= table##_chunk_mask(table); tries++) { \ + struct table##_chunk *chunk = \ + &table->chunks[index & table##_chunk_mask(table)]; \ + if (sizeof(*chunk) > 64) \ + __builtin_prefetch(&chunk->items[8]); \ + unsigned int mask = table##_chunk_match(chunk, hp.second), i; \ + for_each_bit(i, mask) { \ + table##_item_type *item = &chunk->items[i]; \ + key_type item_key = item_to_key(table, item); \ + if (likely(eq_func(key, &item_key))) { \ + return (struct table##_iterator){ \ + .item = item, \ + .index = i, \ + }; \ + } \ + } \ + if (likely(chunk->outbound_overflow_count == 0)) \ + break; \ + index += delta; \ + } \ + return (struct table##_iterator){}; \ +} + +#define HASH_TABLE_SEARCH_BY_INDEX_ITEM_TO_KEY(table, item) (*(item)->index) + +/** + * Define the functions for a hash table. + * + * The hash table type must have already been defined with @ref + * DEFINE_HASH_TABLE_TYPE(). + * + * Unless the type and function definitions must be in separate places, use @ref + * DEFINE_HASH_TABLE() instead. + */ +#define DEFINE_HASH_TABLE_FUNCTIONS(table, entry_to_key, hash_func, eq_func) \ +typedef typeof(entry_to_key((table##_entry_type *)0)) table##_key_type; \ + \ +static inline table##_key_type \ +table##_entry_to_key(const table##_entry_type *entry) \ +{ \ + return entry_to_key(entry); \ +} \ + \ /* \ * Item stored in a chunk. 
\ * \ @@ -489,73 +555,6 @@ struct table##_iterator { \ }; \ }; \ \ -struct table { \ - struct table##_chunk *chunks; \ - struct { \ - /* \ - * The vector storage policy stores 32-bit indices, so we only \ - * need 32-bit sizes. \ - */ \ - uint32_t chunk_mask; \ - uint32_t size; \ - /* Allocated together with chunks. */ \ - table##_entry_type *entries; \ - } vector[table##_vector_policy]; \ - struct { \ - size_t chunk_mask; \ - size_t size; \ - uintptr_t first_packed; \ - } basic[!table##_vector_policy]; \ -}; - -/* - * Common search function implementation returning an item iterator. This is - * shared by key lookups and index lookups. - */ -#define HASH_TABLE_SEARCH_IMPL(table, func, key_type, item_to_key, eq_func) \ -static struct table##_iterator table##_##func(struct table *table, \ - const key_type *key, \ - struct hash_pair hp) \ -{ \ - const size_t delta = hash_table_probe_delta(hp); \ - size_t index = hp.first; \ - for (size_t tries = 0; tries <= table##_chunk_mask(table); tries++) { \ - struct table##_chunk *chunk = \ - &table->chunks[index & table##_chunk_mask(table)]; \ - if (sizeof(*chunk) > 64) \ - __builtin_prefetch(&chunk->items[8]); \ - unsigned int mask = table##_chunk_match(chunk, hp.second), i; \ - for_each_bit(i, mask) { \ - table##_item_type *item = &chunk->items[i]; \ - key_type item_key = item_to_key(table, item); \ - if (likely(eq_func(key, &item_key))) { \ - return (struct table##_iterator){ \ - .item = item, \ - .index = i, \ - }; \ - } \ - } \ - if (likely(chunk->outbound_overflow_count == 0)) \ - break; \ - index += delta; \ - } \ - return (struct table##_iterator){}; \ -} - -#define HASH_TABLE_SEARCH_BY_INDEX_ITEM_TO_KEY(table, item) (*(item)->index) - -/** - * Define the functions for a hash table. - * - * The hash table type must have already been defined with @ref - * DEFINE_HASH_TABLE_TYPE(). - * - * Unless the type and function definitions must be in separate places, use @ref - * DEFINE_HASH_TABLE() instead. 
- * - * @sa DEFINE_HASH_TABLE() - */ -#define DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) \ static inline struct hash_pair table##_hash(const table##_key_type *key) \ { \ return hash_func(key); \ @@ -1425,24 +1424,38 @@ static struct table##_iterator table##_next(struct table##_iterator it) \ * * and returns a @c bool. */ #define DEFINE_HASH_TABLE(table, entry_type, entry_to_key, hash_func, eq_func) \ -DEFINE_HASH_TABLE_TYPE(table, entry_type, entry_to_key) \ -DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) - -#define HASH_MAP_ENTRY_TO_KEY(entry) ((entry)->key) +DEFINE_HASH_TABLE_TYPE(table, entry_type) \ +DEFINE_HASH_TABLE_FUNCTIONS(table, entry_to_key, hash_func, eq_func) /** * Define a hash map type without defining its functions. * - * The functions are defined with @ref DEFINE_HASH_TABLE_FUNCTIONS(). + * The functions are defined with @ref DEFINE_HASH_MAP_FUNCTIONS(). * * @sa DEFINE_HASH_MAP(), DEFINE_HASH_TABLE_TYPE() */ -#define DEFINE_HASH_MAP_TYPE(table, key_type, value_type) \ -struct table##_entry { \ - typeof(key_type) key; \ - typeof(value_type) value; \ -}; \ -DEFINE_HASH_TABLE_TYPE(table, struct table##_entry, HASH_MAP_ENTRY_TO_KEY) +#define DEFINE_HASH_MAP_TYPE(table, key_type, value_type) \ +struct table##_entry { \ + typeof(key_type) key; \ + typeof(value_type) value; \ +}; \ +DEFINE_HASH_TABLE_TYPE(table, struct table##_entry) + +#define HASH_MAP_ENTRY_TO_KEY(entry) ((entry)->key) + +/** + * Define the functions for a hash map. + * + * The hash map type must have already been defined with @ref + * DEFINE_HASH_MAP_TYPE(). + * + * Unless the type and function definitions must be in separate places, use @ref + * DEFINE_HASH_MAP() instead. + * + * @sa DEFINE_HASH_TABLE_FUNCTIONS + */ +#define DEFINE_HASH_MAP_FUNCTIONS(table, hash_func, eq_func) \ +DEFINE_HASH_TABLE_FUNCTIONS(table, HASH_MAP_ENTRY_TO_KEY, hash_func, eq_func) /** * Define a hash map interface. 
@@ -1466,19 +1479,32 @@ DEFINE_HASH_TABLE_TYPE(table, struct table##_entry, HASH_MAP_ENTRY_TO_KEY) */ #define DEFINE_HASH_MAP(table, key_type, value_type, hash_func, eq_func) \ DEFINE_HASH_MAP_TYPE(table, key_type, value_type) \ -DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) - -#define HASH_SET_ENTRY_TO_KEY(entry) (*(entry)) +DEFINE_HASH_MAP_FUNCTIONS(table, hash_func, eq_func) /** * Define a hash set type without defining its functions. * - * The functions are defined with @ref DEFINE_HASH_TABLE_FUNCTIONS(). + * The functions are defined with @ref DEFINE_HASH_SET_FUNCTIONS(). * * @sa DEFINE_HASH_SET(), DEFINE_HASH_TABLE_TYPE() */ -#define DEFINE_HASH_SET_TYPE(table, key_type) \ - DEFINE_HASH_TABLE_TYPE(table, key_type, HASH_SET_ENTRY_TO_KEY) +#define DEFINE_HASH_SET_TYPE DEFINE_HASH_TABLE_TYPE + +#define HASH_SET_ENTRY_TO_KEY(entry) (*(entry)) + +/** + * Define the functions for a hash set. + * + * The hash set type must have already been defined with @ref + * DEFINE_HASH_SET_TYPE(). + * + * Unless the type and function definitions must be in separate places, use @ref + * DEFINE_HASH_SET() instead. + * + * @sa DEFINE_HASH_TABLE_FUNCTIONS + */ +#define DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) \ +DEFINE_HASH_TABLE_FUNCTIONS(table, HASH_SET_ENTRY_TO_KEY, hash_func, eq_func) /** * Define a hash set interface. @@ -1494,7 +1520,7 @@ DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) */ #define DEFINE_HASH_SET(table, key_type, hash_func, eq_func) \ DEFINE_HASH_SET_TYPE(table, key_type) \ -DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) +DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) /** * Empty hash table initializer. 
diff --git a/libdrgn/program.c b/libdrgn/program.c index b17dec1a7..7e18157f5 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -32,7 +32,7 @@ #include "util.h" DEFINE_VECTOR_FUNCTIONS(drgn_prstatus_vector) -DEFINE_HASH_TABLE_FUNCTIONS(drgn_prstatus_map, int_key_hash_pair, scalar_key_eq) +DEFINE_HASH_MAP_FUNCTIONS(drgn_prstatus_map, int_key_hash_pair, scalar_key_eq) static Elf_Type note_header_type(GElf_Phdr *phdr) { diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 70a20f24a..06d6ce36f 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -7,7 +7,7 @@ #include "../vector.h" #include "../util.h" -DEFINE_HASH_TABLE_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq) +DEFINE_HASH_SET_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq) int Program_hold_object(Program *prog, PyObject *obj) { diff --git a/libdrgn/type.c b/libdrgn/type.c index 7492c42a6..cd91f6865 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -170,10 +170,10 @@ static bool drgn_member_key_eq(const struct drgn_member_key *a, (!a->name_len || memcmp(a->name, b->name, a->name_len) == 0)); } -DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_key_hash_pair, - drgn_member_key_eq) +DEFINE_HASH_MAP_FUNCTIONS(drgn_member_map, drgn_member_key_hash_pair, + drgn_member_key_eq) -DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, ptr_key_hash_pair, scalar_key_eq) +DEFINE_HASH_SET_FUNCTIONS(drgn_type_set, ptr_key_hash_pair, scalar_key_eq) LIBDRGN_PUBLIC struct drgn_error * drgn_member_object(struct drgn_type_member *member, @@ -329,8 +329,8 @@ static bool drgn_type_dedupe_eq(struct drgn_type * const *entry_a, * We don't deduplicate types with members, parameters, template parameters, or * enumerators, so the hash and comparison functions ignore those. 
*/ -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dedupe_type_set, drgn_type_dedupe_hash_pair, - drgn_type_dedupe_eq) +DEFINE_HASH_SET_FUNCTIONS(drgn_dedupe_type_set, drgn_type_dedupe_hash_pair, + drgn_type_dedupe_eq) DEFINE_VECTOR_FUNCTIONS(drgn_typep_vector) From 8bf26fafbbdd044c38b4b68c2eaf0b28b263c9cb Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Tue, 12 Oct 2021 13:48:54 -0700 Subject: [PATCH 059/139] dwarf_index.c: lazily allocate shards to save memory on unused ns Previously shards were allocated as soon as a namespace was encountered, which means that we had a large array sitting around for every ns we saw. By allocating them lazily, we can reduce this usage. Signed-off-by: Jay Kamat --- libdrgn/dwarf_index.c | 58 ++++++++++++++++++++++++++++++++----------- libdrgn/dwarf_index.h | 4 +-- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 3e213ce12..f26a6030b 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -19,6 +19,9 @@ #include "platform.h" #include "util.h" +static const size_t DRGN_DWARF_INDEX_SHARD_BITS = 8; +static const size_t DRGN_DWARF_INDEX_NUM_SHARDS = 1 << DRGN_DWARF_INDEX_SHARD_BITS; + struct drgn_dwarf_index_pending_cu { struct drgn_debug_info_module *module; const char *buf; @@ -206,21 +209,35 @@ static inline size_t hash_pair_to_shard(struct hash_pair hp) */ return ((hp.first >> (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & - (((size_t)1 << DRGN_DWARF_INDEX_SHARD_BITS) - 1)); + (DRGN_DWARF_INDEX_NUM_SHARDS - 1)); } static void drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, struct drgn_dwarf_index *dindex) { - array_for_each(shard, ns->shards) { + ns->shards = NULL; + ns->dindex = dindex; + drgn_dwarf_index_pending_die_vector_init(&ns->pending_dies); + ns->saved_err = NULL; +} + +static bool +drgn_dwarf_index_namespace_shards_init(struct drgn_dwarf_index_namespace *ns) +{ + if (ns->shards) + return true; + ns->shards = 
malloc_array(DRGN_DWARF_INDEX_NUM_SHARDS, + sizeof(*ns->shards)); + if (!ns->shards) + return false; + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &ns->shards[i]; omp_init_lock(&shard->lock); drgn_dwarf_index_die_map_init(&shard->map); drgn_dwarf_index_die_vector_init(&shard->dies); } - ns->dindex = dindex; - drgn_dwarf_index_pending_die_vector_init(&ns->pending_dies); - ns->saved_err = NULL; + return true; } void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex) @@ -243,17 +260,21 @@ drgn_dwarf_index_namespace_deinit(struct drgn_dwarf_index_namespace *ns) { drgn_error_destroy(ns->saved_err); drgn_dwarf_index_pending_die_vector_deinit(&ns->pending_dies); - array_for_each(shard, ns->shards) { - for (size_t j = 0; j < shard->dies.size; j++) { - struct drgn_dwarf_index_die *die = &shard->dies.data[j]; - if (die->tag == DW_TAG_namespace) { - drgn_dwarf_index_namespace_deinit(die->namespace); - free(die->namespace); + if (ns->shards) { + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &ns->shards[i]; + for (size_t j = 0; j < shard->dies.size; j++) { + struct drgn_dwarf_index_die *die = &shard->dies.data[j]; + if (die->tag == DW_TAG_namespace) { + drgn_dwarf_index_namespace_deinit(die->namespace); + free(die->namespace); + } } + drgn_dwarf_index_die_vector_deinit(&shard->dies); + drgn_dwarf_index_die_map_deinit(&shard->map); + omp_destroy_lock(&shard->lock); } - drgn_dwarf_index_die_vector_deinit(&shard->dies); - drgn_dwarf_index_die_map_deinit(&shard->map); - omp_destroy_lock(&shard->lock); + free(ns->shards); } } @@ -2480,7 +2501,8 @@ indirect_insn:; static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) { - array_for_each(shard, dindex->global.shards) { + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &dindex->global.shards[i]; /* * Because we're deleting everything that was added since 
the * last update, we can just shrink the dies array to the first @@ -2543,6 +2565,9 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) { struct drgn_dwarf_index *dindex = state->dindex; + if (!drgn_dwarf_index_namespace_shards_init(&dindex->global)) + return &drgn_enomem; + size_t old_cus_size = dindex->cus.size; size_t new_cus_size = old_cus_size; for (size_t i = 0; i < state->max_threads; i++) @@ -2648,6 +2673,9 @@ static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) if (ns->saved_err) return drgn_error_copy(ns->saved_err); + if (!drgn_dwarf_index_namespace_shards_init(ns)) + return &drgn_enomem; + struct drgn_error *err = NULL; #pragma omp for schedule(dynamic) for (size_t i = 0; i < ns->pending_dies.size; i++) { diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index b82f94123..b503f18ac 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -109,8 +109,6 @@ struct drgn_dwarf_index_shard { struct drgn_dwarf_index_die_vector dies; }; -#define DRGN_DWARF_INDEX_SHARD_BITS 8 - /* A DIE with a DW_AT_specification attribute. */ struct drgn_dwarf_index_specification { /* @@ -138,7 +136,7 @@ struct drgn_dwarf_index_namespace { * * This is sharded to reduce lock contention. */ - struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; + struct drgn_dwarf_index_shard *shards; /** Parent DWARF index. */ struct drgn_dwarf_index *dindex; /** DIEs we have not indexed yet. */ From 3c52b18baaf3fc7cc4181f298ae04a5346e20cd4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 28 Oct 2021 14:41:13 -0700 Subject: [PATCH 060/139] tests: skip PID memory read test if /proc/$pid/mem doesn't work This works around a QEMU bug (https://gitlab.com/qemu-project/qemu/-/issues/698) which causes Packit build failures on 32-bit ARM. This should unblock #126. 
Signed-off-by: Omar Sandoval --- tests/test_program.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tests/test_program.py b/tests/test_program.py index d3a9c94c4..7c0215d58 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -47,9 +47,6 @@ def test_set_pid(self): prog.set_pid(os.getpid()) self.assertEqual(prog.platform, host_platform) self.assertTrue(prog.flags & ProgramFlags.IS_LIVE) - data = b"hello, world!" - buf = ctypes.create_string_buffer(data) - self.assertEqual(prog.read(ctypes.addressof(buf), len(data)), data) self.assertRaisesRegex( ValueError, "program memory was already initialized", @@ -57,6 +54,31 @@ def test_set_pid(self): os.getpid(), ) + def test_pid_memory(self): + data = b"hello, world!" + buf = ctypes.create_string_buffer(data) + address = ctypes.addressof(buf) + + # QEMU user-mode emulation doesn't seem to emulate /proc/$pid/mem + # correctly on a 64-bit host with a 32-bit guest; see + # https://gitlab.com/qemu-project/qemu/-/issues/698. Packit uses mock + # to cross-compile and test packages, which in turn uses QEMU user-mode + # emulation. Skip this test if /proc/$pid/mem doesn't work so that + # those builds succeed. 
+ try: + with open("/proc/self/mem", "rb") as f: + f.seek(address) + functional_proc_pid_mem = f.read(len(data)) == data + except OSError: + functional_proc_pid_mem = False + if not functional_proc_pid_mem: + self.skipTest("/proc/$pid/mem is not functional") + + prog = Program() + prog.set_pid(os.getpid()) + + self.assertEqual(prog.read(ctypes.addressof(buf), len(data)), data) + def test_lookup_error(self): prog = mock_program() self.assertRaisesRegex( From 7e6082707d7dfc67d20f6bd555585eca41a0d142 Mon Sep 17 00:00:00 2001 From: Davide Cavalca Date: Fri, 22 Oct 2021 15:46:08 -0700 Subject: [PATCH 061/139] Add initial Packit config Signed-off-by: Davide Cavalca --- .gitignore | 3 +++ .packit.yaml | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 .packit.yaml diff --git a/.gitignore b/.gitignore index e0391ec13..1e796ac39 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,7 @@ /drgn.egg-info /drgn/internal/version.py /htmlcov +/drgn-*.tar.gz +/python-drgn-*.src.rpm +/python-drgn.spec __pycache__ diff --git a/.packit.yaml b/.packit.yaml new file mode 100644 index 000000000..1a7b83050 --- /dev/null +++ b/.packit.yaml @@ -0,0 +1,42 @@ +# See the documentation for more information: +# https://packit.dev/docs/configuration/ + +specfile_path: python-drgn.spec +synced_files: + - python-drgn.spec + - .packit.yaml + +upstream_package_name: drgn +downstream_package_name: python-drgn +actions: + get-current-version: "python3 setup.py --version" + # Fetch the specfile from Rawhide and drop any patches + post-upstream-clone: "bash -c \"curl -s https://src.fedoraproject.org/rpms/python-drgn/raw/main/f/python-drgn.spec | sed '/^Patch[0-9]/d' > python-drgn.spec\"" + +jobs: +- job: copr_build + trigger: commit + metadata: + targets: + - fedora-all-aarch64 + - fedora-all-armhfp + - fedora-all-i386 + - fedora-all-ppc64le + - fedora-all-s390x + - fedora-all-x86_64 + - epel-8-aarch64 + - epel-8-ppc64le + - epel-8-x86_64 +- job: 
copr_build + trigger: pull_request + metadata: + targets: + - fedora-all-aarch64 + - fedora-all-armhfp + - fedora-all-i386 + - fedora-all-ppc64le + - fedora-all-s390x + - fedora-all-x86_64 + - epel-8-aarch64 + - epel-8-ppc64le + - epel-8-x86_64 From 6150935e968128f70bf456cef2c3daac43e0b223 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 29 Oct 2021 13:19:32 -0700 Subject: [PATCH 062/139] Fix some cosmetic nits in Packit config and .gitignore Fix .gitignore alphabetical order and indent YAML consistently. Signed-off-by: Omar Sandoval --- .gitignore | 2 +- .packit.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 1e796ac39..6fe438be0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,10 @@ /cscope.* /dist /docs/_build +/drgn-*.tar.gz /drgn.egg-info /drgn/internal/version.py /htmlcov -/drgn-*.tar.gz /python-drgn-*.src.rpm /python-drgn.spec __pycache__ diff --git a/.packit.yaml b/.packit.yaml index 1a7b83050..1ca1c09c8 100644 --- a/.packit.yaml +++ b/.packit.yaml @@ -3,8 +3,8 @@ specfile_path: python-drgn.spec synced_files: - - python-drgn.spec - - .packit.yaml + - python-drgn.spec + - .packit.yaml upstream_package_name: drgn downstream_package_name: python-drgn From 198499e74bd290541a76da027a8da39f0effe9df Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 29 Oct 2021 16:53:59 -0700 Subject: [PATCH 063/139] libdrgn: debug_info: optimize drgn_find_die_ancestors() Jay pointed out that when finding the ancestors for a DIE, we should use DW_AT_sibling to skip over subtrees that can't contain the target DIE. So, let's check each DIE that we encounter for a DW_AT_sibling attribute. dwarf_attr() also returns the end of the DIE if it doesn't find the attribute, which we can use to avoid parsing DIEs redundantly. This doesn't fit very well into drgn_dwarf_iterator, so let's just hand-roll this special type of iteration. In my measurements, this made drgn_find_die_ancestors() ~6x as fast on average. 
Closes #124. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 134 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 114 insertions(+), 20 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 007b68302..90f97f8f4 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -434,13 +434,13 @@ struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, if (!dwarf) return drgn_error_libdw(); - struct drgn_dwarf_die_iterator it; - drgn_dwarf_die_iterator_init(&it, dwarf); - Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + struct dwarf_die_vector dies = VECTOR_INIT; + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&dies); if (!cu_die) { err = &drgn_enomem; goto err; } + Dwarf_Half cu_version; Dwarf_Off type_offset; if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, @@ -448,33 +448,127 @@ struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, err = drgn_error_libdw(); goto err; } - it.debug_types = cu_version == 4 && type_offset != 0; - uint64_t type_signature; Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); + bool debug_types = cu_version == 4 && type_offset != 0; + Dwarf_Off next_cu_offset; + uint64_t type_signature; if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), - &it.next_cu_off, NULL, NULL, NULL, NULL, NULL, - it.debug_types ? &type_signature : NULL, NULL)) { + &next_cu_offset, NULL, NULL, NULL, NULL, NULL, + debug_types ? 
&type_signature : NULL, NULL)) { err = drgn_error_libdw(); goto err; } - it.cu_end = (const char *)cu_die->addr - cu_die_offset + it.next_cu_off; + const unsigned char *cu_end = + (unsigned char *)cu_die->addr - cu_die_offset + next_cu_offset; - Dwarf_Die *dies; - size_t length; - while (!(err = drgn_dwarf_die_iterator_next(&it, true, 1, &dies, - &length))) { - if (dies[length - 1].addr == die->addr) { - *dies_ret = dies; - *length_ret = length - 1; +#define TOP() (&dies.data[dies.size - 1]) + while ((char *)TOP()->addr <= (char *)die->addr) { + if (TOP()->addr == die->addr) { + *dies_ret = dies.data; + *length_ret = dies.size - 1; return NULL; } + + Dwarf_Attribute attr; + if (dwarf_attr(TOP(), DW_AT_sibling, &attr)) { + /* The top DIE has a DW_AT_sibling attribute. */ + Dwarf_Die sibling; + if (!dwarf_formref_die(&attr, &sibling)) { + err = drgn_error_libdw(); + goto err; + } + if (sibling.cu != TOP()->cu || + (char *)sibling.addr <= (char *)TOP()->addr) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_sibling"); + goto err; + } + + if ((char *)sibling.addr > (char *)die->addr) { + /* + * The top DIE's sibling is after the target + * DIE. Therefore, the target DIE must be a + * descendant of the top DIE. + */ + Dwarf_Die *child = + dwarf_die_vector_append_entry(&dies); + if (!child) { + err = &drgn_enomem; + goto err; + } + int r = dwarf_child(TOP() - 1, child); + if (r < 0) { + err = drgn_error_libdw(); + goto err; + } else if (r > 0) { + /* + * The top DIE didn't have any children, + * which should be impossible. + */ + goto not_found; + } + } else { + /* + * The top DIE's sibling is before or equal to + * the target DIE. Therefore, the target DIE + * isn't a descendant of the top DIE. Skip to + * the sibling. + */ + *TOP() = sibling; + } + } else { + /* + * The top DIE does not have a DW_AT_sibling attribute. + * Instead, we found the end of the top DIE. 
+ */ + unsigned char *addr = attr.valp; + if (!addr || addr >= cu_end) + goto not_found; + + /* + * If the top DIE has children, then addr is its first + * child. Otherwise, then addr is its sibling. (Unless + * it is a null terminator.) + */ + size_t new_size = dies.size; + if (dwarf_haschildren(TOP()) > 0) + new_size++; + + while (*addr == '\0') { + /* + * addr points to the null terminator for the + * list of siblings. Go back up to its parent. + * The next byte is either the parent's sibling + * or another null terminator. + */ + new_size--; + addr++; + if (new_size <= 1 || addr >= cu_end) + goto not_found; + } + + /* addr now points to the next DIE. Go to it. */ + if (new_size > dies.size) { + if (!dwarf_die_vector_append_entry(&dies)) { + err = &drgn_enomem; + goto err; + } + } else { + dies.size = new_size; + } + *TOP() = (Dwarf_Die){ + .cu = dies.data[0].cu, + .addr = addr, + }; + } } - if (err == &drgn_stop) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "could not find DWARF DIE ancestors"); - } +#undef TOP + +not_found: + err = drgn_error_create(DRGN_ERROR_OTHER, + "could not find DWARF DIE ancestors"); err: - drgn_dwarf_die_iterator_deinit(&it); + dwarf_die_vector_deinit(&dies); return err; } From 568f4f9c2b2d810866443c2fbd683516db8c7f89 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 24 Aug 2021 14:20:33 -0700 Subject: [PATCH 064/139] libdrgn: debug_info: remove dies and length out parameters to drgn_dwarf_die_iterator_next() These are already available in it->dies. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 81 +++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 90f97f8f4..07551dcb7 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -183,6 +183,9 @@ static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) * * This includes the .debug_types section. 
* + * @param[in,out] it Iterator containing the returned DIE and its ancestors. The + * last entry in `it->dies` is the DIE itself, the entry before that is its + * parent, the entry before that is its grandparent, etc. * @param[in] children If @c true and the last returned DIE has children, return * its first child (this is a pre-order traversal). Otherwise, return the next * DIE at the level less than or equal to the last returned DIE, i.e., the last @@ -190,46 +193,34 @@ static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) * DIE. * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, * stop after returning all DIEs in the subtree rooted at the DIE that was - * returned in the last call as `(*dies_ret)[subtree - 1]`. - * @param[out] dies_ret Returned array containing DIE and its ancestors. - * `(*dies_ret)[*length_ret - 1]` is the DIE itself, - * `(*dies_ret)[*length_ret - 2]` is its parent, `(*dies_ret)[*length_ret - 3]` - * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. - * This is valid until the next call to @ref drgn_dwarf_die_iterator_next() or - * @ref drgn_dwarf_die_iterator_deinit(). - * @param[out] length_ret Returned length of @p dies_ret. + * returned in the last call as entry `subtree - 1` in `it->dies`. * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which - * case `*length_ret` equals @p subtree and @p dies_ret refers to the root of - * the iterated subtree, non-@c NULL on error, in which case this should not be - * called again. + * case the size of `it->dies` equals @p subtree and `it->dies` refers to the + * root of the iterated subtree, non-@c NULL on error, in which case this should + * not be called again. 
*/ static struct drgn_error * drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, - size_t subtree, Dwarf_Die **dies_ret, - size_t *length_ret) + size_t subtree) { #define TOP() (&it->dies.data[it->dies.size - 1]) - struct drgn_error *err = NULL; int r; Dwarf_Die die; assert(subtree <= it->dies.size); if (it->dies.size == 0) { /* This is the first call. Get the first unit DIE. */ - if (!dwarf_die_vector_append_entry(&it->dies)) { - err = &drgn_enomem; - goto out; - } + if (!dwarf_die_vector_append_entry(&it->dies)) + return &drgn_enomem; } else { if (children) { r = dwarf_child(TOP(), &die); if (r == 0) { /* The previous DIE has a child. Return it. */ if (!dwarf_die_vector_append(&it->dies, &die)) - err = &drgn_enomem; - goto out; + return &drgn_enomem; + return NULL; } else if (r < 0) { - err = drgn_error_libdw(); - goto out; + return drgn_error_libdw(); } /* The previous DIE has no children. */ } @@ -239,8 +230,7 @@ drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, * The previous DIE is the root of the subtree. We're * done. */ - err = &drgn_stop; - goto out; + return &drgn_stop; } if (it->dies.size > 1) { @@ -248,7 +238,7 @@ drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, if (r == 0) { /* The previous DIE has a sibling. Return it. */ *TOP() = die; - goto out; + return NULL; } else if (r > 0) { if (!die.addr) goto next_unit; @@ -272,8 +262,7 @@ drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, * We're back to the root of the * subtree. We're done. 
*/ - err = &drgn_stop; - goto out; + return &drgn_stop; } if (it->dies.size == 1 || addr >= it->cu_end) @@ -286,10 +275,9 @@ drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, .cu = it->dies.data[0].cu, .addr = addr, }; - goto out; + return NULL; } else { - err = drgn_error_libdw(); - goto out; + return drgn_error_libdw(); } } } @@ -311,13 +299,12 @@ next_unit:; r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, TOP()); } - if (r) { - err = drgn_error_libdw(); - goto out; - } + if (r) + return drgn_error_libdw(); it->cu_end = ((const char *)TOP()->addr - dwarf_dieoffset(TOP()) + it->next_cu_off); + return NULL; } else if (r > 0) { if (!it->debug_types) { it->next_cu_off = 0; @@ -325,19 +312,10 @@ next_unit:; goto next_unit; } /* There are no more units. */ - err = &drgn_stop; + return &drgn_stop; } else { - err = drgn_error_libdw(); + return drgn_error_libdw(); } - -out: - /* - * Return these even in the error case to avoid maybe uninitialized - * warnings in the caller. - */ - *dies_ret = it->dies.data; - *length_ret = it->dies.size; - return err; #undef TOP } @@ -400,14 +378,11 @@ drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, } /* Now find DIEs containing the PC. 
*/ - Dwarf_Die *dies; - size_t length; - while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree, - &dies, &length))) { - int r = dwarf_haspc(&dies[length - 1], pc); + while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree))) { + int r = dwarf_haspc(&it.dies.data[it.dies.size - 1], pc); if (r > 0) { children = true; - subtree = length; + subtree = it.dies.size; } else if (r < 0) { err = drgn_error_libdw(); goto err; @@ -416,8 +391,8 @@ drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, if (err != &drgn_stop) goto err; - *dies_ret = dies; - *length_ret = length; + *dies_ret = it.dies.data; + *length_ret = it.dies.size; return NULL; err: From 9c540838301dd03964d344e863386c22c92c317f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 24 Aug 2021 15:05:37 -0700 Subject: [PATCH 065/139] libdrgn: pp: make PP_CAT not variadic The overloaded version is slower to compile, and we don't actually need it. We can add a variadic version if we need to in the future. Also add the script used to generate the macros. Signed-off-by: Omar Sandoval --- libdrgn/pp.h | 19 +++++++++++++------ scripts/gen_pp_cat.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) create mode 100755 scripts/gen_pp_cat.py diff --git a/libdrgn/pp.h b/libdrgn/pp.h index e38cadd3f..0e44677df 100644 --- a/libdrgn/pp.h +++ b/libdrgn/pp.h @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. // SPDX-License-Identifier: GPL-3.0-or-later -/* +/** * @file * * Preprocessor utilities. @@ -75,29 +75,36 @@ * Expand and concatenate arguments. * * This expands each argument and then joins them with the `##` operator. + * `PP_CAT` takes two arguments, `PP_CAT3` takes three, `PP_CAT4` takes four, + * etc. 
* * ``` * #define a foo * #define b bar - * PP_CAT(a, b, baz) // Expands to foobarbaz + * PP_CAT(a, b) // Expands to foobar * ``` * * Intermediate results are not expanded: * ``` * #define HELLO oops - * PP_CAT(HELL, O, WORLD) // Expands to HELLOWORLD, _not_ oopsWORLD + * PP_CAT3(HELL, O, WORLD) // Expands to HELLOWORLD, _not_ oopsWORLD * ``` * * All possible intermediate results must be valid preprocessing tokens: * ``` - * PP_CAT(1e, +, 3) // Undefined because +3 is not a valid preprocessing token + * PP_CAT3(1e, +, 3) // Undefined because +3 is not a valid preprocessing token * ``` * * @hideinitializer */ -#define PP_CAT(...) PP_OVERLOAD(PP_CAT_I, __VA_ARGS__)(__VA_ARGS__) +#define PP_CAT(_0, _1) PP_CAT_I2(_0, _1) +#define PP_CAT3(_0, _1, _2) PP_CAT_I3(_0, _1, _2) +#define PP_CAT4(_0, _1, _2, _3) PP_CAT_I4(_0, _1, _2, _3) +#define PP_CAT5(_0, _1, _2, _3, _4) PP_CAT_I5(_0, _1, _2, _3, _4) +#define PP_CAT6(_0, _1, _2, _3, _4, _5) PP_CAT_I6(_0, _1, _2, _3, _4, _5) +#define PP_CAT7(_0, _1, _2, _3, _4, _5, _6) PP_CAT_I7(_0, _1, _2, _3, _4, _5, _6) +#define PP_CAT8(_0, _1, _2, _3, _4, _5, _6, _7) PP_CAT_I8(_0, _1, _2, _3, _4, _5, _6, _7) /** @cond */ -#define PP_CAT_I1(_0) _0 #define PP_CAT_I2(_0, _1) _0##_1 #define PP_CAT_I3(_0, _1, _2) _0##_1##_2 #define PP_CAT_I4(_0, _1, _2, _3) _0##_1##_2##_3 diff --git a/scripts/gen_pp_cat.py b/scripts/gen_pp_cat.py new file mode 100755 index 000000000..6a9aaaf51 --- /dev/null +++ b/scripts/gen_pp_cat.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +import argparse + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("max", type=int) + args = parser.parse_args() + + for i in range(2, args.max + 1): + print( + f"#define PP_CAT{str(i) if i > 2 else ''}(" + + ", ".join(f"_{j}" for j in range(i)) + + f") PP_CAT_I{i}(" + + ", ".join(f"_{j}" for j in range(i)) + + ")" + ) + print("/** @cond */") + for i in range(2, args.max + 1): + print( + f"#define PP_CAT_I{i}(" + + ", ".join(f"_{j}" for j in range(i)) + + ") " + + "##".join(f"_{j}" for j in range(i)) + ) + print("/** @endcond */") From 8358c31d265bbd5ce308f229c042c73e3a3a443a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 2 Nov 2021 17:48:36 -0700 Subject: [PATCH 066/139] docs: document how to get debugging symbols I couldn't find any good summaries of how to get debugging symbols on various distros, so I guess we'll have to maintain our own. Signed-off-by: Omar Sandoval --- README.rst | 6 +- docs/getting_debugging_symbols.rst | 119 +++++++++++++++++++++++++++++ docs/index.rst | 1 + 3 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 docs/getting_debugging_symbols.rst diff --git a/README.rst b/README.rst index 2315994e4..0c8504b97 100644 --- a/README.rst +++ b/README.rst @@ -103,8 +103,10 @@ Quick Start drgn debugs the running kernel by default; run ``sudo drgn``. To debug a running program, run ``sudo drgn -p $PID``. To debug a core dump (either a -kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. The program -must have debugging symbols available. +kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. Make sure to +`install debugging symbols +`_ for +whatever you are debugging. 
Then, you can access variables in the program with ``prog['name']`` and access structure members with ``.``: diff --git a/docs/getting_debugging_symbols.rst b/docs/getting_debugging_symbols.rst new file mode 100644 index 000000000..efea9a16d --- /dev/null +++ b/docs/getting_debugging_symbols.rst @@ -0,0 +1,119 @@ +Getting Debugging Symbols +========================= + +.. highlight:: console + +Most Linux distributions don't install debugging symbols for installed packages +by default. This page documents how to install debugging symbols on common +distributions. If drgn prints an error like:: + + $ sudo drgn + could not get debugging information for: + kernel (could not find vmlinux for 5.14.14-200.fc34.x86_64) + ... + +Then you need to install debugging symbols. + +Fedora +------ + +Fedora makes it very easy to install debugging symbols with the `DNF +debuginfo-install plugin +`_, +which is installed by default. Simply run ``sudo dnf debuginfo-install +$package``:: + + $ sudo dnf debuginfo-install python3 + +To find out what package owns a binary, use ``rpm -qf``:: + + $ rpm -qf $(which python3) + python3-3.9.7-1.fc34.x86_64 + +To install symbols for the running kernel:: + + $ sudo dnf debuginfo-install kernel-$(uname -r) + +Also see the `Fedora documentation +`_. + +Debian +------ + +Debian requires you to manually add the debugging symbol repositories:: + + $ sudo tee /etc/apt/sources.list.d/debug.list << EOF + deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-debug main + deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-proposed-updates-debug main + EOF + $ sudo apt update + +Then, debugging symbol packages can be installed with ``sudo apt install``. 
+Some debugging symbol packages are named with a ``-dbg`` suffix:: + + $ sudo apt install python3-dbg + +And some are named with a ``-dbgsym`` suffix:: + + $ sudo apt install coreutils-dbgsym + +You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` +package to find the correct name:: + + $ sudo apt install debian-goodies + $ find-dbgsym-packages $(which python3) + libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym + $ find-dbgsym-packages $(which cat) + coreutils-dbgsym libc6-dbg + +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbg + +Also see the `Debian documentation +`_. + +Ubuntu +------ + +On Ubuntu, you must install the debugging symbol archive signing key and +manually add the debugging symbol repositories:: + + $ sudo apt update + $ sudo apt install ubuntu-dbgsym-keyring + $ sudo tee /etc/apt/sources.list.d/debug.list << EOF + deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse + deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse + deb http://ddebs.ubuntu.com $(lsb_release -cs)-proposed main restricted universe multiverse + EOF + $ sudo apt update + +Like Debian, some debugging symbol packages are named with a ``-dbg`` suffix +and some are named with a ``-dbgsym`` suffix:: + + $ sudo apt install python3-dbg + $ sudo apt install coreutils-dbgsym + +You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` +package to find the correct name:: + + $ sudo apt install debian-goodies + $ find-dbgsym-packages $(which python3) + libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym + $ find-dbgsym-packages $(which cat) + coreutils-dbgsym libc6-dbg + +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbgsym + +Also see the `Ubuntu documentation +`_. + +Arch Linux +---------- + +Arch Linux unfortunately does not make debugging symbols available. 
Packages +must be manually rebuilt with debugging symbols enabled. See the `ArchWiki +`_ and the `feature +request `_. diff --git a/docs/index.rst b/docs/index.rst index f0e23d4bb..b4a68b3f3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -37,3 +37,4 @@ Table of Contents api_reference helpers case_studies + getting_debugging_symbols From bc2d5333c0cf764b0023a1f3e7c0df364ba30f1a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 3 Nov 2021 11:48:34 -0700 Subject: [PATCH 067/139] README: update link to crash Signed-off-by: Omar Sandoval --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 0c8504b97..97a090118 100644 --- a/README.rst +++ b/README.rst @@ -43,7 +43,7 @@ library that can be used to build debugging and introspection tools; see the official `tools `_. drgn was developed for debugging the Linux kernel (as an alternative to the -`crash `_ utility), but it can also debug +`crash `_ utility), but it can also debug userspace programs written in C. C++ support is in progress. .. end-introduction From e5021952c8c7f57d42877cd0f7b1aef421eae54b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 3 Nov 2021 15:58:11 -0700 Subject: [PATCH 068/139] docs: disable sphinx.ext.viewcode viewcode works by importing modules. This doesn't actually work on Read the Docs because we don't build and install the C extension. It looks like there are workarounds (viewcode-find-source), but let's disable it for now. 
Signed-off-by: Omar Sandoval --- docs/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index d2d981c79..efb70d3c5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,7 +11,6 @@ "setuptools_config", "sphinx.ext.extlinks", "sphinx.ext.intersphinx", - "sphinx.ext.viewcode", ] drgndoc_paths = ["../drgn", "../_drgn.pyi"] From d9192b72455ed00f94600cf6e52e53b84b7bf948 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 3 Nov 2021 16:01:27 -0700 Subject: [PATCH 069/139] docs: remove outdated comment about helper types As of commit 0cf3320a89b3 ("Add type annotations to helpers"), helpers have type annotations instead of C signatures. Signed-off-by: Omar Sandoval --- drgn/helpers/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index b52f7e47f..4b3a366fc 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -10,10 +10,6 @@ for the Linux kernel. In the future, there may be helpers for, e.g., glibc and libstdc++. -Parameter types and return types are :class:`drgn.Object` unless noted -otherwise. Many helpers include a C function signature indicating the expected -object types. - Generic Helpers =============== From 1b7badad0a7253b9111163327792660b9367a9dd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 3 Nov 2021 16:05:18 -0700 Subject: [PATCH 070/139] docs: expand and reorganize installation instructions * Mention installing drgn using a package manager on Fedora/EPEL. Closes #103. * Mention that pip installs a binary wheel by default. * Include instructions for installing from source in README. 
Signed-off-by: Omar Sandoval --- CONTRIBUTING.rst | 27 +++++++++---- README.rst | 88 ++++++++++++++++++++++++++++++++++--------- docs/installation.rst | 62 ++++++++++++------------------ 3 files changed, 114 insertions(+), 63 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 9937e961a..98d884b9a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -7,18 +7,25 @@ submit changes for drgn. Building -------- -The easiest way to develop drgn is by building and running it locally. See the -`installation documentation -`_. +The easiest way to develop drgn is by building and running it locally. Please +build with warnings enabled. Install the dependencies from the `installation +instructions `_, then run: + +.. code-block:: console + + $ git clone https://github.com/osandov/drgn.git + $ cd drgn + $ CFLAGS="-Wall -Werror -g -O2" python3 setup.py build_ext -i + $ python3 -m drgn --help Testing ------- -.. highlight:: console - Tests should be added for all features and bug fixes. -drgn's test suite can be run with:: +drgn's test suite can be run with: + +.. code-block:: console $ python3 setup.py test @@ -27,7 +34,9 @@ add ``-K``. See `vmtest `_ for more details. Tests can also be run manually with `unittest `_ -after building locally:: +after building locally: + +.. code-block:: console $ python3 -m unittest discover -v @@ -74,7 +83,9 @@ Python Python code in drgn should be compatible with Python 3.6 and newer. Python code should be formatted with `black `_ -and `isort `_:: +and `isort `_: + +.. code-block:: console $ isort . && black . diff --git a/README.rst b/README.rst index 97a090118..1789e9deb 100644 --- a/README.rst +++ b/README.rst @@ -51,47 +51,99 @@ userspace programs written in C. C++ support is in progress. Documentation can be found at `drgn.readthedocs.io `_. +.. start-installation + Installation ------------ -.. 
start-install-dependencies +Package Manager +^^^^^^^^^^^^^^^ -Install dependencies: +drgn can be installed using the package manager on some Linux distributions. -Arch Linux: +* Fedora >= 32 -.. code-block:: console + .. code-block:: console - $ sudo pacman -S --needed gcc libelf make pkgconf python python-pip python-setuptools + $ sudo dnf install drgn -Debian/Ubuntu: +* RHEL/CentOS >= 8 -.. code-block:: console + `Enable EPEL `_. Then: + + .. code-block:: console + + $ sudo dnf install drgn + +* Arch Linux - $ sudo apt-get install gcc liblzma-dev libelf-dev libdw-dev make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + Install the `drgn `_ package from + the `AUR `_. -Note that Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and older) ship -Python versions which are too old. Python 3.6 or newer must be installed -manually. +pip +^^^ -Fedora: +If your Linux distribution doesn't package the latest release of drgn, you can +install it with `pip `_. + +First, `install pip +`_. +Then, run: .. code-block:: console - $ sudo dnf install elfutils-devel gcc make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo pip3 install drgn + +This will install a binary wheel by default. If you get a build error, then pip +wasn't able to use the binary wheel. Install the dependencies listed `below +<#from-source>`_ and try again. + +Note that RHEL/CentOS 6, Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and +older) ship Python versions which are too old. Python 3.6 or newer must be +installed. + +From Source +^^^^^^^^^^^ -Optionally, install: +To get the development version of drgn, you will need to build it from source. +First, install dependencies: -* `libkdumpfile `_ if you want - support for kdump-compressed kernel core dumps +* Fedora/RHEL/CentOS -.. end-install-dependencies + .. 
code-block:: console + + $ sudo dnf install autoconf automake elfutils-devel gawk gcc git libtool make pkgconf python3 python3-devel python3-pip python3-setuptools + + Replace ``dnf`` with ``yum`` for RHEL/CentOS < 8. + +* Debian/Ubuntu + + .. code-block:: console + + $ sudo apt-get install autoconf automake gawk gcc git liblzma-dev libelf-dev libdw-dev make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + +* Arch Linux + + .. code-block:: console + + $ sudo pacman -S --needed autoconf automake gawk gcc git libelf make pkgconf python python-pip python-setuptools + +Optionally, install `libkdumpfile `_ +if you want support for the `makedumpfile +`_ compressed kernel core dump +format. ``libkdumpfile`` is currently only packaged on Fedora and EPEL. For +other distributions, you must install it manually. Then, run: .. code-block:: console - $ sudo pip3 install drgn + $ git clone https://github.com/osandov/drgn.git + $ cd drgn + $ python3 setup.py build + $ sudo python3 setup.py install + +.. end-installation See the `installation documentation `_ for more options. diff --git a/docs/installation.rst b/docs/installation.rst index e2f12d96d..e60a0381e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -11,6 +11,12 @@ drgn depends on: - `Python `_ 3.6 or newer - `elfutils `_ 0.165 or newer +It optionally depends on: + +- `libkdumpfile `_ for `makedumpfile + `_ compressed kernel core dump + format support + The build requires: - `GCC `_ @@ -18,10 +24,6 @@ The build requires: - `pkgconf `_ - `setuptools `_ -.. include:: ../README.rst - :start-after: start-install-dependencies - :end-before: end-install-dependencies - Building from the Git repository (rather than a release tarball) additionally requires: @@ -30,49 +32,35 @@ requires: - `libtool `_ - `GNU Awk `_ 4.0 or newer -Simply add ``autoconf automake gawk libtool`` to the appropriate installation -command above. - -Installation ------------- +.. 
include:: ../README.rst + :start-after: start-installation + :end-before: end-installation .. highlight:: console -After installing dependencies, the latest release of drgn can be installed -globally with `pip `_:: - - $ sudo pip3 install drgn - $ drgn --help +Virtual Environment +^^^^^^^^^^^^^^^^^^^ -The development version can be built and installed manually:: +The above options all install drgn globally. You can also install drgn in a +`virtual environment `_, either +with pip:: - $ git clone https://github.com/osandov/drgn.git - $ cd drgn - $ python3 setup.py build - $ sudo python3 setup.py install - $ drgn --help + $ python3 -m venv drgnenv + $ source drgnenv/bin/activate + (drgnenv) $ pip3 install drgn + (drgnenv) $ drgn --help -Both of these options can be done in a `virtual environment -`_ if you do not wish to install -drgn globally:: +Or from source:: $ python3 -m venv drgnenv $ source drgnenv/bin/activate - (drgenv) $ pip3 install drgn - (drgenv) $ drgn --help + (drgnenv) $ python3 setup.py install + (drgnenv) $ drgn --help -Development ------------ +Running Locally +--------------- -For development, drgn can be built and run locally:: +If you build drgn from source, you can also run it without installing it:: - $ CFLAGS="-Wall -Werror -g -O2" python3 setup.py build_ext -i + $ python3 setup.py build_ext -i $ python3 -m drgn --help - -libkdumpfile ------------- - -drgn supports kdump-compressed kernel core dumps when `libkdumpfile -`_ is available. libkdumpfile is not -packaged for most Linux distributions, so it must be built and installed -manually. If it is installed, then drgn is automatically built with support. From d36b12c682317dcdfdbec78be85e97b17d83695d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 3 Nov 2021 17:38:31 -0700 Subject: [PATCH 071/139] CI: add Python 3.10 Python 3.10 was released in October. No changes to drgn are required. 
Signed-off-by: Omar Sandoval --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7601e71c..86d518b8a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9, 3.8, 3.7, 3.6] + python-version: ['3.10', '3.9', '3.8', '3.7', '3.6'] cc: [gcc, clang] fail-fast: false env: From a5845e63d48d427b559dda64c3e0aa0d18b8f961 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Nov 2021 13:47:52 -0700 Subject: [PATCH 072/139] tests: fix race condition in stack trace tests Stephen Brennan reported a flaky test while working on #121: ====================================================================== ERROR: test_by_task_struct (tests.helpers.linux.test_stack_trace.TestStackTrace) ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/runner/work/drgn/drgn/tests/helpers/linux/test_stack_trace.py", line 22, in test_by_task_struct self.assertIn("pause", str(self.prog.stack_trace(find_task(self.prog, pid)))) ValueError: cannot unwind stack of running task The problem is that the stack trace tests wait for the thread state to change to "S". However, the state is updated while the thread is still technically running. For example, the pause() system call is implemented as: SYSCALL_DEFINE0(pause) { while (!signal_pending(current)) { __set_current_state(TASK_INTERRUPTIBLE); schedule(); } return -ERESTARTNOHAND; } If Program.stack_trace() accesses the thread after the state is changed but before the thread has actually been scheduled out (namely, before task_struct::on_cpu is set to 0), it will fail. Instead, let's check /proc/$pid/syscall, which contains "running" until the thread is completely scheduled out. 
Signed-off-by: Omar Sandoval --- tests/helpers/linux/__init__.py | 9 +++++++++ tests/helpers/linux/test_stack_trace.py | 10 +++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index f4866be85..b6508158f 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -94,6 +94,15 @@ def proc_state(pid): return re.search(r"State:\s*(\S)", f.read(), re.M).group(1) +# Return whether a process is blocked and fully scheduled out. The process +# state is updated while the process is still running, so use this instead of +# proc_state(pid) != "R" to avoid races. This is not accurate if pid is the +# calling thread. +def proc_blocked(pid): + with open(f"/proc/{pid}/syscall", "r") as f: + return f.read() != "running\n" + + def parse_range_list(s): values = set() s = s.strip() diff --git a/tests/helpers/linux/test_stack_trace.py b/tests/helpers/linux/test_stack_trace.py index 0716439b0..92c10e1d7 100644 --- a/tests/helpers/linux/test_stack_trace.py +++ b/tests/helpers/linux/test_stack_trace.py @@ -9,7 +9,7 @@ from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, - proc_state, + proc_blocked, setenv, wait_until, ) @@ -18,7 +18,7 @@ class TestStackTrace(LinuxHelperTestCase): def test_by_task_struct(self): pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) self.assertIn("pause", str(self.prog.stack_trace(find_task(self.prog, pid)))) os.kill(pid, signal.SIGKILL) os.waitpid(pid, 0) @@ -33,7 +33,7 @@ def _test_by_pid(self, orc): prog.set_kernel() prog.load_default_debug_info() pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) self.assertIn("pause", str(prog.stack_trace(pid))) os.kill(pid, signal.SIGKILL) os.waitpid(pid, 0) @@ -46,7 +46,7 @@ def test_by_pid_orc(self): def test_local_variable(self): pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == 
"S") + wait_until(proc_blocked, pid) for frame in self.prog.stack_trace(pid): if frame.name in ("context_switch", "__schedule"): try: @@ -75,7 +75,7 @@ def test_registers(self): # Smoke test that we get at least one register and that # StackFrame.registers() agrees with StackFrame.register(). pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) trace = self.prog.stack_trace(pid) have_registers = False for frame in trace: From bc85c2da08b2215eea1975f0d2d1168eeab51d71 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Nov 2021 14:36:45 -0700 Subject: [PATCH 073/139] libdrgn: kdump: fix kdump_vmcoreinfo_raw() memory leak Commit dd503c975ab3 ("Fix kdump_vmcoreinfo_raw()") in libkdumpfile changed the buffer returned by kdump_vmcoreinfo_raw() to be dynamically allocated. We need to free it on versions containing that change. Closes #76. Signed-off-by: Omar Sandoval --- libdrgn/kdump.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 2bbfd5781..5f3ec485b 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -72,7 +72,6 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) struct drgn_error *err; kdump_ctx_t *ctx; kdump_status ks; - const char *vmcoreinfo; bool had_platform; ctx = kdump_new(); @@ -96,6 +95,11 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) goto err; } +#if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 4, 1) + char *vmcoreinfo; +#else + const char *vmcoreinfo; +#endif ks = kdump_vmcoreinfo_raw(ctx, &vmcoreinfo); if (ks != KDUMP_OK) { err = drgn_error_format(DRGN_ERROR_OTHER, @@ -109,6 +113,14 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) if (err) goto err; + /* + * As of libkdumpfile 0.4.1, the string returned by + * kdump_vmcoreinfo_raw() needs to be freed. 
+ */ +#if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 4, 1) + free(vmcoreinfo); +#endif + had_platform = prog->has_platform; if (!had_platform) { struct drgn_platform platform; From 794ffc22e8ad84d85b902cb60736530ad6c013fc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Nov 2021 14:43:15 -0700 Subject: [PATCH 074/139] libdrgn: kdump: fix leak in leak fix The previous fix still leaks the vmcoreinfo buffer if parse_vmcoreinfo() fails. Fixes: bc85c2da08b2 ("libdrgn: kdump: fix kdump_vmcoreinfo_raw() memory leak") Signed-off-by: Omar Sandoval --- libdrgn/kdump.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 5f3ec485b..adb98976a 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -110,9 +110,6 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) err = parse_vmcoreinfo(vmcoreinfo, strlen(vmcoreinfo) + 1, &prog->vmcoreinfo); - if (err) - goto err; - /* * As of libkdumpfile 0.4.1, the string returned by * kdump_vmcoreinfo_raw() needs to be freed. @@ -120,6 +117,8 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) #if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 4, 1) free(vmcoreinfo); #endif + if (err) + goto err; had_platform = prog->has_platform; if (!had_platform) { From d1745755f116cf4b082074b0d720ca36ae45dff4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 10 Nov 2021 15:09:29 -0800 Subject: [PATCH 075/139] Fix some include-what-you-use warnings Also: * Rename struct string to struct nstring and move it to its own header. * Fix scripts/iwyu.py, which was broken by commit 5541fad063aa ("Fix some flake8 errors"). * Add workarounds for a few outstanding include-what-you-use issues. There is still a false positive for include-what-you-use/include-what-you-use#970, but hopefully that is fixed soon. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 1 + libdrgn/bitops.h | 20 ++++++------- libdrgn/build-aux/gen_arch.awk | 2 +- libdrgn/dwarf_index.c | 7 +++-- libdrgn/dwarf_index.h | 5 +--- libdrgn/hash_table.h | 5 ++-- libdrgn/kdump.c | 1 + libdrgn/language_c.c | 5 ++-- libdrgn/nstring.h | 37 ++++++++++++++++++++++++ libdrgn/path.c | 11 ++++---- libdrgn/path.h | 11 ++++---- libdrgn/program.c | 6 ++-- libdrgn/program.h | 9 +++--- libdrgn/python/module.c | 4 +-- libdrgn/stack_trace.c | 4 +-- libdrgn/stack_trace.h | 1 + libdrgn/symbol.c | 2 +- libdrgn/util.h | 22 --------------- scripts/iwyu.imp | 6 ++++ scripts/iwyu.py | 51 ++++++++++++++++++++++++++++++---- 20 files changed, 136 insertions(+), 74 deletions(-) create mode 100644 libdrgn/nstring.h create mode 100644 scripts/iwyu.imp diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 83830cf47..734f537b1 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -55,6 +55,7 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ memory_reader.c \ memory_reader.h \ minmax.h \ + nstring.h \ object.c \ object.h \ object_index.c \ diff --git a/libdrgn/bitops.h b/libdrgn/bitops.h index 49b1b748d..7daa64437 100644 --- a/libdrgn/bitops.h +++ b/libdrgn/bitops.h @@ -38,7 +38,7 @@ * * @param[in] x Integer. */ -#define ctz(x) generic_bitop(x, PP_UNIQUE(_x), __builtin_ctz) +#define ctz(x) generic_bitop(x, PP_UNIQUE(_x), builtin_bitop_impl, ctz) /** * Find Last Set bit. @@ -60,7 +60,7 @@ * * @param[in] x Integer. */ -#define fls(x) generic_bitop(x, PP_UNIQUE(_x), fls_) +#define fls(x) generic_bitop(x, PP_UNIQUE(_x), fls_impl,) /** @cond */ /* * The straightfoward implementation is bits - clz. However, as noted by the @@ -71,21 +71,19 @@ * This doesn't do the normal macro argument safety stuff because it should only * be used via generic_bitop() which already does it. */ -#define fls_impl(x, type, suffix) \ - (x ? 
1 + ((8 * sizeof(type) - 1) ^ __builtin_clz##suffix(x)) : 0) -#define fls_(x) fls_impl(x, unsigned int,) -#define fls_l(x) fls_impl(x, unsigned long, l) -#define fls_ll(x) fls_impl(x, unsigned long long, ll) +#define fls_impl(arg, suffix, x) \ + (x ? 1 + ((8 * sizeof(0u##suffix) - 1) ^ __builtin_clz##suffix(x)) : 0) -#define generic_bitop(x, unique_x, op) ({ \ +#define builtin_bitop_impl(arg, suffix, x) __builtin_##arg##suffix(x) +#define generic_bitop(x, unique_x, impl, impl_arg) ({ \ __auto_type unique_x = (x); \ _Static_assert(sizeof(unique_x) <= sizeof(unsigned long long), \ "type is too large"); \ (unsigned int)(sizeof(unique_x) <= sizeof(unsigned int) ? \ - op(unique_x) : \ + impl(impl_arg, , unique_x) : \ sizeof(unique_x) <= sizeof(unsigned long) ? \ - op##l(unique_x) : \ - op##ll(unique_x)); \ + impl(impl_arg, l, unique_x) : \ + impl(impl_arg, ll, unique_x)); \ }) /** @endcond */ diff --git a/libdrgn/build-aux/gen_arch.awk b/libdrgn/build-aux/gen_arch.awk index 102bd7165..4bda1b7d1 100644 --- a/libdrgn/build-aux/gen_arch.awk +++ b/libdrgn/build-aux/gen_arch.awk @@ -133,7 +133,7 @@ END { print "/* Generated by libdrgn/build-aux/gen_arch.awk. 
*/" print "" - print "#include \"arch_register_layout.h\"" + print "#include \"arch_register_layout.h\" // IWYU pragma: export" print "" print "static const struct drgn_register registers[] = {" diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index f26a6030b..eac262bf2 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include +#include #include #include #include @@ -15,7 +16,6 @@ #include "drgn.h" #include "dwarf_index.h" #include "error.h" -#include "path.h" #include "platform.h" #include "util.h" @@ -188,7 +188,8 @@ struct drgn_dwarf_index_pending_die { DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) -DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_index_die_map, string_hash_pair, string_eq) +DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_index_die_map, nstring_hash_pair, + nstring_eq) DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) static inline uintptr_t @@ -2717,7 +2718,7 @@ drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, if (err) return err; it->ns = ns; - struct string key = { + struct nstring key = { .str = name, .len = name_len, }; diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index b503f18ac..d612ccab7 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -38,9 +38,6 @@ static inline int omp_get_max_threads(void) #include "hash_table.h" #include "vector.h" -struct drgn_debug_info_module; -struct drgn_error; - /** * @ingroup Internals * @@ -91,7 +88,7 @@ struct drgn_dwarf_index_die { uintptr_t addr; }; -DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, uint32_t) +DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct nstring, uint32_t) DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) struct drgn_dwarf_index_shard { diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 428a78067..d82a132a5 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -27,6 +27,7 @@ #include 
"bitops.h" #include "cityhash.h" #include "minmax.h" +#include "nstring.h" // IWYU pragma: export #include "util.h" /** @@ -1750,8 +1751,8 @@ bool c_string_key_eq(const char * const *a, const char * const *b); #define c_string_key_eq(a, b) ((bool)(strcmp(*(a), *(b)) == 0)) #endif -/** Double hash a @ref string. */ -static inline struct hash_pair string_hash_pair(const struct string *key) +/** Double hash a @ref nstring. */ +static inline struct hash_pair nstring_hash_pair(const struct nstring *key) { return hash_pair_from_avalanching_hash(hash_bytes(key->str, key->len)); } diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index adb98976a..dd651938a 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -3,6 +3,7 @@ #include #include +#include #include #include "linux_kernel.h" diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 674360fb0..b12728dcd 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1686,7 +1686,8 @@ static const char *token_spelling[] = { [C_TOKEN_ENUM] = "enum", }; -DEFINE_HASH_MAP(c_keyword_map, struct string, int, string_hash_pair, string_eq) +DEFINE_HASH_MAP(c_keyword_map, struct nstring, int, nstring_hash_pair, + nstring_eq) static struct c_keyword_map c_keywords = HASH_TABLE_INIT; @@ -1750,7 +1751,7 @@ struct drgn_error *drgn_lexer_c(struct drgn_lexer *lexer, break; default: if (isalpha(*p) || *p == '_') { - struct string key; + struct nstring key; struct c_keyword_map_iterator it; do { diff --git a/libdrgn/nstring.h b/libdrgn/nstring.h new file mode 100644 index 000000000..f916044b3 --- /dev/null +++ b/libdrgn/nstring.h @@ -0,0 +1,37 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * String with length. + */ + +#ifndef DRGN_NSTRING_H +#define DRGN_NSTRING_H + +#include + +/** A string with a stored length. */ +struct nstring { + /** + * The string, which is not necessarily null-terminated and may have + * embedded null bytes. 
+ */ + const char *str; + /** The length in bytes of the string. */ + size_t len; +}; + +/** Compare two @ref nstring keys for equality. */ +static inline bool nstring_eq(const struct nstring *a, const struct nstring *b) +{ + /* + * len == 0 is a special case because memcmp(NULL, NULL, 0) is + * technically undefined. + */ + return (a->len == b->len && + (a->len == 0 || memcmp(a->str, b->str, a->len) == 0)); +} + +#endif /* DRGN_NSTRING_H */ diff --git a/libdrgn/path.c b/libdrgn/path.c index cafc4dea0..d7bf2f46f 100644 --- a/libdrgn/path.c +++ b/libdrgn/path.c @@ -6,12 +6,13 @@ #include #include "path.h" +#include "util.h" bool path_iterator_next(struct path_iterator *it, const char **component_ret, size_t *component_len_ret) { while (it->num_components) { - struct string *cur = &it->components[it->num_components - 1]; + struct nstring *cur = &it->components[it->num_components - 1]; while (cur->len > 0) { if (cur->str[cur->len - 1] == '/') { if (cur->len == 1) { @@ -95,7 +96,7 @@ bool die_matches_filename(Dwarf_Die *die, const char *filename) if (!filename || !filename[0]) return true; - struct string die_components[2]; + struct nstring die_components[2]; struct path_iterator die_path = { .components = die_components, }; @@ -123,7 +124,7 @@ bool die_matches_filename(Dwarf_Die *die, const char *filename) die_path.num_components++; struct path_iterator needle = { - .components = (struct string []){ + .components = (struct nstring []){ { filename, strlen(filename) } }, .num_components = 1, @@ -136,13 +137,13 @@ LIBDRGN_PUBLIC bool drgn_filename_matches(const char *haystack, const char *needle) { struct path_iterator haystack_path = { - .components = (struct string []){ + .components = (struct nstring []){ { haystack, strlen(haystack) } }, .num_components = 1, }; struct path_iterator needle_path = { - .components = (struct string []){ + .components = (struct nstring []){ { needle, strlen(needle) } }, .num_components = 1, diff --git a/libdrgn/path.h b/libdrgn/path.h 
index 6a36349ce..d1a53a879 100644 --- a/libdrgn/path.h +++ b/libdrgn/path.h @@ -12,12 +12,11 @@ #ifndef DRGN_PATH_H #define DRGN_PATH_H +#include #include #include -#include - -#include "util.h" +#include "nstring.h" // IWYU pragma: export /** * @ingroup Internals @@ -61,10 +60,10 @@ struct path_iterator { * Array of input components. * * The input components are treated as if they were joined with a "/". - * @ref string::str and @ref string::len should be initialized for each - * component. The latter will be modified as the path is iterated. + * @ref nstring::str and @ref nstring::len should be initialized for + * each component. The latter will be modified as the path is iterated. */ - struct string *components; + struct nstring *components; /** Number of components in @ref path_iterator::components. */ size_t num_components; /** diff --git a/libdrgn/program.c b/libdrgn/program.c index 7e18157f5..90952f4e8 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -679,7 +679,7 @@ struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, size_t size) { if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - struct string *entry = + struct nstring *entry = drgn_prstatus_vector_append_entry(&prog->prstatus_vector); if (!entry) return &drgn_enomem; @@ -784,7 +784,7 @@ static struct drgn_error *drgn_program_cache_prstatus(struct drgn_program *prog) struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, uint32_t cpu, - struct string *ret, + struct nstring *ret, uint32_t *tid_ret) { assert(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL); @@ -804,7 +804,7 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, uint32_t tid, - struct string *ret) + struct nstring *ret) { struct drgn_error *err; struct drgn_prstatus_map_iterator it; diff --git a/libdrgn/program.h b/libdrgn/program.h index ed1217bcf..741913a17 100644 --- 
a/libdrgn/program.h +++ b/libdrgn/program.h @@ -28,7 +28,6 @@ #include "type.h" #include "vector.h" -struct drgn_debug_info; struct drgn_symbol; /** @@ -66,8 +65,8 @@ struct vmcoreinfo { }; DEFINE_VECTOR_TYPE(drgn_typep_vector, struct drgn_type *) -DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct string) -DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct string) +DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct nstring) +DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct nstring) struct drgn_program { /** @privatesection */ @@ -268,7 +267,7 @@ drgn_program_address_mask(const struct drgn_program *prog, uint64_t *ret) */ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, uint32_t cpu, - struct string *ret, + struct nstring *ret, uint32_t *tid_ret); /** @@ -281,7 +280,7 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, */ struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, uint32_t tid, - struct string *ret); + struct nstring *ret); /** * Cache the @c NT_PRSTATUS note provided by @p data in @p prog. 
diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 449e6f7ea..8d3260780 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -43,7 +43,7 @@ static PyObject *filename_matches(PyObject *self, PyObject *args, return NULL; struct path_iterator haystack = { - .components = (struct string [1]){}, + .components = (struct nstring [1]){}, .num_components = 0, }; if (haystack_arg.path) { @@ -52,7 +52,7 @@ static PyObject *filename_matches(PyObject *self, PyObject *args, haystack.num_components = 1; } struct path_iterator needle = { - .components = (struct string [1]){}, + .components = (struct nstring [1]){}, .num_components = 0, }; if (needle_arg.path) { diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 4304dba54..be43f1601 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -14,9 +14,9 @@ #include "debug_info.h" #include "drgn.h" #include "error.h" -#include "hash_table.h" #include "helpers.h" #include "minmax.h" +#include "nstring.h" #include "platform.h" #include "program.h" #include "register_state.h" @@ -525,7 +525,7 @@ drgn_get_initial_registers(struct drgn_program *prog, uint32_t tid, struct drgn_error *err; struct drgn_object obj; struct drgn_object tmp; - struct string prstatus; + struct nstring prstatus; drgn_object_init(&obj, prog); drgn_object_init(&tmp, prog); diff --git a/libdrgn/stack_trace.h b/libdrgn/stack_trace.h index 1a932a1a8..a2eb9a254 100644 --- a/libdrgn/stack_trace.h +++ b/libdrgn/stack_trace.h @@ -12,6 +12,7 @@ #ifndef DRGN_STACK_TRACE_H #define DRGN_STACK_TRACE_H +#include #include /** diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index f48925391..b2aae8444 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later -#include +#include #include #include diff --git a/libdrgn/util.h b/libdrgn/util.h index 7f4c84a11..ff3a9c9dd 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -145,26 +145,4 @@ static inline uint64_t uint_max(int n) #define add_to_possibly_null_pointer(ptr, i) \ ((typeof(ptr))((uintptr_t)(ptr) + (i) * sizeof(*(ptr)))) -/** A string with a stored length. */ -struct string { - /** - * The string, which is not necessarily null-terminated and may have - * embedded null bytes. - */ - const char *str; - /** The length in bytes of the string. */ - size_t len; -}; - -/** Compare two @ref string keys for equality. */ -static inline bool string_eq(const struct string *a, const struct string *b) -{ - /* - * len == 0 is a special case because memcmp(NULL, NULL, 0) is - * technically undefined. - */ - return (a->len == b->len && - (a->len == 0 || memcmp(a->str, b->str, a->len) == 0)); -} - #endif /* DRGN_UTIL_H */ diff --git a/scripts/iwyu.imp b/scripts/iwyu.imp new file mode 100644 index 000000000..a1d0d181e --- /dev/null +++ b/scripts/iwyu.imp @@ -0,0 +1,6 @@ +[ + # include-what-you-use/include-what-you-use#967 + { include: [ "", public, "", public ] }, + # include-what-you-use/include-what-you-use#968 + { include: [ "", private, "", public ] }, +] diff --git a/scripts/iwyu.py b/scripts/iwyu.py index 85c684d51..07423d0fb 100755 --- a/scripts/iwyu.py +++ b/scripts/iwyu.py @@ -122,7 +122,7 @@ def gen_python_mapping_file(mapping_path): # For some reason, include-what-you-mean wants struct _typeobject, but # find-all-symbols only reports PyTypeObject. Add it manually. 
imp.write( - ' {{"symbol": ["_typeobject", "private", "", "public"]}}, # From cpython/object.h\n' + ' {"symbol": ["_typeobject", "private", "", "public"]}, # From cpython/object.h\n' ) imp.write("]\n") @@ -130,6 +130,44 @@ def gen_python_mapping_file(mapping_path): os.rename(mapping_path + ".tmp", mapping_path) +def iwyu_associated_header(path): + with open(path, "r") as f: + match = re.search( + r'^\s*#\s*include\s+"([^"]+)"\s+//\s+IWYU\s+pragma:\s+associated', + f.read(), + re.M, + ) + if match: + return os.path.join(os.path.dirname(path), match.group(1)) + if path.endswith(".c"): + return path[:-2] + ".h" + return None + + +def ignore_line(path, state, line): + # include-what-you-use/include-what-you-use#969: iwyu recommends bogus + # forward declarations for the anonymous unions generated by + # BINARY_OP_SIGNED_2C. + if line.endswith("::;"): + return True + + # include-what-you-use/include-what-you-use#971: drgn.h "exports" a forward + # declaration of several opaque types, but iwyu doesn't have such a notion. + if re.fullmatch( + r"struct drgn_(language|platform|program|register|stack_trace|symbol);", line + ): + paths = [path] + associated_header = iwyu_associated_header(path) + if associated_header is not None: + paths.append(associated_header) + for path in paths: + with open(path, "r") as f: + if re.search(r'^#include "(drgn.h|drgnpy.h)"', f.read(), re.M): + return True + + return False + + def main(): parser = argparse.ArgumentParser(description="run include-what-you-use on drgn") parser.add_argument( @@ -181,6 +219,8 @@ def main(): + [ "-Xiwyu", "--mapping_file=" + os.path.abspath(python_mapping_file), + "-Xiwyu", + "--mapping_file=" + os.path.abspath("scripts/iwyu.imp"), "-w", # We don't want warnings from Clang. 
], cwd=command["directory"], @@ -209,14 +249,15 @@ def main(): else: header = None lines.clear() - elif state != "include_list" and line: + elif ( + line + and state != "include_list" + and not ignore_line(path, state, line) + ): if header is not None: print("\n" + header) header = None print(line) - print( - "Please ignore suggestions to declare opaque types if the appropriate header has already been included." - ) if __name__ == "__main__": From abc3ee4da0a652172b8ead878dadc3c31faddc99 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 01:12:14 -0800 Subject: [PATCH 076/139] libdrgn: dwarf_index: clean up index_die() index_die() can only fail if it's out of memory, so return a bool instead of a struct drgn_error. Also clean up the declarations. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 63 ++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index eac262bf2..63b3df9ef 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2082,44 +2082,31 @@ static bool append_die_entry(struct drgn_dwarf_index *dindex, return true; } -static struct drgn_error *index_die(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index_cu *cu, - const char *name, uint8_t tag, - uint64_t file_name_hash, - struct drgn_debug_info_module *module, - uintptr_t addr) +static bool index_die(struct drgn_dwarf_index_namespace *ns, + struct drgn_dwarf_index_cu *cu, const char *name, + uint8_t tag, uint64_t file_name_hash, + struct drgn_debug_info_module *module, uintptr_t addr) { - struct drgn_error *err; + bool success = false; struct drgn_dwarf_index_die_map_entry entry = { - .key = { - .str = name, - .len = strlen(name), - }, + .key = { name, strlen(name) }, }; - struct hash_pair hp; - struct drgn_dwarf_index_shard *shard; - struct drgn_dwarf_index_die_map_iterator it; - size_t index; - struct drgn_dwarf_index_die *die; - - hp = 
drgn_dwarf_index_die_map_hash(&entry.key); - shard = &ns->shards[hash_pair_to_shard(hp)]; + struct hash_pair hp = drgn_dwarf_index_die_map_hash(&entry.key); + struct drgn_dwarf_index_shard *shard = + &ns->shards[hash_pair_to_shard(hp)]; omp_set_lock(&shard->lock); - it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, - hp); + struct drgn_dwarf_index_die_map_iterator it = + drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, + hp); + struct drgn_dwarf_index_die *die; if (!it.entry) { if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, - module, addr)) { - err = &drgn_enomem; + module, addr)) goto err; - } entry.value = shard->dies.size - 1; if (!drgn_dwarf_index_die_map_insert_searched(&shard->map, - &entry, hp, - NULL)) { - err = &drgn_enomem; + &entry, hp, NULL)) goto err; - } die = &shard->dies.data[shard->dies.size - 1]; goto out; } @@ -2136,29 +2123,25 @@ static struct drgn_error *index_die(struct drgn_dwarf_index_namespace *ns, die = &shard->dies.data[die->next]; } - index = die - shard->dies.data; + size_t index = die - shard->dies.data; if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, module, - addr)) { - err = &drgn_enomem; + addr)) goto err; - } die = &shard->dies.data[shard->dies.size - 1]; shard->dies.data[index].next = shard->dies.size - 1; out: if (tag == DW_TAG_namespace) { struct drgn_dwarf_index_pending_die *pending = drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies); - if (!pending) { - err = &drgn_enomem; + if (!pending) goto err; - } pending->cu = cu - ns->dindex->cus.data; pending->addr = addr; } - err = NULL; + success = true; err: omp_unset_lock(&shard->lock); - return err; + return success; } /* Second pass: index the actual DIEs. 
*/ @@ -2475,9 +2458,9 @@ indirect_insn:; } else { file_name_hash = 0; } - if ((err = index_die(ns, cu, name, tag, file_name_hash, - module, die_addr))) - return err; + if (!index_die(ns, cu, name, tag, file_name_hash, + module, die_addr)) + return &drgn_enomem; } next: From 4b3eec40df653a468e9e1f84da954eeea5e6dc23 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 01:14:28 -0800 Subject: [PATCH 077/139] libdrgn: dwarf_index: fix hash table insertion error check table_insert_searched() returns -1 when insertion fails. Fixes: d1beb0184ac1 ("libdrgn: add support for objects in C++ namespaces") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 63b3df9ef..3619486fc 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2104,8 +2104,9 @@ static bool index_die(struct drgn_dwarf_index_namespace *ns, module, addr)) goto err; entry.value = shard->dies.size - 1; - if (!drgn_dwarf_index_die_map_insert_searched(&shard->map, - &entry, hp, NULL)) + if (drgn_dwarf_index_die_map_insert_searched(&shard->map, + &entry, hp, + NULL) < 0) goto err; die = &shard->dies.data[shard->dies.size - 1]; goto out; From 64c4afa298be117a298bd488507a3c0c24082613 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 01:18:49 -0800 Subject: [PATCH 078/139] libdrgn: type: fix hash table insertion error check table_insert_searched() returns -1 when insertion fails. 
Fixes: a97f6c4fa2bb ("Associate types with program") Signed-off-by: Omar Sandoval --- libdrgn/type.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdrgn/type.c b/libdrgn/type.c index cd91f6865..d561089e8 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -352,8 +352,8 @@ static struct drgn_error *find_or_create_type(struct drgn_type *key, return &drgn_enomem; *type = *key; - if (!drgn_dedupe_type_set_insert_searched(&prog->dedupe_types, &type, - hp, NULL)) { + if (drgn_dedupe_type_set_insert_searched(&prog->dedupe_types, &type, hp, + NULL) < 0) { free(type); return &drgn_enomem; } From 40357b9d9e1ac1c33a3d296f8242027ee1898a68 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 01:22:01 -0800 Subject: [PATCH 079/139] libdrgn: debug_info: don't use strlen() in drgn_debug_info_find_object() The length of the name was passed, and the name may not be null-terminated. Fixes: 565e0343ef93 ("libdrgn: make symbol index pluggable with callbacks") Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 07551dcb7..afa747eb4 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -4837,7 +4837,7 @@ drgn_debug_info_find_object(const char *name, size_t name_len, tags[num_tags++] = DW_TAG_variable; struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, ns, name, strlen(name), tags, + err = drgn_dwarf_index_iterator_init(&it, ns, name, name_len, tags, num_tags); if (err) return err; From 12ddb87c2632de405156d95643dc2958d30709c0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 17:27:01 -0800 Subject: [PATCH 080/139] libdrgn: dwarf_info: simplify DWARF index iterator code We can save a pointer to the shard itself instead of the namespace and shard index. We can also simplify drgn_dwarf_index_iterator_next() further. 
Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 33 ++++++++++----------------------- libdrgn/dwarf_index.h | 3 +-- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 3619486fc..575320d88 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2701,16 +2701,12 @@ drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, struct drgn_error *err = index_namespace(ns); if (err) return err; - it->ns = ns; - struct nstring key = { - .str = name, - .len = name_len, - }; + struct nstring key = { name, name_len }; struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); - it->shard = hash_pair_to_shard(hp); - struct drgn_dwarf_index_shard *shard = &ns->shards[it->shard]; + it->shard = &ns->shards[hash_pair_to_shard(hp)]; struct drgn_dwarf_index_die_map_iterator map_it = - drgn_dwarf_index_die_map_search_hashed(&shard->map, &key, hp); + drgn_dwarf_index_die_map_search_hashed(&it->shard->map, + &key, hp); it->index = map_it.entry ? 
map_it.entry->value : UINT32_MAX; it->tags = tags; it->num_tags = num_tags; @@ -2721,11 +2717,9 @@ static inline bool drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, struct drgn_dwarf_index_die *die) { - size_t i; - if (it->num_tags == 0) return true; - for (i = 0; i < it->num_tags; i++) { + for (size_t i = 0; i < it->num_tags; i++) { if (die->tag == it->tags[i]) return true; } @@ -2735,21 +2729,14 @@ drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, struct drgn_dwarf_index_die * drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) { - struct drgn_dwarf_index_namespace *ns = it->ns; - struct drgn_dwarf_index_die *die; - for (;;) { - if (it->index == UINT32_MAX) - return NULL; - - struct drgn_dwarf_index_shard *shard = &ns->shards[it->shard]; - die = &shard->dies.data[it->index]; - + while (it->index != UINT32_MAX) { + struct drgn_dwarf_index_die *die = + &it->shard->dies.data[it->index]; it->index = die->next; - if (drgn_dwarf_index_iterator_matches_tag(it, die)) - break; + return die; } - return die; + return NULL; } struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index d612ccab7..b38643b23 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -225,10 +225,9 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state); */ struct drgn_dwarf_index_iterator { /** @privatesection */ - struct drgn_dwarf_index_namespace *ns; const uint64_t *tags; size_t num_tags; - size_t shard; + struct drgn_dwarf_index_shard *shard; uint32_t index; }; From 2642f85a1a970bac5343021e9d866f1a646e4bcd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 17:21:50 -0800 Subject: [PATCH 081/139] libdrgn: dwarf_index: avoid OpenMP when accessing indexed namespace index_namespace() sets up an OpenMP loop every time it is called. 
However, if the namespace has no pending DIEs, this is unnecessary overhead for every DWARF index lookup. Bail early if there are no pending DIEs (i.e., because we already indexed the namespace). In a microbenchmark, this was a 10x speed improvement for DWARF index iterator initialization. For a Python prog.type() lookup benchmark, it was a 10% speedup. Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 575320d88..2a95a0c87 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2655,6 +2655,9 @@ drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) { + if (ns->pending_dies.size == 0) + return NULL; + if (ns->saved_err) return drgn_error_copy(ns->saved_err); @@ -2701,13 +2704,18 @@ drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, struct drgn_error *err = index_namespace(ns); if (err) return err; - struct nstring key = { name, name_len }; - struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); - it->shard = &ns->shards[hash_pair_to_shard(hp)]; - struct drgn_dwarf_index_die_map_iterator map_it = - drgn_dwarf_index_die_map_search_hashed(&it->shard->map, - &key, hp); - it->index = map_it.entry ? map_it.entry->value : UINT32_MAX; + if (ns->shards) { + struct nstring key = { name, name_len }; + struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); + it->shard = &ns->shards[hash_pair_to_shard(hp)]; + struct drgn_dwarf_index_die_map_iterator map_it = + drgn_dwarf_index_die_map_search_hashed(&it->shard->map, + &key, hp); + it->index = map_it.entry ? 
map_it.entry->value : UINT32_MAX; + } else { + it->shard = NULL; + it->index = UINT32_MAX; + } it->tags = tags; it->num_tags = num_tags; return NULL; From a90ffdfb67c53a1f89590c750602fd684381eedb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 17 Nov 2021 17:34:04 -0800 Subject: [PATCH 082/139] libdrgn: dwarf_index: actually index namespaces in parallel index_namespace() uses `#pragma omp for` instead of `#pragma omp parallel for`, and it's not already in a parallel section. So, we're indexing namespaces single-threaded, despite sharding the index. Oops. Fixes: d1beb0184ac1 ("libdrgn: add support for objects in C++ namespaces") Signed-off-by: Omar Sandoval --- libdrgn/dwarf_index.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 2a95a0c87..3fcdb8d68 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -2665,7 +2665,7 @@ static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) return &drgn_enomem; struct drgn_error *err = NULL; - #pragma omp for schedule(dynamic) + #pragma omp parallel for schedule(dynamic) for (size_t i = 0; i < ns->pending_dies.size; i++) { if (!err) { struct drgn_dwarf_index_pending_die *pending = From 3700bb75b84ce23ea374fa53fccc897b858c787c Mon Sep 17 00:00:00 2001 From: Jay Kamat Date: Thu, 11 Feb 2021 15:11:34 -0800 Subject: [PATCH 083/139] libdrgn: Follow typedefs in enum backing type lookup In C++ enums can be a typedef to an int, not just an int itself. 
Signed-off-by: Jay Kamat --- libdrgn/debug_info.c | 2 +- tests/test_dwarf.py | 59 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index afa747eb4..e8fbbebfe 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -4194,7 +4194,7 @@ drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, &qualified_compatible_type); if (err) goto err; - compatible_type = qualified_compatible_type.type; + compatible_type = drgn_underlying_type(qualified_compatible_type.type); if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { err = drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index d266bd55f..165f24b58 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1723,6 +1723,65 @@ def test_enum(self): ), ) + def test_enum_typedef(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), + ), + ), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "__uint32_t"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + unsigned_int_die, + ) + ) + ) + self.assertIdentical( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), 
+ TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ), + ) + def test_enum_anonymous(self): prog = dwarf_program( wrap_test_type_dies( From c6b2bc41811e735a11978a9ae4fc4e03bde75fa8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 8 Nov 2021 15:39:39 -0800 Subject: [PATCH 084/139] libdrgn: debug_info: split ORC support into its own file debug_info.c currently contains code for managing ELF files with debugging information, for parsing DWARF, and for parsing ORC. Let's split it up, starting by moving ORC support to its own file. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 + libdrgn/debug_info.c | 306 +----------------------------------------- libdrgn/debug_info.h | 52 +++----- libdrgn/orc_info.c | 309 +++++++++++++++++++++++++++++++++++++++++++ libdrgn/orc_info.h | 76 +++++++++++ 5 files changed, 407 insertions(+), 338 deletions(-) create mode 100644 libdrgn/orc_info.c create mode 100644 libdrgn/orc_info.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 734f537b1..17afad084 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -61,6 +61,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ object_index.c \ object_index.h \ orc.h \ + orc_info.c \ + orc_info.h \ path.c \ path.h \ platform.c \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index e8fbbebfe..e3eb9a7e9 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -28,7 +28,6 @@ #include "linux_kernel.h" #include "minmax.h" #include "object.h" -#include "orc.h" #include "path.h" #include "program.h" #include "register_state.h" @@ -60,15 +59,6 @@ struct drgn_dwarf_cie { size_t initial_instructions_size; }; -struct drgn_dwarf_fde { - uint64_t initial_location; - uint64_t address_range; - /* CIE for this FDE as an index into drgn_debug_info_module::cies. 
*/ - size_t cie; - const char *instructions; - size_t instructions_size; -}; - DEFINE_VECTOR(drgn_dwarf_fde_vector, struct drgn_dwarf_fde) DEFINE_VECTOR(drgn_dwarf_cie_vector, struct drgn_dwarf_cie) DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, @@ -701,8 +691,7 @@ drgn_debug_info_module_destroy(struct drgn_debug_info_module *module) { if (module) { drgn_error_destroy(module->err); - free(module->orc_entries); - free(module->orc_pc_offsets); + drgn_orc_module_info_deinit(module); free(module->fdes); free(module->cies); elf_end(module->elf); @@ -1421,7 +1410,7 @@ drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) return NULL; } -static struct drgn_error * +struct drgn_error * drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, enum drgn_debug_info_scn scn) { @@ -5871,297 +5860,6 @@ drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, return NULL; } -/* - * Get the program counter of an ORC entry directly from the .orc_unwind_ip - * section. - */ -static inline uint64_t drgn_raw_orc_pc(struct drgn_debug_info_module *module, - size_t i) -{ - int32_t offset; - memcpy(&offset, - (int32_t *)module->scn_data[DRGN_SCN_ORC_UNWIND_IP]->d_buf + i, - sizeof(offset)); - if (drgn_platform_bswap(&module->platform)) - offset = bswap_32(offset); - return module->orc_pc_base + UINT64_C(4) * i + offset; -} - -static int compare_orc_entries(const void *a, const void *b, void *arg) -{ - struct drgn_debug_info_module *module = arg; - size_t index_a = *(size_t *)a; - size_t index_b = *(size_t *)b; - - uint64_t pc_a = drgn_raw_orc_pc(module, index_a); - uint64_t pc_b = drgn_raw_orc_pc(module, index_b); - if (pc_a < pc_b) - return -1; - else if (pc_a > pc_b) - return 1; - - /* - * If two entries have the same PC, then one is probably a "terminator" - * at the end of a compilation unit. Prefer the real entry. 
- */ - const struct drgn_orc_entry *entries = - module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; - uint16_t flags_a, flags_b; - memcpy(&flags_a, &entries[index_a].flags, sizeof(flags_a)); - memcpy(&flags_b, &entries[index_b].flags, sizeof(flags_b)); - if (drgn_platform_bswap(&module->platform)) { - flags_a = bswap_16(flags_a); - flags_b = bswap_16(flags_b); - } - return (drgn_orc_flags_is_terminator(flags_b) - - drgn_orc_flags_is_terminator(flags_a)); -} - -static size_t keep_orc_entry(struct drgn_debug_info_module *module, - size_t *indices, size_t num_entries, size_t i) -{ - - const struct drgn_orc_entry *entries = - module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; - if (num_entries > 0 && - memcmp(&entries[indices[num_entries - 1]], &entries[indices[i]], - sizeof(entries[0])) == 0) { - /* - * The previous entry is identical to this one, so we can skip - * this entry (which effectively merges it into the previous - * one). This usually happens for "terminator" entries. - */ - return num_entries; - } - indices[num_entries] = indices[i]; - return num_entries + 1; -} - -/* - * The vast majority of ORC entries are redundant with DWARF CFI, and it's a - * waste to store and binary search those entries. This removes ORC entries that - * are entirely shadowed by DWARF FDEs. - */ -static size_t remove_fdes_from_orc(struct drgn_debug_info_module *module, - size_t *indices, size_t num_entries) -{ - if (module->num_fdes == 0) - return num_entries; - - struct drgn_dwarf_fde *fde = module->fdes; - struct drgn_dwarf_fde *last_fde = &module->fdes[module->num_fdes - 1]; - - size_t new_num_entries = 0; - - /* Keep any entries that start before the first DWARF FDE. 
*/ - uint64_t start_pc; - for (;;) { - start_pc = drgn_raw_orc_pc(module, new_num_entries); - if (fde->initial_location <= start_pc) - break; - new_num_entries++; - if (new_num_entries == num_entries) - return num_entries; - } - - for (size_t i = new_num_entries; i < num_entries - 1; i++) { - uint64_t end_pc = drgn_raw_orc_pc(module, i + 1); - - /* - * Find the last FDE that starts at or before the current ORC - * entry. - */ - while (fde != last_fde && fde[1].initial_location <= start_pc) - fde++; - - /* - * Check whether the current ORC entry is completely covered by - * one or more FDEs. - */ - while (end_pc - fde->initial_location > fde->address_range) { - /* - * The current FDE doesn't cover the current ORC entry. - */ - if (fde == last_fde) { - /* - * There are no more FDEs. Keep the remaining - * ORC entries. - */ - if (i != new_num_entries) { - memmove(&indices[new_num_entries], - &indices[i], - (num_entries - i) * - sizeof(indices[0])); - } - return new_num_entries + (num_entries - i); - } - if (fde[1].initial_location - fde->initial_location - > fde->address_range) { - /* - * There is a gap between the current FDE and - * the next FDE that exposes the current ORC - * entry. Keep it. - */ - new_num_entries = keep_orc_entry(module, - indices, - new_num_entries, - i); - break; - } - fde++; - } - - start_pc = end_pc; - } - /* We don't know where the last ORC entry ends, so always keep it. 
*/ - return keep_orc_entry(module, indices, new_num_entries, - num_entries - 1); -} - -static struct drgn_error * -drgn_debug_info_parse_orc(struct drgn_debug_info_module *module) -{ - struct drgn_error *err; - - if (!module->platform.arch->orc_to_cfi || - !module->scns[DRGN_SCN_ORC_UNWIND_IP] || - !module->scns[DRGN_SCN_ORC_UNWIND]) - return NULL; - - GElf_Shdr shdr_mem, *shdr; - shdr = gelf_getshdr(module->scns[DRGN_SCN_ORC_UNWIND_IP], &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - module->orc_pc_base = shdr->sh_addr; - - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_ORC_UNWIND_IP); - if (err) - return err; - err = drgn_debug_info_module_cache_section(module, DRGN_SCN_ORC_UNWIND); - if (err) - return err; - Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]; - Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND]; - - size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t); - if (orc_unwind_ip->d_size % sizeof(int32_t) != 0 || - orc_unwind->d_size % sizeof(struct drgn_orc_entry) != 0 || - orc_unwind->d_size / sizeof(struct drgn_orc_entry) != num_entries) { - return drgn_error_create(DRGN_ERROR_OTHER, - ".orc_unwind_ip and/or .orc_unwind has invalid size"); - } - if (!num_entries) - return NULL; - - size_t *indices = malloc_array(num_entries, sizeof(indices[0])); - if (!indices) - return &drgn_enomem; - for (size_t i = 0; i < num_entries; i++) - indices[i] = i; - - /* - * Sort the ORC entries for binary search. Since Linux kernel commit - * f14bf6a350df ("x86/unwind/orc: Remove boot-time ORC unwind tables - * sorting") (in v5.6), this is already sorted for vmlinux, so only sort - * it if necessary. 
- */ - for (size_t i = 1; i < num_entries; i++) { - if (compare_orc_entries(&indices[i - 1], &indices[i], - module) > 0) { - qsort_r(indices, num_entries, sizeof(indices[0]), - compare_orc_entries, module); - break; - } - } - - num_entries = remove_fdes_from_orc(module, indices, num_entries); - - int32_t *pc_offsets = malloc_array(num_entries, sizeof(pc_offsets[0])); - if (!pc_offsets) { - err = &drgn_enomem; - goto out; - } - struct drgn_orc_entry *entries = malloc_array(num_entries, - sizeof(entries[0])); - if (!entries) { - free(pc_offsets); - err = &drgn_enomem; - goto out; - } - const int32_t *orig_offsets = orc_unwind_ip->d_buf; - const struct drgn_orc_entry *orig_entries = orc_unwind->d_buf; - bool bswap = drgn_platform_bswap(&module->platform); - for (size_t i = 0; i < num_entries; i++) { - size_t index = indices[i]; - int32_t offset; - memcpy(&offset, &orig_offsets[index], sizeof(offset)); - struct drgn_orc_entry entry; - memcpy(&entry, &orig_entries[index], sizeof(entry)); - if (bswap) { - offset = bswap_32(offset); - entry.sp_offset = bswap_16(entry.sp_offset); - entry.bp_offset = bswap_16(entry.bp_offset); - entry.flags = bswap_16(entry.flags); - } - pc_offsets[i] = UINT64_C(4) * index + offset - UINT64_C(4) * i; - entries[i] = entry; - } - - module->orc_pc_offsets = pc_offsets; - module->orc_entries = entries; - module->num_orc_entries = num_entries; - - err = NULL; -out: - free(indices); - return err; -} - -static inline uint64_t drgn_orc_pc(struct drgn_debug_info_module *module, - size_t i) -{ - return module->orc_pc_base + UINT64_C(4) * i + module->orc_pc_offsets[i]; -} - -static struct drgn_error * -drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module, - uint64_t unbiased_pc, - struct drgn_cfi_row **row_ret, - bool *interrupted_ret, - drgn_register_number *ret_addr_regno_ret) -{ - struct drgn_error *err; - - if (!module->parsed_orc) { - err = drgn_debug_info_parse_orc(module); - if (err) - return err; - module->parsed_orc = true; - } 
- - /* - * We don't know the maximum program counter covered by the ORC data, - * but the last entry seems to always be a terminator, so it doesn't - * matter. All addresses beyond the max will fall into the last entry. - */ - if (!module->num_orc_entries || unbiased_pc < drgn_orc_pc(module, 0)) - return &drgn_not_found; - size_t lo = 0, hi = module->num_orc_entries, found = 0; - while (lo < hi) { - size_t mid = lo + (hi - lo) / 2; - if (drgn_orc_pc(module, mid) <= unbiased_pc) { - found = mid; - lo = mid + 1; - } else { - hi = mid; - } - } - return module->platform.arch->orc_to_cfi(&module->orc_entries[found], - row_ret, interrupted_ret, - ret_addr_regno_ret); -} - struct drgn_error * drgn_debug_info_module_find_cfi(struct drgn_program *prog, struct drgn_debug_info_module *module, diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index b53ac8ae3..c3fca253e 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -21,6 +21,7 @@ #include "drgn.h" #include "dwarf_index.h" #include "hash_table.h" +#include "orc_info.h" #include "platform.h" #include "string_builder.h" #include "vector.h" @@ -34,8 +35,8 @@ struct drgn_register_state; * * Caching of debugging information. * - * @ref drgn_debug_info caches debugging information (currently only DWARF). It - * translates the debugging information to types and objects. + * @ref drgn_debug_info caches debugging information (currently DWARF and ORC). + * It translates the debugging information to types and objects. * * @{ */ @@ -80,6 +81,15 @@ enum drgn_debug_info_scn { DRGN_NUM_DEBUG_SCNS, }; +struct drgn_dwarf_fde { + uint64_t initial_location; + uint64_t address_range; + /* CIE for this FDE as an index into drgn_debug_info_module::cies. */ + size_t cie; + const char *instructions; + size_t instructions_size; +}; + /** * A module reported to a @ref drgn_debug_info. * @@ -119,38 +129,8 @@ struct drgn_debug_info_module { /** Number of elements in @ref drgn_debug_info_module::fdes. 
*/ size_t num_fdes; - /** - * Base for calculating program counter corresponding to an ORC unwinder - * entry. - * - * This is the address of the `.orc_unwind_ip` ELF section. - * - * @sa drgn_debug_info_module::orc_entries - */ - uint64_t orc_pc_base; - /** - * Offsets for calculating program counter corresponding to an ORC - * unwinder entry. - * - * This is the contents of the `.orc_unwind_ip` ELF section, byte - * swapped to the host's byte order if necessary. - * - * @sa drgn_debug_info_module::orc_entries - */ - int32_t *orc_pc_offsets; - /** - * ORC unwinder entries. - * - * This is the contents of the `.orc_unwind` ELF section, byte swapped - * to the host's byte order if necessary. - * - * Entry `i` specifies how to unwind the stack if - * `orc_pc(i) <= PC < orc_pc(i + 1)`, where - * `orc_pc(i) = orc_pc_base + 4 * i + orc_pc_offsets[i]`. - */ - struct drgn_orc_entry *orc_entries; - /** Number of ORC unwinder entries. */ - size_t num_orc_entries; + /** ORC unwinder information. */ + struct drgn_orc_module_info orc; /** Whether .debug_frame and .eh_frame have been parsed. */ bool parsed_frames; @@ -179,6 +159,10 @@ struct drgn_debug_info_module { struct drgn_debug_info_module *next; }; +struct drgn_error * +drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn); + struct drgn_error * drgn_error_debug_info_scn(struct drgn_debug_info_module *module, enum drgn_debug_info_scn scn, const char *ptr, diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c new file mode 100644 index 000000000..223ced13d --- /dev/null +++ b/libdrgn/orc_info.c @@ -0,0 +1,309 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
+// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include + +#include "debug_info.h" // IWYU pragma: associated +#include "error.h" +#include "orc.h" +#include "util.h" + +void drgn_orc_module_info_deinit(struct drgn_debug_info_module *module) +{ + free(module->orc.entries); + free(module->orc.pc_offsets); +} + +/* + * Get the program counter of an ORC entry directly from the .orc_unwind_ip + * section. + */ +static inline uint64_t drgn_raw_orc_pc(struct drgn_debug_info_module *module, + size_t i) +{ + int32_t offset; + memcpy(&offset, + (int32_t *)module->scn_data[DRGN_SCN_ORC_UNWIND_IP]->d_buf + i, + sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_32(offset); + return module->orc.pc_base + UINT64_C(4) * i + offset; +} + +static int compare_orc_entries(const void *a, const void *b, void *arg) +{ + struct drgn_debug_info_module *module = arg; + size_t index_a = *(size_t *)a; + size_t index_b = *(size_t *)b; + + uint64_t pc_a = drgn_raw_orc_pc(module, index_a); + uint64_t pc_b = drgn_raw_orc_pc(module, index_b); + if (pc_a < pc_b) + return -1; + else if (pc_a > pc_b) + return 1; + + /* + * If two entries have the same PC, then one is probably a "terminator" + * at the end of a compilation unit. Prefer the real entry. 
+ */ + const struct drgn_orc_entry *entries = + module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; + uint16_t flags_a, flags_b; + memcpy(&flags_a, &entries[index_a].flags, sizeof(flags_a)); + memcpy(&flags_b, &entries[index_b].flags, sizeof(flags_b)); + if (drgn_platform_bswap(&module->platform)) { + flags_a = bswap_16(flags_a); + flags_b = bswap_16(flags_b); + } + return (drgn_orc_flags_is_terminator(flags_b) + - drgn_orc_flags_is_terminator(flags_a)); +} + +static size_t keep_orc_entry(struct drgn_debug_info_module *module, + size_t *indices, size_t num_entries, size_t i) +{ + + const struct drgn_orc_entry *entries = + module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; + if (num_entries > 0 && + memcmp(&entries[indices[num_entries - 1]], &entries[indices[i]], + sizeof(entries[0])) == 0) { + /* + * The previous entry is identical to this one, so we can skip + * this entry (which effectively merges it into the previous + * one). This usually happens for "terminator" entries. + */ + return num_entries; + } + indices[num_entries] = indices[i]; + return num_entries + 1; +} + +/* + * The vast majority of ORC entries are redundant with DWARF CFI, and it's a + * waste to store and binary search those entries. This removes ORC entries that + * are entirely shadowed by DWARF FDEs. + */ +static size_t remove_fdes_from_orc(struct drgn_debug_info_module *module, + size_t *indices, size_t num_entries) +{ + if (module->num_fdes == 0) + return num_entries; + + struct drgn_dwarf_fde *fde = module->fdes; + struct drgn_dwarf_fde *last_fde = &module->fdes[module->num_fdes - 1]; + + size_t new_num_entries = 0; + + /* Keep any entries that start before the first DWARF FDE. 
*/ + uint64_t start_pc; + for (;;) { + start_pc = drgn_raw_orc_pc(module, new_num_entries); + if (fde->initial_location <= start_pc) + break; + new_num_entries++; + if (new_num_entries == num_entries) + return num_entries; + } + + for (size_t i = new_num_entries; i < num_entries - 1; i++) { + uint64_t end_pc = drgn_raw_orc_pc(module, i + 1); + + /* + * Find the last FDE that starts at or before the current ORC + * entry. + */ + while (fde != last_fde && fde[1].initial_location <= start_pc) + fde++; + + /* + * Check whether the current ORC entry is completely covered by + * one or more FDEs. + */ + while (end_pc - fde->initial_location > fde->address_range) { + /* + * The current FDE doesn't cover the current ORC entry. + */ + if (fde == last_fde) { + /* + * There are no more FDEs. Keep the remaining + * ORC entries. + */ + if (i != new_num_entries) { + memmove(&indices[new_num_entries], + &indices[i], + (num_entries - i) * + sizeof(indices[0])); + } + return new_num_entries + (num_entries - i); + } + if (fde[1].initial_location - fde->initial_location + > fde->address_range) { + /* + * There is a gap between the current FDE and + * the next FDE that exposes the current ORC + * entry. Keep it. + */ + new_num_entries = keep_orc_entry(module, + indices, + new_num_entries, + i); + break; + } + fde++; + } + + start_pc = end_pc; + } + /* We don't know where the last ORC entry ends, so always keep it. 
*/ + return keep_orc_entry(module, indices, new_num_entries, + num_entries - 1); +} + +static struct drgn_error * +drgn_debug_info_parse_orc(struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + + if (!module->platform.arch->orc_to_cfi || + !module->scns[DRGN_SCN_ORC_UNWIND_IP] || + !module->scns[DRGN_SCN_ORC_UNWIND]) + return NULL; + + GElf_Shdr shdr_mem, *shdr; + shdr = gelf_getshdr(module->scns[DRGN_SCN_ORC_UNWIND_IP], &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + module->orc.pc_base = shdr->sh_addr; + + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_ORC_UNWIND_IP); + if (err) + return err; + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_ORC_UNWIND); + if (err) + return err; + Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]; + Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND]; + + size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t); + if (orc_unwind_ip->d_size % sizeof(int32_t) != 0 || + orc_unwind->d_size % sizeof(struct drgn_orc_entry) != 0 || + orc_unwind->d_size / sizeof(struct drgn_orc_entry) != num_entries) { + return drgn_error_create(DRGN_ERROR_OTHER, + ".orc_unwind_ip and/or .orc_unwind has invalid size"); + } + if (!num_entries) + return NULL; + + size_t *indices = malloc_array(num_entries, sizeof(indices[0])); + if (!indices) + return &drgn_enomem; + for (size_t i = 0; i < num_entries; i++) + indices[i] = i; + + /* + * Sort the ORC entries for binary search. Since Linux kernel commit + * f14bf6a350df ("x86/unwind/orc: Remove boot-time ORC unwind tables + * sorting") (in v5.6), this is already sorted for vmlinux, so only sort + * it if necessary. 
+ */ + for (size_t i = 1; i < num_entries; i++) { + if (compare_orc_entries(&indices[i - 1], &indices[i], + module) > 0) { + qsort_r(indices, num_entries, sizeof(indices[0]), + compare_orc_entries, module); + break; + } + } + + num_entries = remove_fdes_from_orc(module, indices, num_entries); + + int32_t *pc_offsets = malloc_array(num_entries, sizeof(pc_offsets[0])); + if (!pc_offsets) { + err = &drgn_enomem; + goto out; + } + struct drgn_orc_entry *entries = malloc_array(num_entries, + sizeof(entries[0])); + if (!entries) { + free(pc_offsets); + err = &drgn_enomem; + goto out; + } + const int32_t *orig_offsets = orc_unwind_ip->d_buf; + const struct drgn_orc_entry *orig_entries = orc_unwind->d_buf; + bool bswap = drgn_platform_bswap(&module->platform); + for (size_t i = 0; i < num_entries; i++) { + size_t index = indices[i]; + int32_t offset; + memcpy(&offset, &orig_offsets[index], sizeof(offset)); + struct drgn_orc_entry entry; + memcpy(&entry, &orig_entries[index], sizeof(entry)); + if (bswap) { + offset = bswap_32(offset); + entry.sp_offset = bswap_16(entry.sp_offset); + entry.bp_offset = bswap_16(entry.bp_offset); + entry.flags = bswap_16(entry.flags); + } + pc_offsets[i] = UINT64_C(4) * index + offset - UINT64_C(4) * i; + entries[i] = entry; + } + + module->orc.pc_offsets = pc_offsets; + module->orc.entries = entries; + module->orc.num_entries = num_entries; + + err = NULL; +out: + free(indices); + return err; +} + +static inline uint64_t drgn_orc_pc(struct drgn_debug_info_module *module, + size_t i) +{ + return module->orc.pc_base + UINT64_C(4) * i + module->orc.pc_offsets[i]; +} + +struct drgn_error * +drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret) +{ + struct drgn_error *err; + + if (!module->parsed_orc) { + err = drgn_debug_info_parse_orc(module); + if (err) + return err; + module->parsed_orc = true; + } + + /* 
+ * We don't know the maximum program counter covered by the ORC data, + * but the last entry seems to always be a terminator, so it doesn't + * matter. All addresses beyond the max will fall into the last entry. + */ + if (!module->orc.num_entries || unbiased_pc < drgn_orc_pc(module, 0)) + return &drgn_not_found; + size_t lo = 0, hi = module->orc.num_entries, found = 0; + while (lo < hi) { + size_t mid = lo + (hi - lo) / 2; + if (drgn_orc_pc(module, mid) <= unbiased_pc) { + found = mid; + lo = mid + 1; + } else { + hi = mid; + } + } + return module->platform.arch->orc_to_cfi(&module->orc.entries[found], + row_ret, interrupted_ret, + ret_addr_regno_ret); +} diff --git a/libdrgn/orc_info.h b/libdrgn/orc_info.h new file mode 100644 index 000000000..90208505c --- /dev/null +++ b/libdrgn/orc_info.h @@ -0,0 +1,76 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * ORC unwinder support. + * + * See @ref DebugInfo. + */ + +#ifndef DRGN_ORC_INFO_H +#define DRGN_ORC_INFO_H + +#include +#include +#include + +#include "cfi.h" + +struct drgn_debug_info_module; + +/** + * @ingroup DebugInfo + * + * @{ + */ + +/** ORC unwinder data for a @ref drgn_debug_info_module. */ +struct drgn_orc_module_info { + /** + * Base for calculating program counter corresponding to an ORC unwinder + * entry. + * + * This is the address of the `.orc_unwind_ip` ELF section. + * + * @sa drgn_orc_module_info::entries + */ + uint64_t pc_base; + /** + * Offsets for calculating program counter corresponding to an ORC + * unwinder entry. + * + * This is the contents of the `.orc_unwind_ip` ELF section, byte + * swapped to the host's byte order if necessary. + * + * @sa drgn_orc_module_info::entries + */ + int32_t *pc_offsets; + /** + * ORC unwinder entries. + * + * This is the contents of the `.orc_unwind` ELF section, byte swapped + * to the host's byte order if necessary. 
+ * + * Entry `i` specifies how to unwind the stack if + * `orc_pc(i) <= PC < orc_pc(i + 1)`, where + * `orc_pc(i) = pc_base + 4 * i + pc_offsets[i]`. + */ + struct drgn_orc_entry *entries; + /** Number of ORC unwinder entries. */ + size_t num_entries; +}; + +void drgn_orc_module_info_deinit(struct drgn_debug_info_module *module); + +struct drgn_error * +drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret); + +/** @} */ + +#endif /* DRGN_ORC_INFO_H */ From 5591d199b189ddc9838db9c751dea40e677dfc4a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 8 Nov 2021 17:05:33 -0800 Subject: [PATCH 085/139] libdrgn: debug_info: split DWARF support into its own file Continuing the refactoring from the previous commit, move the DWARF code from debug_info.c to its own file, leaving only the generic ELF file management in debug_info.c Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 + libdrgn/debug_info.c | 4869 +---------------------------------------- libdrgn/debug_info.h | 156 +- libdrgn/dwarf_info.c | 4869 +++++++++++++++++++++++++++++++++++++++++ libdrgn/dwarf_info.h | 194 ++ libdrgn/language.c | 29 - libdrgn/language.h | 14 - libdrgn/orc_info.c | 6 +- libdrgn/program.c | 46 +- libdrgn/stack_trace.c | 1 + 10 files changed, 5159 insertions(+), 5027 deletions(-) create mode 100644 libdrgn/dwarf_info.c create mode 100644 libdrgn/dwarf_info.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 17afad084..c761a2843 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -37,6 +37,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ debug_info.h \ dwarf_index.c \ dwarf_index.h \ + dwarf_info.c \ + dwarf_info.h \ error.c \ error.h \ hash_table.c \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index e3eb9a7e9..4d01bc07d 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -3,98 +3,26 @@ 
#include #include -#include #include -#include #include #include #include #include #include #include -#include -#include #include #include #include #include #include -#include "array.h" #include "debug_info.h" +#include "dwarf_index.h" #include "error.h" -#include "language.h" -#include "lazy_object.h" #include "linux_kernel.h" -#include "minmax.h" -#include "object.h" -#include "path.h" #include "program.h" -#include "register_state.h" -#include "serialize.h" -#include "type.h" #include "util.h" -/** - * Arbitrary limit for number of operations to execute in a DWARF expression to - * avoid infinite loops. - */ -static const int MAX_DWARF_EXPR_OPS = 10000; - -struct drgn_dwarf_cie { - /* Whether this CIE is from .eh_frame. */ - bool is_eh; - /* Size of an address in this CIE in bytes. */ - uint8_t address_size; - /* DW_EH_PE_* encoding of addresses in this CIE. */ - uint8_t address_encoding; - /* Whether this CIE has a 'z' augmentation. */ - bool have_augmentation_length; - /* Whether this CIE is for a signal handler ('S' augmentation). */ - bool signal_frame; - drgn_register_number return_address_register; - uint64_t code_alignment_factor; - int64_t data_alignment_factor; - const char *initial_instructions; - size_t initial_instructions_size; -}; - -DEFINE_VECTOR(drgn_dwarf_fde_vector, struct drgn_dwarf_fde) -DEFINE_VECTOR(drgn_dwarf_cie_vector, struct drgn_dwarf_cie) -DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, - scalar_key_eq) -DEFINE_VECTOR(drgn_cfi_row_vector, struct drgn_cfi_row *) -DEFINE_VECTOR(uint64_vector, uint64_t) - -DEFINE_VECTOR(dwarf_die_vector, Dwarf_Die) - -#define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" -#define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) - -/** - * Get the name of a DWARF tag. - * - * @return Static string if the tag is known or @p buf if the tag is unknown - * (populated with a description). 
- */ -static const char *dw_tag_str(int tag, char buf[DW_TAG_BUF_LEN]) -{ - switch (tag) { -#define DWARF_ONE_KNOWN_DW_TAG(name, value) case value: return "DW_TAG_" #name; - DWARF_ALL_KNOWN_DW_TAG -#undef DWARF_ONE_KNOWN_DW_TAG - default: - sprintf(buf, DW_TAG_UNKNOWN_FORMAT, tag); - return buf; - } -} - -/** Like @ref dw_tag_str(), but takes a @c Dwarf_Die. */ -static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) -{ - return dw_tag_str(dwarf_tag(die), buf); -} - static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_DEBUG_INFO] = ".debug_info", [DRGN_SCN_DEBUG_TYPES] = ".debug_types", @@ -137,406 +65,6 @@ struct drgn_error *drgn_debug_info_buffer_error(struct binary_buffer *bb, message); } - -/** Iterator over DWARF DIEs in a @ref drgn_debug_info_module. */ -struct drgn_dwarf_die_iterator { - /** Stack of current DIE and its ancestors. */ - struct dwarf_die_vector dies; - Dwarf *dwarf; - /** End of current CU (for bounds checking). */ - const char *cu_end; - /** Offset of next CU. */ - Dwarf_Off next_cu_off; - /** Whether current CU is from .debug_types. */ - bool debug_types; -}; - -static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, - Dwarf *dwarf) -{ - dwarf_die_vector_init(&it->dies); - it->dwarf = dwarf; - it->next_cu_off = 0; - it->debug_types = false; -} - -static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) -{ - dwarf_die_vector_deinit(&it->dies); -} - -/** - * Return the next DWARF DIE in a @ref drgn_dwarf_die_iterator. - * - * The first call returns the top-level DIE for the first unit in the module. - * Subsequent calls return children, siblings, and unit DIEs. - * - * This includes the .debug_types section. - * - * @param[in,out] it Iterator containing the returned DIE and its ancestors. The - * last entry in `it->dies` is the DIE itself, the entry before that is its - * parent, the entry before that is its grandparent, etc. 
- * @param[in] children If @c true and the last returned DIE has children, return - * its first child (this is a pre-order traversal). Otherwise, return the next - * DIE at the level less than or equal to the last returned DIE, i.e., the last - * returned DIE's sibling, or its ancestor's sibling, or the next top-level unit - * DIE. - * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, - * stop after returning all DIEs in the subtree rooted at the DIE that was - * returned in the last call as entry `subtree - 1` in `it->dies`. - * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which - * case the size of `it->dies` equals @p subtree and `it->dies` refers to the - * root of the iterated subtree, non-@c NULL on error, in which case this should - * not be called again. - */ -static struct drgn_error * -drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, - size_t subtree) -{ -#define TOP() (&it->dies.data[it->dies.size - 1]) - int r; - Dwarf_Die die; - assert(subtree <= it->dies.size); - if (it->dies.size == 0) { - /* This is the first call. Get the first unit DIE. */ - if (!dwarf_die_vector_append_entry(&it->dies)) - return &drgn_enomem; - } else { - if (children) { - r = dwarf_child(TOP(), &die); - if (r == 0) { - /* The previous DIE has a child. Return it. */ - if (!dwarf_die_vector_append(&it->dies, &die)) - return &drgn_enomem; - return NULL; - } else if (r < 0) { - return drgn_error_libdw(); - } - /* The previous DIE has no children. */ - } - - if (it->dies.size == subtree) { - /* - * The previous DIE is the root of the subtree. We're - * done. - */ - return &drgn_stop; - } - - if (it->dies.size > 1) { - r = dwarf_siblingof(TOP(), &die); - if (r == 0) { - /* The previous DIE has a sibling. Return it. */ - *TOP() = die; - return NULL; - } else if (r > 0) { - if (!die.addr) - goto next_unit; - /* - * The previous DIE is the last child of its - * parent. 
- */ - char *addr = die.addr; - do { - /* - * addr points to the null terminator - * for the list of siblings. Go back up - * to its parent. The next byte is - * either the parent's sibling or - * another null terminator. - */ - it->dies.size--; - addr++; - if (it->dies.size == subtree) { - /* - * We're back to the root of the - * subtree. We're done. - */ - return &drgn_stop; - } - if (it->dies.size == 1 || - addr >= it->cu_end) - goto next_unit; - } while (*addr == '\0'); - /* - * addr now points to the next DIE. Return it. - */ - *TOP() = (Dwarf_Die){ - .cu = it->dies.data[0].cu, - .addr = addr, - }; - return NULL; - } else { - return drgn_error_libdw(); - } - } - } - -next_unit:; - /* There are no more DIEs in the current unit. */ - Dwarf_Off cu_off = it->next_cu_off; - size_t cu_header_size; - uint64_t type_signature; - r = dwarf_next_unit(it->dwarf, cu_off, &it->next_cu_off, - &cu_header_size, NULL, NULL, NULL, NULL, - it->debug_types ? &type_signature : NULL, NULL); - if (r == 0) { - /* Got the next unit. Return the unit DIE. */ - if (it->debug_types) { - r = !dwarf_offdie_types(it->dwarf, - cu_off + cu_header_size, TOP()); - } else { - r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, - TOP()); - } - if (r) - return drgn_error_libdw(); - it->cu_end = ((const char *)TOP()->addr - - dwarf_dieoffset(TOP()) - + it->next_cu_off); - return NULL; - } else if (r > 0) { - if (!it->debug_types) { - it->next_cu_off = 0; - it->debug_types = true; - goto next_unit; - } - /* There are no more units. 
*/ - return &drgn_stop; - } else { - return drgn_error_libdw(); - } -#undef TOP -} - -struct drgn_error * -drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, - uint64_t pc, uint64_t *bias_ret, - Dwarf_Die **dies_ret, - size_t *length_ret) -{ - struct drgn_error *err; - - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdw(); - *bias_ret = bias; - pc -= bias; - - /* First, try to get the CU containing the PC. */ - Dwarf_Aranges *aranges; - size_t naranges; - if (dwarf_getaranges(dwarf, &aranges, &naranges) < 0) - return drgn_error_libdw(); - - struct drgn_dwarf_die_iterator it; - bool children; - size_t subtree; - Dwarf_Off offset; - if (dwarf_getarangeinfo(dwarf_getarange_addr(aranges, pc), NULL, NULL, - &offset) >= 0) { - drgn_dwarf_die_iterator_init(&it, dwarf); - Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); - if (!cu_die) { - err = &drgn_enomem; - goto err; - } - if (!dwarf_offdie(dwarf, offset, cu_die)) { - err = drgn_error_libdw(); - goto err; - } - if (dwarf_next_unit(dwarf, offset - dwarf_cuoffset(cu_die), - &it.next_cu_off, NULL, NULL, NULL, NULL, - NULL, NULL, NULL)) { - err = drgn_error_libdw(); - goto err; - } - it.cu_end = ((const char *)cu_die->addr - - dwarf_dieoffset(cu_die) - + it.next_cu_off); - children = true; - subtree = 1; - } else { - /* - * Range was not found. .debug_aranges could be missing or - * incomplete, so fall back to checking each CU. - */ - drgn_dwarf_die_iterator_init(&it, dwarf); - children = false; - subtree = 0; - } - - /* Now find DIEs containing the PC. 
*/ - while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree))) { - int r = dwarf_haspc(&it.dies.data[it.dies.size - 1], pc); - if (r > 0) { - children = true; - subtree = it.dies.size; - } else if (r < 0) { - err = drgn_error_libdw(); - goto err; - } - } - if (err != &drgn_stop) - goto err; - - *dies_ret = it.dies.data; - *length_ret = it.dies.size; - return NULL; - -err: - drgn_dwarf_die_iterator_deinit(&it); - return err; -} - -struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, - size_t *length_ret) -{ - struct drgn_error *err; - - Dwarf *dwarf = dwarf_cu_getdwarf(die->cu); - if (!dwarf) - return drgn_error_libdw(); - - struct dwarf_die_vector dies = VECTOR_INIT; - Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&dies); - if (!cu_die) { - err = &drgn_enomem; - goto err; - } - - Dwarf_Half cu_version; - Dwarf_Off type_offset; - if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, - &type_offset)) { - err = drgn_error_libdw(); - goto err; - } - Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); - bool debug_types = cu_version == 4 && type_offset != 0; - Dwarf_Off next_cu_offset; - uint64_t type_signature; - if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), - &next_cu_offset, NULL, NULL, NULL, NULL, NULL, - debug_types ? &type_signature : NULL, NULL)) { - err = drgn_error_libdw(); - goto err; - } - const unsigned char *cu_end = - (unsigned char *)cu_die->addr - cu_die_offset + next_cu_offset; - -#define TOP() (&dies.data[dies.size - 1]) - while ((char *)TOP()->addr <= (char *)die->addr) { - if (TOP()->addr == die->addr) { - *dies_ret = dies.data; - *length_ret = dies.size - 1; - return NULL; - } - - Dwarf_Attribute attr; - if (dwarf_attr(TOP(), DW_AT_sibling, &attr)) { - /* The top DIE has a DW_AT_sibling attribute. 
*/ - Dwarf_Die sibling; - if (!dwarf_formref_die(&attr, &sibling)) { - err = drgn_error_libdw(); - goto err; - } - if (sibling.cu != TOP()->cu || - (char *)sibling.addr <= (char *)TOP()->addr) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_sibling"); - goto err; - } - - if ((char *)sibling.addr > (char *)die->addr) { - /* - * The top DIE's sibling is after the target - * DIE. Therefore, the target DIE must be a - * descendant of the top DIE. - */ - Dwarf_Die *child = - dwarf_die_vector_append_entry(&dies); - if (!child) { - err = &drgn_enomem; - goto err; - } - int r = dwarf_child(TOP() - 1, child); - if (r < 0) { - err = drgn_error_libdw(); - goto err; - } else if (r > 0) { - /* - * The top DIE didn't have any children, - * which should be impossible. - */ - goto not_found; - } - } else { - /* - * The top DIE's sibling is before or equal to - * the target DIE. Therefore, the target DIE - * isn't a descendant of the top DIE. Skip to - * the sibling. - */ - *TOP() = sibling; - } - } else { - /* - * The top DIE does not have a DW_AT_sibling attribute. - * Instead, we found the end of the top DIE. - */ - unsigned char *addr = attr.valp; - if (!addr || addr >= cu_end) - goto not_found; - - /* - * If the top DIE has children, then addr is its first - * child. Otherwise, then addr is its sibling. (Unless - * it is a null terminator.) - */ - size_t new_size = dies.size; - if (dwarf_haschildren(TOP()) > 0) - new_size++; - - while (*addr == '\0') { - /* - * addr points to the null terminator for the - * list of siblings. Go back up to its parent. - * The next byte is either the parent's sibling - * or another null terminator. - */ - new_size--; - addr++; - if (new_size <= 1 || addr >= cu_end) - goto not_found; - } - - /* addr now points to the next DIE. Go to it. 
*/ - if (new_size > dies.size) { - if (!dwarf_die_vector_append_entry(&dies)) { - err = &drgn_enomem; - goto err; - } - } else { - dies.size = new_size; - } - *TOP() = (Dwarf_Die){ - .cu = dies.data[0].cu, - .addr = addr, - }; - } - } -#undef TOP - -not_found: - err = drgn_error_create(DRGN_ERROR_OTHER, - "could not find DWARF DIE ancestors"); -err: - dwarf_die_vector_deinit(&dies); - return err; -} - DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) struct drgn_debug_info_module_key { @@ -692,8 +220,7 @@ drgn_debug_info_module_destroy(struct drgn_debug_info_module *module) if (module) { drgn_error_destroy(module->err); drgn_orc_module_info_deinit(module); - free(module->fdes); - free(module->cies); + drgn_dwarf_module_info_deinit(module); elf_end(module->elf); if (module->fd != -1) close(module->fd); @@ -1484,7 +1011,8 @@ drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) return &drgn_enomem; struct drgn_dwarf_index_update_state dindex_state; - if (!drgn_dwarf_index_update_state_init(&dindex_state, &dbinfo->dindex)) + if (!drgn_dwarf_index_update_state_init(&dindex_state, + &dbinfo->dwarf.index)) return &drgn_enomem; struct drgn_error *err = NULL; #pragma omp parallel for schedule(dynamic) @@ -1606,4351 +1134,86 @@ bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } -static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) +struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, + struct drgn_debug_info **ret) { - if (address_size < 1 || address_size > 8) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unsupported address size %" PRIu8, - address_size); + struct drgn_debug_info *dbinfo = malloc(sizeof(*dbinfo)); + if (!dbinfo) + return &drgn_enomem; + dbinfo->prog = prog; + const Dwfl_Callbacks *dwfl_callbacks; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + dwfl_callbacks = &drgn_dwfl_callbacks; + else if 
(prog->flags & DRGN_PROGRAM_IS_LIVE) + dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; + else + dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; + dbinfo->dwfl = dwfl_begin(dwfl_callbacks); + if (!dbinfo->dwfl) { + free(dbinfo); + return drgn_error_libdwfl(); } + drgn_debug_info_module_table_init(&dbinfo->modules); + c_string_set_init(&dbinfo->module_names); + drgn_dwarf_info_init(dbinfo); + *ret = dbinfo; return NULL; } -static struct drgn_error * -drgn_dwarf_next_addrx(struct binary_buffer *bb, - struct drgn_debug_info_module *module, Dwarf_Die *cu_die, - uint8_t address_size, const char **addr_base, - uint64_t *ret) +void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) { - struct drgn_error *err; - - if (!*addr_base) { - Dwarf_Attribute attr_mem, *attr; - if (!(attr = dwarf_attr(cu_die, DW_AT_addr_base, &attr_mem))) { - return drgn_error_create(DRGN_ERROR_OTHER, - "indirect address without DW_AT_addr_base"); - } - Dwarf_Word base; - if (dwarf_formudata(attr, &base)) - return drgn_error_libdw(); - - if (!module->scns[DRGN_SCN_DEBUG_ADDR]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "indirect address without .debug_addr section"); - } - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_DEBUG_ADDR); - if (err) - return err; - - if (base > module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_size || - base == 0) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_addr_base is out of bounds"); - } - - *addr_base = (char *)module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_buf + base; - uint8_t segment_selector_size = ((uint8_t *)*addr_base)[-1]; - if (segment_selector_size != 0) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unsupported segment selector size %" PRIu8, - segment_selector_size); - } - } - - uint64_t index; - if ((err = binary_buffer_next_uleb128(bb, &index))) - return err; - - Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_ADDR]; - if (index >= - ((char *)data->d_buf + data->d_size - *addr_base) / address_size) { - return 
binary_buffer_error(bb, - "address index is out of bounds"); - } - copy_lsbytes(ret, sizeof(*ret), HOST_LITTLE_ENDIAN, - *addr_base + index * address_size, address_size, - drgn_platform_is_little_endian(&module->platform)); - return NULL; + if (!dbinfo) + return; + drgn_dwarf_info_deinit(dbinfo); + c_string_set_deinit(&dbinfo->module_names); + drgn_debug_info_free_modules(dbinfo, false, true); + assert(drgn_debug_info_module_table_empty(&dbinfo->modules)); + drgn_debug_info_module_table_deinit(&dbinfo->modules); + dwfl_end(dbinfo->dwfl); + free(dbinfo); } -static struct drgn_error * -drgn_dwarf_read_loclistx(struct drgn_debug_info_module *module, - Dwarf_Die *cu_die, uint8_t offset_size, - Dwarf_Word index, Dwarf_Word *ret) +struct drgn_error * +drgn_debug_info_module_find_cfi(struct drgn_program *prog, + struct drgn_debug_info_module *module, + uint64_t pc, struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret) { struct drgn_error *err; - Dwarf_Attribute attr_mem, *attr; - if (!(attr = dwarf_attr(cu_die, DW_AT_loclists_base, &attr_mem))) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_FORM_loclistx without DW_AT_loclists_base"); - } - Dwarf_Word base; - if (dwarf_formudata(attr, &base)) - return drgn_error_libdw(); - - if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_FORM_loclistx without .debug_loclists section"); - } - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_DEBUG_LOCLISTS); - if (err) - return err; - Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_LOCLISTS]; + Dwarf_Addr bias; + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, + NULL, NULL); + uint64_t unbiased_pc = pc - bias; - if (base > data->d_size) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_loclists_base is out of bounds"); - } - assert(offset_size == 4 || offset_size == 8); - if (index >= (data->d_size - base) / offset_size) { - return 
drgn_error_create(DRGN_ERROR_OTHER, - "DW_FORM_loclistx is out of bounds"); - } - const char *basep = (char *)data->d_buf + base; - if (offset_size == 8) { - uint64_t offset; - memcpy(&offset, (uint64_t *)basep + index, sizeof(offset)); - if (drgn_platform_bswap(&module->platform)) - offset = bswap_64(offset); - *ret = base + offset; + if (prog->prefer_orc_unwinder) { + err = drgn_debug_info_find_orc_cfi(module, unbiased_pc, row_ret, + interrupted_ret, + ret_addr_regno_ret); + if (err != &drgn_not_found) + return err; + return drgn_debug_info_find_dwarf_cfi(module, unbiased_pc, + row_ret, interrupted_ret, + ret_addr_regno_ret); } else { - uint32_t offset; - memcpy(&offset, (uint32_t *)basep + index, sizeof(offset)); - if (drgn_platform_bswap(&module->platform)) - offset = bswap_32(offset); - *ret = base + offset; + err = drgn_debug_info_find_dwarf_cfi(module, unbiased_pc, + row_ret, interrupted_ret, + ret_addr_regno_ret); + if (err != &drgn_not_found) + return err; + return drgn_debug_info_find_orc_cfi(module, unbiased_pc, + row_ret, interrupted_ret, + ret_addr_regno_ret); } - return NULL; } -static struct drgn_error * -drgn_dwarf5_location_list(struct drgn_debug_info_module *module, - Dwarf_Word offset, Dwarf_Die *cu_die, - uint8_t address_size, uint64_t pc, - const char **expr_ret, size_t *expr_size_ret) +#if !_ELFUTILS_PREREQ(0, 175) +static Elf *dwelf_elf_begin(int fd) { - struct drgn_error *err; - - if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclist without .debug_loclists section"); - } - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_DEBUG_LOCLISTS); - if (err) - return err; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOCLISTS); - if (offset > buffer.bb.end - buffer.bb.pos) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclist is out of bounds"); - } - buffer.bb.pos += offset; - - const char *addr_base = NULL; - uint64_t base; - 
bool base_valid = false; - /* Default is unknown. May be overridden by DW_LLE_default_location. */ - *expr_ret = NULL; - *expr_size_ret = 0; - for (;;) { - uint8_t kind; - if ((err = binary_buffer_next_u8(&buffer.bb, &kind))) - return err; - uint64_t start, length, expr_size; - switch (kind) { - case DW_LLE_end_of_list: - return NULL; - case DW_LLE_base_addressx: - if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, - cu_die, address_size, - &addr_base, &base))) - return err; - base_valid = true; - break; - case DW_LLE_startx_endx: - if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, - cu_die, address_size, - &addr_base, &start)) || - (err = drgn_dwarf_next_addrx(&buffer.bb, module, - cu_die, address_size, - &addr_base, &length))) - return err; - length -= start; -counted_location_description: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &expr_size))) - return err; - if (expr_size > buffer.bb.end - buffer.bb.pos) { - return binary_buffer_error(&buffer.bb, - "location description size is out of bounds"); - } - if (pc >= start && pc - start < length) { - *expr_ret = buffer.bb.pos; - *expr_size_ret = expr_size; - return NULL; - } - buffer.bb.pos += expr_size; - break; - case DW_LLE_startx_length: - if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, - cu_die, address_size, - &addr_base, &start)) || - (err = binary_buffer_next_uleb128(&buffer.bb, - &length))) - return err; - goto counted_location_description; - case DW_LLE_offset_pair: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &start)) || - (err = binary_buffer_next_uleb128(&buffer.bb, - &length))) - return err; - length -= start; - if (!base_valid) { - Dwarf_Addr low_pc; - if (dwarf_lowpc(cu_die, &low_pc)) - return drgn_error_libdw(); - base = low_pc; - base_valid = true; - } - start += base; - goto counted_location_description; - case DW_LLE_default_location: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &expr_size))) - return err; - if (expr_size > buffer.bb.end - buffer.bb.pos) { - 
return binary_buffer_error(&buffer.bb, - "location description size is out of bounds"); - } - *expr_ret = buffer.bb.pos; - *expr_size_ret = expr_size; - buffer.bb.pos += expr_size; - break; - case DW_LLE_base_address: - if ((err = binary_buffer_next_uint(&buffer.bb, - address_size, - &base))) - return err; - base_valid = true; - break; - case DW_LLE_start_end: - if ((err = binary_buffer_next_uint(&buffer.bb, - address_size, - &start)) || - (err = binary_buffer_next_uint(&buffer.bb, - address_size, - &length))) - return err; - length -= start; - goto counted_location_description; - case DW_LLE_start_length: - if ((err = binary_buffer_next_uint(&buffer.bb, - address_size, - &start)) || - (err = binary_buffer_next_uleb128(&buffer.bb, - &length))) - return err; - goto counted_location_description; - default: - return binary_buffer_error(&buffer.bb, - "unknown location list entry kind %#" PRIx8, - kind); - } - } -} - -static struct drgn_error * -drgn_dwarf4_location_list(struct drgn_debug_info_module *module, - Dwarf_Word offset, Dwarf_Die *cu_die, - uint8_t address_size, uint64_t pc, - const char **expr_ret, size_t *expr_size_ret) -{ - struct drgn_error *err; - - if (!module->scns[DRGN_SCN_DEBUG_LOC]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclistptr without .debug_loc section"); - } - err = drgn_debug_info_module_cache_section(module, DRGN_SCN_DEBUG_LOC); - if (err) - return err; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOC); - if (offset > buffer.bb.end - buffer.bb.pos) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclistptr is out of bounds"); - } - buffer.bb.pos += offset; - - uint64_t address_max = uint_max(address_size); - uint64_t base; - bool base_valid = false; - for (;;) { - uint64_t start, end; - if ((err = binary_buffer_next_uint(&buffer.bb, address_size, - &start)) || - (err = binary_buffer_next_uint(&buffer.bb, address_size, - &end))) - return err; - if (start == 0 && end == 0) { 
- *expr_ret = NULL; - *expr_size_ret = 0; - return NULL; - } else if (start == address_max) { - base = end; - base_valid = true; - } else { - if (!base_valid) { - Dwarf_Addr low_pc; - if (dwarf_lowpc(cu_die, &low_pc)) - return drgn_error_libdw(); - base = low_pc; - base_valid = true; - } - uint16_t expr_size; - if ((err = binary_buffer_next_u16(&buffer.bb, - &expr_size))) - return err; - if (expr_size > buffer.bb.end - buffer.bb.pos) { - return binary_buffer_error(&buffer.bb, - "location description size is out of bounds"); - } - if (base + start <= pc && pc < base + end) { - *expr_ret = buffer.bb.pos; - *expr_size_ret = expr_size; - return NULL; - } - buffer.bb.pos += expr_size; - } - } -} - -static struct drgn_error * -drgn_dwarf_location(struct drgn_debug_info_module *module, - Dwarf_Attribute *attr, - const struct drgn_register_state *regs, - const char **expr_ret, size_t *expr_size_ret) -{ - struct drgn_error *err; - switch (attr->form) { - /* DWARF 3 */ - case DW_FORM_data4: - case DW_FORM_data8: - /* DWARF 4-5 */ - case DW_FORM_sec_offset: - /* DWARF 5 */ - case DW_FORM_loclistx: { - Dwarf_Die cu_die; - Dwarf_Half cu_version; - uint8_t address_size; - uint8_t offset_size; - if (!dwarf_cu_die(attr->cu, &cu_die, &cu_version, NULL, - &address_size, &offset_size, NULL, NULL)) - return drgn_error_libdw(); - if ((err = drgn_check_address_size(address_size))) - return err; - - Dwarf_Word offset; - if (dwarf_formudata(attr, &offset)) - return drgn_error_libdw(); - if (attr->form == DW_FORM_loclistx && - ((err = drgn_dwarf_read_loclistx(module, &cu_die, - offset_size, offset, - &offset)))) - return err; - - struct optional_uint64 pc; - if (!regs || - !(pc = drgn_register_state_get_pc(regs)).has_value) { - *expr_ret = NULL; - *expr_size_ret = 0; - return NULL; - } - Dwarf_Addr bias; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, - NULL, NULL, NULL); - pc.value = pc.value - !regs->interrupted - bias; - - if (cu_version >= 5) { - return 
drgn_dwarf5_location_list(module, offset, - &cu_die, address_size, - pc.value, expr_ret, - expr_size_ret); - } else { - return drgn_dwarf4_location_list(module, offset, - &cu_die, address_size, - pc.value, expr_ret, - expr_size_ret); - } - } - default: { - Dwarf_Block block; - if (dwarf_formblock(attr, &block)) - return drgn_error_libdw(); - *expr_ret = (char *)block.data; - *expr_size_ret = block.length; - return NULL; - } - } -} - -static struct drgn_error * -drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, - const char *message) -{ - uintptr_t p = (uintptr_t)ptr; - int end_match = -1; - for (int i = 0; i < array_size(module->scn_data); i++) { - if (!module->scn_data[i]) - continue; - uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; - uintptr_t end = start + module->scn_data[i]->d_size; - if (start <= p) { - if (p < end) { - return drgn_error_debug_info_scn(module, i, ptr, - message); - } else if (p == end) { - end_match = i; - } - } - } - if (end_match != -1) { - /* - * The pointer doesn't lie within a section, but it does point - * to the end of a section. - */ - return drgn_error_debug_info_scn(module, end_match, ptr, - message); - } - /* We couldn't find the section containing the pointer. */ - const char *name = dwfl_module_info(module->dwfl_module, NULL, NULL, - NULL, NULL, NULL, NULL, NULL); - return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); -} - -/* A DWARF expression and the context it is being evaluated in. 
*/ -struct drgn_dwarf_expression_context { - struct binary_buffer bb; - const char *start; - struct drgn_program *prog; - struct drgn_debug_info_module *module; - uint8_t address_size; - Dwarf_Die cu_die; - const char *cu_addr_base; - Dwarf_Die *function; - const struct drgn_register_state *regs; -}; - -static struct drgn_error * -drgn_dwarf_expression_buffer_error(struct binary_buffer *bb, const char *pos, - const char *message) -{ - struct drgn_dwarf_expression_context *ctx = - container_of(bb, struct drgn_dwarf_expression_context, bb); - return drgn_error_debug_info(ctx->module, pos, message); -} - -static inline struct drgn_error * -drgn_dwarf_expression_context_init(struct drgn_dwarf_expression_context *ctx, - struct drgn_program *prog, - struct drgn_debug_info_module *module, - Dwarf_CU *cu, Dwarf_Die *function, - const struct drgn_register_state *regs, - const char *expr, size_t expr_size) -{ - struct drgn_error *err; - binary_buffer_init(&ctx->bb, expr, expr_size, - drgn_platform_is_little_endian(&module->platform), - drgn_dwarf_expression_buffer_error); - ctx->start = expr; - ctx->prog = prog; - ctx->module = module; - if (cu) { - if (!dwarf_cu_die(cu, &ctx->cu_die, NULL, NULL, - &ctx->address_size, NULL, NULL, NULL)) - return drgn_error_libdw(); - if ((err = drgn_check_address_size(ctx->address_size))) - return err; - } else { - ctx->cu_die.addr = NULL; - ctx->address_size = - drgn_platform_address_size(&module->platform); - } - ctx->cu_addr_base = NULL; - ctx->function = function; - ctx->regs = regs; - return NULL; -} - -static struct drgn_error * -drgn_dwarf_frame_base(struct drgn_program *prog, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_register_state *regs, - int *remaining_ops, uint64_t *ret); - -/* - * Evaluate a DWARF expression up to the next location description operation or - * operation that can't be evaluated in the given context. - * - * Returns &drgn_not_found if it tried to use an unknown register value. 
- */ -static struct drgn_error * -drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, - struct uint64_vector *stack, - int *remaining_ops) -{ - struct drgn_error *err; - const struct drgn_platform *platform = &ctx->module->platform; - bool little_endian = drgn_platform_is_little_endian(platform); - uint8_t address_size = ctx->address_size; - uint8_t address_bits = address_size * CHAR_BIT; - uint64_t address_mask = uint_max(address_size); - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - platform->arch->dwarf_regno_to_internal; - -#define CHECK(n) do { \ - size_t _n = (n); \ - if (stack->size < _n) { \ - return binary_buffer_error(&ctx->bb, \ - "DWARF expression stack underflow"); \ - } \ -} while (0) - -#define ELEM(i) stack->data[stack->size - 1 - (i)] - -#define PUSH(x) do { \ - uint64_t push = (x); \ - if (!uint64_vector_append(stack, &push)) \ - return &drgn_enomem; \ -} while (0) - -#define PUSH_MASK(x) PUSH((x) & address_mask) - - while (binary_buffer_has_next(&ctx->bb)) { - if (*remaining_ops <= 0) { - return binary_buffer_error(&ctx->bb, - "DWARF expression executed too many operations"); - } - (*remaining_ops)--; - uint8_t opcode; - if ((err = binary_buffer_next_u8(&ctx->bb, &opcode))) - return err; - uint64_t uvalue; - uint64_t dwarf_regno; - uint8_t deref_size; - switch (opcode) { - /* Literal encodings. */ - case DW_OP_lit0 ... 
DW_OP_lit31: - PUSH(opcode - DW_OP_lit0); - break; - case DW_OP_addr: - if ((err = binary_buffer_next_uint(&ctx->bb, - address_size, - &uvalue))) - return err; - PUSH(uvalue); - break; - case DW_OP_const1u: - if ((err = binary_buffer_next_u8_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH(uvalue); - break; - case DW_OP_const2u: - if ((err = binary_buffer_next_u16_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const4u: - if ((err = binary_buffer_next_u32_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const8u: - if ((err = binary_buffer_next_u64(&ctx->bb, &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const1s: - if ((err = binary_buffer_next_s8_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const2s: - if ((err = binary_buffer_next_s16_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const4s: - if ((err = binary_buffer_next_s32_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const8s: - if ((err = binary_buffer_next_s64_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_constu: - if ((err = binary_buffer_next_uleb128(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_consts: - if ((err = binary_buffer_next_sleb128_into_u64(&ctx->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_addrx: - case DW_OP_constx: - if (!ctx->cu_die.addr) { - ctx->bb.pos = ctx->bb.prev; - return NULL; - } - if ((err = drgn_dwarf_next_addrx(&ctx->bb, ctx->module, - &ctx->cu_die, - address_size, - &ctx->cu_addr_base, - &uvalue))) - return err; - PUSH(uvalue); - break; - /* Register values. 
*/ - case DW_OP_fbreg: { - err = drgn_dwarf_frame_base(ctx->prog, ctx->module, - ctx->function, ctx->regs, - remaining_ops, &uvalue); - if (err) - return err; - int64_t svalue; - if ((err = binary_buffer_next_sleb128(&ctx->bb, - &svalue))) - return err; - PUSH_MASK(uvalue + svalue); - break; - } - case DW_OP_breg0 ... DW_OP_breg31: - dwarf_regno = opcode - DW_OP_breg0; - goto breg; - case DW_OP_bregx: - if ((err = binary_buffer_next_uleb128(&ctx->bb, - &dwarf_regno))) - return err; -breg: - { - if (!ctx->regs) - return &drgn_not_found; - drgn_register_number regno = - dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(ctx->regs, regno)) - return &drgn_not_found; - const struct drgn_register_layout *layout = - &platform->arch->register_layout[regno]; - copy_lsbytes(&uvalue, sizeof(uvalue), - HOST_LITTLE_ENDIAN, - &ctx->regs->buf[layout->offset], - layout->size, little_endian); - int64_t svalue; - if ((err = binary_buffer_next_sleb128(&ctx->bb, - &svalue))) - return err; - PUSH_MASK(uvalue + svalue); - break; - } - /* Stack operations. 
*/ - case DW_OP_dup: - CHECK(1); - PUSH(ELEM(0)); - break; - case DW_OP_drop: - CHECK(1); - stack->size--; - break; - case DW_OP_pick: { - uint8_t index; - if ((err = binary_buffer_next_u8(&ctx->bb, &index))) - return err; - CHECK(index + 1); - PUSH(ELEM(index)); - break; - } - case DW_OP_over: - CHECK(2); - PUSH(ELEM(1)); - break; - case DW_OP_swap: - CHECK(2); - uvalue = ELEM(0); - ELEM(0) = ELEM(1); - ELEM(1) = uvalue; - break; - case DW_OP_rot: - CHECK(3); - uvalue = ELEM(0); - ELEM(0) = ELEM(1); - ELEM(1) = ELEM(2); - ELEM(2) = uvalue; - break; - case DW_OP_deref: - deref_size = address_size; - goto deref; - case DW_OP_deref_size: - if ((err = binary_buffer_next_u8(&ctx->bb, - &deref_size))) - return err; - if (deref_size > address_size) { - return binary_buffer_error(&ctx->bb, - "DW_OP_deref_size has invalid size"); - } -deref: - { - CHECK(1); - char deref_buf[8]; - err = drgn_program_read_memory(ctx->prog, deref_buf, - ELEM(0), deref_size, - false); - if (err) - return err; - copy_lsbytes(&ELEM(0), sizeof(ELEM(0)), - HOST_LITTLE_ENDIAN, deref_buf, deref_size, - little_endian); - break; - } - case DW_OP_call_frame_cfa: { - if (!ctx->regs) - return &drgn_not_found; - /* - * The DWARF 5 specification says that - * DW_OP_call_frame_cfa cannot be used for CFI. For - * DW_CFA_def_cfa_expression, it is clearly invalid to - * define the CFA in terms of the CFA, and it will fail - * naturally below. This restriction doesn't make sense - * for DW_CFA_expression and DW_CFA_val_expression, as - * they push the CFA and thus depend on it anyways, so - * we don't bother enforcing it. - */ - struct optional_uint64 cfa = - drgn_register_state_get_cfa(ctx->regs); - if (!cfa.has_value) - return &drgn_not_found; - PUSH(cfa.value); - break; - } - /* Arithmetic and logical operations. 
*/ -#define UNOP_MASK(op) do { \ - CHECK(1); \ - ELEM(0) = (op ELEM(0)) & address_mask; \ -} while (0) -#define BINOP(op) do { \ - CHECK(2); \ - ELEM(1) = ELEM(1) op ELEM(0); \ - stack->size--; \ -} while (0) -#define BINOP_MASK(op) do { \ - CHECK(2); \ - ELEM(1) = (ELEM(1) op ELEM(0)) & address_mask; \ - stack->size--; \ -} while (0) - case DW_OP_abs: - CHECK(1); - if (ELEM(0) & (UINT64_C(1) << (address_bits - 1))) - ELEM(0) = -ELEM(0) & address_mask; - break; - case DW_OP_and: - BINOP(&); - break; - case DW_OP_div: - CHECK(2); - if (ELEM(0) == 0) { - return binary_buffer_error(&ctx->bb, - "division by zero in DWARF expression"); - } - ELEM(1) = ((truncate_signed(ELEM(1), address_bits) - / truncate_signed(ELEM(0), address_bits)) - & address_mask); - stack->size--; - break; - case DW_OP_minus: - BINOP_MASK(-); - break; - case DW_OP_mod: - CHECK(2); - if (ELEM(0) == 0) { - return binary_buffer_error(&ctx->bb, - "modulo by zero in DWARF expression"); - } - ELEM(1) = ELEM(1) % ELEM(0); - stack->size--; - break; - case DW_OP_mul: - BINOP_MASK(*); - break; - case DW_OP_neg: - UNOP_MASK(-); - break; - case DW_OP_not: - UNOP_MASK(~); - break; - case DW_OP_or: - BINOP(|); - break; - case DW_OP_plus: - BINOP_MASK(+); - break; - case DW_OP_plus_uconst: - CHECK(1); - if ((err = binary_buffer_next_uleb128(&ctx->bb, - &uvalue))) - return err; - ELEM(0) = (ELEM(0) + uvalue) & address_mask; - break; - case DW_OP_shl: - CHECK(2); - if (ELEM(0) < address_bits) - ELEM(1) = (ELEM(1) << ELEM(0)) & address_mask; - else - ELEM(1) = 0; - stack->size--; - break; - case DW_OP_shr: - CHECK(2); - if (ELEM(0) < address_bits) - ELEM(1) >>= ELEM(0); - else - ELEM(1) = 0; - stack->size--; - break; - case DW_OP_shra: - CHECK(2); - if (ELEM(0) < address_bits) { - ELEM(1) = ((truncate_signed(ELEM(1), address_bits) - >> ELEM(0)) - & address_mask); - } else if (ELEM(1) & (UINT64_C(1) << (address_bits - 1))) { - ELEM(1) = -INT64_C(1) & address_mask; - } else { - ELEM(1) = 0; - } - stack->size--; - 
break; - case DW_OP_xor: - BINOP(^); - break; -#undef BINOP_MASK -#undef BINOP -#undef UNOP_MASK - /* Control flow operations. */ -#define RELOP(op) do { \ - CHECK(2); \ - ELEM(1) = (truncate_signed(ELEM(1), address_bits) op \ - truncate_signed(ELEM(0), address_bits)); \ - stack->size--; \ -} while (0) - case DW_OP_le: - RELOP(<=); - break; - case DW_OP_ge: - RELOP(>=); - break; - case DW_OP_eq: - RELOP(==); - break; - case DW_OP_lt: - RELOP(<); - break; - case DW_OP_gt: - RELOP(>); - break; - case DW_OP_ne: - RELOP(!=); - break; -#undef RELOP - case DW_OP_skip: -branch: - { - int16_t skip; - if ((err = binary_buffer_next_s16(&ctx->bb, &skip))) - return err; - if ((skip >= 0 && skip > ctx->bb.end - ctx->bb.pos) || - (skip < 0 && -skip > ctx->bb.pos - ctx->start)) { - return binary_buffer_error(&ctx->bb, - "DWARF expression branch is out of bounds"); - } - ctx->bb.pos += skip; - break; - } - case DW_OP_bra: - CHECK(1); - if (ELEM(0)) { - stack->size--; - goto branch; - } else { - stack->size--; - if ((err = binary_buffer_skip(&ctx->bb, 2))) - return err; - } - break; - /* Special operations. */ - case DW_OP_nop: - break; - /* Location description operations. */ - case DW_OP_reg0 ... DW_OP_reg31: - case DW_OP_regx: - case DW_OP_implicit_value: - case DW_OP_stack_value: - case DW_OP_piece: - case DW_OP_bit_piece: - /* The caller must handle it. */ - ctx->bb.pos = ctx->bb.prev; - return NULL; - /* - * We don't yet support: - * - * - DW_OP_push_object_address - * - DW_OP_form_tls_address - * - DW_OP_entry_value - * DW_OP_implicit_pointer - * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. - * - Typed operations: DW_OP_const_type, DW_OP_regval_type, - * DW_OP_deref_type, DW_OP_convert, DW_OP_reinterpret. - * - Operations for multiple address spaces: DW_OP_xderef, - * DW_OP_xderef_size, DW_OP_xderef_type. 
- */ - default: - return binary_buffer_error(&ctx->bb, - "unknown DWARF expression opcode %#" PRIx8, - opcode); - } - } - -#undef PUSH_MASK -#undef PUSH -#undef ELEM -#undef CHECK - - return NULL; -} - -static struct drgn_error * -drgn_dwarf_frame_base(struct drgn_program *prog, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_register_state *regs, - int *remaining_ops, uint64_t *ret) -{ - struct drgn_error *err; - bool little_endian = drgn_platform_is_little_endian(&module->platform); - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - module->platform.arch->dwarf_regno_to_internal; - - if (!die) - return &drgn_not_found; - Dwarf_Attribute attr_mem, *attr; - if (!(attr = dwarf_attr_integrate(die, DW_AT_frame_base, &attr_mem))) - return &drgn_not_found; - const char *expr; - size_t expr_size; - err = drgn_dwarf_location(module, attr, regs, &expr, &expr_size); - if (err) - return err; - - struct drgn_dwarf_expression_context ctx; - if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, - die->cu, NULL, regs, expr, - expr_size))) - return err; - struct uint64_vector stack = VECTOR_INIT; - for (;;) { - err = drgn_eval_dwarf_expression(&ctx, &stack, remaining_ops); - if (err) - goto out; - if (binary_buffer_has_next(&ctx.bb)) { - uint8_t opcode; - if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) - goto out; - - uint64_t dwarf_regno; - switch (opcode) { - case DW_OP_reg0 ... 
DW_OP_reg31: - dwarf_regno = opcode - DW_OP_reg0; - goto reg; - case DW_OP_regx: - if ((err = binary_buffer_next_uleb128(&ctx.bb, - &dwarf_regno))) - goto out; -reg: - { - if (!regs) { - err = &drgn_not_found; - goto out; - } - drgn_register_number regno = - dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(regs, - regno)) { - err = &drgn_not_found; - goto out; - } - const struct drgn_register_layout *layout = - &prog->platform.arch->register_layout[regno]; - /* - * Note that this doesn't mask the address since - * the caller does that. - */ - copy_lsbytes(ret, sizeof(*ret), - HOST_LITTLE_ENDIAN, - ®s->buf[layout->offset], - layout->size, little_endian); - if (binary_buffer_has_next(&ctx.bb)) { - err = binary_buffer_error(&ctx.bb, - "stray operations in DW_AT_frame_base expression"); - } else { - err = NULL; - } - goto out; - } - default: - err = binary_buffer_error(&ctx.bb, - "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", - opcode); - goto out; - } - } else if (stack.size) { - *ret = stack.data[stack.size - 1]; - err = NULL; - break; - } else { - err = &drgn_not_found; - break; - } - } -out: - uint64_vector_deinit(&stack); - return err; -} - -DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) - -/** - * Return whether a DWARF DIE is little-endian. - * - * @param[in] check_attr Whether to check the DW_AT_endianity attribute. If @c - * false, only the ELF header is checked and this function cannot fail. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static struct drgn_error *dwarf_die_is_little_endian(Dwarf_Die *die, - bool check_attr, bool *ret) -{ - Dwarf_Attribute endianity_attr_mem, *endianity_attr; - Dwarf_Word endianity; - if (check_attr && - (endianity_attr = dwarf_attr_integrate(die, DW_AT_endianity, - &endianity_attr_mem))) { - if (dwarf_formudata(endianity_attr, &endianity)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_endianity"); - } - } else { - endianity = DW_END_default; - } - switch (endianity) { - case DW_END_default: { - Elf *elf = dwarf_getelf(dwarf_cu_getdwarf(die->cu)); - *ret = elf_getident(elf, NULL)[EI_DATA] == ELFDATA2LSB; - return NULL; - } - case DW_END_little: - *ret = true; - return NULL; - case DW_END_big: - *ret = false; - return NULL; - default: - return drgn_error_create(DRGN_ERROR_OTHER, - "unknown DW_AT_endianity"); - } -} - -/** Like dwarf_die_is_little_endian(), but returns a @ref drgn_byte_order. */ -static struct drgn_error *dwarf_die_byte_order(Dwarf_Die *die, bool check_attr, - enum drgn_byte_order *ret) -{ - bool little_endian; - struct drgn_error *err = dwarf_die_is_little_endian(die, check_attr, - &little_endian); - /* - * dwarf_die_is_little_endian() can't fail if check_attr is false, so - * the !check_attr test suppresses maybe-uninitialized warnings. - */ - if (!err || !check_attr) - *ret = drgn_byte_order_from_little_endian(little_endian); - return err; -} - -static int dwarf_type(Dwarf_Die *die, Dwarf_Die *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) - return 1; - - return dwarf_formref_die(attr, ret) ? 
0 : -1; -} - -static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr(die, name, &attr_mem))) { - *ret = false; - return 0; - } - return dwarf_formflag(attr, ret); -} - -static int dwarf_flag_integrate(Dwarf_Die *die, unsigned int name, bool *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr_integrate(die, name, &attr_mem))) { - *ret = false; - return 0; - } - return dwarf_formflag(attr, ret); -} - -/** - * Parse a type from a DWARF debugging information entry. - * - * This is the same as @ref drgn_type_from_dwarf() except that it can be used to - * work around a bug in GCC < 9.0 that zero length array types are encoded the - * same as incomplete array types. There are a few places where GCC allows - * zero-length arrays but not incomplete arrays: - * - * - As the type of a member of a structure with only one member. - * - As the type of a structure member other than the last member. - * - As the type of a union member. - * - As the element type of an array. - * - * In these cases, we know that what appears to be an incomplete array type must - * actually have a length of zero. In other cases, a subrange DIE without - * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array - * type. - * - * @param[in] dbinfo Debugging information. - * @param[in] module Module containing @p die. - * @param[in] die DIE to parse. - * @param[in] can_be_incomplete_array Whether the type can be an incomplete - * array type. If this is @c false and the type appears to be an incomplete - * array type, its length is set to zero instead. - * @param[out] is_incomplete_array_ret Whether the encoded type is an incomplete - * array type or a typedef of an incomplete array type (regardless of @p - * can_be_incomplete_array). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret); - -/** - * Parse a type from a DWARF debugging information entry. - * - * @param[in] dbinfo Debugging information. - * @param[in] module Module containing @p die. - * @param[in] die DIE to parse. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -static inline struct drgn_error * -drgn_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - struct drgn_qualified_type *ret) -{ - return drgn_type_from_dwarf_internal(dbinfo, module, die, true, NULL, - ret); -} - -/** - * Parse a type from the @c DW_AT_type attribute of a DWARF debugging - * information entry. - * - * @param[in] dbinfo Debugging information. - * @param[in] module Module containing @p die. - * @param[in] die DIE with @c DW_AT_type attribute. - * @param[in] lang Language of @p die if it is already known, @c NULL if it - * should be determined from @p die. - * @param[in] can_be_void Whether the @c DW_AT_type attribute may be missing, - * which is interpreted as a void type. If this is false and the @c DW_AT_type - * attribute is missing, an error is returned. - * @param[in] can_be_incomplete_array See @ref drgn_type_from_dwarf_internal(). - * @param[in] is_incomplete_array_ret See @ref drgn_type_from_dwarf_internal(). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static struct drgn_error * -drgn_type_from_dwarf_attr(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_language *lang, - bool can_be_void, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - char tag_buf[DW_TAG_BUF_LEN]; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) { - if (can_be_void) { - if (!lang) { - err = drgn_language_from_die(die, true, &lang); - if (err) - return err; - } - ret->type = drgn_void_type(dbinfo->prog, lang); - ret->qualifiers = 0; - return NULL; - } else { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s is missing DW_AT_type", - dwarf_tag_str(die, tag_buf)); - } - } - - Dwarf_Die type_die; - if (!dwarf_formref_die(attr, &type_die)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_type", - dwarf_tag_str(die, tag_buf)); - } - - return drgn_type_from_dwarf_internal(dbinfo, module, &type_die, - can_be_incomplete_array, - is_incomplete_array_ret, ret); -} - -static struct drgn_error * -drgn_object_from_dwarf_enumerator(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const char *name, - struct drgn_object *ret) -{ - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf(dbinfo, module, die, &qualified_type); - if (err) - return err; - const struct drgn_type_enumerator *enumerators = - drgn_type_enumerators(qualified_type.type); - size_t num_enumerators = drgn_type_num_enumerators(qualified_type.type); - for (size_t i = 0; i < num_enumerators; i++) { - if (strcmp(enumerators[i].name, name) != 0) - continue; - - if (drgn_enum_type_is_signed(qualified_type.type)) { - return drgn_object_set_signed(ret, qualified_type, - enumerators[i].svalue, 0); - } else { - return drgn_object_set_unsigned(ret, qualified_type, - 
enumerators[i].uvalue, - 0); - } - } - UNREACHABLE(); -} - -static struct drgn_error * -drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, struct drgn_object *ret) -{ - struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf(dbinfo, module, die, - &qualified_type); - if (err) - return err; - Dwarf_Addr low_pc; - if (dwarf_lowpc(die, &low_pc) == -1) - return drgn_object_set_absent(ret, qualified_type, 0); - Dwarf_Addr bias; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, - NULL, NULL); - return drgn_object_set_reference(ret, qualified_type, low_pc + bias, 0, - 0); -} - -static struct drgn_error *read_bits(struct drgn_program *prog, void *dst, - unsigned int dst_bit_offset, uint64_t src, - unsigned int src_bit_offset, - uint64_t bit_size, bool lsb0) -{ - struct drgn_error *err; - - assert(dst_bit_offset < 8); - assert(src_bit_offset < 8); - - if (bit_size == 0) - return NULL; - - if (dst_bit_offset == src_bit_offset) { - /* - * We can read directly into the the destination buffer, but we - * may have to preserve some bits at the start and/or end. - */ - uint8_t *d = dst; - uint64_t last_bit = dst_bit_offset + bit_size - 1; - uint8_t first_byte = d[0]; - uint8_t last_byte = d[last_bit / 8]; - err = drgn_program_read_memory(prog, d, src, last_bit / 8 + 1, - false); - if (err) - return err; - if (dst_bit_offset != 0) { - uint8_t mask = - copy_bits_first_mask(dst_bit_offset, lsb0); - d[0] = (first_byte & ~mask) | (d[0] & mask); - } - if (last_bit % 8 != 7) { - uint8_t mask = copy_bits_last_mask(last_bit, lsb0); - d[last_bit / 8] = ((last_byte & ~mask) - | (d[last_bit / 8] & mask)); - } - return NULL; - } else { - /* - * If the source and destination have different offsets, then - * depending on the size and source offset, we may have to read - * one more byte than is available in the destination. 
To keep - * things simple, we always read into a temporary buffer (rather - * than adding a special case for reading directly into the - * destination and shifting bits around). - */ - uint64_t src_bytes = (src_bit_offset + bit_size - 1) / 8 + 1; - char stack_tmp[16], *tmp; - if (src_bytes <= sizeof(stack_tmp)) { - tmp = stack_tmp; - } else { - tmp = malloc64(src_bytes); - if (!tmp) - return &drgn_enomem; - } - err = drgn_program_read_memory(prog, tmp, src, src_bytes, - false); - if (!err) { - copy_bits(dst, dst_bit_offset, tmp, src_bit_offset, - bit_size, lsb0); - } - if (src_bytes > sizeof(stack_tmp)) - free(tmp); - return err; - } -} - -static struct drgn_error * -drgn_object_from_dwarf_location(struct drgn_program *prog, - struct drgn_debug_info_module *module, - Dwarf_Die *die, - struct drgn_qualified_type qualified_type, - const char *expr, size_t expr_size, - Dwarf_Die *function_die, - const struct drgn_register_state *regs, - struct drgn_object *ret) -{ - struct drgn_error *err; - bool little_endian = drgn_platform_is_little_endian(&module->platform); - uint64_t address_mask = drgn_platform_address_mask(&module->platform); - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - module->platform.arch->dwarf_regno_to_internal; - - struct drgn_object_type type; - err = drgn_object_type(qualified_type, 0, &type); - if (err) - return err; - - union drgn_value value; - char *value_buf = NULL; - - uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ - int bit_offset = -1; /* -1 means that we don't have an address. 
*/ - - uint64_t bit_pos = 0; - - int remaining_ops = MAX_DWARF_EXPR_OPS; - struct drgn_dwarf_expression_context ctx; - if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, - die->cu, function_die, - regs, expr, expr_size))) - return err; - struct uint64_vector stack = VECTOR_INIT; - do { - stack.size = 0; - err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); - if (err == &drgn_not_found) - goto absent; - else if (err) - goto out; - - const void *src = NULL; - size_t src_size; - - if (binary_buffer_has_next(&ctx.bb)) { - uint8_t opcode; - if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) - goto out; - - uint64_t uvalue; - uint64_t dwarf_regno; - drgn_register_number regno; - switch (opcode) { - case DW_OP_reg0 ... DW_OP_reg31: - dwarf_regno = opcode - DW_OP_reg0; - goto reg; - case DW_OP_regx: - if ((err = binary_buffer_next_uleb128(&ctx.bb, - &dwarf_regno))) - goto out; -reg: - if (!regs) - goto absent; - regno = dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(regs, - regno)) - goto absent; - const struct drgn_register_layout *layout = - &prog->platform.arch->register_layout[regno]; - src = ®s->buf[layout->offset]; - src_size = layout->size; - break; - case DW_OP_implicit_value: - if ((err = binary_buffer_next_uleb128(&ctx.bb, - &uvalue))) - goto out; - if (uvalue > ctx.bb.end - ctx.bb.pos) { - err = binary_buffer_error(&ctx.bb, - "DW_OP_implicit_value size is out of bounds"); - goto out; - } - src = ctx.bb.pos; - src_size = uvalue; - ctx.bb.pos += uvalue; - break; - case DW_OP_stack_value: - if (!stack.size) - goto absent; - if (little_endian != HOST_LITTLE_ENDIAN) { - stack.data[stack.size - 1] = - bswap_64(stack.data[stack.size - 1]); - } - src = &stack.data[stack.size - 1]; - src_size = sizeof(stack.data[0]); - break; - default: - ctx.bb.pos = ctx.bb.prev; - break; - } - } - - uint64_t piece_bit_size; - uint64_t piece_bit_offset; - if (binary_buffer_has_next(&ctx.bb)) { - uint8_t opcode; - if ((err = 
binary_buffer_next_u8(&ctx.bb, &opcode))) - goto out; - - switch (opcode) { - case DW_OP_piece: - if ((err = binary_buffer_next_uleb128(&ctx.bb, - &piece_bit_size))) - goto out; - /* - * It's probably bogus for the piece size to be - * larger than the remaining value size, but - * that's not explicitly stated in the DWARF 5 - * specification, so clamp it instead. - */ - if (__builtin_mul_overflow(piece_bit_size, 8U, - &piece_bit_size) || - piece_bit_size > type.bit_size - bit_pos) - piece_bit_size = type.bit_size - bit_pos; - piece_bit_offset = 0; - break; - case DW_OP_bit_piece: - if ((err = binary_buffer_next_uleb128(&ctx.bb, - &piece_bit_size)) || - (err = binary_buffer_next_uleb128(&ctx.bb, - &piece_bit_offset))) - goto out; - if (piece_bit_size > type.bit_size - bit_pos) - piece_bit_size = type.bit_size - bit_pos; - break; - default: - err = binary_buffer_error(&ctx.bb, - "unknown DWARF expression opcode %#" PRIx8 " after simple location description", - opcode); - goto out; - } - } else { - piece_bit_size = type.bit_size - bit_pos; - piece_bit_offset = 0; - } - - /* - * TODO: there are a few cases that a DWARF location can - * describe that can't be represented in drgn's object model: - * - * 1. An object that is partially known and partially unknown. - * 2. An object that is partially in memory and partially a - * value. - * 3. An object that is in memory at non-contiguous addresses. - * 4. A pointer object whose pointer value is not known but - * whose referenced value is known (DW_OP_implicit_pointer). - * - * For case 1, we consider the whole object as absent. For cases - * 2 and 3, we convert the whole object to a value. Case 4 is - * not supported at all. We should add a way to represent all of - * these situations precisely. - */ - if (src && piece_bit_size == 0) { - /* Ignore empty value. 
*/ - } else if (src) { - if (!value_buf && - !drgn_value_zalloc(drgn_value_size(type.bit_size), - &value, &value_buf)) { - err = &drgn_enomem; - goto out; - } - if (bit_offset >= 0) { - /* - * We previously had an address. Read it into - * the value. - */ - err = read_bits(prog, value_buf, 0, address, - bit_offset, bit_pos, - little_endian); - if (err) - goto out; - bit_offset = -1; - } - /* - * It's probably safe to assume that we don't have an - * implicit value larger than 2 exabytes. - */ - assert(src_size <= UINT64_MAX / 8); - uint64_t src_bit_size = UINT64_C(8) * src_size; - if (piece_bit_offset > src_bit_size) - piece_bit_offset = src_bit_size; - uint64_t copy_bit_size = - min(piece_bit_size, - src_bit_size - piece_bit_offset); - uint64_t copy_bit_offset = bit_pos; - if (!little_endian) { - copy_bit_offset += piece_bit_size - copy_bit_size; - piece_bit_offset = (src_bit_size - - copy_bit_size - - piece_bit_offset); - } - copy_bits(&value_buf[copy_bit_offset / 8], - copy_bit_offset % 8, - (const char *)src + (piece_bit_offset / 8), - piece_bit_offset % 8, copy_bit_size, - little_endian); - } else if (stack.size) { - uint64_t piece_address = - ((stack.data[stack.size - 1] + piece_bit_offset / 8) - & address_mask); - piece_bit_offset %= 8; - if (bit_pos > 0 && bit_offset >= 0) { - /* - * We already had an address. Merge the pieces - * if the addresses are contiguous, otherwise - * convert to a value. - * - * The obvious way to write this is - * (address + (bit_pos + bit_offset) / 8), but - * (bit_pos + bit_offset) can overflow uint64_t. - */ - uint64_t end_address = - ((address - + bit_pos / 8 - + (bit_pos % 8 + bit_offset) / 8) - & address_mask); - unsigned int end_bit_offset = - (bit_offset + bit_pos) % 8; - if (piece_bit_size == 0 || - (piece_address == end_address && - piece_bit_offset == end_bit_offset)) { - /* Piece is contiguous. 
*/ - piece_address = address; - piece_bit_offset = bit_offset; - } else { - if (!drgn_value_zalloc(drgn_value_size(type.bit_size), - &value, - &value_buf)) { - err = &drgn_enomem; - goto out; - } - err = read_bits(prog, value_buf, 0, - address, bit_offset, - bit_pos, little_endian); - if (err) - goto out; - bit_offset = -1; - } - } - if (value_buf) { - /* We already have a value. Read into it. */ - err = read_bits(prog, &value_buf[bit_pos / 8], - bit_pos % 8, piece_address, - piece_bit_offset, - piece_bit_size, little_endian); - if (err) - goto out; - } else { - address = piece_address; - bit_offset = piece_bit_offset; - } - } else if (piece_bit_size > 0) { - goto absent; - } - bit_pos += piece_bit_size; - } while (binary_buffer_has_next(&ctx.bb)); - - if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { -absent: - if (dwarf_tag(die) == DW_TAG_template_value_parameter) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_template_value_parameter is missing value"); - } - drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); - err = NULL; - } else if (bit_offset >= 0) { - Dwarf_Addr start, end, bias; - dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, - NULL, NULL, NULL); - /* - * If the address is not in the module's address range, then - * it's probably something special like a Linux per-CPU variable - * (which isn't actually a variable address but an offset). - * Don't apply the bias in that case. 
- */ - if (start <= address + bias && address + bias < end) - address += bias; - err = drgn_object_set_reference_internal(ret, &type, address, - bit_offset); - } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { - drgn_object_reinit(ret, &type, DRGN_OBJECT_VALUE); - ret->value = value; - value_buf = NULL; - err = NULL; - } else { - err = drgn_object_set_from_buffer_internal(ret, &type, - value_buf, 0); - } - -out: - if (value_buf != value.ibuf) - free(value_buf); - uint64_vector_deinit(&stack); - return err; -} - -static struct drgn_error * -drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, - struct drgn_qualified_type qualified_type, - Dwarf_Attribute *attr, struct drgn_object *ret) -{ - struct drgn_object_type type; - struct drgn_error *err = drgn_object_type(qualified_type, 0, &type); - if (err) - return err; - Dwarf_Block block; - if (dwarf_formblock(attr, &block) == 0) { - if (block.length < drgn_value_size(type.bit_size)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_const_value block is too small"); - } - return drgn_object_set_from_buffer_internal(ret, &type, - block.data, 0); - } else if (type.encoding == DRGN_OBJECT_ENCODING_SIGNED) { - Dwarf_Sword svalue; - if (dwarf_formsdata(attr, &svalue)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_const_value"); - } - drgn_object_set_signed_internal(ret, &type, svalue); - return NULL; - } else if (type.encoding == DRGN_OBJECT_ENCODING_UNSIGNED) { - Dwarf_Word uvalue; - if (dwarf_formudata(attr, &uvalue)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_const_value"); - } - drgn_object_set_unsigned_internal(ret, &type, uvalue); - return NULL; - } else { - return drgn_error_create(DRGN_ERROR_OTHER, - "unknown DW_AT_const_value form"); - } -} - -struct drgn_error * -drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, Dwarf_Die *type_die, - Dwarf_Die *function_die, - const 
struct drgn_register_state *regs, - struct drgn_object *ret) -{ - struct drgn_error *err; - if (dwarf_tag(die) == DW_TAG_subprogram) { - return drgn_object_from_dwarf_subprogram(dbinfo, module, die, - ret); - } - /* - * The DWARF 5 specifications mentions that data object entries can have - * DW_AT_endianity, but that doesn't seem to be used in practice. It - * would be inconvenient to support, so ignore it for now. - */ - struct drgn_qualified_type qualified_type; - if (type_die) { - err = drgn_type_from_dwarf(dbinfo, module, type_die, - &qualified_type); - } else { - err = drgn_type_from_dwarf_attr(dbinfo, module, die, NULL, true, - true, NULL, &qualified_type); - } - if (err) - return err; - Dwarf_Attribute attr_mem, *attr; - const char *expr; - size_t expr_size; - if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { - err = drgn_dwarf_location(module, attr, regs, &expr, - &expr_size); - if (err) - return err; - } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, - &attr_mem))) { - return drgn_object_from_dwarf_constant(dbinfo, die, - qualified_type, attr, - ret); - } else { - expr = NULL; - expr_size = 0; - } - return drgn_object_from_dwarf_location(dbinfo->prog, module, die, - qualified_type, expr, expr_size, - function_die, regs, ret); -} - -static struct drgn_error *find_dwarf_enumerator(Dwarf_Die *enumeration_type, - const char *name, - Dwarf_Die *ret) -{ - int r = dwarf_child(enumeration_type, ret); - while (r == 0) { - if (dwarf_tag(ret) == DW_TAG_enumerator && - strcmp(dwarf_diename(ret), name) == 0) - return NULL; - r = dwarf_siblingof(ret, ret); - } - if (r < 0) - return drgn_error_libdw(); - ret->addr = NULL; - return NULL; -} - -struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, - size_t num_scopes, - const char *name, - Dwarf_Die *die_ret, - Dwarf_Die *type_ret) -{ - struct drgn_error *err; - Dwarf_Die die; - for (size_t scope = num_scopes; scope--;) { - bool have_declaration = false; - if 
(dwarf_child(&scopes[scope], &die) != 0) - continue; - do { - switch (dwarf_tag(&die)) { - case DW_TAG_variable: - case DW_TAG_formal_parameter: - case DW_TAG_subprogram: - if (strcmp(dwarf_diename(&die), name) == 0) { - *die_ret = die; - bool declaration; - if (dwarf_flag(&die, DW_AT_declaration, - &declaration)) - return drgn_error_libdw(); - if (declaration) - have_declaration = true; - else - return NULL; - } - break; - case DW_TAG_enumeration_type: { - bool enum_class; - if (dwarf_flag_integrate(&die, DW_AT_enum_class, - &enum_class)) - return drgn_error_libdw(); - if (!enum_class) { - Dwarf_Die enumerator; - err = find_dwarf_enumerator(&die, name, - &enumerator); - if (err) - return err; - if (enumerator.addr) { - *die_ret = enumerator; - *type_ret = die; - return NULL; - } - } - break; - } - default: - continue; - } - } while (dwarf_siblingof(&die, &die) == 0); - if (have_declaration) - return NULL; - } - die_ret->addr = NULL; - return NULL; -} - -static struct drgn_error * -drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - - const char *name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_name"); - } - - Dwarf_Attribute attr; - Dwarf_Word encoding; - if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || - dwarf_formudata(&attr, &encoding)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_encoding"); - } - int size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); - } - - enum drgn_byte_order byte_order; - err = dwarf_die_byte_order(die, true, &byte_order); - if (err) - return err; - - switch (encoding) { - case DW_ATE_boolean: - return drgn_bool_type_create(dbinfo->prog, 
name, size, - byte_order, lang, ret); - case DW_ATE_float: - return drgn_float_type_create(dbinfo->prog, name, size, - byte_order, lang, ret); - case DW_ATE_signed: - case DW_ATE_signed_char: - return drgn_int_type_create(dbinfo->prog, name, size, true, - byte_order, lang, ret); - case DW_ATE_unsigned: - case DW_ATE_unsigned_char: - return drgn_int_type_create(dbinfo->prog, name, size, false, - byte_order, lang, ret); - /* We don't support complex types yet. */ - case DW_ATE_complex_float: - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_base_type has unknown DWARF encoding 0x%llx", - (unsigned long long)encoding); - } -} - -/* - * DW_TAG_structure_type, DW_TAG_union_type, DW_TAG_class_type, and - * DW_TAG_enumeration_type can be incomplete (i.e., have a DW_AT_declaration of - * true). This tries to find the complete type. If it succeeds, it returns NULL. - * If it can't find a complete type, it returns &drgn_not_found. Otherwise, it - * returns an error. - */ -static struct drgn_error * -drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, - const char *name, struct drgn_type **ret) -{ - struct drgn_error *err; - - struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, - strlen(name), &tag, 1); - if (err) - return err; - - /* - * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs - * with DW_AT_declaration, so this will always be a complete type. - */ - struct drgn_dwarf_index_die *index_die = - drgn_dwarf_index_iterator_next(&it); - if (!index_die) - return &drgn_not_found; - /* - * Look for another matching DIE. If there is one, then we can't be sure - * which type this is, so leave it incomplete rather than guessing. 
- */ - if (drgn_dwarf_index_iterator_next(&it)) - return &drgn_not_found; - - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) - return err; - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf(dbinfo, index_die->module, &die, - &qualified_type); - if (err) - return err; - *ret = qualified_type.type; - return NULL; -} - -struct drgn_dwarf_member_thunk_arg { - struct drgn_debug_info_module *module; - Dwarf_Die die; - bool can_be_incomplete_array; -}; - -static struct drgn_error * -drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_member_thunk_arg *arg = arg_; - if (res) { - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, - arg->module, &arg->die, NULL, - false, - arg->can_be_incomplete_array, - NULL, &qualified_type); - if (err) - return err; - - Dwarf_Attribute attr_mem, *attr; - uint64_t bit_field_size; - if ((attr = dwarf_attr_integrate(&arg->die, DW_AT_bit_size, - &attr_mem))) { - Dwarf_Word bit_size; - if (dwarf_formudata(attr, &bit_size)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_bit_size"); - } - bit_field_size = bit_size; - } else { - bit_field_size = 0; - } - - err = drgn_object_set_absent(res, qualified_type, - bit_field_size); - if (err) - return err; - } - free(arg); - return NULL; -} - -static inline bool drgn_dwarf_attribute_is_block(Dwarf_Attribute *attr) -{ - switch (attr->form) { - case DW_FORM_block1: - case DW_FORM_block2: - case DW_FORM_block4: - case DW_FORM_block: - return true; - default: - return false; - } -} - -static inline bool drgn_dwarf_attribute_is_ptr(Dwarf_Attribute *attr) -{ - switch (attr->form) { - case DW_FORM_sec_offset: - return true; - case DW_FORM_data4: - case DW_FORM_data8: { - /* - * dwarf_cu_die() always returns the DIE. We should use - * dwarf_cu_info(), but that requires elfutils >= 0.171. 
- */ - Dwarf_Die unused; - Dwarf_Half cu_version; - dwarf_cu_die(attr->cu, &unused, &cu_version, NULL, NULL, NULL, - NULL, NULL); - return cu_version <= 3; - } - default: - return false; - } -} - -static struct drgn_error *invalid_data_member_location(struct binary_buffer *bb, - const char *pos, - const char *message) -{ - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_member_location"); -} - -static struct drgn_error * -drgn_parse_dwarf_data_member_location(Dwarf_Attribute *attr, uint64_t *ret) -{ - struct drgn_error *err; - - if (drgn_dwarf_attribute_is_block(attr)) { - Dwarf_Block block; - if (dwarf_formblock(attr, &block)) - return drgn_error_libdw(); - /* - * In DWARF 2, DW_AT_data_member_location is always a location - * description. We can translate a DW_OP_plus_uconst expression - * into a constant offset; other expressions aren't supported - * yet. - */ - struct binary_buffer bb; - /* - * Right now we only parse u8 and ULEB128, so the byte order - * doesn't matter. 
- */ - binary_buffer_init(&bb, block.data, block.length, - HOST_LITTLE_ENDIAN, - invalid_data_member_location); - uint8_t opcode; - err = binary_buffer_next_u8(&bb, &opcode); - if (err) - return err; - if (opcode != DW_OP_plus_uconst) { -unsupported: - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has unsupported DW_AT_data_member_location"); - } - err = binary_buffer_next_uleb128(&bb, ret); - if (err) - return err; - if (binary_buffer_has_next(&bb)) - goto unsupported; - } else if (drgn_dwarf_attribute_is_ptr(attr)) { - goto unsupported; - } else { - - Dwarf_Word word; - if (dwarf_formudata(attr, &word)) - return invalid_data_member_location(NULL, NULL, NULL); - *ret = word; - } - return NULL; -} - -static struct drgn_error * -parse_member_offset(Dwarf_Die *die, union drgn_lazy_object *member_object, - bool little_endian, uint64_t *ret) -{ - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - /* - * The simplest case is when we have DW_AT_data_bit_offset, which is - * already the offset in bits from the beginning of the containing - * object to the beginning of the member (which may be a bit field). - */ - attr = dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr_mem); - if (attr) { - Dwarf_Word bit_offset; - if (dwarf_formudata(attr, &bit_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_bit_offset"); - } - *ret = bit_offset; - return NULL; - } - - /* - * Otherwise, we might have DW_AT_data_member_location, which is the - * offset in bytes from the beginning of the containing object. 
- */ - attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); - if (attr) { - err = drgn_parse_dwarf_data_member_location(attr, ret); - if (err) - return err; - *ret *= 8; - } else { - *ret = 0; - } - - /* - * In addition to DW_AT_data_member_location, a bit field might have - * DW_AT_bit_offset, which is the offset in bits of the most significant - * bit of the bit field from the most significant bit of the containing - * object. - */ - attr = dwarf_attr_integrate(die, DW_AT_bit_offset, &attr_mem); - if (attr) { - Dwarf_Word bit_offset; - if (dwarf_formudata(attr, &bit_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_bit_offset"); - } - - /* - * If the architecture is little-endian, then we must compute - * the location of the most significant bit from the size of the - * member, then subtract the bit offset and bit size to get the - * location of the beginning of the bit field. - * - * If the architecture is big-endian, then the most significant - * bit of the bit field is the beginning. - */ - if (little_endian) { - err = drgn_lazy_object_evaluate(member_object); - if (err) - return err; - - attr = dwarf_attr_integrate(die, DW_AT_byte_size, - &attr_mem); - /* - * If the member has an explicit byte size, we can use - * that. Otherwise, we have to get it from the member - * type. 
- */ - uint64_t byte_size; - if (attr) { - Dwarf_Word word; - if (dwarf_formudata(attr, &word)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_byte_size"); - } - byte_size = word; - } else { - if (!drgn_type_has_size(member_object->obj.type)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member bit field type does not have size"); - } - err = drgn_type_sizeof(member_object->obj.type, - &byte_size); - if (err) - return err; - } - *ret += 8 * byte_size - bit_offset - member_object->obj.bit_size; - } else { - *ret += bit_offset; - } - } - - return NULL; -} - -static struct drgn_error * -parse_member(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - bool little_endian, bool can_be_incomplete_array, - struct drgn_compound_type_builder *builder) -{ - struct drgn_error *err; - - Dwarf_Attribute attr_mem, *attr; - const char *name; - if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_name"); - } - } else { - name = NULL; - } - - struct drgn_dwarf_member_thunk_arg *thunk_arg = - malloc(sizeof(*thunk_arg)); - if (!thunk_arg) - return &drgn_enomem; - thunk_arg->module = module; - thunk_arg->die = *die; - thunk_arg->can_be_incomplete_array = can_be_incomplete_array; - - union drgn_lazy_object member_object; - drgn_lazy_object_init_thunk(&member_object, dbinfo->prog, - drgn_dwarf_member_thunk_fn, thunk_arg); - - uint64_t bit_offset; - err = parse_member_offset(die, &member_object, little_endian, - &bit_offset); - if (err) - goto err; - - err = drgn_compound_type_builder_add_member(builder, &member_object, - name, bit_offset); - if (err) - goto err; - return NULL; - -err: - drgn_lazy_object_deinit(&member_object); - return err; -} - -struct drgn_dwarf_die_thunk_arg { - struct drgn_debug_info_module *module; - Dwarf_Die die; -}; - -static struct 
drgn_error * -drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_die_thunk_arg *arg = arg_; - if (res) { - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, - arg->module, &arg->die, NULL, - true, true, NULL, - &qualified_type); - if (err) - return err; - - err = drgn_object_set_absent(res, qualified_type, 0); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, - void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_die_thunk_arg *arg = arg_; - if (res) { - err = drgn_object_from_dwarf(drgn_object_program(res)->dbinfo, - arg->module, &arg->die, NULL, NULL, - NULL, res); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -parse_template_parameter(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - drgn_object_thunk_fn *thunk_fn, - struct drgn_template_parameters_builder *builder) -{ - char tag_buf[DW_TAG_BUF_LEN]; - - Dwarf_Attribute attr_mem, *attr; - const char *name; - if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_name", - dwarf_tag_str(die, tag_buf)); - } - } else { - name = NULL; - } - - bool defaulted; - if (dwarf_flag_integrate(die, DW_AT_default_value, &defaulted)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_default_value", - dwarf_tag_str(die, tag_buf)); - } - - struct drgn_dwarf_die_thunk_arg *thunk_arg = - malloc(sizeof(*thunk_arg)); - if (!thunk_arg) - return &drgn_enomem; - thunk_arg->module = module; - thunk_arg->die = *die; - - union drgn_lazy_object argument; - drgn_lazy_object_init_thunk(&argument, dbinfo->prog, thunk_fn, - thunk_arg); - - struct drgn_error *err 
= - drgn_template_parameters_builder_add(builder, &argument, name, - defaulted); - if (err) - drgn_lazy_object_deinit(&argument); - return err; -} - -static struct drgn_error * -drgn_compound_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - enum drgn_type_kind kind, struct drgn_type **ret) -{ - struct drgn_error *err; - char tag_buf[DW_TAG_BUF_LEN]; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, - &attr_mem); - const char *tag; - if (attr) { - tag = dwarf_formstring(attr); - if (!tag) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_name", - dwarf_tag_str(die, tag_buf)); - } - } else { - tag = NULL; - } - - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_declaration", - dwarf_tag_str(die, tag_buf)); - } - if (declaration && tag) { - err = drgn_debug_info_find_complete(dbinfo, dwarf_tag(die), tag, - ret); - if (err != &drgn_not_found) - return err; - } - - struct drgn_compound_type_builder builder; - drgn_compound_type_builder_init(&builder, dbinfo->prog, kind); - - int size; - bool little_endian; - if (declaration) { - size = 0; - } else { - size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has missing or invalid DW_AT_byte_size", - dwarf_tag_str(die, tag_buf)); - } - dwarf_die_is_little_endian(die, false, &little_endian); - } - - Dwarf_Die member = {}, child; - int r = dwarf_child(die, &child); - while (r == 0) { - switch (dwarf_tag(&child)) { - case DW_TAG_member: - if (!declaration) { - if (member.addr) { - err = parse_member(dbinfo, module, - &member, - little_endian, false, - &builder); - if (err) - goto err; - } - member = child; - } - break; - case DW_TAG_template_type_parameter: - err = parse_template_parameter(dbinfo, module, &child, - 
drgn_dwarf_template_type_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - case DW_TAG_template_value_parameter: - err = parse_template_parameter(dbinfo, module, &child, - drgn_dwarf_template_value_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - default: - break; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - /* - * Flexible array members are only allowed as the last member of a - * structure with at least one other member. - */ - if (member.addr) { - err = parse_member(dbinfo, module, &member, little_endian, - kind != DRGN_TYPE_UNION && - builder.members.size > 0, - &builder); - if (err) - goto err; - } - - err = drgn_compound_type_create(&builder, tag, size, !declaration, lang, - ret); - if (err) - goto err; - return NULL; - -err: - drgn_compound_type_builder_deinit(&builder); - return err; -} - -#if !_ELFUTILS_PREREQ(0, 175) -static Elf *dwelf_elf_begin(int fd) -{ - return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); -} -#endif - -static struct drgn_error * -parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, - bool *is_signed) -{ - const char *name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has missing or invalid DW_AT_name"); - } - - Dwarf_Attribute attr_mem, *attr; - if (!(attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator is missing DW_AT_const_value"); - } - struct drgn_error *err; - if (attr->form == DW_FORM_sdata || - attr->form == DW_FORM_implicit_const) { - Dwarf_Sword svalue; - if (dwarf_formsdata(attr, &svalue)) - goto invalid; - err = drgn_enum_type_builder_add_signed(builder, name, - svalue); - /* - * GCC before 7.1 didn't include DW_AT_encoding for - * DW_TAG_enumeration_type DIEs, so we have to guess 
the sign - * for enum_compatible_type_fallback(). - */ - if (!err && svalue < 0) - *is_signed = true; - } else { - Dwarf_Word uvalue; - if (dwarf_formudata(attr, &uvalue)) - goto invalid; - err = drgn_enum_type_builder_add_unsigned(builder, name, - uvalue); - } - return err; - -invalid: - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has invalid DW_AT_const_value"); -} - -/* - * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, - * so we have to fabricate the compatible type. - */ -static struct drgn_error * -enum_compatible_type_fallback(struct drgn_debug_info *dbinfo, - Dwarf_Die *die, bool is_signed, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - int size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); - } - enum drgn_byte_order byte_order; - dwarf_die_byte_order(die, false, &byte_order); - return drgn_int_type_create(dbinfo->prog, "", size, is_signed, - byte_order, lang, ret); -} - -static struct drgn_error * -drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, - &attr_mem); - const char *tag; - if (attr) { - tag = dwarf_formstring(attr); - if (!tag) - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_name"); - } else { - tag = NULL; - } - - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_declaration"); - } - if (declaration && tag) { - err = drgn_debug_info_find_complete(dbinfo, - DW_TAG_enumeration_type, - tag, ret); - if (err != &drgn_not_found) - return err; - } - - if (declaration) { - 
return drgn_incomplete_enum_type_create(dbinfo->prog, tag, lang, - ret); - } - - struct drgn_enum_type_builder builder; - drgn_enum_type_builder_init(&builder, dbinfo->prog); - bool is_signed = false; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - if (dwarf_tag(&child) == DW_TAG_enumerator) { - err = parse_enumerator(&child, &builder, &is_signed); - if (err) - goto err; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - - struct drgn_type *compatible_type; - r = dwarf_type(die, &child); - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_type"); - goto err; - } else if (r) { - err = enum_compatible_type_fallback(dbinfo, die, is_signed, - lang, &compatible_type); - if (err) - goto err; - } else { - struct drgn_qualified_type qualified_compatible_type; - err = drgn_type_from_dwarf(dbinfo, module, &child, - &qualified_compatible_type); - if (err) - goto err; - compatible_type = drgn_underlying_type(qualified_compatible_type.type); - if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); - goto err; - } - } - - err = drgn_enum_type_create(&builder, tag, compatible_type, lang, ret); - if (err) - goto err; - return NULL; - -err: - drgn_enum_type_builder_deinit(&builder); - return err; -} - -static struct drgn_error * -drgn_typedef_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_type **ret) -{ - const char *name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_typedef has missing or invalid DW_AT_name"); - } - - struct drgn_qualified_type aliased_type; - 
struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, die, - lang, true, - can_be_incomplete_array, - is_incomplete_array_ret, - &aliased_type); - if (err) - return err; - - return drgn_typedef_type_create(dbinfo->prog, name, aliased_type, lang, - ret); -} - -static struct drgn_error * -drgn_pointer_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_qualified_type referenced_type; - struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, die, - lang, true, true, - NULL, - &referenced_type); - if (err) - return err; - - Dwarf_Attribute attr_mem, *attr; - uint64_t size; - if ((attr = dwarf_attr_integrate(die, DW_AT_byte_size, &attr_mem))) { - Dwarf_Word word; - if (dwarf_formudata(attr, &word)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_pointer_type has invalid DW_AT_byte_size"); - } - size = word; - } else { - uint8_t address_size; - err = drgn_program_address_size(dbinfo->prog, &address_size); - if (err) - return err; - size = address_size; - } - - /* - * The DWARF 5 specification doesn't mention DW_AT_endianity for - * DW_TAG_pointer_type DIEs, and GCC as of version 10.2 doesn't emit it - * even for pointers stored in the opposite byte order (e.g., when using - * scalar_storage_order), but it probably should. 
- */ - enum drgn_byte_order byte_order; - dwarf_die_byte_order(die, false, &byte_order); - return drgn_pointer_type_create(dbinfo->prog, referenced_type, size, - byte_order, lang, ret); -} - -struct array_dimension { - uint64_t length; - bool is_complete; -}; - -DEFINE_VECTOR(array_dimension_vector, struct array_dimension) - -static struct drgn_error *subrange_length(Dwarf_Die *die, - struct array_dimension *dimension) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - Dwarf_Word word; - - if (!(attr = dwarf_attr_integrate(die, DW_AT_upper_bound, &attr_mem)) && - !(attr = dwarf_attr_integrate(die, DW_AT_count, &attr_mem))) { - dimension->is_complete = false; - return NULL; - } - - if (dwarf_formudata(attr, &word)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_subrange_type has invalid %s", - attr->code == DW_AT_upper_bound ? - "DW_AT_upper_bound" : - "DW_AT_count"); - } - - dimension->is_complete = true; - /* - * GCC emits a DW_FORM_sdata DW_AT_upper_bound of -1 for empty array - * variables without an explicit size (e.g., `int arr[] = {};`). 
- */ - if (attr->code == DW_AT_upper_bound && attr->form == DW_FORM_sdata && - word == (Dwarf_Word)-1) { - dimension->length = 0; - } else if (attr->code == DW_AT_upper_bound) { - if (word >= UINT64_MAX) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "DW_AT_upper_bound is too large"); - } - dimension->length = (uint64_t)word + 1; - } else { - if (word > UINT64_MAX) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "DW_AT_count is too large"); - } - dimension->length = word; - } - return NULL; -} - -static struct drgn_error * -drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct array_dimension_vector dimensions = VECTOR_INIT; - struct array_dimension *dimension; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - if (dwarf_tag(&child) == DW_TAG_subrange_type) { - dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) - goto out; - err = subrange_length(&child, dimension); - if (err) - goto out; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto out; - } - if (!dimensions.size) { - dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) - goto out; - dimension->is_complete = false; - } - - struct drgn_qualified_type element_type; - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, false, false, - NULL, &element_type); - if (err) - goto out; - - *is_incomplete_array_ret = !dimensions.data[0].is_complete; - struct drgn_type *type; - do { - dimension = array_dimension_vector_pop(&dimensions); - if (dimension->is_complete) { - err = drgn_array_type_create(dbinfo->prog, element_type, - dimension->length, lang, - &type); - } else if (dimensions.size || 
!can_be_incomplete_array) { - err = drgn_array_type_create(dbinfo->prog, element_type, - 0, lang, &type); - } else { - err = drgn_incomplete_array_type_create(dbinfo->prog, - element_type, - lang, &type); - } - if (err) - goto out; - - element_type.type = type; - element_type.qualifiers = 0; - } while (dimensions.size); - - *ret = type; - err = NULL; -out: - array_dimension_vector_deinit(&dimensions); - return err; -} - -static struct drgn_error * -drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_die_thunk_arg *arg = arg_; - if (res) { - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, - arg->module, &arg->die, NULL, - false, true, NULL, - &qualified_type); - if (err) - return err; - - err = drgn_object_set_absent(res, qualified_type, 0); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -parse_formal_parameter(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - struct drgn_function_type_builder *builder) -{ - Dwarf_Attribute attr_mem, *attr; - const char *name; - if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_formal_parameter has invalid DW_AT_name"); - } - } else { - name = NULL; - } - - struct drgn_dwarf_die_thunk_arg *thunk_arg = - malloc(sizeof(*thunk_arg)); - if (!thunk_arg) - return &drgn_enomem; - thunk_arg->module = module; - thunk_arg->die = *die; - - union drgn_lazy_object default_argument; - drgn_lazy_object_init_thunk(&default_argument, dbinfo->prog, - drgn_dwarf_formal_parameter_thunk_fn, - thunk_arg); - - struct drgn_error *err = - drgn_function_type_builder_add_parameter(builder, - &default_argument, - name); - if (err) - drgn_lazy_object_deinit(&default_argument); - return err; -} - -static struct drgn_error * 
-drgn_function_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - char tag_buf[DW_TAG_BUF_LEN]; - - struct drgn_function_type_builder builder; - drgn_function_type_builder_init(&builder, dbinfo->prog); - bool is_variadic = false; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - switch (dwarf_tag(&child)) { - case DW_TAG_formal_parameter: - if (is_variadic) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", - dwarf_tag_str(die, - tag_buf)); - goto err; - } - err = parse_formal_parameter(dbinfo, module, &child, - &builder); - if (err) - goto err; - break; - case DW_TAG_unspecified_parameters: - if (is_variadic) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has multiple DW_TAG_unspecified_parameters children", - dwarf_tag_str(die, - tag_buf)); - goto err; - } - is_variadic = true; - break; - case DW_TAG_template_type_parameter: - err = parse_template_parameter(dbinfo, module, &child, - drgn_dwarf_template_type_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - case DW_TAG_template_value_parameter: - err = parse_template_parameter(dbinfo, module, &child, - drgn_dwarf_template_value_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - default: - break; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - - struct drgn_qualified_type return_type; - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, true, - NULL, &return_type); - if (err) - goto err; - - err = drgn_function_type_create(&builder, return_type, is_variadic, - lang, ret); - if (err) - goto err; - return NULL; - -err: - drgn_function_type_builder_deinit(&builder); - 
return err; -} - -static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret) -{ - if (dbinfo->depth >= 1000) { - return drgn_error_create(DRGN_ERROR_RECURSION, - "maximum DWARF type parsing depth exceeded"); - } - - /* If the DIE has a type unit signature, follow it. */ - Dwarf_Die definition_die; - { - Dwarf_Attribute attr_mem, *attr; - if ((attr = dwarf_attr_integrate(die, DW_AT_signature, - &attr_mem))) { - if (!dwarf_formref_die(attr, &definition_die)) - return drgn_error_libdw(); - die = &definition_die; - } - } - - /* If we got a declaration, try to find the definition. */ - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) - return drgn_error_libdw(); - if (declaration) { - uintptr_t die_addr; - if (drgn_dwarf_index_find_definition(&dbinfo->dindex, - (uintptr_t)die->addr, - &module, &die_addr)) { - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, - &bias); - if (!dwarf) - return drgn_error_libdwfl(); - uintptr_t start = - (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - size_t size = - module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; - if (die_addr >= start && die_addr < start + size) { - if (!dwarf_offdie(dwarf, die_addr - start, - &definition_die)) - return drgn_error_libdw(); - } else { - start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; - /* Assume .debug_types */ - if (!dwarf_offdie_types(dwarf, die_addr - start, - &definition_die)) - return drgn_error_libdw(); - } - die = &definition_die; - } - } - - struct drgn_dwarf_type_map_entry entry = { - .key = die->addr, - }; - struct hash_pair hp = drgn_dwarf_type_map_hash(&entry.key); - struct drgn_dwarf_type_map_iterator it = - drgn_dwarf_type_map_search_hashed(&dbinfo->types, &entry.key, - hp); - if (it.entry) { - if (!can_be_incomplete_array && - 
it.entry->value.is_incomplete_array) { - it = drgn_dwarf_type_map_search_hashed(&dbinfo->cant_be_incomplete_array_types, - &entry.key, hp); - } - if (it.entry) { - ret->type = it.entry->value.type; - ret->qualifiers = it.entry->value.qualifiers; - return NULL; - } - } - - const struct drgn_language *lang; - struct drgn_error *err = drgn_language_from_die(die, true, &lang); - if (err) - return err; - - ret->qualifiers = 0; - dbinfo->depth++; - entry.value.is_incomplete_array = false; - switch (dwarf_tag(die)) { - case DW_TAG_const_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_CONST; - break; - case DW_TAG_restrict_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; - break; - case DW_TAG_volatile_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; - break; - case DW_TAG_atomic_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; - break; - case DW_TAG_base_type: - err = drgn_base_type_from_dwarf(dbinfo, module, die, lang, - &ret->type); - break; - case DW_TAG_structure_type: - err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, - DRGN_TYPE_STRUCT, - &ret->type); - break; - case DW_TAG_union_type: - err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, - DRGN_TYPE_UNION, - &ret->type); - break; - case DW_TAG_class_type: - err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, - DRGN_TYPE_CLASS, - &ret->type); - break; - case DW_TAG_enumeration_type: - err = drgn_enum_type_from_dwarf(dbinfo, module, die, 
lang, - &ret->type); - break; - case DW_TAG_typedef: - err = drgn_typedef_type_from_dwarf(dbinfo, module, die, lang, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - &ret->type); - break; - case DW_TAG_pointer_type: - err = drgn_pointer_type_from_dwarf(dbinfo, module, die, lang, - &ret->type); - break; - case DW_TAG_array_type: - err = drgn_array_type_from_dwarf(dbinfo, module, die, lang, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - &ret->type); - break; - case DW_TAG_subroutine_type: - case DW_TAG_subprogram: - err = drgn_function_type_from_dwarf(dbinfo, module, die, lang, - &ret->type); - break; - default: - err = drgn_error_format(DRGN_ERROR_OTHER, - "unknown DWARF type tag 0x%x", - dwarf_tag(die)); - break; - } - dbinfo->depth--; - if (err) - return err; - - entry.value.type = ret->type; - entry.value.qualifiers = ret->qualifiers; - struct drgn_dwarf_type_map *map; - if (!can_be_incomplete_array && entry.value.is_incomplete_array) - map = &dbinfo->cant_be_incomplete_array_types; - else - map = &dbinfo->types; - if (drgn_dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { - /* - * This will "leak" the type we created, but it'll still be - * cleaned up when the program is freed. 
- */ - return &drgn_enomem; - } - if (is_incomplete_array_ret) - *is_incomplete_array_ret = entry.value.is_incomplete_array; - return NULL; -} - -struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, - const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - struct drgn_debug_info *dbinfo = arg; - - uint64_t tag; - switch (kind) { - case DRGN_TYPE_INT: - case DRGN_TYPE_BOOL: - case DRGN_TYPE_FLOAT: - tag = DW_TAG_base_type; - break; - case DRGN_TYPE_STRUCT: - tag = DW_TAG_structure_type; - break; - case DRGN_TYPE_UNION: - tag = DW_TAG_union_type; - break; - case DRGN_TYPE_CLASS: - tag = DW_TAG_class_type; - break; - case DRGN_TYPE_ENUM: - tag = DW_TAG_enumeration_type; - break; - case DRGN_TYPE_TYPEDEF: - tag = DW_TAG_typedef; - break; - default: - UNREACHABLE(); - } - - struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, - name_len, &tag, 1); - if (err) - return err; - struct drgn_dwarf_index_die *index_die; - while ((index_die = drgn_dwarf_index_iterator_next(&it))) { - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) - return err; - if (die_matches_filename(&die, filename)) { - err = drgn_type_from_dwarf(dbinfo, index_die->module, - &die, ret); - if (err) - return err; - /* - * For DW_TAG_base_type, we need to check that the type - * we found was the right kind. - */ - if (drgn_type_kind(ret->type) == kind) - return NULL; - } - } - return &drgn_not_found; -} - -struct drgn_error * -drgn_debug_info_find_object(const char *name, size_t name_len, - const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret) -{ - struct drgn_error *err; - struct drgn_debug_info *dbinfo = arg; - - struct drgn_dwarf_index_namespace *ns = &dbinfo->dindex.global; - if (name_len >= 2 && memcmp(name, "::", 2) == 0) { - /* Explicit global namespace. 
*/ - name_len -= 2; - name += 2; - } - const char *colons; - while ((colons = memmem(name, name_len, "::", 2))) { - struct drgn_dwarf_index_iterator it; - uint64_t ns_tag = DW_TAG_namespace; - err = drgn_dwarf_index_iterator_init(&it, ns, name, - colons - name, &ns_tag, 1); - if (err) - return err; - struct drgn_dwarf_index_die *index_die = - drgn_dwarf_index_iterator_next(&it); - if (!index_die) - return &drgn_not_found; - ns = index_die->namespace; - name_len -= colons + 2 - name; - name = colons + 2; - } - - uint64_t tags[3]; - size_t num_tags = 0; - if (flags & DRGN_FIND_OBJECT_CONSTANT) - tags[num_tags++] = DW_TAG_enumerator; - if (flags & DRGN_FIND_OBJECT_FUNCTION) - tags[num_tags++] = DW_TAG_subprogram; - if (flags & DRGN_FIND_OBJECT_VARIABLE) - tags[num_tags++] = DW_TAG_variable; - - struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, ns, name, name_len, tags, - num_tags); - if (err) - return err; - struct drgn_dwarf_index_die *index_die; - while ((index_die = drgn_dwarf_index_iterator_next(&it))) { - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) - return err; - if (!die_matches_filename(&die, filename)) - continue; - if (dwarf_tag(&die) == DW_TAG_enumeration_type) { - return drgn_object_from_dwarf_enumerator(dbinfo, - index_die->module, - &die, name, - ret); - } else { - return drgn_object_from_dwarf(dbinfo, index_die->module, - &die, NULL, NULL, NULL, - ret); - } - } - return &drgn_not_found; -} - -struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, - struct drgn_debug_info **ret) -{ - struct drgn_debug_info *dbinfo = malloc(sizeof(*dbinfo)); - if (!dbinfo) - return &drgn_enomem; - dbinfo->prog = prog; - const Dwfl_Callbacks *dwfl_callbacks; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - dwfl_callbacks = &drgn_dwfl_callbacks; - else if (prog->flags & DRGN_PROGRAM_IS_LIVE) - dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; - else - dwfl_callbacks = 
&drgn_userspace_core_dump_dwfl_callbacks; - dbinfo->dwfl = dwfl_begin(dwfl_callbacks); - if (!dbinfo->dwfl) { - free(dbinfo); - return drgn_error_libdwfl(); - } - drgn_debug_info_module_table_init(&dbinfo->modules); - c_string_set_init(&dbinfo->module_names); - drgn_dwarf_index_init(&dbinfo->dindex); - drgn_dwarf_type_map_init(&dbinfo->types); - drgn_dwarf_type_map_init(&dbinfo->cant_be_incomplete_array_types); - dbinfo->depth = 0; - *ret = dbinfo; - return NULL; -} - -void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) -{ - if (!dbinfo) - return; - drgn_dwarf_type_map_deinit(&dbinfo->cant_be_incomplete_array_types); - drgn_dwarf_type_map_deinit(&dbinfo->types); - drgn_dwarf_index_deinit(&dbinfo->dindex); - c_string_set_deinit(&dbinfo->module_names); - drgn_debug_info_free_modules(dbinfo, false, true); - assert(drgn_debug_info_module_table_empty(&dbinfo->modules)); - drgn_debug_info_module_table_deinit(&dbinfo->modules); - dwfl_end(dbinfo->dwfl); - free(dbinfo); -} - -static struct drgn_error * -drgn_dwarf_cfi_next_encoded(struct drgn_debug_info_buffer *buffer, - uint8_t address_size, uint8_t encoding, - uint64_t func_addr, uint64_t *ret) -{ - struct drgn_error *err; - - /* Not currently used for CFI. */ - if (encoding & DW_EH_PE_indirect) { -unknown_fde_encoding: - return binary_buffer_error(&buffer->bb, - "unknown EH encoding %#" PRIx8, - encoding); - } - - size_t pos = (buffer->bb.pos - - (char *)buffer->module->scn_data[buffer->scn]->d_buf); - uint64_t base; - switch (encoding & 0x70) { - case DW_EH_PE_absptr: - base = 0; - break; - case DW_EH_PE_pcrel: - base = buffer->module->pcrel_base + pos; - break; - case DW_EH_PE_textrel: - base = buffer->module->textrel_base; - break; - case DW_EH_PE_datarel: - base = buffer->module->datarel_base; - break; - case DW_EH_PE_funcrel: - /* Relative to the FDE's initial location. 
*/ - base = func_addr; - break; - case DW_EH_PE_aligned: - base = 0; - if (pos % address_size != 0 && - (err = binary_buffer_skip(&buffer->bb, - address_size - pos % address_size))) - return err; - break; - default: - goto unknown_fde_encoding; - } - - uint64_t offset; - switch (encoding & 0xf) { - case DW_EH_PE_absptr: - if ((err = binary_buffer_next_uint(&buffer->bb, address_size, - &offset))) - return err; - break; - case DW_EH_PE_uleb128: - if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) - return err; - break; - case DW_EH_PE_udata2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_udata4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_udata8: - if ((err = binary_buffer_next_u64(&buffer->bb, &offset))) - return err; - break; - case DW_EH_PE_sleb128: - if ((err = binary_buffer_next_sleb128_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_sdata2: - if ((err = binary_buffer_next_s16_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_sdata4: - if ((err = binary_buffer_next_s32_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_sdata8: - if ((err = binary_buffer_next_s64_into_u64(&buffer->bb, - &offset))) - return err; - break; - default: - goto unknown_fde_encoding; - } - *ret = (base + offset) & uint_max(address_size); - - return NULL; -} - -static struct drgn_error * -drgn_parse_dwarf_cie(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn, size_t cie_pointer, - struct drgn_dwarf_cie *cie) -{ - bool is_eh = scn == DRGN_SCN_EH_FRAME; - struct drgn_error *err; - - cie->is_eh = is_eh; - - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, scn); - buffer.bb.pos += cie_pointer; - - uint32_t tmp; - if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) - return err; - bool is_64_bit = tmp == UINT32_C(0xffffffff); 
- uint64_t length; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, &length))) - return err; - } else { - length = tmp; - } - if (length > buffer.bb.end - buffer.bb.pos) { - return binary_buffer_error(&buffer.bb, - "entry length is out of bounds"); - } - buffer.bb.end = buffer.bb.pos + length; - - uint64_t cie_id, expected_cie_id; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, &cie_id))) - return err; - expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); - } else { - if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, - &cie_id))) - return err; - expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffff); - } - if (cie_id != expected_cie_id) - return binary_buffer_error(&buffer.bb, "invalid CIE ID"); - - uint8_t version; - if ((err = binary_buffer_next_u8(&buffer.bb, &version))) - return err; - if (version < 1 || version == 2 || version > 4) { - return binary_buffer_error(&buffer.bb, - "unknown CIE version %" PRIu8, - version); - } - - const char *augmentation; - size_t augmentation_len; - if ((err = binary_buffer_next_string(&buffer.bb, &augmentation, - &augmentation_len))) - return err; - cie->have_augmentation_length = augmentation[0] == 'z'; - cie->signal_frame = false; - for (size_t i = 0; i < augmentation_len; i++) { - switch (augmentation[i]) { - case 'z': - if (i != 0) - goto unknown_augmentation; - break; - case 'L': - case 'P': - case 'R': - if (augmentation[0] != 'z') - goto unknown_augmentation; - break; - case 'S': - cie->signal_frame = true; - break; - default: -unknown_augmentation: - /* - * We could ignore this CIE and all FDEs that reference - * it or skip the augmentation if we have its length, - * but let's fail loudly so that we find out about - * missing support. 
- */ - return binary_buffer_error_at(&buffer.bb, - &augmentation[i], - "unknown CFI augmentation %s", - augmentation); - } - } - - if (version >= 4) { - if ((err = binary_buffer_next_u8(&buffer.bb, - &cie->address_size))) - return err; - if (cie->address_size < 1 || cie->address_size > 8) { - return binary_buffer_error(&buffer.bb, - "unsupported address size %" PRIu8, - cie->address_size); - } - uint8_t segment_selector_size; - if ((err = binary_buffer_next_u8(&buffer.bb, - &segment_selector_size))) - return err; - if (segment_selector_size) { - return binary_buffer_error(&buffer.bb, - "unsupported segment selector size %" PRIu8, - segment_selector_size); - } - } else { - cie->address_size = - drgn_platform_address_size(&module->platform); - } - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &cie->code_alignment_factor)) || - (err = binary_buffer_next_sleb128(&buffer.bb, - &cie->data_alignment_factor))) - return err; - uint64_t return_address_register; - if (version >= 3) { - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &return_address_register))) - return err; - } else { - if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, - &return_address_register))) - return err; - } - cie->return_address_register = - module->platform.arch->dwarf_regno_to_internal(return_address_register); - if (cie->return_address_register == DRGN_REGISTER_NUMBER_UNKNOWN) { - return binary_buffer_error(&buffer.bb, - "unknown return address register"); - } - cie->address_encoding = DW_EH_PE_absptr; - if (augmentation[0] == 'z') { - for (size_t i = 0; i < augmentation_len; i++) { - switch (augmentation[i]) { - case 'z': - if ((err = binary_buffer_skip_leb128(&buffer.bb))) - return err; - break; - case 'L': - if ((err = binary_buffer_skip(&buffer.bb, 1))) - return err; - break; - case 'P': { - uint8_t encoding; - if ((err = binary_buffer_next_u8(&buffer.bb, &encoding))) - return err; - /* - * We don't need the result, so don't bother - * dereferencing. 
- */ - encoding &= ~DW_EH_PE_indirect; - uint64_t unused; - if ((err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - encoding, - 0, - &unused))) - return err; - break; - } - case 'R': - if ((err = binary_buffer_next_u8(&buffer.bb, - &cie->address_encoding))) - return err; - break; - } - } - } - cie->initial_instructions = buffer.bb.pos; - cie->initial_instructions_size = buffer.bb.end - buffer.bb.pos; - return NULL; -} - -static struct drgn_error * -drgn_parse_dwarf_frames(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn, - struct drgn_dwarf_cie_vector *cies, - struct drgn_dwarf_fde_vector *fdes) -{ - bool is_eh = scn == DRGN_SCN_EH_FRAME; - struct drgn_error *err; - - if (!module->scns[scn]) - return NULL; - err = drgn_debug_info_module_cache_section(module, scn); - if (err) - return err; - Elf_Data *data = module->scn_data[scn]; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, scn); - - struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; - while (binary_buffer_has_next(&buffer.bb)) { - uint32_t tmp; - if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) - goto out; - bool is_64_bit = tmp == UINT32_C(0xffffffff); - uint64_t length; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, &length))) - goto out; - } else { - length = tmp; - } - /* - * Technically, a length of zero is only a terminator in - * .eh_frame, but other consumers (binutils, elfutils, GDB) - * handle it the same way in .debug_frame. - */ - if (length == 0) - break; - if (length > buffer.bb.end - buffer.bb.pos) { - err = binary_buffer_error(&buffer.bb, - "entry length is out of bounds"); - goto out; - } - buffer.bb.end = buffer.bb.pos + length; - - /* - * The Linux Standard Base Core Specification [1] states that - * the CIE ID in .eh_frame is always 4 bytes. However, other - * consumers handle it the same as in .debug_frame (8 bytes for - * the 64-bit format). 
- * - * 1: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html - */ - uint64_t cie_pointer, cie_id; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, - &cie_pointer))) - goto out; - cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); - } else { - if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, - &cie_pointer))) - goto out; - cie_id = is_eh ? 0 : UINT64_C(0xffffffff); - } - - if (cie_pointer != cie_id) { - if (is_eh) { - size_t pointer_offset = - (buffer.bb.pos - - (is_64_bit ? 8 : 4) - - (char *)data->d_buf); - if (cie_pointer > pointer_offset) { - err = binary_buffer_error(&buffer.bb, - "CIE pointer is out of bounds"); - goto out; - } - cie_pointer = pointer_offset - cie_pointer; - } else if (cie_pointer > data->d_size) { - err = binary_buffer_error(&buffer.bb, - "CIE pointer is out of bounds"); - goto out; - } - struct drgn_dwarf_fde *fde = - drgn_dwarf_fde_vector_append_entry(fdes); - if (!fde) { - err = &drgn_enomem; - goto out; - } - struct drgn_dwarf_cie_map_entry entry = { - .key = cie_pointer, - .value = cies->size, - }; - struct drgn_dwarf_cie_map_iterator it; - int r = drgn_dwarf_cie_map_insert(&cie_map, &entry, - &it); - struct drgn_dwarf_cie *cie; - if (r > 0) { - cie = drgn_dwarf_cie_vector_append_entry(cies); - if (!cie) { - err = &drgn_enomem; - goto out; - } - err = drgn_parse_dwarf_cie(module, scn, - cie_pointer, cie); - if (err) - goto out; - } else if (r == 0) { - cie = &cies->data[it.entry->value]; - } else { - err = &drgn_enomem; - goto out; - } - if ((err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - cie->address_encoding, - 0, - &fde->initial_location)) || - (err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - cie->address_encoding & 0xf, - 0, - &fde->address_range))) - goto out; - if (cie->have_augmentation_length) { - uint64_t augmentation_length; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &augmentation_length))) - goto out; - if 
(augmentation_length > - buffer.bb.end - buffer.bb.pos) { - err = binary_buffer_error(&buffer.bb, - "augmentation length is out of bounds"); - goto out; - } - buffer.bb.pos += augmentation_length; - } - fde->cie = it.entry->value; - fde->instructions = buffer.bb.pos; - fde->instructions_size = buffer.bb.end - buffer.bb.pos; - } - - buffer.bb.pos = buffer.bb.end; - buffer.bb.end = (const char *)data->d_buf + data->d_size; - } - - err = NULL; -out: - drgn_dwarf_cie_map_deinit(&cie_map); - return err; -} - -static void drgn_debug_info_cache_sh_addr(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn, - uint64_t *addr) -{ - if (module->scns[scn]) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(module->scns[scn], &shdr_mem); - if (shdr) - *addr = shdr->sh_addr; - } -} - -static int drgn_dwarf_fde_compar(const void *_a, const void *_b, void *arg) -{ - const struct drgn_dwarf_fde *a = _a; - const struct drgn_dwarf_fde *b = _b; - const struct drgn_dwarf_cie *cies = arg; - if (a->initial_location < b->initial_location) - return -1; - else if (a->initial_location > b->initial_location) - return 1; - else - return cies[a->cie].is_eh - cies[b->cie].is_eh; -} - -static struct drgn_error * -drgn_debug_info_parse_frames(struct drgn_debug_info_module *module) -{ - struct drgn_error *err; - - drgn_debug_info_cache_sh_addr(module, DRGN_SCN_EH_FRAME, - &module->pcrel_base); - drgn_debug_info_cache_sh_addr(module, DRGN_SCN_TEXT, - &module->textrel_base); - drgn_debug_info_cache_sh_addr(module, DRGN_SCN_GOT, - &module->datarel_base); - - struct drgn_dwarf_cie_vector cies = VECTOR_INIT; - struct drgn_dwarf_fde_vector fdes = VECTOR_INIT; - - err = drgn_parse_dwarf_frames(module, DRGN_SCN_DEBUG_FRAME, &cies, - &fdes); - if (err) - goto err; - err = drgn_parse_dwarf_frames(module, DRGN_SCN_EH_FRAME, &cies, &fdes); - if (err) - goto err; - - drgn_dwarf_cie_vector_shrink_to_fit(&cies); - - /* - * Sort FDEs and remove duplicates, preferring .debug_frame over - * 
.eh_frame. - */ - qsort_r(fdes.data, fdes.size, sizeof(fdes.data[0]), - drgn_dwarf_fde_compar, cies.data); - if (fdes.size > 0) { - size_t src = 1, dst = 1; - for (; src < fdes.size; src++) { - if (fdes.data[src].initial_location != - fdes.data[dst - 1].initial_location) { - if (src != dst) - fdes.data[dst] = fdes.data[src]; - dst++; - } - } - fdes.size = dst; - } - drgn_dwarf_fde_vector_shrink_to_fit(&fdes); - - module->cies = cies.data; - module->fdes = fdes.data; - module->num_fdes = fdes.size; - return NULL; - -err: - drgn_dwarf_fde_vector_deinit(&fdes); - drgn_dwarf_cie_vector_deinit(&cies); - return err; -} - -static struct drgn_error * -drgn_debug_info_find_fde(struct drgn_debug_info_module *module, - uint64_t unbiased_pc, struct drgn_dwarf_fde **ret) -{ - struct drgn_error *err; - - if (!module->parsed_frames) { - err = drgn_debug_info_parse_frames(module); - if (err) - return err; - module->parsed_frames = true; - } - - /* Binary search for the containing FDE. */ - size_t lo = 0, hi = module->num_fdes; - while (lo < hi) { - size_t mid = lo + (hi - lo) / 2; - struct drgn_dwarf_fde *fde = &module->fdes[mid]; - if (unbiased_pc < fde->initial_location) { - hi = mid; - } else if (unbiased_pc - fde->initial_location >= - fde->address_range) { - lo = mid + 1; - } else { - *ret = fde; - return NULL; - } - } - *ret = NULL; - return NULL; -} - -static struct drgn_error * -drgn_dwarf_cfi_next_offset(struct drgn_debug_info_buffer *buffer, int64_t *ret) -{ - struct drgn_error *err; - uint64_t offset; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) - return err; - if (offset > INT64_MAX) - return binary_buffer_error(&buffer->bb, "offset is too large"); - *ret = offset; - return NULL; -} - -static struct drgn_error * -drgn_dwarf_cfi_next_offset_sf(struct drgn_debug_info_buffer *buffer, - struct drgn_dwarf_cie *cie, int64_t *ret) -{ - struct drgn_error *err; - int64_t factored; - if ((err = binary_buffer_next_sleb128(&buffer->bb, &factored))) - return err; 
- if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) - return binary_buffer_error(&buffer->bb, "offset is too large"); - return NULL; -} - -static struct drgn_error * -drgn_dwarf_cfi_next_offset_f(struct drgn_debug_info_buffer *buffer, - struct drgn_dwarf_cie *cie, int64_t *ret) -{ - struct drgn_error *err; - uint64_t factored; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &factored))) - return err; - if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) - return binary_buffer_error(&buffer->bb, "offset is too large"); - return NULL; -} - -static struct drgn_error * -drgn_dwarf_cfi_next_block(struct drgn_debug_info_buffer *buffer, - const char **buf_ret, size_t *size_ret) -{ - struct drgn_error *err; - uint64_t size; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &size))) - return err; - if (size > buffer->bb.end - buffer->bb.pos) { - return binary_buffer_error(&buffer->bb, - "block is out of bounds"); - } - *buf_ret = buffer->bb.pos; - buffer->bb.pos += size; - *size_ret = size; - return NULL; -} - -static struct drgn_error * -drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, - struct drgn_dwarf_fde *fde, - const struct drgn_cfi_row *initial_row, uint64_t target, - const char *instructions, size_t instructions_size, - struct drgn_cfi_row **row) -{ - struct drgn_error *err; - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - module->platform.arch->dwarf_regno_to_internal; - struct drgn_dwarf_cie *cie = &module->cies[fde->cie]; - uint64_t pc = fde->initial_location; - - struct drgn_cfi_row_vector state_stack = VECTOR_INIT; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, - cie->is_eh ? 
- DRGN_SCN_EH_FRAME : DRGN_SCN_DEBUG_FRAME); - buffer.bb.pos = instructions; - buffer.bb.end = instructions + instructions_size; - while (binary_buffer_has_next(&buffer.bb)) { - uint8_t opcode; - if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) - goto out; - - uint64_t dwarf_regno; - drgn_register_number regno; - struct drgn_cfi_rule rule; - uint64_t tmp; - switch ((opcode & 0xc0) ? (opcode & 0xc0) : opcode) { - case DW_CFA_set_loc: - if (!initial_row) - goto invalid_for_initial; - if ((err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - cie->address_encoding, - fde->initial_location, - &tmp))) - goto out; - if (tmp <= pc) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_set_loc location is not greater than current location"); - goto out; - } - pc = tmp; - if (pc > target) - goto found; - break; - case DW_CFA_advance_loc: - if (!initial_row) - goto invalid_for_initial; - tmp = opcode & 0x3f; - goto advance_loc; - case DW_CFA_advance_loc1: - if (!initial_row) - goto invalid_for_initial; - if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, - &tmp))) - goto out; - goto advance_loc; - case DW_CFA_advance_loc2: - if (!initial_row) - goto invalid_for_initial; - if ((err = binary_buffer_next_u16_into_u64(&buffer.bb, - &tmp))) - goto out; - goto advance_loc; - case DW_CFA_advance_loc4: - if (!initial_row) - goto invalid_for_initial; - if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, - &tmp))) - goto out; -advance_loc: - if (__builtin_mul_overflow(tmp, - cie->code_alignment_factor, - &tmp) || - __builtin_add_overflow(pc, tmp, &pc) || - pc > uint_max(cie->address_size)) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_CFA_advance_loc* overflows location"); - goto out; - } - if (pc > target) - goto found; - break; - case DW_CFA_def_cfa: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset(&buffer, &rule.offset))) - goto out; - if 
((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_cfa; - case DW_CFA_def_cfa_sf: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, - &rule.offset))) - goto out; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_cfa; - case DW_CFA_def_cfa_register: - drgn_cfi_row_get_cfa(*row, &rule); - if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_def_cfa_register with incompatible CFA rule"); - goto out; - } - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_cfa; - case DW_CFA_def_cfa_offset: - drgn_cfi_row_get_cfa(*row, &rule); - if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_def_cfa_offset with incompatible CFA rule"); - goto out; - } - if ((err = drgn_dwarf_cfi_next_offset(&buffer, - &rule.offset))) - goto out; - goto set_cfa; - case DW_CFA_def_cfa_offset_sf: - drgn_cfi_row_get_cfa(*row, &rule); - if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_def_cfa_offset_sf with incompatible CFA rule"); - goto out; - } - if ((err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, - &rule.offset))) - goto out; - goto set_cfa; - case DW_CFA_def_cfa_expression: - rule.kind = DRGN_CFI_RULE_DWARF_EXPRESSION; - rule.push_cfa = false; - if ((err = drgn_dwarf_cfi_next_block(&buffer, - &rule.expr, - &rule.expr_size))) - goto out; -set_cfa: - if (!drgn_cfi_row_set_cfa(row, &rule)) { - err = &drgn_enomem; - goto out; - } - break; - case DW_CFA_undefined: - 
rule.kind = DRGN_CFI_RULE_UNDEFINED; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_same_value: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - rule.offset = 0; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - rule.regno = regno; - goto set_reg; - case DW_CFA_offset: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - if ((err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, - &rule.offset))) - goto out; - if ((regno = dwarf_regno_to_internal(opcode & 0x3f)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_offset_extended: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - goto reg_offset_f; - case DW_CFA_offset_extended_sf: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - goto reg_offset_sf; - case DW_CFA_val_offset: - rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; -reg_offset_f: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, - &rule.offset))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_val_offset_sf: - rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; -reg_offset_sf: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, - &rule.offset))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_register: { - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - rule.offset = 0; - uint64_t dwarf_regno2; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno2))) - goto 
out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno2)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_reg; - } - case DW_CFA_expression: - rule.kind = DRGN_CFI_RULE_AT_DWARF_EXPRESSION; - goto reg_expression; - case DW_CFA_val_expression: - rule.kind = DRGN_CFI_RULE_DWARF_EXPRESSION; -reg_expression: - rule.push_cfa = true; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_block(&buffer, - &rule.expr, - &rule.expr_size))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_restore: - if (!initial_row) - goto invalid_for_initial; - dwarf_regno = opcode & 0x3f; - goto restore; - case DW_CFA_restore_extended: - if (!initial_row) { -invalid_for_initial: - err = binary_buffer_error(&buffer.bb, - "invalid initial DWARF CFI opcode %#" PRIx8, - opcode); - goto out; - } - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; -restore: - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - drgn_cfi_row_get_register(initial_row, regno, &rule); -set_reg: - if (!drgn_cfi_row_set_register(row, regno, &rule)) { - err = &drgn_enomem; - goto out; - } - break; - case DW_CFA_remember_state: { - struct drgn_cfi_row **state = - drgn_cfi_row_vector_append_entry(&state_stack); - if (!state) { - err = &drgn_enomem; - goto out; - } - *state = drgn_empty_cfi_row; - if (!drgn_cfi_row_copy(state, *row)) { - err = &drgn_enomem; - goto out; - } - break; - } - case DW_CFA_restore_state: - if (state_stack.size == 0) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_restore_state with empty state stack"); - goto out; - } - drgn_cfi_row_destroy(*row); - *row = state_stack.data[--state_stack.size]; - break; - case DW_CFA_nop: - break; - default: - err 
= binary_buffer_error(&buffer.bb, - "unknown DWARF CFI opcode %#" PRIx8, - opcode); - goto out; - } - } -found: - err = NULL; -out: - for (size_t i = 0; i < state_stack.size; i++) - drgn_cfi_row_destroy(state_stack.data[i]); - drgn_cfi_row_vector_deinit(&state_stack); - return err; -} - -static struct drgn_error * -drgn_debug_info_find_cfi_in_fde(struct drgn_debug_info_module *module, - struct drgn_dwarf_fde *fde, - uint64_t unbiased_pc, struct drgn_cfi_row **ret) -{ - struct drgn_error *err; - struct drgn_dwarf_cie *cie = &module->cies[fde->cie]; - struct drgn_cfi_row *initial_row = - (struct drgn_cfi_row *)module->platform.arch->default_dwarf_cfi_row; - err = drgn_eval_dwarf_cfi(module, fde, NULL, unbiased_pc, - cie->initial_instructions, - cie->initial_instructions_size, &initial_row); - if (err) - goto out; - if (!drgn_cfi_row_copy(ret, initial_row)) { - err = &drgn_enomem; - goto out; - } - err = drgn_eval_dwarf_cfi(module, fde, initial_row, unbiased_pc, - fde->instructions, fde->instructions_size, - ret); -out: - drgn_cfi_row_destroy(initial_row); - return err; -} - -static struct drgn_error * -drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, - uint64_t unbiased_pc, - struct drgn_cfi_row **row_ret, - bool *interrupted_ret, - drgn_register_number *ret_addr_regno_ret) -{ - struct drgn_error *err; - struct drgn_dwarf_fde *fde; - err = drgn_debug_info_find_fde(module, unbiased_pc, &fde); - if (err) - return err; - if (!fde) - return &drgn_not_found; - err = drgn_debug_info_find_cfi_in_fde(module, fde, unbiased_pc, - row_ret); - if (err) - return err; - *interrupted_ret = module->cies[fde->cie].signal_frame; - *ret_addr_regno_ret = module->cies[fde->cie].return_address_register; - return NULL; -} - -struct drgn_error * -drgn_debug_info_module_find_cfi(struct drgn_program *prog, - struct drgn_debug_info_module *module, - uint64_t pc, struct drgn_cfi_row **row_ret, - bool *interrupted_ret, - drgn_register_number *ret_addr_regno_ret) -{ - struct 
drgn_error *err; - - Dwarf_Addr bias; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, - NULL, NULL); - uint64_t unbiased_pc = pc - bias; - - if (prog->prefer_orc_unwinder) { - err = drgn_debug_info_find_orc_cfi(module, unbiased_pc, row_ret, - interrupted_ret, - ret_addr_regno_ret); - if (err != &drgn_not_found) - return err; - return drgn_debug_info_find_dwarf_cfi(module, unbiased_pc, - row_ret, interrupted_ret, - ret_addr_regno_ret); - } else { - err = drgn_debug_info_find_dwarf_cfi(module, unbiased_pc, - row_ret, interrupted_ret, - ret_addr_regno_ret); - if (err != &drgn_not_found) - return err; - return drgn_debug_info_find_orc_cfi(module, unbiased_pc, - row_ret, interrupted_ret, - ret_addr_regno_ret); - } -} - -struct drgn_error * -drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, - const struct drgn_cfi_rule *rule, - const struct drgn_register_state *regs, - void *buf, size_t size) -{ - struct drgn_error *err; - struct uint64_vector stack = VECTOR_INIT; - - if (rule->push_cfa) { - struct optional_uint64 cfa = drgn_register_state_get_cfa(regs); - if (!cfa.has_value) { - err = &drgn_not_found; - goto out; - } - if (!uint64_vector_append(&stack, &cfa.value)) { - err = &drgn_enomem; - goto out; - } - } - - int remaining_ops = MAX_DWARF_EXPR_OPS; - struct drgn_dwarf_expression_context ctx; - drgn_dwarf_expression_context_init(&ctx, prog, regs->module, NULL, NULL, - regs, rule->expr, rule->expr_size); - err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); - if (err) - goto out; - if (binary_buffer_has_next(&ctx.bb)) { - uint8_t opcode; - err = binary_buffer_next_u8(&ctx.bb, &opcode); - if (!err) { - err = binary_buffer_error(&ctx.bb, - "invalid opcode %#" PRIx8 " for CFI expression", - opcode); - } - goto out; - } - if (stack.size == 0) { - err = &drgn_not_found; - } else if (rule->kind == DRGN_CFI_RULE_AT_DWARF_EXPRESSION) { - err = drgn_program_read_memory(prog, buf, - stack.data[stack.size - 1], size, - false); - } else { 
- copy_lsbytes(buf, size, - drgn_platform_is_little_endian(&prog->platform), - &stack.data[stack.size - 1], sizeof(uint64_t), - HOST_LITTLE_ENDIAN); - err = NULL; - } - -out: - uint64_vector_deinit(&stack); - return err; + return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); } +#endif struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) { diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index c3fca253e..dd47d646b 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -12,22 +12,19 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H -#include #include #include #include "binary_buffer.h" #include "cfi.h" #include "drgn.h" -#include "dwarf_index.h" +#include "dwarf_info.h" #include "hash_table.h" #include "orc_info.h" #include "platform.h" #include "string_builder.h" #include "vector.h" -struct drgn_register_state; - /** * @ingroup Internals * @@ -81,15 +78,6 @@ enum drgn_debug_info_scn { DRGN_NUM_DEBUG_SCNS, }; -struct drgn_dwarf_fde { - uint64_t initial_location; - uint64_t address_range; - /* CIE for this FDE as an index into drgn_debug_info_module::cies. */ - size_t cie; - const char *instructions; - size_t instructions_size; -}; - /** * A module reported to a @ref drgn_debug_info. * @@ -114,21 +102,8 @@ struct drgn_debug_info_module { Elf_Scn *scns[DRGN_NUM_DEBUG_SCNS]; Elf_Data *scn_data[DRGN_NUM_DEBUG_SCN_DATA]; - /** Base for `DW_EH_PE_pcrel`. */ - uint64_t pcrel_base; - /** Base for `DW_EH_PE_textrel`. */ - uint64_t textrel_base; - /** Base for `DW_EH_PE_datarel`. */ - uint64_t datarel_base; - /** Array of DWARF Common Information Entries. */ - struct drgn_dwarf_cie *cies; - /** - * Array of DWARF Frame Description Entries sorted by initial_location. - */ - struct drgn_dwarf_fde *fdes; - /** Number of elements in @ref drgn_debug_info_module::fdes. */ - size_t num_fdes; - + /** DWARF debugging information. */ + struct drgn_dwarf_module_info dwarf; /** ORC unwinder information. 
*/ struct drgn_orc_module_info orc; @@ -191,63 +166,11 @@ drgn_debug_info_buffer_init(struct drgn_debug_info_buffer *buffer, buffer->scn = scn; } -/** - * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing - * a given program counter. - * - * @param[in] module Module containing @p pc. - * @param[in] pc Program counter. - * @param[out] bias_ret Returned difference between addresses in the loaded - * module and addresses in the returned DIEs. - * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the - * innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent - * (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its - * grandparent, etc. Must be freed with @c free(). - * @param[out] length_ret Returned length of @p dies_ret. - */ -struct drgn_error * -drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, - uint64_t pc, uint64_t *bias_ret, - Dwarf_Die **dies_ret, - size_t *length_ret) - __attribute__((__nonnull__(1, 3, 4, 5))); - -/** - * Find the ancestors of a DWARF DIE. - * - * This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs. - * - * @param[in] module Module containing @p die. - * @param[in] die DIE to find. - * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret]` is the DIE, - * `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]` - * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. - * @param[out] length_ret Returned number of ancestors in @p dies_ret. - */ -struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, - size_t *length_ret) - __attribute__((__nonnull__(2, 3))); - DEFINE_HASH_TABLE_TYPE(drgn_debug_info_module_table, struct drgn_debug_info_module *) DEFINE_HASH_SET_TYPE(c_string_set, const char *) -/** Cached type in a @ref drgn_debug_info. 
*/ -struct drgn_dwarf_type { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - /** - * Whether this is an incomplete array type or a typedef of one. - * - * This is used to work around a GCC bug; see @ref - * drgn_type_from_dwarf_internal(). - */ - bool is_incomplete_array; -}; - -DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type); - /** Cache of debugging information. */ struct drgn_debug_info { /** Program owning this cache. */ @@ -264,25 +187,8 @@ struct drgn_debug_info { * they should not be freed. */ struct c_string_set module_names; - /** Index of DWARF debugging information. */ - struct drgn_dwarf_index dindex; - - /** - * Cache of parsed types. - * - * The key is the address of the DIE (@c Dwarf_Die::addr). The value is - * a @ref drgn_dwarf_type. - */ - struct drgn_dwarf_type_map types; - /** - * Cache of parsed types which appear to be incomplete array types but - * can't be. - * - * See @ref drgn_type_from_dwarf_internal(). - */ - struct drgn_dwarf_type_map cant_be_incomplete_array_types; - /** Current parsing recursion depth. */ - int depth; + /** DWARF debugging information. */ + struct drgn_dwarf_info dwarf; }; /** Create a @ref drgn_debug_info. */ @@ -376,6 +282,14 @@ struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, const char *name); +/** + * Get the language of the program's `main` function or `NULL` if it could not + * be found. + */ +struct drgn_error * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, + const struct drgn_language **ret); + /** @ref drgn_type_find_fn() that uses debugging information. 
*/ struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, const char *name, size_t name_len, @@ -389,44 +303,6 @@ drgn_debug_info_find_object(const char *name, size_t name_len, enum drgn_find_object_flags flags, void *arg, struct drgn_object *ret); -/** - * Find an object DIE in an array of DWARF scopes. - * - * @param[in] scopes Array of scopes, from outermost to innermost. - * @param[in] num_scopes Number of scopes in @p scopes. - * @param[out] die_ret Returned object DIE. - * @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent. - * Otherwise, undefined. - */ -struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, - size_t num_scopes, - const char *name, - Dwarf_Die *die_ret, - Dwarf_Die *type_ret); - -/** - * Create a @ref drgn_object from a `Dwarf_Die`. - * - * @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`, - * `DW_TAG_formal_parameter`, `DW_TAG_enumerator`, - * `DW_TAG_template_value_parameter`). - * @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type` - * attribute of @p die. If @p die is a `DW_TAG_enumerator` DIE, this should be - * its parent. - * @param[in] function_die DIE of current function. @c NULL if not in function - * context. - * @param[in] regs Registers of current stack frame. @c NULL if not in stack - * frame context. - * @param[out] ret Returned object. - */ -struct drgn_error * -drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, Dwarf_Die *type_die, - Dwarf_Die *function_die, - const struct drgn_register_state *regs, - struct drgn_object *ret); - /** * Get the Call Frame Information in a @ref drgn_debug_info_module at a given * program counter. 
@@ -446,12 +322,6 @@ drgn_debug_info_module_find_cfi(struct drgn_program *prog, bool *interrupted_ret, drgn_register_number *ret_addr_regno_ret); -struct drgn_error * -drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, - const struct drgn_cfi_rule *rule, - const struct drgn_register_state *regs, - void *buf, size_t size); - struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret); struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c new file mode 100644 index 000000000..fabae59d2 --- /dev/null +++ b/libdrgn/dwarf_info.c @@ -0,0 +1,4869 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "array.h" +#include "debug_info.h" // IWYU pragma: associated +#include "dwarf_index.h" +#include "error.h" +#include "language.h" +#include "lazy_object.h" +#include "minmax.h" +#include "object.h" +#include "path.h" +#include "program.h" +#include "register_state.h" +#include "serialize.h" +#include "type.h" +#include "util.h" + +DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) + +void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module) +{ + free(module->dwarf.fdes); + free(module->dwarf.cies); +} + +void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo) +{ + drgn_dwarf_index_init(&dbinfo->dwarf.index); + drgn_dwarf_type_map_init(&dbinfo->dwarf.types); + drgn_dwarf_type_map_init(&dbinfo->dwarf.cant_be_incomplete_array_types); + dbinfo->dwarf.depth = 0; +} + +void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo) +{ + drgn_dwarf_type_map_deinit(&dbinfo->dwarf.cant_be_incomplete_array_types); + drgn_dwarf_type_map_deinit(&dbinfo->dwarf.types); + drgn_dwarf_index_deinit(&dbinfo->dwarf.index); +} + +/* + * Diagnostics. 
+ */ + +#define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" +#define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) + +/** + * Get the name of a DWARF tag. + * + * @return Static string if the tag is known or @p buf if the tag is unknown + * (populated with a description). + */ +static const char *dw_tag_str(int tag, char buf[DW_TAG_BUF_LEN]) +{ + switch (tag) { +#define DWARF_ONE_KNOWN_DW_TAG(name, value) case value: return "DW_TAG_" #name; + DWARF_ALL_KNOWN_DW_TAG +#undef DWARF_ONE_KNOWN_DW_TAG + default: + sprintf(buf, DW_TAG_UNKNOWN_FORMAT, tag); + return buf; + } +} + +/** Like @ref dw_tag_str(), but takes a @c Dwarf_Die. */ +static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) +{ + return dw_tag_str(dwarf_tag(die), buf); +} + +static struct drgn_error * +drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, + const char *message) +{ + uintptr_t p = (uintptr_t)ptr; + int end_match = -1; + for (int i = 0; i < array_size(module->scn_data); i++) { + if (!module->scn_data[i]) + continue; + uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; + uintptr_t end = start + module->scn_data[i]->d_size; + if (start <= p) { + if (p < end) { + return drgn_error_debug_info_scn(module, i, ptr, + message); + } else if (p == end) { + end_match = i; + } + } + } + if (end_match != -1) { + /* + * The pointer doesn't lie within a section, but it does point + * to the end of a section. + */ + return drgn_error_debug_info_scn(module, end_match, ptr, + message); + } + /* We couldn't find the section containing the pointer. 
*/ + const char *name = dwfl_module_info(module->dwfl_module, NULL, NULL, + NULL, NULL, NULL, NULL, NULL); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); +} + +static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) +{ + if (address_size < 1 || address_size > 8) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported address size %" PRIu8, + address_size); + } + return NULL; +} + +/* + * Language support. + */ + +/** + * Return the @ref drgn_language of the CU of the given DIE. + * + * @param[in] fall_back Whether to fall back if the language is not found or + * unknown. If @c true, @ref drgn_default_language is returned in this case. If + * @c false, @c NULL is returned. + * @param[out] ret Returned language. + * @return @c NULL on success, non-@c NULL on error. + */ +static struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, + const struct drgn_language **ret) +{ + Dwarf_Die cudie; + if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) + return drgn_error_libdw(); + switch (dwarf_srclang(&cudie)) { + case DW_LANG_C: + case DW_LANG_C89: + case DW_LANG_C99: + case DW_LANG_C11: + *ret = &drgn_language_c; + break; + case DW_LANG_C_plus_plus: + case DW_LANG_C_plus_plus_03: + case DW_LANG_C_plus_plus_11: + case DW_LANG_C_plus_plus_14: + *ret = &drgn_language_cpp; + break; + default: + *ret = fall_back ? 
&drgn_default_language : NULL; + break; + } + return NULL; +} + +struct drgn_error * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, + const struct drgn_language **ret) +{ + struct drgn_error *err; + struct drgn_dwarf_index_iterator it; + const uint64_t tag = DW_TAG_subprogram; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.index.global, + "main", strlen("main"), &tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) { + drgn_error_destroy(err); + continue; + } + + err = drgn_language_from_die(&die, false, ret); + if (err) { + drgn_error_destroy(err); + continue; + } + if (*ret) + return NULL; + } + *ret = NULL; + return NULL; +} + +/* + * DIE iteration. + */ + +DEFINE_VECTOR(dwarf_die_vector, Dwarf_Die) + +/** Iterator over DWARF DIEs in a @ref drgn_debug_info_module. */ +struct drgn_dwarf_die_iterator { + /** Stack of current DIE and its ancestors. */ + struct dwarf_die_vector dies; + Dwarf *dwarf; + /** End of current CU (for bounds checking). */ + const char *cu_end; + /** Offset of next CU. */ + Dwarf_Off next_cu_off; + /** Whether current CU is from .debug_types. */ + bool debug_types; +}; + +static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, + Dwarf *dwarf) +{ + dwarf_die_vector_init(&it->dies); + it->dwarf = dwarf; + it->next_cu_off = 0; + it->debug_types = false; +} + +static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) +{ + dwarf_die_vector_deinit(&it->dies); +} + +/** + * Return the next DWARF DIE in a @ref drgn_dwarf_die_iterator. + * + * The first call returns the top-level DIE for the first unit in the module. + * Subsequent calls return children, siblings, and unit DIEs. + * + * This includes the .debug_types section. + * + * @param[in,out] it Iterator containing the returned DIE and its ancestors. 
The + * last entry in `it->dies` is the DIE itself, the entry before that is its + * parent, the entry before that is its grandparent, etc. + * @param[in] children If @c true and the last returned DIE has children, return + * its first child (this is a pre-order traversal). Otherwise, return the next + * DIE at the level less than or equal to the last returned DIE, i.e., the last + * returned DIE's sibling, or its ancestor's sibling, or the next top-level unit + * DIE. + * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, + * stop after returning all DIEs in the subtree rooted at the DIE that was + * returned in the last call as entry `subtree - 1` in `it->dies`. + * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which + * case the size of `it->dies` equals @p subtree and `it->dies` refers to the + * root of the iterated subtree, non-@c NULL on error, in which case this should + * not be called again. + */ +static struct drgn_error * +drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, + size_t subtree) +{ +#define TOP() (&it->dies.data[it->dies.size - 1]) + int r; + Dwarf_Die die; + assert(subtree <= it->dies.size); + if (it->dies.size == 0) { + /* This is the first call. Get the first unit DIE. */ + if (!dwarf_die_vector_append_entry(&it->dies)) + return &drgn_enomem; + } else { + if (children) { + r = dwarf_child(TOP(), &die); + if (r == 0) { + /* The previous DIE has a child. Return it. */ + if (!dwarf_die_vector_append(&it->dies, &die)) + return &drgn_enomem; + return NULL; + } else if (r < 0) { + return drgn_error_libdw(); + } + /* The previous DIE has no children. */ + } + + if (it->dies.size == subtree) { + /* + * The previous DIE is the root of the subtree. We're + * done. + */ + return &drgn_stop; + } + + if (it->dies.size > 1) { + r = dwarf_siblingof(TOP(), &die); + if (r == 0) { + /* The previous DIE has a sibling. Return it. 
*/ + *TOP() = die; + return NULL; + } else if (r > 0) { + if (!die.addr) + goto next_unit; + /* + * The previous DIE is the last child of its + * parent. + */ + char *addr = die.addr; + do { + /* + * addr points to the null terminator + * for the list of siblings. Go back up + * to its parent. The next byte is + * either the parent's sibling or + * another null terminator. + */ + it->dies.size--; + addr++; + if (it->dies.size == subtree) { + /* + * We're back to the root of the + * subtree. We're done. + */ + return &drgn_stop; + } + if (it->dies.size == 1 || + addr >= it->cu_end) + goto next_unit; + } while (*addr == '\0'); + /* + * addr now points to the next DIE. Return it. + */ + *TOP() = (Dwarf_Die){ + .cu = it->dies.data[0].cu, + .addr = addr, + }; + return NULL; + } else { + return drgn_error_libdw(); + } + } + } + +next_unit:; + /* There are no more DIEs in the current unit. */ + Dwarf_Off cu_off = it->next_cu_off; + size_t cu_header_size; + uint64_t type_signature; + r = dwarf_next_unit(it->dwarf, cu_off, &it->next_cu_off, + &cu_header_size, NULL, NULL, NULL, NULL, + it->debug_types ? &type_signature : NULL, NULL); + if (r == 0) { + /* Got the next unit. Return the unit DIE. */ + if (it->debug_types) { + r = !dwarf_offdie_types(it->dwarf, + cu_off + cu_header_size, TOP()); + } else { + r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, + TOP()); + } + if (r) + return drgn_error_libdw(); + it->cu_end = ((const char *)TOP()->addr + - dwarf_dieoffset(TOP()) + + it->next_cu_off); + return NULL; + } else if (r > 0) { + if (!it->debug_types) { + it->next_cu_off = 0; + it->debug_types = true; + goto next_unit; + } + /* There are no more units. 
*/ + return &drgn_stop; + } else { + return drgn_error_libdw(); + } +#undef TOP +} + +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdw(); + *bias_ret = bias; + pc -= bias; + + /* First, try to get the CU containing the PC. */ + Dwarf_Aranges *aranges; + size_t naranges; + if (dwarf_getaranges(dwarf, &aranges, &naranges) < 0) + return drgn_error_libdw(); + + struct drgn_dwarf_die_iterator it; + bool children; + size_t subtree; + Dwarf_Off offset; + if (dwarf_getarangeinfo(dwarf_getarange_addr(aranges, pc), NULL, NULL, + &offset) >= 0) { + drgn_dwarf_die_iterator_init(&it, dwarf); + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + if (!dwarf_offdie(dwarf, offset, cu_die)) { + err = drgn_error_libdw(); + goto err; + } + if (dwarf_next_unit(dwarf, offset - dwarf_cuoffset(cu_die), + &it.next_cu_off, NULL, NULL, NULL, NULL, + NULL, NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + it.cu_end = ((const char *)cu_die->addr + - dwarf_dieoffset(cu_die) + + it.next_cu_off); + children = true; + subtree = 1; + } else { + /* + * Range was not found. .debug_aranges could be missing or + * incomplete, so fall back to checking each CU. + */ + drgn_dwarf_die_iterator_init(&it, dwarf); + children = false; + subtree = 0; + } + + /* Now find DIEs containing the PC. 
*/ + while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree))) { + int r = dwarf_haspc(&it.dies.data[it.dies.size - 1], pc); + if (r > 0) { + children = true; + subtree = it.dies.size; + } else if (r < 0) { + err = drgn_error_libdw(); + goto err; + } + } + if (err != &drgn_stop) + goto err; + + *dies_ret = it.dies.data; + *length_ret = it.dies.size; + return NULL; + +err: + drgn_dwarf_die_iterator_deinit(&it); + return err; +} + +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf *dwarf = dwarf_cu_getdwarf(die->cu); + if (!dwarf) + return drgn_error_libdw(); + + struct dwarf_die_vector dies = VECTOR_INIT; + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + + Dwarf_Half cu_version; + Dwarf_Off type_offset; + if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, + &type_offset)) { + err = drgn_error_libdw(); + goto err; + } + Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); + bool debug_types = cu_version == 4 && type_offset != 0; + Dwarf_Off next_cu_offset; + uint64_t type_signature; + if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), + &next_cu_offset, NULL, NULL, NULL, NULL, NULL, + debug_types ? &type_signature : NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + const unsigned char *cu_end = + (unsigned char *)cu_die->addr - cu_die_offset + next_cu_offset; + +#define TOP() (&dies.data[dies.size - 1]) + while ((char *)TOP()->addr <= (char *)die->addr) { + if (TOP()->addr == die->addr) { + *dies_ret = dies.data; + *length_ret = dies.size - 1; + return NULL; + } + + Dwarf_Attribute attr; + if (dwarf_attr(TOP(), DW_AT_sibling, &attr)) { + /* The top DIE has a DW_AT_sibling attribute. 
*/ + Dwarf_Die sibling; + if (!dwarf_formref_die(&attr, &sibling)) { + err = drgn_error_libdw(); + goto err; + } + if (sibling.cu != TOP()->cu || + (char *)sibling.addr <= (char *)TOP()->addr) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_sibling"); + goto err; + } + + if ((char *)sibling.addr > (char *)die->addr) { + /* + * The top DIE's sibling is after the target + * DIE. Therefore, the target DIE must be a + * descendant of the top DIE. + */ + Dwarf_Die *child = + dwarf_die_vector_append_entry(&dies); + if (!child) { + err = &drgn_enomem; + goto err; + } + int r = dwarf_child(TOP() - 1, child); + if (r < 0) { + err = drgn_error_libdw(); + goto err; + } else if (r > 0) { + /* + * The top DIE didn't have any children, + * which should be impossible. + */ + goto not_found; + } + } else { + /* + * The top DIE's sibling is before or equal to + * the target DIE. Therefore, the target DIE + * isn't a descendant of the top DIE. Skip to + * the sibling. + */ + *TOP() = sibling; + } + } else { + /* + * The top DIE does not have a DW_AT_sibling attribute. + * Instead, we found the end of the top DIE. + */ + unsigned char *addr = attr.valp; + if (!addr || addr >= cu_end) + goto not_found; + + /* + * If the top DIE has children, then addr is its first + * child. Otherwise, then addr is its sibling. (Unless + * it is a null terminator.) + */ + size_t new_size = dies.size; + if (dwarf_haschildren(TOP()) > 0) + new_size++; + + while (*addr == '\0') { + /* + * addr points to the null terminator for the + * list of siblings. Go back up to its parent. + * The next byte is either the parent's sibling + * or another null terminator. + */ + new_size--; + addr++; + if (new_size <= 1 || addr >= cu_end) + goto not_found; + } + + /* addr now points to the next DIE. Go to it. 
*/ + if (new_size > dies.size) { + if (!dwarf_die_vector_append_entry(&dies)) { + err = &drgn_enomem; + goto err; + } + } else { + dies.size = new_size; + } + *TOP() = (Dwarf_Die){ + .cu = dies.data[0].cu, + .addr = addr, + }; + } + } +#undef TOP + +not_found: + err = drgn_error_create(DRGN_ERROR_OTHER, + "could not find DWARF DIE ancestors"); +err: + dwarf_die_vector_deinit(&dies); + return err; +} + +/* + * Location lists. + */ + +static struct drgn_error * +drgn_dwarf_next_addrx(struct binary_buffer *bb, + struct drgn_debug_info_module *module, Dwarf_Die *cu_die, + uint8_t address_size, const char **addr_base, + uint64_t *ret) +{ + struct drgn_error *err; + + if (!*addr_base) { + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(cu_die, DW_AT_addr_base, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "indirect address without DW_AT_addr_base"); + } + Dwarf_Word base; + if (dwarf_formudata(attr, &base)) + return drgn_error_libdw(); + + if (!module->scns[DRGN_SCN_DEBUG_ADDR]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "indirect address without .debug_addr section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_ADDR); + if (err) + return err; + + if (base > module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_size || + base == 0) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_addr_base is out of bounds"); + } + + *addr_base = (char *)module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_buf + base; + uint8_t segment_selector_size = ((uint8_t *)*addr_base)[-1]; + if (segment_selector_size != 0) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported segment selector size %" PRIu8, + segment_selector_size); + } + } + + uint64_t index; + if ((err = binary_buffer_next_uleb128(bb, &index))) + return err; + + Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_ADDR]; + if (index >= + ((char *)data->d_buf + data->d_size - *addr_base) / address_size) { + return binary_buffer_error(bb, + "address index is out of bounds"); + } + 
copy_lsbytes(ret, sizeof(*ret), HOST_LITTLE_ENDIAN, + *addr_base + index * address_size, address_size, + drgn_platform_is_little_endian(&module->platform)); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_read_loclistx(struct drgn_debug_info_module *module, + Dwarf_Die *cu_die, uint8_t offset_size, + Dwarf_Word index, Dwarf_Word *ret) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(cu_die, DW_AT_loclists_base, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx without DW_AT_loclists_base"); + } + Dwarf_Word base; + if (dwarf_formudata(attr, &base)) + return drgn_error_libdw(); + + if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx without .debug_loclists section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; + Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_LOCLISTS]; + + if (base > data->d_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_loclists_base is out of bounds"); + } + assert(offset_size == 4 || offset_size == 8); + if (index >= (data->d_size - base) / offset_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx is out of bounds"); + } + const char *basep = (char *)data->d_buf + base; + if (offset_size == 8) { + uint64_t offset; + memcpy(&offset, (uint64_t *)basep + index, sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_64(offset); + *ret = base + offset; + } else { + uint32_t offset; + memcpy(&offset, (uint32_t *)basep + index, sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_32(offset); + *ret = base + offset; + } + return NULL; +} + +static struct drgn_error * +drgn_dwarf5_location_list(struct drgn_debug_info_module *module, + Dwarf_Word offset, Dwarf_Die *cu_die, + uint8_t address_size, uint64_t pc, + const char **expr_ret, size_t *expr_size_ret) +{ + 
struct drgn_error *err; + + if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclist without .debug_loclists section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOCLISTS); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclist is out of bounds"); + } + buffer.bb.pos += offset; + + const char *addr_base = NULL; + uint64_t base; + bool base_valid = false; + /* Default is unknown. May be overridden by DW_LLE_default_location. */ + *expr_ret = NULL; + *expr_size_ret = 0; + for (;;) { + uint8_t kind; + if ((err = binary_buffer_next_u8(&buffer.bb, &kind))) + return err; + uint64_t start, length, expr_size; + switch (kind) { + case DW_LLE_end_of_list: + return NULL; + case DW_LLE_base_addressx: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &base))) + return err; + base_valid = true; + break; + case DW_LLE_startx_endx: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &start)) || + (err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &length))) + return err; + length -= start; +counted_location_description: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (pc >= start && pc - start < length) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + break; + case DW_LLE_startx_length: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return 
err; + goto counted_location_description; + case DW_LLE_offset_pair: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + length -= start; + if (!base_valid) { + Dwarf_Addr low_pc; + if (dwarf_lowpc(cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + start += base; + goto counted_location_description; + case DW_LLE_default_location: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + buffer.bb.pos += expr_size; + break; + case DW_LLE_base_address: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &base))) + return err; + base_valid = true; + break; + case DW_LLE_start_end: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, + address_size, + &length))) + return err; + length -= start; + goto counted_location_description; + case DW_LLE_start_length: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + goto counted_location_description; + default: + return binary_buffer_error(&buffer.bb, + "unknown location list entry kind %#" PRIx8, + kind); + } + } +} + +static struct drgn_error * +drgn_dwarf4_location_list(struct drgn_debug_info_module *module, + Dwarf_Word offset, Dwarf_Die *cu_die, + uint8_t address_size, uint64_t pc, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + + if (!module->scns[DRGN_SCN_DEBUG_LOC]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr without .debug_loc section"); + } + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_DEBUG_LOC); + if (err) + 
return err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOC); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr is out of bounds"); + } + buffer.bb.pos += offset; + + uint64_t address_max = uint_max(address_size); + uint64_t base; + bool base_valid = false; + for (;;) { + uint64_t start, end; + if ((err = binary_buffer_next_uint(&buffer.bb, address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, address_size, + &end))) + return err; + if (start == 0 && end == 0) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } else if (start == address_max) { + base = end; + base_valid = true; + } else { + if (!base_valid) { + Dwarf_Addr low_pc; + if (dwarf_lowpc(cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + uint16_t expr_size; + if ((err = binary_buffer_next_u16(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (base + start <= pc && pc < base + end) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + } + } +} + +static struct drgn_error * +drgn_dwarf_location(struct drgn_debug_info_module *module, + Dwarf_Attribute *attr, + const struct drgn_register_state *regs, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + switch (attr->form) { + /* DWARF 3 */ + case DW_FORM_data4: + case DW_FORM_data8: + /* DWARF 4-5 */ + case DW_FORM_sec_offset: + /* DWARF 5 */ + case DW_FORM_loclistx: { + Dwarf_Die cu_die; + Dwarf_Half cu_version; + uint8_t address_size; + uint8_t offset_size; + if (!dwarf_cu_die(attr->cu, &cu_die, &cu_version, NULL, + &address_size, &offset_size, NULL, NULL)) + return drgn_error_libdw(); + if ((err = drgn_check_address_size(address_size))) + return err; + 
+ Dwarf_Word offset; + if (dwarf_formudata(attr, &offset)) + return drgn_error_libdw(); + if (attr->form == DW_FORM_loclistx && + ((err = drgn_dwarf_read_loclistx(module, &cu_die, + offset_size, offset, + &offset)))) + return err; + + struct optional_uint64 pc; + if (!regs || + !(pc = drgn_register_state_get_pc(regs)).has_value) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } + Dwarf_Addr bias; + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, + NULL, NULL, NULL); + pc.value = pc.value - !regs->interrupted - bias; + + if (cu_version >= 5) { + return drgn_dwarf5_location_list(module, offset, + &cu_die, address_size, + pc.value, expr_ret, + expr_size_ret); + } else { + return drgn_dwarf4_location_list(module, offset, + &cu_die, address_size, + pc.value, expr_ret, + expr_size_ret); + } + } + default: { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + *expr_ret = (char *)block.data; + *expr_size_ret = block.length; + return NULL; + } + } +} + +/* + * DWARF expressions. + */ + +/** + * Arbitrary limit for number of operations to execute in a DWARF expression to + * avoid infinite loops. + */ +static const int MAX_DWARF_EXPR_OPS = 10000; + +DEFINE_VECTOR(uint64_vector, uint64_t) + +/* A DWARF expression and the context it is being evaluated in. 
*/ +struct drgn_dwarf_expression_context { + struct binary_buffer bb; + const char *start; + struct drgn_program *prog; + struct drgn_debug_info_module *module; + uint8_t address_size; + Dwarf_Die cu_die; + const char *cu_addr_base; + Dwarf_Die *function; + const struct drgn_register_state *regs; +}; + +static struct drgn_error * +drgn_dwarf_expression_buffer_error(struct binary_buffer *bb, const char *pos, + const char *message) +{ + struct drgn_dwarf_expression_context *ctx = + container_of(bb, struct drgn_dwarf_expression_context, bb); + return drgn_error_debug_info(ctx->module, pos, message); +} + +static inline struct drgn_error * +drgn_dwarf_expression_context_init(struct drgn_dwarf_expression_context *ctx, + struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_CU *cu, Dwarf_Die *function, + const struct drgn_register_state *regs, + const char *expr, size_t expr_size) +{ + struct drgn_error *err; + binary_buffer_init(&ctx->bb, expr, expr_size, + drgn_platform_is_little_endian(&module->platform), + drgn_dwarf_expression_buffer_error); + ctx->start = expr; + ctx->prog = prog; + ctx->module = module; + if (cu) { + if (!dwarf_cu_die(cu, &ctx->cu_die, NULL, NULL, + &ctx->address_size, NULL, NULL, NULL)) + return drgn_error_libdw(); + if ((err = drgn_check_address_size(ctx->address_size))) + return err; + } else { + ctx->cu_die.addr = NULL; + ctx->address_size = + drgn_platform_address_size(&module->platform); + } + ctx->cu_addr_base = NULL; + ctx->function = function; + ctx->regs = regs; + return NULL; +} + +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_register_state *regs, + int *remaining_ops, uint64_t *ret); + +/* + * Evaluate a DWARF expression up to the next location description operation or + * operation that can't be evaluated in the given context. + * + * Returns &drgn_not_found if it tried to use an unknown register value. 
+ */ +static struct drgn_error * +drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, + struct uint64_vector *stack, + int *remaining_ops) +{ + struct drgn_error *err; + const struct drgn_platform *platform = &ctx->module->platform; + bool little_endian = drgn_platform_is_little_endian(platform); + uint8_t address_size = ctx->address_size; + uint8_t address_bits = address_size * CHAR_BIT; + uint64_t address_mask = uint_max(address_size); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + platform->arch->dwarf_regno_to_internal; + +#define CHECK(n) do { \ + size_t _n = (n); \ + if (stack->size < _n) { \ + return binary_buffer_error(&ctx->bb, \ + "DWARF expression stack underflow"); \ + } \ +} while (0) + +#define ELEM(i) stack->data[stack->size - 1 - (i)] + +#define PUSH(x) do { \ + uint64_t push = (x); \ + if (!uint64_vector_append(stack, &push)) \ + return &drgn_enomem; \ +} while (0) + +#define PUSH_MASK(x) PUSH((x) & address_mask) + + while (binary_buffer_has_next(&ctx->bb)) { + if (*remaining_ops <= 0) { + return binary_buffer_error(&ctx->bb, + "DWARF expression executed too many operations"); + } + (*remaining_ops)--; + uint8_t opcode; + if ((err = binary_buffer_next_u8(&ctx->bb, &opcode))) + return err; + uint64_t uvalue; + uint64_t dwarf_regno; + uint8_t deref_size; + switch (opcode) { + /* Literal encodings. */ + case DW_OP_lit0 ... 
DW_OP_lit31: + PUSH(opcode - DW_OP_lit0); + break; + case DW_OP_addr: + if ((err = binary_buffer_next_uint(&ctx->bb, + address_size, + &uvalue))) + return err; + PUSH(uvalue); + break; + case DW_OP_const1u: + if ((err = binary_buffer_next_u8_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH(uvalue); + break; + case DW_OP_const2u: + if ((err = binary_buffer_next_u16_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const4u: + if ((err = binary_buffer_next_u32_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const8u: + if ((err = binary_buffer_next_u64(&ctx->bb, &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const1s: + if ((err = binary_buffer_next_s8_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const2s: + if ((err = binary_buffer_next_s16_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const4s: + if ((err = binary_buffer_next_s32_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const8s: + if ((err = binary_buffer_next_s64_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_constu: + if ((err = binary_buffer_next_uleb128(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_consts: + if ((err = binary_buffer_next_sleb128_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_addrx: + case DW_OP_constx: + if (!ctx->cu_die.addr) { + ctx->bb.pos = ctx->bb.prev; + return NULL; + } + if ((err = drgn_dwarf_next_addrx(&ctx->bb, ctx->module, + &ctx->cu_die, + address_size, + &ctx->cu_addr_base, + &uvalue))) + return err; + PUSH(uvalue); + break; + /* Register values. 
*/ + case DW_OP_fbreg: { + err = drgn_dwarf_frame_base(ctx->prog, ctx->module, + ctx->function, ctx->regs, + remaining_ops, &uvalue); + if (err) + return err; + int64_t svalue; + if ((err = binary_buffer_next_sleb128(&ctx->bb, + &svalue))) + return err; + PUSH_MASK(uvalue + svalue); + break; + } + case DW_OP_breg0 ... DW_OP_breg31: + dwarf_regno = opcode - DW_OP_breg0; + goto breg; + case DW_OP_bregx: + if ((err = binary_buffer_next_uleb128(&ctx->bb, + &dwarf_regno))) + return err; +breg: + { + if (!ctx->regs) + return &drgn_not_found; + drgn_register_number regno = + dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(ctx->regs, regno)) + return &drgn_not_found; + const struct drgn_register_layout *layout = + &platform->arch->register_layout[regno]; + copy_lsbytes(&uvalue, sizeof(uvalue), + HOST_LITTLE_ENDIAN, + &ctx->regs->buf[layout->offset], + layout->size, little_endian); + int64_t svalue; + if ((err = binary_buffer_next_sleb128(&ctx->bb, + &svalue))) + return err; + PUSH_MASK(uvalue + svalue); + break; + } + /* Stack operations. 
*/ + case DW_OP_dup: + CHECK(1); + PUSH(ELEM(0)); + break; + case DW_OP_drop: + CHECK(1); + stack->size--; + break; + case DW_OP_pick: { + uint8_t index; + if ((err = binary_buffer_next_u8(&ctx->bb, &index))) + return err; + CHECK(index + 1); + PUSH(ELEM(index)); + break; + } + case DW_OP_over: + CHECK(2); + PUSH(ELEM(1)); + break; + case DW_OP_swap: + CHECK(2); + uvalue = ELEM(0); + ELEM(0) = ELEM(1); + ELEM(1) = uvalue; + break; + case DW_OP_rot: + CHECK(3); + uvalue = ELEM(0); + ELEM(0) = ELEM(1); + ELEM(1) = ELEM(2); + ELEM(2) = uvalue; + break; + case DW_OP_deref: + deref_size = address_size; + goto deref; + case DW_OP_deref_size: + if ((err = binary_buffer_next_u8(&ctx->bb, + &deref_size))) + return err; + if (deref_size > address_size) { + return binary_buffer_error(&ctx->bb, + "DW_OP_deref_size has invalid size"); + } +deref: + { + CHECK(1); + char deref_buf[8]; + err = drgn_program_read_memory(ctx->prog, deref_buf, + ELEM(0), deref_size, + false); + if (err) + return err; + copy_lsbytes(&ELEM(0), sizeof(ELEM(0)), + HOST_LITTLE_ENDIAN, deref_buf, deref_size, + little_endian); + break; + } + case DW_OP_call_frame_cfa: { + if (!ctx->regs) + return &drgn_not_found; + /* + * The DWARF 5 specification says that + * DW_OP_call_frame_cfa cannot be used for CFI. For + * DW_CFA_def_cfa_expression, it is clearly invalid to + * define the CFA in terms of the CFA, and it will fail + * naturally below. This restriction doesn't make sense + * for DW_CFA_expression and DW_CFA_val_expression, as + * they push the CFA and thus depend on it anyways, so + * we don't bother enforcing it. + */ + struct optional_uint64 cfa = + drgn_register_state_get_cfa(ctx->regs); + if (!cfa.has_value) + return &drgn_not_found; + PUSH(cfa.value); + break; + } + /* Arithmetic and logical operations. 
*/ +#define UNOP_MASK(op) do { \ + CHECK(1); \ + ELEM(0) = (op ELEM(0)) & address_mask; \ +} while (0) +#define BINOP(op) do { \ + CHECK(2); \ + ELEM(1) = ELEM(1) op ELEM(0); \ + stack->size--; \ +} while (0) +#define BINOP_MASK(op) do { \ + CHECK(2); \ + ELEM(1) = (ELEM(1) op ELEM(0)) & address_mask; \ + stack->size--; \ +} while (0) + case DW_OP_abs: + CHECK(1); + if (ELEM(0) & (UINT64_C(1) << (address_bits - 1))) + ELEM(0) = -ELEM(0) & address_mask; + break; + case DW_OP_and: + BINOP(&); + break; + case DW_OP_div: + CHECK(2); + if (ELEM(0) == 0) { + return binary_buffer_error(&ctx->bb, + "division by zero in DWARF expression"); + } + ELEM(1) = ((truncate_signed(ELEM(1), address_bits) + / truncate_signed(ELEM(0), address_bits)) + & address_mask); + stack->size--; + break; + case DW_OP_minus: + BINOP_MASK(-); + break; + case DW_OP_mod: + CHECK(2); + if (ELEM(0) == 0) { + return binary_buffer_error(&ctx->bb, + "modulo by zero in DWARF expression"); + } + ELEM(1) = ELEM(1) % ELEM(0); + stack->size--; + break; + case DW_OP_mul: + BINOP_MASK(*); + break; + case DW_OP_neg: + UNOP_MASK(-); + break; + case DW_OP_not: + UNOP_MASK(~); + break; + case DW_OP_or: + BINOP(|); + break; + case DW_OP_plus: + BINOP_MASK(+); + break; + case DW_OP_plus_uconst: + CHECK(1); + if ((err = binary_buffer_next_uleb128(&ctx->bb, + &uvalue))) + return err; + ELEM(0) = (ELEM(0) + uvalue) & address_mask; + break; + case DW_OP_shl: + CHECK(2); + if (ELEM(0) < address_bits) + ELEM(1) = (ELEM(1) << ELEM(0)) & address_mask; + else + ELEM(1) = 0; + stack->size--; + break; + case DW_OP_shr: + CHECK(2); + if (ELEM(0) < address_bits) + ELEM(1) >>= ELEM(0); + else + ELEM(1) = 0; + stack->size--; + break; + case DW_OP_shra: + CHECK(2); + if (ELEM(0) < address_bits) { + ELEM(1) = ((truncate_signed(ELEM(1), address_bits) + >> ELEM(0)) + & address_mask); + } else if (ELEM(1) & (UINT64_C(1) << (address_bits - 1))) { + ELEM(1) = -INT64_C(1) & address_mask; + } else { + ELEM(1) = 0; + } + stack->size--; + 
break; + case DW_OP_xor: + BINOP(^); + break; +#undef BINOP_MASK +#undef BINOP +#undef UNOP_MASK + /* Control flow operations. */ +#define RELOP(op) do { \ + CHECK(2); \ + ELEM(1) = (truncate_signed(ELEM(1), address_bits) op \ + truncate_signed(ELEM(0), address_bits)); \ + stack->size--; \ +} while (0) + case DW_OP_le: + RELOP(<=); + break; + case DW_OP_ge: + RELOP(>=); + break; + case DW_OP_eq: + RELOP(==); + break; + case DW_OP_lt: + RELOP(<); + break; + case DW_OP_gt: + RELOP(>); + break; + case DW_OP_ne: + RELOP(!=); + break; +#undef RELOP + case DW_OP_skip: +branch: + { + int16_t skip; + if ((err = binary_buffer_next_s16(&ctx->bb, &skip))) + return err; + if ((skip >= 0 && skip > ctx->bb.end - ctx->bb.pos) || + (skip < 0 && -skip > ctx->bb.pos - ctx->start)) { + return binary_buffer_error(&ctx->bb, + "DWARF expression branch is out of bounds"); + } + ctx->bb.pos += skip; + break; + } + case DW_OP_bra: + CHECK(1); + if (ELEM(0)) { + stack->size--; + goto branch; + } else { + stack->size--; + if ((err = binary_buffer_skip(&ctx->bb, 2))) + return err; + } + break; + /* Special operations. */ + case DW_OP_nop: + break; + /* Location description operations. */ + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + case DW_OP_implicit_value: + case DW_OP_stack_value: + case DW_OP_piece: + case DW_OP_bit_piece: + /* The caller must handle it. */ + ctx->bb.pos = ctx->bb.prev; + return NULL; + /* + * We don't yet support: + * + * - DW_OP_push_object_address + * - DW_OP_form_tls_address + * - DW_OP_entry_value + * DW_OP_implicit_pointer + * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. + * - Typed operations: DW_OP_const_type, DW_OP_regval_type, + * DW_OP_deref_type, DW_OP_convert, DW_OP_reinterpret. + * - Operations for multiple address spaces: DW_OP_xderef, + * DW_OP_xderef_size, DW_OP_xderef_type. 
+ */ + default: + return binary_buffer_error(&ctx->bb, + "unknown DWARF expression opcode %#" PRIx8, + opcode); + } + } + +#undef PUSH_MASK +#undef PUSH +#undef ELEM +#undef CHECK + + return NULL; +} + +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_register_state *regs, + int *remaining_ops, uint64_t *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + + if (!die) + return &drgn_not_found; + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_frame_base, &attr_mem))) + return &drgn_not_found; + const char *expr; + size_t expr_size; + err = drgn_dwarf_location(module, attr, regs, &expr, &expr_size); + if (err) + return err; + + struct drgn_dwarf_expression_context ctx; + if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, + die->cu, NULL, regs, expr, + expr_size))) + return err; + struct uint64_vector stack = VECTOR_INIT; + for (;;) { + err = drgn_eval_dwarf_expression(&ctx, &stack, remaining_ops); + if (err) + goto out; + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) + goto out; + + uint64_t dwarf_regno; + switch (opcode) { + case DW_OP_reg0 ... 
DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &dwarf_regno))) + goto out; +reg: + { + if (!regs) { + err = &drgn_not_found; + goto out; + } + drgn_register_number regno = + dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) { + err = &drgn_not_found; + goto out; + } + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + /* + * Note that this doesn't mask the address since + * the caller does that. + */ + copy_lsbytes(ret, sizeof(*ret), + HOST_LITTLE_ENDIAN, + &regs->buf[layout->offset], + layout->size, little_endian); + if (binary_buffer_has_next(&ctx.bb)) { + err = binary_buffer_error(&ctx.bb, + "stray operations in DW_AT_frame_base expression"); + } else { + err = NULL; + } + goto out; + } + default: + err = binary_buffer_error(&ctx.bb, + "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", + opcode); + goto out; + } + } else if (stack.size) { + *ret = stack.data[stack.size - 1]; + err = NULL; + break; + } else { + err = &drgn_not_found; + break; + } + } +out: + uint64_vector_deinit(&stack); + return err; +} + +/* + * Type and object parsing. + */ + +/** + * Return whether a DWARF DIE is little-endian. + * + * @param[in] check_attr Whether to check the DW_AT_endianity attribute. If @c + * false, only the ELF header is checked and this function cannot fail. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error *dwarf_die_is_little_endian(Dwarf_Die *die, + bool check_attr, bool *ret) +{ + Dwarf_Attribute endianity_attr_mem, *endianity_attr; + Dwarf_Word endianity; + if (check_attr && + (endianity_attr = dwarf_attr_integrate(die, DW_AT_endianity, + &endianity_attr_mem))) { + if (dwarf_formudata(endianity_attr, &endianity)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_endianity"); + } + } else { + endianity = DW_END_default; + } + switch (endianity) { + case DW_END_default: { + Elf *elf = dwarf_getelf(dwarf_cu_getdwarf(die->cu)); + *ret = elf_getident(elf, NULL)[EI_DATA] == ELFDATA2LSB; + return NULL; + } + case DW_END_little: + *ret = true; + return NULL; + case DW_END_big: + *ret = false; + return NULL; + default: + return drgn_error_create(DRGN_ERROR_OTHER, + "unknown DW_AT_endianity"); + } +} + +/** Like dwarf_die_is_little_endian(), but returns a @ref drgn_byte_order. */ +static struct drgn_error *dwarf_die_byte_order(Dwarf_Die *die, bool check_attr, + enum drgn_byte_order *ret) +{ + bool little_endian; + struct drgn_error *err = dwarf_die_is_little_endian(die, check_attr, + &little_endian); + /* + * dwarf_die_is_little_endian() can't fail if check_attr is false, so + * the !check_attr test suppresses maybe-uninitialized warnings. + */ + if (!err || !check_attr) + *ret = drgn_byte_order_from_little_endian(little_endian); + return err; +} + +static int dwarf_type(Dwarf_Die *die, Dwarf_Die *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) + return 1; + + return dwarf_formref_die(attr, ret) ? 
0 : -1; +} + +static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr(die, name, &attr_mem))) { + *ret = false; + return 0; + } + return dwarf_formflag(attr, ret); +} + +static int dwarf_flag_integrate(Dwarf_Die *die, unsigned int name, bool *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr_integrate(die, name, &attr_mem))) { + *ret = false; + return 0; + } + return dwarf_formflag(attr, ret); +} + +/** + * Parse a type from a DWARF debugging information entry. + * + * This is the same as @ref drgn_type_from_dwarf() except that it can be used to + * work around a bug in GCC < 9.0 that zero length array types are encoded the + * same as incomplete array types. There are a few places where GCC allows + * zero-length arrays but not incomplete arrays: + * + * - As the type of a member of a structure with only one member. + * - As the type of a structure member other than the last member. + * - As the type of a union member. + * - As the element type of an array. + * + * In these cases, we know that what appears to be an incomplete array type must + * actually have a length of zero. In other cases, a subrange DIE without + * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array + * type. + * + * @param[in] dbinfo Debugging information. + * @param[in] module Module containing @p die. + * @param[in] die DIE to parse. + * @param[in] can_be_incomplete_array Whether the type can be an incomplete + * array type. If this is @c false and the type appears to be an incomplete + * array type, its length is set to zero instead. + * @param[out] is_incomplete_array_ret Whether the encoded type is an incomplete + * array type or a typedef of an incomplete array type (regardless of @p + * can_be_incomplete_array). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error * +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret); + +/** + * Parse a type from a DWARF debugging information entry. + * + * @param[in] dbinfo Debugging information. + * @param[in] module Module containing @p die. + * @param[in] die DIE to parse. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. + */ +static inline struct drgn_error * +drgn_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + struct drgn_qualified_type *ret) +{ + return drgn_type_from_dwarf_internal(dbinfo, module, die, true, NULL, + ret); +} + +/** + * Parse a type from the @c DW_AT_type attribute of a DWARF debugging + * information entry. + * + * @param[in] dbinfo Debugging information. + * @param[in] module Module containing @p die. + * @param[in] die DIE with @c DW_AT_type attribute. + * @param[in] lang Language of @p die if it is already known, @c NULL if it + * should be determined from @p die. + * @param[in] can_be_void Whether the @c DW_AT_type attribute may be missing, + * which is interpreted as a void type. If this is false and the @c DW_AT_type + * attribute is missing, an error is returned. + * @param[in] can_be_incomplete_array See @ref drgn_type_from_dwarf_internal(). + * @param[in] is_incomplete_array_ret See @ref drgn_type_from_dwarf_internal(). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error * +drgn_type_from_dwarf_attr(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_language *lang, + bool can_be_void, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + char tag_buf[DW_TAG_BUF_LEN]; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) { + if (can_be_void) { + if (!lang) { + err = drgn_language_from_die(die, true, &lang); + if (err) + return err; + } + ret->type = drgn_void_type(dbinfo->prog, lang); + ret->qualifiers = 0; + return NULL; + } else { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s is missing DW_AT_type", + dwarf_tag_str(die, tag_buf)); + } + } + + Dwarf_Die type_die; + if (!dwarf_formref_die(attr, &type_die)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_type", + dwarf_tag_str(die, tag_buf)); + } + + return drgn_type_from_dwarf_internal(dbinfo, module, &type_die, + can_be_incomplete_array, + is_incomplete_array_ret, ret); +} + +static struct drgn_error * +drgn_object_from_dwarf_enumerator(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const char *name, + struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf(dbinfo, module, die, &qualified_type); + if (err) + return err; + const struct drgn_type_enumerator *enumerators = + drgn_type_enumerators(qualified_type.type); + size_t num_enumerators = drgn_type_num_enumerators(qualified_type.type); + for (size_t i = 0; i < num_enumerators; i++) { + if (strcmp(enumerators[i].name, name) != 0) + continue; + + if (drgn_enum_type_is_signed(qualified_type.type)) { + return drgn_object_set_signed(ret, qualified_type, + enumerators[i].svalue, 0); + } else { + return drgn_object_set_unsigned(ret, qualified_type, + 
enumerators[i].uvalue, + 0); + } + } + UNREACHABLE(); +} + +static struct drgn_error * +drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, struct drgn_object *ret) +{ + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_type_from_dwarf(dbinfo, module, die, + &qualified_type); + if (err) + return err; + Dwarf_Addr low_pc; + if (dwarf_lowpc(die, &low_pc) == -1) + return drgn_object_set_absent(ret, qualified_type, 0); + Dwarf_Addr bias; + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, + NULL, NULL); + return drgn_object_set_reference(ret, qualified_type, low_pc + bias, 0, + 0); +} + +static struct drgn_error *read_bits(struct drgn_program *prog, void *dst, + unsigned int dst_bit_offset, uint64_t src, + unsigned int src_bit_offset, + uint64_t bit_size, bool lsb0) +{ + struct drgn_error *err; + + assert(dst_bit_offset < 8); + assert(src_bit_offset < 8); + + if (bit_size == 0) + return NULL; + + if (dst_bit_offset == src_bit_offset) { + /* + * We can read directly into the destination buffer, but we + * may have to preserve some bits at the start and/or end. + */ + uint8_t *d = dst; + uint64_t last_bit = dst_bit_offset + bit_size - 1; + uint8_t first_byte = d[0]; + uint8_t last_byte = d[last_bit / 8]; + err = drgn_program_read_memory(prog, d, src, last_bit / 8 + 1, + false); + if (err) + return err; + if (dst_bit_offset != 0) { + uint8_t mask = + copy_bits_first_mask(dst_bit_offset, lsb0); + d[0] = (first_byte & ~mask) | (d[0] & mask); + } + if (last_bit % 8 != 7) { + uint8_t mask = copy_bits_last_mask(last_bit, lsb0); + d[last_bit / 8] = ((last_byte & ~mask) + | (d[last_bit / 8] & mask)); + } + return NULL; + } else { + /* + * If the source and destination have different offsets, then + * depending on the size and source offset, we may have to read + * one more byte than is available in the destination.
To keep + * things simple, we always read into a temporary buffer (rather + * than adding a special case for reading directly into the + * destination and shifting bits around). + */ + uint64_t src_bytes = (src_bit_offset + bit_size - 1) / 8 + 1; + char stack_tmp[16], *tmp; + if (src_bytes <= sizeof(stack_tmp)) { + tmp = stack_tmp; + } else { + tmp = malloc64(src_bytes); + if (!tmp) + return &drgn_enomem; + } + err = drgn_program_read_memory(prog, tmp, src, src_bytes, + false); + if (!err) { + copy_bits(dst, dst_bit_offset, tmp, src_bit_offset, + bit_size, lsb0); + } + if (src_bytes > sizeof(stack_tmp)) + free(tmp); + return err; + } +} + +static struct drgn_error * +drgn_object_from_dwarf_location(struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + const char *expr, size_t expr_size, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + uint64_t address_mask = drgn_platform_address_mask(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + + struct drgn_object_type type; + err = drgn_object_type(qualified_type, 0, &type); + if (err) + return err; + + union drgn_value value; + char *value_buf = NULL; + + uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ + int bit_offset = -1; /* -1 means that we don't have an address. 
*/ + + uint64_t bit_pos = 0; + + int remaining_ops = MAX_DWARF_EXPR_OPS; + struct drgn_dwarf_expression_context ctx; + if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, + die->cu, function_die, + regs, expr, expr_size))) + return err; + struct uint64_vector stack = VECTOR_INIT; + do { + stack.size = 0; + err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); + if (err == &drgn_not_found) + goto absent; + else if (err) + goto out; + + const void *src = NULL; + size_t src_size; + + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) + goto out; + + uint64_t uvalue; + uint64_t dwarf_regno; + drgn_register_number regno; + switch (opcode) { + case DW_OP_reg0 ... DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &dwarf_regno))) + goto out; +reg: + if (!regs) + goto absent; + regno = dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) + goto absent; + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + src = &regs->buf[layout->offset]; + src_size = layout->size; + break; + case DW_OP_implicit_value: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &uvalue))) + goto out; + if (uvalue > ctx.bb.end - ctx.bb.pos) { + err = binary_buffer_error(&ctx.bb, + "DW_OP_implicit_value size is out of bounds"); + goto out; + } + src = ctx.bb.pos; + src_size = uvalue; + ctx.bb.pos += uvalue; + break; + case DW_OP_stack_value: + if (!stack.size) + goto absent; + if (little_endian != HOST_LITTLE_ENDIAN) { + stack.data[stack.size - 1] = + bswap_64(stack.data[stack.size - 1]); + } + src = &stack.data[stack.size - 1]; + src_size = sizeof(stack.data[0]); + break; + default: + ctx.bb.pos = ctx.bb.prev; + break; + } + } + + uint64_t piece_bit_size; + uint64_t piece_bit_offset; + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + if ((err =
binary_buffer_next_u8(&ctx.bb, &opcode))) + goto out; + + switch (opcode) { + case DW_OP_piece: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &piece_bit_size))) + goto out; + /* + * It's probably bogus for the piece size to be + * larger than the remaining value size, but + * that's not explicitly stated in the DWARF 5 + * specification, so clamp it instead. + */ + if (__builtin_mul_overflow(piece_bit_size, 8U, + &piece_bit_size) || + piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + break; + case DW_OP_bit_piece: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &piece_bit_size)) || + (err = binary_buffer_next_uleb128(&ctx.bb, + &piece_bit_offset))) + goto out; + if (piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + break; + default: + err = binary_buffer_error(&ctx.bb, + "unknown DWARF expression opcode %#" PRIx8 " after simple location description", + opcode); + goto out; + } + } else { + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + } + + /* + * TODO: there are a few cases that a DWARF location can + * describe that can't be represented in drgn's object model: + * + * 1. An object that is partially known and partially unknown. + * 2. An object that is partially in memory and partially a + * value. + * 3. An object that is in memory at non-contiguous addresses. + * 4. A pointer object whose pointer value is not known but + * whose referenced value is known (DW_OP_implicit_pointer). + * + * For case 1, we consider the whole object as absent. For cases + * 2 and 3, we convert the whole object to a value. Case 4 is + * not supported at all. We should add a way to represent all of + * these situations precisely. + */ + if (src && piece_bit_size == 0) { + /* Ignore empty value. 
*/ + } else if (src) { + if (!value_buf && + !drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, &value_buf)) { + err = &drgn_enomem; + goto out; + } + if (bit_offset >= 0) { + /* + * We previously had an address. Read it into + * the value. + */ + err = read_bits(prog, value_buf, 0, address, + bit_offset, bit_pos, + little_endian); + if (err) + goto out; + bit_offset = -1; + } + /* + * It's probably safe to assume that we don't have an + * implicit value larger than 2 exabytes. + */ + assert(src_size <= UINT64_MAX / 8); + uint64_t src_bit_size = UINT64_C(8) * src_size; + if (piece_bit_offset > src_bit_size) + piece_bit_offset = src_bit_size; + uint64_t copy_bit_size = + min(piece_bit_size, + src_bit_size - piece_bit_offset); + uint64_t copy_bit_offset = bit_pos; + if (!little_endian) { + copy_bit_offset += piece_bit_size - copy_bit_size; + piece_bit_offset = (src_bit_size + - copy_bit_size + - piece_bit_offset); + } + copy_bits(&value_buf[copy_bit_offset / 8], + copy_bit_offset % 8, + (const char *)src + (piece_bit_offset / 8), + piece_bit_offset % 8, copy_bit_size, + little_endian); + } else if (stack.size) { + uint64_t piece_address = + ((stack.data[stack.size - 1] + piece_bit_offset / 8) + & address_mask); + piece_bit_offset %= 8; + if (bit_pos > 0 && bit_offset >= 0) { + /* + * We already had an address. Merge the pieces + * if the addresses are contiguous, otherwise + * convert to a value. + * + * The obvious way to write this is + * (address + (bit_pos + bit_offset) / 8), but + * (bit_pos + bit_offset) can overflow uint64_t. + */ + uint64_t end_address = + ((address + + bit_pos / 8 + + (bit_pos % 8 + bit_offset) / 8) + & address_mask); + unsigned int end_bit_offset = + (bit_offset + bit_pos) % 8; + if (piece_bit_size == 0 || + (piece_address == end_address && + piece_bit_offset == end_bit_offset)) { + /* Piece is contiguous. 
*/ + piece_address = address; + piece_bit_offset = bit_offset; + } else { + if (!drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, + &value_buf)) { + err = &drgn_enomem; + goto out; + } + err = read_bits(prog, value_buf, 0, + address, bit_offset, + bit_pos, little_endian); + if (err) + goto out; + bit_offset = -1; + } + } + if (value_buf) { + /* We already have a value. Read into it. */ + err = read_bits(prog, &value_buf[bit_pos / 8], + bit_pos % 8, piece_address, + piece_bit_offset, + piece_bit_size, little_endian); + if (err) + goto out; + } else { + address = piece_address; + bit_offset = piece_bit_offset; + } + } else if (piece_bit_size > 0) { + goto absent; + } + bit_pos += piece_bit_size; + } while (binary_buffer_has_next(&ctx.bb)); + + if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { +absent: + if (dwarf_tag(die) == DW_TAG_template_value_parameter) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_template_value_parameter is missing value"); + } + drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); + err = NULL; + } else if (bit_offset >= 0) { + Dwarf_Addr start, end, bias; + dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, + NULL, NULL, NULL); + /* + * If the address is not in the module's address range, then + * it's probably something special like a Linux per-CPU variable + * (which isn't actually a variable address but an offset). + * Don't apply the bias in that case. 
+ */ + if (start <= address + bias && address + bias < end) + address += bias; + err = drgn_object_set_reference_internal(ret, &type, address, + bit_offset); + } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { + drgn_object_reinit(ret, &type, DRGN_OBJECT_VALUE); + ret->value = value; + value_buf = NULL; + err = NULL; + } else { + err = drgn_object_set_from_buffer_internal(ret, &type, + value_buf, 0); + } + +out: + if (value_buf != value.ibuf) + free(value_buf); + uint64_vector_deinit(&stack); + return err; +} + +static struct drgn_error * +drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + Dwarf_Attribute *attr, struct drgn_object *ret) +{ + struct drgn_object_type type; + struct drgn_error *err = drgn_object_type(qualified_type, 0, &type); + if (err) + return err; + Dwarf_Block block; + if (dwarf_formblock(attr, &block) == 0) { + if (block.length < drgn_value_size(type.bit_size)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_const_value block is too small"); + } + return drgn_object_set_from_buffer_internal(ret, &type, + block.data, 0); + } else if (type.encoding == DRGN_OBJECT_ENCODING_SIGNED) { + Dwarf_Sword svalue; + if (dwarf_formsdata(attr, &svalue)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_const_value"); + } + drgn_object_set_signed_internal(ret, &type, svalue); + return NULL; + } else if (type.encoding == DRGN_OBJECT_ENCODING_UNSIGNED) { + Dwarf_Word uvalue; + if (dwarf_formudata(attr, &uvalue)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_const_value"); + } + drgn_object_set_unsigned_internal(ret, &type, uvalue); + return NULL; + } else { + return drgn_error_create(DRGN_ERROR_OTHER, + "unknown DW_AT_const_value form"); + } +} + +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const 
struct drgn_register_state *regs, + struct drgn_object *ret) +{ + struct drgn_error *err; + if (dwarf_tag(die) == DW_TAG_subprogram) { + return drgn_object_from_dwarf_subprogram(dbinfo, module, die, + ret); + } + /* + * The DWARF 5 specification mentions that data object entries can have + * DW_AT_endianity, but that doesn't seem to be used in practice. It + * would be inconvenient to support, so ignore it for now. + */ + struct drgn_qualified_type qualified_type; + if (type_die) { + err = drgn_type_from_dwarf(dbinfo, module, type_die, + &qualified_type); + } else { + err = drgn_type_from_dwarf_attr(dbinfo, module, die, NULL, true, + true, NULL, &qualified_type); + } + if (err) + return err; + Dwarf_Attribute attr_mem, *attr; + const char *expr; + size_t expr_size; + if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { + err = drgn_dwarf_location(module, attr, regs, &expr, + &expr_size); + if (err) + return err; + } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, + &attr_mem))) { + return drgn_object_from_dwarf_constant(dbinfo, die, + qualified_type, attr, + ret); + } else { + expr = NULL; + expr_size = 0; + } + return drgn_object_from_dwarf_location(dbinfo->prog, module, die, + qualified_type, expr, expr_size, + function_die, regs, ret); +} + +static struct drgn_error *find_dwarf_enumerator(Dwarf_Die *enumeration_type, + const char *name, + Dwarf_Die *ret) +{ + int r = dwarf_child(enumeration_type, ret); + while (r == 0) { + if (dwarf_tag(ret) == DW_TAG_enumerator && + strcmp(dwarf_diename(ret), name) == 0) + return NULL; + r = dwarf_siblingof(ret, ret); + } + if (r < 0) + return drgn_error_libdw(); + ret->addr = NULL; + return NULL; +} + +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret) +{ + struct drgn_error *err; + Dwarf_Die die; + for (size_t scope = num_scopes; scope--;) { + bool have_declaration = false; + if
(dwarf_child(&scopes[scope], &die) != 0) + continue; + do { + switch (dwarf_tag(&die)) { + case DW_TAG_variable: + case DW_TAG_formal_parameter: + case DW_TAG_subprogram: + if (strcmp(dwarf_diename(&die), name) == 0) { + *die_ret = die; + bool declaration; + if (dwarf_flag(&die, DW_AT_declaration, + &declaration)) + return drgn_error_libdw(); + if (declaration) + have_declaration = true; + else + return NULL; + } + break; + case DW_TAG_enumeration_type: { + bool enum_class; + if (dwarf_flag_integrate(&die, DW_AT_enum_class, + &enum_class)) + return drgn_error_libdw(); + if (!enum_class) { + Dwarf_Die enumerator; + err = find_dwarf_enumerator(&die, name, + &enumerator); + if (err) + return err; + if (enumerator.addr) { + *die_ret = enumerator; + *type_ret = die; + return NULL; + } + } + break; + } + default: + continue; + } + } while (dwarf_siblingof(&die, &die) == 0); + if (have_declaration) + return NULL; + } + die_ret->addr = NULL; + return NULL; +} + +static struct drgn_error * +drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_name"); + } + + Dwarf_Attribute attr; + Dwarf_Word encoding; + if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || + dwarf_formudata(&attr, &encoding)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_encoding"); + } + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); + } + + enum drgn_byte_order byte_order; + err = dwarf_die_byte_order(die, true, &byte_order); + if (err) + return err; + + switch (encoding) { + case DW_ATE_boolean: + return drgn_bool_type_create(dbinfo->prog, 
name, size, + byte_order, lang, ret); + case DW_ATE_float: + return drgn_float_type_create(dbinfo->prog, name, size, + byte_order, lang, ret); + case DW_ATE_signed: + case DW_ATE_signed_char: + return drgn_int_type_create(dbinfo->prog, name, size, true, + byte_order, lang, ret); + case DW_ATE_unsigned: + case DW_ATE_unsigned_char: + return drgn_int_type_create(dbinfo->prog, name, size, false, + byte_order, lang, ret); + /* We don't support complex types yet. */ + case DW_ATE_complex_float: + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_base_type has unknown DWARF encoding 0x%llx", + (unsigned long long)encoding); + } +} + +/* + * DW_TAG_structure_type, DW_TAG_union_type, DW_TAG_class_type, and + * DW_TAG_enumeration_type can be incomplete (i.e., have a DW_AT_declaration of + * true). This tries to find the complete type. If it succeeds, it returns NULL. + * If it can't find a complete type, it returns &drgn_not_found. Otherwise, it + * returns an error. + */ +static struct drgn_error * +drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, + const char *name, struct drgn_type **ret) +{ + struct drgn_error *err; + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.index.global, + name, strlen(name), &tag, 1); + if (err) + return err; + + /* + * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs + * with DW_AT_declaration, so this will always be a complete type. + */ + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_not_found; + /* + * Look for another matching DIE. If there is one, then we can't be sure + * which type this is, so leave it incomplete rather than guessing. 
+ */ + if (drgn_dwarf_index_iterator_next(&it)) + return &drgn_not_found; + + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) + return err; + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf(dbinfo, index_die->module, &die, + &qualified_type); + if (err) + return err; + *ret = qualified_type.type; + return NULL; +} + +struct drgn_dwarf_member_thunk_arg { + struct drgn_debug_info_module *module; + Dwarf_Die die; + bool can_be_incomplete_array; +}; + +static struct drgn_error * +drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_member_thunk_arg *arg = arg_; + if (res) { + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, + false, + arg->can_be_incomplete_array, + NULL, &qualified_type); + if (err) + return err; + + Dwarf_Attribute attr_mem, *attr; + uint64_t bit_field_size; + if ((attr = dwarf_attr_integrate(&arg->die, DW_AT_bit_size, + &attr_mem))) { + Dwarf_Word bit_size; + if (dwarf_formudata(attr, &bit_size)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_bit_size"); + } + bit_field_size = bit_size; + } else { + bit_field_size = 0; + } + + err = drgn_object_set_absent(res, qualified_type, + bit_field_size); + if (err) + return err; + } + free(arg); + return NULL; +} + +static inline bool drgn_dwarf_attribute_is_block(Dwarf_Attribute *attr) +{ + switch (attr->form) { + case DW_FORM_block1: + case DW_FORM_block2: + case DW_FORM_block4: + case DW_FORM_block: + return true; + default: + return false; + } +} + +static inline bool drgn_dwarf_attribute_is_ptr(Dwarf_Attribute *attr) +{ + switch (attr->form) { + case DW_FORM_sec_offset: + return true; + case DW_FORM_data4: + case DW_FORM_data8: { + /* + * dwarf_cu_die() always returns the DIE. We should use + * dwarf_cu_info(), but that requires elfutils >= 0.171. 
+ */ + Dwarf_Die unused; + Dwarf_Half cu_version; + dwarf_cu_die(attr->cu, &unused, &cu_version, NULL, NULL, NULL, + NULL, NULL); + return cu_version <= 3; + } + default: + return false; + } +} + +static struct drgn_error *invalid_data_member_location(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_member_location"); +} + +static struct drgn_error * +drgn_parse_dwarf_data_member_location(Dwarf_Attribute *attr, uint64_t *ret) +{ + struct drgn_error *err; + + if (drgn_dwarf_attribute_is_block(attr)) { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + /* + * In DWARF 2, DW_AT_data_member_location is always a location + * description. We can translate a DW_OP_plus_uconst expression + * into a constant offset; other expressions aren't supported + * yet. + */ + struct binary_buffer bb; + /* + * Right now we only parse u8 and ULEB128, so the byte order + * doesn't matter. 
+ */ + binary_buffer_init(&bb, block.data, block.length, + HOST_LITTLE_ENDIAN, + invalid_data_member_location); + uint8_t opcode; + err = binary_buffer_next_u8(&bb, &opcode); + if (err) + return err; + if (opcode != DW_OP_plus_uconst) { +unsupported: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has unsupported DW_AT_data_member_location"); + } + err = binary_buffer_next_uleb128(&bb, ret); + if (err) + return err; + if (binary_buffer_has_next(&bb)) + goto unsupported; + } else if (drgn_dwarf_attribute_is_ptr(attr)) { + goto unsupported; + } else { + + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) + return invalid_data_member_location(NULL, NULL, NULL); + *ret = word; + } + return NULL; +} + +static struct drgn_error * +parse_member_offset(Dwarf_Die *die, union drgn_lazy_object *member_object, + bool little_endian, uint64_t *ret) +{ + struct drgn_error *err; + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + /* + * The simplest case is when we have DW_AT_data_bit_offset, which is + * already the offset in bits from the beginning of the containing + * object to the beginning of the member (which may be a bit field). + */ + attr = dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr_mem); + if (attr) { + Dwarf_Word bit_offset; + if (dwarf_formudata(attr, &bit_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_bit_offset"); + } + *ret = bit_offset; + return NULL; + } + + /* + * Otherwise, we might have DW_AT_data_member_location, which is the + * offset in bytes from the beginning of the containing object. 
+ */ + attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); + if (attr) { + err = drgn_parse_dwarf_data_member_location(attr, ret); + if (err) + return err; + *ret *= 8; + } else { + *ret = 0; + } + + /* + * In addition to DW_AT_data_member_location, a bit field might have + * DW_AT_bit_offset, which is the offset in bits of the most significant + * bit of the bit field from the most significant bit of the containing + * object. + */ + attr = dwarf_attr_integrate(die, DW_AT_bit_offset, &attr_mem); + if (attr) { + Dwarf_Word bit_offset; + if (dwarf_formudata(attr, &bit_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_bit_offset"); + } + + /* + * If the architecture is little-endian, then we must compute + * the location of the most significant bit from the size of the + * member, then subtract the bit offset and bit size to get the + * location of the beginning of the bit field. + * + * If the architecture is big-endian, then the most significant + * bit of the bit field is the beginning. + */ + if (little_endian) { + err = drgn_lazy_object_evaluate(member_object); + if (err) + return err; + + attr = dwarf_attr_integrate(die, DW_AT_byte_size, + &attr_mem); + /* + * If the member has an explicit byte size, we can use + * that. Otherwise, we have to get it from the member + * type. 
+ */ + uint64_t byte_size; + if (attr) { + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_byte_size"); + } + byte_size = word; + } else { + if (!drgn_type_has_size(member_object->obj.type)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member bit field type does not have size"); + } + err = drgn_type_sizeof(member_object->obj.type, + &byte_size); + if (err) + return err; + } + *ret += 8 * byte_size - bit_offset - member_object->obj.bit_size; + } else { + *ret += bit_offset; + } + } + + return NULL; +} + +static struct drgn_error * +parse_member(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + bool little_endian, bool can_be_incomplete_array, + struct drgn_compound_type_builder *builder) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_name"); + } + } else { + name = NULL; + } + + struct drgn_dwarf_member_thunk_arg *thunk_arg = + malloc(sizeof(*thunk_arg)); + if (!thunk_arg) + return &drgn_enomem; + thunk_arg->module = module; + thunk_arg->die = *die; + thunk_arg->can_be_incomplete_array = can_be_incomplete_array; + + union drgn_lazy_object member_object; + drgn_lazy_object_init_thunk(&member_object, dbinfo->prog, + drgn_dwarf_member_thunk_fn, thunk_arg); + + uint64_t bit_offset; + err = parse_member_offset(die, &member_object, little_endian, + &bit_offset); + if (err) + goto err; + + err = drgn_compound_type_builder_add_member(builder, &member_object, + name, bit_offset); + if (err) + goto err; + return NULL; + +err: + drgn_lazy_object_deinit(&member_object); + return err; +} + +struct drgn_dwarf_die_thunk_arg { + struct drgn_debug_info_module *module; + Dwarf_Die die; +}; + +static struct 
drgn_error * +drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_die_thunk_arg *arg = arg_; + if (res) { + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, + true, true, NULL, + &qualified_type); + if (err) + return err; + + err = drgn_object_set_absent(res, qualified_type, 0); + if (err) + return err; + } + free(arg); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, + void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_die_thunk_arg *arg = arg_; + if (res) { + err = drgn_object_from_dwarf(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, NULL, + NULL, res); + if (err) + return err; + } + free(arg); + return NULL; +} + +static struct drgn_error * +parse_template_parameter(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + drgn_object_thunk_fn *thunk_fn, + struct drgn_template_parameters_builder *builder) +{ + char tag_buf[DW_TAG_BUF_LEN]; + + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_name", + dwarf_tag_str(die, tag_buf)); + } + } else { + name = NULL; + } + + bool defaulted; + if (dwarf_flag_integrate(die, DW_AT_default_value, &defaulted)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_default_value", + dwarf_tag_str(die, tag_buf)); + } + + struct drgn_dwarf_die_thunk_arg *thunk_arg = + malloc(sizeof(*thunk_arg)); + if (!thunk_arg) + return &drgn_enomem; + thunk_arg->module = module; + thunk_arg->die = *die; + + union drgn_lazy_object argument; + drgn_lazy_object_init_thunk(&argument, dbinfo->prog, thunk_fn, + thunk_arg); + + struct drgn_error *err 
= + drgn_template_parameters_builder_add(builder, &argument, name, + defaulted); + if (err) + drgn_lazy_object_deinit(&argument); + return err; +} + +static struct drgn_error * +drgn_compound_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + enum drgn_type_kind kind, struct drgn_type **ret) +{ + struct drgn_error *err; + char tag_buf[DW_TAG_BUF_LEN]; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; + if (attr) { + tag = dwarf_formstring(attr); + if (!tag) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_name", + dwarf_tag_str(die, tag_buf)); + } + } else { + tag = NULL; + } + + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_declaration", + dwarf_tag_str(die, tag_buf)); + } + if (declaration && tag) { + err = drgn_debug_info_find_complete(dbinfo, dwarf_tag(die), tag, + ret); + if (err != &drgn_not_found) + return err; + } + + struct drgn_compound_type_builder builder; + drgn_compound_type_builder_init(&builder, dbinfo->prog, kind); + + int size; + bool little_endian; + if (declaration) { + size = 0; + } else { + size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has missing or invalid DW_AT_byte_size", + dwarf_tag_str(die, tag_buf)); + } + dwarf_die_is_little_endian(die, false, &little_endian); + } + + Dwarf_Die member = {}, child; + int r = dwarf_child(die, &child); + while (r == 0) { + switch (dwarf_tag(&child)) { + case DW_TAG_member: + if (!declaration) { + if (member.addr) { + err = parse_member(dbinfo, module, + &member, + little_endian, false, + &builder); + if (err) + goto err; + } + member = child; + } + break; + case DW_TAG_template_type_parameter: + err = parse_template_parameter(dbinfo, module, &child, + 
drgn_dwarf_template_type_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + case DW_TAG_template_value_parameter: + err = parse_template_parameter(dbinfo, module, &child, + drgn_dwarf_template_value_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + default: + break; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + /* + * Flexible array members are only allowed as the last member of a + * structure with at least one other member. + */ + if (member.addr) { + err = parse_member(dbinfo, module, &member, little_endian, + kind != DRGN_TYPE_UNION && + builder.members.size > 0, + &builder); + if (err) + goto err; + } + + err = drgn_compound_type_create(&builder, tag, size, !declaration, lang, + ret); + if (err) + goto err; + return NULL; + +err: + drgn_compound_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, + bool *is_signed) +{ + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has missing or invalid DW_AT_name"); + } + + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator is missing DW_AT_const_value"); + } + struct drgn_error *err; + if (attr->form == DW_FORM_sdata || + attr->form == DW_FORM_implicit_const) { + Dwarf_Sword svalue; + if (dwarf_formsdata(attr, &svalue)) + goto invalid; + err = drgn_enum_type_builder_add_signed(builder, name, + svalue); + /* + * GCC before 7.1 didn't include DW_AT_encoding for + * DW_TAG_enumeration_type DIEs, so we have to guess the sign + * for enum_compatible_type_fallback(). 
+ */ + if (!err && svalue < 0) + *is_signed = true; + } else { + Dwarf_Word uvalue; + if (dwarf_formudata(attr, &uvalue)) + goto invalid; + err = drgn_enum_type_builder_add_unsigned(builder, name, + uvalue); + } + return err; + +invalid: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has invalid DW_AT_const_value"); +} + +/* + * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, + * so we have to fabricate the compatible type. + */ +static struct drgn_error * +enum_compatible_type_fallback(struct drgn_debug_info *dbinfo, + Dwarf_Die *die, bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); + } + enum drgn_byte_order byte_order; + dwarf_die_byte_order(die, false, &byte_order); + return drgn_int_type_create(dbinfo->prog, "", size, is_signed, + byte_order, lang, ret); +} + +static struct drgn_error * +drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; + if (attr) { + tag = dwarf_formstring(attr); + if (!tag) + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_name"); + } else { + tag = NULL; + } + + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_declaration"); + } + if (declaration && tag) { + err = drgn_debug_info_find_complete(dbinfo, + DW_TAG_enumeration_type, + tag, ret); + if (err != &drgn_not_found) + return err; + } + + if (declaration) { + return drgn_incomplete_enum_type_create(dbinfo->prog, 
tag, lang, + ret); + } + + struct drgn_enum_type_builder builder; + drgn_enum_type_builder_init(&builder, dbinfo->prog); + bool is_signed = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + if (dwarf_tag(&child) == DW_TAG_enumerator) { + err = parse_enumerator(&child, &builder, &is_signed); + if (err) + goto err; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + + struct drgn_type *compatible_type; + r = dwarf_type(die, &child); + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_type"); + goto err; + } else if (r) { + err = enum_compatible_type_fallback(dbinfo, die, is_signed, + lang, &compatible_type); + if (err) + goto err; + } else { + struct drgn_qualified_type qualified_compatible_type; + err = drgn_type_from_dwarf(dbinfo, module, &child, + &qualified_compatible_type); + if (err) + goto err; + compatible_type = + drgn_underlying_type(qualified_compatible_type.type); + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); + goto err; + } + } + + err = drgn_enum_type_create(&builder, tag, compatible_type, lang, ret); + if (err) + goto err; + return NULL; + +err: + drgn_enum_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +drgn_typedef_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_type **ret) +{ + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_typedef has missing or invalid DW_AT_name"); + } + + struct drgn_qualified_type aliased_type; + struct drgn_error *err = 
drgn_type_from_dwarf_attr(dbinfo, module, die, + lang, true, + can_be_incomplete_array, + is_incomplete_array_ret, + &aliased_type); + if (err) + return err; + + return drgn_typedef_type_create(dbinfo->prog, name, aliased_type, lang, + ret); +} + +static struct drgn_error * +drgn_pointer_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_qualified_type referenced_type; + struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, die, + lang, true, true, + NULL, + &referenced_type); + if (err) + return err; + + Dwarf_Attribute attr_mem, *attr; + uint64_t size; + if ((attr = dwarf_attr_integrate(die, DW_AT_byte_size, &attr_mem))) { + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_pointer_type has invalid DW_AT_byte_size"); + } + size = word; + } else { + uint8_t address_size; + err = drgn_program_address_size(dbinfo->prog, &address_size); + if (err) + return err; + size = address_size; + } + + /* + * The DWARF 5 specification doesn't mention DW_AT_endianity for + * DW_TAG_pointer_type DIEs, and GCC as of version 10.2 doesn't emit it + * even for pointers stored in the opposite byte order (e.g., when using + * scalar_storage_order), but it probably should. 
+ */ + enum drgn_byte_order byte_order; + dwarf_die_byte_order(die, false, &byte_order); + return drgn_pointer_type_create(dbinfo->prog, referenced_type, size, + byte_order, lang, ret); +} + +struct array_dimension { + uint64_t length; + bool is_complete; +}; + +DEFINE_VECTOR(array_dimension_vector, struct array_dimension) + +static struct drgn_error *subrange_length(Dwarf_Die *die, + struct array_dimension *dimension) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + Dwarf_Word word; + + if (!(attr = dwarf_attr_integrate(die, DW_AT_upper_bound, &attr_mem)) && + !(attr = dwarf_attr_integrate(die, DW_AT_count, &attr_mem))) { + dimension->is_complete = false; + return NULL; + } + + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_subrange_type has invalid %s", + attr->code == DW_AT_upper_bound ? + "DW_AT_upper_bound" : + "DW_AT_count"); + } + + dimension->is_complete = true; + /* + * GCC emits a DW_FORM_sdata DW_AT_upper_bound of -1 for empty array + * variables without an explicit size (e.g., `int arr[] = {};`). 
+	 */
+	if (attr->code == DW_AT_upper_bound && attr->form == DW_FORM_sdata &&
+	    word == (Dwarf_Word)-1) {
+		dimension->length = 0;
+	} else if (attr->code == DW_AT_upper_bound) {
+		if (word >= UINT64_MAX) {
+			return drgn_error_create(DRGN_ERROR_OVERFLOW,
+						 "DW_AT_upper_bound is too large");
+		}
+		dimension->length = (uint64_t)word + 1;
+	} else {
+		if (word > UINT64_MAX) {
+			return drgn_error_create(DRGN_ERROR_OVERFLOW,
+						 "DW_AT_count is too large");
+		}
+		dimension->length = word;
+	}
+	return NULL;
+}
+
+/*
+ * Create the drgn type described by an array type DIE.
+ *
+ * Every DW_TAG_subrange_type child of @die contributes one dimension, whose
+ * length is computed by subrange_length(). If @die has no such child, the
+ * array is treated as having a single dimension of unknown length.
+ *
+ * @can_be_incomplete_array: if false, a dimension of unknown length is turned
+ * into a zero-length array instead of an incomplete array type.
+ * @is_incomplete_array_ret: set to whether the first dimension has an unknown
+ * length. It is written unconditionally once the element type has been
+ * resolved, so it must not be NULL.
+ * @ret: set to the resulting type on success.
+ *
+ * Returns NULL on success or an error, including &drgn_enomem if the
+ * temporary dimension list cannot be grown.
+ */
+static struct drgn_error *
+drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo,
+			   struct drgn_debug_info_module *module,
+			   Dwarf_Die *die, const struct drgn_language *lang,
+			   bool can_be_incomplete_array,
+			   bool *is_incomplete_array_ret,
+			   struct drgn_type **ret)
+{
+	struct drgn_error *err;
+	struct array_dimension_vector dimensions = VECTOR_INIT;
+	struct array_dimension *dimension;
+	Dwarf_Die child;
+	int r = dwarf_child(die, &child);
+	while (r == 0) {
+		if (dwarf_tag(&child) == DW_TAG_subrange_type) {
+			dimension = array_dimension_vector_append_entry(&dimensions);
+			if (!dimension) {
+				/*
+				 * err must be assigned before jumping to out;
+				 * otherwise an uninitialized pointer would be
+				 * returned to the caller.
+				 */
+				err = &drgn_enomem;
+				goto out;
+			}
+			err = subrange_length(&child, dimension);
+			if (err)
+				goto out;
+		}
+		r = dwarf_siblingof(&child, &child);
+	}
+	if (r == -1) {
+		err = drgn_error_create(DRGN_ERROR_OTHER,
+					"libdw could not parse DIE children");
+		goto out;
+	}
+	/* No subrange children: one dimension of unknown length. */
+	if (!dimensions.size) {
+		dimension = array_dimension_vector_append_entry(&dimensions);
+		if (!dimension) {
+			err = &drgn_enomem;
+			goto out;
+		}
+		dimension->is_complete = false;
+	}
+
+	struct drgn_qualified_type element_type;
+	err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, false, false,
+					NULL, &element_type);
+	if (err)
+		goto out;
+
+	*is_incomplete_array_ret = !dimensions.data[0].is_complete;
+	struct drgn_type *type;
+	/*
+	 * Each iteration pops one dimension and wraps the type built so far in
+	 * another array type. A dimension of unknown length stays an
+	 * incomplete array only when no dimensions remain after it is popped
+	 * and the caller allows it; otherwise it becomes a zero-length array.
+	 */
+	do {
+		dimension = array_dimension_vector_pop(&dimensions);
+		if (dimension->is_complete) {
+			err = drgn_array_type_create(dbinfo->prog, element_type,
+						     dimension->length, lang,
+						     &type);
+		} else if (dimensions.size || !can_be_incomplete_array) {
+			err = drgn_array_type_create(dbinfo->prog, element_type,
+						     0, lang, &type);
+		} else {
+			err = drgn_incomplete_array_type_create(dbinfo->prog,
+								element_type,
+								lang, &type);
+		}
+		if (err)
+			goto out;
+
+		element_type.type = type;
+		element_type.qualifiers = 0;
+	} while (dimensions.size);
+
+	*ret = type;
+	err = NULL;
+out:
+	array_dimension_vector_deinit(&dimensions);
+	return err;
+}
+
+static struct drgn_error *
+drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_)
+{
+	struct drgn_error *err;
+	struct drgn_dwarf_die_thunk_arg *arg = arg_;
+	if (res) {
+		struct drgn_qualified_type qualified_type;
+		err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo,
+						arg->module, &arg->die, NULL,
+						false, true, NULL,
+						&qualified_type);
+		if (err)
+			return err;
+
+		err = drgn_object_set_absent(res, qualified_type, 0);
+		if (err)
+			return err;
+	}
+	free(arg);
+	return NULL;
+}
+
+static struct drgn_error *
+parse_formal_parameter(struct drgn_debug_info *dbinfo,
+		       struct drgn_debug_info_module *module, Dwarf_Die *die,
+		       struct drgn_function_type_builder *builder)
+{
+	Dwarf_Attribute attr_mem, *attr;
+	const char *name;
+	if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) {
+		name = dwarf_formstring(attr);
+		if (!name) {
+			return drgn_error_create(DRGN_ERROR_OTHER,
+						 "DW_TAG_formal_parameter has invalid DW_AT_name");
+		}
+	} else {
+		name = NULL;
+	}
+
+	struct drgn_dwarf_die_thunk_arg *thunk_arg =
+		malloc(sizeof(*thunk_arg));
+	if (!thunk_arg)
+		return &drgn_enomem;
+	thunk_arg->module = module;
+	thunk_arg->die = *die;
+
+	union drgn_lazy_object default_argument;
+	drgn_lazy_object_init_thunk(&default_argument, dbinfo->prog,
+				    drgn_dwarf_formal_parameter_thunk_fn,
+				    thunk_arg);
+
+	struct drgn_error *err =
+		drgn_function_type_builder_add_parameter(builder,
+							 &default_argument,
+							 name);
+	if (err)
+		drgn_lazy_object_deinit(&default_argument);
+	return err;
+}
+
+static struct drgn_error *
+drgn_function_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + char tag_buf[DW_TAG_BUF_LEN]; + + struct drgn_function_type_builder builder; + drgn_function_type_builder_init(&builder, dbinfo->prog); + bool is_variadic = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + switch (dwarf_tag(&child)) { + case DW_TAG_formal_parameter: + if (is_variadic) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", + dwarf_tag_str(die, + tag_buf)); + goto err; + } + err = parse_formal_parameter(dbinfo, module, &child, + &builder); + if (err) + goto err; + break; + case DW_TAG_unspecified_parameters: + if (is_variadic) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s has multiple DW_TAG_unspecified_parameters children", + dwarf_tag_str(die, + tag_buf)); + goto err; + } + is_variadic = true; + break; + case DW_TAG_template_type_parameter: + err = parse_template_parameter(dbinfo, module, &child, + drgn_dwarf_template_type_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + case DW_TAG_template_value_parameter: + err = parse_template_parameter(dbinfo, module, &child, + drgn_dwarf_template_value_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + default: + break; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + + struct drgn_qualified_type return_type; + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, true, + NULL, &return_type); + if (err) + goto err; + + err = drgn_function_type_create(&builder, return_type, is_variadic, + lang, ret); + if (err) + goto err; + return NULL; + +err: + drgn_function_type_builder_deinit(&builder); + 
return err; +} + +static struct drgn_error * +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret) +{ + if (dbinfo->dwarf.depth >= 1000) { + return drgn_error_create(DRGN_ERROR_RECURSION, + "maximum DWARF type parsing depth exceeded"); + } + + /* If the DIE has a type unit signature, follow it. */ + Dwarf_Die definition_die; + { + Dwarf_Attribute attr_mem, *attr; + if ((attr = dwarf_attr_integrate(die, DW_AT_signature, + &attr_mem))) { + if (!dwarf_formref_die(attr, &definition_die)) + return drgn_error_libdw(); + die = &definition_die; + } + } + + /* If we got a declaration, try to find the definition. */ + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) + return drgn_error_libdw(); + if (declaration) { + uintptr_t die_addr; + if (drgn_dwarf_index_find_definition(&dbinfo->dwarf.index, + (uintptr_t)die->addr, + &module, &die_addr)) { + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, + &bias); + if (!dwarf) + return drgn_error_libdwfl(); + uintptr_t start = + (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; + size_t size = + module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die_addr >= start && die_addr < start + size) { + if (!dwarf_offdie(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + /* Assume .debug_types */ + if (!dwarf_offdie_types(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } + die = &definition_die; + } + } + + struct drgn_dwarf_type_map_entry entry = { + .key = die->addr, + }; + struct hash_pair hp = drgn_dwarf_type_map_hash(&entry.key); + struct drgn_dwarf_type_map_iterator it = + drgn_dwarf_type_map_search_hashed(&dbinfo->dwarf.types, + &entry.key, hp); + if (it.entry) { + if 
(!can_be_incomplete_array && + it.entry->value.is_incomplete_array) { + it = drgn_dwarf_type_map_search_hashed(&dbinfo->dwarf.cant_be_incomplete_array_types, + &entry.key, hp); + } + if (it.entry) { + ret->type = it.entry->value.type; + ret->qualifiers = it.entry->value.qualifiers; + return NULL; + } + } + + const struct drgn_language *lang; + struct drgn_error *err = drgn_language_from_die(die, true, &lang); + if (err) + return err; + + ret->qualifiers = 0; + dbinfo->dwarf.depth++; + entry.value.is_incomplete_array = false; + switch (dwarf_tag(die)) { + case DW_TAG_const_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_CONST; + break; + case DW_TAG_restrict_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; + break; + case DW_TAG_volatile_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; + break; + case DW_TAG_atomic_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; + break; + case DW_TAG_base_type: + err = drgn_base_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + case DW_TAG_structure_type: + err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, + DRGN_TYPE_STRUCT, + &ret->type); + break; + case DW_TAG_union_type: + err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, + DRGN_TYPE_UNION, + &ret->type); + break; + case DW_TAG_class_type: + err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, + DRGN_TYPE_CLASS, + &ret->type); + break; + case DW_TAG_enumeration_type: + err = 
drgn_enum_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + case DW_TAG_typedef: + err = drgn_typedef_type_from_dwarf(dbinfo, module, die, lang, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + &ret->type); + break; + case DW_TAG_pointer_type: + err = drgn_pointer_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + case DW_TAG_array_type: + err = drgn_array_type_from_dwarf(dbinfo, module, die, lang, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + &ret->type); + break; + case DW_TAG_subroutine_type: + case DW_TAG_subprogram: + err = drgn_function_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + default: + err = drgn_error_format(DRGN_ERROR_OTHER, + "unknown DWARF type tag 0x%x", + dwarf_tag(die)); + break; + } + dbinfo->dwarf.depth--; + if (err) + return err; + + entry.value.type = ret->type; + entry.value.qualifiers = ret->qualifiers; + struct drgn_dwarf_type_map *map; + if (!can_be_incomplete_array && entry.value.is_incomplete_array) + map = &dbinfo->dwarf.cant_be_incomplete_array_types; + else + map = &dbinfo->dwarf.types; + if (drgn_dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { + /* + * This will "leak" the type we created, but it'll still be + * cleaned up when the program is freed. 
+ */ + return &drgn_enomem; + } + if (is_incomplete_array_ret) + *is_incomplete_array_ret = entry.value.is_incomplete_array; + return NULL; +} + +struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, + const char *name, size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + struct drgn_debug_info *dbinfo = arg; + + uint64_t tag; + switch (kind) { + case DRGN_TYPE_INT: + case DRGN_TYPE_BOOL: + case DRGN_TYPE_FLOAT: + tag = DW_TAG_base_type; + break; + case DRGN_TYPE_STRUCT: + tag = DW_TAG_structure_type; + break; + case DRGN_TYPE_UNION: + tag = DW_TAG_union_type; + break; + case DRGN_TYPE_CLASS: + tag = DW_TAG_class_type; + break; + case DRGN_TYPE_ENUM: + tag = DW_TAG_enumeration_type; + break; + case DRGN_TYPE_TYPEDEF: + tag = DW_TAG_typedef; + break; + default: + UNREACHABLE(); + } + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.index.global, + name, name_len, &tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) + return err; + if (die_matches_filename(&die, filename)) { + err = drgn_type_from_dwarf(dbinfo, index_die->module, + &die, ret); + if (err) + return err; + /* + * For DW_TAG_base_type, we need to check that the type + * we found was the right kind. + */ + if (drgn_type_kind(ret->type) == kind) + return NULL; + } + } + return &drgn_not_found; +} + +struct drgn_error * +drgn_debug_info_find_object(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_debug_info *dbinfo = arg; + + struct drgn_dwarf_index_namespace *ns = &dbinfo->dwarf.index.global; + if (name_len >= 2 && memcmp(name, "::", 2) == 0) { + /* Explicit global namespace. 
*/ + name_len -= 2; + name += 2; + } + const char *colons; + while ((colons = memmem(name, name_len, "::", 2))) { + struct drgn_dwarf_index_iterator it; + uint64_t ns_tag = DW_TAG_namespace; + err = drgn_dwarf_index_iterator_init(&it, ns, name, + colons - name, &ns_tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_not_found; + ns = index_die->namespace; + name_len -= colons + 2 - name; + name = colons + 2; + } + + uint64_t tags[3]; + size_t num_tags = 0; + if (flags & DRGN_FIND_OBJECT_CONSTANT) + tags[num_tags++] = DW_TAG_enumerator; + if (flags & DRGN_FIND_OBJECT_FUNCTION) + tags[num_tags++] = DW_TAG_subprogram; + if (flags & DRGN_FIND_OBJECT_VARIABLE) + tags[num_tags++] = DW_TAG_variable; + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, ns, name, name_len, tags, + num_tags); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) + return err; + if (!die_matches_filename(&die, filename)) + continue; + if (dwarf_tag(&die) == DW_TAG_enumeration_type) { + return drgn_object_from_dwarf_enumerator(dbinfo, + index_die->module, + &die, name, + ret); + } else { + return drgn_object_from_dwarf(dbinfo, index_die->module, + &die, NULL, NULL, NULL, + ret); + } + } + return &drgn_not_found; +} + +/* + * Call frame information. + */ + +struct drgn_dwarf_cie { + /* Whether this CIE is from .eh_frame. */ + bool is_eh; + /* Size of an address in this CIE in bytes. */ + uint8_t address_size; + /* DW_EH_PE_* encoding of addresses in this CIE. */ + uint8_t address_encoding; + /* Whether this CIE has a 'z' augmentation. */ + bool have_augmentation_length; + /* Whether this CIE is for a signal handler ('S' augmentation). 
*/ + bool signal_frame; + drgn_register_number return_address_register; + uint64_t code_alignment_factor; + int64_t data_alignment_factor; + const char *initial_instructions; + size_t initial_instructions_size; +}; + +DEFINE_VECTOR(drgn_dwarf_fde_vector, struct drgn_dwarf_fde) +DEFINE_VECTOR(drgn_dwarf_cie_vector, struct drgn_dwarf_cie) +DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, + scalar_key_eq) + +static struct drgn_error * +drgn_dwarf_cfi_next_encoded(struct drgn_debug_info_buffer *buffer, + uint8_t address_size, uint8_t encoding, + uint64_t func_addr, uint64_t *ret) +{ + struct drgn_error *err; + + /* Not currently used for CFI. */ + if (encoding & DW_EH_PE_indirect) { +unknown_fde_encoding: + return binary_buffer_error(&buffer->bb, + "unknown EH encoding %#" PRIx8, + encoding); + } + + size_t pos = (buffer->bb.pos - + (char *)buffer->module->scn_data[buffer->scn]->d_buf); + uint64_t base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + case DW_EH_PE_pcrel: + base = buffer->module->dwarf.pcrel_base + pos; + break; + case DW_EH_PE_textrel: + base = buffer->module->dwarf.textrel_base; + break; + case DW_EH_PE_datarel: + base = buffer->module->dwarf.datarel_base; + break; + case DW_EH_PE_funcrel: + /* Relative to the FDE's initial location. 
*/ + base = func_addr; + break; + case DW_EH_PE_aligned: + base = 0; + if (pos % address_size != 0 && + (err = binary_buffer_skip(&buffer->bb, + address_size - pos % address_size))) + return err; + break; + default: + goto unknown_fde_encoding; + } + + uint64_t offset; + switch (encoding & 0xf) { + case DW_EH_PE_absptr: + if ((err = binary_buffer_next_uint(&buffer->bb, address_size, + &offset))) + return err; + break; + case DW_EH_PE_uleb128: + if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) + return err; + break; + case DW_EH_PE_udata2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_udata4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_udata8: + if ((err = binary_buffer_next_u64(&buffer->bb, &offset))) + return err; + break; + case DW_EH_PE_sleb128: + if ((err = binary_buffer_next_sleb128_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_sdata2: + if ((err = binary_buffer_next_s16_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_sdata4: + if ((err = binary_buffer_next_s32_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_sdata8: + if ((err = binary_buffer_next_s64_into_u64(&buffer->bb, + &offset))) + return err; + break; + default: + goto unknown_fde_encoding; + } + *ret = (base + offset) & uint_max(address_size); + + return NULL; +} + +static struct drgn_error * +drgn_parse_dwarf_cie(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn, size_t cie_pointer, + struct drgn_dwarf_cie *cie) +{ + bool is_eh = scn == DRGN_SCN_EH_FRAME; + struct drgn_error *err; + + cie->is_eh = is_eh; + + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, scn); + buffer.bb.pos += cie_pointer; + + uint32_t tmp; + if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) + return err; + bool is_64_bit = tmp == UINT32_C(0xffffffff); 
+ uint64_t length; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, &length))) + return err; + } else { + length = tmp; + } + if (length > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "entry length is out of bounds"); + } + buffer.bb.end = buffer.bb.pos + length; + + uint64_t cie_id, expected_cie_id; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, &cie_id))) + return err; + expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); + } else { + if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, + &cie_id))) + return err; + expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffff); + } + if (cie_id != expected_cie_id) + return binary_buffer_error(&buffer.bb, "invalid CIE ID"); + + uint8_t version; + if ((err = binary_buffer_next_u8(&buffer.bb, &version))) + return err; + if (version < 1 || version == 2 || version > 4) { + return binary_buffer_error(&buffer.bb, + "unknown CIE version %" PRIu8, + version); + } + + const char *augmentation; + size_t augmentation_len; + if ((err = binary_buffer_next_string(&buffer.bb, &augmentation, + &augmentation_len))) + return err; + cie->have_augmentation_length = augmentation[0] == 'z'; + cie->signal_frame = false; + for (size_t i = 0; i < augmentation_len; i++) { + switch (augmentation[i]) { + case 'z': + if (i != 0) + goto unknown_augmentation; + break; + case 'L': + case 'P': + case 'R': + if (augmentation[0] != 'z') + goto unknown_augmentation; + break; + case 'S': + cie->signal_frame = true; + break; + default: +unknown_augmentation: + /* + * We could ignore this CIE and all FDEs that reference + * it or skip the augmentation if we have its length, + * but let's fail loudly so that we find out about + * missing support. 
+ */ + return binary_buffer_error_at(&buffer.bb, + &augmentation[i], + "unknown CFI augmentation %s", + augmentation); + } + } + + if (version >= 4) { + if ((err = binary_buffer_next_u8(&buffer.bb, + &cie->address_size))) + return err; + if (cie->address_size < 1 || cie->address_size > 8) { + return binary_buffer_error(&buffer.bb, + "unsupported address size %" PRIu8, + cie->address_size); + } + uint8_t segment_selector_size; + if ((err = binary_buffer_next_u8(&buffer.bb, + &segment_selector_size))) + return err; + if (segment_selector_size) { + return binary_buffer_error(&buffer.bb, + "unsupported segment selector size %" PRIu8, + segment_selector_size); + } + } else { + cie->address_size = + drgn_platform_address_size(&module->platform); + } + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &cie->code_alignment_factor)) || + (err = binary_buffer_next_sleb128(&buffer.bb, + &cie->data_alignment_factor))) + return err; + uint64_t return_address_register; + if (version >= 3) { + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &return_address_register))) + return err; + } else { + if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, + &return_address_register))) + return err; + } + cie->return_address_register = + module->platform.arch->dwarf_regno_to_internal(return_address_register); + if (cie->return_address_register == DRGN_REGISTER_NUMBER_UNKNOWN) { + return binary_buffer_error(&buffer.bb, + "unknown return address register"); + } + cie->address_encoding = DW_EH_PE_absptr; + if (augmentation[0] == 'z') { + for (size_t i = 0; i < augmentation_len; i++) { + switch (augmentation[i]) { + case 'z': + if ((err = binary_buffer_skip_leb128(&buffer.bb))) + return err; + break; + case 'L': + if ((err = binary_buffer_skip(&buffer.bb, 1))) + return err; + break; + case 'P': { + uint8_t encoding; + if ((err = binary_buffer_next_u8(&buffer.bb, &encoding))) + return err; + /* + * We don't need the result, so don't bother + * dereferencing. 
+ */ + encoding &= ~DW_EH_PE_indirect; + uint64_t unused; + if ((err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + encoding, + 0, + &unused))) + return err; + break; + } + case 'R': + if ((err = binary_buffer_next_u8(&buffer.bb, + &cie->address_encoding))) + return err; + break; + } + } + } + cie->initial_instructions = buffer.bb.pos; + cie->initial_instructions_size = buffer.bb.end - buffer.bb.pos; + return NULL; +} + +static struct drgn_error * +drgn_parse_dwarf_frames(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn, + struct drgn_dwarf_cie_vector *cies, + struct drgn_dwarf_fde_vector *fdes) +{ + bool is_eh = scn == DRGN_SCN_EH_FRAME; + struct drgn_error *err; + + if (!module->scns[scn]) + return NULL; + err = drgn_debug_info_module_cache_section(module, scn); + if (err) + return err; + Elf_Data *data = module->scn_data[scn]; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, scn); + + struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; + while (binary_buffer_has_next(&buffer.bb)) { + uint32_t tmp; + if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) + goto out; + bool is_64_bit = tmp == UINT32_C(0xffffffff); + uint64_t length; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, &length))) + goto out; + } else { + length = tmp; + } + /* + * Technically, a length of zero is only a terminator in + * .eh_frame, but other consumers (binutils, elfutils, GDB) + * handle it the same way in .debug_frame. + */ + if (length == 0) + break; + if (length > buffer.bb.end - buffer.bb.pos) { + err = binary_buffer_error(&buffer.bb, + "entry length is out of bounds"); + goto out; + } + buffer.bb.end = buffer.bb.pos + length; + + /* + * The Linux Standard Base Core Specification [1] states that + * the CIE ID in .eh_frame is always 4 bytes. However, other + * consumers handle it the same as in .debug_frame (8 bytes for + * the 64-bit format). 
+ * + * 1: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + */ + uint64_t cie_pointer, cie_id; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, + &cie_pointer))) + goto out; + cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); + } else { + if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, + &cie_pointer))) + goto out; + cie_id = is_eh ? 0 : UINT64_C(0xffffffff); + } + + if (cie_pointer != cie_id) { + if (is_eh) { + size_t pointer_offset = + (buffer.bb.pos + - (is_64_bit ? 8 : 4) + - (char *)data->d_buf); + if (cie_pointer > pointer_offset) { + err = binary_buffer_error(&buffer.bb, + "CIE pointer is out of bounds"); + goto out; + } + cie_pointer = pointer_offset - cie_pointer; + } else if (cie_pointer > data->d_size) { + err = binary_buffer_error(&buffer.bb, + "CIE pointer is out of bounds"); + goto out; + } + struct drgn_dwarf_fde *fde = + drgn_dwarf_fde_vector_append_entry(fdes); + if (!fde) { + err = &drgn_enomem; + goto out; + } + struct drgn_dwarf_cie_map_entry entry = { + .key = cie_pointer, + .value = cies->size, + }; + struct drgn_dwarf_cie_map_iterator it; + int r = drgn_dwarf_cie_map_insert(&cie_map, &entry, + &it); + struct drgn_dwarf_cie *cie; + if (r > 0) { + cie = drgn_dwarf_cie_vector_append_entry(cies); + if (!cie) { + err = &drgn_enomem; + goto out; + } + err = drgn_parse_dwarf_cie(module, scn, + cie_pointer, cie); + if (err) + goto out; + } else if (r == 0) { + cie = &cies->data[it.entry->value]; + } else { + err = &drgn_enomem; + goto out; + } + if ((err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + cie->address_encoding, + 0, + &fde->initial_location)) || + (err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + cie->address_encoding & 0xf, + 0, + &fde->address_range))) + goto out; + if (cie->have_augmentation_length) { + uint64_t augmentation_length; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &augmentation_length))) + goto out; + if 
(augmentation_length > + buffer.bb.end - buffer.bb.pos) { + err = binary_buffer_error(&buffer.bb, + "augmentation length is out of bounds"); + goto out; + } + buffer.bb.pos += augmentation_length; + } + fde->cie = it.entry->value; + fde->instructions = buffer.bb.pos; + fde->instructions_size = buffer.bb.end - buffer.bb.pos; + } + + buffer.bb.pos = buffer.bb.end; + buffer.bb.end = (const char *)data->d_buf + data->d_size; + } + + err = NULL; +out: + drgn_dwarf_cie_map_deinit(&cie_map); + return err; +} + +static void drgn_debug_info_cache_sh_addr(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn, + uint64_t *addr) +{ + if (module->scns[scn]) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(module->scns[scn], &shdr_mem); + if (shdr) + *addr = shdr->sh_addr; + } +} + +static int drgn_dwarf_fde_compar(const void *_a, const void *_b, void *arg) +{ + const struct drgn_dwarf_fde *a = _a; + const struct drgn_dwarf_fde *b = _b; + const struct drgn_dwarf_cie *cies = arg; + if (a->initial_location < b->initial_location) + return -1; + else if (a->initial_location > b->initial_location) + return 1; + else + return cies[a->cie].is_eh - cies[b->cie].is_eh; +} + +static struct drgn_error * +drgn_debug_info_parse_frames(struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + + drgn_debug_info_cache_sh_addr(module, DRGN_SCN_EH_FRAME, + &module->dwarf.pcrel_base); + drgn_debug_info_cache_sh_addr(module, DRGN_SCN_TEXT, + &module->dwarf.textrel_base); + drgn_debug_info_cache_sh_addr(module, DRGN_SCN_GOT, + &module->dwarf.datarel_base); + + struct drgn_dwarf_cie_vector cies = VECTOR_INIT; + struct drgn_dwarf_fde_vector fdes = VECTOR_INIT; + + err = drgn_parse_dwarf_frames(module, DRGN_SCN_DEBUG_FRAME, &cies, + &fdes); + if (err) + goto err; + err = drgn_parse_dwarf_frames(module, DRGN_SCN_EH_FRAME, &cies, &fdes); + if (err) + goto err; + + drgn_dwarf_cie_vector_shrink_to_fit(&cies); + + /* + * Sort FDEs and remove duplicates, preferring 
.debug_frame over + * .eh_frame. + */ + qsort_r(fdes.data, fdes.size, sizeof(fdes.data[0]), + drgn_dwarf_fde_compar, cies.data); + if (fdes.size > 0) { + size_t src = 1, dst = 1; + for (; src < fdes.size; src++) { + if (fdes.data[src].initial_location != + fdes.data[dst - 1].initial_location) { + if (src != dst) + fdes.data[dst] = fdes.data[src]; + dst++; + } + } + fdes.size = dst; + } + drgn_dwarf_fde_vector_shrink_to_fit(&fdes); + + module->dwarf.cies = cies.data; + module->dwarf.fdes = fdes.data; + module->dwarf.num_fdes = fdes.size; + return NULL; + +err: + drgn_dwarf_fde_vector_deinit(&fdes); + drgn_dwarf_cie_vector_deinit(&cies); + return err; +} + +static struct drgn_error * +drgn_debug_info_find_fde(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, struct drgn_dwarf_fde **ret) +{ + struct drgn_error *err; + + if (!module->parsed_frames) { + err = drgn_debug_info_parse_frames(module); + if (err) + return err; + module->parsed_frames = true; + } + + /* Binary search for the containing FDE. 
*/ + size_t lo = 0, hi = module->dwarf.num_fdes; + while (lo < hi) { + size_t mid = lo + (hi - lo) / 2; + struct drgn_dwarf_fde *fde = &module->dwarf.fdes[mid]; + if (unbiased_pc < fde->initial_location) { + hi = mid; + } else if (unbiased_pc - fde->initial_location >= + fde->address_range) { + lo = mid + 1; + } else { + *ret = fde; + return NULL; + } + } + *ret = NULL; + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_offset(struct drgn_debug_info_buffer *buffer, int64_t *ret) +{ + struct drgn_error *err; + uint64_t offset; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) + return err; + if (offset > INT64_MAX) + return binary_buffer_error(&buffer->bb, "offset is too large"); + *ret = offset; + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_offset_sf(struct drgn_debug_info_buffer *buffer, + struct drgn_dwarf_cie *cie, int64_t *ret) +{ + struct drgn_error *err; + int64_t factored; + if ((err = binary_buffer_next_sleb128(&buffer->bb, &factored))) + return err; + if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) + return binary_buffer_error(&buffer->bb, "offset is too large"); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_offset_f(struct drgn_debug_info_buffer *buffer, + struct drgn_dwarf_cie *cie, int64_t *ret) +{ + struct drgn_error *err; + uint64_t factored; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &factored))) + return err; + if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) + return binary_buffer_error(&buffer->bb, "offset is too large"); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_block(struct drgn_debug_info_buffer *buffer, + const char **buf_ret, size_t *size_ret) +{ + struct drgn_error *err; + uint64_t size; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &size))) + return err; + if (size > buffer->bb.end - buffer->bb.pos) { + return binary_buffer_error(&buffer->bb, + "block is out of bounds"); + } + 
*buf_ret = buffer->bb.pos; + buffer->bb.pos += size; + *size_ret = size; + return NULL; +} + +DEFINE_VECTOR(drgn_cfi_row_vector, struct drgn_cfi_row *) + +static struct drgn_error * +drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, + struct drgn_dwarf_fde *fde, + const struct drgn_cfi_row *initial_row, uint64_t target, + const char *instructions, size_t instructions_size, + struct drgn_cfi_row **row) +{ + struct drgn_error *err; + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + struct drgn_dwarf_cie *cie = &module->dwarf.cies[fde->cie]; + uint64_t pc = fde->initial_location; + + struct drgn_cfi_row_vector state_stack = VECTOR_INIT; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, + cie->is_eh ? + DRGN_SCN_EH_FRAME : DRGN_SCN_DEBUG_FRAME); + buffer.bb.pos = instructions; + buffer.bb.end = instructions + instructions_size; + while (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + uint64_t dwarf_regno; + drgn_register_number regno; + struct drgn_cfi_rule rule; + uint64_t tmp; + switch ((opcode & 0xc0) ? 
(opcode & 0xc0) : opcode) { + case DW_CFA_set_loc: + if (!initial_row) + goto invalid_for_initial; + if ((err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + cie->address_encoding, + fde->initial_location, + &tmp))) + goto out; + if (tmp <= pc) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_set_loc location is not greater than current location"); + goto out; + } + pc = tmp; + if (pc > target) + goto found; + break; + case DW_CFA_advance_loc: + if (!initial_row) + goto invalid_for_initial; + tmp = opcode & 0x3f; + goto advance_loc; + case DW_CFA_advance_loc1: + if (!initial_row) + goto invalid_for_initial; + if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, + &tmp))) + goto out; + goto advance_loc; + case DW_CFA_advance_loc2: + if (!initial_row) + goto invalid_for_initial; + if ((err = binary_buffer_next_u16_into_u64(&buffer.bb, + &tmp))) + goto out; + goto advance_loc; + case DW_CFA_advance_loc4: + if (!initial_row) + goto invalid_for_initial; + if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, + &tmp))) + goto out; +advance_loc: + if (__builtin_mul_overflow(tmp, + cie->code_alignment_factor, + &tmp) || + __builtin_add_overflow(pc, tmp, &pc) || + pc > uint_max(cie->address_size)) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_CFA_advance_loc* overflows location"); + goto out; + } + if (pc > target) + goto found; + break; + case DW_CFA_def_cfa: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset(&buffer, &rule.offset))) + goto out; + if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; + goto set_cfa; + case DW_CFA_def_cfa_sf: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, + &rule.offset))) + goto out; + if ((rule.regno = 
dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; + goto set_cfa; + case DW_CFA_def_cfa_register: + drgn_cfi_row_get_cfa(*row, &rule); + if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_def_cfa_register with incompatible CFA rule"); + goto out; + } + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; + if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; + goto set_cfa; + case DW_CFA_def_cfa_offset: + drgn_cfi_row_get_cfa(*row, &rule); + if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_def_cfa_offset with incompatible CFA rule"); + goto out; + } + if ((err = drgn_dwarf_cfi_next_offset(&buffer, + &rule.offset))) + goto out; + goto set_cfa; + case DW_CFA_def_cfa_offset_sf: + drgn_cfi_row_get_cfa(*row, &rule); + if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_def_cfa_offset_sf with incompatible CFA rule"); + goto out; + } + if ((err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, + &rule.offset))) + goto out; + goto set_cfa; + case DW_CFA_def_cfa_expression: + rule.kind = DRGN_CFI_RULE_DWARF_EXPRESSION; + rule.push_cfa = false; + if ((err = drgn_dwarf_cfi_next_block(&buffer, + &rule.expr, + &rule.expr_size))) + goto out; +set_cfa: + if (!drgn_cfi_row_set_cfa(row, &rule)) { + err = &drgn_enomem; + goto out; + } + break; + case DW_CFA_undefined: + rule.kind = DRGN_CFI_RULE_UNDEFINED; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_same_value: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + rule.offset = 0; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + 
goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + rule.regno = regno; + goto set_reg; + case DW_CFA_offset: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + if ((err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, + &rule.offset))) + goto out; + if ((regno = dwarf_regno_to_internal(opcode & 0x3f)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_offset_extended: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + goto reg_offset_f; + case DW_CFA_offset_extended_sf: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + goto reg_offset_sf; + case DW_CFA_val_offset: + rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; +reg_offset_f: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, + &rule.offset))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_val_offset_sf: + rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; +reg_offset_sf: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, + &rule.offset))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_register: { + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + rule.offset = 0; + uint64_t dwarf_regno2; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno2))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + if ((rule.regno = dwarf_regno_to_internal(dwarf_regno2)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; + goto set_reg; + } + case DW_CFA_expression: + rule.kind = DRGN_CFI_RULE_AT_DWARF_EXPRESSION; + goto reg_expression; + case DW_CFA_val_expression: + rule.kind = 
DRGN_CFI_RULE_DWARF_EXPRESSION; +reg_expression: + rule.push_cfa = true; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_block(&buffer, + &rule.expr, + &rule.expr_size))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_restore: + if (!initial_row) + goto invalid_for_initial; + dwarf_regno = opcode & 0x3f; + goto restore; + case DW_CFA_restore_extended: + if (!initial_row) { +invalid_for_initial: + err = binary_buffer_error(&buffer.bb, + "invalid initial DWARF CFI opcode %#" PRIx8, + opcode); + goto out; + } + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; +restore: + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + drgn_cfi_row_get_register(initial_row, regno, &rule); +set_reg: + if (!drgn_cfi_row_set_register(row, regno, &rule)) { + err = &drgn_enomem; + goto out; + } + break; + case DW_CFA_remember_state: { + struct drgn_cfi_row **state = + drgn_cfi_row_vector_append_entry(&state_stack); + if (!state) { + err = &drgn_enomem; + goto out; + } + *state = drgn_empty_cfi_row; + if (!drgn_cfi_row_copy(state, *row)) { + err = &drgn_enomem; + goto out; + } + break; + } + case DW_CFA_restore_state: + if (state_stack.size == 0) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_restore_state with empty state stack"); + goto out; + } + drgn_cfi_row_destroy(*row); + *row = state_stack.data[--state_stack.size]; + break; + case DW_CFA_nop: + break; + default: + err = binary_buffer_error(&buffer.bb, + "unknown DWARF CFI opcode %#" PRIx8, + opcode); + goto out; + } + } +found: + err = NULL; +out: + for (size_t i = 0; i < state_stack.size; i++) + drgn_cfi_row_destroy(state_stack.data[i]); + drgn_cfi_row_vector_deinit(&state_stack); + return err; +} + +static struct drgn_error * +drgn_debug_info_find_cfi_in_fde(struct drgn_debug_info_module *module, + struct 
drgn_dwarf_fde *fde, + uint64_t unbiased_pc, struct drgn_cfi_row **ret) +{ + struct drgn_error *err; + struct drgn_dwarf_cie *cie = &module->dwarf.cies[fde->cie]; + struct drgn_cfi_row *initial_row = + (struct drgn_cfi_row *)module->platform.arch->default_dwarf_cfi_row; + err = drgn_eval_dwarf_cfi(module, fde, NULL, unbiased_pc, + cie->initial_instructions, + cie->initial_instructions_size, &initial_row); + if (err) + goto out; + if (!drgn_cfi_row_copy(ret, initial_row)) { + err = &drgn_enomem; + goto out; + } + err = drgn_eval_dwarf_cfi(module, fde, initial_row, unbiased_pc, + fde->instructions, fde->instructions_size, + ret); +out: + drgn_cfi_row_destroy(initial_row); + return err; +} + +struct drgn_error * +drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret) +{ + struct drgn_error *err; + struct drgn_dwarf_fde *fde; + err = drgn_debug_info_find_fde(module, unbiased_pc, &fde); + if (err) + return err; + if (!fde) + return &drgn_not_found; + err = drgn_debug_info_find_cfi_in_fde(module, fde, unbiased_pc, + row_ret); + if (err) + return err; + *interrupted_ret = module->dwarf.cies[fde->cie].signal_frame; + *ret_addr_regno_ret = + module->dwarf.cies[fde->cie].return_address_register; + return NULL; +} + +struct drgn_error * +drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, + const struct drgn_cfi_rule *rule, + const struct drgn_register_state *regs, + void *buf, size_t size) +{ + struct drgn_error *err; + struct uint64_vector stack = VECTOR_INIT; + + if (rule->push_cfa) { + struct optional_uint64 cfa = drgn_register_state_get_cfa(regs); + if (!cfa.has_value) { + err = &drgn_not_found; + goto out; + } + if (!uint64_vector_append(&stack, &cfa.value)) { + err = &drgn_enomem; + goto out; + } + } + + int remaining_ops = MAX_DWARF_EXPR_OPS; + struct drgn_dwarf_expression_context ctx; + 
drgn_dwarf_expression_context_init(&ctx, prog, regs->module, NULL, NULL, + regs, rule->expr, rule->expr_size); + err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); + if (err) + goto out; + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + err = binary_buffer_next_u8(&ctx.bb, &opcode); + if (!err) { + err = binary_buffer_error(&ctx.bb, + "invalid opcode %#" PRIx8 " for CFI expression", + opcode); + } + goto out; + } + if (stack.size == 0) { + err = &drgn_not_found; + } else if (rule->kind == DRGN_CFI_RULE_AT_DWARF_EXPRESSION) { + err = drgn_program_read_memory(prog, buf, + stack.data[stack.size - 1], size, + false); + } else { + copy_lsbytes(buf, size, + drgn_platform_is_little_endian(&prog->platform), + &stack.data[stack.size - 1], sizeof(uint64_t), + HOST_LITTLE_ENDIAN); + err = NULL; + } + +out: + uint64_vector_deinit(&stack); + return err; +} diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h new file mode 100644 index 000000000..e1a3c1fa6 --- /dev/null +++ b/libdrgn/dwarf_info.h @@ -0,0 +1,194 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * DWARF and .eh_frame support. + * + * See @ref DebugInfo. + */ + +#ifndef DRGN_DEBUG_INFO_DWARF_H +#define DRGN_DEBUG_INFO_DWARF_H + +/** + * @ingroup DebugInfo + * + * @{ + */ + +#include + +#include "cfi.h" +#include "drgn.h" +#include "dwarf_index.h" +#include "hash_table.h" + +struct drgn_debug_info; +struct drgn_debug_info_module; +struct drgn_register_state; + +/** DWARF Frame Description Entry. */ +struct drgn_dwarf_fde { + uint64_t initial_location; + uint64_t address_range; + /* CIE for this FDE as an index into drgn_debug_info_module::cies. */ + size_t cie; + const char *instructions; + size_t instructions_size; +}; + +/** DWARF debugging information for a @ref drgn_debug_info_module. */ +struct drgn_dwarf_module_info { + /** Base for `DW_EH_PE_pcrel`. 
*/ + uint64_t pcrel_base; + /** Base for `DW_EH_PE_textrel`. */ + uint64_t textrel_base; + /** Base for `DW_EH_PE_datarel`. */ + uint64_t datarel_base; + /** Array of DWARF Common Information Entries. */ + struct drgn_dwarf_cie *cies; + /** + * Array of DWARF Frame Description Entries sorted by initial_location. + */ + struct drgn_dwarf_fde *fdes; + /** Number of elements in @ref drgn_dwarf_module_info::fdes. */ + size_t num_fdes; +}; + +void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module); + +/** Cached type in a @ref drgn_debug_info. */ +struct drgn_dwarf_type { + struct drgn_type *type; + enum drgn_qualifiers qualifiers; + /** + * Whether this is an incomplete array type or a typedef of one. + * + * This is used to work around a GCC bug; see @ref + * drgn_type_from_dwarf_internal(). + */ + bool is_incomplete_array; +}; + +DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type) + +/** DWARF debugging information for a program/@ref drgn_debug_info. */ +struct drgn_dwarf_info { + /** Index of DWARF debugging information. */ + struct drgn_dwarf_index index; + + /** + * Cache of parsed types. + * + * The key is the address of the DIE (@c Dwarf_Die::addr). The value is + * a @ref drgn_dwarf_type. + */ + struct drgn_dwarf_type_map types; + /** + * Cache of parsed types which appear to be incomplete array types but + * can't be. + * + * See @ref drgn_type_from_dwarf_internal(). + */ + struct drgn_dwarf_type_map cant_be_incomplete_array_types; + + /** Current parsing recursion depth. */ + int depth; +}; + +void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo); +void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo); + +/** + * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing + * a given program counter. + * + * @param[in] module Module containing @p pc. + * @param[in] pc Program counter. 
* @param[out] bias_ret Returned difference between addresses in the loaded + * module and addresses in the returned DIEs. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the + * innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent + * (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its + * grandparent, etc. Must be freed with @c free(). + * @param[out] length_ret Returned length of @p dies_ret. + */ +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(1, 3, 4, 5))); + +/** + * Find the ancestors of a DWARF DIE. + * + * This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs. + * + * @param[in] die DIE to find. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret]` is the DIE, + * `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]` + * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. + * @param[out] length_ret Returned number of ancestors in @p dies_ret. + */ +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(2, 3))); + +/** + * Find an object DIE in an array of DWARF scopes. + * + * @param[in] scopes Array of scopes, from outermost to innermost. + * @param[in] num_scopes Number of scopes in @p scopes. + * @param[out] die_ret Returned object DIE. + * @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent. + * Otherwise, undefined. + */ +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret); + +/** + * Create a @ref drgn_object from a `Dwarf_Die`. 
+ * + * @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`, + * `DW_TAG_formal_parameter`, `DW_TAG_enumerator`, + * `DW_TAG_template_value_parameter`). + * @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type` + * attribute of @p die. If @p die is a `DW_TAG_enumerator` DIE, this should be + * its parent. + * @param[in] function_die DIE of current function. @c NULL if not in function + * context. + * @param[in] regs Registers of current stack frame. @c NULL if not in stack + * frame context. + * @param[out] ret Returned object. + */ +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret); + +struct drgn_error * +drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret); + +struct drgn_error * +drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, + const struct drgn_cfi_rule *rule, + const struct drgn_register_state *regs, + void *buf, size_t size); + +/** @} */ + +#endif /* DRGN_DEBUG_INFO_DWARF_H */ diff --git a/libdrgn/language.c b/libdrgn/language.c index 3e5086228..6cdc36dd1 100644 --- a/libdrgn/language.c +++ b/libdrgn/language.c @@ -1,9 +1,6 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later -#include - -#include "error.h" #include "language.h" const struct drgn_language drgn_languages[] = { @@ -62,29 +59,3 @@ const struct drgn_language drgn_languages[] = { .op_not = c_op_not, }, }; - -struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, - const struct drgn_language **ret) -{ - Dwarf_Die cudie; - if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) - return drgn_error_libdw(); - switch (dwarf_srclang(&cudie)) { - case DW_LANG_C: - case DW_LANG_C89: - case DW_LANG_C99: - case DW_LANG_C11: - *ret = &drgn_language_c; - break; - case DW_LANG_C_plus_plus: - case DW_LANG_C_plus_plus_03: - case DW_LANG_C_plus_plus_11: - case DW_LANG_C_plus_plus_14: - *ret = &drgn_language_cpp; - break; - default: - *ret = fall_back ? &drgn_default_language : NULL; - break; - } - return NULL; -} diff --git a/libdrgn/language.h b/libdrgn/language.h index 8bce2b5b4..b93620768 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -12,8 +12,6 @@ #ifndef DRGN_LANGUAGE_H #define DRGN_LANGUAGE_H -#include - #include "drgn.h" /** @@ -203,18 +201,6 @@ drgn_element_format_object_flags(enum drgn_format_object_flags flags) (flags & DRGN_FORMAT_OBJECT_ELEMENT_TYPE_NAMES) >> 2); } -/** - * Return the @ref drgn_language of the CU of the given DIE. - * - * @param[in] fall_back Whether to fall back if the language is not found or - * unknown. If @c true, @ref drgn_default_language is returned in this case. If - * @c false, @c NULL is returned. - * @param[out] ret Returned language. - * @return @c NULL on success, non-@c NULL on error. 
- */ -struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, - const struct drgn_language **ret); - /** @} */ #endif /* DRGN_LANGUAGE_H */ diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 223ced13d..f3a9b489d 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -91,11 +91,11 @@ static size_t keep_orc_entry(struct drgn_debug_info_module *module, static size_t remove_fdes_from_orc(struct drgn_debug_info_module *module, size_t *indices, size_t num_entries) { - if (module->num_fdes == 0) + if (module->dwarf.num_fdes == 0) return num_entries; - struct drgn_dwarf_fde *fde = module->fdes; - struct drgn_dwarf_fde *last_fde = &module->fdes[module->num_fdes - 1]; + struct drgn_dwarf_fde *fde = module->dwarf.fdes; + struct drgn_dwarf_fde *last_fde = fde + module->dwarf.num_fdes - 1; size_t new_num_entries = 0; diff --git a/libdrgn/program.c b/libdrgn/program.c index 90952f4e8..7ecf4e36d 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -17,9 +16,7 @@ #include #include -#include "array.h" #include "debug_info.h" -#include "dwarf_index.h" #include "error.h" #include "language.h" #include "linux_kernel.h" @@ -551,38 +548,18 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) } /* Set the default language from the language of "main". 
*/ -static void drgn_program_set_language_from_main(struct drgn_debug_info *dbinfo) +static void drgn_program_set_language_from_main(struct drgn_program *prog) { struct drgn_error *err; - struct drgn_dwarf_index_iterator it; - static const uint64_t tags[] = { DW_TAG_subprogram }; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, - "main", strlen("main"), tags, - array_size(tags)); - if (err) { - drgn_error_destroy(err); - return; - } - struct drgn_dwarf_index_die *index_die; - while ((index_die = drgn_dwarf_index_iterator_next(&it))) { - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) { - drgn_error_destroy(err); - continue; - } - const struct drgn_language *lang; - err = drgn_language_from_die(&die, false, &lang); - if (err) { - drgn_error_destroy(err); - continue; - } - if (lang) { - dbinfo->prog->lang = lang; - break; - } - } + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + return; + const struct drgn_language *lang; + err = drgn_debug_info_main_language(prog->dbinfo, &lang); + if (err) + drgn_error_destroy(err); + if (lang) + prog->lang = lang; } static int drgn_set_platform_from_dwarf(Dwfl_Module *module, void **userdatap, @@ -639,9 +616,8 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, err = drgn_debug_info_load(dbinfo, paths, n, load_default, load_main); if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { - if (!prog->lang && - !(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) - drgn_program_set_language_from_main(dbinfo); + if (!prog->lang) + drgn_program_set_language_from_main(prog); if (!prog->has_platform) { dwfl_getdwarf(dbinfo->dwfl, drgn_set_platform_from_dwarf, prog, 0); diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index be43f1601..50d6c4bcc 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -13,6 +13,7 @@ #include "cfi.h" #include "debug_info.h" #include "drgn.h" +#include "dwarf_info.h" #include "error.h" #include "helpers.h" #include 
"minmax.h" From c3f31e28f9193414e92c3b5fd1687ed2365c5739 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 12 Nov 2021 01:35:27 -0800 Subject: [PATCH 086/139] libdrgn: reorganize and move DWARF index into dwarf_info.c The upcoming introduction of a higher level data structure to represent a namespace has implications on the organization of the DWARF index and debug info management code. Basically, we're going to want to track what is currently known as struct drgn_dwarf_index_namespace as part of the new struct drgn_namespace. That only leaves the DWARF specification map and list of CUs in struct drgn_dwarf_index, which doesn't make much sense anymore. Instead, let's: * Move the specification map and CUs into struct drgn_dwarf_info. * Rename struct drgn_dwarf_index_namespace to struct drgn_namespace_dwarf_index to indicate that it is the "DWARF index for a namespace" rather than a "namespace of a DWARF index". * Move the DWARF index implementation into dwarf_info.c. The DWARF index and debugging information management have always been coupled, so this makes it more explicit and is more convenient. * Improve documentation and naming in the DWARF index implementation. Now, the only DWARF-specific code outside of dwarf_info.c is for stack tracing, but we'll leave that for another day. 
Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 - libdrgn/debug_info.c | 16 +- libdrgn/dwarf_index.c | 2769 ------------------------------------ libdrgn/dwarf_index.h | 297 ---- libdrgn/dwarf_info.c | 3084 +++++++++++++++++++++++++++++++++++++++-- libdrgn/dwarf_info.h | 100 +- 6 files changed, 3108 insertions(+), 3160 deletions(-) delete mode 100644 libdrgn/dwarf_index.c delete mode 100644 libdrgn/dwarf_index.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index c761a2843..5efa66387 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -35,8 +35,6 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ cityhash.h \ debug_info.c \ debug_info.h \ - dwarf_index.c \ - dwarf_index.h \ dwarf_info.c \ dwarf_info.h \ error.c \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 4d01bc07d..99ea2c0d3 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -17,7 +17,6 @@ #include #include "debug_info.h" -#include "dwarf_index.h" #include "error.h" #include "linux_kernel.h" #include "program.h" @@ -948,7 +947,7 @@ drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, static struct drgn_error * drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, - struct drgn_dwarf_index_update_state *dindex_state, + struct drgn_dwarf_index_state *index, struct drgn_debug_info_module *head) { struct drgn_error *err; @@ -967,7 +966,7 @@ drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, continue; } module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; - return drgn_dwarf_index_read_module(dindex_state, + return drgn_dwarf_index_read_module(index, module); } } @@ -1010,9 +1009,8 @@ drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) load->new_modules.size)) return &drgn_enomem; - struct drgn_dwarf_index_update_state dindex_state; - if (!drgn_dwarf_index_update_state_init(&dindex_state, - &dbinfo->dwarf.index)) + struct drgn_dwarf_index_state index; + if 
(!drgn_dwarf_index_state_init(&index, dbinfo)) return &drgn_enomem; struct drgn_error *err = NULL; #pragma omp parallel for schedule(dynamic) @@ -1020,7 +1018,7 @@ drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) if (err) continue; struct drgn_error *module_err = - drgn_debug_info_read_module(load, &dindex_state, + drgn_debug_info_read_module(load, &index, load->new_modules.data[i]); if (module_err) { #pragma omp critical(drgn_debug_info_update_index_error) @@ -1031,8 +1029,8 @@ drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) } } if (!err) - err = drgn_dwarf_index_update(&dindex_state); - drgn_dwarf_index_update_state_deinit(&dindex_state); + err = drgn_dwarf_info_update_index(&index); + drgn_dwarf_index_state_deinit(&index); if (!err) drgn_debug_info_free_modules(dbinfo, true, false); return err; diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c deleted file mode 100644 index 3fcdb8d68..000000000 --- a/libdrgn/dwarf_index.c +++ /dev/null @@ -1,2769 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0-or-later - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "array.h" -#include "binary_buffer.h" -#include "debug_info.h" -#include "drgn.h" -#include "dwarf_index.h" -#include "error.h" -#include "platform.h" -#include "util.h" - -static const size_t DRGN_DWARF_INDEX_SHARD_BITS = 8; -static const size_t DRGN_DWARF_INDEX_NUM_SHARDS = 1 << DRGN_DWARF_INDEX_SHARD_BITS; - -struct drgn_dwarf_index_pending_cu { - struct drgn_debug_info_module *module; - const char *buf; - size_t len; - bool is_64_bit; - enum drgn_debug_info_scn scn; -}; - -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) - -/* - * The DWARF abbreviation table gets translated into a series of instructions. - * An instruction <= INSN_MAX_SKIP indicates a number of bytes to be skipped - * over. 
The next few instructions mean that the corresponding attribute can be - * skipped over. The remaining instructions indicate that the corresponding - * attribute should be parsed. Finally, every sequence of instructions - * corresponding to a DIE is terminated by a zero byte followed by the DIE - * flags, which are a bitmask of flags combined with the DWARF tag (which may be - * set to zero if the tag is not of interest); see DIE_FLAG_*. - */ -enum { - INSN_MAX_SKIP = 199, - ATTRIB_BLOCK, - ATTRIB_BLOCK1, - ATTRIB_BLOCK2, - ATTRIB_BLOCK4, - ATTRIB_LEB128, - ATTRIB_STRING, - ATTRIB_SIBLING_REF1, - ATTRIB_SIBLING_REF2, - ATTRIB_SIBLING_REF4, - ATTRIB_SIBLING_REF8, - ATTRIB_SIBLING_REF_UDATA, - ATTRIB_NAME_STRP4, - ATTRIB_NAME_STRP8, - ATTRIB_NAME_STRING, - ATTRIB_NAME_STRX, - ATTRIB_NAME_STRX1, - ATTRIB_NAME_STRX2, - ATTRIB_NAME_STRX3, - ATTRIB_NAME_STRX4, - ATTRIB_COMP_DIR_STRP4, - ATTRIB_COMP_DIR_STRP8, - ATTRIB_COMP_DIR_LINE_STRP4, - ATTRIB_COMP_DIR_LINE_STRP8, - ATTRIB_COMP_DIR_STRING, - ATTRIB_COMP_DIR_STRX, - ATTRIB_COMP_DIR_STRX1, - ATTRIB_COMP_DIR_STRX2, - ATTRIB_COMP_DIR_STRX3, - ATTRIB_COMP_DIR_STRX4, - ATTRIB_STR_OFFSETS_BASE4, - ATTRIB_STR_OFFSETS_BASE8, - ATTRIB_STMT_LIST_LINEPTR4, - ATTRIB_STMT_LIST_LINEPTR8, - ATTRIB_DECL_FILE_DATA1, - ATTRIB_DECL_FILE_DATA2, - ATTRIB_DECL_FILE_DATA4, - ATTRIB_DECL_FILE_DATA8, - ATTRIB_DECL_FILE_UDATA, - /* - * This instruction is the only one with an operand: the ULEB128 - * implicit constant. 
- */ - ATTRIB_DECL_FILE_IMPLICIT, - ATTRIB_DECLARATION_FLAG, - ATTRIB_SPECIFICATION_REF1, - ATTRIB_SPECIFICATION_REF2, - ATTRIB_SPECIFICATION_REF4, - ATTRIB_SPECIFICATION_REF8, - ATTRIB_SPECIFICATION_REF_UDATA, - ATTRIB_SPECIFICATION_REF_ADDR4, - ATTRIB_SPECIFICATION_REF_ADDR8, - ATTRIB_INDIRECT, - ATTRIB_SIBLING_INDIRECT, - ATTRIB_NAME_INDIRECT, - ATTRIB_COMP_DIR_INDIRECT, - ATTRIB_STR_OFFSETS_BASE_INDIRECT, - ATTRIB_STMT_LIST_INDIRECT, - ATTRIB_DECL_FILE_INDIRECT, - ATTRIB_DECLARATION_INDIRECT, - ATTRIB_SPECIFICATION_INDIRECT, - ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_INDIRECT, -}; - -enum { - /* Mask of tags that we care about. */ - DIE_FLAG_TAG_MASK = 0x3f, - /* The remaining bits can be used for other purposes. */ - DIE_FLAG_DECLARATION = 0x40, - DIE_FLAG_CHILDREN = 0x80, -}; - -DEFINE_VECTOR(uint8_vector, uint8_t) -DEFINE_VECTOR(uint32_vector, uint32_t) -DEFINE_VECTOR(uint64_vector, uint64_t) - -/* - * Placeholder for drgn_dwarf_index_cu::file_name_hashes if the CU has no - * filenames. - */ -static const uint64_t no_file_name_hashes[1] = { 0 }; - -struct drgn_dwarf_index_cu { - struct drgn_debug_info_module *module; - const char *buf; - size_t len; - uint8_t version; - uint8_t unit_type; - uint8_t address_size; - bool is_64_bit; - enum drgn_debug_info_scn scn; - /* - * This is indexed on the DWARF abbreviation code minus one. It maps the - * abbreviation code to an index in abbrev_insns where the instruction - * stream for that code begins. - * - * Technically, abbreviation codes don't have to be sequential. In - * practice, GCC and Clang seem to always generate sequential codes - * starting at one, so we can get away with a flat array. 
- */ - uint32_t *abbrev_decls; - size_t num_abbrev_decls; - uint8_t *abbrev_insns; - uint64_t *file_name_hashes; - size_t num_file_names; - const char *str_offsets; -}; - -struct drgn_dwarf_index_cu_buffer { - struct binary_buffer bb; - struct drgn_dwarf_index_cu *cu; -}; - -static struct drgn_error * -drgn_dwarf_index_cu_buffer_error(struct binary_buffer *bb, const char *pos, - const char *message) -{ - struct drgn_dwarf_index_cu_buffer *buffer = - container_of(bb, struct drgn_dwarf_index_cu_buffer, bb); - return drgn_error_debug_info_scn(buffer->cu->module, - DRGN_SCN_DEBUG_INFO, pos, message); -} - -static void -drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, - struct drgn_dwarf_index_cu *cu) -{ - binary_buffer_init(&buffer->bb, cu->buf, cu->len, - drgn_platform_is_little_endian(&cu->module->platform), - drgn_dwarf_index_cu_buffer_error); - buffer->cu = cu; -} - -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) - -/* DIE which needs to be indexed. */ -struct drgn_dwarf_index_pending_die { - /* Index of compilation unit containing DIE. */ - size_t cu; - /* Address of DIE */ - uintptr_t addr; -}; - -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) - -DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_index_die_map, nstring_hash_pair, - nstring_eq) -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) - -static inline uintptr_t -drgn_dwarf_index_specification_to_key(const struct drgn_dwarf_index_specification *entry) -{ - return entry->declaration; -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_specification_map, - drgn_dwarf_index_specification_to_key, - int_key_hash_pair, scalar_key_eq) - -static inline size_t hash_pair_to_shard(struct hash_pair hp) -{ - /* - * The 8 most significant bits of the hash are used as the F14 tag, so - * we don't want to use those for sharding. 
- */ - return ((hp.first >> - (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & - (DRGN_DWARF_INDEX_NUM_SHARDS - 1)); -} - -static void -drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index *dindex) -{ - ns->shards = NULL; - ns->dindex = dindex; - drgn_dwarf_index_pending_die_vector_init(&ns->pending_dies); - ns->saved_err = NULL; -} - -static bool -drgn_dwarf_index_namespace_shards_init(struct drgn_dwarf_index_namespace *ns) -{ - if (ns->shards) - return true; - ns->shards = malloc_array(DRGN_DWARF_INDEX_NUM_SHARDS, - sizeof(*ns->shards)); - if (!ns->shards) - return false; - for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { - struct drgn_dwarf_index_shard *shard = &ns->shards[i]; - omp_init_lock(&shard->lock); - drgn_dwarf_index_die_map_init(&shard->map); - drgn_dwarf_index_die_vector_init(&shard->dies); - } - return true; -} - -void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex) -{ - drgn_dwarf_index_namespace_init(&dindex->global, dindex); - drgn_dwarf_index_specification_map_init(&dindex->specifications); - drgn_dwarf_index_cu_vector_init(&dindex->cus); -} - -static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) -{ - if (cu->file_name_hashes != no_file_name_hashes) - free(cu->file_name_hashes); - free(cu->abbrev_insns); - free(cu->abbrev_decls); -} - -static void -drgn_dwarf_index_namespace_deinit(struct drgn_dwarf_index_namespace *ns) -{ - drgn_error_destroy(ns->saved_err); - drgn_dwarf_index_pending_die_vector_deinit(&ns->pending_dies); - if (ns->shards) { - for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { - struct drgn_dwarf_index_shard *shard = &ns->shards[i]; - for (size_t j = 0; j < shard->dies.size; j++) { - struct drgn_dwarf_index_die *die = &shard->dies.data[j]; - if (die->tag == DW_TAG_namespace) { - drgn_dwarf_index_namespace_deinit(die->namespace); - free(die->namespace); - } - } - drgn_dwarf_index_die_vector_deinit(&shard->dies); - 
drgn_dwarf_index_die_map_deinit(&shard->map); - omp_destroy_lock(&shard->lock); - } - free(ns->shards); - } -} - -void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) -{ - if (!dindex) - return; - for (size_t i = 0; i < dindex->cus.size; i++) - drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - drgn_dwarf_index_cu_vector_deinit(&dindex->cus); - drgn_dwarf_index_specification_map_deinit(&dindex->specifications); - drgn_dwarf_index_namespace_deinit(&dindex->global); -} - -bool -drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, - struct drgn_dwarf_index *dindex) -{ - state->dindex = dindex; - state->max_threads = omp_get_max_threads(); - state->cus = malloc_array(state->max_threads, sizeof(*state->cus)); - if (!state->cus) - return false; - for (size_t i = 0; i < state->max_threads; i++) - drgn_dwarf_index_pending_cu_vector_init(&state->cus[i]); - return true; -} - -void -drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state) -{ - for (size_t i = 0; i < state->max_threads; i++) - drgn_dwarf_index_pending_cu_vector_deinit(&state->cus[i]); - free(state->cus); -} - -static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint64_t form, uint8_t *insn_ret) -{ - struct drgn_error *err; - switch (form) { - case DW_FORM_addr: - *insn_ret = cu->address_size; - return NULL; - case DW_FORM_data1: - case DW_FORM_ref1: - case DW_FORM_flag: - case DW_FORM_strx1: - case DW_FORM_addrx1: - *insn_ret = 1; - return NULL; - case DW_FORM_data2: - case DW_FORM_ref2: - case DW_FORM_strx2: - case DW_FORM_addrx2: - *insn_ret = 2; - return NULL; - case DW_FORM_strx3: - case DW_FORM_addrx3: - *insn_ret = 3; - return NULL; - case DW_FORM_data4: - case DW_FORM_ref4: - case DW_FORM_ref_sup4: - case DW_FORM_strx4: - case DW_FORM_addrx4: - *insn_ret = 4; - return NULL; - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - case DW_FORM_ref_sup8: - *insn_ret = 8; - return 
NULL; - case DW_FORM_data16: - *insn_ret = 16; - return NULL; - case DW_FORM_block: - case DW_FORM_exprloc: - *insn_ret = ATTRIB_BLOCK; - return NULL; - case DW_FORM_block1: - *insn_ret = ATTRIB_BLOCK1; - return NULL; - case DW_FORM_block2: - *insn_ret = ATTRIB_BLOCK2; - return NULL; - case DW_FORM_block4: - *insn_ret = ATTRIB_BLOCK4; - return NULL; - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_strx: - case DW_FORM_addrx: - case DW_FORM_loclistx: - case DW_FORM_rnglistx: - *insn_ret = ATTRIB_LEB128; - return NULL; - case DW_FORM_ref_addr: - if (cu->version < 3) { - *insn_ret = cu->address_size; - return NULL; - } - /* fallthrough */ - case DW_FORM_sec_offset: - case DW_FORM_strp: - case DW_FORM_strp_sup: - case DW_FORM_line_strp: - *insn_ret = cu->is_64_bit ? 8 : 4; - return NULL; - case DW_FORM_string: - *insn_ret = ATTRIB_STRING; - return NULL; - case DW_FORM_implicit_const: - if ((err = binary_buffer_skip_leb128(bb))) - return err; - /* fallthrough */ - case DW_FORM_flag_present: - *insn_ret = 0; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64, - form); - } -} - -static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, - uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_ref1: - *insn_ret = ATTRIB_SIBLING_REF1; - return NULL; - case DW_FORM_ref2: - *insn_ret = ATTRIB_SIBLING_REF2; - return NULL; - case DW_FORM_ref4: - *insn_ret = ATTRIB_SIBLING_REF4; - return NULL; - case DW_FORM_ref8: - *insn_ret = ATTRIB_SIBLING_REF8; - return NULL; - case DW_FORM_ref_udata: - *insn_ret = ATTRIB_SIBLING_REF_UDATA; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_SIBLING_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_sibling", - form); - } -} - -static struct drgn_error *dw_at_name_to_insn(struct 
drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint64_t form, uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { - return binary_buffer_error(bb, - "DW_FORM_strp without .debug_str section"); - } - if (cu->is_64_bit) - *insn_ret = ATTRIB_NAME_STRP8; - else - *insn_ret = ATTRIB_NAME_STRP4; - return NULL; - case DW_FORM_string: - *insn_ret = ATTRIB_NAME_STRING; - return NULL; - case DW_FORM_strx: - *insn_ret = ATTRIB_NAME_STRX; - return NULL; - case DW_FORM_strx1: - *insn_ret = ATTRIB_NAME_STRX1; - return NULL; - case DW_FORM_strx2: - *insn_ret = ATTRIB_NAME_STRX2; - return NULL; - case DW_FORM_strx3: - *insn_ret = ATTRIB_NAME_STRX3; - return NULL; - case DW_FORM_strx4: - *insn_ret = ATTRIB_NAME_STRX4; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_NAME_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_name", - form); - } -} - -static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { - return binary_buffer_error(bb, - "DW_FORM_strp without .debug_str section"); - } - if (cu->is_64_bit) - *insn_ret = ATTRIB_COMP_DIR_STRP8; - else - *insn_ret = ATTRIB_COMP_DIR_STRP4; - return NULL; - case DW_FORM_line_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]) { - return binary_buffer_error(bb, - "DW_FORM_line_strp without .debug_line_str section"); - } - if (cu->is_64_bit) - *insn_ret = ATTRIB_COMP_DIR_LINE_STRP8; - else - *insn_ret = ATTRIB_COMP_DIR_LINE_STRP4; - return NULL; - case DW_FORM_string: - *insn_ret = ATTRIB_COMP_DIR_STRING; - return NULL; - case DW_FORM_strx: - *insn_ret = ATTRIB_COMP_DIR_STRX; - return NULL; - case DW_FORM_strx1: - *insn_ret = ATTRIB_COMP_DIR_STRX1; - return NULL; - case DW_FORM_strx2: - *insn_ret = 
ATTRIB_COMP_DIR_STRX2; - return NULL; - case DW_FORM_strx3: - *insn_ret = ATTRIB_COMP_DIR_STRX3; - return NULL; - case DW_FORM_strx4: - *insn_ret = ATTRIB_COMP_DIR_STRX4; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_COMP_DIR_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_comp_dir", - form); - } -} - -static struct drgn_error * -dw_at_str_offsets_base_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_sec_offset: - if (cu->is_64_bit) - *insn_ret = ATTRIB_STR_OFFSETS_BASE8; - else - *insn_ret = ATTRIB_STR_OFFSETS_BASE4; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_STR_OFFSETS_BASE_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_str_offsets_base", - form); - } -} - -static struct drgn_error * -dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_data4: - *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; - return NULL; - case DW_FORM_data8: - *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; - return NULL; - case DW_FORM_sec_offset: - if (cu->is_64_bit) - *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; - else - *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_STMT_LIST_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_stmt_list", - form); - } -} - -static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, - uint64_t form, - uint8_t *insn_ret, - uint64_t *implicit_const_ret) -{ - switch (form) { - case DW_FORM_data1: - *insn_ret = ATTRIB_DECL_FILE_DATA1; - return NULL; - case DW_FORM_data2: - *insn_ret = ATTRIB_DECL_FILE_DATA2; - return NULL; - case DW_FORM_data4: - *insn_ret = ATTRIB_DECL_FILE_DATA4; - 
return NULL; - case DW_FORM_data8: - *insn_ret = ATTRIB_DECL_FILE_DATA8; - return NULL; - /* - * decl_file must be positive, so if the compiler uses - * DW_FORM_sdata for some reason, just treat it as udata. - */ - case DW_FORM_sdata: - case DW_FORM_udata: - *insn_ret = ATTRIB_DECL_FILE_UDATA; - return NULL; - case DW_FORM_implicit_const: - *insn_ret = ATTRIB_DECL_FILE_IMPLICIT; - return binary_buffer_next_uleb128(bb, implicit_const_ret); - case DW_FORM_indirect: - *insn_ret = ATTRIB_DECL_FILE_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_decl_file", - form); - } -} - -static struct drgn_error * -dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret, uint8_t *die_flags) -{ - switch (form) { - case DW_FORM_flag: - *insn_ret = ATTRIB_DECLARATION_FLAG; - return NULL; - case DW_FORM_flag_present: - /* - * This could be an instruction, but as long as we have a free - * DIE flag bit, we might as well use it. 
- */ - *insn_ret = 0; - *die_flags |= DIE_FLAG_DECLARATION; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_DECLARATION_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_declaration", - form); - } -} - -static struct drgn_error * -dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_ref1: - *insn_ret = ATTRIB_SPECIFICATION_REF1; - return NULL; - case DW_FORM_ref2: - *insn_ret = ATTRIB_SPECIFICATION_REF2; - return NULL; - case DW_FORM_ref4: - *insn_ret = ATTRIB_SPECIFICATION_REF4; - return NULL; - case DW_FORM_ref8: - *insn_ret = ATTRIB_SPECIFICATION_REF8; - return NULL; - case DW_FORM_ref_udata: - *insn_ret = ATTRIB_SPECIFICATION_REF_UDATA; - return NULL; - case DW_FORM_ref_addr: - if (cu->version >= 3) { - if (cu->is_64_bit) - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; - else - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; - } else { - if (cu->address_size == 8) - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; - else if (cu->address_size == 4) - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; - else - return binary_buffer_error(bb, - "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", - cu->address_size); - } - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_SPECIFICATION_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_specification", - form); - } -} - -static bool append_uleb128(struct uint8_vector *insns, uint64_t value) -{ - do { - uint8_t byte = value & 0x7f; - value >>= 7; - if (value != 0) - byte |= 0x80; - if (!uint8_vector_append(insns, &byte)) - return false; - } while (value != 0); - return true; -} - -static struct drgn_error * -read_abbrev_decl(struct drgn_debug_info_buffer *buffer, - struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, - struct uint8_vector *insns) -{ - struct 
drgn_error *err; - - static_assert(ATTRIB_MAX_INSN == UINT8_MAX, - "maximum DWARF attribute instruction is invalid"); - - uint64_t code; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) - return err; - if (code == 0) - return &drgn_stop; - if (code != decls->size + 1) { - return binary_buffer_error(&buffer->bb, - "DWARF abbrevation table is not sequential"); - } - - uint32_t insn_index = insns->size; - if (!uint32_vector_append(decls, &insn_index)) - return &drgn_enomem; - - uint64_t tag; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &tag))) - return err; - - bool should_index; - switch (tag) { - /* Types. */ - case DW_TAG_base_type: - case DW_TAG_class_type: - case DW_TAG_enumeration_type: - case DW_TAG_structure_type: - case DW_TAG_typedef: - case DW_TAG_union_type: - /* Variables. */ - case DW_TAG_variable: - /* Constants. */ - case DW_TAG_enumerator: - /* Functions. */ - case DW_TAG_subprogram: - /* Namespaces */ - case DW_TAG_namespace: - /* If adding anything here, make sure it fits in DIE_FLAG_TAG_MASK. */ - should_index = true; - break; - default: - should_index = false; - break; - } - uint8_t die_flags = should_index ? 
tag : 0; - - uint8_t children; - if ((err = binary_buffer_next_u8(&buffer->bb, &children))) - return err; - if (children) - die_flags |= DIE_FLAG_CHILDREN; - - uint8_t insn, last_insn = UINT8_MAX; - for (;;) { - uint64_t name, form; - uint64_t implicit_const; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &name))) - return err; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &form))) - return err; - if (name == 0 && form == 0) - break; - - if (name == DW_AT_sibling) { - err = dw_at_sibling_to_insn(&buffer->bb, form, &insn); - } else if (name == DW_AT_name && should_index) { - err = dw_at_name_to_insn(cu, &buffer->bb, form, &insn); - } else if (name == DW_AT_comp_dir) { - err = dw_at_comp_dir_to_insn(cu, &buffer->bb, form, - &insn); - } else if (name == DW_AT_str_offsets_base) { - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]) { - return binary_buffer_error(&buffer->bb, - "DW_AT_str_offsets_base without .debug_str_offsets section"); - } - err = dw_at_str_offsets_base_to_insn(cu, &buffer->bb, - form, &insn); - } else if (name == DW_AT_stmt_list) { - if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { - return binary_buffer_error(&buffer->bb, - "DW_AT_stmt_list without .debug_line section"); - } - err = dw_at_stmt_list_to_insn(cu, &buffer->bb, form, - &insn); - } else if (name == DW_AT_decl_file && should_index && - /* Namespaces are merged, so we ignore their file. 
*/ - tag != DW_TAG_namespace) { - err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn, - &implicit_const); - } else if (name == DW_AT_declaration && should_index) { - err = dw_at_declaration_to_insn(&buffer->bb, form, - &insn, &die_flags); - } else if (name == DW_AT_specification && should_index) { - err = dw_at_specification_to_insn(cu, &buffer->bb, form, - &insn); - } else { - err = dw_form_to_insn(cu, &buffer->bb, form, &insn); - } - if (err) - return err; - - if (insn != 0) { - if (insn <= INSN_MAX_SKIP) { - if (last_insn + insn <= INSN_MAX_SKIP) { - insns->data[insns->size - 1] += insn; - continue; - } else if (last_insn < INSN_MAX_SKIP) { - insn = last_insn + insn - INSN_MAX_SKIP; - insns->data[insns->size - 1] = INSN_MAX_SKIP; - } - } - last_insn = insn; - - if (!uint8_vector_append(insns, &insn)) - return &drgn_enomem; - - if (insn == ATTRIB_DECL_FILE_IMPLICIT && - !append_uleb128(insns, implicit_const)) - return &drgn_enomem; - } - } - insn = 0; - if (!uint8_vector_append(insns, &insn) || - !uint8_vector_append(insns, &die_flags)) - return &drgn_enomem; - return NULL; -} - -static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, - size_t debug_abbrev_offset) -{ - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_ABBREV); - /* Checked in read_cu(). */ - buffer.bb.pos += debug_abbrev_offset; - struct uint32_vector decls = VECTOR_INIT; - struct uint8_vector insns = VECTOR_INIT; - for (;;) { - struct drgn_error *err = read_abbrev_decl(&buffer, cu, &decls, - &insns); - if (err == &drgn_stop) { - break; - } else if (err) { - uint8_vector_deinit(&insns); - uint32_vector_deinit(&decls); - return err; - } - } - uint8_vector_shrink_to_fit(&insns); - uint32_vector_shrink_to_fit(&decls); - cu->abbrev_decls = decls.data; - cu->num_abbrev_decls = decls.size; - cu->abbrev_insns = insns.data; - return NULL; -} - -/* Get the size of a unit header beyond that of a normal compilation unit. 
*/ -static size_t cu_header_extra_size(struct drgn_dwarf_index_cu *cu) -{ - switch (cu->unit_type) { - case DW_UT_compile: - case DW_UT_partial: - return 0; - case DW_UT_skeleton: - case DW_UT_split_compile: - /* dwo_id */ - return 8; - case DW_UT_type: - case DW_UT_split_type: - /* type_signature and type_offset */ - return cu->is_64_bit ? 16 : 12; - default: - UNREACHABLE(); - } -} - -static size_t cu_header_size(struct drgn_dwarf_index_cu *cu) -{ - size_t size = cu->is_64_bit ? 23 : 11; - if (cu->version >= 5) - size++; - size += cu_header_extra_size(cu); - return size; -} - -static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) -{ - struct drgn_error *err; - buffer->bb.pos += buffer->cu->is_64_bit ? 12 : 4; - uint16_t version; - if ((err = binary_buffer_next_u16(&buffer->bb, &version))) - return err; - if (version < 2 || version > 5) { - return binary_buffer_error(&buffer->bb, - "unknown DWARF CU version %" PRIu16, - version); - } - buffer->cu->version = version; - - if (version >= 5) { - if ((err = binary_buffer_next_u8(&buffer->bb, - &buffer->cu->unit_type))) - return err; - if (buffer->cu->unit_type < DW_UT_compile || - buffer->cu->unit_type > DW_UT_split_type) { - return binary_buffer_error(&buffer->bb, - "unknown DWARF unit type"); - } - } else if (buffer->cu->scn == DRGN_SCN_DEBUG_TYPES) { - buffer->cu->unit_type = DW_UT_type; - } else { - buffer->cu->unit_type = DW_UT_compile; - } - - if (version >= 5 && - (err = binary_buffer_next_u8(&buffer->bb, - &buffer->cu->address_size))) - return err; - - uint64_t debug_abbrev_offset; - if (buffer->cu->is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer->bb, - &debug_abbrev_offset))) - return err; - } else { - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &debug_abbrev_offset))) - return err; - } - if (debug_abbrev_offset > - buffer->cu->module->scn_data[DRGN_SCN_DEBUG_ABBREV]->d_size) { - return binary_buffer_error(&buffer->bb, - "debug_abbrev_offset is out of bounds"); - } 
- - if (version < 5 && - (err = binary_buffer_next_u8(&buffer->bb, - &buffer->cu->address_size))) - return err; - if (buffer->cu->address_size > 8) { - return binary_buffer_error(&buffer->bb, - "unsupported address size %" PRIu8, - buffer->cu->address_size); - } - - if ((err = binary_buffer_skip(&buffer->bb, - cu_header_extra_size(buffer->cu)))) - return err; - - return read_abbrev_table(buffer->cu, debug_abbrev_offset); -} - -static struct drgn_error *read_strx(struct drgn_dwarf_index_cu_buffer *buffer, - uint64_t strx, const char **ret) -{ - if (!buffer->cu->str_offsets) { - return binary_buffer_error(&buffer->bb, - "string index without DW_AT_str_offsets_base"); - } - Elf_Data *debug_str_offsets = - buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]; - size_t offset_size = buffer->cu->is_64_bit ? 8 : 4; - if (((char *)debug_str_offsets->d_buf + debug_str_offsets->d_size - - buffer->cu->str_offsets) - / offset_size <= strx) { - return binary_buffer_error(&buffer->bb, - "string index out of bounds"); - } - uint64_t strp; - if (buffer->cu->is_64_bit) { - memcpy(&strp, (uint64_t *)buffer->cu->str_offsets + strx, - sizeof(strp)); - if (buffer->bb.bswap) - strp = bswap_64(strp); - } else { - uint32_t strp32; - memcpy(&strp32, (uint32_t *)buffer->cu->str_offsets + strx, - sizeof(strp32)); - if (buffer->bb.bswap) - strp32 = bswap_32(strp32); - strp = strp32; - } - if (strp >= buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR]->d_size) { - return binary_buffer_error(&buffer->bb, - "indirect string is out of bounds"); - } - *ret = ((char *)buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR]->d_buf - + strp); - return NULL; -} - -static struct drgn_error *read_lnp_header(struct drgn_debug_info_buffer *buffer, - bool *is_64_bit_ret, int *version_ret) -{ - struct drgn_error *err; - uint32_t tmp; - if ((err = binary_buffer_next_u32(&buffer->bb, &tmp))) - return err; - bool is_64_bit = tmp == UINT32_C(0xffffffff); - if (is_64_bit && - (err = binary_buffer_skip(&buffer->bb, 
sizeof(uint64_t)))) - return err; - *is_64_bit_ret = is_64_bit; - - uint16_t version; - if ((err = binary_buffer_next_u16(&buffer->bb, &version))) - return err; - if (version < 2 || version > 5) { - return binary_buffer_error(&buffer->bb, - "unknown DWARF LNP version %" PRIu16, - version); - } - *version_ret = version; - - uint8_t opcode_base; - if ((err = binary_buffer_skip(&buffer->bb, - /* address_size + segment_selector_size */ - + (version >= 5 ? 2 : 0) - + (is_64_bit ? 8 : 4) /* header_length */ - + 1 /* minimum_instruction_length */ - + (version >= 4) /* maximum_operations_per_instruction */ - + 1 /* default_is_stmt */ - + 1 /* line_base */ - + 1 /* line_range */)) || - (err = binary_buffer_next_u8(&buffer->bb, &opcode_base)) || - (err = binary_buffer_skip(&buffer->bb, opcode_base - 1))) - return err; - - return NULL; -} - -/** - * Cached hash of file path. - * - * File names in the DWARF line number program header consist of three parts: - * the compilation directory path, the directory path, and the file name. - * Multiple file names may be relative to the same directory, and relative - * directory paths are all relative to the compilation directory. - * - * We'd like to hash DWARF file names to a unique hash so that we can - * deduplicate definitions without comparing full paths. - * - * The naive way to hash a DWARF file name entry would be to join and normalize - * the compilation directory path, directory path, and file name, and hash that. - * But this would involve a lot of redundant computations since most paths will - * have common prefixes. Instead, we cache the hashes of each directory path and - * update the hash for relative paths. - * - * It is not sufficient to cache the final hash for each directory because ".." - * components may require us to use the hash of a parent directory. So, we also - * cache the hash of every parent directory in a linked list. - * - * We use the FNV-1a hash function. 
Although FNV-1a is - * [known](https://github.com/rurban/smhasher/blob/master/doc/FNV1a.txt) to have - * some hash quality problems, it is sufficient for producing unique 64-bit - * hashes of file names. It has a couple of advantages over "better" hash - * functions: - * - * 1. Its only internal state is the 64-bit hash value, which keeps this - * structure small. - * 2. It operates byte-by-byte, which works well for incrementally hashing lots - * of short path components. - */ -struct path_hash { - /** Hash of this path. */ - uint64_t hash; - /** - * Tagged pointer comprising `struct path_hash *` of parent directory - * and flag in lowest-order bit specifying whether this path ends in a - * ".." component. - */ - uintptr_t parent_and_is_dot_dot; -}; - -#define FNV_OFFSET_BASIS_64 UINT64_C(0xcbf29ce484222325) -#define FNV_PRIME_64 UINT64_C(0x00000100000001b3) - -static inline void path_hash_update(struct path_hash *path_hash, - const void *src, size_t len) -{ - const uint8_t *s = src, *end = s + len; - uint64_t hash = path_hash->hash; - while (s < end) { - hash ^= *(s++); - hash *= FNV_PRIME_64; - } - path_hash->hash = hash; -} - -/** Path hash of "" (empty string). */ -static const struct path_hash empty_path_hash = { FNV_OFFSET_BASIS_64 }; -/** Path hash of "/". */ -static const struct path_hash absolute_path_hash = { - (FNV_OFFSET_BASIS_64 ^ '/') * FNV_PRIME_64, -}; - -static inline const struct path_hash * -path_hash_parent(const struct path_hash *path_hash) -{ - return (struct path_hash *)(path_hash->parent_and_is_dot_dot - & ~(uintptr_t)1); -} - -static inline bool path_hash_is_dot_dot(const struct path_hash *path_hash) -{ - return path_hash->parent_and_is_dot_dot & 1; -} - -/** Chunk of allocated @ref path_hash objects. See @ref path_hash_cache. 
*/ -struct path_hash_chunk { - struct path_hash objects[(4096 - sizeof(struct path_hash_chunk *)) - / sizeof(struct path_hash)]; - struct path_hash_chunk *next; -}; - -DEFINE_VECTOR(path_hash_vector, const struct path_hash *) - -struct lnp_entry_format { - uint64_t content_type; - uint64_t form; -}; - -static const struct lnp_entry_format dwarf4_directory_entry_formats[] = { - { DW_LNCT_path, DW_FORM_string }, -}; -static const struct lnp_entry_format dwarf4_file_name_entry_formats[] = { - { DW_LNCT_path, DW_FORM_string }, - { DW_LNCT_directory_index, DW_FORM_udata }, - { DW_LNCT_timestamp, DW_FORM_udata }, - { DW_LNCT_size, DW_FORM_udata }, -}; - -/** - * Cache of hashed file paths. - * - * This uses a bump allocator for @ref path_hash objects. @ref path_hash objects - * are allocated sequentially out of a @ref path_hash_chunk; when a chunk is - * exhausted, a new @ref path_hash_chunk is allocated from the heap. The - * allocated chunks are kept and reused for each DWARF line number program; they - * are freed at the end of the first indexing pass. - * - * This also caches the allocations for directory hashes and line number program - * header entry formats. - */ -struct path_hash_cache { - /** Next @ref path_hash object to be allocated. */ - struct path_hash *next_object; - /** @ref path_hash_chunk currently being allocated from. */ - struct path_hash_chunk *current_chunk; - /** First allocated @ref path_hash_chunk. */ - struct path_hash_chunk *first_chunk; - /** Hashed directory paths. */ - struct path_hash_vector directories; - /** Line number program header entry formats. */ - struct lnp_entry_format *entry_formats; - /** Allocated size of @ref path_hash_cache::entry_formats. 
*/ - size_t entry_formats_capacity; -}; - -static struct path_hash *path_hash_alloc(struct path_hash_cache *cache) -{ - struct path_hash_chunk *current_chunk = cache->current_chunk; - if (cache->next_object < - ¤t_chunk->objects[array_size(current_chunk->objects)]) - return cache->next_object++; - struct path_hash_chunk *next_chunk = current_chunk->next; - if (!next_chunk) { - next_chunk = malloc(sizeof(*next_chunk)); - if (!next_chunk) - return NULL; - next_chunk->next = NULL; - current_chunk->next = next_chunk; - } - cache->current_chunk = next_chunk; - cache->next_object = &next_chunk->objects[1]; - return next_chunk->objects; -} - -static inline bool is_dot_dot(const char *component, size_t component_len) -{ - return component_len == 2 && component[0] == '.' && component[1] == '.'; -} - -static const struct path_hash *hash_path(struct path_hash_cache *cache, - const char *path, - const struct path_hash *path_hash) -{ - const char *p = path; - if (*p == '/') { - path_hash = &absolute_path_hash; - p++; - } - while (*p != '\0') { - const char *component = p; - p = strchrnul(p, '/'); - size_t component_len = p - component; - if (*p == '/') - p++; - if (component_len == 0 || - (component_len == 1 && component[0] == '.')) { - } else if (!is_dot_dot(component, component_len) || - path_hash == &empty_path_hash || - path_hash_is_dot_dot(path_hash)) { - struct path_hash *new_path_hash = path_hash_alloc(cache); - if (!new_path_hash) - return NULL; - new_path_hash->hash = path_hash->hash; - if (path_hash->parent_and_is_dot_dot != 0) - path_hash_update(new_path_hash, "/", 1); - path_hash_update(new_path_hash, component, - component_len); - new_path_hash->parent_and_is_dot_dot = - ((uintptr_t)path_hash | - is_dot_dot(component, component_len)); - path_hash = new_path_hash; - } else if (path_hash != &absolute_path_hash) { - path_hash = path_hash_parent(path_hash); - } - } - return path_hash; -} - -static struct drgn_error * -read_lnp_entry_formats(struct 
drgn_debug_info_buffer *buffer, - struct path_hash_cache *cache, int *count_ret) -{ - struct drgn_error *err; - uint8_t count; - if ((err = binary_buffer_next_u8(&buffer->bb, &count))) - return err; - if (count > cache->entry_formats_capacity) { - free(cache->entry_formats); - cache->entry_formats = malloc_array(count, - sizeof(cache->entry_formats[0])); - if (!cache->entry_formats) { - cache->entry_formats_capacity = 0; - return &drgn_enomem; - } - cache->entry_formats_capacity = count; - } - bool have_path = false; - for (int i = 0; i < count; i++) { - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &cache->entry_formats[i].content_type))) - return err; - if (cache->entry_formats[i].content_type == DW_LNCT_path) - have_path = true; - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &cache->entry_formats[i].form))) - return err; - } - if (!have_path) { - return binary_buffer_error(&buffer->bb, - "DWARF line number program header entry does not include DW_LNCT_path"); - } - *count_ret = count; - return NULL; -} - -static struct drgn_error *skip_lnp_form(struct binary_buffer *bb, - bool is_64_bit, uint64_t form) -{ - struct drgn_error *err; - uint64_t skip; - switch (form) { - case DW_FORM_block: - if ((err = binary_buffer_next_uleb128(bb, &skip))) - return err; -block: - return binary_buffer_skip(bb, skip); - case DW_FORM_block1: - if ((err = binary_buffer_next_u8_into_u64(bb, &skip))) - return err; - goto block; - case DW_FORM_block2: - if ((err = binary_buffer_next_u16_into_u64(bb, &skip))) - return err; - goto block; - case DW_FORM_block4: - if ((err = binary_buffer_next_u32_into_u64(bb, &skip))) - return err; - goto block; - case DW_FORM_data1: - case DW_FORM_flag: - case DW_FORM_strx1: - return binary_buffer_skip(bb, 1); - case DW_FORM_data2: - case DW_FORM_strx2: - return binary_buffer_skip(bb, 2); - case DW_FORM_strx3: - return binary_buffer_skip(bb, 3); - case DW_FORM_data4: - case DW_FORM_strx4: - return binary_buffer_skip(bb, 4); - case 
DW_FORM_data8: - return binary_buffer_skip(bb, 8); - case DW_FORM_data16: - return binary_buffer_skip(bb, 16); - case DW_FORM_line_strp: - case DW_FORM_sec_offset: - case DW_FORM_strp: - return binary_buffer_skip(bb, is_64_bit ? 8 : 4); - case DW_FORM_sdata: - case DW_FORM_strx: - case DW_FORM_udata: - return binary_buffer_skip_leb128(bb); - case DW_FORM_string: - return binary_buffer_skip_string(bb); - default: - return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for line number program", - form); - } -} - -static struct drgn_error *read_lnp_string(struct drgn_debug_info_buffer *buffer, - bool is_64_bit, uint64_t form, - const char **ret) -{ - struct drgn_error *err; - uint64_t strp; - Elf_Data *data; - switch (form) { - case DW_FORM_string: - *ret = buffer->bb.pos; - return binary_buffer_skip_string(&buffer->bb); - case DW_FORM_line_strp: - case DW_FORM_strp: - if (is_64_bit) - err = binary_buffer_next_u64(&buffer->bb, &strp); - else - err = binary_buffer_next_u32_into_u64(&buffer->bb, &strp); - if (err) - return err; - data = buffer->module->scn_data[ - form == DW_FORM_line_strp ? 
- DRGN_SCN_DEBUG_LINE_STR : DRGN_SCN_DEBUG_STR]; - if (!data || strp >= data->d_size) { - return binary_buffer_error(&buffer->bb, - "DW_LNCT_path is out of bounds"); - } - *ret = (const char *)data->d_buf + strp; - return NULL; - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %#" PRIx64 " for DW_LNCT_path", - form); - } -} - -static struct drgn_error * -read_lnp_directory_index(struct drgn_debug_info_buffer *buffer, uint64_t form, - uint64_t *ret) -{ - switch (form) { - case DW_FORM_data1: - return binary_buffer_next_u8_into_u64(&buffer->bb, ret); - case DW_FORM_data2: - return binary_buffer_next_u16_into_u64(&buffer->bb, ret); - case DW_FORM_udata: - return binary_buffer_next_uleb128(&buffer->bb, ret); - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %#" PRIx64 " for DW_LNCT_directory_index", - form); - } -} - -static struct drgn_error *read_file_name_table(struct path_hash_cache *cache, - struct drgn_dwarf_index_cu *cu, - const char *comp_dir, - size_t stmt_list) -{ - struct drgn_error *err; - - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_LINE); - /* Checked in index_cu_first_pass(). */ - buffer.bb.pos += stmt_list; - - bool is_64_bit; - int version; - if ((err = read_lnp_header(&buffer, &is_64_bit, &version))) - return err; - - cache->current_chunk = cache->first_chunk; - cache->next_object = cache->first_chunk->objects; - cache->directories.size = 0; - - const struct lnp_entry_format *entry_formats; - int entry_format_count; - uint64_t entry_count = 0; /* For -Wmaybe-uninitialized. 
*/ - const struct path_hash *path_hash, *parent; - if (version >= 5) { - if ((err = read_lnp_entry_formats(&buffer, cache, - &entry_format_count))) - return err; - entry_formats = cache->entry_formats; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &entry_count))) - return err; - if (entry_count > SIZE_MAX || - !path_hash_vector_reserve(&cache->directories, entry_count)) - return err; - parent = &empty_path_hash; - } else { - entry_formats = dwarf4_directory_entry_formats; - entry_format_count = array_size(dwarf4_directory_entry_formats); - path_hash = hash_path(cache, comp_dir, &empty_path_hash); - if (!path_hash || - !path_hash_vector_append(&cache->directories, &path_hash)) - return &drgn_enomem; - parent = path_hash; - } - - while (version < 5 || entry_count-- > 0) { - const char *path; - for (int j = 0; j < entry_format_count; j++) { - if (entry_formats[j].content_type == DW_LNCT_path) { - err = read_lnp_string(&buffer, is_64_bit, - entry_formats[j].form, - &path); - if (version < 5 && path[0] == '\0') - goto file_name_entries; - } else { - err = skip_lnp_form(&buffer.bb, is_64_bit, - entry_formats[j].form); - } - if (err) - return err; - } - path_hash = hash_path(cache, path, parent); - if (!path_hash || - !path_hash_vector_append(&cache->directories, &path_hash)) - return &drgn_enomem; - parent = cache->directories.data[0]; - } - -file_name_entries:; - /* - * File name 0 needs special treatment. In DWARF 2-4, file name entries - * are numbered starting at 1, and a DW_AT_decl_file of 0 indicates that - * no file was specified. In DWARF 5, file name entries are numbered - * starting at 0, and entry 0 is the current compilation file name. The - * DWARF 5 specification still states that a DW_AT_decl_file of 0 - * indicates that no file was specified, but some producers (including - * Clang) and consumers (including elfutils and GDB) treat a - * DW_AT_decl_file of 0 as specifying the current compilation file name, - * so we do the same. 
- * - * So, for DWARF 5, we hash entry 0 as usual, and for DWARF 4, we insert - * a placeholder for entry 0. If there are no file names at all, we keep - * the no_file_name_hashes placeholder. - */ - struct uint64_vector file_name_hashes; - if (version >= 5) { - if ((err = read_lnp_entry_formats(&buffer, cache, - &entry_format_count))) - return err; - entry_formats = cache->entry_formats; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &entry_count))) - return err; - if (entry_count == 0) - return NULL; - if (entry_count > SIZE_MAX) - return &drgn_enomem; - uint64_vector_init(&file_name_hashes); - if (!uint64_vector_reserve(&file_name_hashes, entry_count)) { - err = &drgn_enomem; - goto err; - } - } else { - entry_formats = dwarf4_file_name_entry_formats; - entry_format_count = array_size(dwarf4_file_name_entry_formats); - uint64_vector_init(&file_name_hashes); - } - - while (version < 5 || entry_count-- > 0) { - const char *path; - uint64_t directory_index = 0; - for (int j = 0; j < entry_format_count; j++) { - if (entry_formats[j].content_type == DW_LNCT_path) { - err = read_lnp_string(&buffer, is_64_bit, - entry_formats[j].form, - &path); - if (!err && version < 5) { - if (path[0] == '\0') { - if (file_name_hashes.size == 0) { - uint64_vector_deinit(&file_name_hashes); - return NULL; - } - goto done; - } else if (file_name_hashes.size == 0) { - uint64_t zero = 0; - if (!uint64_vector_append(&file_name_hashes, - &zero)) { - err = &drgn_enomem; - goto err; - } - } - } - } else if (entry_formats[j].content_type == - DW_LNCT_directory_index) { - err = read_lnp_directory_index(&buffer, - entry_formats[j].form, - &directory_index); - } else { - err = skip_lnp_form(&buffer.bb, is_64_bit, - entry_formats[j].form); - } - if (err) - goto err; - } - - if (directory_index >= cache->directories.size) { - err = binary_buffer_error(&buffer.bb, - "directory index %" PRIu64 " is invalid", - directory_index); - goto err; - } - struct path_hash *prev_object = 
cache->next_object; - struct path_hash_chunk *prev_chunk = cache->current_chunk; - path_hash = hash_path(cache, path, - cache->directories.data[directory_index]); - if (!path_hash || - !uint64_vector_append(&file_name_hashes, &path_hash->hash)) { - err = &drgn_enomem; - goto err; - } - - /* "Free" the objects allocated for this file name. */ - cache->next_object = prev_object; - cache->current_chunk = prev_chunk; - } - -done: - uint64_vector_shrink_to_fit(&file_name_hashes); - cu->file_name_hashes = file_name_hashes.data; - cu->num_file_names = file_name_hashes.size; - return NULL; - -err: - uint64_vector_deinit(&file_name_hashes); - return err; -} - -static struct drgn_error * -index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, - struct drgn_debug_info_module *module, uintptr_t addr) -{ - struct drgn_dwarf_index_specification entry = { - .declaration = declaration, - .module = module, - .addr = addr, - }; - struct hash_pair hp = - drgn_dwarf_index_specification_map_hash(&declaration); - int ret; - #pragma omp critical(drgn_index_specification) - ret = drgn_dwarf_index_specification_map_insert_hashed(&dindex->specifications, - &entry, hp, - NULL); - /* - * There may be duplicates if multiple DIEs reference one declaration, - * but we ignore them. - */ - return ret == -1 ? 
&drgn_enomem : NULL; -} - -static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint8_t insn, uint8_t *insn_ret, - uint8_t *die_flags) -{ - struct drgn_error *err; - uint64_t form; - if ((err = binary_buffer_next_uleb128(bb, &form))) - return err; - if (form == DW_FORM_implicit_const) { - return binary_buffer_error(bb, - "DW_FORM_implicit_const in DW_FORM_indirect"); - } - switch (insn) { - case ATTRIB_INDIRECT: - return dw_form_to_insn(cu, bb, form, insn_ret); - case ATTRIB_SIBLING_INDIRECT: - return dw_at_sibling_to_insn(bb, form, insn_ret); - case ATTRIB_NAME_INDIRECT: - return dw_at_name_to_insn(cu, bb, form, insn_ret); - case ATTRIB_COMP_DIR_INDIRECT: - return dw_at_comp_dir_to_insn(cu, bb, form, insn_ret); - case ATTRIB_STR_OFFSETS_BASE_INDIRECT: - return dw_at_str_offsets_base_to_insn(cu, bb, form, insn_ret); - case ATTRIB_STMT_LIST_INDIRECT: - return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret); - case ATTRIB_DECL_FILE_INDIRECT: - return dw_at_decl_file_to_insn(bb, form, insn_ret, NULL); - case ATTRIB_DECLARATION_INDIRECT: - return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags); - case ATTRIB_SPECIFICATION_INDIRECT: - return dw_at_specification_to_insn(cu, bb, form, insn_ret); - default: - UNREACHABLE(); - } -} - -/* - * First pass: read the file name tables and index DIEs with - * DW_AT_specification. This recurses into namespaces. - */ -static struct drgn_error * -index_cu_first_pass(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_index_cu_buffer *buffer, - struct path_hash_cache *path_hash_cache) -{ - /* - * If DW_AT_comp_dir uses a strx* form, we can't read it right away - * because we might not have seen DW_AT_str_offsets_base yet. Rather - * than adding an extra flag to indicate that we need to read it later, - * we set comp_dir to this sentinel value. 
- */ - static const char comp_dir_is_strx; - - struct drgn_error *err; - struct drgn_dwarf_index_cu *cu = buffer->cu; - const char *debug_info_buffer = cu->module->scn_data[cu->scn]->d_buf; - unsigned int depth = 0; - for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; - - uint64_t code; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) - return err; - if (code == 0) { - if (depth-- > 1) - continue; - else - break; - } else if (code > cu->num_abbrev_decls) { - return binary_buffer_error(&buffer->bb, - "unknown abbreviation code %" PRIu64, - code); - } - - uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; - bool declaration = false; - uintptr_t specification = 0; - const char *comp_dir = ""; - uint64_t comp_dir_strx; - const char *stmt_list_ptr = NULL; - uint64_t stmt_list; - const char *sibling = NULL; - uint8_t insn; - uint8_t extra_die_flags = 0; - while ((insn = *insnp++)) { -indirect_insn:; - uint64_t skip, tmp; - Elf_Data *strp_scn; - switch (insn) { - case ATTRIB_BLOCK: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_LEB128: - case ATTRIB_NAME_STRX: - case ATTRIB_DECL_FILE_UDATA: - if ((err = binary_buffer_skip_leb128(&buffer->bb))) - return err; - break; - case ATTRIB_COMP_DIR_STRING: - comp_dir = buffer->bb.pos; - /* fallthrough */ - case ATTRIB_STRING: - case ATTRIB_NAME_STRING: - if ((err = binary_buffer_skip_string(&buffer->bb))) - return err; - break; - case ATTRIB_SIBLING_REF1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF2: - if ((err = 
binary_buffer_next_u16_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF_UDATA: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; -sibling: - if (tmp > cu->len) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling is out of bounds"); - } - sibling = cu->buf + tmp; - __builtin_prefetch(sibling); - if (sibling < buffer->bb.pos) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling points backwards"); - } - break; - case ATTRIB_COMP_DIR_STRP4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; - goto comp_dir_strp; - case ATTRIB_COMP_DIR_STRP8: - if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) - return err; - strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; - goto comp_dir_strp; - case ATTRIB_COMP_DIR_LINE_STRP4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]; - goto comp_dir_strp; - case ATTRIB_COMP_DIR_LINE_STRP8: - if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) - return err; - strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]; -comp_dir_strp: - if (tmp >= strp_scn->d_size) { - return binary_buffer_error(&buffer->bb, - "DW_AT_comp_dir is out of bounds"); - } - comp_dir = (const char *)strp_scn->d_buf + tmp; - break; - case ATTRIB_COMP_DIR_STRX: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &comp_dir_strx))) - return err; - comp_dir = &comp_dir_is_strx; - break; - case ATTRIB_COMP_DIR_STRX1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &comp_dir_strx))) - return err; - comp_dir = &comp_dir_is_strx; - break; - case 
ATTRIB_COMP_DIR_STRX2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &comp_dir_strx))) - return err; - comp_dir = &comp_dir_is_strx; - break; - case ATTRIB_COMP_DIR_STRX3: - if ((err = binary_buffer_next_uint(&buffer->bb, - 3, - &comp_dir_strx))) - return err; - comp_dir = &comp_dir_is_strx; - break; - case ATTRIB_COMP_DIR_STRX4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &comp_dir_strx))) - return err; - comp_dir = &comp_dir_is_strx; - break; - case ATTRIB_STR_OFFSETS_BASE4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto str_offsets_base; - case ATTRIB_STR_OFFSETS_BASE8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; -str_offsets_base: - if (tmp > cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_size) { - return binary_buffer_error(&buffer->bb, - "DW_AT_str_offsets_base is out of bounds"); - } - cu->str_offsets = - (char *)cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_buf - + tmp; - break; - case ATTRIB_STMT_LIST_LINEPTR4: - stmt_list_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &stmt_list))) - return err; - break; - case ATTRIB_STMT_LIST_LINEPTR8: - stmt_list_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u64(&buffer->bb, - &stmt_list))) - return err; - break; - case ATTRIB_NAME_STRX1: - case ATTRIB_DECL_FILE_DATA1: - skip = 1; - goto skip; - case ATTRIB_NAME_STRX2: - case ATTRIB_DECL_FILE_DATA2: - skip = 2; - goto skip; - case ATTRIB_NAME_STRX3: - skip = 3; - goto skip; - case ATTRIB_NAME_STRP4: - case ATTRIB_NAME_STRX4: - case ATTRIB_DECL_FILE_DATA4: - skip = 4; - goto skip; - case ATTRIB_NAME_STRP8: - case ATTRIB_DECL_FILE_DATA8: - skip = 8; - goto skip; - case ATTRIB_DECL_FILE_IMPLICIT: - while (*insnp++ & 0x80) - ; - break; - case ATTRIB_DECLARATION_FLAG: { - uint8_t flag; - if ((err = binary_buffer_next_u8(&buffer->bb, - &flag))) - return err; - if (flag) - declaration = true; - break; - } - case 
ATTRIB_SPECIFICATION_REF1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF_UDATA: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; -specification: - specification = (uintptr_t)cu->buf + tmp; - break; - case ATTRIB_SPECIFICATION_REF_ADDR4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification_ref_addr; - case ATTRIB_SPECIFICATION_REF_ADDR8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; -specification_ref_addr: - specification = (uintptr_t)debug_info_buffer + tmp; - break; - case ATTRIB_INDIRECT: - case ATTRIB_SIBLING_INDIRECT: - case ATTRIB_NAME_INDIRECT: - case ATTRIB_COMP_DIR_INDIRECT: - case ATTRIB_STR_OFFSETS_BASE_INDIRECT: - case ATTRIB_STMT_LIST_INDIRECT: - case ATTRIB_DECL_FILE_INDIRECT: - case ATTRIB_DECLARATION_INDIRECT: - case ATTRIB_SPECIFICATION_INDIRECT: - if ((err = read_indirect_insn(cu, &buffer->bb, - insn, &insn, - &extra_die_flags))) - return err; - if (insn) - goto indirect_insn; - else - continue; - default: - skip = insn; -skip: - if ((err = binary_buffer_skip(&buffer->bb, - skip))) - return err; - break; - } - } - insn = *insnp | extra_die_flags; - - if (depth == 0) { - if (stmt_list_ptr) { - if (stmt_list > - cu->module->scn_data[DRGN_SCN_DEBUG_LINE]->d_size) { - return binary_buffer_error_at(&buffer->bb, - stmt_list_ptr, - "DW_AT_stmt_list is out of bounds"); - } - if (comp_dir == &comp_dir_is_strx && - (err = read_strx(buffer, comp_dir_strx, - &comp_dir))) 
- return err; - if ((err = read_file_name_table(path_hash_cache, - cu, comp_dir, - stmt_list))) - return err; - } - } else if (specification) { - if (insn & DIE_FLAG_DECLARATION) - declaration = true; - /* - * For now, we don't handle DIEs with - * DW_AT_specification which are themselves - * declarations. We may need to handle - * DW_AT_specification "chains" in the future. - */ - if (!declaration && - (err = index_specification(dindex, specification, - cu->module, die_addr))) - return err; - } - - if (insn & DIE_FLAG_CHILDREN) { - if (sibling && - (insn & DIE_FLAG_TAG_MASK) != DW_TAG_namespace) - buffer->bb.pos = sibling; - else - depth++; - } else if (depth == 0) { - break; - } - } - return NULL; -} - -static struct drgn_error * -drgn_dwarf_index_read_cus(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn) -{ - struct drgn_dwarf_index_pending_cu_vector *cus = - &state->cus[omp_get_thread_num()]; - - struct drgn_error *err; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, scn); - while (binary_buffer_has_next(&buffer.bb)) { - struct drgn_dwarf_index_pending_cu *cu = - drgn_dwarf_index_pending_cu_vector_append_entry(cus); - if (!cu) - return &drgn_enomem; - cu->module = module; - cu->buf = buffer.bb.pos; - uint32_t unit_length32; - if ((err = binary_buffer_next_u32(&buffer.bb, &unit_length32))) - return err; - cu->is_64_bit = unit_length32 == UINT32_C(0xffffffff); - if (cu->is_64_bit) { - uint64_t unit_length64; - if ((err = binary_buffer_next_u64(&buffer.bb, - &unit_length64)) || - (err = binary_buffer_skip(&buffer.bb, - unit_length64))) - return err; - } else { - if ((err = binary_buffer_skip(&buffer.bb, - unit_length32))) - return err; - } - cu->len = buffer.bb.pos - cu->buf; - cu->scn = scn; - } - return NULL; -} - -struct drgn_error * -drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module) -{ - 
struct drgn_error *err; - err = drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); - if (!err && module->scn_data[DRGN_SCN_DEBUG_TYPES]) { - err = drgn_dwarf_index_read_cus(state, module, - DRGN_SCN_DEBUG_TYPES); - } - return err; -} - -bool -drgn_dwarf_index_find_definition(struct drgn_dwarf_index *dindex, - uintptr_t die_addr, - struct drgn_debug_info_module **module_ret, - uintptr_t *addr_ret) -{ - struct drgn_dwarf_index_specification_map_iterator it = - drgn_dwarf_index_specification_map_search(&dindex->specifications, - &die_addr); - if (!it.entry) - return false; - *module_ret = it.entry->module; - *addr_ret = it.entry->addr; - return true; -} - -static bool append_die_entry(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_index_shard *shard, uint8_t tag, - uint64_t file_name_hash, - struct drgn_debug_info_module *module, - uintptr_t addr) -{ - if (shard->dies.size == UINT32_MAX) - return false; - struct drgn_dwarf_index_die *die = - drgn_dwarf_index_die_vector_append_entry(&shard->dies); - if (!die) - return false; - die->next = UINT32_MAX; - die->tag = tag; - if (die->tag == DW_TAG_namespace) { - die->namespace = malloc(sizeof(*die->namespace)); - if (!die->namespace) { - shard->dies.size--; - return false; - } - drgn_dwarf_index_namespace_init(die->namespace, dindex); - } else { - die->file_name_hash = file_name_hash; - } - die->module = module; - die->addr = addr; - - return true; -} - -static bool index_die(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index_cu *cu, const char *name, - uint8_t tag, uint64_t file_name_hash, - struct drgn_debug_info_module *module, uintptr_t addr) -{ - bool success = false; - struct drgn_dwarf_index_die_map_entry entry = { - .key = { name, strlen(name) }, - }; - struct hash_pair hp = drgn_dwarf_index_die_map_hash(&entry.key); - struct drgn_dwarf_index_shard *shard = - &ns->shards[hash_pair_to_shard(hp)]; - omp_set_lock(&shard->lock); - struct drgn_dwarf_index_die_map_iterator it = - 
drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, - hp); - struct drgn_dwarf_index_die *die; - if (!it.entry) { - if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, - module, addr)) - goto err; - entry.value = shard->dies.size - 1; - if (drgn_dwarf_index_die_map_insert_searched(&shard->map, - &entry, hp, - NULL) < 0) - goto err; - die = &shard->dies.data[shard->dies.size - 1]; - goto out; - } - - die = &shard->dies.data[it.entry->value]; - for (;;) { - const uint64_t die_file_name_hash = - die->tag == DW_TAG_namespace ? 0 : die->file_name_hash; - if (die->tag == tag && die_file_name_hash == file_name_hash) - goto out; - - if (die->next == UINT32_MAX) - break; - die = &shard->dies.data[die->next]; - } - - size_t index = die - shard->dies.data; - if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, module, - addr)) - goto err; - die = &shard->dies.data[shard->dies.size - 1]; - shard->dies.data[index].next = shard->dies.size - 1; -out: - if (tag == DW_TAG_namespace) { - struct drgn_dwarf_index_pending_die *pending = - drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies); - if (!pending) - goto err; - pending->cu = cu - ns->dindex->cus.data; - pending->addr = addr; - } - success = true; -err: - omp_unset_lock(&shard->lock); - return success; -} - -/* Second pass: index the actual DIEs. 
*/ -static struct drgn_error * -index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index_cu_buffer *buffer) -{ - struct drgn_error *err; - struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_str = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; - unsigned int depth = 0; - uint8_t depth1_tag = 0; - size_t depth1_addr = 0; - for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; - - uint64_t code; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) - return err; - if (code == 0) { - if (depth-- > 1) - continue; - else - break; - } else if (code > cu->num_abbrev_decls) { - return binary_buffer_error(&buffer->bb, - "unknown abbreviation code %" PRIu64, - code); - } - - uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; - const char *name = NULL; - const char *decl_file_ptr = NULL; - uint64_t decl_file = 0; /* For -Wmaybe-uninitialized. */ - bool declaration = false; - bool specification = false; - const char *sibling = NULL; - uint8_t insn; - uint8_t extra_die_flags = 0; - while ((insn = *insnp++)) { -indirect_insn:; - uint64_t skip, tmp; - switch (insn) { - case ATTRIB_BLOCK: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_SPECIFICATION_REF_UDATA: - specification = true; - /* fallthrough */ - case ATTRIB_LEB128: - case ATTRIB_COMP_DIR_STRX: - if ((err = binary_buffer_skip_leb128(&buffer->bb))) - return err; - break; - case ATTRIB_NAME_STRING: - name = buffer->bb.pos; - /* fallthrough */ - case ATTRIB_STRING: - case ATTRIB_COMP_DIR_STRING: - if ((err = binary_buffer_skip_string(&buffer->bb))) - return err; - 
break; - case ATTRIB_SIBLING_REF1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF_UDATA: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; -sibling: - if (tmp > cu->len) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling is out of bounds"); - } - sibling = cu->buf + tmp; - __builtin_prefetch(sibling); - if (sibling < buffer->bb.pos) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling points backwards"); - } - break; - case ATTRIB_NAME_STRP4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto strp; - case ATTRIB_NAME_STRP8: - if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) - return err; -strp: - if (tmp >= debug_str->d_size) { - return binary_buffer_error(&buffer->bb, - "DW_AT_name is out of bounds"); - } - name = (const char *)debug_str->d_buf + tmp; - __builtin_prefetch(name); - break; - case ATTRIB_NAME_STRX: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; - goto name_strx; - case ATTRIB_NAME_STRX1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto name_strx; - case ATTRIB_NAME_STRX2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &tmp))) - return err; - goto name_strx; - case ATTRIB_NAME_STRX3: - if ((err = binary_buffer_next_uint(&buffer->bb, - 3, &tmp))) - return err; - goto name_strx; - case ATTRIB_NAME_STRX4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; -name_strx: - if ((err = read_strx(buffer, tmp, &name))) - return 
err; - __builtin_prefetch(name); - break; - case ATTRIB_COMP_DIR_STRP4: - case ATTRIB_COMP_DIR_LINE_STRP4: - case ATTRIB_STR_OFFSETS_BASE4: - case ATTRIB_STMT_LIST_LINEPTR4: - skip = 4; - goto skip; - case ATTRIB_COMP_DIR_STRP8: - case ATTRIB_COMP_DIR_LINE_STRP8: - case ATTRIB_STR_OFFSETS_BASE8: - case ATTRIB_STMT_LIST_LINEPTR8: - skip = 8; - goto skip; - case ATTRIB_DECL_FILE_DATA1: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_DATA2: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_DATA4: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_DATA8: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_UDATA: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_IMPLICIT: - decl_file_ptr = buffer->bb.pos; - decl_file = 0; - for (int shift = 0; ; shift += 7) { - uint8_t byte = *insnp++; - decl_file |= (uint64_t)(byte & 0x7f) << shift; - if (!(byte & 0x80)) - break; - } - break; - case ATTRIB_DECLARATION_FLAG: { - uint8_t flag; - if ((err = binary_buffer_next_u8(&buffer->bb, - &flag))) - return err; - if (flag) - declaration = true; - break; - } - case ATTRIB_SPECIFICATION_REF1: - specification = true; - /* fallthrough */ - case ATTRIB_COMP_DIR_STRX1: - skip = 1; - goto skip; - case ATTRIB_SPECIFICATION_REF2: - specification = true; - /* fallthrough */ - case ATTRIB_COMP_DIR_STRX2: - skip = 2; - goto skip; - case ATTRIB_COMP_DIR_STRX3: - skip = 3; - goto skip; - case ATTRIB_SPECIFICATION_REF4: - case ATTRIB_SPECIFICATION_REF_ADDR4: - specification = true; - /* fallthrough 
*/ - case ATTRIB_COMP_DIR_STRX4: - skip = 4; - goto skip; - case ATTRIB_SPECIFICATION_REF8: - case ATTRIB_SPECIFICATION_REF_ADDR8: - specification = true; - skip = 8; - goto skip; - case ATTRIB_INDIRECT: - case ATTRIB_SIBLING_INDIRECT: - case ATTRIB_NAME_INDIRECT: - case ATTRIB_COMP_DIR_INDIRECT: - case ATTRIB_STR_OFFSETS_BASE_INDIRECT: - case ATTRIB_STMT_LIST_INDIRECT: - case ATTRIB_DECL_FILE_INDIRECT: - case ATTRIB_DECLARATION_INDIRECT: - case ATTRIB_SPECIFICATION_INDIRECT: - if ((err = read_indirect_insn(cu, &buffer->bb, - insn, &insn, - &extra_die_flags))) - return err; - if (insn) - goto indirect_insn; - else - continue; - default: - skip = insn; -skip: - if ((err = binary_buffer_skip(&buffer->bb, - skip))) - return err; - break; - } - } - insn = *insnp | extra_die_flags; - - uint8_t tag = insn & DIE_FLAG_TAG_MASK; - if (depth == 1) { - depth1_tag = tag; - depth1_addr = die_addr; - } - if (depth == (tag == DW_TAG_enumerator ? 2 : 1) && name && - !specification) { - if (insn & DIE_FLAG_DECLARATION) - declaration = true; - struct drgn_debug_info_module *module = cu->module; - if (tag == DW_TAG_enumerator) { - if (depth1_tag != DW_TAG_enumeration_type) - goto next; - /* - * NB: the enumerator name points to the - * enumeration_type DIE. Also, enumerators can't - * be declared in C/C++, so we don't check for - * that. 
- */ - die_addr = depth1_addr; - } else if (declaration && - !drgn_dwarf_index_find_definition(ns->dindex, - die_addr, - &module, - &die_addr)) { - goto next; - } - - uint64_t file_name_hash; - if (decl_file_ptr) { - if (decl_file >= cu->num_file_names) { - return binary_buffer_error_at(&buffer->bb, - decl_file_ptr, - "invalid DW_AT_decl_file %" PRIu64, - decl_file); - } - file_name_hash = cu->file_name_hashes[decl_file]; - } else { - file_name_hash = 0; - } - if (!index_die(ns, cu, name, tag, file_name_hash, - module, die_addr)) - return &drgn_enomem; - } - -next: - if (insn & DIE_FLAG_CHILDREN) { - /* - * We must descend into the children of enumeration_type - * DIEs to index enumerator DIEs. We don't want to skip - * over the children of the top-level DIE even if it has - * a sibling pointer. - */ - if (sibling && tag != DW_TAG_enumeration_type && - depth > 0) - buffer->bb.pos = sibling; - else - depth++; - } else if (depth == 0) { - break; - } - } - return NULL; -} - -static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) -{ - for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { - struct drgn_dwarf_index_shard *shard = &dindex->global.shards[i]; - /* - * Because we're deleting everything that was added since the - * last update, we can just shrink the dies array to the first - * entry that was added for this update. - */ - while (shard->dies.size) { - struct drgn_dwarf_index_die *die = - &shard->dies.data[shard->dies.size - 1]; - if (die->module->state == - DRGN_DEBUG_INFO_MODULE_INDEXED) - break; - if (die->tag == DW_TAG_namespace) { - drgn_dwarf_index_namespace_deinit(die->namespace); - free(die->namespace); - } - shard->dies.size--; - } - - /* - * The new entries may be chained off of existing entries; - * unchain them. Note that any entries chained off of the new - * entries must also be new, so there's no need to preserve - * them. 
- */ - for (size_t index = 0; index < shard->dies.size; index++) { - struct drgn_dwarf_index_die *die = - &shard->dies.data[index]; - if (die->next != UINT32_MAX && - die->next >= shard->dies.size) - die->next = UINT32_MAX; - } - - /* Finally, delete the new entries in the map. */ - for (struct drgn_dwarf_index_die_map_iterator it = - drgn_dwarf_index_die_map_first(&shard->map); - it.entry; ) { - if (it.entry->value >= shard->dies.size) { - it = drgn_dwarf_index_die_map_delete_iterator(&shard->map, - it); - } else { - it = drgn_dwarf_index_die_map_next(it); - } - } - } - - for (struct drgn_dwarf_index_specification_map_iterator it = - drgn_dwarf_index_specification_map_first(&dindex->specifications); - it.entry; ) { - if (it.entry->module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { - it = drgn_dwarf_index_specification_map_next(it); - } else { - it = drgn_dwarf_index_specification_map_delete_iterator(&dindex->specifications, - it); - } - } -} - -struct drgn_error * -drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) -{ - struct drgn_dwarf_index *dindex = state->dindex; - - if (!drgn_dwarf_index_namespace_shards_init(&dindex->global)) - return &drgn_enomem; - - size_t old_cus_size = dindex->cus.size; - size_t new_cus_size = old_cus_size; - for (size_t i = 0; i < state->max_threads; i++) - new_cus_size += state->cus[i].size; - if (!drgn_dwarf_index_cu_vector_reserve(&dindex->cus, new_cus_size)) - return &drgn_enomem; - for (size_t i = 0; i < state->max_threads; i++) { - for (size_t j = 0; j < state->cus[i].size; j++) { - struct drgn_dwarf_index_pending_cu *pending_cu = - &state->cus[i].data[j]; - dindex->cus.data[dindex->cus.size++] = (struct drgn_dwarf_index_cu){ - .module = pending_cu->module, - .buf = pending_cu->buf, - .len = pending_cu->len, - .is_64_bit = pending_cu->is_64_bit, - .scn = pending_cu->scn, - .file_name_hashes = - (uint64_t *)no_file_name_hashes, - .num_file_names = - array_size(no_file_name_hashes), - }; - } - } - - struct 
drgn_error *err = NULL; - #pragma omp parallel - { - struct path_hash_cache path_hash_cache; - path_hash_vector_init(&path_hash_cache.directories); - path_hash_cache.entry_formats = NULL; - path_hash_cache.entry_formats_capacity = 0; - path_hash_cache.first_chunk = - malloc(sizeof(struct path_hash_chunk)); - if (path_hash_cache.first_chunk) { - path_hash_cache.first_chunk->next = NULL; - } else { - #pragma omp critical(drgn_dwarf_index_update_error) - if (!err) - err = &drgn_enomem; - } - #pragma omp for schedule(dynamic) - for (size_t i = old_cus_size; i < dindex->cus.size; i++) { - if (err) - continue; - struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; - struct drgn_dwarf_index_cu_buffer cu_buffer; - drgn_dwarf_index_cu_buffer_init(&cu_buffer, cu); - struct drgn_error *cu_err = read_cu(&cu_buffer); - if (!cu_err) - cu_err = index_cu_first_pass(state->dindex, - &cu_buffer, - &path_hash_cache); - if (cu_err) { - #pragma omp critical(drgn_dwarf_index_update_error) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - free(path_hash_cache.entry_formats); - path_hash_vector_deinit(&path_hash_cache.directories); - struct path_hash_chunk *chunk = path_hash_cache.first_chunk; - while (chunk) { - struct path_hash_chunk *next_chunk = chunk->next; - free(chunk); - chunk = next_chunk; - } - } - if (err) - goto err; - - #pragma omp parallel for schedule(dynamic) - for (size_t i = old_cus_size; i < dindex->cus.size; i++) { - if (err) - continue; - struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu_header_size(cu); - struct drgn_error *cu_err = - index_cu_second_pass(&dindex->global, &buffer); - if (cu_err) { - #pragma omp critical(drgn_dwarf_index_update_error) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - if (err) { - drgn_dwarf_index_rollback(dindex); -err: - for (size_t i = old_cus_size; i < 
dindex->cus.size; i++) - drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - dindex->cus.size = old_cus_size; - } - return err; -} - -static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) -{ - if (ns->pending_dies.size == 0) - return NULL; - - if (ns->saved_err) - return drgn_error_copy(ns->saved_err); - - if (!drgn_dwarf_index_namespace_shards_init(ns)) - return &drgn_enomem; - - struct drgn_error *err = NULL; - #pragma omp parallel for schedule(dynamic) - for (size_t i = 0; i < ns->pending_dies.size; i++) { - if (!err) { - struct drgn_dwarf_index_pending_die *pending = - &ns->pending_dies.data[i]; - struct drgn_dwarf_index_cu *cu = - &ns->dindex->cus.data[pending->cu]; - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos = (char *)pending->addr; - struct drgn_error *cu_err = - index_cu_second_pass(ns, &buffer); - if (cu_err) { - #pragma omp critical(drgn_index_namespace) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - } - if (err) { - ns->saved_err = err; - return drgn_error_copy(ns->saved_err); - } - ns->pending_dies.size = 0; - drgn_dwarf_index_pending_die_vector_shrink_to_fit(&ns->pending_dies); - return err; -} - -struct drgn_error * -drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index_namespace *ns, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags) -{ - struct drgn_error *err = index_namespace(ns); - if (err) - return err; - if (ns->shards) { - struct nstring key = { name, name_len }; - struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); - it->shard = &ns->shards[hash_pair_to_shard(hp)]; - struct drgn_dwarf_index_die_map_iterator map_it = - drgn_dwarf_index_die_map_search_hashed(&it->shard->map, - &key, hp); - it->index = map_it.entry ? 
map_it.entry->value : UINT32_MAX; - } else { - it->shard = NULL; - it->index = UINT32_MAX; - } - it->tags = tags; - it->num_tags = num_tags; - return NULL; -} - -static inline bool -drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index_die *die) -{ - if (it->num_tags == 0) - return true; - for (size_t i = 0; i < it->num_tags; i++) { - if (die->tag == it->tags[i]) - return true; - } - return false; -} - -struct drgn_dwarf_index_die * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) -{ - while (it->index != UINT32_MAX) { - struct drgn_dwarf_index_die *die = - &it->shard->dies.data[it->index]; - it->index = die->next; - if (drgn_dwarf_index_iterator_matches_tag(it, die)) - return die; - } - return NULL; -} - -struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, - Dwarf_Die *die_ret) -{ - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(die->module->dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdwfl(); - uintptr_t start = - (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - size_t size = die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; - if (die->addr >= start && die->addr < start + size) { - if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) - return drgn_error_libdw(); - } else { - start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; - if (!dwarf_offdie_types(dwarf, die->addr - start, die_ret)) - return drgn_error_libdw(); - } - return NULL; -} diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h deleted file mode 100644 index b38643b23..000000000 --- a/libdrgn/dwarf_index.h +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0-or-later - -/** - * @file - * - * DWARF debugging information index. - * - * See @ref DwarfIndex. 
- */ - -#ifndef DRGN_DWARF_INDEX_H -#define DRGN_DWARF_INDEX_H - -#include -#include -#include -#include - -#ifdef _OPENMP -#include -#else -typedef struct {} omp_lock_t; -#define omp_init_lock(lock) do {} while (0) -#define omp_destroy_lock(lock) do {} while (0) -#define omp_set_lock(lock) do {} while (0) -#define omp_unset_lock(lock) do {} while (0) -static inline int omp_get_thread_num(void) -{ - return 0; -} -static inline int omp_get_max_threads(void) -{ - return 1; -} -#endif - -#include "hash_table.h" -#include "vector.h" - -/** - * @ingroup Internals - * - * @defgroup DwarfIndex DWARF index - * - * DWARF debugging information index. - * - * A core part of debugger functionality is looking up types, variables, etc. by - * name. A @ref drgn_dwarf_index combines debugging information from all object - * files and indexes it by name. - * - * Because this indexing step happens as part of startup, it is parallelized and - * highly optimized. This is implemented as a homegrown DWARF parser specialized - * for the task of scanning over DIEs quickly. - * - * Although the DWARF standard defines ".debug_pubnames" and ".debug_names" - * sections, GCC and Clang currently don't emit them by default, so we don't use - * them. - * - * @{ - */ - -/* - * An indexed DIE. - * - * DIEs with the same name but different tags or files are considered distinct. - * We only compare the hash of the file name, not the string value, because a - * 64-bit collision is unlikely enough, especially when also considering the - * name and tag. - */ -struct drgn_dwarf_index_die { - /* - * The next DIE with the same name (as an index into - * drgn_dwarf_index_shard::dies), or UINT32_MAX if this is the last DIE. - */ - uint32_t next; - uint8_t tag; - union { - /* - * If tag != DW_TAG_namespace (namespaces are merged, so they - * don't need this). - */ - uint64_t file_name_hash; - /* If tag == DW_TAG_namespace. 
*/ - struct drgn_dwarf_index_namespace *namespace; - }; - struct drgn_debug_info_module *module; - uintptr_t addr; -}; - -DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct nstring, uint32_t) -DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) - -struct drgn_dwarf_index_shard { - /** @privatesection */ - omp_lock_t lock; - /* - * Map from name to list of DIEs with that name (as the index in - * drgn_dwarf_index_shard::dies of the first DIE with that name). - */ - struct drgn_dwarf_index_die_map map; - /* - * We store all entries in a shard as a single array, which is more - * cache friendly. - */ - struct drgn_dwarf_index_die_vector dies; -}; - -/* A DIE with a DW_AT_specification attribute. */ -struct drgn_dwarf_index_specification { - /* - * Address of non-defining declaration DIE referenced by - * DW_AT_specification. - */ - uintptr_t declaration; - /* Module and address of DIE. */ - struct drgn_debug_info_module *module; - uintptr_t addr; -}; - -DEFINE_HASH_TABLE_TYPE(drgn_dwarf_index_specification_map, - struct drgn_dwarf_index_specification) - -DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) - -DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, - struct drgn_dwarf_index_pending_die) - -/** Mapping from names/tags to DIEs/nested namespaces. */ -struct drgn_dwarf_index_namespace { - /** - * Index shards. - * - * This is sharded to reduce lock contention. - */ - struct drgn_dwarf_index_shard *shards; - /** Parent DWARF index. */ - struct drgn_dwarf_index *dindex; - /** DIEs we have not indexed yet. */ - struct drgn_dwarf_index_pending_die_vector pending_dies; - /** Saved error from a previous index. */ - struct drgn_error *saved_err; -}; - -/** - * Fast index of DWARF debugging information. - * - * This interface indexes DWARF debugging information by name and tag, - * deduplicating information which exists in multiple compilation units or - * files. 
It is much faster for this task than other generic DWARF parsing - * libraries. - * - * Searches in the index are done with a @ref drgn_dwarf_index_iterator. - */ -struct drgn_dwarf_index { - /** Global namespace. */ - struct drgn_dwarf_index_namespace global; - /** - * Map from address of DIE referenced by DW_AT_specification to DIE that - * references it. This is used to resolve DIEs with DW_AT_declaration to - * their definition. - * - * This is not sharded because there typically aren't enough of these in - * a program to cause contention. - */ - struct drgn_dwarf_index_specification_map specifications; - /** Indexed compilation units. */ - struct drgn_dwarf_index_cu_vector cus; -}; - -/** Initialize a @ref drgn_dwarf_index. */ -void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex); - -/** - * Deinitialize a @ref drgn_dwarf_index. - * - * After this is called, anything belonging to the index should no longer be - * accessed. - */ -void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex); - -DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_cu_vector, - struct drgn_dwarf_index_pending_cu) - -/** State tracked while updating a @ref drgn_dwarf_index. */ -struct drgn_dwarf_index_update_state { - struct drgn_dwarf_index *dindex; - /** Per-thread arrays of CUs to be indexed. */ - struct drgn_dwarf_index_pending_cu_vector *cus; - size_t max_threads; -}; - -/** - * Initialize state for updating a @ref drgn_dwarf_index. - * - * @return @c true on success, @c false on failure to allocate memory. - */ -bool -drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, - struct drgn_dwarf_index *dindex); - -/** Deinitialize state for updating a @ref drgn_dwarf_index. */ -void -drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state); - -/** Read a module for updating a @ref drgn_dwarf_index. 
*/ -struct drgn_error * -drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module); - -/** - * Update a @ref drgn_dwarf_index. - * - * This should be called once all modules have been read with @ref - * drgn_dwarf_index_read_module() to finish indexing those modules. - */ -struct drgn_error * -drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state); - -/** - * Iterator over DWARF debugging information. - * - * An iterator is initialized with @ref drgn_dwarf_index_iterator_init(). It is - * advanced with @ref drgn_dwarf_index_iterator_next(). - */ -struct drgn_dwarf_index_iterator { - /** @privatesection */ - const uint64_t *tags; - size_t num_tags; - struct drgn_dwarf_index_shard *shard; - uint32_t index; -}; - -/** - * Create an iterator over DIEs in a DWARF index namespace. - * - * @param[out] it DWARF index iterator to initialize. - * @param[in] ns DWARF index namespace. - * @param[in] name Name of DIE to search for. - * @param[in] name_len Length of @c name. - * @param[in] tags List of DIE tags to search for. - * @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index_namespace *ns, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags); - -/** - * Get the next matching DIE from a DWARF index iterator. - * - * If matching any name, this is O(n), where n is the number of indexed DIEs. If - * matching by name, this is O(1) on average and O(n) worst case. - * - * Note that this returns the parent @c DW_TAG_enumeration_type for indexed @c - * DW_TAG_enumerator DIEs. - * - * @param[in] it DWARF index iterator. - * @return Next DIE, or @c NULL if there are no more matching DIEs. 
- */ -struct drgn_dwarf_index_die * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it); - -/** - * Get a @c Dwarf_Die from a @ref drgn_dwarf_index_die. - * - * @param[in] die Indexed DIE. - * @param[out] die_ret Returned DIE. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, - Dwarf_Die *die_ret); - - -/** - * Find a definition corresponding to a declaration DIE. - * - * This finds the address of a DIE with a @c DW_AT_specification attribute that - * refers to the given address. - * - * @param[in] die_addr The address of the declaration DIE. - * @param[out] module_ret Returned module containing the definition DIE. - * @param[out] addr_ret Returned address of the definition DIE. - * @return @c true if a definition DIE was found, @c false if not (in which case - * *@p module_ret and *@p addr_ret are not modified). - */ -bool -drgn_dwarf_index_find_definition(struct drgn_dwarf_index *dindex, - uintptr_t die_addr, - struct drgn_debug_info_module **module_ret, - uintptr_t *addr_ret); - -/** @} */ - -#endif /* DRGN_DWARF_INDEX_H */ diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index fabae59d2..c296295b2 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -14,9 +14,26 @@ #include #include +#ifdef _OPENMP +#include +#else +typedef struct {} omp_lock_t; +#define omp_init_lock(lock) do {} while (0) +#define omp_destroy_lock(lock) do {} while (0) +#define omp_set_lock(lock) do {} while (0) +#define omp_unset_lock(lock) do {} while (0) +static inline int omp_get_thread_num(void) +{ + return 0; +} +static inline int omp_get_max_threads(void) +{ + return 1; +} +#endif + #include "array.h" #include "debug_info.h" // IWYU pragma: associated -#include "dwarf_index.h" #include "error.h" #include "language.h" #include "lazy_object.h" @@ -29,100 +46,3010 @@ #include "type.h" #include "util.h" -DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, 
ptr_key_hash_pair, scalar_key_eq) +void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module) +{ + free(module->dwarf.fdes); + free(module->dwarf.cies); +} + +static inline uintptr_t +drgn_dwarf_specification_to_key(const struct drgn_dwarf_specification *entry) +{ + return entry->declaration; +} +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_specification_map, + drgn_dwarf_specification_to_key, int_key_hash_pair, + scalar_key_eq) + +/** + * Placeholder for drgn_dwarf_index_cu::file_name_hashes if the CU has no + * filenames. + */ +static const uint64_t no_file_name_hashes[1] = { 0 }; + +/** DWARF compilation unit indexed in a @ref drgn_namespace_dwarf_index. */ +struct drgn_dwarf_index_cu { + /** Module containing CU. */ + struct drgn_debug_info_module *module; + /** Address of CU data. */ + const char *buf; + /** Length of CU data. */ + size_t len; + /** DWARF version from CU header. */ + uint8_t version; + /** `DW_UT_*` type from CU header. */ + uint8_t unit_type; + /** Address size from CU header. */ + uint8_t address_size; + /** Whether CU uses 64-bit DWARF format. */ + bool is_64_bit; + /** + * Section containing CU (@ref DRGN_SCN_DEBUG_INFO or @ref + * DRGN_SCN_DEBUG_TYPES). + */ + enum drgn_debug_info_scn scn; + /** + * Mapping from DWARF abbreviation code to instructions for that + * abbreviation. + * + * This is indexed on the DWARF abbreviation code minus one. I.e., + * `abbrev_insns[abbrev_decls[abbrev_code - 1]]` is the first + * instruction for that abbreviation code. + * + * Technically, abbreviation codes don't have to be sequential. In + * practice, GCC and Clang seem to always generate sequential codes + * starting at one, so we can get away with a flat array. + */ + uint32_t *abbrev_decls; + /** Number of abbreviation codes. */ + size_t num_abbrev_decls; + /** + * Buffer of @ref drgn_dwarf_index_abbrev_insn instructions for all + * abbreviation codes. + * + * These are all stored in one array for cache locality. 
+ */ + uint8_t *abbrev_insns; + /** + * Hashes of file names from line number program header for this CU, + * indexed by the line number program file numbers. + */ + uint64_t *file_name_hashes; + /** Number of file names in the line number program header. */ + size_t num_file_names; + /** + * Pointer in `.debug_str_offsets` section to string offset entries for + * this CU. + */ + const char *str_offsets; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) + +DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) + +/** DIE which needs to be indexed. */ +struct drgn_dwarf_index_pending_die { + /** + * CU containing DIE (as an index into @ref drgn_dwarf_info::index_cus). + */ + size_t cu; + /** Address of DIE */ + uintptr_t addr; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) + +/** DIE indexed in a @ref drgn_namespace_dwarf_index. */ +struct drgn_dwarf_index_die { + /** + * The next DIE with the same name (as an index into @ref + * drgn_dwarf_index_shard::dies), or `UINT32_MAX` if this is the last + * DIE. + */ + uint32_t next; + /** DIE tag. */ + uint8_t tag; + union { + /** + * Hash of filename containing declaration. + * + * DIEs with the same name but different tags or files are + * considered distinct. We only compare the hash of the file + * name, not the string value, because a 64-bit collision is + * unlikely enough, especially when also considering the name + * and tag. + * + * This is used if `tag != DW_TAG_namespace` (namespaces are + * merged, so they don't need this). + */ + uint64_t file_name_hash; + /** Nested namespace if `tag == DW_TAG_namespace`. */ + struct drgn_namespace_dwarf_index *namespace; + }; + /** Module containing this DIE. */ + struct drgn_debug_info_module *module; + /** Address of this DIE. 
*/ + uintptr_t addr; +}; + +DEFINE_HASH_MAP(drgn_dwarf_index_die_map, struct nstring, uint32_t, + nstring_hash_pair, nstring_eq) +DEFINE_VECTOR(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) + +static const size_t DRGN_DWARF_INDEX_SHARD_BITS = 8; +static const size_t DRGN_DWARF_INDEX_NUM_SHARDS = 1 << DRGN_DWARF_INDEX_SHARD_BITS; + +/** Shard of a @ref drgn_namespace_dwarf_index. */ +struct drgn_dwarf_index_shard { + /** Mutex for this shard. */ + omp_lock_t lock; + /** + * Map from name to list of DIEs with that name (as the index in @ref + * drgn_dwarf_index_shard::dies of the first DIE with that name). + */ + struct drgn_dwarf_index_die_map map; + /** + * Entries in @ref drgn_dwarf_index_shard::map. + * + * These are stored in one array for cache locality. + */ + struct drgn_dwarf_index_die_vector dies; +}; + +static void +drgn_namespace_dwarf_index_init(struct drgn_namespace_dwarf_index *dindex, + struct drgn_debug_info *dbinfo) +{ + dindex->shards = NULL; + dindex->dbinfo = dbinfo; + drgn_dwarf_index_pending_die_vector_init(&dindex->pending_dies); + dindex->saved_err = NULL; +} + +static void +drgn_namespace_dwarf_index_deinit(struct drgn_namespace_dwarf_index *dindex) +{ + drgn_error_destroy(dindex->saved_err); + drgn_dwarf_index_pending_die_vector_deinit(&dindex->pending_dies); + if (dindex->shards) { + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; + for (size_t j = 0; j < shard->dies.size; j++) { + struct drgn_dwarf_index_die *die = &shard->dies.data[j]; + if (die->tag == DW_TAG_namespace) { + drgn_namespace_dwarf_index_deinit(die->namespace); + free(die->namespace); + } + } + drgn_dwarf_index_die_vector_deinit(&shard->dies); + drgn_dwarf_index_die_map_deinit(&shard->map); + omp_destroy_lock(&shard->lock); + } + free(dindex->shards); + } +} + +void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo) +{ + drgn_namespace_dwarf_index_init(&dbinfo->dwarf.global, dbinfo); + 
drgn_dwarf_specification_map_init(&dbinfo->dwarf.specifications);
+	drgn_dwarf_index_cu_vector_init(&dbinfo->dwarf.index_cus);
+	drgn_dwarf_type_map_init(&dbinfo->dwarf.types);
+	drgn_dwarf_type_map_init(&dbinfo->dwarf.cant_be_incomplete_array_types);
+	dbinfo->dwarf.depth = 0;
+}
+
+/**
+ * Free the arrays owned by an indexed CU.
+ *
+ * `file_name_hashes` is only freed when it is not the shared static
+ * `no_file_name_hashes` placeholder.
+ */
+static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu)
+{
+	if (cu->file_name_hashes != no_file_name_hashes)
+		free(cu->file_name_hashes);
+	free(cu->abbrev_insns);
+	free(cu->abbrev_decls);
+}
+
+/**
+ * Release everything set up by drgn_dwarf_info_init(), in the reverse order of
+ * initialization. Each indexed CU is deinitialized before the CU vector itself
+ * is freed.
+ */
+void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo)
+{
+	drgn_dwarf_type_map_deinit(&dbinfo->dwarf.cant_be_incomplete_array_types);
+	drgn_dwarf_type_map_deinit(&dbinfo->dwarf.types);
+	for (size_t i = 0; i < dbinfo->dwarf.index_cus.size; i++)
+		drgn_dwarf_index_cu_deinit(&dbinfo->dwarf.index_cus.data[i]);
+	drgn_dwarf_index_cu_vector_deinit(&dbinfo->dwarf.index_cus);
+	drgn_dwarf_specification_map_deinit(&dbinfo->dwarf.specifications);
+	drgn_namespace_dwarf_index_deinit(&dbinfo->dwarf.global);
+}
+
+/*
+ * Diagnostics.
+ */
+
+#define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x"
+/*
+ * Size of the format string (including its NUL terminator) with the 4-byte
+ * "%02x" directive replaced by the widest possible hexadecimal rendering of an
+ * int (two digits per byte).
+ */
+#define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int))
+
+/**
+ * Get the name of a DWARF tag.
+ *
+ * @param[in] tag DWARF tag value.
+ * @param[out] buf Scratch buffer of at least @c DW_TAG_BUF_LEN bytes. It is
+ * only written if the tag is unknown.
+ * @return Static string if the tag is known or @p buf if the tag is unknown
+ * (populated with a description).
+ */
+static const char *dw_tag_str(int tag, char buf[DW_TAG_BUF_LEN])
+{
+	switch (tag) {
+#define DWARF_ONE_KNOWN_DW_TAG(name, value) case value: return "DW_TAG_" #name;
+	DWARF_ALL_KNOWN_DW_TAG
+#undef DWARF_ONE_KNOWN_DW_TAG
+	default:
+		/*
+		 * Bound the write by the documented buffer size so that a
+		 * future change to the format cannot overflow the caller's
+		 * buffer, and pass the unsigned type that %x expects.
+		 */
+		snprintf(buf, DW_TAG_BUF_LEN, DW_TAG_UNKNOWN_FORMAT,
+			 (unsigned int)tag);
+		return buf;
+	}
+}
+
+/** Like @ref dw_tag_str(), but takes a @c Dwarf_Die.
*/ +static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) +{ + return dw_tag_str(dwarf_tag(die), buf); +} + +static struct drgn_error * +drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, + const char *message) +{ + uintptr_t p = (uintptr_t)ptr; + int end_match = -1; + for (int i = 0; i < array_size(module->scn_data); i++) { + if (!module->scn_data[i]) + continue; + uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; + uintptr_t end = start + module->scn_data[i]->d_size; + if (start <= p) { + if (p < end) { + return drgn_error_debug_info_scn(module, i, ptr, + message); + } else if (p == end) { + end_match = i; + } + } + } + if (end_match != -1) { + /* + * The pointer doesn't lie within a section, but it does point + * to the end of a section. + */ + return drgn_error_debug_info_scn(module, end_match, ptr, + message); + } + /* We couldn't find the section containing the pointer. */ + const char *name = dwfl_module_info(module->dwfl_module, NULL, NULL, + NULL, NULL, NULL, NULL, NULL); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); +} + +static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) +{ + if (address_size < 1 || address_size > 8) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported address size %" PRIu8, + address_size); + } + return NULL; +} + +/* + * Indexing. + * + * A core part of debugger functionality is looking up types, variables, etc. by + * name. DWARF information can be very large, so scanning through all of it for + * every lookup would be too slow. Instead, when we load debugging information, + * we build an index of DIEs by name. + * + * This indexing step is parallelized and highly optimized. It is implemented as + * a bespoke DWARF parser specialized for the task of scanning over DIEs + * quickly. 
+ * + * Although the DWARF standard defines ".debug_pubnames" and ".debug_names" + * sections, GCC and Clang currently don't emit them by default, so we don't use + * them. + * + * Every namespace has a separate index (@ref drgn_namespace_dwarf_index). The + * global namespace is indexed immediately upon loading debugging information. + * Other namespaces are indexed when they are first accessed. + */ + +struct drgn_dwarf_index_pending_cu { + struct drgn_debug_info_module *module; + const char *buf; + size_t len; + bool is_64_bit; + enum drgn_debug_info_scn scn; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) + +/** + * DWARF abbreviation table instructions. + * + * The DWARF abbreviation table can be large and contains more information than + * is strictly necessary for indexing. So, we translate the table into a series + * of instructions which specify how to process a DIE. This instruction stream + * omits unnecessary information and is more compact (and thus more cache + * friendly), which is important for the tight DIE parsing loop. + */ +enum drgn_dwarf_index_abbrev_insn { + /* + * Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to + * be skipped over. + */ + INSN_MAX_SKIP = 199, + + /* These instructions indicate an attribute that can be skipped over. */ + INSN_SKIP_BLOCK, + INSN_SKIP_BLOCK1, + INSN_SKIP_BLOCK2, + INSN_SKIP_BLOCK4, + INSN_SKIP_LEB128, + INSN_SKIP_STRING, + + /* These instructions indicate an attribute that should be parsed. 
*/ + INSN_SIBLING_REF1, + INSN_SIBLING_REF2, + INSN_SIBLING_REF4, + INSN_SIBLING_REF8, + INSN_SIBLING_REF_UDATA, + INSN_NAME_STRP4, + INSN_NAME_STRP8, + INSN_NAME_STRING, + INSN_NAME_STRX, + INSN_NAME_STRX1, + INSN_NAME_STRX2, + INSN_NAME_STRX3, + INSN_NAME_STRX4, + INSN_COMP_DIR_STRP4, + INSN_COMP_DIR_STRP8, + INSN_COMP_DIR_LINE_STRP4, + INSN_COMP_DIR_LINE_STRP8, + INSN_COMP_DIR_STRING, + INSN_COMP_DIR_STRX, + INSN_COMP_DIR_STRX1, + INSN_COMP_DIR_STRX2, + INSN_COMP_DIR_STRX3, + INSN_COMP_DIR_STRX4, + INSN_STR_OFFSETS_BASE4, + INSN_STR_OFFSETS_BASE8, + INSN_STMT_LIST_LINEPTR4, + INSN_STMT_LIST_LINEPTR8, + INSN_DECL_FILE_DATA1, + INSN_DECL_FILE_DATA2, + INSN_DECL_FILE_DATA4, + INSN_DECL_FILE_DATA8, + INSN_DECL_FILE_UDATA, + /* + * This instruction is the only one with an operand: the ULEB128 + * implicit constant. + */ + INSN_DECL_FILE_IMPLICIT, + INSN_DECLARATION_FLAG, + INSN_SPECIFICATION_REF1, + INSN_SPECIFICATION_REF2, + INSN_SPECIFICATION_REF4, + INSN_SPECIFICATION_REF8, + INSN_SPECIFICATION_REF_UDATA, + INSN_SPECIFICATION_REF_ADDR4, + INSN_SPECIFICATION_REF_ADDR8, + INSN_INDIRECT, + INSN_SIBLING_INDIRECT, + INSN_NAME_INDIRECT, + INSN_COMP_DIR_INDIRECT, + INSN_STR_OFFSETS_BASE_INDIRECT, + INSN_STMT_LIST_INDIRECT, + INSN_DECL_FILE_INDIRECT, + INSN_DECLARATION_INDIRECT, + INSN_SPECIFICATION_INDIRECT, + + NUM_INSNS, + + /* + * Every sequence of instructions for a DIE is terminated by a zero + * byte. + */ + INSN_END = 0, + + /* + * The byte after INSN_END contains the DIE flags, which are a bitmask + * of flags combined with the DWARF tag (which is zero if the DIE does + * not need to be indexed). + */ + INSN_DIE_FLAG_TAG_MASK = 0x3f, + /* DIE is a declaration. */ + INSN_DIE_FLAG_DECLARATION = 0x40, + /* DIE has children. */ + INSN_DIE_FLAG_CHILDREN = 0x80, +}; + +/* Instructions are 8 bits. 
*/
+static_assert(NUM_INSNS - 1 == UINT8_MAX,
+	      "maximum DWARF index instruction is invalid");
+
+DEFINE_VECTOR(uint8_vector, uint8_t)
+DEFINE_VECTOR(uint32_vector, uint32_t)
+DEFINE_VECTOR(uint64_vector, uint64_t)
+
+/**
+ * @ref binary_buffer over the data of a @ref drgn_dwarf_index_cu, together
+ * with the CU itself so that the error callback can recover it.
+ */
+struct drgn_dwarf_index_cu_buffer {
+	struct binary_buffer bb;
+	struct drgn_dwarf_index_cu *cu;
+};
+
+/**
+ * Error callback installed by drgn_dwarf_index_cu_buffer_init().
+ *
+ * @param[in] bb Buffer embedded in a @ref drgn_dwarf_index_cu_buffer.
+ * @param[in] pos Position in the buffer that the error refers to.
+ * @param[in] message Error message.
+ * @return Error attributed to the CU's module and section.
+ */
+static struct drgn_error *
+drgn_dwarf_index_cu_buffer_error(struct binary_buffer *bb, const char *pos,
+				 const char *message)
+{
+	struct drgn_dwarf_index_cu_buffer *buffer =
+		container_of(bb, struct drgn_dwarf_index_cu_buffer, bb);
+	/*
+	 * A CU may come from either .debug_info or .debug_types, so report the
+	 * section recorded in the CU rather than assuming .debug_info.
+	 */
+	return drgn_error_debug_info_scn(buffer->cu->module, buffer->cu->scn,
+					 pos, message);
+}
+
+/**
+ * Initialize @p buffer to read the data of @p cu (`cu->buf`, `cu->len`) using
+ * the byte order of the CU's module.
+ */
+static void
+drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer,
+				struct drgn_dwarf_index_cu *cu)
+{
+	binary_buffer_init(&buffer->bb, cu->buf, cu->len,
+			   drgn_platform_is_little_endian(&cu->module->platform),
+			   drgn_dwarf_index_cu_buffer_error);
+	buffer->cu = cu;
+}
+
+static inline size_t hash_pair_to_shard(struct hash_pair hp)
+{
+	/*
+	 * The 8 most significant bits of the hash are used as the F14 tag, so
+	 * we don't want to use those for sharding.
+ */ + return ((hp.first >> + (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & + (DRGN_DWARF_INDEX_NUM_SHARDS - 1)); +} + +static bool +drgn_namespace_dwarf_index_alloc_shards(struct drgn_namespace_dwarf_index *dindex) +{ + if (dindex->shards) + return true; + dindex->shards = malloc_array(DRGN_DWARF_INDEX_NUM_SHARDS, + sizeof(*dindex->shards)); + if (!dindex->shards) + return false; + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; + omp_init_lock(&shard->lock); + drgn_dwarf_index_die_map_init(&shard->map); + drgn_dwarf_index_die_vector_init(&shard->dies); + } + return true; +} + +bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, + struct drgn_debug_info *dbinfo) +{ + state->dbinfo = dbinfo; + state->max_threads = omp_get_max_threads(); + state->cus = malloc_array(state->max_threads, sizeof(*state->cus)); + if (!state->cus) + return false; + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_init(&state->cus[i]); + return true; +} + +void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state) +{ + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_deinit(&state->cus[i]); + free(state->cus); +} + +static struct drgn_error * +drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, + struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) +{ + struct drgn_dwarf_index_pending_cu_vector *cus = + &state->cus[omp_get_thread_num()]; + + struct drgn_error *err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, scn); + while (binary_buffer_has_next(&buffer.bb)) { + struct drgn_dwarf_index_pending_cu *cu = + drgn_dwarf_index_pending_cu_vector_append_entry(cus); + if (!cu) + return &drgn_enomem; + cu->module = module; + cu->buf = buffer.bb.pos; + uint32_t unit_length32; + if ((err = binary_buffer_next_u32(&buffer.bb, &unit_length32))) + 
return err; + cu->is_64_bit = unit_length32 == UINT32_C(0xffffffff); + if (cu->is_64_bit) { + uint64_t unit_length64; + if ((err = binary_buffer_next_u64(&buffer.bb, + &unit_length64)) || + (err = binary_buffer_skip(&buffer.bb, + unit_length64))) + return err; + } else { + if ((err = binary_buffer_skip(&buffer.bb, + unit_length32))) + return err; + } + cu->len = buffer.bb.pos - cu->buf; + cu->scn = scn; + } + return NULL; +} + +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_state *state, + struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + err = drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); + if (!err && module->scn_data[DRGN_SCN_DEBUG_TYPES]) { + err = drgn_dwarf_index_read_cus(state, module, + DRGN_SCN_DEBUG_TYPES); + } + return err; +} + +static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + struct drgn_error *err; + switch (form) { + case DW_FORM_addr: + *insn_ret = cu->address_size; + return NULL; + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_flag: + case DW_FORM_strx1: + case DW_FORM_addrx1: + *insn_ret = 1; + return NULL; + case DW_FORM_data2: + case DW_FORM_ref2: + case DW_FORM_strx2: + case DW_FORM_addrx2: + *insn_ret = 2; + return NULL; + case DW_FORM_strx3: + case DW_FORM_addrx3: + *insn_ret = 3; + return NULL; + case DW_FORM_data4: + case DW_FORM_ref4: + case DW_FORM_ref_sup4: + case DW_FORM_strx4: + case DW_FORM_addrx4: + *insn_ret = 4; + return NULL; + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup8: + *insn_ret = 8; + return NULL; + case DW_FORM_data16: + *insn_ret = 16; + return NULL; + case DW_FORM_block: + case DW_FORM_exprloc: + *insn_ret = INSN_SKIP_BLOCK; + return NULL; + case DW_FORM_block1: + *insn_ret = INSN_SKIP_BLOCK1; + return NULL; + case DW_FORM_block2: + *insn_ret = INSN_SKIP_BLOCK2; + return NULL; + case DW_FORM_block4: + *insn_ret 
= INSN_SKIP_BLOCK4; + return NULL; + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + *insn_ret = INSN_SKIP_LEB128; + return NULL; + case DW_FORM_ref_addr: + if (cu->version < 3) { + *insn_ret = cu->address_size; + return NULL; + } + /* fallthrough */ + case DW_FORM_sec_offset: + case DW_FORM_strp: + case DW_FORM_strp_sup: + case DW_FORM_line_strp: + *insn_ret = cu->is_64_bit ? 8 : 4; + return NULL; + case DW_FORM_string: + *insn_ret = INSN_SKIP_STRING; + return NULL; + case DW_FORM_implicit_const: + if ((err = binary_buffer_skip_leb128(bb))) + return err; + /* fallthrough */ + case DW_FORM_flag_present: + *insn_ret = 0; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64, + form); + } +} + +static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = INSN_SIBLING_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = INSN_SIBLING_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = INSN_SIBLING_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = INSN_SIBLING_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = INSN_SIBLING_REF_UDATA; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_SIBLING_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_sibling", + form); + } +} + +static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { + return binary_buffer_error(bb, + "DW_FORM_strp without .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_NAME_STRP8; + else + 
*insn_ret = INSN_NAME_STRP4; + return NULL; + case DW_FORM_string: + *insn_ret = INSN_NAME_STRING; + return NULL; + case DW_FORM_strx: + *insn_ret = INSN_NAME_STRX; + return NULL; + case DW_FORM_strx1: + *insn_ret = INSN_NAME_STRX1; + return NULL; + case DW_FORM_strx2: + *insn_ret = INSN_NAME_STRX2; + return NULL; + case DW_FORM_strx3: + *insn_ret = INSN_NAME_STRX3; + return NULL; + case DW_FORM_strx4: + *insn_ret = INSN_NAME_STRX4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_NAME_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_name", + form); + } +} + +static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { + return binary_buffer_error(bb, + "DW_FORM_strp without .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_COMP_DIR_STRP8; + else + *insn_ret = INSN_COMP_DIR_STRP4; + return NULL; + case DW_FORM_line_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]) { + return binary_buffer_error(bb, + "DW_FORM_line_strp without .debug_line_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_COMP_DIR_LINE_STRP8; + else + *insn_ret = INSN_COMP_DIR_LINE_STRP4; + return NULL; + case DW_FORM_string: + *insn_ret = INSN_COMP_DIR_STRING; + return NULL; + case DW_FORM_strx: + *insn_ret = INSN_COMP_DIR_STRX; + return NULL; + case DW_FORM_strx1: + *insn_ret = INSN_COMP_DIR_STRX1; + return NULL; + case DW_FORM_strx2: + *insn_ret = INSN_COMP_DIR_STRX2; + return NULL; + case DW_FORM_strx3: + *insn_ret = INSN_COMP_DIR_STRX3; + return NULL; + case DW_FORM_strx4: + *insn_ret = INSN_COMP_DIR_STRX4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_COMP_DIR_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_comp_dir", + 
form); + } +} + +static struct drgn_error * +dw_at_str_offsets_base_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = INSN_STR_OFFSETS_BASE8; + else + *insn_ret = INSN_STR_OFFSETS_BASE4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_STR_OFFSETS_BASE_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_str_offsets_base", + form); + } +} + +static struct drgn_error * +dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_data4: + *insn_ret = INSN_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_data8: + *insn_ret = INSN_STMT_LIST_LINEPTR8; + return NULL; + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = INSN_STMT_LIST_LINEPTR8; + else + *insn_ret = INSN_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_STMT_LIST_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_stmt_list", + form); + } +} + +static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret, + uint64_t *implicit_const_ret) +{ + switch (form) { + case DW_FORM_data1: + *insn_ret = INSN_DECL_FILE_DATA1; + return NULL; + case DW_FORM_data2: + *insn_ret = INSN_DECL_FILE_DATA2; + return NULL; + case DW_FORM_data4: + *insn_ret = INSN_DECL_FILE_DATA4; + return NULL; + case DW_FORM_data8: + *insn_ret = INSN_DECL_FILE_DATA8; + return NULL; + /* + * decl_file must be positive, so if the compiler uses + * DW_FORM_sdata for some reason, just treat it as udata. 
+ */ + case DW_FORM_sdata: + case DW_FORM_udata: + *insn_ret = INSN_DECL_FILE_UDATA; + return NULL; + case DW_FORM_implicit_const: + *insn_ret = INSN_DECL_FILE_IMPLICIT; + return binary_buffer_next_uleb128(bb, implicit_const_ret); + case DW_FORM_indirect: + *insn_ret = INSN_DECL_FILE_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_decl_file", + form); + } +} + +static struct drgn_error * +dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret, uint8_t *die_flags) +{ + switch (form) { + case DW_FORM_flag: + *insn_ret = INSN_DECLARATION_FLAG; + return NULL; + case DW_FORM_flag_present: + /* + * This could be an instruction, but as long as we have a free + * DIE flag bit, we might as well use it. + */ + *insn_ret = 0; + *die_flags |= INSN_DIE_FLAG_DECLARATION; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_DECLARATION_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_declaration", + form); + } +} + +static struct drgn_error * +dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = INSN_SPECIFICATION_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = INSN_SPECIFICATION_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = INSN_SPECIFICATION_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = INSN_SPECIFICATION_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = INSN_SPECIFICATION_REF_UDATA; + return NULL; + case DW_FORM_ref_addr: + if (cu->version >= 3) { + if (cu->is_64_bit) + *insn_ret = INSN_SPECIFICATION_REF_ADDR8; + else + *insn_ret = INSN_SPECIFICATION_REF_ADDR4; + } else { + if (cu->address_size == 8) + *insn_ret = INSN_SPECIFICATION_REF_ADDR8; + else if (cu->address_size == 4) + *insn_ret = INSN_SPECIFICATION_REF_ADDR4; + else + return 
binary_buffer_error(bb, + "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", + cu->address_size); + } + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_SPECIFICATION_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_specification", + form); + } +} + +static bool append_uleb128(struct uint8_vector *insns, uint64_t value) +{ + do { + uint8_t byte = value & 0x7f; + value >>= 7; + if (value != 0) + byte |= 0x80; + if (!uint8_vector_append(insns, &byte)) + return false; + } while (value != 0); + return true; +} + +static struct drgn_error * +read_abbrev_decl(struct drgn_debug_info_buffer *buffer, + struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, + struct uint8_vector *insns) +{ + struct drgn_error *err; + + uint64_t code; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) + return err; + if (code == 0) + return &drgn_stop; + if (code != decls->size + 1) { + return binary_buffer_error(&buffer->bb, + "DWARF abbrevation table is not sequential"); + } + + uint32_t insn_index = insns->size; + if (!uint32_vector_append(decls, &insn_index)) + return &drgn_enomem; + + uint64_t tag; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &tag))) + return err; + + bool should_index; + switch (tag) { + /* Types. */ + case DW_TAG_base_type: + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_typedef: + case DW_TAG_union_type: + /* Variables. */ + case DW_TAG_variable: + /* Constants. */ + case DW_TAG_enumerator: + /* Functions. */ + case DW_TAG_subprogram: + /* Namespaces */ + case DW_TAG_namespace: + /* If adding anything here, make sure it fits in INSN_DIE_FLAG_TAG_MASK. */ + should_index = true; + break; + default: + should_index = false; + break; + } + uint8_t die_flags = should_index ? 
tag : 0; + + uint8_t children; + if ((err = binary_buffer_next_u8(&buffer->bb, &children))) + return err; + if (children) + die_flags |= INSN_DIE_FLAG_CHILDREN; + + uint8_t insn, last_insn = UINT8_MAX; + for (;;) { + uint64_t name, form; + uint64_t implicit_const; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &name))) + return err; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &form))) + return err; + if (name == 0 && form == 0) + break; + + if (name == DW_AT_sibling) { + err = dw_at_sibling_to_insn(&buffer->bb, form, &insn); + } else if (name == DW_AT_name && should_index) { + err = dw_at_name_to_insn(cu, &buffer->bb, form, &insn); + } else if (name == DW_AT_comp_dir) { + err = dw_at_comp_dir_to_insn(cu, &buffer->bb, form, + &insn); + } else if (name == DW_AT_str_offsets_base) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_str_offsets_base without .debug_str_offsets section"); + } + err = dw_at_str_offsets_base_to_insn(cu, &buffer->bb, + form, &insn); + } else if (name == DW_AT_stmt_list) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_stmt_list without .debug_line section"); + } + err = dw_at_stmt_list_to_insn(cu, &buffer->bb, form, + &insn); + } else if (name == DW_AT_decl_file && should_index && + /* Namespaces are merged, so we ignore their file. 
*/ + tag != DW_TAG_namespace) { + err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn, + &implicit_const); + } else if (name == DW_AT_declaration && should_index) { + err = dw_at_declaration_to_insn(&buffer->bb, form, + &insn, &die_flags); + } else if (name == DW_AT_specification && should_index) { + err = dw_at_specification_to_insn(cu, &buffer->bb, form, + &insn); + } else { + err = dw_form_to_insn(cu, &buffer->bb, form, &insn); + } + if (err) + return err; + + if (insn != 0) { + if (insn <= INSN_MAX_SKIP) { + if (last_insn + insn <= INSN_MAX_SKIP) { + insns->data[insns->size - 1] += insn; + continue; + } else if (last_insn < INSN_MAX_SKIP) { + insn = last_insn + insn - INSN_MAX_SKIP; + insns->data[insns->size - 1] = INSN_MAX_SKIP; + } + } + last_insn = insn; + + if (!uint8_vector_append(insns, &insn)) + return &drgn_enomem; + + if (insn == INSN_DECL_FILE_IMPLICIT && + !append_uleb128(insns, implicit_const)) + return &drgn_enomem; + } + } + insn = INSN_END; + if (!uint8_vector_append(insns, &insn) || + !uint8_vector_append(insns, &die_flags)) + return &drgn_enomem; + return NULL; +} + +static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, + size_t debug_abbrev_offset) +{ + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_ABBREV); + /* Checked in read_cu(). */ + buffer.bb.pos += debug_abbrev_offset; + struct uint32_vector decls = VECTOR_INIT; + struct uint8_vector insns = VECTOR_INIT; + for (;;) { + struct drgn_error *err = read_abbrev_decl(&buffer, cu, &decls, + &insns); + if (err == &drgn_stop) { + break; + } else if (err) { + uint8_vector_deinit(&insns); + uint32_vector_deinit(&decls); + return err; + } + } + uint8_vector_shrink_to_fit(&insns); + uint32_vector_shrink_to_fit(&decls); + cu->abbrev_decls = decls.data; + cu->num_abbrev_decls = decls.size; + cu->abbrev_insns = insns.data; + return NULL; +} + +/* Get the size of a unit header beyond that of a normal compilation unit. 
*/ +static size_t cu_header_extra_size(struct drgn_dwarf_index_cu *cu) +{ + switch (cu->unit_type) { + case DW_UT_compile: + case DW_UT_partial: + return 0; + case DW_UT_skeleton: + case DW_UT_split_compile: + /* dwo_id */ + return 8; + case DW_UT_type: + case DW_UT_split_type: + /* type_signature and type_offset */ + return cu->is_64_bit ? 16 : 12; + default: + UNREACHABLE(); + } +} + +static size_t cu_header_size(struct drgn_dwarf_index_cu *cu) +{ + size_t size = cu->is_64_bit ? 23 : 11; + if (cu->version >= 5) + size++; + size += cu_header_extra_size(cu); + return size; +} + +static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) +{ + struct drgn_error *err; + buffer->bb.pos += buffer->cu->is_64_bit ? 12 : 4; + uint16_t version; + if ((err = binary_buffer_next_u16(&buffer->bb, &version))) + return err; + if (version < 2 || version > 5) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF CU version %" PRIu16, + version); + } + buffer->cu->version = version; + + if (version >= 5) { + if ((err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->unit_type))) + return err; + if (buffer->cu->unit_type < DW_UT_compile || + buffer->cu->unit_type > DW_UT_split_type) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF unit type"); + } + } else if (buffer->cu->scn == DRGN_SCN_DEBUG_TYPES) { + buffer->cu->unit_type = DW_UT_type; + } else { + buffer->cu->unit_type = DW_UT_compile; + } + + if (version >= 5 && + (err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->address_size))) + return err; + + uint64_t debug_abbrev_offset; + if (buffer->cu->is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer->bb, + &debug_abbrev_offset))) + return err; + } else { + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &debug_abbrev_offset))) + return err; + } + if (debug_abbrev_offset > + buffer->cu->module->scn_data[DRGN_SCN_DEBUG_ABBREV]->d_size) { + return binary_buffer_error(&buffer->bb, + "debug_abbrev_offset is out of bounds"); + } 
+
+	if (version < 5 &&
+	    (err = binary_buffer_next_u8(&buffer->bb,
+					 &buffer->cu->address_size)))
+		return err;
+	if (buffer->cu->address_size > 8) {
+		return binary_buffer_error(&buffer->bb,
+					   "unsupported address size %" PRIu8,
+					   buffer->cu->address_size);
+	}
+
+	if ((err = binary_buffer_skip(&buffer->bb,
+				      cu_header_extra_size(buffer->cu))))
+		return err;
+
+	return read_abbrev_table(buffer->cu, debug_abbrev_offset);
+}
+
+/**
+ * Resolve a string index (`DW_FORM_strx*`) to a string in `.debug_str`.
+ *
+ * The index selects an entry in the CU's string offsets table
+ * (`cu->str_offsets`); entries are 8 bytes in 64-bit DWARF and 4 bytes
+ * otherwise. The entry is an offset into `.debug_str`.
+ *
+ * @param[in] buffer Buffer for the CU; used for the CU state, byte order, and
+ * error reporting.
+ * @param[in] strx String index.
+ * @param[out] ret Returned pointer into `.debug_str`.
+ * @return @c NULL on success, non-@c NULL if the CU has no string offsets
+ * base, the index or the resulting offset is out of bounds, or there is no
+ * `.debug_str` section.
+ */
+static struct drgn_error *read_strx(struct drgn_dwarf_index_cu_buffer *buffer,
+				    uint64_t strx, const char **ret)
+{
+	if (!buffer->cu->str_offsets) {
+		return binary_buffer_error(&buffer->bb,
+					   "string index without DW_AT_str_offsets_base");
+	}
+	Elf_Data *debug_str_offsets =
+		buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS];
+	size_t offset_size = buffer->cu->is_64_bit ? 8 : 4;
+	if (((char *)debug_str_offsets->d_buf + debug_str_offsets->d_size
+	     - buffer->cu->str_offsets)
+	    / offset_size <= strx) {
+		return binary_buffer_error(&buffer->bb,
+					   "string index out of bounds");
+	}
+	/*
+	 * The abbreviation translation only rejects DW_FORM_strp when
+	 * .debug_str is missing, so the section must be checked here before it
+	 * is dereferenced.
+	 */
+	Elf_Data *debug_str =
+		buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR];
+	if (!debug_str) {
+		return binary_buffer_error(&buffer->bb,
+					   "DW_FORM_strx without .debug_str section");
+	}
+	/*
+	 * Index the table with byte arithmetic: the entries are not guaranteed
+	 * to be aligned for uint64_t/uint32_t, so they are only ever accessed
+	 * through memcpy(). The bounds check above guarantees that the
+	 * multiplication cannot overflow.
+	 */
+	const char *entry = buffer->cu->str_offsets + strx * offset_size;
+	uint64_t strp;
+	if (buffer->cu->is_64_bit) {
+		memcpy(&strp, entry, sizeof(strp));
+		if (buffer->bb.bswap)
+			strp = bswap_64(strp);
+	} else {
+		uint32_t strp32;
+		memcpy(&strp32, entry, sizeof(strp32));
+		if (buffer->bb.bswap)
+			strp32 = bswap_32(strp32);
+		strp = strp32;
+	}
+	if (strp >= debug_str->d_size) {
+		return binary_buffer_error(&buffer->bb,
+					   "indirect string is out of bounds");
+	}
+	*ret = (char *)debug_str->d_buf + strp;
+	return NULL;
+}
+
+static struct drgn_error *read_lnp_header(struct drgn_debug_info_buffer *buffer,
+					  bool *is_64_bit_ret, int *version_ret)
+{
+	struct drgn_error *err;
+	uint32_t tmp;
+	if ((err = binary_buffer_next_u32(&buffer->bb, &tmp)))
+		return err;
+	bool is_64_bit = tmp == UINT32_C(0xffffffff);
+	if (is_64_bit &&
+	    (err = binary_buffer_skip(&buffer->bb,
sizeof(uint64_t)))) + return err; + *is_64_bit_ret = is_64_bit; + + uint16_t version; + if ((err = binary_buffer_next_u16(&buffer->bb, &version))) + return err; + if (version < 2 || version > 5) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF LNP version %" PRIu16, + version); + } + *version_ret = version; + + uint8_t opcode_base; + if ((err = binary_buffer_skip(&buffer->bb, + /* address_size + segment_selector_size */ + + (version >= 5 ? 2 : 0) + + (is_64_bit ? 8 : 4) /* header_length */ + + 1 /* minimum_instruction_length */ + + (version >= 4) /* maximum_operations_per_instruction */ + + 1 /* default_is_stmt */ + + 1 /* line_base */ + + 1 /* line_range */)) || + (err = binary_buffer_next_u8(&buffer->bb, &opcode_base)) || + (err = binary_buffer_skip(&buffer->bb, opcode_base - 1))) + return err; + + return NULL; +} + +/** + * Cached hash of file path. + * + * File names in the DWARF line number program header consist of three parts: + * the compilation directory path, the directory path, and the file name. + * Multiple file names may be relative to the same directory, and relative + * directory paths are all relative to the compilation directory. + * + * We'd like to hash DWARF file names to a unique hash so that we can + * deduplicate definitions without comparing full paths. + * + * The naive way to hash a DWARF file name entry would be to join and normalize + * the compilation directory path, directory path, and file name, and hash that. + * But this would involve a lot of redundant computations since most paths will + * have common prefixes. Instead, we cache the hashes of each directory path and + * update the hash for relative paths. + * + * It is not sufficient to cache the final hash for each directory because ".." + * components may require us to use the hash of a parent directory. So, we also + * cache the hash of every parent directory in a linked list. + * + * We use the FNV-1a hash function. 
Although FNV-1a is + * [known](https://github.com/rurban/smhasher/blob/master/doc/FNV1a.txt) to have + * some hash quality problems, it is sufficient for producing unique 64-bit + * hashes of file names. It has a couple of advantages over "better" hash + * functions: + * + * 1. Its only internal state is the 64-bit hash value, which keeps this + * structure small. + * 2. It operates byte-by-byte, which works well for incrementally hashing lots + * of short path components. + */ +struct path_hash { + /** Hash of this path. */ + uint64_t hash; + /** + * Tagged pointer comprising `struct path_hash *` of parent directory + * and flag in lowest-order bit specifying whether this path ends in a + * ".." component. + */ + uintptr_t parent_and_is_dot_dot; +}; + +#define FNV_OFFSET_BASIS_64 UINT64_C(0xcbf29ce484222325) +#define FNV_PRIME_64 UINT64_C(0x00000100000001b3) + +static inline void path_hash_update(struct path_hash *path_hash, + const void *src, size_t len) +{ + const uint8_t *s = src, *end = s + len; + uint64_t hash = path_hash->hash; + while (s < end) { + hash ^= *(s++); + hash *= FNV_PRIME_64; + } + path_hash->hash = hash; +} + +/** Path hash of "" (empty string). */ +static const struct path_hash empty_path_hash = { FNV_OFFSET_BASIS_64 }; +/** Path hash of "/". */ +static const struct path_hash absolute_path_hash = { + (FNV_OFFSET_BASIS_64 ^ '/') * FNV_PRIME_64, +}; + +static inline const struct path_hash * +path_hash_parent(const struct path_hash *path_hash) +{ + return (struct path_hash *)(path_hash->parent_and_is_dot_dot + & ~(uintptr_t)1); +} + +static inline bool path_hash_is_dot_dot(const struct path_hash *path_hash) +{ + return path_hash->parent_and_is_dot_dot & 1; +} + +/** Chunk of allocated @ref path_hash objects. See @ref path_hash_cache. 
*/ +struct path_hash_chunk { + struct path_hash objects[(4096 - sizeof(struct path_hash_chunk *)) + / sizeof(struct path_hash)]; + struct path_hash_chunk *next; +}; + +DEFINE_VECTOR(path_hash_vector, const struct path_hash *) + +struct lnp_entry_format { + uint64_t content_type; + uint64_t form; +}; + +static const struct lnp_entry_format dwarf4_directory_entry_formats[] = { + { DW_LNCT_path, DW_FORM_string }, +}; +static const struct lnp_entry_format dwarf4_file_name_entry_formats[] = { + { DW_LNCT_path, DW_FORM_string }, + { DW_LNCT_directory_index, DW_FORM_udata }, + { DW_LNCT_timestamp, DW_FORM_udata }, + { DW_LNCT_size, DW_FORM_udata }, +}; + +/** + * Cache of hashed file paths. + * + * This uses a bump allocator for @ref path_hash objects. @ref path_hash objects + * are allocated sequentially out of a @ref path_hash_chunk; when a chunk is + * exhausted, a new @ref path_hash_chunk is allocated from the heap. The + * allocated chunks are kept and reused for each DWARF line number program; they + * are freed at the end of the first indexing pass. + * + * This also caches the allocations for directory hashes and line number program + * header entry formats. + */ +struct path_hash_cache { + /** Next @ref path_hash object to be allocated. */ + struct path_hash *next_object; + /** @ref path_hash_chunk currently being allocated from. */ + struct path_hash_chunk *current_chunk; + /** First allocated @ref path_hash_chunk. */ + struct path_hash_chunk *first_chunk; + /** Hashed directory paths. */ + struct path_hash_vector directories; + /** Line number program header entry formats. */ + struct lnp_entry_format *entry_formats; + /** Allocated size of @ref path_hash_cache::entry_formats. 
*/
+	size_t entry_formats_capacity;
+};
+
+/**
+ * Allocate a @ref path_hash object from the bump allocator in @p cache.
+ *
+ * Objects are handed out sequentially from the current chunk. When the current
+ * chunk is exhausted, allocation moves on to the next chunk in the list; a new
+ * chunk is allocated with malloc() and linked in only if there is no next
+ * chunk yet.
+ *
+ * @return The allocated object (not initialized), or @c NULL if a new chunk
+ * was needed and could not be allocated.
+ */
+static struct path_hash *path_hash_alloc(struct path_hash_cache *cache)
+{
+	struct path_hash_chunk *current_chunk = cache->current_chunk;
+	/* Fast path: there is still room in the current chunk. */
+	if (cache->next_object <
+	    &current_chunk->objects[array_size(current_chunk->objects)])
+		return cache->next_object++;
+	struct path_hash_chunk *next_chunk = current_chunk->next;
+	if (!next_chunk) {
+		next_chunk = malloc(sizeof(*next_chunk));
+		if (!next_chunk)
+			return NULL;
+		next_chunk->next = NULL;
+		current_chunk->next = next_chunk;
+	}
+	cache->current_chunk = next_chunk;
+	/* Object 0 of the new chunk is returned; the next one to use is 1. */
+	cache->next_object = &next_chunk->objects[1];
+	return next_chunk->objects;
+}
+
+/** Return whether the path component of the given length is exactly "..". */
+static inline bool is_dot_dot(const char *component, size_t component_len)
+{
+	return component_len == 2 && component[0] == '.' && component[1] == '.';
+}
+
+/**
+ * Hash @p path relative to the directory hashed by @p path_hash.
+ *
+ * If @p path starts with '/', @p path_hash is ignored and hashing starts from
+ * @ref absolute_path_hash. Empty components and "." components are skipped. A
+ * ".." component moves to the parent of the current hash, except that it is
+ * appended as a regular component when the current hash is the empty path or
+ * itself ends in "..", and it is ignored when the current hash is "/".
+ *
+ * @return The hash of the resulting path, or @c NULL if a @ref path_hash
+ * could not be allocated.
+ */
+static const struct path_hash *hash_path(struct path_hash_cache *cache,
+					 const char *path,
+					 const struct path_hash *path_hash)
+{
+	const char *p = path;
+	if (*p == '/') {
+		path_hash = &absolute_path_hash;
+		p++;
+	}
+	while (*p != '\0') {
+		const char *component = p;
+		p = strchrnul(p, '/');
+		size_t component_len = p - component;
+		if (*p == '/')
+			p++;
+		if (component_len == 0 ||
+		    (component_len == 1 && component[0] == '.')) {
+		} else if (!is_dot_dot(component, component_len) ||
+			   path_hash == &empty_path_hash ||
+			   path_hash_is_dot_dot(path_hash)) {
+			struct path_hash *new_path_hash = path_hash_alloc(cache);
+			if (!new_path_hash)
+				return NULL;
+			new_path_hash->hash = path_hash->hash;
+			/*
+			 * The empty and "/" hashes are the only ones without a
+			 * parent, and neither needs a separator before the
+			 * next component.
+			 */
+			if (path_hash->parent_and_is_dot_dot != 0)
+				path_hash_update(new_path_hash, "/", 1);
+			path_hash_update(new_path_hash, component,
+					 component_len);
+			new_path_hash->parent_and_is_dot_dot =
+				((uintptr_t)path_hash |
+				 is_dot_dot(component, component_len));
+			path_hash = new_path_hash;
+		} else if (path_hash != &absolute_path_hash) {
+			path_hash = path_hash_parent(path_hash);
+		}
+	}
+	return path_hash;
+}
+
+static struct drgn_error *
+read_lnp_entry_formats(struct
drgn_debug_info_buffer *buffer, + struct path_hash_cache *cache, int *count_ret) +{ + struct drgn_error *err; + uint8_t count; + if ((err = binary_buffer_next_u8(&buffer->bb, &count))) + return err; + if (count > cache->entry_formats_capacity) { + free(cache->entry_formats); + cache->entry_formats = malloc_array(count, + sizeof(cache->entry_formats[0])); + if (!cache->entry_formats) { + cache->entry_formats_capacity = 0; + return &drgn_enomem; + } + cache->entry_formats_capacity = count; + } + bool have_path = false; + for (int i = 0; i < count; i++) { + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &cache->entry_formats[i].content_type))) + return err; + if (cache->entry_formats[i].content_type == DW_LNCT_path) + have_path = true; + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &cache->entry_formats[i].form))) + return err; + } + if (!have_path) { + return binary_buffer_error(&buffer->bb, + "DWARF line number program header entry does not include DW_LNCT_path"); + } + *count_ret = count; + return NULL; +} + +static struct drgn_error *skip_lnp_form(struct binary_buffer *bb, + bool is_64_bit, uint64_t form) +{ + struct drgn_error *err; + uint64_t skip; + switch (form) { + case DW_FORM_block: + if ((err = binary_buffer_next_uleb128(bb, &skip))) + return err; +block: + return binary_buffer_skip(bb, skip); + case DW_FORM_block1: + if ((err = binary_buffer_next_u8_into_u64(bb, &skip))) + return err; + goto block; + case DW_FORM_block2: + if ((err = binary_buffer_next_u16_into_u64(bb, &skip))) + return err; + goto block; + case DW_FORM_block4: + if ((err = binary_buffer_next_u32_into_u64(bb, &skip))) + return err; + goto block; + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_strx1: + return binary_buffer_skip(bb, 1); + case DW_FORM_data2: + case DW_FORM_strx2: + return binary_buffer_skip(bb, 2); + case DW_FORM_strx3: + return binary_buffer_skip(bb, 3); + case DW_FORM_data4: + case DW_FORM_strx4: + return binary_buffer_skip(bb, 4); + case 
DW_FORM_data8: + return binary_buffer_skip(bb, 8); + case DW_FORM_data16: + return binary_buffer_skip(bb, 16); + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp: + return binary_buffer_skip(bb, is_64_bit ? 8 : 4); + case DW_FORM_sdata: + case DW_FORM_strx: + case DW_FORM_udata: + return binary_buffer_skip_leb128(bb); + case DW_FORM_string: + return binary_buffer_skip_string(bb); + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for line number program", + form); + } +} + +static struct drgn_error *read_lnp_string(struct drgn_debug_info_buffer *buffer, + bool is_64_bit, uint64_t form, + const char **ret) +{ + struct drgn_error *err; + uint64_t strp; + Elf_Data *data; + switch (form) { + case DW_FORM_string: + *ret = buffer->bb.pos; + return binary_buffer_skip_string(&buffer->bb); + case DW_FORM_line_strp: + case DW_FORM_strp: + if (is_64_bit) + err = binary_buffer_next_u64(&buffer->bb, &strp); + else + err = binary_buffer_next_u32_into_u64(&buffer->bb, &strp); + if (err) + return err; + data = buffer->module->scn_data[ + form == DW_FORM_line_strp ? 
+			DRGN_SCN_DEBUG_LINE_STR : DRGN_SCN_DEBUG_STR];
+		if (!data || strp >= data->d_size) {
+			return binary_buffer_error(&buffer->bb,
+						   "DW_LNCT_path is out of bounds");
+		}
+		*ret = (const char *)data->d_buf + strp;
+		return NULL;
+	default:
+		return binary_buffer_error(&buffer->bb,
+					   "unknown attribute form %#" PRIx64 " for DW_LNCT_path",
+					   form);
+	}
+}
+
+/**
+ * Read a DW_LNCT_directory_index value encoded with the given form.
+ *
+ * Only DW_FORM_data1, DW_FORM_data2, and DW_FORM_udata are accepted; any other
+ * form is reported as an error.
+ */
+static struct drgn_error *
+read_lnp_directory_index(struct drgn_debug_info_buffer *buffer, uint64_t form,
+			 uint64_t *ret)
+{
+	switch (form) {
+	case DW_FORM_data1:
+		return binary_buffer_next_u8_into_u64(&buffer->bb, ret);
+	case DW_FORM_data2:
+		return binary_buffer_next_u16_into_u64(&buffer->bb, ret);
+	case DW_FORM_udata:
+		return binary_buffer_next_uleb128(&buffer->bb, ret);
+	default:
+		return binary_buffer_error(&buffer->bb,
+					   "unknown attribute form %#" PRIx64 " for DW_LNCT_directory_index",
+					   form);
+	}
+}
+
+/**
+ * Read the directory and file name tables of a line number program header and
+ * hash every file name.
+ *
+ * On success, the hashes are stored in @p cu->file_name_hashes and their count
+ * in @p cu->num_file_names. If the table contains no file names, @p cu is left
+ * unmodified.
+ *
+ * @param[in] cache Allocation cache; its chunks and directory vector are
+ * reset and reused by this call.
+ * @param[in] cu Compilation unit whose file name table is being read.
+ * @param[in] comp_dir Compilation directory. It is only used for DWARF
+ * versions before 5, where it is hashed as directory 0 and the remaining
+ * directories are hashed relative to it.
+ * @param[in] stmt_list Offset of the line number program header in
+ * .debug_line.
+ * @return @c NULL on success, non-@c NULL on error.
+ */
+static struct drgn_error *read_file_name_table(struct path_hash_cache *cache,
+					       struct drgn_dwarf_index_cu *cu,
+					       const char *comp_dir,
+					       size_t stmt_list)
+{
+	struct drgn_error *err;
+
+	struct drgn_debug_info_buffer buffer;
+	drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_LINE);
+	/* Checked in index_cu_first_pass(). */
+	buffer.bb.pos += stmt_list;
+
+	bool is_64_bit;
+	int version;
+	if ((err = read_lnp_header(&buffer, &is_64_bit, &version)))
+		return err;
+
+	cache->current_chunk = cache->first_chunk;
+	cache->next_object = cache->first_chunk->objects;
+	cache->directories.size = 0;
+
+	const struct lnp_entry_format *entry_formats;
+	int entry_format_count;
+	uint64_t entry_count = 0; /* For -Wmaybe-uninitialized. */
+	const struct path_hash *path_hash, *parent;
+	if (version >= 5) {
+		if ((err = read_lnp_entry_formats(&buffer, cache,
+						  &entry_format_count)))
+			return err;
+		entry_formats = cache->entry_formats;
+		if ((err = binary_buffer_next_uleb128(&buffer.bb,
+						      &entry_count)))
+			return err;
+		/*
+		 * err is NULL at this point, so it must not be returned here:
+		 * that would report an oversized count or a failed allocation
+		 * as success.
+		 */
+		if (entry_count > SIZE_MAX ||
+		    !path_hash_vector_reserve(&cache->directories, entry_count))
+			return &drgn_enomem;
+		parent = &empty_path_hash;
+	} else {
+		entry_formats = dwarf4_directory_entry_formats;
+		entry_format_count = array_size(dwarf4_directory_entry_formats);
+		path_hash = hash_path(cache, comp_dir, &empty_path_hash);
+		if (!path_hash ||
+		    !path_hash_vector_append(&cache->directories, &path_hash))
+			return &drgn_enomem;
+		parent = path_hash;
+	}
+
+	while (version < 5 || entry_count-- > 0) {
+		const char *path;
+		for (int j = 0; j < entry_format_count; j++) {
+			if (entry_formats[j].content_type == DW_LNCT_path) {
+				err = read_lnp_string(&buffer, is_64_bit,
+						      entry_formats[j].form,
+						      &path);
+				/*
+				 * Only look at path if it was read
+				 * successfully; otherwise it may not point to
+				 * a valid string.
+				 */
+				if (!err && version < 5 && path[0] == '\0')
+					goto file_name_entries;
+			} else {
+				err = skip_lnp_form(&buffer.bb, is_64_bit,
+						    entry_formats[j].form);
+			}
+			if (err)
+				return err;
+		}
+		path_hash = hash_path(cache, path, parent);
+		if (!path_hash ||
+		    !path_hash_vector_append(&cache->directories, &path_hash))
+			return &drgn_enomem;
+		parent = cache->directories.data[0];
+	}
+
+file_name_entries:;
+	/*
+	 * File name 0 needs special treatment. In DWARF 2-4, file name entries
+	 * are numbered starting at 1, and a DW_AT_decl_file of 0 indicates that
+	 * no file was specified. In DWARF 5, file name entries are numbered
+	 * starting at 0, and entry 0 is the current compilation file name. The
+	 * DWARF 5 specification still states that a DW_AT_decl_file of 0
+	 * indicates that no file was specified, but some producers (including
+	 * Clang) and consumers (including elfutils and GDB) treat a
+	 * DW_AT_decl_file of 0 as specifying the current compilation file name,
+	 * so we do the same.
+	 *
+	 * So, for DWARF 5, we hash entry 0 as usual, and for DWARF 4, we insert
+	 * a placeholder for entry 0. If there are no file names at all, we keep
+	 * the no_file_name_hashes placeholder.
+	 */
+	struct uint64_vector file_name_hashes;
+	if (version >= 5) {
+		if ((err = read_lnp_entry_formats(&buffer, cache,
+						  &entry_format_count)))
+			return err;
+		entry_formats = cache->entry_formats;
+		if ((err = binary_buffer_next_uleb128(&buffer.bb,
+						      &entry_count)))
+			return err;
+		if (entry_count == 0)
+			return NULL;
+		if (entry_count > SIZE_MAX)
+			return &drgn_enomem;
+		uint64_vector_init(&file_name_hashes);
+		if (!uint64_vector_reserve(&file_name_hashes, entry_count)) {
+			err = &drgn_enomem;
+			goto err;
+		}
+	} else {
+		entry_formats = dwarf4_file_name_entry_formats;
+		entry_format_count = array_size(dwarf4_file_name_entry_formats);
+		uint64_vector_init(&file_name_hashes);
+	}
+
+	while (version < 5 || entry_count-- > 0) {
+		const char *path;
+		uint64_t directory_index = 0;
+		for (int j = 0; j < entry_format_count; j++) {
+			if (entry_formats[j].content_type == DW_LNCT_path) {
+				err = read_lnp_string(&buffer, is_64_bit,
+						      entry_formats[j].form,
+						      &path);
+				if (!err && version < 5) {
+					if (path[0] == '\0') {
+						if (file_name_hashes.size == 0) {
+							uint64_vector_deinit(&file_name_hashes);
+							return NULL;
+						}
+						goto done;
+					} else if (file_name_hashes.size == 0) {
+						uint64_t zero = 0;
+						if (!uint64_vector_append(&file_name_hashes,
+									  &zero)) {
+							err = &drgn_enomem;
+							goto err;
+						}
+					}
+				}
+			} else if (entry_formats[j].content_type ==
+				   DW_LNCT_directory_index) {
+				err = read_lnp_directory_index(&buffer,
+							       entry_formats[j].form,
+							       &directory_index);
+			} else {
+				err = skip_lnp_form(&buffer.bb, is_64_bit,
+						    entry_formats[j].form);
+			}
+			if (err)
+				goto err;
+		}
+
+		if (directory_index >= cache->directories.size) {
+			err = binary_buffer_error(&buffer.bb,
+						  "directory index %" PRIu64 " is invalid",
+						  directory_index);
+			goto err;
+		}
+		struct path_hash *prev_object = cache->next_object;
+		struct path_hash_chunk *prev_chunk = cache->current_chunk;
+		path_hash = hash_path(cache, path,
+				      cache->directories.data[directory_index]);
+		if (!path_hash ||
+		    !uint64_vector_append(&file_name_hashes, &path_hash->hash)) {
+			err = &drgn_enomem;
+			goto err;
+		}
+
+		/* "Free" the objects allocated for this file name. */
+		cache->next_object = prev_object;
+		cache->current_chunk = prev_chunk;
+	}
+
+done:
+	uint64_vector_shrink_to_fit(&file_name_hashes);
+	cu->file_name_hashes = file_name_hashes.data;
+	cu->num_file_names = file_name_hashes.size;
+	return NULL;
+
+err:
+	uint64_vector_deinit(&file_name_hashes);
+	return err;
+}
+
+static struct drgn_error *
+index_specification(struct drgn_debug_info *dbinfo, uintptr_t declaration,
+		    struct drgn_debug_info_module *module, uintptr_t addr)
+{
+	struct drgn_dwarf_specification entry = {
+		.declaration = declaration,
+		.module = module,
+		.addr = addr,
+	};
+	struct hash_pair hp = drgn_dwarf_specification_map_hash(&declaration);
+	int ret;
+	#pragma omp critical(drgn_index_specification)
+	ret = drgn_dwarf_specification_map_insert_hashed(&dbinfo->dwarf.specifications,
+							 &entry, hp,
+							 NULL);
+	/*
+	 * There may be duplicates if multiple DIEs reference one declaration,
+	 * but we ignore them.
+	 */
+	return ret < 0 ?
&drgn_enomem : NULL; +} + +static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint8_t insn, uint8_t *insn_ret, + uint8_t *die_flags) +{ + struct drgn_error *err; + uint64_t form; + if ((err = binary_buffer_next_uleb128(bb, &form))) + return err; + if (form == DW_FORM_implicit_const) { + return binary_buffer_error(bb, + "DW_FORM_implicit_const in DW_FORM_indirect"); + } + switch (insn) { + case INSN_INDIRECT: + return dw_form_to_insn(cu, bb, form, insn_ret); + case INSN_SIBLING_INDIRECT: + return dw_at_sibling_to_insn(bb, form, insn_ret); + case INSN_NAME_INDIRECT: + return dw_at_name_to_insn(cu, bb, form, insn_ret); + case INSN_COMP_DIR_INDIRECT: + return dw_at_comp_dir_to_insn(cu, bb, form, insn_ret); + case INSN_STR_OFFSETS_BASE_INDIRECT: + return dw_at_str_offsets_base_to_insn(cu, bb, form, insn_ret); + case INSN_STMT_LIST_INDIRECT: + return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret); + case INSN_DECL_FILE_INDIRECT: + return dw_at_decl_file_to_insn(bb, form, insn_ret, NULL); + case INSN_DECLARATION_INDIRECT: + return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags); + case INSN_SPECIFICATION_INDIRECT: + return dw_at_specification_to_insn(cu, bb, form, insn_ret); + default: + UNREACHABLE(); + } +} + +/* + * First pass: read the file name tables and index DIEs with + * DW_AT_specification. This recurses into namespaces. + */ +static struct drgn_error * +index_cu_first_pass(struct drgn_debug_info *dbinfo, + struct drgn_dwarf_index_cu_buffer *buffer, + struct path_hash_cache *path_hash_cache) +{ + /* + * If DW_AT_comp_dir uses a strx* form, we can't read it right away + * because we might not have seen DW_AT_str_offsets_base yet. Rather + * than adding an extra flag to indicate that we need to read it later, + * we set comp_dir to this sentinel value. 
+ */ + static const char comp_dir_is_strx; + + struct drgn_error *err; + struct drgn_dwarf_index_cu *cu = buffer->cu; + const char *debug_info_buffer = cu->module->scn_data[cu->scn]->d_buf; + unsigned int depth = 0; + for (;;) { + size_t die_addr = (uintptr_t)buffer->bb.pos; + + uint64_t code; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) + return err; + if (code == 0) { + if (depth-- > 1) + continue; + else + break; + } else if (code > cu->num_abbrev_decls) { + return binary_buffer_error(&buffer->bb, + "unknown abbreviation code %" PRIu64, + code); + } + + uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; + bool declaration = false; + uintptr_t specification = 0; + const char *comp_dir = ""; + uint64_t comp_dir_strx; + const char *stmt_list_ptr = NULL; + uint64_t stmt_list; + const char *sibling = NULL; + uint8_t insn; + uint8_t extra_die_flags = 0; + while ((insn = *insnp++) != INSN_END) { +indirect_insn:; + uint64_t skip, tmp; + Elf_Data *strp_scn; + switch (insn) { + case INSN_SKIP_BLOCK: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_BLOCK1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_BLOCK2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_BLOCK4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_LEB128: + case INSN_NAME_STRX: + case INSN_DECL_FILE_UDATA: + if ((err = binary_buffer_skip_leb128(&buffer->bb))) + return err; + break; + case INSN_COMP_DIR_STRING: + comp_dir = buffer->bb.pos; + /* fallthrough */ + case INSN_SKIP_STRING: + case INSN_NAME_STRING: + if ((err = binary_buffer_skip_string(&buffer->bb))) + return err; + break; + case INSN_SIBLING_REF1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF2: 
+ if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF_UDATA: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; +sibling: + if (tmp > cu->len) { + return binary_buffer_error(&buffer->bb, + "DW_AT_sibling is out of bounds"); + } + sibling = cu->buf + tmp; + __builtin_prefetch(sibling); + if (sibling < buffer->bb.pos) { + return binary_buffer_error(&buffer->bb, + "DW_AT_sibling points backwards"); + } + break; + case INSN_COMP_DIR_STRP4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; + goto comp_dir_strp; + case INSN_COMP_DIR_STRP8: + if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) + return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; + goto comp_dir_strp; + case INSN_COMP_DIR_LINE_STRP4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]; + goto comp_dir_strp; + case INSN_COMP_DIR_LINE_STRP8: + if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) + return err; + strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]; +comp_dir_strp: + if (tmp >= strp_scn->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_AT_comp_dir is out of bounds"); + } + comp_dir = (const char *)strp_scn->d_buf + tmp; + break; + case INSN_COMP_DIR_STRX: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case INSN_COMP_DIR_STRX1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case 
INSN_COMP_DIR_STRX2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case INSN_COMP_DIR_STRX3: + if ((err = binary_buffer_next_uint(&buffer->bb, + 3, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case INSN_COMP_DIR_STRX4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &comp_dir_strx))) + return err; + comp_dir = &comp_dir_is_strx; + break; + case INSN_STR_OFFSETS_BASE4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto str_offsets_base; + case INSN_STR_OFFSETS_BASE8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +str_offsets_base: + if (tmp > cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_AT_str_offsets_base is out of bounds"); + } + cu->str_offsets = + (char *)cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_buf + + tmp; + break; + case INSN_STMT_LIST_LINEPTR4: + stmt_list_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &stmt_list))) + return err; + break; + case INSN_STMT_LIST_LINEPTR8: + stmt_list_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_u64(&buffer->bb, + &stmt_list))) + return err; + break; + case INSN_NAME_STRX1: + case INSN_DECL_FILE_DATA1: + skip = 1; + goto skip; + case INSN_NAME_STRX2: + case INSN_DECL_FILE_DATA2: + skip = 2; + goto skip; + case INSN_NAME_STRX3: + skip = 3; + goto skip; + case INSN_NAME_STRP4: + case INSN_NAME_STRX4: + case INSN_DECL_FILE_DATA4: + skip = 4; + goto skip; + case INSN_NAME_STRP8: + case INSN_DECL_FILE_DATA8: + skip = 8; + goto skip; + case INSN_DECL_FILE_IMPLICIT: + while (*insnp++ & 0x80) + ; + break; + case INSN_DECLARATION_FLAG: { + uint8_t flag; + if ((err = binary_buffer_next_u8(&buffer->bb, + &flag))) + return err; + if (flag) + declaration = true; + break; + } + case INSN_SPECIFICATION_REF1: + if ((err = 
binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; + case INSN_SPECIFICATION_REF2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; + case INSN_SPECIFICATION_REF4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; + case INSN_SPECIFICATION_REF8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; + goto specification; + case INSN_SPECIFICATION_REF_UDATA: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; +specification: + specification = (uintptr_t)cu->buf + tmp; + break; + case INSN_SPECIFICATION_REF_ADDR4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification_ref_addr; + case INSN_SPECIFICATION_REF_ADDR8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +specification_ref_addr: + specification = (uintptr_t)debug_info_buffer + tmp; + break; + case INSN_INDIRECT: + case INSN_SIBLING_INDIRECT: + case INSN_NAME_INDIRECT: + case INSN_COMP_DIR_INDIRECT: + case INSN_STR_OFFSETS_BASE_INDIRECT: + case INSN_STMT_LIST_INDIRECT: + case INSN_DECL_FILE_INDIRECT: + case INSN_DECLARATION_INDIRECT: + case INSN_SPECIFICATION_INDIRECT: + if ((err = read_indirect_insn(cu, &buffer->bb, + insn, &insn, + &extra_die_flags))) + return err; + if (insn) + goto indirect_insn; + else + continue; + default: + skip = insn; +skip: + if ((err = binary_buffer_skip(&buffer->bb, + skip))) + return err; + break; + } + } + insn = *insnp | extra_die_flags; + + if (depth == 0) { + if (stmt_list_ptr) { + if (stmt_list > + cu->module->scn_data[DRGN_SCN_DEBUG_LINE]->d_size) { + return binary_buffer_error_at(&buffer->bb, + stmt_list_ptr, + "DW_AT_stmt_list is out of bounds"); + } + if (comp_dir == &comp_dir_is_strx && + (err = read_strx(buffer, comp_dir_strx, + &comp_dir))) + return err; + if ((err = read_file_name_table(path_hash_cache, + cu, 
comp_dir, + stmt_list))) + return err; + } + } else if (specification) { + if (insn & INSN_DIE_FLAG_DECLARATION) + declaration = true; + /* + * For now, we don't handle DIEs with + * DW_AT_specification which are themselves + * declarations. We may need to handle + * DW_AT_specification "chains" in the future. + */ + if (!declaration && + (err = index_specification(dbinfo, specification, + cu->module, die_addr))) + return err; + } + + if (insn & INSN_DIE_FLAG_CHILDREN) { + if (sibling && + (insn & INSN_DIE_FLAG_TAG_MASK) != DW_TAG_namespace) + buffer->bb.pos = sibling; + else + depth++; + } else if (depth == 0) { + break; + } + } + return NULL; +} + +/** + * Find a definition corresponding to a declaration DIE. + * + * This finds the address of a DIE with a @c DW_AT_specification attribute that + * refers to the given address. + * + * @param[in] die_addr The address of the declaration DIE. + * @param[out] module_ret Returned module containing the definition DIE. + * @param[out] addr_ret Returned address of the definition DIE. + * @return @c true if a definition DIE was found, @c false if not (in which case + * *@p module_ret and *@p addr_ret are not modified). 
+ */ +static bool +drgn_dwarf_find_definition(struct drgn_debug_info *dbinfo, uintptr_t die_addr, + struct drgn_debug_info_module **module_ret, + uintptr_t *addr_ret) +{ + struct drgn_dwarf_specification_map_iterator it = + drgn_dwarf_specification_map_search(&dbinfo->dwarf.specifications, + &die_addr); + if (!it.entry) + return false; + *module_ret = it.entry->module; + *addr_ret = it.entry->addr; + return true; +} + +static bool append_die_entry(struct drgn_debug_info *dbinfo, + struct drgn_dwarf_index_shard *shard, uint8_t tag, + uint64_t file_name_hash, + struct drgn_debug_info_module *module, + uintptr_t addr) +{ + if (shard->dies.size == UINT32_MAX) + return false; + struct drgn_dwarf_index_die *die = + drgn_dwarf_index_die_vector_append_entry(&shard->dies); + if (!die) + return false; + die->next = UINT32_MAX; + die->tag = tag; + if (die->tag == DW_TAG_namespace) { + die->namespace = malloc(sizeof(*die->namespace)); + if (!die->namespace) { + shard->dies.size--; + return false; + } + drgn_namespace_dwarf_index_init(die->namespace, dbinfo); + } else { + die->file_name_hash = file_name_hash; + } + die->module = module; + die->addr = addr; + + return true; +} + +static bool index_die(struct drgn_namespace_dwarf_index *ns, + struct drgn_dwarf_index_cu *cu, const char *name, + uint8_t tag, uint64_t file_name_hash, + struct drgn_debug_info_module *module, uintptr_t addr) +{ + bool success = false; + struct drgn_dwarf_index_die_map_entry entry = { + .key = { name, strlen(name) }, + }; + struct hash_pair hp = drgn_dwarf_index_die_map_hash(&entry.key); + struct drgn_dwarf_index_shard *shard = + &ns->shards[hash_pair_to_shard(hp)]; + omp_set_lock(&shard->lock); + struct drgn_dwarf_index_die_map_iterator it = + drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, + hp); + struct drgn_dwarf_index_die *die; + if (!it.entry) { + if (!append_die_entry(ns->dbinfo, shard, tag, file_name_hash, + module, addr)) + goto err; + entry.value = shard->dies.size - 1; + if 
(drgn_dwarf_index_die_map_insert_searched(&shard->map, + &entry, hp, + NULL) < 0) + goto err; + die = &shard->dies.data[shard->dies.size - 1]; + goto out; + } -void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module) + die = &shard->dies.data[it.entry->value]; + for (;;) { + const uint64_t die_file_name_hash = + die->tag == DW_TAG_namespace ? 0 : die->file_name_hash; + if (die->tag == tag && die_file_name_hash == file_name_hash) + goto out; + + if (die->next == UINT32_MAX) + break; + die = &shard->dies.data[die->next]; + } + + size_t index = die - shard->dies.data; + if (!append_die_entry(ns->dbinfo, shard, tag, file_name_hash, module, + addr)) + goto err; + die = &shard->dies.data[shard->dies.size - 1]; + shard->dies.data[index].next = shard->dies.size - 1; +out: + if (tag == DW_TAG_namespace) { + struct drgn_dwarf_index_pending_die *pending = + drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies); + if (!pending) + goto err; + pending->cu = cu - ns->dbinfo->dwarf.index_cus.data; + pending->addr = addr; + } + success = true; +err: + omp_unset_lock(&shard->lock); + return success; +} + +/* Second pass: index the actual DIEs. 
*/ +static struct drgn_error * +index_cu_second_pass(struct drgn_namespace_dwarf_index *ns, + struct drgn_dwarf_index_cu_buffer *buffer) { - free(module->dwarf.fdes); - free(module->dwarf.cies); + struct drgn_error *err; + struct drgn_dwarf_index_cu *cu = buffer->cu; + Elf_Data *debug_str = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; + unsigned int depth = 0; + uint8_t depth1_tag = 0; + size_t depth1_addr = 0; + for (;;) { + size_t die_addr = (uintptr_t)buffer->bb.pos; + + uint64_t code; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) + return err; + if (code == 0) { + if (depth-- > 1) + continue; + else + break; + } else if (code > cu->num_abbrev_decls) { + return binary_buffer_error(&buffer->bb, + "unknown abbreviation code %" PRIu64, + code); + } + + uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; + const char *name = NULL; + const char *decl_file_ptr = NULL; + uint64_t decl_file = 0; /* For -Wmaybe-uninitialized. */ + bool declaration = false; + bool specification = false; + const char *sibling = NULL; + uint8_t insn; + uint8_t extra_die_flags = 0; + while ((insn = *insnp++) != INSN_END) { +indirect_insn:; + uint64_t skip, tmp; + switch (insn) { + case INSN_SKIP_BLOCK: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_BLOCK1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_BLOCK2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SKIP_BLOCK4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &skip))) + return err; + goto skip; + case INSN_SPECIFICATION_REF_UDATA: + specification = true; + /* fallthrough */ + case INSN_SKIP_LEB128: + case INSN_COMP_DIR_STRX: + if ((err = binary_buffer_skip_leb128(&buffer->bb))) + return err; + break; + case INSN_NAME_STRING: + name = buffer->bb.pos; + /* fallthrough */ + case INSN_SKIP_STRING: + case 
INSN_COMP_DIR_STRING: + if ((err = binary_buffer_skip_string(&buffer->bb))) + return err; + break; + case INSN_SIBLING_REF1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; + goto sibling; + case INSN_SIBLING_REF_UDATA: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; +sibling: + if (tmp > cu->len) { + return binary_buffer_error(&buffer->bb, + "DW_AT_sibling is out of bounds"); + } + sibling = cu->buf + tmp; + __builtin_prefetch(sibling); + if (sibling < buffer->bb.pos) { + return binary_buffer_error(&buffer->bb, + "DW_AT_sibling points backwards"); + } + break; + case INSN_NAME_STRP4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto strp; + case INSN_NAME_STRP8: + if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) + return err; +strp: + if (tmp >= debug_str->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_AT_name is out of bounds"); + } + name = (const char *)debug_str->d_buf + tmp; + __builtin_prefetch(name); + break; + case INSN_NAME_STRX: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; + goto name_strx; + case INSN_NAME_STRX1: + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto name_strx; + case INSN_NAME_STRX2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto name_strx; + case INSN_NAME_STRX3: + if ((err = binary_buffer_next_uint(&buffer->bb, + 3, &tmp))) + return err; + goto name_strx; + case INSN_NAME_STRX4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return 
err; +name_strx: + if ((err = read_strx(buffer, tmp, &name))) + return err; + __builtin_prefetch(name); + break; + case INSN_COMP_DIR_STRP4: + case INSN_COMP_DIR_LINE_STRP4: + case INSN_STR_OFFSETS_BASE4: + case INSN_STMT_LIST_LINEPTR4: + skip = 4; + goto skip; + case INSN_COMP_DIR_STRP8: + case INSN_COMP_DIR_LINE_STRP8: + case INSN_STR_OFFSETS_BASE8: + case INSN_STMT_LIST_LINEPTR8: + skip = 8; + goto skip; + case INSN_DECL_FILE_DATA1: + decl_file_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &decl_file))) + return err; + break; + case INSN_DECL_FILE_DATA2: + decl_file_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &decl_file))) + return err; + break; + case INSN_DECL_FILE_DATA4: + decl_file_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &decl_file))) + return err; + break; + case INSN_DECL_FILE_DATA8: + decl_file_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_u64(&buffer->bb, + &decl_file))) + return err; + break; + case INSN_DECL_FILE_UDATA: + decl_file_ptr = buffer->bb.pos; + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &decl_file))) + return err; + break; + case INSN_DECL_FILE_IMPLICIT: + decl_file_ptr = buffer->bb.pos; + decl_file = 0; + for (int shift = 0; ; shift += 7) { + uint8_t byte = *insnp++; + decl_file |= (uint64_t)(byte & 0x7f) << shift; + if (!(byte & 0x80)) + break; + } + break; + case INSN_DECLARATION_FLAG: { + uint8_t flag; + if ((err = binary_buffer_next_u8(&buffer->bb, + &flag))) + return err; + if (flag) + declaration = true; + break; + } + case INSN_SPECIFICATION_REF1: + specification = true; + /* fallthrough */ + case INSN_COMP_DIR_STRX1: + skip = 1; + goto skip; + case INSN_SPECIFICATION_REF2: + specification = true; + /* fallthrough */ + case INSN_COMP_DIR_STRX2: + skip = 2; + goto skip; + case INSN_COMP_DIR_STRX3: + skip = 3; + goto skip; + case INSN_SPECIFICATION_REF4: + case INSN_SPECIFICATION_REF_ADDR4: + 
specification = true; + /* fallthrough */ + case INSN_COMP_DIR_STRX4: + skip = 4; + goto skip; + case INSN_SPECIFICATION_REF8: + case INSN_SPECIFICATION_REF_ADDR8: + specification = true; + skip = 8; + goto skip; + case INSN_INDIRECT: + case INSN_SIBLING_INDIRECT: + case INSN_NAME_INDIRECT: + case INSN_COMP_DIR_INDIRECT: + case INSN_STR_OFFSETS_BASE_INDIRECT: + case INSN_STMT_LIST_INDIRECT: + case INSN_DECL_FILE_INDIRECT: + case INSN_DECLARATION_INDIRECT: + case INSN_SPECIFICATION_INDIRECT: + if ((err = read_indirect_insn(cu, &buffer->bb, + insn, &insn, + &extra_die_flags))) + return err; + if (insn) + goto indirect_insn; + else + continue; + default: + skip = insn; +skip: + if ((err = binary_buffer_skip(&buffer->bb, + skip))) + return err; + break; + } + } + insn = *insnp | extra_die_flags; + + uint8_t tag = insn & INSN_DIE_FLAG_TAG_MASK; + if (depth == 1) { + depth1_tag = tag; + depth1_addr = die_addr; + } + if (depth == (tag == DW_TAG_enumerator ? 2 : 1) && name && + !specification) { + if (insn & INSN_DIE_FLAG_DECLARATION) + declaration = true; + struct drgn_debug_info_module *module = cu->module; + if (tag == DW_TAG_enumerator) { + if (depth1_tag != DW_TAG_enumeration_type) + goto next; + /* + * NB: the enumerator name points to the + * enumeration_type DIE. Also, enumerators can't + * be declared in C/C++, so we don't check for + * that. 
+ */ + die_addr = depth1_addr; + } else if (declaration && + !drgn_dwarf_find_definition(ns->dbinfo, + die_addr, + &module, + &die_addr)) { + goto next; + } + + uint64_t file_name_hash; + if (decl_file_ptr) { + if (decl_file >= cu->num_file_names) { + return binary_buffer_error_at(&buffer->bb, + decl_file_ptr, + "invalid DW_AT_decl_file %" PRIu64, + decl_file); + } + file_name_hash = cu->file_name_hashes[decl_file]; + } else { + file_name_hash = 0; + } + if (!index_die(ns, cu, name, tag, file_name_hash, + module, die_addr)) + return &drgn_enomem; + } + +next: + if (insn & INSN_DIE_FLAG_CHILDREN) { + /* + * We must descend into the children of enumeration_type + * DIEs to index enumerator DIEs. We don't want to skip + * over the children of the top-level DIE even if it has + * a sibling pointer. + */ + if (sibling && tag != DW_TAG_enumeration_type && + depth > 0) + buffer->bb.pos = sibling; + else + depth++; + } else if (depth == 0) { + break; + } + } + return NULL; } -void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo) +static void drgn_dwarf_index_rollback(struct drgn_debug_info *dbinfo) { - drgn_dwarf_index_init(&dbinfo->dwarf.index); - drgn_dwarf_type_map_init(&dbinfo->dwarf.types); - drgn_dwarf_type_map_init(&dbinfo->dwarf.cant_be_incomplete_array_types); - dbinfo->dwarf.depth = 0; + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = + &dbinfo->dwarf.global.shards[i]; + /* + * Because we're deleting everything that was added since the + * last update, we can just shrink the dies array to the first + * entry that was added for this update. 
+ */ + while (shard->dies.size) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[shard->dies.size - 1]; + if (die->module->state == + DRGN_DEBUG_INFO_MODULE_INDEXED) + break; + if (die->tag == DW_TAG_namespace) { + drgn_namespace_dwarf_index_deinit(die->namespace); + free(die->namespace); + } + shard->dies.size--; + } + + /* + * The new entries may be chained off of existing entries; + * unchain them. Note that any entries chained off of the new + * entries must also be new, so there's no need to preserve + * them. + */ + for (size_t index = 0; index < shard->dies.size; index++) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[index]; + if (die->next != UINT32_MAX && + die->next >= shard->dies.size) + die->next = UINT32_MAX; + } + + /* Finally, delete the new entries in the map. */ + for (struct drgn_dwarf_index_die_map_iterator it = + drgn_dwarf_index_die_map_first(&shard->map); + it.entry; ) { + if (it.entry->value >= shard->dies.size) { + it = drgn_dwarf_index_die_map_delete_iterator(&shard->map, + it); + } else { + it = drgn_dwarf_index_die_map_next(it); + } + } + } + + for (struct drgn_dwarf_specification_map_iterator it = + drgn_dwarf_specification_map_first(&dbinfo->dwarf.specifications); + it.entry; ) { + if (it.entry->module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { + it = drgn_dwarf_specification_map_next(it); + } else { + it = drgn_dwarf_specification_map_delete_iterator(&dbinfo->dwarf.specifications, + it); + } + } } -void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo) +struct drgn_error * +drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) { - drgn_dwarf_type_map_deinit(&dbinfo->dwarf.cant_be_incomplete_array_types); - drgn_dwarf_type_map_deinit(&dbinfo->dwarf.types); - drgn_dwarf_index_deinit(&dbinfo->dwarf.index); + struct drgn_debug_info *dbinfo = state->dbinfo; + struct drgn_dwarf_index_cu_vector *cus = &dbinfo->dwarf.index_cus; + + if (!drgn_namespace_dwarf_index_alloc_shards(&dbinfo->dwarf.global)) + 
return &drgn_enomem; + + size_t old_cus_size = cus->size; + size_t new_cus_size = old_cus_size; + for (size_t i = 0; i < state->max_threads; i++) + new_cus_size += state->cus[i].size; + if (!drgn_dwarf_index_cu_vector_reserve(cus, new_cus_size)) + return &drgn_enomem; + for (size_t i = 0; i < state->max_threads; i++) { + for (size_t j = 0; j < state->cus[i].size; j++) { + struct drgn_dwarf_index_pending_cu *pending_cu = + &state->cus[i].data[j]; + cus->data[cus->size++] = (struct drgn_dwarf_index_cu){ + .module = pending_cu->module, + .buf = pending_cu->buf, + .len = pending_cu->len, + .is_64_bit = pending_cu->is_64_bit, + .scn = pending_cu->scn, + .file_name_hashes = + (uint64_t *)no_file_name_hashes, + .num_file_names = + array_size(no_file_name_hashes), + }; + } + } + + struct drgn_error *err = NULL; + #pragma omp parallel + { + struct path_hash_cache path_hash_cache; + path_hash_vector_init(&path_hash_cache.directories); + path_hash_cache.entry_formats = NULL; + path_hash_cache.entry_formats_capacity = 0; + path_hash_cache.first_chunk = + malloc(sizeof(struct path_hash_chunk)); + if (path_hash_cache.first_chunk) { + path_hash_cache.first_chunk->next = NULL; + } else { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (!err) + err = &drgn_enomem; + } + #pragma omp for schedule(dynamic) + for (size_t i = old_cus_size; i < cus->size; i++) { + if (err) + continue; + struct drgn_dwarf_index_cu *cu = &cus->data[i]; + struct drgn_dwarf_index_cu_buffer cu_buffer; + drgn_dwarf_index_cu_buffer_init(&cu_buffer, cu); + struct drgn_error *cu_err = read_cu(&cu_buffer); + if (!cu_err) + cu_err = index_cu_first_pass(dbinfo, &cu_buffer, + &path_hash_cache); + if (cu_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + free(path_hash_cache.entry_formats); + path_hash_vector_deinit(&path_hash_cache.directories); + struct path_hash_chunk *chunk = path_hash_cache.first_chunk; + 
while (chunk) { + struct path_hash_chunk *next_chunk = chunk->next; + free(chunk); + chunk = next_chunk; + } + } + if (err) + goto err; + + #pragma omp parallel for schedule(dynamic) + for (size_t i = old_cus_size; i < cus->size; i++) { + if (err) + continue; + struct drgn_dwarf_index_cu *cu = &cus->data[i]; + struct drgn_dwarf_index_cu_buffer buffer; + drgn_dwarf_index_cu_buffer_init(&buffer, cu); + buffer.bb.pos += cu_header_size(cu); + struct drgn_error *cu_err = + index_cu_second_pass(&dbinfo->dwarf.global, &buffer); + if (cu_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + if (err) { + drgn_dwarf_index_rollback(dbinfo); +err: + for (size_t i = old_cus_size; i < cus->size; i++) + drgn_dwarf_index_cu_deinit(&cus->data[i]); + cus->size = old_cus_size; + } + return err; } -/* - * Diagnostics. - */ +static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) +{ + if (ns->pending_dies.size == 0) + return NULL; -#define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" -#define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) + if (ns->saved_err) + return drgn_error_copy(ns->saved_err); + + if (!drgn_namespace_dwarf_index_alloc_shards(ns)) + return &drgn_enomem; + + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) + for (size_t i = 0; i < ns->pending_dies.size; i++) { + if (!err) { + struct drgn_dwarf_index_pending_die *pending = + &ns->pending_dies.data[i]; + struct drgn_dwarf_index_cu *cu = + &ns->dbinfo->dwarf.index_cus.data[pending->cu]; + struct drgn_dwarf_index_cu_buffer buffer; + drgn_dwarf_index_cu_buffer_init(&buffer, cu); + buffer.bb.pos = (char *)pending->addr; + struct drgn_error *cu_err = + index_cu_second_pass(ns, &buffer); + if (cu_err) { + #pragma omp critical(drgn_index_namespace_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + } + if (err) { + ns->saved_err = 
err; + return drgn_error_copy(ns->saved_err); + } + ns->pending_dies.size = 0; + drgn_dwarf_index_pending_die_vector_shrink_to_fit(&ns->pending_dies); + return err; +} /** - * Get the name of a DWARF tag. + * Iterator over DWARF debugging information. * - * @return Static string if the tag is known or @p buf if the tag is unknown - * (populated with a description). + * An iterator is initialized with @ref drgn_dwarf_index_iterator_init(). It is + * advanced with @ref drgn_dwarf_index_iterator_next(). */ -static const char *dw_tag_str(int tag, char buf[DW_TAG_BUF_LEN]) +struct drgn_dwarf_index_iterator { + const uint64_t *tags; + size_t num_tags; + struct drgn_dwarf_index_shard *shard; + uint32_t index; +}; + +/** + * Create an iterator over DIEs in a DWARF index namespace. + * + * @param[out] it DWARF index iterator to initialize. + * @param[in] ns Namespace DWARF index. + * @param[in] name Name of DIE to search for. + * @param[in] name_len Length of @c name. + * @param[in] tags List of DIE tags to search for. + * @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error * +drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, + struct drgn_namespace_dwarf_index *ns, + const char *name, size_t name_len, + const uint64_t *tags, size_t num_tags) { - switch (tag) { -#define DWARF_ONE_KNOWN_DW_TAG(name, value) case value: return "DW_TAG_" #name; - DWARF_ALL_KNOWN_DW_TAG -#undef DWARF_ONE_KNOWN_DW_TAG - default: - sprintf(buf, DW_TAG_UNKNOWN_FORMAT, tag); - return buf; + struct drgn_error *err = index_namespace(ns); + if (err) + return err; + if (ns->shards) { + struct nstring key = { name, name_len }; + struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); + it->shard = &ns->shards[hash_pair_to_shard(hp)]; + struct drgn_dwarf_index_die_map_iterator map_it = + drgn_dwarf_index_die_map_search_hashed(&it->shard->map, + &key, hp); + it->index = map_it.entry ? map_it.entry->value : UINT32_MAX; + } else { + it->shard = NULL; + it->index = UINT32_MAX; } + it->tags = tags; + it->num_tags = num_tags; + return NULL; } -/** Like @ref dw_tag_str(), but takes a @c Dwarf_Die. */ -static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) +static inline bool +drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, + struct drgn_dwarf_index_die *die) { - return dw_tag_str(dwarf_tag(die), buf); + if (it->num_tags == 0) + return true; + for (size_t i = 0; i < it->num_tags; i++) { + if (die->tag == it->tags[i]) + return true; + } + return false; } -static struct drgn_error * -drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, - const char *message) +/** + * Get the next matching DIE from a DWARF index iterator. + * + * If matching any name, this is O(n), where n is the number of indexed DIEs. If + * matching by name, this is O(1) on average and O(n) worst case. + * + * Note that this returns the parent `DW_TAG_enumeration_type` for indexed + * `DW_TAG_enumerator` DIEs. + * + * @param[in] it DWARF index iterator. 
+ * @return Next DIE, or @c NULL if there are no more matching DIEs. + */ +static struct drgn_dwarf_index_die * +drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) { - uintptr_t p = (uintptr_t)ptr; - int end_match = -1; - for (int i = 0; i < array_size(module->scn_data); i++) { - if (!module->scn_data[i]) - continue; - uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; - uintptr_t end = start + module->scn_data[i]->d_size; - if (start <= p) { - if (p < end) { - return drgn_error_debug_info_scn(module, i, ptr, - message); - } else if (p == end) { - end_match = i; - } - } - } - if (end_match != -1) { - /* - * The pointer doesn't lie within a section, but it does point - * to the end of a section. - */ - return drgn_error_debug_info_scn(module, end_match, ptr, - message); + while (it->index != UINT32_MAX) { + struct drgn_dwarf_index_die *die = + &it->shard->dies.data[it->index]; + it->index = die->next; + if (drgn_dwarf_index_iterator_matches_tag(it, die)) + return die; } - /* We couldn't find the section containing the pointer. */ - const char *name = dwfl_module_info(module->dwfl_module, NULL, NULL, - NULL, NULL, NULL, NULL, NULL); - return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); + return NULL; } -static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) +/** + * Get a @c Dwarf_Die from a @ref drgn_dwarf_index_die. + * + * @param[in] die Indexed DIE. + * @param[out] die_ret Returned DIE. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error * +drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, Dwarf_Die *die_ret) { - if (address_size < 1 || address_size > 8) { - return drgn_error_format(DRGN_ERROR_OTHER, - "unsupported address size %" PRIu8, - address_size); + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(die->module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdwfl(); + uintptr_t start = + (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; + size_t size = die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die->addr >= start && die->addr < start + size) { + if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + if (!dwarf_offdie_types(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); } return NULL; } @@ -173,8 +3100,8 @@ drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, struct drgn_error *err; struct drgn_dwarf_index_iterator it; const uint64_t tag = DW_TAG_subprogram; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.index.global, - "main", strlen("main"), &tag, 1); + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, "main", + strlen("main"), &tag, 1); if (err) return err; struct drgn_dwarf_index_die *index_die; @@ -1000,8 +3927,6 @@ drgn_dwarf_location(struct drgn_debug_info_module *module, */ static const int MAX_DWARF_EXPR_OPS = 10000; -DEFINE_VECTOR(uint64_vector, uint64_t) - /* A DWARF expression and the context it is being evaluated in. 
*/ struct drgn_dwarf_expression_context { struct binary_buffer bb; @@ -2479,8 +5404,8 @@ drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, struct drgn_error *err; struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.index.global, - name, strlen(name), &tag, 1); + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, name, + strlen(name), &tag, 1); if (err) return err; @@ -3531,9 +6456,8 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, return drgn_error_libdw(); if (declaration) { uintptr_t die_addr; - if (drgn_dwarf_index_find_definition(&dbinfo->dwarf.index, - (uintptr_t)die->addr, - &module, &die_addr)) { + if (drgn_dwarf_find_definition(dbinfo, (uintptr_t)die->addr, + &module, &die_addr)) { Dwarf_Addr bias; Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); @@ -3723,8 +6647,8 @@ struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, } struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.index.global, - name, name_len, &tag, 1); + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, name, + name_len, &tag, 1); if (err) return err; struct drgn_dwarf_index_die *index_die; @@ -3758,7 +6682,7 @@ drgn_debug_info_find_object(const char *name, size_t name_len, struct drgn_error *err; struct drgn_debug_info *dbinfo = arg; - struct drgn_dwarf_index_namespace *ns = &dbinfo->dwarf.index.global; + struct drgn_namespace_dwarf_index *ns = &dbinfo->dwarf.global; if (name_len >= 2 && memcmp(name, "::", 2) == 0) { /* Explicit global namespace. 
*/ name_len -= 2; diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h index e1a3c1fa6..24f480f44 100644 --- a/libdrgn/dwarf_info.h +++ b/libdrgn/dwarf_info.h @@ -22,8 +22,8 @@ #include "cfi.h" #include "drgn.h" -#include "dwarf_index.h" #include "hash_table.h" +#include "vector.h" struct drgn_debug_info; struct drgn_debug_info_module; @@ -59,6 +59,50 @@ struct drgn_dwarf_module_info { void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module); +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, + struct drgn_dwarf_index_pending_die) + +/** + * Index of DWARF information for a namespace by entity name. + * + * This effectively maps a name to a list of DIEs with that name in a namespace. + * DIEs with the same name and tag and declared in the same file are + * deduplicated. + */ +struct drgn_namespace_dwarf_index { + /** + * Index shards. + * + * Indexing is parallelized, so this is sharded to reduce lock + * contention. + */ + struct drgn_dwarf_index_shard *shards; + /** Debugging information cache that owns this index. */ + struct drgn_debug_info *dbinfo; + /** DIEs we have not indexed yet. */ + struct drgn_dwarf_index_pending_die_vector pending_dies; + /** Saved error from a previous index. */ + struct drgn_error *saved_err; +}; + +/** DIE with a `DW_AT_specification` attribute. */ +struct drgn_dwarf_specification { + /** + * Address of non-defining declaration DIE referenced by + * `DW_AT_specification`. + */ + uintptr_t declaration; + /** Module containing DIE. */ + struct drgn_debug_info_module *module; + /** Address of DIE. */ + uintptr_t addr; +}; + +DEFINE_HASH_TABLE_TYPE(drgn_dwarf_specification_map, + struct drgn_dwarf_specification) + +DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) + /** Cached type in a @ref drgn_debug_info. 
*/ struct drgn_dwarf_type { struct drgn_type *type; @@ -76,8 +120,20 @@ DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type) /** DWARF debugging information for a program/@ref drgn_debug_info. */ struct drgn_dwarf_info { - /** Index of DWARF debugging information. */ - struct drgn_dwarf_index index; + /** Global namespace index. */ + struct drgn_namespace_dwarf_index global; + /** + * Map from address of DIE referenced by DW_AT_specification to DIE that + * references it. This is used to resolve DIEs with DW_AT_declaration to + * their definition. + * + * This is populated while indexing new DWARF information. Unlike the + * name index, it is not sharded because there typically aren't enough + * of these in a program to cause contention. + */ + struct drgn_dwarf_specification_map specifications; + /** Indexed compilation units. */ + struct drgn_dwarf_index_cu_vector index_cus; /** * Cache of parsed types. @@ -101,6 +157,44 @@ struct drgn_dwarf_info { void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo); void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo); +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_cu_vector, + struct drgn_dwarf_index_pending_cu) + +/** + * State tracked while indexing new DWARF information in a @ref drgn_dwarf_info. + */ +struct drgn_dwarf_index_state { + struct drgn_debug_info *dbinfo; + /** Per-thread arrays of CUs to be indexed. */ + struct drgn_dwarf_index_pending_cu_vector *cus; + size_t max_threads; +}; + +/** + * Initialize state for indexing new DWARF information. + * + * @return @c true on success, @c false on failure to allocate memory. + */ +bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, + struct drgn_debug_info *dbinfo); + +/** Deinitialize state for indexing new DWARF information. */ +void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state); + +/** Read a @ref drgn_debug_info_module to index its DWARF information. 
*/ +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_state *state, + struct drgn_debug_info_module *module); + +/** + * Index new DWARF information. + * + * This should be called once all modules have been read with @ref + * drgn_dwarf_index_read_module() to finish indexing those modules. + */ +struct drgn_error * +drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state); + /** * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing * a given program counter. From 4808ef72ee5619c7642880157466fd9de8b31c1a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 19 Nov 2021 14:19:32 -0800 Subject: [PATCH 087/139] libdrgn: debug_info: get address range of reported ET_EXEC files When explicitly reporting a debugging information file for a userspace program, userspace_report_debug_info() currently always reports it with a load address range of [0, 0) (i.e., not actually loaded into the program). This is because for ET_DYN and ET_REL files, we have to determine the address range by inspecting the core dump or program state, which is a bit involved. However, ET_EXEC is much easier: we can get the address range from the segment headers. In fact, we already implemented this for vmlinux files, so we can reuse that with a modification to make it more permissive. ET_CORE debug info files don't make much sense, but libdwfl seems to treat a reported ET_CORE file the same as ET_EXEC (see dwfl_report_elf()), so we do, too. Unfortunately, most executables on modern Linux distributions are ET_DYN, but this will at least make testing easier. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 63 ++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 99ea2c0d3..539610231 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -581,28 +581,52 @@ static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module, return DWARF_CB_ABORT; } +static struct drgn_error * +userspace_report_elf_file(struct drgn_debug_info_load_state *load, + const char *path) +{ + struct drgn_error *err; + + int fd; + Elf *elf; + err = open_elf_file(path, &fd, &elf); + if (err) + goto err; + + GElf_Ehdr ehdr_mem, *ehdr; + ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) { + err = drgn_error_libelf(); + goto err_close; + } + /* + * We haven't implemented a way to get the load address for dynamically + * loaded or relocatable files, so for now we report those as unloaded. + */ + uint64_t start = 0, end = 0; + if (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_CORE) { + err = elf_address_range(elf, 0, &start, &end); + if (err) + goto err_close; + } + + return drgn_debug_info_report_elf(load, path, fd, elf, start, end, NULL, + NULL); + +err_close: + elf_end(elf); + close(fd); +err: + return drgn_debug_info_report_error(load, path, NULL, err); +} + static struct drgn_error * userspace_report_debug_info(struct drgn_debug_info_load_state *load) { struct drgn_error *err; for (size_t i = 0; i < load->num_paths; i++) { - int fd; - Elf *elf; - err = open_elf_file(load->paths[i], &fd, &elf); - if (err) { - err = drgn_debug_info_report_error(load, load->paths[i], - NULL, err); - if (err) - return err; - continue; - } - /* - * We haven't implemented a way to get the load address for - * anything reported here, so for now we report it as unloaded. 
- */ - err = drgn_debug_info_report_elf(load, load->paths[i], fd, elf, - 0, 0, NULL, NULL); + err = userspace_report_elf_file(load, load->paths[i]); if (err) return err; } @@ -1338,10 +1362,9 @@ struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, end = segment_end; } } - if (start >= end) { - return drgn_error_create(DRGN_ERROR_OTHER, - "ELF file has no loadable segments"); - } + /* There were no loadable segments. */ + if (start >= end) + start = end = 0; *start_ret = start; *end_ret = end; return NULL; From 681d8453cebbab498dab4ab095abc1e0824a6d18 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 19 Nov 2021 10:56:45 -0800 Subject: [PATCH 088/139] tests: elfwriter: set e_phoff to zero if there are no segments readelf warns that a non-zero e_phoff with a zero e_phnum is invalid: Warning: possibly corrupt ELF header - it has a non-zero program header offset, but no program headers Signed-off-by: Omar Sandoval --- tests/elfwriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/elfwriter.py b/tests/elfwriter.py index b4393f54d..ad8682af3 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -93,7 +93,7 @@ def create_elf_file( e_machine, 1, # e_version = EV_CURRENT 0, # e_entry - phdr_offset, # e_phoff + phdr_offset if phnum else 0, # e_phoff shdr_offset, # e_shoff 0, # e_flags ehdr_struct.size, # e_ehsize From cb8bf339c8ba3f1105e2eeffb6a55c6d4cc6b8db Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 19 Nov 2021 14:54:31 -0800 Subject: [PATCH 089/139] tests: elfwriter: don't add sections if there aren't any Only add SHT_NULL and .shstrtab sections if there are other sections to be added. This allows us to create core dumps with no sections, like core dumps on Linux. 
Signed-off-by: Omar Sandoval --- tests/elfwriter.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/elfwriter.py b/tests/elfwriter.py index ad8682af3..353c0afbf 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -52,19 +52,21 @@ def create_elf_file( phdr_struct = struct.Struct(endian + "8I") e_machine = 3 if little_endian else 8 # EM_386 or EM_MIPS - shstrtab = ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=bytearray(1)) - tmp = [shstrtab] - tmp.extend(sections) - sections = tmp - shnum = 1 # One for the SHT_NULL section. + shnum = 0 phnum = 0 + shstrtab = bytearray(1) for section in sections: if section.name is not None: - shstrtab.data.extend(section.name.encode()) - shstrtab.data.append(0) + shstrtab.extend(section.name.encode()) + shstrtab.append(0) shnum += 1 if section.p_type is not None: phnum += 1 + if shnum > 0: + shnum += 2 # One for the SHT_NULL section, one for .shstrtab. + shstrtab.extend(b".shstrtab\0") + sections = list(sections) + sections.append(ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=shstrtab)) shdr_offset = ehdr_struct.size phdr_offset = shdr_offset + shdr_struct.size * shnum @@ -94,14 +96,14 @@ def create_elf_file( 1, # e_version = EV_CURRENT 0, # e_entry phdr_offset if phnum else 0, # e_phoff - shdr_offset, # e_shoff + shdr_offset if shnum else 0, # e_shoff 0, # e_flags ehdr_struct.size, # e_ehsize phdr_struct.size, # e_phentsize phnum, # e_phnum - shdr_struct.size, # e_shentsize, - shnum, # e_shnum, - 1, # e_shstrndx + shdr_struct.size, # e_shentsize + shnum, # e_shnum + shnum - 1 if shnum else 0, # e_shstrndx ) shdr_offset += shdr_struct.size @@ -113,7 +115,7 @@ def create_elf_file( shdr_struct.pack_into( buf, shdr_offset, - shstrtab.data.index(section.name.encode()), # sh_name + shstrtab.index(section.name.encode()), # sh_name section.sh_type, # sh_type 0, # sh_flags section.vaddr, # sh_addr From c84d7e8c15b6c26dee034c468dcdeec2438a321c Mon Sep 17 00:00:00 
2001 From: Omar Sandoval Date: Fri, 19 Nov 2021 16:59:09 -0800 Subject: [PATCH 090/139] tests: generate ELF constants from elf.h Generalize generate_dwarf_constants.py for ELF and replace tests/elf.py with the generated version. Signed-off-by: Omar Sandoval --- scripts/generate_dwarf_constants.py | 70 ----------- scripts/generate_test_constants.py | 104 +++++++++++++++++ tests/elf.py | 174 +++++++++++++++++++++++----- 3 files changed, 246 insertions(+), 102 deletions(-) delete mode 100755 scripts/generate_dwarf_constants.py create mode 100755 scripts/generate_test_constants.py diff --git a/scripts/generate_dwarf_constants.py b/scripts/generate_dwarf_constants.py deleted file mode 100755 index baf224102..000000000 --- a/scripts/generate_dwarf_constants.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# SPDX-License-Identifier: GPL-3.0-or-later - -import keyword -import re - -prefixes = [ - "DW_AT", - "DW_ATE", - "DW_CHILDREN", - "DW_END", - "DW_FORM", - "DW_LANG", - "DW_LNE", - "DW_LNS", - "DW_OP", - "DW_TAG", -] - -if __name__ == "__main__": - with open("libdrgn/include/dwarf.h", "r") as f: - dwarf_h = f.read() - dwarf_h = re.sub(r"/\*.*?\*/", "", dwarf_h, flags=re.DOTALL) - dwarf_h = re.sub(r"\\\n", "", dwarf_h) - matches = re.findall( - r"^\s*(" + "|".join(prefixes) + r")_(\w+)\s*=\s*(0x[0-9a-fA-F]+|[0-9]+)", - dwarf_h, - re.MULTILINE, - ) - - enums = {} - for enum, name, value in matches: - try: - enums[enum].append((name, int(value, 0))) - except KeyError: - enums[enum] = [(name, int(value, 0))] - - print( - """\ -# Automatically generated from dwarf.h - -import enum -from typing import Text - -""" - ) - first = True - for enum in prefixes: - assert enums[enum] - if not first: - print() - print() - first = False - print(f"class {enum}(enum.IntEnum):") - for name, value in enums[enum]: - if keyword.iskeyword(name): - name += "_" - print(f" {name} = 0x{value:X}", end="") - if name == "name": - print(" 
# type: ignore") - else: - print() - print() - print(" @classmethod") - print(" def str(cls, value: int) -> Text:") - print(" try:") - print(f' return f"{enum}_{{cls(value).name}}"') - print(" except ValueError:") - print(" return hex(value)") diff --git a/scripts/generate_test_constants.py b/scripts/generate_test_constants.py new file mode 100755 index 000000000..f970f066e --- /dev/null +++ b/scripts/generate_test_constants.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import argparse +import keyword +from pathlib import Path +import re + +ENUMS = { + "elf": ( + "ET", + "PT", + "SHT", + ), + "dwarf": ( + "DW_AT", + "DW_ATE", + "DW_CHILDREN", + "DW_END", + "DW_FORM", + "DW_LANG", + "DW_LNE", + "DW_LNS", + "DW_OP", + "DW_TAG", + ), +} + + +VALUE_REGEX = r"(?P0x[0-9a-fA-F]+|[0-9]+)" +REGEXES = { + "elf": r"^\s*#\s*define\s+(?P" + + "|".join(ENUMS["elf"]) + + r")_(?P\w+)\s+" + + VALUE_REGEX, + "dwarf": r"^\s*(?P" + + "|".join(ENUMS["dwarf"]) + + r")_(?P\w+)\s*=\s*" + + VALUE_REGEX, +} + + +def read_header(name: str) -> str: + contents = (Path("libdrgn/include") / name).read_text() + contents = re.sub(r"/\*.*?\*/", "", contents, flags=re.DOTALL) + contents = re.sub(r"\\\n", "", contents) + return contents + + +def generate_constants(file: str) -> None: + contents = read_header(file + ".h") + + enums = {} + for match in re.finditer(REGEXES[file], contents, re.MULTILINE): + enum = match.group("enum") + name = match.group("name") + value = int(match.group("value"), 0) + try: + enums[enum].append((name, value)) + except KeyError: + enums[enum] = [(name, value)] + + print( + f"""\ +# Automatically generated from {file}.h + +import enum +from typing import Text + +""" + ) + first = True + for enum in ENUMS[file]: + assert enums[enum] + if not first: + print() + print() + first = False + print(f"class {enum}(enum.IntEnum):") + for name, value in enums[enum]: + if 
keyword.iskeyword(name): + name += "_" + print(f" {name} = 0x{value:X}", end="") + if name == "name": + print(" # type: ignore") + else: + print() + print() + print(" @classmethod") + print(" def str(cls, value: int) -> Text:") + print(" try:") + print(f' return f"{enum}_{{cls(value).name}}"') + print(" except ValueError:") + print(" return hex(value)") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="generate constants for Python tests from header file" + ) + parser.add_argument("file", choices=list(ENUMS)) + args = parser.parse_args() + generate_constants(args.file) diff --git a/tests/elf.py b/tests/elf.py index 4a502bdd3..458f05b33 100644 --- a/tests/elf.py +++ b/tests/elf.py @@ -1,43 +1,153 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# SPDX-License-Identifier: GPL-3.0-or-later +# Automatically generated from elf.h import enum +from typing import Text class ET(enum.IntEnum): - NONE = 0 - REL = 1 - EXEC = 2 - DYN = 3 - CORE = 4 + NONE = 0x0 + REL = 0x1 + EXEC = 0x2 + DYN = 0x3 + CORE = 0x4 + NUM = 0x5 + LOOS = 0xFE00 + HIOS = 0xFEFF + LOPROC = 0xFF00 + HIPROC = 0xFFFF + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"ET_{cls(value).name}" + except ValueError: + return hex(value) class PT(enum.IntEnum): - NULL = 0 - LOAD = 1 - DYNAMIC = 2 - INTERP = 3 - NOTE = 4 - SHLIB = 5 - PHDR = 6 - TLS = 7 + NULL = 0x0 + LOAD = 0x1 + DYNAMIC = 0x2 + INTERP = 0x3 + NOTE = 0x4 + SHLIB = 0x5 + PHDR = 0x6 + TLS = 0x7 + NUM = 0x8 + LOOS = 0x60000000 + GNU_EH_FRAME = 0x6474E550 + GNU_STACK = 0x6474E551 + GNU_RELRO = 0x6474E552 + GNU_PROPERTY = 0x6474E553 + LOSUNW = 0x6FFFFFFA + SUNWBSS = 0x6FFFFFFA + SUNWSTACK = 0x6FFFFFFB + HISUNW = 0x6FFFFFFF + HIOS = 0x6FFFFFFF + LOPROC = 0x70000000 + HIPROC = 0x7FFFFFFF + MIPS_REGINFO = 0x70000000 + MIPS_RTPROC = 0x70000001 + MIPS_OPTIONS = 0x70000002 + MIPS_ABIFLAGS = 0x70000003 + PARISC_ARCHEXT = 0x70000000 + PARISC_UNWIND = 0x70000001 + + @classmethod + def str(cls, value: 
int) -> Text: + try: + return f"PT_{cls(value).name}" + except ValueError: + return hex(value) class SHT(enum.IntEnum): - NULL = 0 - PROGBITS = 1 - SYMTAB = 2 - STRTAB = 3 - RELA = 4 - HASH = 5 - DYNAMIC = 6 - NOTE = 7 - NOBITS = 8 - REL = 9 - SHLIB = 10 - DYNSYM = 11 - INIT_ARRAY = 14 - FINI_ARRAY = 15 - PREINIT_ARRAY = 16 - GROUP = 17 - SYMTAB_SHNDX = 18 + NULL = 0x0 + PROGBITS = 0x1 + SYMTAB = 0x2 + STRTAB = 0x3 + RELA = 0x4 + HASH = 0x5 + DYNAMIC = 0x6 + NOTE = 0x7 + NOBITS = 0x8 + REL = 0x9 + SHLIB = 0xA + DYNSYM = 0xB + INIT_ARRAY = 0xE + FINI_ARRAY = 0xF + PREINIT_ARRAY = 0x10 + GROUP = 0x11 + SYMTAB_SHNDX = 0x12 + NUM = 0x13 + LOOS = 0x60000000 + GNU_ATTRIBUTES = 0x6FFFFFF5 + GNU_HASH = 0x6FFFFFF6 + GNU_LIBLIST = 0x6FFFFFF7 + CHECKSUM = 0x6FFFFFF8 + LOSUNW = 0x6FFFFFFA + SUNW_move = 0x6FFFFFFA + SUNW_COMDAT = 0x6FFFFFFB + SUNW_syminfo = 0x6FFFFFFC + GNU_verdef = 0x6FFFFFFD + GNU_verneed = 0x6FFFFFFE + GNU_versym = 0x6FFFFFFF + HISUNW = 0x6FFFFFFF + HIOS = 0x6FFFFFFF + LOPROC = 0x70000000 + HIPROC = 0x7FFFFFFF + LOUSER = 0x80000000 + HIUSER = 0x8FFFFFFF + MIPS_LIBLIST = 0x70000000 + MIPS_MSYM = 0x70000001 + MIPS_CONFLICT = 0x70000002 + MIPS_GPTAB = 0x70000003 + MIPS_UCODE = 0x70000004 + MIPS_DEBUG = 0x70000005 + MIPS_REGINFO = 0x70000006 + MIPS_PACKAGE = 0x70000007 + MIPS_PACKSYM = 0x70000008 + MIPS_RELD = 0x70000009 + MIPS_IFACE = 0x7000000B + MIPS_CONTENT = 0x7000000C + MIPS_OPTIONS = 0x7000000D + MIPS_SHDR = 0x70000010 + MIPS_FDESC = 0x70000011 + MIPS_EXTSYM = 0x70000012 + MIPS_DENSE = 0x70000013 + MIPS_PDESC = 0x70000014 + MIPS_LOCSYM = 0x70000015 + MIPS_AUXSYM = 0x70000016 + MIPS_OPTSYM = 0x70000017 + MIPS_LOCSTR = 0x70000018 + MIPS_LINE = 0x70000019 + MIPS_RFDESC = 0x7000001A + MIPS_DELTASYM = 0x7000001B + MIPS_DELTAINST = 0x7000001C + MIPS_DELTACLASS = 0x7000001D + MIPS_DWARF = 0x7000001E + MIPS_DELTADECL = 0x7000001F + MIPS_SYMBOL_LIB = 0x70000020 + MIPS_EVENTS = 0x70000021 + MIPS_TRANSLATE = 0x70000022 + MIPS_PIXIE = 0x70000023 + MIPS_XLATE = 
0x70000024 + MIPS_XLATE_DEBUG = 0x70000025 + MIPS_WHIRL = 0x70000026 + MIPS_EH_REGION = 0x70000027 + MIPS_XLATE_OLD = 0x70000028 + MIPS_PDR_EXCEPTION = 0x70000029 + MIPS_XHASH = 0x7000002B + PARISC_EXT = 0x70000000 + PARISC_UNWIND = 0x70000001 + PARISC_DOC = 0x70000002 + ALPHA_DEBUG = 0x70000001 + ALPHA_REGINFO = 0x70000002 + X86_64_UNWIND = 0x70000001 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"SHT_{cls(value).name}" + except ValueError: + return hex(value) From 07d00b7b110ab1e248c9f04204591a2050321b71 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 19 Nov 2021 16:55:42 -0800 Subject: [PATCH 091/139] tests: add tests for ELF symbols Add some scaffolding to generate ELF files with symbol tables and use it to test symbol lookups and Elf_Sym -> drgn.Symbol translation. Signed-off-by: Omar Sandoval --- scripts/generate_test_constants.py | 4 + tests/dwarfwriter.py | 21 ++++- tests/elf.py | 88 +++++++++++++++++ tests/elfwriter.py | 97 +++++++++++++++++-- tests/test_symbol.py | 145 +++++++++++++++++++++++++++++ 5 files changed, 346 insertions(+), 9 deletions(-) create mode 100644 tests/test_symbol.py diff --git a/scripts/generate_test_constants.py b/scripts/generate_test_constants.py index f970f066e..442a3e805 100755 --- a/scripts/generate_test_constants.py +++ b/scripts/generate_test_constants.py @@ -11,7 +11,11 @@ "elf": ( "ET", "PT", + "SHN", "SHT", + "STB", + "STT", + "STV", ), "dwarf": ( "DW_AT", diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index c39574d69..9b645fcba 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -203,7 +203,7 @@ def compile_file_names(die): UNIT_HEADER_TYPES = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) -def compile_dwarf( +def dwarf_sections( dies, little_endian=True, bits=64, *, lang=None, use_dw_form_indirect=False ): if isinstance(dies, DwarfDie): @@ -238,7 +238,6 @@ def compile_dwarf( ) sections = [ - ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), ElfSection( 
name=".debug_abbrev", sh_type=SHT.PROGBITS, @@ -256,5 +255,21 @@ def compile_dwarf( sections.append( ElfSection(name=".debug_types", sh_type=SHT.PROGBITS, data=debug_types) ) + return sections + - return create_elf_file(ET.EXEC, sections, little_endian=little_endian, bits=bits) +def compile_dwarf( + dies, little_endian=True, bits=64, *, lang=None, use_dw_form_indirect=False +): + return create_elf_file( + ET.EXEC, + dwarf_sections( + dies, + little_endian=little_endian, + bits=bits, + lang=lang, + use_dw_form_indirect=use_dw_form_indirect, + ), + little_endian=little_endian, + bits=bits, + ) diff --git a/tests/elf.py b/tests/elf.py index 458f05b33..0c82c7a37 100644 --- a/tests/elf.py +++ b/tests/elf.py @@ -61,6 +61,35 @@ def str(cls, value: int) -> Text: return hex(value) +class SHN(enum.IntEnum): + UNDEF = 0x0 + LORESERVE = 0xFF00 + LOPROC = 0xFF00 + BEFORE = 0xFF00 + AFTER = 0xFF01 + HIPROC = 0xFF1F + LOOS = 0xFF20 + HIOS = 0xFF3F + ABS = 0xFFF1 + COMMON = 0xFFF2 + XINDEX = 0xFFFF + HIRESERVE = 0xFFFF + MIPS_ACOMMON = 0xFF00 + MIPS_TEXT = 0xFF01 + MIPS_DATA = 0xFF02 + MIPS_SCOMMON = 0xFF03 + MIPS_SUNDEFINED = 0xFF04 + PARISC_ANSI_COMMON = 0xFF00 + PARISC_HUGE_COMMON = 0xFF01 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"SHN_{cls(value).name}" + except ValueError: + return hex(value) + + class SHT(enum.IntEnum): NULL = 0x0 PROGBITS = 0x1 @@ -151,3 +180,62 @@ def str(cls, value: int) -> Text: return f"SHT_{cls(value).name}" except ValueError: return hex(value) + + +class STB(enum.IntEnum): + LOCAL = 0x0 + GLOBAL = 0x1 + WEAK = 0x2 + NUM = 0x3 + LOOS = 0xA + GNU_UNIQUE = 0xA + HIOS = 0xC + LOPROC = 0xD + HIPROC = 0xF + MIPS_SPLIT_COMMON = 0xD + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"STB_{cls(value).name}" + except ValueError: + return hex(value) + + +class STT(enum.IntEnum): + NOTYPE = 0x0 + OBJECT = 0x1 + FUNC = 0x2 + SECTION = 0x3 + FILE = 0x4 + COMMON = 0x5 + TLS = 0x6 + NUM = 0x7 + LOOS = 0xA + GNU_IFUNC = 
0xA + HIOS = 0xC + LOPROC = 0xD + HIPROC = 0xF + SPARC_REGISTER = 0xD + PARISC_MILLICODE = 0xD + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"STT_{cls(value).name}" + except ValueError: + return hex(value) + + +class STV(enum.IntEnum): + DEFAULT = 0x0 + INTERNAL = 0x1 + HIDDEN = 0x2 + PROTECTED = 0x3 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"STV_{cls(value).name}" + except ValueError: + return hex(value) diff --git a/tests/elfwriter.py b/tests/elfwriter.py index 353c0afbf..55ad09389 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -2,9 +2,9 @@ # SPDX-License-Identifier: GPL-3.0-or-later import struct -from typing import Optional, Sequence +from typing import List, NamedTuple, Optional, Sequence -from tests.elf import ET, PT, SHT +from tests.elf import ET, PT, SHN, SHT, STB, STT, STV class ElfSection: @@ -18,6 +18,9 @@ def __init__( paddr: int = 0, memsz: Optional[int] = None, p_align: int = 0, + sh_link: int = 0, + sh_info: int = 0, + sh_entsize: int = 0, ): self.data = data self.name = name @@ -27,6 +30,9 @@ def __init__( self.paddr = paddr self.memsz = memsz self.p_align = p_align + self.sh_link = sh_link + self.sh_info = sh_info + self.sh_entsize = sh_entsize assert (self.name is not None) or (self.p_type is not None) assert (self.name is None) == (self.sh_type is None) @@ -36,8 +42,84 @@ def __init__( self.memsz = len(self.data) +class ElfSymbol(NamedTuple): + name: str + value: int + size: int + type: STT + binding: STB + shindex: Optional[int] = None + visibility: STV = STV.DEFAULT + + def st_info(self) -> int: + return (self.binding << 4) + (self.type & 0xF) + + +def _create_symtab( + sections: List[ElfSection], + symbols: Sequence[ElfSymbol], + little_endian: bool, + bits: int, +): + assert not any(section.name in (".symtab", ".strtab") for section in sections) + + endian = "<" if little_endian else ">" + if bits == 64: + symbol_struct = struct.Struct(endian + "IBBHQQ") + + def 
symbol_fields(sym: ElfSymbol): + return ( + sym.st_info(), + sym.visibility, + SHN.UNDEF if sym.shindex is None else sym.shindex, + sym.value, + sym.size, + ) + + else: + symbol_struct = struct.Struct(endian + "IIIBBH") + + def symbol_fields(sym: ElfSymbol): + return ( + sym.value, + sym.size, + sym.st_info(), + sym.visibility, + SHN.UNDEF if sym.shindex is None else sym.shindex, + ) + + symtab_data = bytearray((len(symbols) + 1) * symbol_struct.size) + strtab_data = bytearray(1) + sh_info = 1 + for i, sym in enumerate(symbols, 1): + symbol_struct.pack_into( + symtab_data, i * symbol_struct.size, len(strtab_data), *symbol_fields(sym) + ) + strtab_data.extend(sym.name.encode()) + strtab_data.append(0) + if sym.binding == STB.LOCAL: + assert sh_info == i, "local symbol after non-local symbol" + sh_info = i + 1 + + sections.append( + ElfSection( + name=".symtab", + sh_type=SHT.SYMTAB, + data=symtab_data, + sh_link=sum((1 for section in sections if section.name is not None), 2), + sh_info=sh_info, + sh_entsize=symbol_struct.size, + ) + ) + sections.append(ElfSection(name=".strtab", sh_type=SHT.STRTAB, data=strtab_data)) + + def create_elf_file( - type: ET, sections: Sequence[ElfSection], little_endian: bool = True, bits: int = 64 + type: ET, + sections: Sequence[ElfSection], + symbols: Sequence[ElfSymbol] = (), + little_endian: bool = True, + bits: int = 64, ): endian = "<" if little_endian else ">" if bits == 64: @@ -52,6 +134,9 @@ def create_elf_file( phdr_struct = struct.Struct(endian + "8I") e_machine = 3 if little_endian else 8 # EM_386 or EM_MIPS + sections = list(sections) + if symbols: + _create_symtab(sections, symbols, little_endian=little_endian, bits=bits) shnum = 0 phnum = 0 shstrtab = bytearray(1) @@ -121,10 +206,10 @@ def create_elf_file( section.vaddr, # sh_addr len(buf), # sh_offset len(section.data), # sh_size - 0, # sh_link - 0, # sh_info + section.sh_link, # sh_link + section.sh_info, # sh_info 1 if section.p_type is None else bits // 8, # 
sh_addralign - 0, # sh_entsize + section.sh_entsize, # sh_entsize ) shdr_offset += shdr_struct.size if section.p_type is not None: diff --git a/tests/test_symbol.py b/tests/test_symbol.py new file mode 100644 index 000000000..9d821b418 --- /dev/null +++ b/tests/test_symbol.py @@ -0,0 +1,145 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import tempfile +from typing import NamedTuple +import unittest + +from drgn import Program, SymbolBinding, SymbolKind +from tests.dwarfwriter import dwarf_sections +from tests.elf import ET, PT, SHT, STB, STT +from tests.elfwriter import ElfSection, ElfSymbol, create_elf_file + + +def create_elf_symbol_file(symbols): + # We need some DWARF data so that libdwfl will load the file. + sections = dwarf_sections(()) + # Create a section for the symbols to reference and the corresponding + # segment for address lookups. + min_address = min(symbol.value for symbol in symbols) + max_address = max(symbol.value + symbol.size for symbol in symbols) + sections.append( + ElfSection( + name=".foo", + sh_type=SHT.PROGBITS, + p_type=PT.LOAD, + vaddr=min_address, + memsz=max_address - min_address, + data=bytes(max_address - min_address), + ) + ) + symbols = [ + symbol._replace( + shindex=len(sections) if symbol.shindex is None else symbol.shindex + ) + for symbol in symbols + ] + return create_elf_file(ET.EXEC, sections, symbols) + + +def elf_symbol_program(*modules): + prog = Program() + for symbols in modules: + with tempfile.NamedTemporaryFile() as f: + f.write(create_elf_symbol_file(symbols)) + f.flush() + prog.load_debug_info([f.name]) + return prog + + +# We don't want to support creating drgn.Symbol instances yet, so use this dumb +# class for testing. 
+class Symbol(NamedTuple): + name: str + address: int + size: int + binding: SymbolBinding + kind: SymbolKind + + +class TestElfSymbol(unittest.TestCase): + def assert_symbol_equal(self, drgn_symbol, symbol): + self.assertEqual( + Symbol( + drgn_symbol.name, + drgn_symbol.address, + drgn_symbol.size, + drgn_symbol.binding, + drgn_symbol.kind, + ), + symbol, + ) + + def test_by_address(self): + elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL) + elf_second = ElfSymbol("second", 0xFFFF0008, 0x8, STT.OBJECT, STB.LOCAL) + first = Symbol("first", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT) + second = Symbol( + "second", 0xFFFF0008, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT + ) + + same_module = ((elf_first, elf_second),) + different_modules = ((elf_first,), (elf_second,)) + + for modules in same_module, different_modules: + with self.subTest(modules=len(modules)): + prog = elf_symbol_program(*modules) + self.assertRaises(LookupError, prog.symbol, 0xFFFEFFFF) + self.assert_symbol_equal(prog.symbol(0xFFFF0000), first) + self.assert_symbol_equal(prog.symbol(0xFFFF0004), first) + self.assert_symbol_equal(prog.symbol(0xFFFF0008), second) + self.assert_symbol_equal(prog.symbol(0xFFFF000C), second) + self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) + + def test_by_name(self): + elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.GLOBAL) + elf_second = ElfSymbol("second", 0xFFFF0008, 0x8, STT.OBJECT, STB.GLOBAL) + first = Symbol( + "first", 0xFFFF0000, 0x8, SymbolBinding.GLOBAL, SymbolKind.OBJECT + ) + second = Symbol( + "second", 0xFFFF0008, 0x8, SymbolBinding.GLOBAL, SymbolKind.OBJECT + ) + + same_module = ((elf_first, elf_second),) + different_modules = ((elf_first,), (elf_second,)) + + for modules in same_module, different_modules: + with self.subTest(modules=len(modules)): + prog = elf_symbol_program(*modules) + self.assert_symbol_equal(prog.symbol("first"), first) + self.assert_symbol_equal(prog.symbol("second"), 
second) + self.assertRaises(LookupError, prog.symbol, "third") + + def test_binding(self): + for elf_binding, drgn_binding in ( + (STB.LOCAL, SymbolBinding.LOCAL), + (STB.GLOBAL, SymbolBinding.GLOBAL), + (STB.WEAK, SymbolBinding.WEAK), + (STB.GNU_UNIQUE, SymbolBinding.UNIQUE), + (STB.HIPROC, SymbolBinding.UNKNOWN), + ): + with self.subTest(binding=elf_binding): + prog = elf_symbol_program( + (ElfSymbol("foo", 0xFFFF0000, 1, STT.OBJECT, elf_binding),) + ) + self.assertEqual(prog.symbol(0xFFFF0000).binding, drgn_binding) + + def test_kind(self): + for elf_type, drgn_kind in ( + (STT.NOTYPE, SymbolKind.UNKNOWN), + (STT.OBJECT, SymbolKind.OBJECT), + (STT.FUNC, SymbolKind.FUNC), + # dwfl_module_addrinfo() ignores STT_SECTION, STT_FILE, and STT_TLS + # symbols, so we don't have an easy way to test those. + # (STT.SECTION, SymbolKind.SECTION), + # (STT.FILE, SymbolKind.FILE), + (STT.COMMON, SymbolKind.COMMON), + # (STT.TLS, SymbolKind.TLS), + (STT.GNU_IFUNC, SymbolKind.IFUNC), + ): + with self.subTest(type=elf_type): + prog = elf_symbol_program( + (ElfSymbol("foo", 0xFFFF0000, 1, elf_type, STB.GLOBAL),) + ) + self.assertEqual(prog.symbol(0xFFFF0000).kind, drgn_kind) From ff40f65f0db604254b47ae502cb8e3e2e2070444 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 19 Nov 2021 17:07:23 -0800 Subject: [PATCH 092/139] libdrgn: allow symbol name lookup to get local symbols Global symbols are preferred over weak symbols, and weak symbols are preferred over other symbols. dwfl_module_addrinfo() seems to have the same preference, so document address lookups as having the same behavior. (This is actually incorrect in the case of STB_GNU_UNIQUE, as dwfl_module_addrinfo() treats anything other than STB_GLOBAL, STB_WEAK, and STB_LOCAL as having the lowest precedence, but STB_GNU_UNIQUE is so obscure that it probably doesn't matter.) Based on work from Stephen Brennan. Closes #121. 
Signed-off-by: Omar Sandoval --- _drgn.pyi | 17 ++++++-- libdrgn/program.c | 77 ++++++++++++++++++++-------------- tests/test_symbol.py | 98 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 140 insertions(+), 52 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 56a3fe6d5..a16ec4d2b 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -192,10 +192,19 @@ class Program: # address_or_name is positional-only. def symbol(self, address_or_name: Union[IntegerLike, str]) -> Symbol: """ - Get the symbol containing the given address, or the global symbol with - the given name. - - :param address_or_name: The address or name. + Get a symbol containing the given address, or a symbol with the given + name. + + Global symbols are preferred over weak symbols, and weak symbols are + preferred over other symbols. In other words: if a matching + :attr:`SymbolBinding.GLOBAL` or :attr:`SymbolBinding.UNIQUE` symbol is + found, it is returned. Otherwise, if a matching + :attr:`SymbolBinding.WEAK` symbol is found, it is returned. Otherwise, + any matching symbol (e.g., :attr:`SymbolBinding.LOCAL`) is returned. If + there are multiple matching symbols with the same binding, one is + returned arbitrarily. + + :param address_or_name: Address or name. 
:raises LookupError: if no symbol contains the given address or matches the given name """ diff --git a/libdrgn/program.c b/libdrgn/program.c index 7ecf4e36d..6c0530cd3 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1105,8 +1105,9 @@ drgn_program_find_symbol_by_address(struct drgn_program *prog, uint64_t address, struct find_symbol_by_name_arg { const char *name; - struct drgn_symbol **ret; - struct drgn_error *err; + GElf_Sym sym; + GElf_Addr addr; + bool found; bool bad_symtabs; }; @@ -1115,33 +1116,43 @@ static int find_symbol_by_name_cb(Dwfl_Module *dwfl_module, void **userdatap, void *cb_arg) { struct find_symbol_by_name_arg *arg = cb_arg; - int symtab_len, i; - - symtab_len = dwfl_module_getsymtab(dwfl_module); - i = dwfl_module_getsymtab_first_global(dwfl_module); - if (symtab_len == -1 || i == -1) { + int symtab_len = dwfl_module_getsymtab(dwfl_module); + if (symtab_len == -1) { arg->bad_symtabs = true; return DWARF_CB_OK; } - for (; i < symtab_len; i++) { - GElf_Sym elf_sym; - GElf_Addr elf_addr; - const char *name; - - name = dwfl_module_getsym_info(dwfl_module, i, &elf_sym, - &elf_addr, NULL, NULL, NULL); + /* + * Global symbols are after local symbols, so by iterating backwards we + * might find a global symbol faster. Ignore the zeroth null symbol. + */ + for (int i = symtab_len - 1; i > 0; i--) { + GElf_Sym sym; + GElf_Addr addr; + const char *name = dwfl_module_getsym_info(dwfl_module, i, &sym, + &addr, NULL, NULL, + NULL); if (name && strcmp(arg->name, name) == 0) { - struct drgn_symbol *sym; - - sym = malloc(sizeof(*sym)); - if (sym) { - drgn_symbol_from_elf(name, elf_addr, &elf_sym, - sym); - *arg->ret = sym; - } else { - arg->err = &drgn_enomem; + /* + * The order of precedence is + * GLOBAL = GNU_UNIQUE > WEAK > LOCAL = everything else + * + * If we found a global or unique symbol, return it + * immediately. If we found a weak symbol, then save it, + * which may overwrite a previously found weak or local + * symbol. 
Otherwise, save the symbol only if we haven't + * found another symbol. + */ + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL || + GELF_ST_BIND(sym.st_info) == STB_GNU_UNIQUE || + GELF_ST_BIND(sym.st_info) == STB_WEAK || + !arg->found) { + arg->sym = sym; + arg->addr = addr; + arg->found = true; } - return DWARF_CB_ABORT; + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL || + GELF_ST_BIND(sym.st_info) == STB_GNU_UNIQUE) + return DWARF_CB_ABORT; } } return DWARF_CB_OK; @@ -1153,13 +1164,19 @@ drgn_program_find_symbol_by_name(struct drgn_program *prog, { struct find_symbol_by_name_arg arg = { .name = name, - .ret = ret, }; - - if (prog->dbinfo && - dwfl_getmodules(prog->dbinfo->dwfl, find_symbol_by_name_cb, &arg, - 0)) - return arg.err; + if (prog->dbinfo) { + dwfl_getmodules(prog->dbinfo->dwfl, find_symbol_by_name_cb, + &arg, 0); + if (arg.found) { + struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return &drgn_enomem; + drgn_symbol_from_elf(name, arg.addr, &arg.sym, sym); + *ret = sym; + return NULL; + } + } return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find symbol with name '%s'%s", name, arg.bad_symtabs ? 
diff --git a/tests/test_symbol.py b/tests/test_symbol.py index 9d821b418..f893586ff 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -91,6 +91,30 @@ def test_by_address(self): self.assert_symbol_equal(prog.symbol(0xFFFF000C), second) self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) + def test_by_address_precedence(self): + precedence = (STB.GLOBAL, STB.WEAK, STB.LOCAL) + + def assert_find_higher(*modules): + self.assertEqual( + elf_symbol_program(*modules).symbol(0xFFFF0000).name, "foo" + ) + + for i in range(len(precedence) - 1): + higher_binding = precedence[i] + for j in range(i + 1, len(precedence)): + lower_binding = precedence[j] + with self.subTest(higher=higher_binding, lower=lower_binding): + higher = ElfSymbol( + "foo", 0xFFFF0000, 0x8, STT.OBJECT, higher_binding + ) + lower = ElfSymbol("bar", 0xFFFF0000, 0x8, STT.OBJECT, lower_binding) + # Local symbols must be before global symbols. + if lower_binding != STB.LOCAL: + with self.subTest("higher before lower"): + assert_find_higher((higher, lower)) + with self.subTest("lower before higher"): + assert_find_higher((lower, higher)) + def test_by_name(self): elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.GLOBAL) elf_second = ElfSymbol("second", 0xFFFF0008, 0x8, STT.OBJECT, STB.GLOBAL) @@ -111,35 +135,73 @@ def test_by_name(self): self.assert_symbol_equal(prog.symbol("second"), second) self.assertRaises(LookupError, prog.symbol, "third") + def test_by_name_precedence(self): + precedence = ( + (STB.GLOBAL, STB.GNU_UNIQUE), + (STB.WEAK,), + (STB.LOCAL, STB.HIPROC), + ) + + expected = 0xFFFF0008 + + def assert_find_higher(*modules): + self.assertEqual( + elf_symbol_program(*modules).symbol("foo").address, expected + ) + + for i in range(len(precedence) - 1): + for higher_binding in precedence[i]: + for j in range(i + 1, len(precedence)): + for lower_binding in precedence[j]: + with self.subTest(higher=higher_binding, lower=lower_binding): + higher = ElfSymbol( + "foo", 
expected, 0x8, STT.OBJECT, higher_binding + ) + lower = ElfSymbol( + "foo", expected - 0x8, 0x8, STT.OBJECT, lower_binding + ) + # Local symbols must be before global symbols. + if lower_binding not in precedence[-1]: + with self.subTest("same module, higher before lower"): + assert_find_higher((higher, lower)) + with self.subTest("same module, lower before higher"): + assert_find_higher((lower, higher)) + with self.subTest("different modules, higher before lower"): + assert_find_higher((higher,), (lower,)) + with self.subTest("different modules, lower before higher"): + assert_find_higher((lower,), (higher,)) + def test_binding(self): - for elf_binding, drgn_binding in ( - (STB.LOCAL, SymbolBinding.LOCAL), - (STB.GLOBAL, SymbolBinding.GLOBAL), - (STB.WEAK, SymbolBinding.WEAK), - (STB.GNU_UNIQUE, SymbolBinding.UNIQUE), - (STB.HIPROC, SymbolBinding.UNKNOWN), - ): - with self.subTest(binding=elf_binding): - prog = elf_symbol_program( - (ElfSymbol("foo", 0xFFFF0000, 1, STT.OBJECT, elf_binding),) - ) - self.assertEqual(prog.symbol(0xFFFF0000).binding, drgn_binding) + for by in "name", "address": + for elf_binding, drgn_binding in ( + (STB.LOCAL, SymbolBinding.LOCAL), + (STB.GLOBAL, SymbolBinding.GLOBAL), + (STB.WEAK, SymbolBinding.WEAK), + (STB.GNU_UNIQUE, SymbolBinding.UNIQUE), + (STB.HIPROC, SymbolBinding.UNKNOWN), + ): + with self.subTest(by=by, binding=elf_binding): + prog = elf_symbol_program( + (ElfSymbol("foo", 0xFFFF0000, 1, STT.OBJECT, elf_binding),) + ) + self.assertEqual( + prog.symbol("foo" if by == "name" else 0xFFFF0000).binding, + drgn_binding, + ) def test_kind(self): for elf_type, drgn_kind in ( (STT.NOTYPE, SymbolKind.UNKNOWN), (STT.OBJECT, SymbolKind.OBJECT), (STT.FUNC, SymbolKind.FUNC), - # dwfl_module_addrinfo() ignores STT_SECTION, STT_FILE, and STT_TLS - # symbols, so we don't have an easy way to test those. 
- # (STT.SECTION, SymbolKind.SECTION), - # (STT.FILE, SymbolKind.FILE), + (STT.SECTION, SymbolKind.SECTION), + (STT.FILE, SymbolKind.FILE), (STT.COMMON, SymbolKind.COMMON), - # (STT.TLS, SymbolKind.TLS), + (STT.TLS, SymbolKind.TLS), (STT.GNU_IFUNC, SymbolKind.IFUNC), ): with self.subTest(type=elf_type): prog = elf_symbol_program( (ElfSymbol("foo", 0xFFFF0000, 1, elf_type, STB.GLOBAL),) ) - self.assertEqual(prog.symbol(0xFFFF0000).kind, drgn_kind) + self.assertEqual(prog.symbol("foo").kind, drgn_kind) From cdee38af7a6fd08aaa319237d35df1949b7f8db7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 21 Nov 2021 14:40:06 -0800 Subject: [PATCH 093/139] tests: use different symbol for kernel module debug info test Linux kernel commit 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer") removed the loop_register_transfer function. We only used that symbol because it and loop_unregister_transfer were the only global symbols in the loop module. Now that we can get local symbols by name, we can use the "lo_fops" symbol, which is unlikely to be removed or renamed. Signed-off-by: Omar Sandoval --- tests/helpers/linux/test_debug_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/helpers/linux/test_debug_info.py b/tests/helpers/linux/test_debug_info.py index 94dc53b81..3a39ebaa4 100644 --- a/tests/helpers/linux/test_debug_info.py +++ b/tests/helpers/linux/test_debug_info.py @@ -10,7 +10,7 @@ class TestModuleDebugInfo(LinuxHelperTestCase): # Arbitrary symbol that we can use to check that the module debug info was # loaded. 
- SYMBOL = "loop_register_transfer" + SYMBOL = "lo_fops" def setUp(self): super().setUp() From 93dc02a271a00dafb990f54dc996d34a068d8dda Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 21 Nov 2021 14:52:46 -0800 Subject: [PATCH 094/139] setup.py: add 5.16 to vmtest kernels Signed-off-by: Omar Sandoval --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d6fbfee10..857fb9150 100755 --- a/setup.py +++ b/setup.py @@ -126,6 +126,7 @@ class test(Command): description = "run unit tests after in-place build" KERNELS = [ + "5.16", "5.15", "5.14", "5.13", From c0d8709b450f42bd6ee07bb31aaad207016eebb9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 21 Nov 2021 15:59:44 -0800 Subject: [PATCH 095/139] Update copyright headers to Meta Signed-off-by: Omar Sandoval --- README.rst | 2 +- _drgn.pyi | 2 +- docs/exts/drgndoc/docstrings.py | 2 +- docs/exts/drgndoc/ext.py | 2 +- docs/exts/drgndoc/format.py | 2 +- docs/exts/drgndoc/namespace.py | 2 +- docs/exts/drgndoc/parse.py | 2 +- docs/exts/drgndoc/util.py | 2 +- docs/exts/drgndoc/visitor.py | 2 +- drgn/__init__.py | 2 +- drgn/__main__.py | 2 +- drgn/helpers/__init__.py | 2 +- drgn/helpers/linux/__init__.py | 2 +- drgn/helpers/linux/bitops.py | 2 +- drgn/helpers/linux/block.py | 2 +- drgn/helpers/linux/boot.py | 2 +- drgn/helpers/linux/bpf.py | 2 +- drgn/helpers/linux/cgroup.py | 2 +- drgn/helpers/linux/cpumask.py | 2 +- drgn/helpers/linux/device.py | 2 +- drgn/helpers/linux/fs.py | 2 +- drgn/helpers/linux/idr.py | 2 +- drgn/helpers/linux/kconfig.py | 2 +- drgn/helpers/linux/kernfs.py | 2 +- drgn/helpers/linux/list.py | 2 +- drgn/helpers/linux/list_nulls.py | 2 +- drgn/helpers/linux/mm.py | 2 +- drgn/helpers/linux/net.py | 2 +- drgn/helpers/linux/percpu.py | 2 +- drgn/helpers/linux/pid.py | 2 +- drgn/helpers/linux/radixtree.py | 2 +- drgn/helpers/linux/rbtree.py | 2 +- drgn/helpers/linux/sched.py | 2 +- drgn/helpers/linux/tcp.py | 2 +- drgn/helpers/linux/user.py | 2 +- 
drgn/internal/__init__.py | 2 +- drgn/internal/cli.py | 2 +- drgn/internal/rlcompleter.py | 2 +- examples/linux/cgroup.py | 2 +- examples/linux/fs_inodes.py | 2 +- examples/linux/lsmod.py | 2 +- examples/linux/ps.py | 2 +- examples/linux/tcp_sock.py | 2 +- libdrgn/Makefile.am | 2 +- libdrgn/arch_register_layout.h | 2 +- libdrgn/arch_x86_64.c | 2 +- libdrgn/arch_x86_64.defs | 2 +- libdrgn/array.h | 2 +- libdrgn/binary_buffer.c | 2 +- libdrgn/binary_buffer.h | 2 +- libdrgn/binary_search_tree.h | 2 +- libdrgn/bitops.h | 2 +- libdrgn/build-aux/gen_arch.awk | 2 +- libdrgn/build-aux/gen_constants.py | 2 +- libdrgn/cfi.c | 2 +- libdrgn/cfi.h | 2 +- libdrgn/cityhash.h | 2 +- libdrgn/configure.ac | 2 +- libdrgn/debug_info.c | 2 +- libdrgn/debug_info.h | 2 +- libdrgn/drgn.h.in | 2 +- libdrgn/dwarf_info.c | 2 +- libdrgn/dwarf_info.h | 2 +- libdrgn/error.c | 2 +- libdrgn/error.h | 2 +- libdrgn/hash_table.c | 2 +- libdrgn/hash_table.h | 2 +- libdrgn/helpers.h | 2 +- libdrgn/language.c | 2 +- libdrgn/language.h | 2 +- libdrgn/language_c.c | 2 +- libdrgn/lazy_object.c | 2 +- libdrgn/lazy_object.h | 2 +- libdrgn/lexer.c | 2 +- libdrgn/lexer.h | 2 +- libdrgn/linux_kernel.c | 2 +- libdrgn/linux_kernel.h | 2 +- libdrgn/linux_kernel_helpers.c | 2 +- libdrgn/memory_reader.c | 2 +- libdrgn/memory_reader.h | 2 +- libdrgn/minmax.h | 2 +- libdrgn/nstring.h | 2 +- libdrgn/object.c | 2 +- libdrgn/object.h | 2 +- libdrgn/object_index.c | 2 +- libdrgn/object_index.h | 2 +- libdrgn/orc.h | 2 +- libdrgn/orc_info.c | 2 +- libdrgn/orc_info.h | 2 +- libdrgn/path.c | 2 +- libdrgn/path.h | 2 +- libdrgn/platform.c | 2 +- libdrgn/platform.h | 2 +- libdrgn/pp.h | 2 +- libdrgn/program.c | 2 +- libdrgn/program.h | 2 +- libdrgn/python/drgnpy.h | 2 +- libdrgn/python/error.c | 2 +- libdrgn/python/helpers.c | 2 +- libdrgn/python/language.c | 2 +- libdrgn/python/module.c | 2 +- libdrgn/python/object.c | 2 +- libdrgn/python/platform.c | 2 +- libdrgn/python/program.c | 2 +- libdrgn/python/stack_trace.c | 2 +- 
libdrgn/python/symbol.c | 2 +- libdrgn/python/test.c | 2 +- libdrgn/python/type.c | 2 +- libdrgn/python/util.c | 2 +- libdrgn/register_state.c | 2 +- libdrgn/register_state.h | 2 +- libdrgn/serialize.c | 2 +- libdrgn/serialize.h | 2 +- libdrgn/splay_tree.c | 2 +- libdrgn/stack_trace.c | 2 +- libdrgn/stack_trace.h | 2 +- libdrgn/string_builder.c | 2 +- libdrgn/string_builder.h | 2 +- libdrgn/symbol.c | 2 +- libdrgn/symbol.h | 2 +- libdrgn/type.c | 2 +- libdrgn/type.h | 2 +- libdrgn/util.h | 2 +- libdrgn/vector.c | 2 +- libdrgn/vector.h | 2 +- scripts/gen_pp_cat.py | 2 +- scripts/generate_primitive_type_spellings.py | 2 +- scripts/generate_test_constants.py | 2 +- scripts/iwyu.py | 2 +- scripts/test_cityhash.c | 2 +- setup.py | 2 +- tests/__init__.py | 2 +- tests/assembler.py | 2 +- tests/dwarfwriter.py | 2 +- tests/elfwriter.py | 2 +- tests/helpers/linux/__init__.py | 2 +- tests/helpers/linux/test_bitops.py | 2 +- tests/helpers/linux/test_block.py | 2 +- tests/helpers/linux/test_boot.py | 2 +- tests/helpers/linux/test_cgroup.py | 2 +- tests/helpers/linux/test_cpumask.py | 2 +- tests/helpers/linux/test_debug_info.py | 2 +- tests/helpers/linux/test_fs.py | 2 +- tests/helpers/linux/test_kconfig.py | 2 +- tests/helpers/linux/test_kernfs.py | 2 +- tests/helpers/linux/test_mm.py | 2 +- tests/helpers/linux/test_net.py | 2 +- tests/helpers/linux/test_percpu.py | 2 +- tests/helpers/linux/test_pid.py | 2 +- tests/helpers/linux/test_sched.py | 2 +- tests/helpers/linux/test_stack_trace.py | 2 +- tests/helpers/linux/test_tcp.py | 2 +- tests/helpers/linux/test_user.py | 2 +- tests/helpers/linux/test_uts.py | 2 +- tests/libdrgn.py | 2 +- tests/test_docs.py | 2 +- tests/test_dwarf.py | 2 +- tests/test_language_c.py | 2 +- tests/test_lexer.py | 2 +- tests/test_object.py | 2 +- tests/test_path.py | 2 +- tests/test_platform.py | 2 +- tests/test_program.py | 2 +- tests/test_python.py | 2 +- tests/test_serialize.py | 2 +- tests/test_symbol.py | 2 +- tests/test_type.py | 2 +- 
tests/test_util.py | 2 +- tools/bpf_inspect.py | 2 +- util.py | 2 +- vmtest/asynciosubprocess.py | 2 +- vmtest/download.py | 2 +- vmtest/githubapi.py | 2 +- vmtest/kbuild.py | 2 +- vmtest/manage.py | 2 +- vmtest/onoatimehack.c | 2 +- vmtest/vm.py | 2 +- 177 files changed, 177 insertions(+), 177 deletions(-) diff --git a/README.rst b/README.rst index 1789e9deb..8d6759335 100644 --- a/README.rst +++ b/README.rst @@ -202,7 +202,7 @@ License .. start-license -Copyright (c) Facebook, Inc. and its affiliates. +Copyright (c) Meta Platforms, Inc. and affiliates. drgn is licensed under the `GPLv3 `_ or later. diff --git a/_drgn.pyi b/_drgn.pyi index a16ec4d2b..487e11b21 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index a07994fc7..4a728430b 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 2d252f8ed..0a9d66815 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index b7be5b6f9..1a75beec9 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import ast diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index 9b19f72ea..1a868ca2a 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index d9f5cc91d..aa3bc9bb3 100644 --- a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ast diff --git a/docs/exts/drgndoc/util.py b/docs/exts/drgndoc/util.py index 5c088ac3a..1c9e9feda 100644 --- a/docs/exts/drgndoc/util.py +++ b/docs/exts/drgndoc/util.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from typing import Optional diff --git a/docs/exts/drgndoc/visitor.py b/docs/exts/drgndoc/visitor.py index b527b22e6..b1fa51e1d 100644 --- a/docs/exts/drgndoc/visitor.py +++ b/docs/exts/drgndoc/visitor.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ast diff --git a/drgn/__init__.py b/drgn/__init__.py index 85bd2c713..4fb73012f 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/__main__.py b/drgn/__main__.py index c85ee75b1..9fd749eb4 100644 --- a/drgn/__main__.py +++ b/drgn/__main__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index 4b3a366fc..8bf2b4d47 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index 71bf0c25a..a0f088212 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/bitops.py b/drgn/helpers/linux/bitops.py index 114ee3d49..fd6210ddc 100644 --- a/drgn/helpers/linux/bitops.py +++ b/drgn/helpers/linux/bitops.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/block.py b/drgn/helpers/linux/block.py index cf8588427..0bbdc4f69 100644 --- a/drgn/helpers/linux/block.py +++ b/drgn/helpers/linux/block.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/boot.py b/drgn/helpers/linux/boot.py index 74dd2be85..6e959bf4f 100644 --- a/drgn/helpers/linux/boot.py +++ b/drgn/helpers/linux/boot.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/bpf.py b/drgn/helpers/linux/bpf.py index fa526041b..46d33665c 100644 --- a/drgn/helpers/linux/bpf.py +++ b/drgn/helpers/linux/bpf.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/cgroup.py b/drgn/helpers/linux/cgroup.py index dac0a91c3..beb151e88 100644 --- a/drgn/helpers/linux/cgroup.py +++ b/drgn/helpers/linux/cgroup.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index e3f45ebb6..3001da913 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/device.py b/drgn/helpers/linux/device.py index 73861d77d..6fde94295 100644 --- a/drgn/helpers/linux/device.py +++ b/drgn/helpers/linux/device.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 7eff48338..71cbfc361 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index 623fc0ca7..c1d1ba2d9 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/kconfig.py b/drgn/helpers/linux/kconfig.py index ff5aac0e0..fb125cbb3 100644 --- a/drgn/helpers/linux/kconfig.py +++ b/drgn/helpers/linux/kconfig.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index a3e223598..566f2a906 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index 6e557b267..2357389a5 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/list_nulls.py b/drgn/helpers/linux/list_nulls.py index a3b9536bb..7c5fb4b14 100644 --- a/drgn/helpers/linux/list_nulls.py +++ b/drgn/helpers/linux/list_nulls.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 927f0caab..d669316b7 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 66db17ef0..cec12e76f 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index 2472f8e4c..a1e809939 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index d2e786ed0..ca14ad352 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index fe8814248..0339b0a08 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index 32d025674..a1572b043 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index dce340ead..afeb84865 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. 
and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/tcp.py b/drgn/helpers/linux/tcp.py index af0c7b116..0f92551af 100644 --- a/drgn/helpers/linux/tcp.py +++ b/drgn/helpers/linux/tcp.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/user.py b/drgn/helpers/linux/user.py index 7eeca5bb8..32ee4fa92 100644 --- a/drgn/helpers/linux/user.py +++ b/drgn/helpers/linux/user.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/internal/__init__.py b/drgn/internal/__init__.py index 1b14962ca..7400b44e2 100644 --- a/drgn/internal/__init__.py +++ b/drgn/internal/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index 77a25d1a2..16f1b8abd 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """drgn command line interface""" diff --git a/drgn/internal/rlcompleter.py b/drgn/internal/rlcompleter.py index 6c4bb7f3a..86d750f49 100644 --- a/drgn/internal/rlcompleter.py +++ b/drgn/internal/rlcompleter.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """Improved readline completer""" diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index e215136a2..d0de71ad9 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """List the paths of all descendants of a cgroup v2""" diff --git a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 6e1a4c190..0d9252b1b 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """List the paths of all inodes cached in a given filesystem""" diff --git a/examples/linux/lsmod.py b/examples/linux/lsmod.py index 1585e2c22..634783799 100755 --- a/examples/linux/lsmod.py +++ b/examples/linux/lsmod.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """An implementation of lsmod(8) using drgn""" diff --git a/examples/linux/ps.py b/examples/linux/ps.py index 0eda3be28..d2c66a1c4 100755 --- a/examples/linux/ps.py +++ b/examples/linux/ps.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """A simplified implementation of ps(1) using drgn""" diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index ad628974c..e077df72c 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """List all TCP sockets and their cgroup v2 paths""" diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 5efa66387..d966917c6 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later ACLOCAL_AMFLAGS = -I m4 diff --git a/libdrgn/arch_register_layout.h b/libdrgn/arch_register_layout.h index 9911589a2..3c9da6222 100644 --- a/libdrgn/arch_register_layout.h +++ b/libdrgn/arch_register_layout.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/arch_x86_64.c b/libdrgn/arch_x86_64.c index 00cb3a669..bd9440b82 100644 --- a/libdrgn/arch_x86_64.c +++ b/libdrgn/arch_x86_64.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/arch_x86_64.defs b/libdrgn/arch_x86_64.defs index 03b1ccf02..c5cda5ac9 100644 --- a/libdrgn/arch_x86_64.defs +++ b/libdrgn/arch_x86_64.defs @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later "rax" "rdx" diff --git a/libdrgn/array.h b/libdrgn/array.h index 60837d260..a860427c8 100644 --- a/libdrgn/array.h +++ b/libdrgn/array.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/binary_buffer.c b/libdrgn/binary_buffer.c index 7cabed214..98df2c580 100644 --- a/libdrgn/binary_buffer.c +++ b/libdrgn/binary_buffer.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
+// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "binary_buffer.h" diff --git a/libdrgn/binary_buffer.h b/libdrgn/binary_buffer.h index dc0e0f8fe..cfab91bdd 100644 --- a/libdrgn/binary_buffer.h +++ b/libdrgn/binary_buffer.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index dc9214aba..fe5ea8f01 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/bitops.h b/libdrgn/bitops.h index 7daa64437..3877889c0 100644 --- a/libdrgn/bitops.h +++ b/libdrgn/bitops.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/build-aux/gen_arch.awk b/libdrgn/build-aux/gen_arch.awk index 4bda1b7d1..cfda42456 100644 --- a/libdrgn/build-aux/gen_arch.awk +++ b/libdrgn/build-aux/gen_arch.awk @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later # This script generates drgn architecture definition code ("arch_foo.inc") from diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 4719c4cfc..70941ed02 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import re diff --git a/libdrgn/cfi.c b/libdrgn/cfi.c index 4a729a876..0f316b37a 100644 --- a/libdrgn/cfi.c +++ b/libdrgn/cfi.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/cfi.h b/libdrgn/cfi.h index 72d954c55..6ff250d6a 100644 --- a/libdrgn/cfi.h +++ b/libdrgn/cfi.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/cityhash.h b/libdrgn/cityhash.h index 29abebad3..9b4fdbb94 100644 --- a/libdrgn/cityhash.h +++ b/libdrgn/cityhash.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_CITYHASH_H diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 17b466570..b782402b1 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,4 +1,4 @@ -dnl Copyright (c) Facebook, Inc. and its affiliates. +dnl Copyright (c) Meta Platforms, Inc. and affiliates. dnl SPDX-License-Identifier: GPL-3.0-or-later AC_INIT([libdrgn], [0.0.14], diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 539610231..8aba8c54d 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index dd47d646b..5f5f13613 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index d161513dd..2949c9347 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index c296295b2..03b64e844 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h index 24f480f44..efe283ea2 100644 --- a/libdrgn/dwarf_info.h +++ b/libdrgn/dwarf_info.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/error.c b/libdrgn/error.c index b66c3661f..abf2a4f7c 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/error.h b/libdrgn/error.h index f56252557..f70dd250b 100644 --- a/libdrgn/error.h +++ b/libdrgn/error.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/hash_table.c b/libdrgn/hash_table.c index f4b32be76..793bc0157 100644 --- a/libdrgn/hash_table.c +++ b/libdrgn/hash_table.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include "hash_table.h" diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index d82a132a5..4a66c4067 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 342333603..95d3a4ebb 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/language.c b/libdrgn/language.c index 6cdc36dd1..15a701139 100644 --- a/libdrgn/language.c +++ b/libdrgn/language.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "language.h" diff --git a/libdrgn/language.h b/libdrgn/language.h index b93620768..2c5578f73 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index b12728dcd..d5936d47e 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/lazy_object.c b/libdrgn/lazy_object.c index ae21be2b3..1bf0eddb1 100644 --- a/libdrgn/lazy_object.c +++ b/libdrgn/lazy_object.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/lazy_object.h b/libdrgn/lazy_object.h index fa4ae640d..a44a93001 100644 --- a/libdrgn/lazy_object.h +++ b/libdrgn/lazy_object.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/lexer.c b/libdrgn/lexer.c index 221fa4530..089e1be9b 100644 --- a/libdrgn/lexer.c +++ b/libdrgn/lexer.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgn.h" diff --git a/libdrgn/lexer.h b/libdrgn/lexer.h index 693da4412..cdb2cec5b 100644 --- a/libdrgn/lexer.h +++ b/libdrgn/lexer.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 769c26b5c..0ef0bf2ce 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 62bead4c4..e6b5b26f3 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_LINUX_KERNEL_H diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 115fb4fa0..e9b61a7e3 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 18ad93b62..7bb98cd43 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index 9e608995a..143834660 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/minmax.h b/libdrgn/minmax.h index 7e517667b..0c64ce703 100644 --- a/libdrgn/minmax.h +++ b/libdrgn/minmax.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/nstring.h b/libdrgn/nstring.h index f916044b3..5fc803d57 100644 --- a/libdrgn/nstring.h +++ b/libdrgn/nstring.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/object.c b/libdrgn/object.c index e5ecc47f7..b0f0b7d61 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/object.h b/libdrgn/object.h index d008cdcc2..a94c87a3a 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/object_index.c b/libdrgn/object_index.c index e7248138c..e2e592683 100644 --- a/libdrgn/object_index.c +++ b/libdrgn/object_index.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/object_index.h b/libdrgn/object_index.h index 2929056c4..f551118e4 100644 --- a/libdrgn/object_index.h +++ b/libdrgn/object_index.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/orc.h b/libdrgn/orc.h index dd1fc87d6..c3b42c86d 100644 --- a/libdrgn/orc.h +++ b/libdrgn/orc.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index f3a9b489d..6fe9bc486 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/orc_info.h b/libdrgn/orc_info.h index 90208505c..4aee69eef 100644 --- a/libdrgn/orc_info.h +++ b/libdrgn/orc_info.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/path.c b/libdrgn/path.c index d7bf2f46f..653478ec4 100644 --- a/libdrgn/path.c +++ b/libdrgn/path.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/path.h b/libdrgn/path.h index d1a53a879..e03cd2fd9 100644 --- a/libdrgn/path.h +++ b/libdrgn/path.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/platform.c b/libdrgn/platform.c index d778b3a01..0c7059977 100644 --- a/libdrgn/platform.c +++ b/libdrgn/platform.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/platform.h b/libdrgn/platform.h index 288ea4e75..61b109f6e 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_PLATFORM_H diff --git a/libdrgn/pp.h b/libdrgn/pp.h index 0e44677df..67805269a 100644 --- a/libdrgn/pp.h +++ b/libdrgn/pp.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/program.c b/libdrgn/program.c index 6c0530cd3..3e5ecf019 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/program.h b/libdrgn/program.h index 741913a17..0924cb64d 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index ad5218aee..e5aa04bd5 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGNPY_H diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index 5e674ca23..3b749efe6 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 3158be0bd..2f16a20d5 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/language.c b/libdrgn/python/language.c index d893091db..76f704743 100644 --- a/libdrgn/python/language.c +++ b/libdrgn/python/language.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 8d3260780..7fd1d2c3a 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index f2a1d695b..2f11fabbc 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
+// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/python/platform.c b/libdrgn/python/platform.c index b199e2d9e..9e5116f3e 100644 --- a/libdrgn/python/platform.c +++ b/libdrgn/python/platform.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 06d6ce36f..309fa6a21 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index 5fe9fdf6e..c0556e62a 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index f47fd8f40..6220eaa77 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/python/test.c b/libdrgn/python/test.c index 6728762ea..c6733f98b 100644 --- a/libdrgn/python/test.c +++ b/libdrgn/python/test.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /* diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 1e5f4175b..fdb6f8d36 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index f49777338..ad5c5578e 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/register_state.c b/libdrgn/register_state.c index 5c5364f19..d44584f4e 100644 --- a/libdrgn/register_state.c +++ b/libdrgn/register_state.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/register_state.h b/libdrgn/register_state.h index 9f8b2bf32..5844e73e7 100644 --- a/libdrgn/register_state.h +++ b/libdrgn/register_state.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/serialize.c b/libdrgn/serialize.c index 3904f4fb5..b4228e427 100644 --- a/libdrgn/serialize.c +++ b/libdrgn/serialize.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 9e9604209..b28eab6d6 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/splay_tree.c b/libdrgn/splay_tree.c index 6021db001..cd84f1a53 100644 --- a/libdrgn/splay_tree.c +++ b/libdrgn/splay_tree.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "binary_search_tree.h" // IWYU pragma: associated diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 50d6c4bcc..d345ffbba 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/stack_trace.h b/libdrgn/stack_trace.h index a2eb9a254..004e556c2 100644 --- a/libdrgn/stack_trace.h +++ b/libdrgn/stack_trace.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/string_builder.c b/libdrgn/string_builder.c index 9a10d0783..d78374583 100644 --- a/libdrgn/string_builder.c +++ b/libdrgn/string_builder.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/string_builder.h b/libdrgn/string_builder.h index bc3b3f04e..f4ca10a25 100644 --- a/libdrgn/string_builder.h +++ b/libdrgn/string_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index b2aae8444..69030b47e 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index b4f1d3ef2..4fb765640 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_SYMBOL_H diff --git a/libdrgn/type.c b/libdrgn/type.c index d561089e8..39f6a27e2 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/type.h b/libdrgn/type.h index 24ddeb033..bab54b32e 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/util.h b/libdrgn/util.h index ff3a9c9dd..cd524d7ff 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/vector.c b/libdrgn/vector.c index 96b13c9f9..ab51e9919 100644 --- a/libdrgn/vector.c +++ b/libdrgn/vector.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "vector.h" diff --git a/libdrgn/vector.h b/libdrgn/vector.h index 70196c477..0fc2c0596 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/scripts/gen_pp_cat.py b/scripts/gen_pp_cat.py index 6a9aaaf51..7109f17c3 100755 --- a/scripts/gen_pp_cat.py +++ b/scripts/gen_pp_cat.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/scripts/generate_primitive_type_spellings.py b/scripts/generate_primitive_type_spellings.py index 425aec517..6f620c666 100755 --- a/scripts/generate_primitive_type_spellings.py +++ b/scripts/generate_primitive_type_spellings.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/scripts/generate_test_constants.py b/scripts/generate_test_constants.py index 442a3e805..65738a02e 100755 --- a/scripts/generate_test_constants.py +++ b/scripts/generate_test_constants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/scripts/iwyu.py b/scripts/iwyu.py index 07423d0fb..e9a9bf61f 100755 --- a/scripts/iwyu.py +++ b/scripts/iwyu.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/scripts/test_cityhash.c b/scripts/test_cityhash.c index 503dd7e74..a23dba958 100644 --- a/scripts/test_cityhash.c +++ b/scripts/test_cityhash.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/setup.py b/setup.py index 857fb9150..df6940bfb 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later # setuptools must be imported before distutils (see pypa/setuptools#2230). diff --git a/tests/__init__.py b/tests/__init__.py index 07d307df8..4d9e5d983 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import functools diff --git a/tests/assembler.py b/tests/assembler.py index 3d3e0dff6..94540e180 100644 --- a/tests/assembler.py +++ b/tests/assembler.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from collections import namedtuple diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index 9b645fcba..827ed9257 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from collections import namedtuple diff --git a/tests/elfwriter.py b/tests/elfwriter.py index 55ad09389..c9891a2ee 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import struct diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index b6508158f..ec419c0fb 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import contextlib diff --git a/tests/helpers/linux/test_bitops.py b/tests/helpers/linux/test_bitops.py index 0e5eb29af..ec510de6e 100644 --- a/tests/helpers/linux/test_bitops.py +++ b/tests/helpers/linux/test_bitops.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from drgn import Object diff --git a/tests/helpers/linux/test_block.py b/tests/helpers/linux/test_block.py index 26f981409..4f5f3bbb5 100644 --- a/tests/helpers/linux/test_block.py +++ b/tests/helpers/linux/test_block.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import errno diff --git a/tests/helpers/linux/test_boot.py b/tests/helpers/linux/test_boot.py index 595482bec..66454d909 100644 --- a/tests/helpers/linux/test_boot.py +++ b/tests/helpers/linux/test_boot.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import platform diff --git a/tests/helpers/linux/test_cgroup.py b/tests/helpers/linux/test_cgroup.py index 7324d56e6..9d89b708e 100644 --- a/tests/helpers/linux/test_cgroup.py +++ b/tests/helpers/linux/test_cgroup.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_cpumask.py b/tests/helpers/linux/test_cpumask.py index 7979456cd..cbe2f4441 100644 --- a/tests/helpers/linux/test_cpumask.py +++ b/tests/helpers/linux/test_cpumask.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later from pathlib import Path diff --git a/tests/helpers/linux/test_debug_info.py b/tests/helpers/linux/test_debug_info.py index 3a39ebaa4..22d43728c 100644 --- a/tests/helpers/linux/test_debug_info.py +++ b/tests/helpers/linux/test_debug_info.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_fs.py b/tests/helpers/linux/test_fs.py index 336a037a1..069914f29 100644 --- a/tests/helpers/linux/test_fs.py +++ b/tests/helpers/linux/test_fs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_kconfig.py b/tests/helpers/linux/test_kconfig.py index b5ebdd36f..16226bdfd 100644 --- a/tests/helpers/linux/test_kconfig.py +++ b/tests/helpers/linux/test_kconfig.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os.path diff --git a/tests/helpers/linux/test_kernfs.py b/tests/helpers/linux/test_kernfs.py index e31f564f1..1e12e8a83 100644 --- a/tests/helpers/linux/test_kernfs.py +++ b/tests/helpers/linux/test_kernfs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_mm.py b/tests/helpers/linux/test_mm.py index 73f800f7b..cd91df340 100644 --- a/tests/helpers/linux/test_mm.py +++ b/tests/helpers/linux/test_mm.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import contextlib diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index 364b32493..f50e3edab 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_percpu.py b/tests/helpers/linux/test_percpu.py index 672532ee7..5f077f3d8 100644 --- a/tests/helpers/linux/test_percpu.py +++ b/tests/helpers/linux/test_percpu.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from drgn.helpers.linux.cpumask import for_each_possible_cpu diff --git a/tests/helpers/linux/test_pid.py b/tests/helpers/linux/test_pid.py index b29dce951..cc2056b5e 100644 --- a/tests/helpers/linux/test_pid.py +++ b/tests/helpers/linux/test_pid.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index 4a4253c68..fd5232d3f 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_stack_trace.py b/tests/helpers/linux/test_stack_trace.py index 92c10e1d7..1a9d20f9a 100644 --- a/tests/helpers/linux/test_stack_trace.py +++ b/tests/helpers/linux/test_stack_trace.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_tcp.py b/tests/helpers/linux/test_tcp.py index 0fc7dbd9b..8b6a77a2d 100644 --- a/tests/helpers/linux/test_tcp.py +++ b/tests/helpers/linux/test_tcp.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_user.py b/tests/helpers/linux/test_user.py index 1659aedb3..219814f8b 100644 --- a/tests/helpers/linux/test_user.py +++ b/tests/helpers/linux/test_user.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import functools diff --git a/tests/helpers/linux/test_uts.py b/tests/helpers/linux/test_uts.py index 9a637cc8b..f5f880cfb 100644 --- a/tests/helpers/linux/test_uts.py +++ b/tests/helpers/linux/test_uts.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/libdrgn.py b/tests/libdrgn.py index bdf3b6932..dba4dbb14 100644 --- a/tests/libdrgn.py +++ b/tests/libdrgn.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ctypes diff --git a/tests/test_docs.py b/tests/test_docs.py index 5b0cc369e..c3f170984 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import pydoc diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 165f24b58..381b44098 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import functools diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 76b8a8f9e..60afafd0b 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from functools import reduce diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 0b8a7786c..d9abe8eb7 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import unittest diff --git a/tests/test_object.py b/tests/test_object.py index 811d78388..523f18600 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import math diff --git a/tests/test_path.py b/tests/test_path.py index 10b96c358..ecf40f663 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/tests/test_platform.py b/tests/test_platform.py index 9e11c65bf..f3152d0be 100644 --- a/tests/test_platform.py +++ b/tests/test_platform.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/tests/test_program.py b/tests/test_program.py index 7c0215d58..889f1cde6 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ctypes diff --git a/tests/test_python.py b/tests/test_python.py index 5a108b676..a211fc1cf 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import unittest diff --git a/tests/test_serialize.py b/tests/test_serialize.py index ae03048bc..f8ff1210c 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import unittest diff --git a/tests/test_symbol.py b/tests/test_symbol.py index f893586ff..d980fa644 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import tempfile diff --git a/tests/test_type.py b/tests/test_type.py index 435db974d..add26ebbb 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from drgn import ( diff --git a/tests/test_util.py b/tests/test_util.py index 20eadfe94..cb186047e 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from functools import cmp_to_key diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py index 0c51044c2..0ce7959f8 100755 --- a/tools/bpf_inspect.py +++ b/tools/bpf_inspect.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/util.py b/util.py index 23f178692..4f3fc4ff8 100644 --- a/util.py +++ b/util.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from functools import total_ordering diff --git a/vmtest/asynciosubprocess.py b/vmtest/asynciosubprocess.py index b4b90d0a5..8309b42a2 100644 --- a/vmtest/asynciosubprocess.py +++ b/vmtest/asynciosubprocess.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import asyncio diff --git a/vmtest/download.py b/vmtest/download.py index b5abf1f5a..588d4768b 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/vmtest/githubapi.py b/vmtest/githubapi.py index d37dd73bc..46a7a32fc 100644 --- a/vmtest/githubapi.py +++ b/vmtest/githubapi.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import json diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index b02cb61d6..b9186a4da 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/vmtest/manage.py b/vmtest/manage.py index 0bbff359f..90b4fadbf 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import argparse diff --git a/vmtest/onoatimehack.c b/vmtest/onoatimehack.c index 9042c7380..1eb54e363 100644 --- a/vmtest/onoatimehack.c +++ b/vmtest/onoatimehack.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /* diff --git a/vmtest/vm.py b/vmtest/vm.py index 62186850e..df86d04a0 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import errno From d18be05b7a354d5988edc532b396eedb9c1033a1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sun, 21 Nov 2021 16:01:39 -0800 Subject: [PATCH 096/139] README: mention Meta Signed-off-by: Omar Sandoval --- README.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 8d6759335..441175a67 100644 --- a/README.rst +++ b/README.rst @@ -42,9 +42,10 @@ complex, inter-connected state in large programs. It is also designed as a library that can be used to build debugging and introspection tools; see the official `tools `_. -drgn was developed for debugging the Linux kernel (as an alternative to the -`crash `_ utility), but it can also debug -userspace programs written in C. C++ support is in progress. +drgn was developed at `Meta `_ for debugging the +Linux kernel (as an alternative to the `crash +`_ utility), but it can also debug userspace +programs written in C. C++ support is in progress. .. end-introduction From 3914bb8e29d4c571fac916834a044d7bc1492832 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 23 Nov 2021 00:52:39 -0800 Subject: [PATCH 097/139] libdrgn: fix type names referring to anonymous types A pointer, array, or function referring to an anonymous type currently includes the full type definition in its type name. 
This creates very badly formatted objects for, e.g., drgn's own hash table types. Instead, use "struct <anonymous>" in the type name. Signed-off-by: Omar Sandoval --- libdrgn/language_c.c | 48 +++++++++++++------------------ tests/test_language_c.py | 61 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index d5936d47e..08fcc1a19 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -28,7 +28,7 @@ static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, struct string_callback *name, size_t indent, - struct string_builder *sb); + bool define_anonymous_type, struct string_builder *sb); static struct drgn_error * c_define_type(struct drgn_qualified_type qualified_type, size_t indent, @@ -156,16 +156,20 @@ c_append_tagged_name(struct drgn_qualified_type qualified_type, size_t indent, static struct drgn_error * c_declare_tagged(struct drgn_qualified_type qualified_type, struct string_callback *name, size_t indent, - struct string_builder *sb) + bool define_anonymous_type, struct string_builder *sb) { struct drgn_error *err; - if (drgn_type_is_anonymous(qualified_type.type)) + bool anonymous = drgn_type_is_anonymous(qualified_type.type); + if (anonymous && define_anonymous_type) err = c_define_type(qualified_type, indent, sb); else err = c_append_tagged_name(qualified_type, indent, sb); if (err) return err; + if (anonymous && !define_anonymous_type && + !string_builder_append(sb, " ")) + return &drgn_enomem; if (name) { if (!string_builder_appendc(sb, ' ')) @@ -229,7 +233,8 @@ c_declare_pointer(struct drgn_qualified_type qualified_type, struct drgn_qualified_type referenced_type; referenced_type = drgn_type_type(qualified_type.type); - return c_declare_variable(referenced_type, &pointer_name, indent, sb); + return c_declare_variable(referenced_type, &pointer_name, indent, false, + sb); } static struct drgn_error *c_array_name(struct 
string_callback *name, void *arg, @@ -267,7 +272,7 @@ c_declare_array(struct drgn_qualified_type qualified_type, struct drgn_qualified_type element_type; element_type = drgn_type_type(qualified_type.type); - return c_declare_variable(element_type, &array_name, indent, sb); + return c_declare_variable(element_type, &array_name, indent, false, sb); } static struct drgn_error * @@ -289,7 +294,7 @@ c_declare_function(struct drgn_qualified_type qualified_type, num_parameters = drgn_type_num_parameters(qualified_type.type); return_type = drgn_type_type(qualified_type.type); - err = c_declare_variable(return_type, name, indent, sb); + err = c_declare_variable(return_type, name, indent, false, sb); if (err) return err; @@ -314,7 +319,7 @@ c_declare_function(struct drgn_qualified_type qualified_type, } err = c_declare_variable(parameter_type, parameter_name && parameter_name[0] ? - &name_cb : NULL, 0, sb); + &name_cb : NULL, 0, false, sb); if (err) return err; } @@ -335,7 +340,7 @@ c_declare_function(struct drgn_qualified_type qualified_type, static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, struct string_callback *name, size_t indent, - struct string_builder *sb) + bool define_anonymous_type, struct string_builder *sb) { SWITCH_ENUM(drgn_type_kind(qualified_type.type), case DRGN_TYPE_VOID: @@ -348,7 +353,8 @@ c_declare_variable(struct drgn_qualified_type qualified_type, case DRGN_TYPE_UNION: case DRGN_TYPE_CLASS: case DRGN_TYPE_ENUM: - return c_declare_tagged(qualified_type, name, indent, sb); + return c_declare_tagged(qualified_type, name, indent, + define_anonymous_type, sb); case DRGN_TYPE_POINTER: return c_declare_pointer(qualified_type, name, indent, sb); case DRGN_TYPE_ARRAY: @@ -395,7 +401,7 @@ c_define_compound(struct drgn_qualified_type qualified_type, size_t indent, }; err = c_declare_variable(member_type, member_name && member_name[0] ? 
- &name_cb : NULL, indent + 1, sb); + &name_cb : NULL, indent + 1, true, sb); if (err) return err; if (member_bit_field_size && @@ -480,7 +486,7 @@ c_define_typedef(struct drgn_qualified_type qualified_type, size_t indent, return &drgn_enomem; aliased_type = drgn_type_type(qualified_type.type); - return c_declare_variable(aliased_type, &typedef_name, 0, sb); + return c_declare_variable(aliased_type, &typedef_name, 0, true, sb); } static struct drgn_error * @@ -511,27 +517,11 @@ c_define_type(struct drgn_qualified_type qualified_type, size_t indent, ) } -static struct drgn_error * -c_anonymous_type_name(struct drgn_qualified_type qualified_type, - struct string_builder *sb) -{ - struct drgn_error *err; - - err = c_append_tagged_name(qualified_type, 0, sb); - if (err) - return err; - if (!string_builder_append(sb, " ")) - return &drgn_enomem; - return NULL; -} - static struct drgn_error * c_format_type_name_impl(struct drgn_qualified_type qualified_type, struct string_builder *sb) { - if (drgn_type_is_anonymous(qualified_type.type)) { - return c_anonymous_type_name(qualified_type, sb); - } else if (drgn_type_kind(qualified_type.type) == DRGN_TYPE_FUNCTION) { + if (drgn_type_kind(qualified_type.type) == DRGN_TYPE_FUNCTION) { struct string_callback name_cb = { .fn = c_variable_name, .arg = (void *)"", @@ -539,7 +529,7 @@ c_format_type_name_impl(struct drgn_qualified_type qualified_type, return c_declare_function(qualified_type, &name_cb, 0, sb); } else { - return c_declare_variable(qualified_type, NULL, 0, sb); + return c_declare_variable(qualified_type, NULL, 0, false, sb); } } diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 60afafd0b..48a959bdc 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -340,6 +340,67 @@ def test_function(self): "int (void)", ) + def test_pointer_to_anonymous_struct(self): + self.assertTypeName( + self.prog.pointer_type( + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, 
True), "x", 0),) + ) + ), + "struct *", + ) + + def test_array_of_anonymous_struct(self): + self.assertTypeName( + self.prog.array_type( + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ), + 2, + ), + "struct [2]", + ) + + def test_function_returning_anonymous_struct(self): + self.assertTypeName( + self.prog.function_type( + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ), + (), + ), + "struct (void)", + ) + + def test_function_of_anonymous_struct(self): + self.assertTypeName( + self.prog.function_type( + self.prog.int_type("int", 4, True), + ( + TypeParameter( + self.prog.struct_type( + None, + 8, + (TypeMember(self.prog.int_type("int", 4, True), "x", 0),), + ), + "x", + ), + ), + ), + "int (struct x)", + ) + + def test_typedef_of_anonymous_struct(self): + self.assertTypeName( + self.prog.typedef_type( + "onymous", + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ), + ), + "onymous", + ) + class TestPrettyPrintType(MockProgramTestCase): def assertPrettyPrint(self, type, expected): From 36f7e8b59b5b54b7db3e4e6feef638c8c6c12001 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 2 Dec 2021 02:01:46 -0800 Subject: [PATCH 098/139] README: add libtool to build dependencies for Debian and Arch Fixes: 1b7badad0a72 ("docs: expand and reorganize installation instructions") Signed-off-by: Omar Sandoval --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 441175a67..2d4f44973 100644 --- a/README.rst +++ b/README.rst @@ -121,13 +121,13 @@ First, install dependencies: .. 
code-block:: console - $ sudo apt-get install autoconf automake gawk gcc git liblzma-dev libelf-dev libdw-dev make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + $ sudo apt-get install autoconf automake gawk gcc git liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev * Arch Linux .. code-block:: console - $ sudo pacman -S --needed autoconf automake gawk gcc git libelf make pkgconf python python-pip python-setuptools + $ sudo pacman -S --needed autoconf automake gawk gcc git libelf libtool make pkgconf python python-pip python-setuptools Optionally, install `libkdumpfile `_ if you want support for the `makedumpfile From 0315ade709ece0a7c27a5bb8e706024ffd75aa49 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 2 Dec 2021 03:46:06 -0800 Subject: [PATCH 099/139] tests: handle CONFIG_KALLSYMS=n and CONFIG_KALLSYMS_ALL=n If CONFIG_KALLSYMS_ALL=n, then /proc/kallsyms won't include lo_fops, which is a data symbol. Use a function symbol, lo_open, instead. Also check whether /proc/kallsyms exists in the first place. Signed-off-by: Omar Sandoval --- tests/helpers/linux/test_debug_info.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/helpers/linux/test_debug_info.py b/tests/helpers/linux/test_debug_info.py index 22d43728c..7d7e3211e 100644 --- a/tests/helpers/linux/test_debug_info.py +++ b/tests/helpers/linux/test_debug_info.py @@ -2,15 +2,22 @@ # SPDX-License-Identifier: GPL-3.0-or-later import os +from pathlib import Path +import unittest from drgn import Program from tests.helpers.linux import LinuxHelperTestCase, setenv +KALLSYMS_PATH = Path("/proc/kallsyms") + +@unittest.skipUnless( + KALLSYMS_PATH.exists(), "kernel does not have kallsyms (CONFIG_KALLSYMS)" +) class TestModuleDebugInfo(LinuxHelperTestCase): # Arbitrary symbol that we can use to check that the module debug info was # loaded. 
- SYMBOL = "lo_fops" + SYMBOL = "lo_open" def setUp(self): super().setUp() @@ -21,7 +28,7 @@ def setUp(self): else: self.skipTest("loop module is built in or not loaded") - with open("/proc/kallsyms", "r") as f: + with KALLSYMS_PATH.open() as f: for line in f: tokens = line.split() if tokens[2] == self.SYMBOL: From 0e318754fe3428d99459fbaf7ae99e3c034edead Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 30 Nov 2021 02:35:59 -0800 Subject: [PATCH 100/139] libdrgn: don't swallow errors in relocate_elf_file() Fixes: 62d98b301622 ("libdrgn: fold ELF relocation code into dwarf_index") Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 8aba8c54d..39943e79a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -865,9 +865,10 @@ static struct drgn_error *relocate_elf_file(Elf *elf) goto out; } } + err = NULL; out: free(sh_addrs); - return NULL; + return err; } static struct drgn_error * From 91f6d03ee8bd4285d411df634917573293f279e3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 3 Dec 2021 12:01:15 -0800 Subject: [PATCH 101/139] libdrgn: fix note name matching The current code matches the desired note name as a prefix, but we need an exact match. 
Fixes: 75c3679147a5 ("Rewrite drgn core in C") Signed-off-by: Omar Sandoval --- libdrgn/program.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libdrgn/program.c b/libdrgn/program.c index 3e5ecf019..de51c9dc5 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -293,11 +293,13 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) name = (char *)data->d_buf + name_offset; desc = (char *)data->d_buf + desc_offset; - if (strncmp(name, "CORE", nhdr.n_namesz) == 0) { + if (nhdr.n_namesz == sizeof("CORE") && + memcmp(name, "CORE", sizeof("CORE")) == 0) { if (nhdr.n_type == NT_TASKSTRUCT) have_nt_taskstruct = true; - } else if (strncmp(name, "VMCOREINFO", - nhdr.n_namesz) == 0) { + } else if (nhdr.n_namesz == sizeof("VMCOREINFO") && + memcmp(name, "VMCOREINFO", + sizeof("VMCOREINFO")) == 0) { vmcoreinfo_note = desc; vmcoreinfo_size = nhdr.n_descsz; /* From 2c6e36847f96f13e568b514be34835127f918248 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 6 Dec 2021 01:47:09 -0800 Subject: [PATCH 102/139] Remove some include-what-you-use workarounds include-what-you-use 0.17 fixed a couple of issues we were working around with a mapping file. 
Signed-off-by: Omar Sandoval --- scripts/iwyu.imp | 6 ------ scripts/iwyu.py | 2 -- 2 files changed, 8 deletions(-) delete mode 100644 scripts/iwyu.imp diff --git a/scripts/iwyu.imp b/scripts/iwyu.imp deleted file mode 100644 index a1d0d181e..000000000 --- a/scripts/iwyu.imp +++ /dev/null @@ -1,6 +0,0 @@ -[ - # include-what-you-use/include-what-you-use#967 - { include: [ "", public, "", public ] }, - # include-what-you-use/include-what-you-use#968 - { include: [ "", private, "", public ] }, -] diff --git a/scripts/iwyu.py b/scripts/iwyu.py index e9a9bf61f..26e4d776d 100755 --- a/scripts/iwyu.py +++ b/scripts/iwyu.py @@ -219,8 +219,6 @@ def main(): + [ "-Xiwyu", "--mapping_file=" + os.path.abspath(python_mapping_file), - "-Xiwyu", - "--mapping_file=" + os.path.abspath("scripts/iwyu.imp"), "-w", # We don't want warnings from Clang. ], cwd=command["directory"], From 10c66d4e99ffb941f44293197d78ade806bb9a2d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 6 Dec 2021 01:49:53 -0800 Subject: [PATCH 103/139] libdrgn: get correct error when dwelf_elf_gnu_build_id() fails The documentation for libdwelf states that "functions starting with dwelf_elf will take a (libelf) Elf object as first argument and might set elf_errno on error". So, we should be using drgn_error_libelf(), not drgn_error_libdwfl(). While we're here, close the Elf handle before the file descriptor for consistency. 
Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 39943e79a..a6316335a 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -527,9 +527,9 @@ drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, ssize_t build_id_len = dwelf_elf_gnu_build_id(elf, &build_id); if (build_id_len < 0) { err = drgn_debug_info_report_error(load, path, NULL, - drgn_error_libdwfl()); - close(fd); + drgn_error_libelf()); elf_end(elf); + close(fd); return err; } else if (build_id_len == 0) { build_id = NULL; From aef144c94466ae6bf318e6e14aeda2ab44eb98a6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 6 Dec 2021 13:33:55 -0800 Subject: [PATCH 104/139] libdrgn: debug_info: improve elf_address_range() Instead of iterating through every segment, we can just look at the first and last loadable segments. This even works for vmlinux on x86-64 and Arm which have some special, relocatable segments. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 65 +++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index a6316335a..656012080 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1328,45 +1328,54 @@ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) return NULL; } +/* + * Get the start address from the first loadable segment and the end address + * from the last loadable segment. + * + * The ELF specification states that loadable segments are sorted on p_vaddr. + * However, vmlinux on x86-64 has an out of order segment for .data..percpu, and + * Arm has a couple for .vector and .stubs. Thankfully, those are placed in the + * middle by the vmlinux linker script, so we can still rely on the first and + * last loadable segments. 
+ */ struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, uint64_t *start_ret, uint64_t *end_ret) { - uint64_t start = UINT64_MAX, end = 0; - size_t phnum, i; - - /* - * Get the minimum and maximum addresses from the PT_LOAD segments. We - * ignore memory ranges that start beyond UINT64_MAX, and we truncate - * ranges that end beyond UINT64_MAX. - */ + size_t phnum; if (elf_getphdrnum(elf, &phnum) != 0) return drgn_error_libelf(); + + GElf_Phdr phdr_mem, *phdr; + size_t i; for (i = 0; i < phnum; i++) { - GElf_Phdr phdr_mem, *phdr; - uint64_t segment_start, segment_end; + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type == PT_LOAD) { + uint64_t align = phdr->p_align ? phdr->p_align : 1; + *start_ret = (phdr->p_vaddr & -align) + bias; + break; + } + } + if (i >= phnum) { + /* There were no loadable segments. */ + *start_ret = *end_ret = 0; + return NULL; + } + for (i = phnum; i-- > 0;) { phdr = gelf_getphdr(elf, i, &phdr_mem); if (!phdr) return drgn_error_libelf(); - if (phdr->p_type != PT_LOAD || !phdr->p_vaddr) - continue; - if (__builtin_add_overflow(phdr->p_vaddr, bias, - &segment_start)) - continue; - if (__builtin_add_overflow(segment_start, phdr->p_memsz, - &segment_end)) - segment_end = UINT64_MAX; - if (segment_start < segment_end) { - if (segment_start < start) - start = segment_start; - if (segment_end > end) - end = segment_end; + if (phdr->p_type == PT_LOAD) { + *end_ret = (phdr->p_vaddr + phdr->p_memsz) + bias; + if (*start_ret >= *end_ret) + *start_ret = *end_ret = 0; + return NULL; } } - /* There were no loadable segments. */ - if (start >= end) - start = end = 0; - *start_ret = start; - *end_ret = end; + /* We found a loadable segment earlier, so this shouldn't happen. 
*/ + assert(!"PT_LOAD segment disappeared"); + *end_ret = 0; return NULL; } From 844d82848cd779b4e8cfaef9b3272aa3f42bcfdb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 7 Dec 2021 21:13:52 +0000 Subject: [PATCH 105/139] libdrgn: add partial support for .gnu_debugaltlink Issue #130 reported an "unknown attribute form 0x1f20" from drgn. 0x1f20 is DW_FORM_GNU_ref_alt, which is a reference to a DIE in an alternate file. Similarly, DW_FORM_GNU_strp_alt is a string in an alternate file. The alternate file is specified by the .gnu_debugaltlink section. This is generated by dwz, which is used by at least Fedora and Debian. libdwfl already finds the alternate debug info file, so we can save its .debug_info and .debug_str and use those to support DW_FORM_GNU_ref_alt and DW_FORM_GNU_strp_alt in the DWARF index. Imported units are going to be more work to support in the DWARF index, but this at least lets drgn start up. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 47 ++++++++++++++++++++++++ libdrgn/debug_info.h | 4 +++ libdrgn/dwarf_info.c | 86 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 656012080..293dec666 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -923,6 +923,40 @@ drgn_debug_info_find_sections(struct drgn_debug_info_module *module) } } } + + Dwarf *altdwarf = dwarf_getalt(dwarf); + if (altdwarf) { + elf = dwarf_getelf(altdwarf); + if (!elf) + return drgn_error_libdw(); + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); + + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + if (shdr->sh_type != SHT_PROGBITS) + continue; + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); + + /* + * TODO: save more sections and support 
imported units. + */ + if (strcmp(scnname, ".debug_info") == 0 && + !module->alt_debug_info) + module->alt_debug_info = scn; + else if (strcmp(scnname, ".debug_str") == 0 && + !module->alt_debug_str) + module->alt_debug_str = scn; + } + } + return NULL; } @@ -951,6 +985,18 @@ drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) return err; } } + if (module->alt_debug_info) { + err = read_elf_section(module->alt_debug_info, + &module->alt_debug_info_data); + if (err) + return err; + } + if (module->alt_debug_str) { + err = read_elf_section(module->alt_debug_str, + &module->alt_debug_str_data); + if (err) + return err; + } /* * Truncate any extraneous bytes so that we can assume that a pointer @@ -958,6 +1004,7 @@ drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) */ truncate_null_terminated_section(module->scn_data[DRGN_SCN_DEBUG_STR]); truncate_null_terminated_section(module->scn_data[DRGN_SCN_DEBUG_LINE_STR]); + truncate_null_terminated_section(module->alt_debug_str_data); return NULL; } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 5f5f13613..f453ab045 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -100,7 +100,11 @@ struct drgn_debug_info_module { Dwfl_Module *dwfl_module; struct drgn_platform platform; Elf_Scn *scns[DRGN_NUM_DEBUG_SCNS]; + Elf_Scn *alt_debug_info; + Elf_Scn *alt_debug_str; Elf_Data *scn_data[DRGN_NUM_DEBUG_SCN_DATA]; + Elf_Data *alt_debug_info_data; + Elf_Data *alt_debug_str_data; /** DWARF debugging information. */ struct drgn_dwarf_module_info dwarf; diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 03b64e844..5d506513b 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -379,7 +379,7 @@ enum drgn_dwarf_index_abbrev_insn { * Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to * be skipped over. */ - INSN_MAX_SKIP = 199, + INSN_MAX_SKIP = 193, /* These instructions indicate an attribute that can be skipped over. 
*/ INSN_SKIP_BLOCK, @@ -403,6 +403,8 @@ enum drgn_dwarf_index_abbrev_insn { INSN_NAME_STRX2, INSN_NAME_STRX3, INSN_NAME_STRX4, + INSN_NAME_STRP_ALT4, + INSN_NAME_STRP_ALT8, INSN_COMP_DIR_STRP4, INSN_COMP_DIR_STRP8, INSN_COMP_DIR_LINE_STRP4, @@ -413,6 +415,8 @@ enum drgn_dwarf_index_abbrev_insn { INSN_COMP_DIR_STRX2, INSN_COMP_DIR_STRX3, INSN_COMP_DIR_STRX4, + INSN_COMP_DIR_STRP_ALT4, + INSN_COMP_DIR_STRP_ALT8, INSN_STR_OFFSETS_BASE4, INSN_STR_OFFSETS_BASE8, INSN_STMT_LIST_LINEPTR4, @@ -435,6 +439,8 @@ enum drgn_dwarf_index_abbrev_insn { INSN_SPECIFICATION_REF_UDATA, INSN_SPECIFICATION_REF_ADDR4, INSN_SPECIFICATION_REF_ADDR8, + INSN_SPECIFICATION_REF_ALT4, + INSN_SPECIFICATION_REF_ALT8, INSN_INDIRECT, INSN_SIBLING_INDIRECT, INSN_NAME_INDIRECT, @@ -674,6 +680,8 @@ static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_strp: case DW_FORM_strp_sup: case DW_FORM_line_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: *insn_ret = cu->is_64_bit ? 8 : 4; return NULL; case DW_FORM_string: @@ -759,6 +767,16 @@ static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_strx4: *insn_ret = INSN_NAME_STRX4; return NULL; + case DW_FORM_GNU_strp_alt: + if (!cu->module->alt_debug_str_data) { + return binary_buffer_error(bb, + "DW_FORM_GNU_strp_alt without alternate .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_NAME_STRP_ALT8; + else + *insn_ret = INSN_NAME_STRP_ALT4; + return NULL; case DW_FORM_indirect: *insn_ret = INSN_NAME_INDIRECT; return NULL; @@ -813,6 +831,16 @@ static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, case DW_FORM_strx4: *insn_ret = INSN_COMP_DIR_STRX4; return NULL; + case DW_FORM_GNU_strp_alt: + if (!cu->module->alt_debug_str_data) { + return binary_buffer_error(bb, + "DW_FORM_GNU_strp_alt without alternate .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_COMP_DIR_STRP_ALT8; + else + *insn_ret = INSN_COMP_DIR_STRP_ALT4; + 
return NULL; case DW_FORM_indirect: *insn_ret = INSN_COMP_DIR_INDIRECT; return NULL; @@ -976,6 +1004,16 @@ dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, cu->address_size); } return NULL; + case DW_FORM_GNU_ref_alt: + if (!cu->module->alt_debug_info_data) { + return binary_buffer_error(bb, + "DW_FORM_GNU_ref_alt without alternate .debug_info section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_SPECIFICATION_REF_ALT8; + else + *insn_ret = INSN_SPECIFICATION_REF_ALT4; + return NULL; case DW_FORM_indirect: *insn_ret = INSN_SPECIFICATION_INDIRECT; return NULL; @@ -2092,6 +2130,17 @@ indirect_insn:; return err; comp_dir = &comp_dir_is_strx; break; + case INSN_COMP_DIR_STRP_ALT4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + strp_scn = cu->module->alt_debug_str_data; + goto comp_dir_strp; + case INSN_COMP_DIR_STRP_ALT8: + if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) + return err; + strp_scn = cu->module->alt_debug_str_data; + goto comp_dir_strp; case INSN_STR_OFFSETS_BASE4: if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, &tmp))) @@ -2135,10 +2184,12 @@ indirect_insn:; goto skip; case INSN_NAME_STRP4: case INSN_NAME_STRX4: + case INSN_NAME_STRP_ALT4: case INSN_DECL_FILE_DATA4: skip = 4; goto skip; case INSN_NAME_STRP8: + case INSN_NAME_STRP_ALT8: case INSN_DECL_FILE_DATA8: skip = 8; goto skip; @@ -2194,6 +2245,19 @@ indirect_insn:; specification_ref_addr: specification = (uintptr_t)debug_info_buffer + tmp; break; + case INSN_SPECIFICATION_REF_ALT4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification_ref_alt; + case INSN_SPECIFICATION_REF_ALT8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +specification_ref_alt: + specification = ((uintptr_t)cu->module->alt_debug_info_data->d_buf + + tmp); + break; case INSN_INDIRECT: case INSN_SIBLING_INDIRECT: case INSN_NAME_INDIRECT: @@ -2545,14 +2609,32 @@ indirect_insn:; return err; 
__builtin_prefetch(name); break; + case INSN_NAME_STRP_ALT4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto name_alt_strp; + case INSN_NAME_STRP_ALT8: + if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) + return err; +name_alt_strp: + if (tmp >= cu->module->alt_debug_str_data->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_AT_name is out of bounds"); + } + name = (const char *)cu->module->alt_debug_str_data->d_buf + tmp; + __builtin_prefetch(name); + break; case INSN_COMP_DIR_STRP4: case INSN_COMP_DIR_LINE_STRP4: + case INSN_COMP_DIR_STRP_ALT4: case INSN_STR_OFFSETS_BASE4: case INSN_STMT_LIST_LINEPTR4: skip = 4; goto skip; case INSN_COMP_DIR_STRP8: case INSN_COMP_DIR_LINE_STRP8: + case INSN_COMP_DIR_STRP_ALT8: case INSN_STR_OFFSETS_BASE8: case INSN_STMT_LIST_LINEPTR8: skip = 8; @@ -2623,6 +2705,7 @@ indirect_insn:; goto skip; case INSN_SPECIFICATION_REF4: case INSN_SPECIFICATION_REF_ADDR4: + case INSN_SPECIFICATION_REF_ALT4: specification = true; /* fallthrough */ case INSN_COMP_DIR_STRX4: @@ -2630,6 +2713,7 @@ indirect_insn:; goto skip; case INSN_SPECIFICATION_REF8: case INSN_SPECIFICATION_REF_ADDR8: + case INSN_SPECIFICATION_REF_ALT8: specification = true; skip = 8; goto skip; From 02912ca7d07314597172970d0510d57d0d3574ba Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 7 Dec 2021 22:19:53 -0800 Subject: [PATCH 106/139] libdrgn: fix handling of p_filesz < p_memsz in core dumps I implemented the case of a segment in a core file with p_filesz < p_memsz by treating the difference as zero bytes. This is correct for ET_EXEC and ET_DYN, but for ET_CORE, it actually means that the memory existed in the program but was not saved. For userspace core dumps, this typically happens for read-only file mappings. For kernel core dumps, makedumpfile does this to indicate memory that was excluded. Instead, let's return a DRGN_FAULT_ERROR if an attempt is made to read from these bytes. 
In the future, we need to read from the executable/library files when we can. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 6 ++++++ libdrgn/memory_reader.c | 26 ++++++++++++-------------- libdrgn/memory_reader.h | 5 +++-- libdrgn/program.c | 17 +++++++++++++++-- tests/test_program.py | 6 ++++-- 5 files changed, 40 insertions(+), 20 deletions(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 293dec666..71dc242a0 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1183,6 +1183,12 @@ struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, if (err) goto err; + /* + * TODO: for core dumps, we need to add memory reader segments for + * read-only segments of the loaded binaries since those aren't saved in + * the core dump. + */ + /* * If this fails, it's too late to roll back. This can only fail with * enomem, so it's not a big deal. diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 7bb98cd43..81b222fe4 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -249,21 +249,19 @@ struct drgn_error *drgn_read_memory_file(void *buf, uint64_t address, void *arg, bool physical) { struct drgn_memory_file_segment *file_segment = arg; - char *p = buf; - uint64_t file_offset = file_segment->file_offset + offset; - size_t file_count; - if (offset < file_segment->file_size) { - file_count = min((uint64_t)count, - file_segment->file_size - offset); - count -= file_count; - } else { - file_count = 0; + if (offset > file_segment->file_size || + count > file_segment->file_size - offset) { + if (offset <= file_segment->file_size) + address += file_segment->file_size - offset; + return drgn_error_create_fault("memory not saved in core dump", + address); } - while (file_count) { - ssize_t ret; - ret = pread(file_segment->fd, p, file_count, file_offset); + uint64_t file_offset = file_segment->file_offset + offset; + char *p = buf; + while (count) { + ssize_t ret = pread(file_segment->fd, p, count, 
file_offset); if (ret == -1) { if (errno == EINTR) { continue; @@ -278,9 +276,9 @@ struct drgn_error *drgn_read_memory_file(void *buf, uint64_t address, address); } p += ret; - file_count -= ret; + address += ret; + count -= ret; file_offset += ret; } - memset(p, 0, count); return NULL; } diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index 143834660..4fa524c4a 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -123,8 +123,9 @@ struct drgn_memory_file_segment { uint64_t file_offset; /** * Size of the segment in the file. This may be less than the size of - * the segment in memory, in which case the remaining bytes are treated - * as if they contained zeroes. + * the segment in memory, which means that the remaining bytes were in + * the program's memory but were not saved in the core dump. Attempting + * to read these bytes is treated as a fault. */ uint64_t file_size; /** File descriptor. */ diff --git a/libdrgn/program.c b/libdrgn/program.c index de51c9dc5..777f6cc1f 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -350,8 +350,10 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) goto out_platform; } - if ((is_proc_kcore || vmcoreinfo_note) && - prog->platform.arch->linux_kernel_pgtable_iterator_next) { + bool pgtable_reader = + (is_proc_kcore || vmcoreinfo_note) && + prog->platform.arch->linux_kernel_pgtable_iterator_next; + if (pgtable_reader) { /* * Try to read any memory that isn't in the core dump via the * page table. @@ -381,6 +383,13 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) prog->file_segments[j].fd = prog->core_fd; prog->file_segments[j].eio_is_fault = false; err = drgn_program_add_memory_segment(prog, phdr->p_vaddr, + /* + * Don't override the page + * table reader for + * unsaved regions. + */ + pgtable_reader ? 
+ phdr->p_filesz : phdr->p_memsz, drgn_read_memory_file, &prog->file_segments[j], @@ -391,6 +400,8 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) phdr->p_paddr != (is_64_bit ? UINT64_MAX : UINT32_MAX)) { err = drgn_program_add_memory_segment(prog, phdr->p_paddr, + pgtable_reader ? + phdr->p_filesz : phdr->p_memsz, drgn_read_memory_file, &prog->file_segments[j], @@ -436,6 +447,8 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) phys_addr = phdr->p_vaddr - direct_mapping; err = drgn_program_add_memory_segment(prog, phys_addr, + pgtable_reader ? + phdr->p_filesz : phdr->p_memsz, drgn_read_memory_file, &prog->file_segments[j], diff --git a/tests/test_program.py b/tests/test_program.py index 889f1cde6..bc8ea8cc2 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -806,7 +806,7 @@ def test_physical(self): self.assertEqual(prog.read(0xFFFF0000, len(data)), data) self.assertEqual(prog.read(0xA0, len(data), physical=True), data) - def test_zero_fill(self): + def test_unsaved(self): data = b"hello, world" prog = Program() with tempfile.NamedTemporaryFile() as f: @@ -825,4 +825,6 @@ def test_zero_fill(self): ) f.flush() prog.set_core_dump(f.name) - self.assertEqual(prog.read(0xFFFF0000, len(data) + 4), data + bytes(4)) + with self.assertRaisesRegex(FaultError, "memory not saved in core dump") as cm: + prog.read(0xFFFF0000, len(data) + 4) + self.assertEqual(cm.exception.address, 0xFFFF000C) From e6abfeac0329f26ae619ea72f15e9f909ca14fe8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 11:48:49 -0800 Subject: [PATCH 107/139] libdrgn: debug_info: report userspace core dump debug info ourselves There are a few reasons for this: 1. dwfl_core_file_report() crashes on elfutils 0.183-0.185. Those versions are still used by several distros. 2. In order to support --main-symbols and --symbols properly, we need to report things ourselves. 3. I'm considering moving away from libdwfl in the long term. 
We provide an escape hatch for now: setting the environment variable DRGN_USE_LIBDWFL_REPORT=1 opts out of drgn's reporting and uses libdwfl's. Fixes #130. Signed-off-by: Omar Sandoval --- docs/advanced_usage.rst | 6 + libdrgn/debug_info.c | 837 +++++++++++++++++++++++++++++++++++++++- libdrgn/debug_info.h | 10 + libdrgn/program.c | 14 +- 4 files changed, 852 insertions(+), 15 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index f951d3056..f1c0d2e5a 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -100,6 +100,12 @@ Some of drgn's behavior can be modified through environment variables: vice versa. This environment variable is mainly intended for testing and may be ignored in the future. +``DRGN_USE_LIBDWFL_REPORT`` + Whether drgn should use libdwfl to find debugging information for core + dumps instead of its own implementation (0 or 1). The default is 0. This + environment variable is mainly intended as an escape hatch in case of bugs + in drgn's implementation and will be ignored in the future. + ``DRGN_USE_LIBKDUMPFILE_FOR_ELF`` Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. 
diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 71dc242a0..a8564c409 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -581,6 +581,817 @@ static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module, return DWARF_CB_ABORT; } +static struct drgn_error *drgn_get_nt_file(Elf *elf, const char **ret, + size_t *len_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) + return drgn_error_libelf(); + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type == PT_NOTE) { + Elf_Data *data = elf_getdata_rawchunk(elf, + phdr->p_offset, + phdr->p_filesz, + note_header_type(phdr->p_align)); + if (!data) + return drgn_error_libelf(); + GElf_Nhdr nhdr; + size_t offset = 0, name_offset, desc_offset; + while (offset < data->d_size && + (offset = gelf_getnote(data, offset, &nhdr, + &name_offset, + &desc_offset))) { + const char *name = + (char *)data->d_buf + name_offset; + if (nhdr.n_namesz == sizeof("CORE") && + memcmp(name, "CORE", sizeof("CORE")) == 0 && + nhdr.n_type == NT_FILE) { + *ret = (char *)data->d_buf + desc_offset; + *len_ret = nhdr.n_descsz; + return NULL; + } + } + } + } + *ret = NULL; + *len_ret = 0; + return NULL; +} + +struct drgn_mapped_file_segment { + uint64_t start; + uint64_t end; + uint64_t file_offset; +}; + +DEFINE_VECTOR(drgn_mapped_file_segment_vector, struct drgn_mapped_file_segment) + +DEFINE_HASH_MAP(drgn_mapped_files, const char *, + struct drgn_mapped_file_segment_vector, c_string_key_hash_pair, + c_string_key_eq) + +struct userspace_core_report_state { + struct drgn_mapped_files files; + char *phdr_buf; + size_t phdr_buf_capacity; + char *segment_buf; + size_t segment_buf_capacity; +}; + +static struct drgn_error *parse_nt_file_error(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + return drgn_error_create(DRGN_ERROR_OTHER, "couldn't parse NT_FILE"); +} + +static bool 
+drgn_mapped_file_segments_contiguous(const struct drgn_mapped_file_segment *segment1, + const struct drgn_mapped_file_segment *segment2) +{ + if (segment1->end != segment2->start) + return false; + uint64_t size = segment1->end - segment1->start; + return segment1->file_offset + size == segment2->file_offset; +} + +static struct drgn_error * +userspace_core_get_mapped_files(struct drgn_debug_info_load_state *load, + struct userspace_core_report_state *core, + const char *nt_file, size_t nt_file_len) +{ + struct drgn_error *err; + + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(load->dbinfo->prog->core, + &ehdr_mem); + if (!ehdr) + return drgn_error_libelf(); + bool is_64_bit = ehdr->e_ident[EI_CLASS] == ELFCLASS64; + bool little_endian = ehdr->e_ident[EI_DATA] == ELFDATA2LSB; + + struct binary_buffer bb; + binary_buffer_init(&bb, nt_file, nt_file_len, little_endian, + parse_nt_file_error); + + /* + * fs/binfmt_elf.c in the Linux kernel source code documents the format + * of NT_FILE as: + * + * long count -- how many files are mapped + * long page_size -- units for file_ofs + * array of [COUNT] elements of + * long start + * long end + * long file_ofs + * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
+ */ + uint64_t count, page_size; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / 24) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, count * 24))) + return err; + } else { + if ((err = binary_buffer_next_u32_into_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / 12) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u32_into_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, count * 12))) + return err; + } + + for (uint64_t i = 0; i < count; i++) { + struct drgn_mapped_file_segment segment; + if (is_64_bit) { + memcpy(&segment, nt_file + 16 + i * 24, 24); + if (bb.bswap) { + segment.start = bswap_64(segment.start); + segment.end = bswap_64(segment.end); + segment.file_offset = bswap_64(segment.file_offset); + } + } else { + struct { + uint32_t start; + uint32_t end; + uint32_t file_offset; + } segment32; + memcpy(&segment32, nt_file + 8 + i * 12, 12); + if (bb.bswap) { + segment.start = bswap_32(segment32.start); + segment.end = bswap_32(segment32.end); + segment.file_offset = bswap_32(segment32.file_offset); + } else { + segment.start = segment32.start; + segment.end = segment32.end; + segment.file_offset = segment32.file_offset; + } + } + segment.file_offset *= page_size; + + struct drgn_mapped_files_entry entry = { + .key = bb.pos, + }; + if ((err = binary_buffer_skip_string(&bb))) + return err; + struct drgn_mapped_files_iterator it; + int r = drgn_mapped_files_insert(&core->files, &entry, &it); + if (r < 0) + return &drgn_enomem; + if (r == 1) + drgn_mapped_file_segment_vector_init(&it.entry->value); + + /* + * The Linux kernel creates separate entries for contiguous + * mappings with different memory protections even though the + * protection is not included in NT_FILE. Merge them if we can. 
+ */ + if (it.entry->value.size > 0 && + drgn_mapped_file_segments_contiguous(&it.entry->value.data[it.entry->value.size - 1], + &segment)) + it.entry->value.data[it.entry->value.size - 1].end = segment.end; + else if (!drgn_mapped_file_segment_vector_append(&it.entry->value, + &segment)) + return &drgn_enomem; + } + return NULL; +} + +static bool build_id_matches(Elf *elf, const void *build_id, + size_t build_id_len) +{ + const void *elf_build_id; + ssize_t elf_build_id_len = dwelf_elf_gnu_build_id(elf, &elf_build_id); + if (elf_build_id_len < 0) + return false; + return (elf_build_id_len == build_id_len && + memcmp(elf_build_id, build_id, build_id_len) == 0); +} + +static struct drgn_error * +userspace_core_elf_address_range(uint16_t e_type, size_t phnum, + struct drgn_error *(*get_phdr)(void *, size_t, GElf_Phdr *), + void *arg, + const struct drgn_mapped_file_segment *segments, + size_t num_segments, + const struct drgn_mapped_file_segment *ehdr_segment, + uint64_t *bias_ret, uint64_t *start_ret, + uint64_t *end_ret) +{ + struct drgn_error *err; + + /* + * First, find the virtual address of the ELF header so that we can + * calculate the bias. + */ + uint64_t ehdr_vaddr; + size_t i; + for (i = 0; i < phnum; i++) { + GElf_Phdr phdr; + err = get_phdr(arg, i, &phdr); + if (err) + return err; + if (phdr.p_type == PT_LOAD) { + uint64_t align = phdr.p_align ? phdr.p_align : 1; + if ((phdr.p_offset & -align) == 0) { + ehdr_vaddr = phdr.p_vaddr & -align; + break; + } + } + } + if (i >= phnum) { + /* + * No loadable segments contain the ELF header. This can't be + * our file. + */ + *bias_ret = 0; +not_loaded: + *start_ret = *end_ret = 0; + return NULL; + } + *bias_ret = ehdr_segment->start - ehdr_vaddr; + if (*bias_ret != 0 && e_type == ET_EXEC) { + /* The executable is not loaded at the correct address. */ + goto not_loaded; + } + + /* + * Now check all of the program headers to (1) get the module address + * range and (2) make sure that they are mapped as expected. 
If we're + * lucky, this can detect a file that was mmap'd and not actually loaded + * by the kernel or dynamic loader. This could also be the wrong file. + */ + const struct drgn_mapped_file_segment *segment = segments; + const struct drgn_mapped_file_segment *end_segment = + segments + num_segments; + uint64_t start = 0, end = 0; + bool first = true; + for (i = 0; i < phnum; i++) { + GElf_Phdr phdr; + err = get_phdr(arg, i, &phdr); + if (err) + return err; + if (phdr.p_type != PT_LOAD) + continue; + uint64_t vaddr = phdr.p_vaddr + *bias_ret; + if (phdr.p_filesz != 0) { + /* + * Advance to the mapped segment containing the start + * address. + */ + while (vaddr >= segment->end) { + if (++segment == end_segment) + goto not_loaded; + if (vaddr < segment->start) + goto not_loaded; + } + if (segment->file_offset + (vaddr - segment->start) != + phdr.p_offset) { + /* + * The address in the core dump does not map to + * the segment's file offset. + */ + goto not_loaded; + } + if (phdr.p_filesz > segment->end - vaddr) { + /* Part of the segment is not mapped. */ + goto not_loaded; + } + } + if (first) { + uint64_t align = phdr.p_align ? phdr.p_align : 1; + start = vaddr & -align; + first = false; + } + end = vaddr + phdr.p_memsz; + } + if (start >= end) + goto not_loaded; + *start_ret = start; + *end_ret = end; + return NULL; +} + +static bool alloc_or_reuse(char **buf, size_t *capacity, uint64_t size) +{ + if (size > *capacity) { + if (size > SIZE_MAX) + return false; + free(*buf); + *buf = malloc(size); + if (!*buf) { + *capacity = 0; + return false; + } + *capacity = size; + } + return true; +} + +/* ehdr_buf must be aligned as Elf64_Ehdr. 
*/ +static void read_ehdr(const void *ehdr_buf, GElf_Ehdr *ret, bool *is_64_bit_ret, + bool *bswap_ret) +{ + *is_64_bit_ret = ((unsigned char *)ehdr_buf)[EI_CLASS] == ELFCLASS64; + bool little_endian = + ((unsigned char *)ehdr_buf)[EI_DATA] == ELFDATA2LSB; + *bswap_ret = little_endian != HOST_LITTLE_ENDIAN; + if (*is_64_bit_ret) { + const Elf64_Ehdr *ehdr64 = ehdr_buf; + if (*bswap_ret) { + memcpy(ret->e_ident, ehdr64->e_ident, EI_NIDENT); + ret->e_type = bswap_16(ehdr64->e_type); + ret->e_machine = bswap_16(ehdr64->e_machine); + ret->e_version = bswap_32(ehdr64->e_version); + ret->e_entry = bswap_64(ehdr64->e_entry); + ret->e_phoff = bswap_64(ehdr64->e_phoff); + ret->e_shoff = bswap_64(ehdr64->e_shoff); + ret->e_flags = bswap_32(ehdr64->e_flags); + ret->e_ehsize = bswap_16(ehdr64->e_ehsize); + ret->e_phentsize = bswap_16(ehdr64->e_phentsize); + ret->e_phnum = bswap_16(ehdr64->e_phnum); + ret->e_shentsize = bswap_16(ehdr64->e_shentsize); + ret->e_shnum = bswap_16(ehdr64->e_shnum); + ret->e_shstrndx = bswap_16(ehdr64->e_shstrndx); + } else { + *ret = *ehdr64; + } + } else { + const Elf32_Ehdr *ehdr32 = ehdr_buf; + memcpy(ret->e_ident, ehdr32->e_ident, EI_NIDENT); + if (*bswap_ret) { + ret->e_type = bswap_16(ehdr32->e_type); + ret->e_machine = bswap_16(ehdr32->e_machine); + ret->e_version = bswap_32(ehdr32->e_version); + ret->e_entry = bswap_32(ehdr32->e_entry); + ret->e_phoff = bswap_32(ehdr32->e_phoff); + ret->e_shoff = bswap_32(ehdr32->e_shoff); + ret->e_flags = bswap_32(ehdr32->e_flags); + ret->e_ehsize = bswap_16(ehdr32->e_ehsize); + ret->e_phentsize = bswap_16(ehdr32->e_phentsize); + ret->e_phnum = bswap_16(ehdr32->e_phnum); + ret->e_shentsize = bswap_16(ehdr32->e_shentsize); + ret->e_shnum = bswap_16(ehdr32->e_shnum); + ret->e_shstrndx = bswap_16(ehdr32->e_shstrndx); + } else { + ret->e_type = ehdr32->e_type; + ret->e_machine = ehdr32->e_machine; + ret->e_version = ehdr32->e_version; + ret->e_entry = ehdr32->e_entry; + ret->e_phoff = ehdr32->e_phoff; + 
ret->e_shoff = ehdr32->e_shoff; + ret->e_flags = ehdr32->e_flags; + ret->e_ehsize = ehdr32->e_ehsize; + ret->e_phentsize = ehdr32->e_phentsize; + ret->e_phnum = ehdr32->e_phnum; + ret->e_shentsize = ehdr32->e_shentsize; + ret->e_shnum = ehdr32->e_shnum; + ret->e_shstrndx = ehdr32->e_shstrndx; + } + } +} + +/* phdr_buf must be aligned as Elf64_Phdr. */ +static void read_phdr(const void *phdr_buf, size_t i, bool is_64_bit, + bool bswap, GElf_Phdr *ret) +{ + if (is_64_bit) { + const Elf64_Phdr *phdr64 = (Elf64_Phdr *)phdr_buf + i; + if (bswap) { + ret->p_type = bswap_32(phdr64->p_type); + ret->p_flags = bswap_32(phdr64->p_flags); + ret->p_offset = bswap_64(phdr64->p_offset); + ret->p_vaddr = bswap_64(phdr64->p_vaddr); + ret->p_paddr = bswap_64(phdr64->p_paddr); + ret->p_filesz = bswap_64(phdr64->p_filesz); + ret->p_memsz = bswap_64(phdr64->p_memsz); + ret->p_align = bswap_64(phdr64->p_align); + } else { + *ret = *phdr64; + } + } else { + const Elf32_Phdr *phdr32 = (Elf32_Phdr *)phdr_buf + i; + if (bswap) { + ret->p_type = bswap_32(phdr32->p_type); + ret->p_offset = bswap_32(phdr32->p_offset); + ret->p_vaddr = bswap_32(phdr32->p_vaddr); + ret->p_paddr = bswap_32(phdr32->p_paddr); + ret->p_filesz = bswap_32(phdr32->p_filesz); + ret->p_memsz = bswap_32(phdr32->p_memsz); + ret->p_flags = bswap_32(phdr32->p_flags); + ret->p_align = bswap_32(phdr32->p_align); + } else { + ret->p_type = phdr32->p_type; + ret->p_offset = phdr32->p_offset; + ret->p_vaddr = phdr32->p_vaddr; + ret->p_paddr = phdr32->p_paddr; + ret->p_filesz = phdr32->p_filesz; + ret->p_memsz = phdr32->p_memsz; + ret->p_flags = phdr32->p_flags; + ret->p_align = phdr32->p_align; + } + } +} + +static const char *read_build_id(const char *buf, size_t buf_len, + uint64_t align, bool bswap, + size_t *len_ret) +{ + /* + * Build IDs are usually 16 or 20 bytes (MD5 or SHA-1, respectively), so + * these arbitrary limits are generous. 
+ */ + static const uint32_t build_id_min_size = 2; + static const uint32_t build_id_max_size = 1024; + /* Elf32_Nhdr is the same as Elf64_Nhdr. */ + Elf64_Nhdr nhdr; + const char *p = buf; + while (buf + buf_len - p >= sizeof(nhdr)) { + memcpy(&nhdr, p, sizeof(nhdr)); + if (bswap) { + nhdr.n_namesz = bswap_32(nhdr.n_namesz); + nhdr.n_descsz = bswap_32(nhdr.n_descsz); + nhdr.n_type = bswap_32(nhdr.n_type); + } + p += sizeof(nhdr); + + uint64_t namesz = (nhdr.n_namesz + align - 1) & ~(align - 1); + if (namesz > buf + buf_len - p) + return NULL; + const char *name = p; + p += namesz; + + if (nhdr.n_namesz == sizeof("GNU") && + memcmp(name, "GNU", sizeof("GNU")) == 0 && + nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_descsz >= build_id_min_size && + nhdr.n_descsz <= build_id_max_size) { + if (nhdr.n_descsz > buf + buf_len - p) + return NULL; + *len_ret = nhdr.n_descsz; + return p; + } + + uint64_t descsz = (nhdr.n_descsz + align - 1) & ~(align - 1); + if (descsz > buf + buf_len - p) + return NULL; + p += descsz; + } + return NULL; +} + +struct core_get_phdr_arg { + const void *phdr_buf; + bool is_64_bit; + bool bswap; +}; + +static struct drgn_error * +core_get_phdr(void *arg_, size_t i, GElf_Phdr *ret) +{ + struct core_get_phdr_arg *arg = arg_; + read_phdr(arg->phdr_buf, i, arg->is_64_bit, arg->bswap, ret); + return NULL; +} + +struct userspace_core_identified_file { + const void *build_id; + size_t build_id_len; + uint64_t start, end; + bool ignore; + bool have_address_range; +}; + +static struct drgn_error * +userspace_core_identify_file(struct drgn_program *prog, + struct userspace_core_report_state *core, + const struct drgn_mapped_file_segment *segments, + size_t num_segments, + const struct drgn_mapped_file_segment *ehdr_segment, + struct userspace_core_identified_file *ret) +{ + struct drgn_error *err; + + Elf64_Ehdr ehdr_buf; + err = drgn_program_read_memory(prog, &ehdr_buf, ehdr_segment->start, + sizeof(ehdr_buf), false); + if (err) { + if (err->code == 
DRGN_ERROR_FAULT) { + drgn_error_destroy(err); + err = NULL; + } + return err; + } + if (memcmp(&ehdr_buf, ELFMAG, SELFMAG) != 0) { + ret->ignore = true; + return NULL; + } + + GElf_Ehdr ehdr; + struct core_get_phdr_arg arg; + read_ehdr(&ehdr_buf, &ehdr, &arg.is_64_bit, &arg.bswap); + if (ehdr.e_type == ET_CORE || + ehdr.e_phnum == 0 || + ehdr.e_phentsize != + (arg.is_64_bit ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr))) { + ret->ignore = true; + return NULL; + } + + if (ehdr.e_phnum > SIZE_MAX / ehdr.e_phentsize || + !alloc_or_reuse(&core->phdr_buf, &core->phdr_buf_capacity, + ehdr.e_phnum * ehdr.e_phentsize)) + return &drgn_enomem; + + /* + * Check whether the mapped segment containing the file header also + * contains the program headers. This seems to be the case in practice. + */ + uint64_t ehdr_segment_file_end = + (ehdr_segment->file_offset + + (ehdr_segment->end - ehdr_segment->start)); + if (ehdr_segment_file_end < ehdr.e_phoff || + ehdr_segment_file_end - ehdr.e_phoff < + ehdr.e_phnum * ehdr.e_phentsize) + return NULL; + + err = drgn_program_read_memory(prog, core->phdr_buf, + ehdr_segment->start + ehdr.e_phoff, + ehdr.e_phnum * ehdr.e_phentsize, false); + if (err) { + if (err->code == DRGN_ERROR_FAULT) { + drgn_error_destroy(err); + err = NULL; + } + return err; + } + arg.phdr_buf = core->phdr_buf; + + /* + * In theory, if the program has a huge number of program headers, they + * may not all be dumped. However, the largest binary I was able to find + * still had all program headers within 1k. + * + * It'd be more reliable to determine the bias based on the headers that + * were saved, use that to read the build ID, use that to find the ELF + * file, and then determine the address range directly from the ELF + * file. However, we need the address range to report the build ID to + * libdwfl, so we do it this way. 
+ */ + uint64_t bias; + err = userspace_core_elf_address_range(ehdr.e_type, ehdr.e_phnum, + core_get_phdr, &arg, segments, + num_segments, ehdr_segment, + &bias, &ret->start, &ret->end); + if (err) + return err; + if (ret->start >= ret->end) { + ret->ignore = true; + return NULL; + } + ret->have_address_range = true; + + for (uint16_t i = 0; i < ehdr.e_phnum; i++) { + GElf_Phdr phdr; + core_get_phdr(&arg, i, &phdr); + if (phdr.p_type == PT_NOTE) { + if (!alloc_or_reuse(&core->segment_buf, + &core->segment_buf_capacity, + phdr.p_filesz)) + return &drgn_enomem; + err = drgn_program_read_memory(prog, core->segment_buf, + phdr.p_vaddr + bias, + phdr.p_filesz, false); + if (err) { + if (err->code == DRGN_ERROR_FAULT) { + drgn_error_destroy(err); + continue; + } else { + return err; + } + } + ret->build_id = read_build_id(core->segment_buf, + phdr.p_filesz, + phdr.p_align, arg.bswap, + &ret->build_id_len); + if (ret->build_id) + break; + } + } + return NULL; +} + +static struct drgn_error *elf_file_get_phdr(void *arg, size_t i, + GElf_Phdr *phdr) +{ + if (!gelf_getphdr(arg, i, phdr)) + return drgn_error_libelf(); + return NULL; +} + +static struct drgn_error * +userspace_core_maybe_report_file(struct drgn_debug_info_load_state *load, + struct userspace_core_report_state *core, + const char *path, + const struct drgn_mapped_file_segment *segments, + size_t num_segments) +{ + struct drgn_error *err; + struct drgn_program *prog = load->dbinfo->prog; + for (size_t ehdr_idx = 0; ehdr_idx < num_segments; ehdr_idx++) { + const struct drgn_mapped_file_segment *ehdr_segment = + &segments[ehdr_idx]; + /* + * There should always be a full page mapped, so even if it's a + * 32-bit file, we can read the 64-bit size. 
+ */ + if (ehdr_segment->file_offset != 0 || + ehdr_segment->end - ehdr_segment->start < sizeof(Elf64_Ehdr)) + continue; + + /* + * This logic is complicated because we're dealing with two data + * sources that we can't completely trust: the memory in the + * core dump and the file at the path found in the core dump. + * + * First, we try to identify the mapped file contents in the + * core dump. Ideally, this will find a build ID. However, this + * can fail for a few reasons: + * + * 1. The file is not an ELF file. + * 2. The ELF file is not an executable or library. + * 3. The ELF file does not have a build ID. + * 4. The file header was not dumped to the core dump, in which + * case we can't tell whether this is an ELF file. Dumping + * the first page of an executable file has been the default + * behavior since Linux kernel commit 895021552d6f + * ("coredump: default + * CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y") (in v2.6.37), but + * it can be disabled at kernel build time or toggled at + * runtime. + * 5. The build ID or the necessary ELF metadata were not dumped + * in the core dump. This can happen if the necessary program + * headers or note segment were not in the first page of the + * file. + * 6. The file is mapped but not actually loaded into the + * program (e.g., if the program is a tool like a profiler or + * a debugger that mmaps binaries [like drgn itself!]). + * + * In cases 1 and 2, we can simply ignore the file. In cases + * 3-5, we blindly trust the path in the core dump. We can + * sometimes detect case 6 in + * userspace_core_elf_address_range(). + * + * There is also the possibility that the program modified or + * corrupted the ELF metadata in memory (more likely if the file + * was explicitly mmap'd, since the metadata will usually be + * read-only if it was loaded properly). We don't deal with that + * yet. 
+ */ + struct userspace_core_identified_file identity = {}; + err = userspace_core_identify_file(prog, core, segments, + num_segments, ehdr_segment, + &identity); + if (err) + return err; + if (identity.ignore) + continue; + +#define CLEAR_ELF() do { \ + elf = NULL; \ + fd = -1; \ +} while (0) +#define CLOSE_ELF() do { \ + elf_end(elf); \ + close(fd); \ + CLEAR_ELF(); \ +} while (0) + int fd; + Elf *elf; + /* + * There are a few things that can go wrong here: + * + * 1. The path no longer exists. + * 2. The path refers to a different ELF file than was in the + * core dump. + * 3. The path refers to something which isn't a valid ELF file. + */ + err = open_elf_file(path, &fd, &elf); + if (err) { + drgn_error_destroy(err); + CLEAR_ELF(); + } else if (identity.build_id_len > 0) { + if (!build_id_matches(elf, identity.build_id, + identity.build_id_len)) + CLOSE_ELF(); + } + + if (elf && !identity.have_address_range) { + GElf_Ehdr ehdr_mem, *ehdr; + size_t phnum; + if ((ehdr = gelf_getehdr(elf, &ehdr_mem)) && + (elf_getphdrnum(elf, &phnum) == 0)) { + uint64_t bias; + err = userspace_core_elf_address_range(ehdr->e_type, + phnum, + elf_file_get_phdr, + elf, + segments, + num_segments, + ehdr_segment, + &bias, + &identity.start, + &identity.end); + if (err || identity.start >= identity.end) { + drgn_error_destroy(err); + CLOSE_ELF(); + } else { + identity.have_address_range = true; + } + } else { + CLOSE_ELF(); + } + } + + if (elf) { + err = drgn_debug_info_report_elf(load, path, fd, elf, + identity.start, + identity.end, NULL, + NULL); + if (err) + return err; + } else { + if (!identity.have_address_range) + identity.start = identity.end = 0; + Dwfl_Module *dwfl_module = + dwfl_report_module(load->dbinfo->dwfl, path, + identity.start, + identity.end); + if (!dwfl_module) + return drgn_error_libdwfl(); + if (identity.build_id_len > 0 && + dwfl_module_report_build_id(dwfl_module, + identity.build_id, + identity.build_id_len, + 0)) + return drgn_error_libdwfl(); + } +#undef 
CLOSE_ELF +#undef CLEAR_ELF + } + return NULL; +} + +static struct drgn_error * +userspace_core_report_mapped_files(struct drgn_debug_info_load_state *load, + struct userspace_core_report_state *core) +{ + + struct drgn_error *err; + for (struct drgn_mapped_files_iterator it = + drgn_mapped_files_first(&core->files); + it.entry; it = drgn_mapped_files_next(it)) { + err = userspace_core_maybe_report_file(load, core, + it.entry->key, + it.entry->value.data, + it.entry->value.size); + if (err) + return err; + } + return NULL; +} + +static struct drgn_error * +userspace_core_report_debug_info(struct drgn_debug_info_load_state *load, + const char *nt_file, size_t nt_file_len) +{ + struct drgn_error *err; + + struct userspace_core_report_state core = { + .files = HASH_TABLE_INIT, + }; + err = userspace_core_get_mapped_files(load, &core, nt_file, + nt_file_len); + if (err) + goto out; + err = userspace_core_report_mapped_files(load, &core); +out: + free(core.segment_buf); + free(core.phdr_buf); + for (struct drgn_mapped_files_iterator it = + drgn_mapped_files_first(&core.files); + it.entry; it = drgn_mapped_files_next(it)) + drgn_mapped_file_segment_vector_deinit(&it.entry->value); + drgn_mapped_files_deinit(&core.files); + return err; +} + static struct drgn_error * userspace_report_elf_file(struct drgn_debug_info_load_state *load, const char *path) @@ -642,9 +1453,29 @@ userspace_report_debug_info(struct drgn_debug_info_load_state *load) return drgn_error_create_os("dwfl_linux_proc_report", ret, NULL); } - } else if (dwfl_core_file_report(dwfl, prog->core, - NULL) == -1) { - return drgn_error_libdwfl(); + } else { + const char *nt_file; + size_t nt_file_len; + char *env = getenv("DRGN_USE_LIBDWFL_REPORT"); + if (env && atoi(env)) { + nt_file = NULL; + nt_file_len = 0; + } else { + err = drgn_get_nt_file(prog->core, &nt_file, + &nt_file_len); + if (err) + return err; + } + if (nt_file) { + err = userspace_core_report_debug_info(load, + nt_file, + nt_file_len); + if (err) 
+ return err; + } else if (dwfl_core_file_report(dwfl, prog->core, + NULL) == -1) { + return drgn_error_libdwfl(); + } } } return NULL; diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index f453ab045..e35b9b662 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -13,6 +13,7 @@ #define DRGN_DEBUG_INFO_H #include +#include #include #include "binary_buffer.h" @@ -336,6 +337,15 @@ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, uint64_t *start_ret, uint64_t *end_ret); +static inline Elf_Type note_header_type(uint64_t p_align) +{ +#if _ELFUTILS_PREREQ(0, 175) + if (p_align == 8) + return ELF_T_NHDR8; +#endif + return ELF_T_NHDR; +} + /** @} */ #endif /* DRGN_DEBUG_INFO_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index 777f6cc1f..cc1bcdd6e 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -31,15 +30,6 @@ DEFINE_VECTOR_FUNCTIONS(drgn_prstatus_vector) DEFINE_HASH_MAP_FUNCTIONS(drgn_prstatus_map, int_key_hash_pair, scalar_key_eq) -static Elf_Type note_header_type(GElf_Phdr *phdr) -{ -#if _ELFUTILS_PREREQ(0, 175) - if (phdr->p_align == 8) - return ELF_T_NHDR8; -#endif - return ELF_T_NHDR; -} - LIBDRGN_PUBLIC enum drgn_program_flags drgn_program_flags(struct drgn_program *prog) { @@ -278,7 +268,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) data = elf_getdata_rawchunk(prog->core, phdr->p_offset, phdr->p_filesz, - note_header_type(phdr)); + note_header_type(phdr->p_align)); if (!data) { err = drgn_error_libelf(); goto out_platform; @@ -735,7 +725,7 @@ static struct drgn_error *drgn_program_cache_prstatus(struct drgn_program *prog) data = elf_getdata_rawchunk(prog->core, phdr->p_offset, phdr->p_filesz, - note_header_type(phdr)); + note_header_type(phdr->p_align)); if (!data) { err = drgn_error_libelf(); goto out; From 
f09fd13ef68350c31164073e2a4dce542c9cc15e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 13:37:01 -0800 Subject: [PATCH 108/139] libdrgn: helpers: add missing error check in linux_helper_pid_task() Found with clang-static-analyzer. Reported-by: Kevin Svetlitski Signed-off-by: Omar Sandoval --- libdrgn/linux_kernel_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index e9b61a7e3..54bdc282a 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -463,6 +463,8 @@ struct drgn_error *linux_helper_pid_task(struct drgn_object *res, task_struct_type = drgn_type_type(task_structp_type.type); err = drgn_object_bool(pid, &truthy); + if (err) + goto out; if (!truthy) goto null; From 8a41adc1b027211779da425cd43283773632932a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 13:47:12 -0800 Subject: [PATCH 109/139] libdrgn: language_c: add missing error check in c_parse_abstract_declarator() Found with clang-static-analyzer. Reported-by: Kevin Svetlitski Signed-off-by: Omar Sandoval --- libdrgn/language_c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 08fcc1a19..7a698145d 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -2445,6 +2445,8 @@ c_parse_abstract_declarator(struct drgn_program *prog, return err; err = drgn_lexer_peek(lexer, &token); + if (err) + return err; if (token.kind == C_TOKEN_LPAREN || token.kind == C_TOKEN_LBRACKET) { struct c_declarator *tmp; From 8b2bf85e49de01fc31aba6b31bdcaa586c9a0377 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 13:52:09 -0800 Subject: [PATCH 110/139] libdrgn: dwarf_info: fix garbage return from drgn_array_type_from_dwarf() Found with clang-static-analyzer. 
Reported-by: Kevin Svetlitski Signed-off-by: Omar Sandoval --- libdrgn/dwarf_info.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 5d506513b..7e0a2bf9c 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -6313,8 +6313,10 @@ drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, while (r == 0) { if (dwarf_tag(&child) == DW_TAG_subrange_type) { dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) + if (!dimension) { + err = &drgn_enomem; goto out; + } err = subrange_length(&child, dimension); if (err) goto out; @@ -6328,8 +6330,10 @@ drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, } if (!dimensions.size) { dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) + if (!dimension) { + err = &drgn_enomem; goto out; + } dimension->is_complete = false; } From 8ebdcb710928316d211ef849ee9344fce92e622e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 15:11:16 -0800 Subject: [PATCH 111/139] libdrgn: memory_reader: remove unnecessary include Fixes: 02912ca7d073 ("libdrgn: fix handling of p_filesz < p_memsz in core dumps") Signed-off-by: Omar Sandoval --- libdrgn/memory_reader.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 81b222fe4..527ec69c2 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "memory_reader.h" From ad2337897702e10974fc14b9ff0f59c76292a026 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 15:13:09 -0800 Subject: [PATCH 112/139] Update elfutils and libkdumpfile in manylinux wheels Use the latest version of elfutils (0.186) and libkdumpfile (0.4.1). We can drop the elfutils patch since 0.186 has the fix (and we have our own workaround), but we need a new patch to build libkdumpfile. 
Signed-off-by: Omar Sandoval --- scripts/build_manylinux_in_docker.sh | 42 +++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index c742fa5b4..edc0aa3a9 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -20,28 +20,11 @@ ln -s /usr/share/aclocal/pkg.m4 /usr/local/share/aclocal/ # Install a recent version of elfutils instead of whatever is in the manylinux # image. -elfutils_version=0.185 +elfutils_version=0.186 elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elfutils_version.tar.bz2 mkdir /tmp/elfutils cd /tmp/elfutils curl -L "$elfutils_url" | tar -xj --strip-components=1 -# Apply "libdwfl: fix potential NULL pointer dereference when reading link map" -# manually since it isn't in a release yet. -patch -p1 << "EOF" -diff --git a/libdwfl/link_map.c b/libdwfl/link_map.c -index 0d8d1c17..1e7d4502 100644 ---- a/libdwfl/link_map.c -+++ b/libdwfl/link_map.c -@@ -254,7 +254,7 @@ read_addrs (struct memory_closure *closure, - Dwfl *dwfl = closure->dwfl; - - /* Read a new buffer if the old one doesn't cover these words. */ -- if (buffer == NULL -+ if (*buffer == NULL - || vaddr < *read_vaddr - || vaddr - (*read_vaddr) + nb > *buffer_available) - { -EOF # We don't bother with debuginfod support for a few reasons: # # 1. 
It depends on libcurl, which would pull in a bunch of transitive @@ -52,13 +35,28 @@ EOF make -j$(($(nproc) + 1)) make install -libkdumpfile_commit=v0.4.0 -libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/archive/$libkdumpfile_commit/libkdumpfile-$libkdumpfile_commit.tar.gz +libkdumpfile_version=0.4.1 +libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/releases/download/v$libkdumpfile_version/libkdumpfile-$libkdumpfile_version.tar.gz mkdir /tmp/libkdumpfile cd /tmp/libkdumpfile curl -L "$libkdumpfile_url" | tar -xz --strip-components=1 -autoreconf -fiv -# z_const was added in zlib 1.2.5.2, but CentOS 6 has 1.2.3. +# This file is missing an include of limits.h which it accidentally gets from +# zlib.h via zconf.h, but only since zlib 1.2.7. CentOS 6 has 1.2.3. +patch -p1 << "EOF" +diff --git a/src/kdumpfile/util.c b/src/kdumpfile/util.c +index 4fb2960..14e1ce3 100644 +--- a/src/kdumpfile/util.c ++++ b/src/kdumpfile/util.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #if USE_ZLIB + # include +EOF +# z_const was added in zlib 1.2.5.2. CPPFLAGS="-Dz_const=const" ./configure --with-lzo --with-snappy --with-zlib --without-python make -j$(($(nproc) + 1)) make install From 08e634c158ef65515945dcbc759937c366cc3ed1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 15:20:42 -0800 Subject: [PATCH 113/139] drgn 0.0.15 Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index b782402b1..fa994225a 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Meta Platforms, Inc. and affiliates. 
dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.14], +AC_INIT([libdrgn], [0.0.15], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) From ffcce8a7451d8107fd7a4457ac6cb5addd7de582 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Dec 2021 17:19:27 -0800 Subject: [PATCH 114/139] Add a few files to source distributions In particular, the Fedora RPM build needs pytest.ini. CONTRIBUTING.rst should be included along the same lines as README.rst. libdrgn/Doxyfile should be included so that users with a source distribution can build the libdrgn documentation. Signed-off-by: Omar Sandoval --- MANIFEST.in | 2 +- libdrgn/Makefile.am | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index ef05b5490..430d66fcd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ recursive-include examples *.py recursive-include tests *.py recursive-include tools *.py recursive-include vmtest *.c *.py *.rst -include COPYING util.py vmtest/config +include CONTRIBUTING.rst COPYING pytest.ini util.py vmtest/config diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index d966917c6..15dfbeed2 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -158,7 +158,8 @@ python/docstrings.c: ../_drgn.pyi $(drgndoc_docstrings_deps) python/docstrings.h: ../_drgn.pyi $(drgndoc_docstrings_deps) $(AM_V_GEN)$(drgndoc_docstrings) -H -m _drgn:drgn $< > $@ -EXTRA_DIST = $(ARCH_DEFS) \ +EXTRA_DIST = Doxyfile \ + $(ARCH_DEFS) \ build-aux/gen_arch.awk \ build-aux/gen_constants.py \ drgn.h.in From 061094187bd67751d14a57717e2684cb8300e10d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 9 Dec 2021 20:37:14 +0000 Subject: [PATCH 115/139] libdrgn: debug_info: serialize initial calls to dwfl_module_getdwarf dwfl_module_getdwarf() may call into debuginfod_find_executable() or debuginfod_find_debuginfo(), which aren't thread-safe. 
So, let's put the initial call of dwfl_module_getdwarf() (which is the call that may go into the debuginfod client) into a critical section. Signed-off-by: Omar Sandoval --- libdrgn/debug_info.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index a8564c409..60817d1a2 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1718,7 +1718,9 @@ drgn_debug_info_find_sections(struct drgn_debug_info_module *module) * ELF relocations to all sections, not just debug sections. */ Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + Dwarf *dwarf; + #pragma omp critical(drgn_dwfl_module_getdwarf) + dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); if (!dwarf) return drgn_error_libdwfl(); Elf *elf = dwarf_getelf(dwarf); From a70e5d7893b1f539cf4a6083db640346cda90e3a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 9 Dec 2021 20:44:24 +0000 Subject: [PATCH 116/139] cli: print debuginfod client progress Running drgn on a system with debuginfod can appear to hang while the debuginfod client downloads debug info. In interactive mode, let's set the DEBUGINFOD_PROGRESS environment variable to get progress updates. The output isn't super informative, but it's better than silence. Signed-off-by: Omar Sandoval --- drgn/internal/cli.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index 16f1b8abd..b377dce3e 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -106,6 +106,10 @@ def main() -> None: args = parser.parse_args() + if not args.script: + print(version, file=sys.stderr, flush=True) + os.environ["DEBUGINFOD_PROGRESS"] = "1" + prog = drgn.Program() if args.core is not None: prog.set_core_dump(args.core) @@ -170,13 +174,11 @@ def write_history_file() -> None: sys.displayhook = displayhook - banner = ( - version - + """ + banner = """\ For help, type help(drgn). 
>>> import drgn ->>> from drgn import """ - + ", ".join(drgn_globals) +>>> from drgn import """ + ", ".join( + drgn_globals ) if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: banner += "\n>>> from drgn.helpers.linux import *" From 3a9ef1b6ca683974f7508b5dbfc7332f08c01e47 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 9 Dec 2021 13:33:49 -0800 Subject: [PATCH 117/139] cli: print download progress in script mode, too Instead of gating on script mode vs interactive mode, let's gate on --quiet. Signed-off-by: Omar Sandoval --- drgn/internal/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index b377dce3e..aaed8ba67 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -93,7 +93,8 @@ def main() -> None: "-q", "--quiet", action="store_true", - help="don't print non-fatal warnings (e.g., about missing debugging information)", + help="don't print download progress or non-fatal warnings " + "(e.g., about missing debugging information)", ) parser.add_argument( "script", @@ -108,6 +109,7 @@ def main() -> None: if not args.script: print(version, file=sys.stderr, flush=True) + if not args.quiet: os.environ["DEBUGINFOD_PROGRESS"] = "1" prog = drgn.Program() From 1b54a25632c50a9f525c5e89ef744189a8f4ede2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 9 Dec 2021 14:52:02 -0800 Subject: [PATCH 118/139] drgn 0.0.16 Signed-off-by: Omar Sandoval --- libdrgn/configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index fa994225a..21feaf7f3 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Meta Platforms, Inc. and affiliates. 
dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.15], +AC_INIT([libdrgn], [0.0.16], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) From 609b4cc3522ab65ebd5748ea010756be120c8551 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Dec 2021 11:38:07 -0800 Subject: [PATCH 119/139] CONTRIBUTING: document some libdrgn coding conventions Document conventions for init/deinit functions, create/destroy functions, and functions which modify a struct drgn_object. Signed-off-by: Omar Sandoval --- CONTRIBUTING.rst | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 98d884b9a..c31cf1b49 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -58,14 +58,31 @@ C code in drgn mostly follows the `Linux kernel coding style `_ except that drgn requires C11 or newer, so declarations may be mixed with code. -A few other guidelines: +A few other guidelines/conventions: +* Constants should be defined as enums or ``static const`` variables rather + than macros. * Functions that can fail should return a ``struct drgn_error *`` (and return their result via an out parameter if necessary). * Out parameters should be named ``ret`` (or suffixed with ``_ret`` if there - are multiple). -* Constants should be defined as enums or ``static const`` variables rather - than macros. + are multiple) and be the last parameter(s) of the function. +* Functions that initialize an already allocated structure should be suffixed + with ``_init`` and take the structure to initialize as the first argument, + e.g., ``struct drgn_error *foo_init(struct foo *foo, int foo_flags)``. +* The matching function to deinitialize a structure should be suffixed with + ``_deinit``, e.g., ``void foo_deinit(struct foo *foo)``. If possible, the + definition should be placed directly after the definition of ``_init`` so + that it is easier to visually verify that everything is cleaned up. 
+* Functions that allocate and initialize a structure should be suffixed with + ``_create`` and either return the structure as an out parameter (e.g., + ``struct drgn_error *foo_create(int foo_flags, struct foo **ret)``) or as the + return value if they can only fail with an out-of-memory error (e.g., + ``struct foo *foo_create(int foo_flags)``). +* The matching function to free an allocated structure should be suffixed with + ``_destroy``, e.g., ``void foo_destroy(struct foo *foo)``. If possible, the + definition should be placed directly after the definition of ``_create``. +* Functions that return a result in a ``struct drgn_object *`` parameter should + only modify the object if the function succeeds. drgn assumes some `implementation-defined behavior `_ for sanity: From 6fb304e99a347f4469cb0409368fad37e75cb31f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 13 Dec 2021 12:14:42 -0800 Subject: [PATCH 120/139] Skip DCO check for draft pull requests Draft pull requests can have temporary commits, so it doesn't make much sense to check for sign-offs. Skip the check on drafts, making sure it runs when a draft is changed to a normal pull request. 
Signed-off-by: Omar Sandoval --- .github/workflows/dco-check.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dco-check.yml b/.github/workflows/dco-check.yml index 0b6bcf8ec..319b4b125 100644 --- a/.github/workflows/dco-check.yml +++ b/.github/workflows/dco-check.yml @@ -1,9 +1,12 @@ name: DCO Check -on: pull_request +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] jobs: check: + if: ${{ !github.event.pull_request.draft }} runs-on: ubuntu-latest steps: - name: Checkout commit logs From c4fbf7e5896c6099266f37868456168193bdcc72 Mon Sep 17 00:00:00 2001 From: Alakesh Haloi Date: Tue, 14 Dec 2021 19:28:37 +0000 Subject: [PATCH 121/139] libdrgn: fix for compilation error On gcc version 7.3, we get following compilation error CC libdrgnimpl_la-dwarf_info.lo ../../libdrgn/dwarf_info.c:181:51: error: initializer element is not constant static const size_t DRGN_DWARF_INDEX_NUM_SHARDS = 1 << DRGN_DWARF_INDEX_SHARD_BITS; This fixes the compilation error on older versions of gcc Signed-off-by: Alakesh Haloi --- libdrgn/dwarf_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 7e0a2bf9c..5dc97dcad 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -177,7 +177,7 @@ DEFINE_HASH_MAP(drgn_dwarf_index_die_map, struct nstring, uint32_t, nstring_hash_pair, nstring_eq) DEFINE_VECTOR(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) -static const size_t DRGN_DWARF_INDEX_SHARD_BITS = 8; +#define DRGN_DWARF_INDEX_SHARD_BITS 8 static const size_t DRGN_DWARF_INDEX_NUM_SHARDS = 1 << DRGN_DWARF_INDEX_SHARD_BITS; /** Shard of a @ref drgn_namespace_dwarf_index. 
*/ From f1cc88378a4628418c40b9fbe93361b58f60d2b7 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 16 Dec 2021 10:13:24 -0800 Subject: [PATCH 122/139] Silence mypy warnings With mypy 0.920, two warnings appear on current main: $ mypy --strict --no-warn-return-any drgn _drgn.pyi drgn/helpers/linux/__init__.py:36: error: Need type annotation for "__all__" (hint: "__all__: List[<type>] = ...") drgn/helpers/linux/__init__.py:38: error: unused "type: ignore" comment Found 2 errors in 1 file (checked 33 source files) The "unused" type:ignore directive was necessary for prior versions, so add --no-warn-unused-ignores, so that we pass on multiple versions. Apply a List[str] annotation to the __all__ variable to silence the other error. Signed-off-by: Stephen Brennan --- .github/workflows/ci.yml | 2 +- drgn/helpers/linux/__init__.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 86d518b8a..ae89f8aed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: - name: Generate version.py run: python setup.py --version - name: Check with mypy - run: mypy --strict --no-warn-return-any drgn _drgn.pyi + run: mypy --strict --no-warn-return-any --no-warn-unused-ignores drgn _drgn.pyi - name: Build and test with ${{ matrix.cc }} run: python setup.py test -K diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index a0f088212..c46a3a571 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -32,8 +32,9 @@ import importlib import pkgutil +from typing import List -__all__ = [] +__all__: List[str] = [] for _module_info in pkgutil.iter_modules( __path__, # type: ignore[name-defined] # python/mypy#1422 prefix=__name__ + ".", From 9add9529eb49bc00bf569a1b4473682e4e48df75 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 17 Dec 2021 11:24:38 -0800 Subject: [PATCH 123/139] Ensure compile_commands.json contains -Wall
This minor change is a quality of life improvement ensuring developers receive more warnings and diagnostics in their editors. Signed-off-by: Kevin Svetlitski --- scripts/iwyu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/iwyu.py b/scripts/iwyu.py index 26e4d776d..60cb773a5 100755 --- a/scripts/iwyu.py +++ b/scripts/iwyu.py @@ -169,6 +169,7 @@ def ignore_line(path, state, line): def main(): + os.environ["CFLAGS"] = "-Wall" parser = argparse.ArgumentParser(description="run include-what-you-use on drgn") parser.add_argument( "source", nargs="*", help="run on given file instead of all source files" From 0f68cd44e296b0b2a56d20c58da55a2998a80bfe Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 17 Dec 2021 13:01:18 -0800 Subject: [PATCH 124/139] vmtest: mount /dev/shm in VM PR #133 adds a test case using multiprocessing.Barrier(), which needs /dev/shm. Signed-off-by: Omar Sandoval --- vmtest/vm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vmtest/vm.py b/vmtest/vm.py index df86d04a0..8b40dccbf 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -57,6 +57,8 @@ # Mount additional filesystems. "$BUSYBOX" mount -t devtmpfs -o nosuid,noexec dev /dev +"$BUSYBOX" mkdir /dev/shm +"$BUSYBOX" mount -t tmpfs -o nosuid,nodev tmpfs /dev/shm "$BUSYBOX" mount -t proc -o nosuid,nodev,noexec proc /proc "$BUSYBOX" mount -t sysfs -o nosuid,nodev,noexec sys /sys # cgroup2 was added in Linux v4.5. From 2b47583c732a1ad63fe544a08feddd2ea96ef1a5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 18 Nov 2021 16:46:59 -0800 Subject: [PATCH 125/139] Rewrite linux helper iterators in C In preparation for introducing an API to represent threads, the linux helper iterators, radix_tree_for_each, idr_for_each, for_each_pid, and for_each_task have been rewritten in C. This will allow them to be accessed from libdrgn, which will be necessary for the threads API. 
Signed-off-by: Kevin Svetlitski --- _drgn.pyi | 38 +++ drgn/helpers/linux/idr.py | 24 +- drgn/helpers/linux/pid.py | 53 +-- drgn/helpers/linux/radixtree.py | 50 +-- libdrgn/helpers.h | 98 ++++++ libdrgn/linux_kernel_helpers.c | 560 +++++++++++++++++++++++++++----- libdrgn/python/drgnpy.h | 21 ++ libdrgn/python/helpers.c | 248 ++++++++++++++ libdrgn/python/module.c | 13 + tests/helpers/linux/test_pid.py | 23 +- 10 files changed, 929 insertions(+), 199 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 487e11b21..86e356b88 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2219,3 +2219,41 @@ def _linux_helper_kaslr_offset(prog: Program) -> int: def _linux_helper_pgtable_l5_enabled(prog: Program) -> bool: """Return whether 5-level paging is enabled.""" ... + +def _linux_helper_radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: + """ + Iterate over all of the entries in a radix tree. + + :param root: ``struct radix_tree_root *`` + :return: Iterator of (index, ``void *``) tuples. + """ + ... + +def _linux_helper_idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: + """ + Iterate over all of the entries in an IDR. + + :param idr: ``struct idr *`` + :return: Iterator of (index, ``void *``) tuples. + """ + ... + +def _linux_helper_for_each_pid(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: + """ + Iterate over all PIDs in a namespace. + + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. + :return: Iterator of ``struct pid *`` objects. + """ + ... + +def _linux_helper_for_each_task(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: + """ + Iterate over all of the tasks visible in a namespace. + + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. + :return: Iterator of ``struct task_struct *`` objects. + """ + ... 
diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index c1d1ba2d9..7535fbb68 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -11,28 +11,12 @@ IDRs were not based on radix trees. """ -from typing import Iterator, Tuple - -from _drgn import _linux_helper_idr_find as idr_find -from drgn import Object -from drgn.helpers.linux.radixtree import radix_tree_for_each +from _drgn import ( + _linux_helper_idr_find as idr_find, + _linux_helper_idr_for_each as idr_for_each, +) __all__ = ( "idr_find", "idr_for_each", ) - - -def idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: - """ - Iterate over all of the entries in an IDR. - - :param idr: ``struct idr *`` - :return: Iterator of (index, ``void *``) tuples. - """ - try: - base = idr.idr_base.value_() - except AttributeError: - base = 0 - for index, entry in radix_tree_for_each(idr.idr_rt.address_of_()): - yield index + base, entry diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index ca14ad352..152d15b21 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -9,16 +9,13 @@ IDs and processes. """ -from typing import Iterator, Union - from _drgn import ( _linux_helper_find_pid as find_pid, _linux_helper_find_task as find_task, + _linux_helper_for_each_pid as for_each_pid, + _linux_helper_for_each_task as for_each_task, _linux_helper_pid_task as pid_task, ) -from drgn import Object, Program, cast, container_of -from drgn.helpers.linux.idr import idr_for_each -from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( "find_pid", @@ -27,49 +24,3 @@ "for_each_task", "pid_task", ) - - -def for_each_pid(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: - """ - Iterate over all PIDs in a namespace. - - :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or - :class:`Program` to iterate over initial PID namespace. - :return: Iterator of ``struct pid *`` objects. 
- """ - if isinstance(prog_or_ns, Program): - prog = prog_or_ns - ns = prog_or_ns["init_pid_ns"].address_of_() - else: - prog = prog_or_ns.prog_ - ns = prog_or_ns - if hasattr(ns, "idr"): - for nr, entry in idr_for_each(ns.idr): - yield cast("struct pid *", entry) - else: - pid_hash = prog["pid_hash"] - for i in range(1 << prog["pidhash_shift"].value_()): - for upid in hlist_for_each_entry( - "struct upid", pid_hash[i].address_of_(), "pid_chain" - ): - if upid.ns == ns: - yield container_of(upid, "struct pid", f"numbers[{int(ns.level)}]") - - -def for_each_task(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: - """ - Iterate over all of the tasks visible in a namespace. - - :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or - :class:`Program` to iterate over initial PID namespace. - :return: Iterator of ``struct task_struct *`` objects. - """ - if isinstance(prog_or_ns, Program): - prog = prog_or_ns - else: - prog = prog_or_ns.prog_ - PIDTYPE_PID = prog["PIDTYPE_PID"].value_() - for pid in for_each_pid(prog_or_ns): - task = pid_task(pid, PIDTYPE_PID) - if task: - yield task diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index 0339b0a08..090835fc4 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -9,54 +9,12 @@ radix trees from :linux:`include/linux/radix-tree.h`. 
""" -from typing import Iterator, Tuple - -from _drgn import _linux_helper_radix_tree_lookup as radix_tree_lookup -from drgn import Object, cast +from _drgn import ( + _linux_helper_radix_tree_for_each as radix_tree_for_each, + _linux_helper_radix_tree_lookup as radix_tree_lookup, +) __all__ = ( "radix_tree_for_each", "radix_tree_lookup", ) - -_RADIX_TREE_ENTRY_MASK = 3 - - -def _is_internal_node(node: Object, internal_node: int) -> bool: - return (node.value_() & _RADIX_TREE_ENTRY_MASK) == internal_node - - -def _entry_to_node(node: Object, internal_node: int) -> Object: - return Object(node.prog_, node.type_, value=node.value_() & ~internal_node) - - -def _radix_tree_root_node(root: Object) -> Tuple[Object, int]: - try: - node = root.xa_head - except AttributeError: - return root.rnode.read_(), 1 - else: - return cast("struct xa_node *", node).read_(), 2 - - -def radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: - """ - Iterate over all of the entries in a radix tree. - - :param root: ``struct radix_tree_root *`` - :return: Iterator of (index, ``void *``) tuples. 
- """ - node, RADIX_TREE_INTERNAL_NODE = _radix_tree_root_node(root) - - def aux(node: Object, index: int) -> Iterator[Tuple[int, Object]]: - if _is_internal_node(node, RADIX_TREE_INTERNAL_NODE): - parent = _entry_to_node(node, RADIX_TREE_INTERNAL_NODE) - for i, slot in enumerate(parent.slots): - yield from aux( - cast(parent.type_, slot).read_(), - index + (i << parent.shift.value_()), - ) - elif node: - yield index, cast("void *", node) - - yield from aux(node, 0) diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 95d3a4ebb..72b8a6e2f 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -15,6 +15,8 @@ #include #include +#include "drgn.h" +#include "vector.h" struct drgn_object; struct drgn_program; @@ -43,4 +45,100 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, const struct drgn_object *ns, uint64_t pid); +/** + * Iterator convention: + * + * For all of the iterators defined below, the convention for each of the + * `*_next` functions is that upon returning, `*ret` will point to space + * allocated inside of `iter`. The caller is free to do what they wish with + * this return value, but should note that it will be overwritten the next time + * the `*_next` function is called. 
+ */ + +DEFINE_VECTOR_TYPE(linux_helper_radix_tree_iter_frame_vector, + struct linux_helper_radix_tree_iter_frame) + +struct linux_helper_radix_tree_iter_entry { + uint64_t index; + struct drgn_object node; +}; + +struct linux_helper_radix_tree_iter { + bool started; + struct drgn_object root; + // Current value to be yielded + struct linux_helper_radix_tree_iter_entry entry; + // We need this for later initialization of `drgn_object`s + struct drgn_program *prog; + // Frames to keep track of generator state + struct linux_helper_radix_tree_iter_frame_vector frames; + // One-time setup values that are persistent + uint64_t RADIX_TREE_INTERNAL_NODE; + uint64_t RADIX_TREE_MAP_MASK; + struct drgn_qualified_type node_type; +}; + +struct drgn_error *linux_helper_radix_tree_iter_init(struct linux_helper_radix_tree_iter *iter, + const struct drgn_object *root); + +void linux_helper_radix_tree_iter_deinit(struct linux_helper_radix_tree_iter *iter); + +struct drgn_error *linux_helper_radix_tree_iter_next(struct linux_helper_radix_tree_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret); + +struct linux_helper_idr_iter { + struct linux_helper_radix_tree_iter iter; + uint64_t base; +}; + +struct drgn_error *linux_helper_idr_iter_init(struct linux_helper_idr_iter *iter, + const struct drgn_object *idr); + +void linux_helper_idr_iter_deinit(struct linux_helper_idr_iter *iter); + +struct drgn_error *linux_helper_idr_iter_next(struct linux_helper_idr_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret); + +struct linux_helper_pid_iter { + bool has_idr; + struct drgn_qualified_type pid_type; + union { + // if has_idr + struct linux_helper_idr_iter iter; + // else + struct { + struct drgn_qualified_type upid_type; + struct drgn_object pid_hash; + struct drgn_object pos; // a `struct hlist_node*` + struct drgn_object ns; + struct drgn_object entry; // Current value of the iterator + size_t index; // Current loop index + char member_specifier[sizeof("numbers[]") 
+ 20]; + // 20 = maximum length of a uint64_t as a string + // Space for the null terminator is included as part of the sizeof on the string literal + }; + }; +}; + +struct drgn_error *linux_helper_pid_iter_init(struct linux_helper_pid_iter *iter, + const struct drgn_object *ns); + +void linux_helper_pid_iter_deinit(struct linux_helper_pid_iter *iter); + +struct drgn_error *linux_helper_pid_iter_next(struct linux_helper_pid_iter *iter, + struct drgn_object **ret); + +struct linux_helper_task_iter { + struct linux_helper_pid_iter iter; + uint64_t PIDTYPE_PID; +}; + +struct drgn_error *linux_helper_task_iter_init(struct linux_helper_task_iter *iter, + const struct drgn_object *ns); + +void linux_helper_task_iter_deinit(struct linux_helper_task_iter *iter); + +struct drgn_error *linux_helper_task_iter_next(struct linux_helper_task_iter *iter, + struct drgn_object **ret); + #endif /* DRGN_HELPERS_H */ diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 54bdc282a..3a2736e98 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -6,10 +6,13 @@ #include #include "drgn.h" +#include "helpers.h" #include "minmax.h" #include "platform.h" #include "program.h" +static const uint64_t RADIX_TREE_ENTRY_MASK = 3; + struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, void *buf, size_t count) @@ -99,68 +102,79 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, return err; } -struct drgn_error * -linux_helper_radix_tree_lookup(struct drgn_object *res, - const struct drgn_object *root, uint64_t index) +static struct drgn_error * +radix_tree_init(struct drgn_program *prog, const struct drgn_object *root, + uint64_t *RADIX_TREE_INTERNAL_NODE_ret, + uint64_t *RADIX_TREE_MAP_MASK_ret, + struct drgn_qualified_type *node_type_ret, + struct drgn_object *node_ret) { - struct drgn_error *err; - static const uint64_t RADIX_TREE_ENTRY_MASK = 3; - uint64_t 
RADIX_TREE_INTERNAL_NODE; - uint64_t RADIX_TREE_MAP_MASK; - struct drgn_object node, tmp; - struct drgn_qualified_type node_type; - - drgn_object_init(&node, drgn_object_program(res)); - drgn_object_init(&tmp, drgn_object_program(res)); - + struct drgn_error *err = + drgn_object_member_dereference(node_ret, root, "xa_head"); /* node = root->xa_head */ - err = drgn_object_member_dereference(&node, root, "xa_head"); if (!err) { - err = drgn_program_find_type(drgn_object_program(res), - "struct xa_node *", NULL, - &node_type); + err = drgn_program_find_type(prog, "struct xa_node *", NULL, + node_type_ret); if (err) - goto out; - RADIX_TREE_INTERNAL_NODE = 2; + return err; + *RADIX_TREE_INTERNAL_NODE_ret = 2; } else if (err->code == DRGN_ERROR_LOOKUP) { drgn_error_destroy(err); /* node = (void *)root.rnode */ - err = drgn_object_member_dereference(&node, root, "rnode"); + err = drgn_object_member_dereference(node_ret, root, "rnode"); if (err) - goto out; - err = drgn_program_find_type(drgn_object_program(res), "void *", - NULL, &node_type); + return err; + err = drgn_program_find_type(prog, "void *", NULL, + node_type_ret); if (err) - goto out; - err = drgn_object_cast(&node, node_type, &node); + return err; + err = drgn_object_cast(node_ret, *node_type_ret, node_ret); if (err) - goto out; - err = drgn_program_find_type(drgn_object_program(res), - "struct radix_tree_node *", NULL, - &node_type); + return err; + err = drgn_program_find_type(prog, "struct radix_tree_node *", + NULL, node_type_ret); if (err) - goto out; - RADIX_TREE_INTERNAL_NODE = 1; + return err; + *RADIX_TREE_INTERNAL_NODE_ret = 1; } else { - goto out; + return err; } struct drgn_type_member *member; uint64_t member_bit_offset; - err = drgn_type_find_member(drgn_type_type(node_type.type).type, + err = drgn_type_find_member(drgn_type_type(node_type_ret->type).type, "slots", &member, &member_bit_offset); if (err) - goto out; + return err; struct drgn_qualified_type member_type; err = 
drgn_member_type(member, &member_type, NULL); + if (err) + return err; + if (drgn_type_kind(member_type.type) != DRGN_TYPE_ARRAY) + return drgn_error_create( + DRGN_ERROR_TYPE, + "struct radix_tree_node slots member is not an array"); + *RADIX_TREE_MAP_MASK_ret = drgn_type_length(member_type.type) - 1; + return NULL; +} + +struct drgn_error * +linux_helper_radix_tree_lookup(struct drgn_object *res, + const struct drgn_object *root, uint64_t index) +{ + struct drgn_error *err; + uint64_t RADIX_TREE_INTERNAL_NODE; + uint64_t RADIX_TREE_MAP_MASK; + struct drgn_object node, tmp; + struct drgn_qualified_type node_type; + + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); + err = radix_tree_init(drgn_object_program(root), root, + &RADIX_TREE_INTERNAL_NODE, &RADIX_TREE_MAP_MASK, + &node_type, &node); if (err) goto out; - if (drgn_type_kind(member_type.type) != DRGN_TYPE_ARRAY) { - err = drgn_error_create(DRGN_ERROR_TYPE, - "struct radix_tree_node slots member is not an array"); - goto out; - } - RADIX_TREE_MAP_MASK = drgn_type_length(member_type.type) - 1; for (;;) { uint64_t value; @@ -243,6 +257,36 @@ struct drgn_error *linux_helper_idr_find(struct drgn_object *res, return err; } +static struct drgn_error *pid_hash_init(struct drgn_program *prog, + const struct drgn_object *ns, + struct drgn_qualified_type *upid_type_ret, + uint64_t *pidhash_length_ret, uint64_t *ns_level_ret) +{ + struct drgn_error *err; + struct drgn_object ns_level, pidhash_shift; + drgn_object_init(&ns_level, prog); + drgn_object_init(&pidhash_shift, prog); + err = drgn_program_find_type(prog, "struct upid", NULL, upid_type_ret); + if (err) + goto out; + err = drgn_program_find_object(prog, "pidhash_shift", NULL, DRGN_FIND_OBJECT_ANY, + &pidhash_shift); + if (err) + goto out; + err = drgn_object_read_unsigned(&pidhash_shift, pidhash_length_ret); + if (err) + goto out; + // *pidhash_length_ret = 1 << pidhash_shift + *pidhash_length_ret = 
*pidhash_length_ret >= 64 ? 0 : UINT64_C(1) << *pidhash_length_ret; + err = drgn_object_member_dereference(&ns_level, ns, "level"); + if (err) + goto out; + err = drgn_object_read_unsigned(&ns_level, ns_level_ret); +out: + drgn_object_deinit(&ns_level); + return err; +} + /* * Before Linux kernel commit 95846ecf9dac ("pid: replace pid bitmap * implementation with IDR API") (in v4.15), (struct pid_namespace).idr does not @@ -257,15 +301,27 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, { struct drgn_error *err; + struct drgn_object node, tmp; + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); + + err = drgn_object_read(&tmp, ns); + if (err) + goto out; + struct drgn_qualified_type upid_type; + uint64_t i, ns_level; + err = pid_hash_init(drgn_object_program(res), &tmp, &upid_type, &i, + &ns_level); + if (err) + goto out; struct drgn_qualified_type pidp_type; - err = drgn_program_find_type(drgn_object_program(res), "struct pid *", - NULL, &pidp_type); + err = drgn_program_find_type(drgn_object_program(res), "struct pid *", NULL, + &pidp_type); if (err) return err; - struct drgn_qualified_type upid_type; - err = drgn_program_find_type(drgn_object_program(res), "struct upid", - NULL, &upid_type); + uint64_t ns_addr; + err = drgn_object_read_unsigned(&tmp, &ns_addr); if (err) return err; @@ -298,40 +354,6 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, if (err) return err; - struct drgn_object node, tmp; - drgn_object_init(&node, drgn_object_program(res)); - drgn_object_init(&tmp, drgn_object_program(res)); - - err = drgn_object_read(&tmp, ns); - if (err) - goto out; - uint64_t ns_addr; - err = drgn_object_read_unsigned(&tmp, &ns_addr); - if (err) - goto out; - union drgn_value ns_level; - err = drgn_object_member_dereference(&tmp, &tmp, "level"); - if (err) - goto out; - err = drgn_object_read_integer(&tmp, &ns_level); - if (err) - goto out; - - /* i = 1 
<< pidhash_shift */ - err = drgn_program_find_object(drgn_object_program(res), - "pidhash_shift", NULL, - DRGN_FIND_OBJECT_ANY, &tmp); - if (err) - goto out; - union drgn_value pidhash_shift; - err = drgn_object_read_integer(&tmp, &pidhash_shift); - if (err) - goto out; - uint64_t i; - if (pidhash_shift.uvalue >= 64) - i = 0; - else - i = UINT64_C(1) << pidhash_shift.uvalue; while (i--) { /* for (node = pid_hash[i].first; node; node = node->next) */ err = drgn_object_subscript(&node, pid_hash, i); @@ -382,7 +404,7 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, goto next; sprintf(member, "numbers[%" PRIu64 "].pid_chain", - ns_level.uvalue); + ns_level); err = drgn_object_container_of(res, &node, drgn_type_type(pidp_type.type), member); @@ -533,3 +555,381 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, drgn_object_deinit(&pid_obj); return err; } + +struct linux_helper_radix_tree_iter_frame { + struct drgn_object slots; + uint64_t index; + uint64_t shift; + uint64_t next_slot; +}; + +DEFINE_VECTOR_FUNCTIONS(linux_helper_radix_tree_iter_frame_vector) + +struct drgn_error *linux_helper_radix_tree_iter_init(struct linux_helper_radix_tree_iter *iter, + const struct drgn_object *root) +{ + struct drgn_program *prog = drgn_object_program(root); + iter->started = false; + drgn_object_init(&iter->root, prog); + drgn_object_init(&iter->entry.node, prog); + iter->entry.index = 0; + iter->prog = prog; + + struct drgn_error *err = + radix_tree_init(prog, root, &iter->RADIX_TREE_INTERNAL_NODE, + &iter->RADIX_TREE_MAP_MASK, &iter->node_type, &iter->root); + + if (err) { + drgn_object_deinit(&iter->root); + drgn_object_deinit(&iter->entry.node); + return err; + } + + linux_helper_radix_tree_iter_frame_vector_init(&iter->frames); + return NULL; +} + +void linux_helper_radix_tree_iter_deinit(struct linux_helper_radix_tree_iter *iter) +{ + drgn_object_deinit(&iter->root); + drgn_object_deinit(&iter->entry.node); + while 
(iter->frames.size) { + drgn_object_deinit( + &linux_helper_radix_tree_iter_frame_vector_pop(&iter->frames)->slots); + } + linux_helper_radix_tree_iter_frame_vector_deinit(&iter->frames); +} + +static struct drgn_error *radix_tree_iter_handle_node(struct linux_helper_radix_tree_iter *iter, + struct drgn_object *_node, uint64_t index, + bool *entry_populated_ret) +{ + struct drgn_object *node = &iter->entry.node; + struct drgn_error *err; + uint64_t value; + + err = drgn_object_read(node, _node); + if (err) + return err; + err = drgn_object_read_unsigned(node, &value); + if (err) + return err; + if ((value & RADIX_TREE_ENTRY_MASK) != iter->RADIX_TREE_INTERNAL_NODE) { + // Base-case, node is NOT internal + if (value) { + *entry_populated_ret = true; + iter->entry.index = index; + } + return NULL; + } + + *entry_populated_ret = false; + + // We are dealing with an internal node, and must iterate over its slots + + err = drgn_object_set_unsigned(node, iter->node_type, + value & ~iter->RADIX_TREE_INTERNAL_NODE, 0); + if (err) + return err; + struct linux_helper_radix_tree_iter_frame *frame = + linux_helper_radix_tree_iter_frame_vector_append_entry(&iter->frames); + if (!frame) + return &drgn_enomem; + frame->index = index; + frame->next_slot = 0; + drgn_object_init(&frame->slots, iter->prog); + // We temporarily use `frame->slots` to hold `shift` in order to avoid + // using another `struct drgn_object`. + err = drgn_object_member_dereference(&frame->slots, node, "shift"); + if (err) + goto err_frame; + err = drgn_object_read_unsigned(&frame->slots, &frame->shift); + if (err) + goto err_frame; + // Now `frame->slots` is actually used for `slots`. 
+ err = drgn_object_member_dereference(&frame->slots, node, "slots"); + if (err) + goto err_frame; + return NULL; + +err_frame: + drgn_object_deinit(&frame->slots); + linux_helper_radix_tree_iter_frame_vector_pop(&iter->frames); + return err; +} + +struct drgn_error *linux_helper_radix_tree_iter_next(struct linux_helper_radix_tree_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret) +{ + bool entry_populated = false; + struct drgn_error *err = NULL; + struct drgn_object node; + drgn_object_init(&node, iter->prog); + if (!iter->started) { + iter->started = true; + err = radix_tree_iter_handle_node(iter, &iter->root, 0, &entry_populated); + } + + while (!err && !entry_populated && iter->frames.size) { + struct linux_helper_radix_tree_iter_frame *frame = + &iter->frames.data[iter->frames.size - 1]; + if (frame->next_slot <= iter->RADIX_TREE_MAP_MASK) { + err = drgn_object_subscript(&node, &frame->slots, frame->next_slot); + if (!err) + err = radix_tree_iter_handle_node(iter, &node, + frame->index + (frame->next_slot++ + << frame->shift), + &entry_populated); + } else { + drgn_object_deinit(&frame->slots); + linux_helper_radix_tree_iter_frame_vector_pop(&iter->frames); + } + } + if (!err) + *ret = entry_populated ? 
&iter->entry : NULL; + drgn_object_deinit(&node); + return err; +} + +struct drgn_error *linux_helper_idr_iter_init(struct linux_helper_idr_iter *iter, + const struct drgn_object *idr) +{ + struct drgn_error *err; + struct drgn_object idr_rt, idr_base; + drgn_object_init(&idr_rt, drgn_object_program(idr)); + drgn_object_init(&idr_base, drgn_object_program(idr)); + + err = drgn_object_member(&idr_base, idr, "idr_base"); + if (!err) { + err = drgn_object_read_unsigned(&idr_base, &iter->base); + if (err) + goto out; + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + iter->base = 0; + } else { + goto out; + } + + err = drgn_object_member(&idr_rt, idr, "idr_rt"); + if (err) + goto out; + err = drgn_object_address_of(&idr_rt, &idr_rt); + if (err) + goto out; + err = linux_helper_radix_tree_iter_init(&iter->iter, &idr_rt); +out: + drgn_object_deinit(&idr_rt); + drgn_object_deinit(&idr_base); + return err; +} + +void linux_helper_idr_iter_deinit(struct linux_helper_idr_iter *iter) +{ + linux_helper_radix_tree_iter_deinit(&iter->iter); +} + +struct drgn_error *linux_helper_idr_iter_next(struct linux_helper_idr_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret) +{ + struct drgn_error *err = linux_helper_radix_tree_iter_next(&iter->iter, ret); + if (!err && *ret) + (*ret)->index += iter->base; + return err; +} + +// See `find_pid_in_pid_hash` +static struct drgn_error *pid_iter_init_pid_hash(struct drgn_program *prog, + const struct drgn_object *ns, + struct linux_helper_pid_iter *iter) +{ + struct drgn_error *err; + drgn_object_init(&iter->pid_hash, prog); + drgn_object_init(&iter->pos, prog); + drgn_object_init(&iter->ns, prog); + drgn_object_init(&iter->entry, prog); + err = drgn_program_find_object(prog, "pid_hash", NULL, DRGN_FIND_OBJECT_VARIABLE, + &iter->pid_hash); + if (err) + goto out; + struct drgn_qualified_type void_star_type; + err = drgn_program_find_type(prog, "void *", NULL, &void_star_type); + if (err) + goto out; + err = 
drgn_object_set_unsigned(&iter->pos, void_star_type, 0, 0); + if (err) + goto out; + err = drgn_object_copy(&iter->ns, ns); + if (err) + goto out; + uint64_t ns_level; + err = pid_hash_init(prog, ns, &iter->upid_type, &iter->index, &ns_level); + if (err) + goto out; + snprintf(iter->member_specifier, sizeof(iter->member_specifier), "numbers[%" PRIu64 "]", + ns_level); + err = drgn_program_find_type(prog, "struct pid", NULL, &iter->pid_type); + if (err) + goto out; + err = drgn_program_find_type(prog, "struct upid", NULL, &iter->upid_type); + if (err) + goto out; +out: + if (err) { + drgn_object_deinit(&iter->pid_hash); + drgn_object_deinit(&iter->pos); + drgn_object_deinit(&iter->ns); + drgn_object_deinit(&iter->entry); + } + return err; +} + +struct drgn_error *linux_helper_pid_iter_init(struct linux_helper_pid_iter *iter, + const struct drgn_object *ns) +{ + struct drgn_program *prog = drgn_object_program(ns); + struct drgn_error *err; + struct drgn_object idr; + drgn_object_init(&idr, prog); + + err = drgn_object_member_dereference(&idr, ns, "idr"); + if (!err) { + iter->has_idr = true; + err = drgn_program_find_type(prog, "struct pid *", NULL, &iter->pid_type); + if (!err) + err = linux_helper_idr_iter_init(&iter->iter, &idr); + } else if (err->code == DRGN_ERROR_LOOKUP) { + iter->has_idr = false; + drgn_error_destroy(err); + err = pid_iter_init_pid_hash(prog, ns, iter); + } + + drgn_object_deinit(&idr); + return err; +} + +void linux_helper_pid_iter_deinit(struct linux_helper_pid_iter *iter) +{ + if (iter->has_idr) { + linux_helper_idr_iter_deinit(&iter->iter); + } else { + drgn_object_deinit(&iter->pid_hash); + drgn_object_deinit(&iter->pos); + drgn_object_deinit(&iter->ns); + drgn_object_deinit(&iter->entry); + } +} + +struct drgn_error *linux_helper_pid_iter_next(struct linux_helper_pid_iter *iter, + struct drgn_object **ret) +{ + if (iter->has_idr) { + struct linux_helper_radix_tree_iter_entry *entry; + struct drgn_error *err = 
linux_helper_idr_iter_next(&iter->iter, &entry); + if (err) + return err; + if (!entry) { + *ret = NULL; + return NULL; + } + err = drgn_object_cast(&entry->node, iter->pid_type, &entry->node); + if (!err) + *ret = &entry->node; + return err; + } + + struct drgn_error *err = NULL; + struct drgn_object upid, upid_ns; + drgn_object_init(&upid, drgn_object_program(&iter->ns)); + drgn_object_init(&upid_ns, drgn_object_program(&iter->ns)); + + for (;;) { + for (;;) { + bool is_truthy; + err = drgn_object_bool(&iter->pos, &is_truthy); + if (err) + goto out; + if (is_truthy) + break; + if (iter->index == 0) { + *ret = NULL; + goto out; + } + err = drgn_object_subscript(&iter->pos, &iter->pid_hash, --iter->index); + if (err) + goto out; + err = drgn_object_member(&iter->pos, &iter->pos, "first"); + if (err) + goto out; + err = drgn_object_bool(&iter->pos, &is_truthy); + if (err) + goto out; + } + err = drgn_object_container_of(&upid, &iter->pos, iter->upid_type, "pid_chain"); + if (err) + goto out; + err = drgn_object_member_dereference(&iter->pos, &iter->pos, "next"); + if (err) + goto out; + err = drgn_object_member_dereference(&upid_ns, &upid, "ns"); + if (err) + goto out; + int ns_cmp_result; + err = drgn_object_cmp(&upid_ns, &iter->ns, &ns_cmp_result); + if (err) + goto out; + if (ns_cmp_result == 0) { + err = drgn_object_container_of(&iter->entry, &upid, iter->pid_type, + iter->member_specifier); + if (!err) + *ret = &iter->entry; + goto out; + } + } + +out: + drgn_object_deinit(&upid); + drgn_object_deinit(&upid_ns); + return err; +} + +struct drgn_error *linux_helper_task_iter_init(struct linux_helper_task_iter *iter, + const struct drgn_object *ns) +{ + struct drgn_program *prog = drgn_object_program(ns); + struct drgn_error *err = linux_helper_pid_iter_init(&iter->iter, ns); + if (err) + return err; + struct drgn_object PIDTYPE_PID; + drgn_object_init(&PIDTYPE_PID, prog); + err = drgn_program_find_object(prog, "PIDTYPE_PID", NULL, DRGN_FIND_OBJECT_CONSTANT, + 
&PIDTYPE_PID); + if (!err) + err = drgn_object_read_unsigned(&PIDTYPE_PID, &iter->PIDTYPE_PID); + if (err) + linux_helper_pid_iter_deinit(&iter->iter); + drgn_object_deinit(&PIDTYPE_PID); + return err; +} + +struct drgn_error *linux_helper_task_iter_next(struct linux_helper_task_iter *iter, + struct drgn_object **ret) +{ + struct drgn_error *err; + bool value_is_truthy; + do { + err = linux_helper_pid_iter_next(&iter->iter, ret); + if (err || !*ret) + return err; + err = linux_helper_pid_task(*ret, *ret, iter->PIDTYPE_PID); + if (err) + return err; + err = drgn_object_bool(*ret, &value_is_truthy); + } while (!err && !value_is_truthy); + return err; +} + +void linux_helper_task_iter_deinit(struct linux_helper_task_iter *iter) +{ + linux_helper_pid_iter_deinit(&iter->iter); +} diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index e5aa04bd5..e4ffba927 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -97,6 +97,14 @@ typedef struct { struct pyobjectp_set objects; } Program; +typedef struct _GenericIterator { + PyObject_HEAD + Program *prog; + void *iter; + PyObject *(*next)(struct _GenericIterator *); + void (*iter_deinit)(void *); +} GenericIterator; + typedef struct { PyObject_HEAD const struct drgn_register *reg; @@ -167,6 +175,7 @@ extern PyObject *TypeKind_class; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; extern PyTypeObject FaultError_type; +extern PyTypeObject GenericIterator_type; extern PyTypeObject Language_type; extern PyTypeObject ObjectIterator_type; extern PyTypeObject Platform_type; @@ -297,5 +306,17 @@ PyObject *drgnpy_linux_helper_kaslr_offset(PyObject *self, PyObject *args, PyObject *kwds); PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *args, PyObject *kwds); +GenericIterator *drgnpy_linux_helper_for_each_task(PyObject *self, + PyObject *args, + PyObject *kwds); +GenericIterator *drgnpy_linux_helper_for_each_pid(PyObject *self, + PyObject *args, + PyObject 
*kwds); +GenericIterator *drgnpy_linux_helper_idr_for_each(PyObject *self, + PyObject *args, + PyObject *kwds); +GenericIterator *drgnpy_linux_helper_radix_tree_for_each(PyObject *self, + PyObject *args, + PyObject *kwds); #endif /* DRGNPY_H */ diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 2f16a20d5..d4444f1d2 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -249,3 +249,251 @@ PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *args, return PyErr_Format(PyExc_ValueError, "not Linux kernel"); Py_RETURN_BOOL(prog->prog.vmcoreinfo.pgtable_l5_enabled); } + +static void GenericIterator_dealloc(GenericIterator *self) +{ + if (self->iter) { + self->iter_deinit(self->iter); + free(self->iter); + } + Py_XDECREF(self->prog); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject *GenericIterator_next(GenericIterator *self) +{ + return self->next(self); +} + +PyTypeObject GenericIterator_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn._GenericIterator", + .tp_basicsize = sizeof(GenericIterator), + .tp_dealloc = (destructor)GenericIterator_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, + .tp_iternext = (iternextfunc)GenericIterator_next, +}; + +static PyObject *for_each_task_next(GenericIterator *self) +{ + struct drgn_error *err; + struct drgn_object *entry; + err = linux_helper_task_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + DrgnObject *ret = DrgnObject_alloc(self->prog); + if (!ret) + return NULL; + err = drgn_object_copy(&ret->obj, entry); + if (err) { + Py_DECREF(ret); + return set_drgn_error(err); + } + return (PyObject *)ret; +} + +GenericIterator *drgnpy_linux_helper_for_each_task(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"prog_or_ns", NULL}; + struct drgn_error *err = NULL; + struct prog_or_ns_arg prog_or_ns; + + if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O&:for_each_task", + keywords, &prog_or_pid_ns_converter, + &prog_or_ns)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + goto out; + iterator->prog = prog_or_ns.prog; + Py_INCREF(iterator->prog); + iterator->next = for_each_task_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_task_iter_deinit; + iterator->iter = malloc(sizeof(struct linux_helper_task_iter)); + if (!iterator->iter) { + PyErr_NoMemory(); + Py_DECREF(iterator); + iterator = NULL; + goto out; + } + err = linux_helper_task_iter_init(iterator->iter, prog_or_ns.ns); + if (err) { + set_drgn_error(err); + Py_DECREF(iterator); + iterator = NULL; + } +out: + prog_or_ns_cleanup(&prog_or_ns); + return iterator; +} + +static PyObject *for_each_pid_next(GenericIterator *self) +{ + struct drgn_error *err; + struct drgn_object *entry; + err = linux_helper_pid_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + DrgnObject *ret = DrgnObject_alloc(self->prog); + if (!ret) + return NULL; + err = drgn_object_copy(&ret->obj, entry); + if (err) { + Py_DECREF(ret); + return set_drgn_error(err); + } + return (PyObject *)ret; +} + +GenericIterator * +drgnpy_linux_helper_for_each_pid(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"prog_or_ns", NULL}; + struct drgn_error *err = NULL; + struct prog_or_ns_arg prog_or_ns; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:for_each_pid", + keywords, &prog_or_pid_ns_converter, + &prog_or_ns)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + goto out; + iterator->prog = prog_or_ns.prog; + Py_INCREF(iterator->prog); + iterator->next = for_each_pid_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_pid_iter_deinit; + iterator->iter = 
malloc(sizeof(struct linux_helper_pid_iter)); + if (!iterator->iter) { + PyErr_NoMemory(); + Py_DECREF(iterator); + iterator = NULL; + goto out; + } + err = linux_helper_pid_iter_init(iterator->iter, prog_or_ns.ns); + if (err) { + set_drgn_error(err); + Py_DECREF(iterator); + iterator = NULL; + } +out: + prog_or_ns_cleanup(&prog_or_ns); + return iterator; +} + +static PyObject *idr_iter_entry_wrap(struct linux_helper_radix_tree_iter_entry *entry, + Program *prog) +{ + DrgnObject *node = DrgnObject_alloc(prog); + if (!node) + return NULL; + struct drgn_error *err = drgn_object_copy(&node->obj, &entry->node); + if (err) { + Py_DECREF(node); + return set_drgn_error(err); + } + PyObject *ret = + Py_BuildValue("KO", (unsigned long long)entry->index, node); + Py_DECREF(node); + return ret; +} + +static PyObject *idr_for_each_next(GenericIterator *self) +{ + struct linux_helper_radix_tree_iter_entry *entry; + struct drgn_error *err = linux_helper_idr_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + return idr_iter_entry_wrap(entry, self->prog); +} + +GenericIterator * +drgnpy_linux_helper_idr_for_each(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"idr", NULL}; + struct drgn_error *err; + DrgnObject *idr; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!:idr_for_each", + keywords, &DrgnObject_type, &idr)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + return NULL; + iterator->prog = DrgnObject_prog(idr); + Py_INCREF(iterator->prog); + iterator->next = idr_for_each_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_idr_iter_deinit; + iterator->iter = malloc(sizeof(struct linux_helper_idr_iter)); + if (!iterator->iter) { + Py_DECREF(iterator); + return (GenericIterator *)PyErr_NoMemory(); + } + err = linux_helper_idr_iter_init(iterator->iter, &idr->obj); + if (err) { + 
Py_DECREF(iterator); + return set_drgn_error(err); + } + return iterator; +} + +static PyObject *radix_tree_for_each_next(GenericIterator *self) +{ + struct linux_helper_radix_tree_iter_entry *entry; + struct drgn_error *err = linux_helper_radix_tree_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + return idr_iter_entry_wrap(entry, self->prog); +} + +GenericIterator *drgnpy_linux_helper_radix_tree_for_each(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"root", NULL}; + struct drgn_error *err; + DrgnObject *root; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!:radix_tree_for_each", + keywords, &DrgnObject_type, &root)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + return NULL; + iterator->prog = DrgnObject_prog(root); + Py_INCREF(iterator->prog); + iterator->next = radix_tree_for_each_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_radix_tree_iter_deinit; + iterator->iter = malloc(sizeof(struct linux_helper_radix_tree_iter)); + if (!iterator->iter) { + Py_DECREF(iterator); + return (GenericIterator *)PyErr_NoMemory(); + } + err = linux_helper_radix_tree_iter_init(iterator->iter, &root->obj); + if (err) { + Py_DECREF(iterator); + return set_drgn_error(err); + } + return iterator; +} diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 7fd1d2c3a..79128bf1f 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -141,6 +141,18 @@ static PyMethodDef drgn_methods[] = { {"_linux_helper_pgtable_l5_enabled", (PyCFunction)drgnpy_linux_helper_pgtable_l5_enabled, METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_for_each_task", + (PyCFunction)drgnpy_linux_helper_for_each_task, + METH_VARARGS | METH_KEYWORDS, drgn__linux_helper_for_each_task_DOC}, + {"_linux_helper_for_each_pid", + (PyCFunction)drgnpy_linux_helper_for_each_pid, + METH_VARARGS 
| METH_KEYWORDS, drgn__linux_helper_for_each_pid_DOC}, + {"_linux_helper_idr_for_each", + (PyCFunction)drgnpy_linux_helper_idr_for_each, + METH_VARARGS | METH_KEYWORDS, drgn__linux_helper_idr_for_each_DOC}, + {"_linux_helper_radix_tree_for_each", + (PyCFunction)drgnpy_linux_helper_radix_tree_for_each, + METH_VARARGS | METH_KEYWORDS, drgn__linux_helper_radix_tree_for_each_DOC}, {}, }; @@ -230,6 +242,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) add_type(m, &StackTrace_type) || add_type(m, &Symbol_type) || add_type(m, &DrgnType_type) || + add_type(m, &GenericIterator_type) || add_type(m, &TypeEnumerator_type) || add_type(m, &TypeMember_type) || add_type(m, &TypeParameter_type) || diff --git a/tests/helpers/linux/test_pid.py b/tests/helpers/linux/test_pid.py index cc2056b5e..92664346b 100644 --- a/tests/helpers/linux/test_pid.py +++ b/tests/helpers/linux/test_pid.py @@ -1,6 +1,7 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later +from multiprocessing import Barrier, Process import os from drgn.helpers.linux.pid import find_pid, find_task, for_each_pid, for_each_task @@ -30,5 +31,23 @@ def test_find_task(self): self.assertEqual(task.comm.string_(), comm) def test_for_each_task(self): - pid = os.getpid() - self.assertTrue(any(task.pid == pid for task in for_each_task(self.prog))) + NUM_PROCS = 12 + barrier = Barrier(NUM_PROCS + 1) + + def proc_func(): + barrier.wait() + + try: + procs = [Process(target=proc_func) for _ in range(NUM_PROCS)] + for proc in procs: + proc.start() + pids = {task.pid.value_() for task in for_each_task(self.prog)} + for proc in procs: + self.assertIn(proc.pid, pids) + self.assertIn(os.getpid(), pids) + barrier.wait() + except: + barrier.abort() + for proc in procs: + proc.terminate() + raise From 6732148a11e5362f624139fe967349a1cc5e0601 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 17 Dec 2021 15:30:43 -0800 Subject: [PATCH 126/139] tests: use NOBITS section for ELF symbols 
Currently, we create a section filled with zeroes to contain the symbols in our ELF symbol tests. We can just use a NOBITS section with no file data instead. Signed-off-by: Omar Sandoval --- tests/elfwriter.py | 10 +++------- tests/test_symbol.py | 3 +-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/elfwriter.py b/tests/elfwriter.py index c9891a2ee..32e9ef1b0 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -10,7 +10,7 @@ class ElfSection: def __init__( self, - data: bytes, + data: bytes = b"", name: Optional[str] = None, sh_type: Optional[SHT] = None, p_type: Optional[PT] = None, @@ -28,7 +28,7 @@ def __init__( self.p_type = p_type self.vaddr = vaddr self.paddr = paddr - self.memsz = memsz + self.memsz = len(self.data) if memsz is None else memsz self.p_align = p_align self.sh_link = sh_link self.sh_info = sh_info @@ -36,10 +36,6 @@ def __init__( assert (self.name is not None) or (self.p_type is not None) assert (self.name is None) == (self.sh_type is None) - if self.p_type is None: - assert self.memsz is None - elif self.memsz is None: - self.memsz = len(self.data) class ElfSymbol(NamedTuple): @@ -205,7 +201,7 @@ def create_elf_file( 0, # sh_flags section.vaddr, # sh_addr len(buf), # sh_offset - len(section.data), # sh_size + section.memsz, # sh_size section.sh_link, # sh_link section.sh_info, # sh_info 1 if section.p_type is None else bits // 8, # sh_addralign diff --git a/tests/test_symbol.py b/tests/test_symbol.py index d980fa644..abd439e59 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -21,11 +21,10 @@ def create_elf_symbol_file(symbols): sections.append( ElfSection( name=".foo", - sh_type=SHT.PROGBITS, + sh_type=SHT.NOBITS, p_type=PT.LOAD, vaddr=min_address, memsz=max_address - min_address, - data=bytes(max_address - min_address), ) ) symbols = [ From 92f25e297455aac6c878ca10350424f461ded41c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 14:34:45 -0800 Subject: [PATCH 127/139] vmtest: 
enable logging when running vmtest.vm CLI Specifically, we want logs from vmtest.download. Signed-off-by: Omar Sandoval --- vmtest/vm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vmtest/vm.py b/vmtest/vm.py index 8b40dccbf..7928a87f9 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -279,6 +279,11 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: if __name__ == "__main__": import argparse + import logging + + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO + ) parser = argparse.ArgumentParser( description="run vmtest virtual machine", From b916e6905b57f7b546dc4b8eddc6920270749e67 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 14:39:50 -0800 Subject: [PATCH 128/139] libdrgn: linux: translate per_cpu_ptr() helper to C The next change will add a C helper that needs per_cpu_ptr(). Signed-off-by: Omar Sandoval --- _drgn.pyi | 16 ++++++++++++++++ drgn/helpers/linux/percpu.py | 19 +------------------ libdrgn/helpers.h | 4 ++++ libdrgn/linux_kernel_helpers.c | 33 +++++++++++++++++++++++++++++++++ libdrgn/python/drgnpy.h | 2 ++ libdrgn/python/helpers.c | 24 ++++++++++++++++++++++++ libdrgn/python/module.c | 3 +++ 7 files changed, 83 insertions(+), 18 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 86e356b88..92fd8f162 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2164,6 +2164,22 @@ def _linux_helper_radix_tree_lookup(root: Object, index: IntegerLike) -> Object: """ ... +def _linux_helper_per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: + """ + Return the per-CPU pointer for a given CPU. + + >>> prog["init_net"].loopback_dev.pcpu_refcnt + (int *)0x2c980 + >>> per_cpu_ptr(prog["init_net"].loopback_dev.pcpu_refcnt, 7) + *(int *)0xffff925e3ddec980 = 4 + + :param ptr: Per-CPU pointer, i.e., ``type __percpu *``. For global + variables, it's usually easier to use :func:`per_cpu()`. + :param cpu: CPU number. + :return: ``type *`` object. + """ + ... 
+ def _linux_helper_idr_find(idr: Object, id: IntegerLike) -> Object: """ Look up the entry with the given ID in an IDR. diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index a1e809939..3143533df 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -10,6 +10,7 @@ from :linux:`include/linux/percpu_counter.h`. """ +from _drgn import _linux_helper_per_cpu_ptr as per_cpu_ptr from drgn import IntegerLike, Object from drgn.helpers.linux.cpumask import for_each_online_cpu @@ -37,24 +38,6 @@ def per_cpu(var: Object, cpu: IntegerLike) -> Object: return per_cpu_ptr(var.address_of_(), cpu)[0] -def per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: - """ - Return the per-CPU pointer for a given CPU. - - >>> prog["init_net"].loopback_dev.pcpu_refcnt - (int *)0x2c980 - >>> per_cpu_ptr(prog["init_net"].loopback_dev.pcpu_refcnt, 7) - *(int *)0xffff925e3ddec980 = 4 - - :param ptr: Per-CPU pointer, i.e., ``type __percpu *``. For global - variables, it's usually easier to use :func:`per_cpu()`. - :param cpu: CPU number. - :return: ``type *`` object. - """ - offset = ptr.prog_["__per_cpu_offset"][cpu].value_() - return Object(ptr.prog_, ptr.type_, value=ptr.value_() + offset) - - def percpu_counter_sum(fbc: Object) -> int: """ Return the sum of a per-CPU counter. 
diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 72b8a6e2f..2ff0b6f6e 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -25,6 +25,10 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, void *buf, size_t count); +struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, + const struct drgn_object *ptr, + uint64_t cpu); + struct drgn_error * linux_helper_radix_tree_lookup(struct drgn_object *res, const struct drgn_object *root, uint64_t index); diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 3a2736e98..e38a68b44 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -102,6 +102,39 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, return err; } +struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, + const struct drgn_object *ptr, + uint64_t cpu) +{ + struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(ptr); + + struct drgn_object tmp; + drgn_object_init(&tmp, prog); + err = drgn_program_find_object(prog, "__per_cpu_offset", NULL, + DRGN_FIND_OBJECT_ANY, &tmp); + if (err) + goto out; + err = drgn_object_subscript(&tmp, &tmp, cpu); + if (err) + goto out; + union drgn_value per_cpu_offset; + err = drgn_object_read_integer(&tmp, &per_cpu_offset); + if (err) + goto out; + + uint64_t ptr_value; + err = drgn_object_read_unsigned(ptr, &ptr_value); + if (err) + goto out; + + err = drgn_object_set_unsigned(res, drgn_object_qualified_type(ptr), + ptr_value + per_cpu_offset.uvalue, 0); +out: + drgn_object_deinit(&tmp); + return err; +} + static struct drgn_error * radix_tree_init(struct drgn_program *prog, const struct drgn_object *root, uint64_t *RADIX_TREE_INTERNAL_NODE_ret, diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index e4ffba927..92434bf84 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -291,6 +291,8 @@ int enum_converter(PyObject 
*o, void *p); PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, PyObject *kwds); +DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, + PyObject *kwds); DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index d4444f1d2..c11d3d6a2 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -38,6 +38,30 @@ PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, return buf; } +DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"ptr", "cpu", NULL}; + struct drgn_error *err; + DrgnObject *ptr; + struct index_arg cpu = {}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&:per_cpu_ptr", + keywords, &DrgnObject_type, &ptr, + index_converter, &cpu)) + return NULL; + + DrgnObject *res = DrgnObject_alloc(DrgnObject_prog(ptr)); + if (!res) + return NULL; + err = linux_helper_per_cpu_ptr(&res->obj, &ptr->obj, cpu.uvalue); + if (err) { + Py_DECREF(res); + return set_drgn_error(err); + } + return res; +} + DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds) diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 79128bf1f..f5184d9e1 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -124,6 +124,9 @@ static PyMethodDef drgn_methods[] = { METH_VARARGS | METH_KEYWORDS, drgn_program_from_pid_DOC}, {"_linux_helper_read_vm", (PyCFunction)drgnpy_linux_helper_read_vm, METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_per_cpu_ptr", + (PyCFunction)drgnpy_linux_helper_per_cpu_ptr, + METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_radix_tree_lookup", (PyCFunction)drgnpy_linux_helper_radix_tree_lookup, METH_VARARGS | METH_KEYWORDS}, From adfb04579b618b6838d0cfc6ea81293f7aee10a0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 14:40:57 -0800 
Subject: [PATCH 129/139] libdrgn: linux: add idle_thread() helper PR #129 will need to get the idle thread for a CPU when the idle thread crashed. Add a helper for this. Signed-off-by: Omar Sandoval --- _drgn.pyi | 12 ++++++++++++ drgn/helpers/linux/sched.py | 6 +++++- libdrgn/helpers.h | 3 +++ libdrgn/linux_kernel_helpers.c | 32 +++++++++++++++++++++++++++++++ libdrgn/python/drgnpy.h | 2 ++ libdrgn/python/helpers.c | 24 +++++++++++++++++++++++ libdrgn/python/module.c | 3 +++ tests/helpers/linux/test_sched.py | 15 ++++++++++++++- 8 files changed, 95 insertions(+), 2 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 92fd8f162..8b086d325 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2180,6 +2180,18 @@ def _linux_helper_per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: """ ... +def _linux_helper_idle_thread(prog: Program, cpu: IntegerLike) -> Object: + """ + Return the idle thread (PID 0, a.k.a swapper) for the given CPU. + + >>> idle_thread(prog, 1).comm + (char [16])"swapper/1" + + :param cpu: CPU number. + :return: ``struct task_struct *`` + """ + ... + def _linux_helper_idr_find(idr: Object, id: IntegerLike) -> Object: """ Look up the entry with the given ID in an IDR. diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index afeb84865..b43bc1243 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -9,9 +9,13 @@ Linux CPU scheduler. 
""" +from _drgn import _linux_helper_idle_thread as idle_thread from drgn import Object -__all__ = ("task_state_to_char",) +__all__ = ( + "idle_thread", + "task_state_to_char", +) _TASK_NOLOAD = 0x400 diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 2ff0b6f6e..0e10d57e1 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -29,6 +29,9 @@ struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, const struct drgn_object *ptr, uint64_t cpu); +struct drgn_error *linux_helper_idle_thread(struct drgn_object *res, + uint64_t cpu); + struct drgn_error * linux_helper_radix_tree_lookup(struct drgn_object *res, const struct drgn_object *root, uint64_t index); diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index e38a68b44..842ab3122 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -135,6 +135,38 @@ struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, return err; } +struct drgn_error *linux_helper_idle_thread(struct drgn_object *res, + uint64_t cpu) +{ + struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(res); + + struct drgn_object tmp; + drgn_object_init(&tmp, prog); + err = drgn_program_find_object(prog, "idle_threads", NULL, + DRGN_FIND_OBJECT_ANY, &tmp); + if (!err) { + err = drgn_object_address_of(&tmp, &tmp); + if (err) + goto out; + err = linux_helper_per_cpu_ptr(&tmp, &tmp, cpu); + if (err) + goto out; + err = drgn_object_dereference(res, &tmp); + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + err = drgn_program_find_object(prog, "init_task", NULL, + DRGN_FIND_OBJECT_ANY, &tmp); + if (err) + goto out; + err = drgn_object_address_of(res, &tmp); + } + +out: + drgn_object_deinit(&tmp); + return err; +} + static struct drgn_error * radix_tree_init(struct drgn_program *prog, const struct drgn_object *root, uint64_t *RADIX_TREE_INTERNAL_NODE_ret, diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 
92434bf84..42231a15a 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -293,6 +293,8 @@ PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, PyObject *kwds); +DrgnObject *drgnpy_linux_helper_idle_thread(PyObject *self, PyObject *args, + PyObject *kwds); DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index c11d3d6a2..4710e0166 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -62,6 +62,30 @@ DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, return res; } +DrgnObject *drgnpy_linux_helper_idle_thread(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"prog", "cpu", NULL}; + struct drgn_error *err; + Program *prog; + struct index_arg cpu = {}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&:idle_thread", + keywords, &Program_type, &prog, + index_converter, &cpu)) + return NULL; + + DrgnObject *res = DrgnObject_alloc(prog); + if (!res) + return NULL; + err = linux_helper_idle_thread(&res->obj, cpu.uvalue); + if (err) { + Py_DECREF(res); + return set_drgn_error(err); + } + return res; +} + DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds) diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index f5184d9e1..10e00f364 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -127,6 +127,9 @@ static PyMethodDef drgn_methods[] = { {"_linux_helper_per_cpu_ptr", (PyCFunction)drgnpy_linux_helper_per_cpu_ptr, METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_idle_thread", + (PyCFunction)drgnpy_linux_helper_idle_thread, + METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_radix_tree_lookup", (PyCFunction)drgnpy_linux_helper_radix_tree_lookup, METH_VARARGS | METH_KEYWORDS}, diff --git 
a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index fd5232d3f..77d733510 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -4,8 +4,9 @@ import os import signal +from drgn.helpers.linux.cpumask import for_each_possible_cpu from drgn.helpers.linux.pid import find_task -from drgn.helpers.linux.sched import task_state_to_char +from drgn.helpers.linux.sched import idle_thread, task_state_to_char from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, @@ -34,3 +35,15 @@ def test_task_state_to_char(self): self.assertEqual(task_state_to_char(task), "Z") os.waitpid(pid, 0) + + def test_idle_thread(self): + if self.prog.type("struct task_struct").has_member("wake_cpu"): + # SMP + for cpu in for_each_possible_cpu(self.prog): + self.assertEqual( + idle_thread(self.prog, cpu).comm.string_(), + f"swapper/{cpu}".encode(), + ) + else: + # UP + self.assertEqual(idle_thread(self.prog, 0).comm.string_(), b"swapper") From bc95749975a081fc63c3000ca3509b01990d0c5d Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 21 Dec 2021 11:54:19 +0800 Subject: [PATCH 130/139] tests: Rename "sock" to "skt" in test_sk_fullsock() Reserve "sock" for "struct socket *" objects, according to our kernel naming convention. 
Signed-off-by: Peilin Ye --- tests/helpers/linux/test_net.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index f50e3edab..1e53d9181 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -29,8 +29,8 @@ def setUpClass(cls): cls.net = get_net_ns_by_fd(cls.task, file.fileno()) def test_sk_fullsock(self): - with create_socket() as sock: - file = fget(self.task, sock.fileno()) + with create_socket() as skt: + file = fget(self.task, skt.fileno()) sk = cast("struct socket *", file.private_data).sk.read_() self.assertTrue(sk_fullsock(sk)) From ed7f8645327f1e5ce2d78f5cbb40b882a3d2c6c2 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 21 Dec 2021 13:26:39 +0800 Subject: [PATCH 131/139] helpers: Add SOCKET_I() and SOCK_INODE() Add helpers to convert between sockets and inodes. As an example: >>> file = fget(task, fd) >>> sock = SOCKET_I(file.f_inode) >>> sock.type.value_() 2 >>> import socket >>> int(socket.SOCK_DGRAM) 2 >>> inode = SOCK_INODE(sock) Also add tests for the new helpers to tests/helpers/linux/test_net.py. Signed-off-by: Peilin Ye --- drgn/helpers/linux/net.py | 30 ++++++++++++++++++++++++++++++ tests/helpers/linux/test_net.py | 22 ++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index cec12e76f..9efbb3bfb 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -18,6 +18,8 @@ from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry __all__ = ( + "SOCK_INODE", + "SOCKET_I", "for_each_net", "get_net_ns_by_inode", "get_net_ns_by_fd", @@ -29,6 +31,34 @@ ) +_S_IFMT = 0o170000 +_S_IFSOCK = 0o140000 + + +def SOCKET_I(inode: Object) -> Object: + """ + Get a socket from an inode referring to the socket. 
+ + :param inode: ``struct inode *`` + :return: ``struct socket *`` + :raises ValueError: If *inode* does not refer to a socket + """ + if inode.i_mode & _S_IFMT != _S_IFSOCK: + raise ValueError("not a socket inode") + + return container_of(inode, "struct socket_alloc", "vfs_inode").socket.address_of_() + + +def SOCK_INODE(sock: Object) -> Object: + """ + Get the inode of a socket. + + :param sock: ``struct socket *`` + :return: ``struct inode *`` + """ + return container_of(sock, "struct socket_alloc", "socket").vfs_inode.address_of_() + + def for_each_net(prog: Program) -> Iterator[Object]: """ Iterate over all network namespaces in the system. diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index 1e53d9181..2620073c0 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -3,11 +3,16 @@ import os import socket +import sys import tempfile from drgn import cast from drgn.helpers.linux.fs import fget from drgn.helpers.linux.net import ( + _S_IFMT, + _S_IFSOCK, + SOCK_INODE, + SOCKET_I, for_each_net, get_net_ns_by_fd, netdev_for_each_tx_queue, @@ -75,3 +80,20 @@ def test_netdev_for_each_tx_queue(self): netdev = netdev_get_by_index(self.net, index) for queue in netdev_for_each_tx_queue(netdev): self.assertEqual(queue.dev, netdev) + + def test_SOCKET_I(self): + with create_socket(type=socket.SOCK_DGRAM) as skt: + sock = SOCKET_I(fget(self.task, skt.fileno()).f_inode) + self.assertEqual(sock.type, socket.SOCK_DGRAM) + + with open("/dev/null") as null: + file = fget(self.task, null.fileno()) + self.assertRaisesRegex( + ValueError, "not a socket inode", SOCKET_I, file.f_inode + ) + + def test_SOCK_INODE(self): + with create_socket() as skt: + sock = SOCKET_I(fget(self.task, skt.fileno()).f_inode) + inode = SOCK_INODE(sock) + self.assertEqual(inode.i_mode & _S_IFMT, _S_IFSOCK) From d72a9043b074e91e2488275f7bae3081226f1970 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 16:03:25 -0800 Subject: 
[PATCH 132/139] libdrgn: linux: replace idle_thread() with idle_task() I missed that the kernel has an idle_task() function which uses cpu_rq()->idle instead of idle_threads; the latter is technically architecture-specific. So, replace idle_thread() with idle_task(), which is architecture-independent and more consistent with the kernel. Signed-off-by: Omar Sandoval --- _drgn.pyi | 4 ++-- drgn/helpers/linux/sched.py | 4 ++-- libdrgn/helpers.h | 4 ++-- libdrgn/linux_kernel_helpers.c | 31 +++++++++++-------------------- libdrgn/python/drgnpy.h | 4 ++-- libdrgn/python/helpers.c | 12 ++++++------ libdrgn/python/module.c | 3 +-- tests/helpers/linux/test_sched.py | 11 ++++------- 8 files changed, 30 insertions(+), 43 deletions(-) diff --git a/_drgn.pyi b/_drgn.pyi index 8b086d325..28174d1ee 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -2180,11 +2180,11 @@ def _linux_helper_per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: """ ... -def _linux_helper_idle_thread(prog: Program, cpu: IntegerLike) -> Object: +def _linux_helper_idle_task(prog: Program, cpu: IntegerLike) -> Object: """ Return the idle thread (PID 0, a.k.a swapper) for the given CPU. - >>> idle_thread(prog, 1).comm + >>> idle_task(prog, 1).comm (char [16])"swapper/1" :param cpu: CPU number. diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index b43bc1243..b0fc5d0ee 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -9,11 +9,11 @@ Linux CPU scheduler. 
""" -from _drgn import _linux_helper_idle_thread as idle_thread +from _drgn import _linux_helper_idle_task as idle_task from drgn import Object __all__ = ( - "idle_thread", + "idle_task", "task_state_to_char", ) diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 0e10d57e1..66a5a5282 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -29,8 +29,8 @@ struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, const struct drgn_object *ptr, uint64_t cpu); -struct drgn_error *linux_helper_idle_thread(struct drgn_object *res, - uint64_t cpu); +struct drgn_error *linux_helper_idle_task(struct drgn_object *res, + uint64_t cpu); struct drgn_error * linux_helper_radix_tree_lookup(struct drgn_object *res, diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 842ab3122..80897d7cb 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -135,33 +135,24 @@ struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, return err; } -struct drgn_error *linux_helper_idle_thread(struct drgn_object *res, - uint64_t cpu) +struct drgn_error *linux_helper_idle_task(struct drgn_object *res, uint64_t cpu) { struct drgn_error *err; struct drgn_program *prog = drgn_object_program(res); struct drgn_object tmp; drgn_object_init(&tmp, prog); - err = drgn_program_find_object(prog, "idle_threads", NULL, + err = drgn_program_find_object(prog, "runqueues", NULL, DRGN_FIND_OBJECT_ANY, &tmp); - if (!err) { - err = drgn_object_address_of(&tmp, &tmp); - if (err) - goto out; - err = linux_helper_per_cpu_ptr(&tmp, &tmp, cpu); - if (err) - goto out; - err = drgn_object_dereference(res, &tmp); - } else if (err->code == DRGN_ERROR_LOOKUP) { - drgn_error_destroy(err); - err = drgn_program_find_object(prog, "init_task", NULL, - DRGN_FIND_OBJECT_ANY, &tmp); - if (err) - goto out; - err = drgn_object_address_of(res, &tmp); - } - + if (err) + goto out; + err = drgn_object_address_of(&tmp, &tmp); + if (err) + goto out; + err = 
linux_helper_per_cpu_ptr(&tmp, &tmp, cpu); + if (err) + goto out; + err = drgn_object_member_dereference(res, &tmp, "idle"); out: drgn_object_deinit(&tmp); return err; diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 42231a15a..c3ed2428b 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -293,8 +293,8 @@ PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, PyObject *kwds); -DrgnObject *drgnpy_linux_helper_idle_thread(PyObject *self, PyObject *args, - PyObject *kwds); +DrgnObject *drgnpy_linux_helper_idle_task(PyObject *self, PyObject *args, + PyObject *kwds); DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds); diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 4710e0166..7b9b90595 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -62,23 +62,23 @@ DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, return res; } -DrgnObject *drgnpy_linux_helper_idle_thread(PyObject *self, PyObject *args, - PyObject *kwds) +DrgnObject *drgnpy_linux_helper_idle_task(PyObject *self, PyObject *args, + PyObject *kwds) { static char *keywords[] = {"prog", "cpu", NULL}; struct drgn_error *err; Program *prog; struct index_arg cpu = {}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&:idle_thread", - keywords, &Program_type, &prog, - index_converter, &cpu)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&:idle_task", keywords, + &Program_type, &prog, index_converter, + &cpu)) return NULL; DrgnObject *res = DrgnObject_alloc(prog); if (!res) return NULL; - err = linux_helper_idle_thread(&res->obj, cpu.uvalue); + err = linux_helper_idle_task(&res->obj, cpu.uvalue); if (err) { Py_DECREF(res); return set_drgn_error(err); diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 10e00f364..f67cd4e00 100644 --- 
a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -127,8 +127,7 @@ static PyMethodDef drgn_methods[] = { {"_linux_helper_per_cpu_ptr", (PyCFunction)drgnpy_linux_helper_per_cpu_ptr, METH_VARARGS | METH_KEYWORDS}, - {"_linux_helper_idle_thread", - (PyCFunction)drgnpy_linux_helper_idle_thread, + {"_linux_helper_idle_task", (PyCFunction)drgnpy_linux_helper_idle_task, METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_radix_tree_lookup", (PyCFunction)drgnpy_linux_helper_radix_tree_lookup, diff --git a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index 77d733510..cbdb02f45 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -6,7 +6,7 @@ from drgn.helpers.linux.cpumask import for_each_possible_cpu from drgn.helpers.linux.pid import find_task -from drgn.helpers.linux.sched import idle_thread, task_state_to_char +from drgn.helpers.linux.sched import idle_task, task_state_to_char from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, @@ -36,14 +36,11 @@ def test_task_state_to_char(self): os.waitpid(pid, 0) - def test_idle_thread(self): + def test_idle_task(self): if self.prog.type("struct task_struct").has_member("wake_cpu"): # SMP for cpu in for_each_possible_cpu(self.prog): - self.assertEqual( - idle_thread(self.prog, cpu).comm.string_(), - f"swapper/{cpu}".encode(), - ) + self.assertEqual(idle_task(self.prog, cpu).comm.string_(), f"swapper/{cpu}".encode()) else: # UP - self.assertEqual(idle_thread(self.prog, 0).comm.string_(), b"swapper") + self.assertEqual(idle_task(self.prog, 0).comm.string_(), b"swapper") From b341c212f4936c21900f3d554abcf49653bb5a7e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 16:06:23 -0800 Subject: [PATCH 133/139] tests: fix black error Signed-off-by: Omar Sandoval --- tests/helpers/linux/test_sched.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index 
cbdb02f45..6294cb3c2 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -40,7 +40,9 @@ def test_idle_task(self): if self.prog.type("struct task_struct").has_member("wake_cpu"): # SMP for cpu in for_each_possible_cpu(self.prog): - self.assertEqual(idle_task(self.prog, cpu).comm.string_(), f"swapper/{cpu}".encode()) + self.assertEqual( + idle_task(self.prog, cpu).comm.string_(), f"swapper/{cpu}".encode() + ) else: # UP self.assertEqual(idle_task(self.prog, 0).comm.string_(), b"swapper") From 2ff58a4d45250ae2f4951c38359aacadb114b372 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 16:51:15 -0800 Subject: [PATCH 134/139] libdrgn: linux: make per_cpu_ptr() support !SMP kernels Kernels built without multiprocessing support don't have __per_cpu_offset; instead, per_cpu_ptr() is a no-op. Make the helper do the same and update the test case to work on !SMP as well. Signed-off-by: Omar Sandoval --- libdrgn/linux_kernel_helpers.c | 35 +++++++++++++++++------------- tests/helpers/linux/__init__.py | 4 ++++ tests/helpers/linux/test_percpu.py | 12 ++++++++-- tests/helpers/linux/test_sched.py | 5 ++--- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 80897d7cb..fcd77b714 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -113,23 +113,28 @@ struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, drgn_object_init(&tmp, prog); err = drgn_program_find_object(prog, "__per_cpu_offset", NULL, DRGN_FIND_OBJECT_ANY, &tmp); - if (err) - goto out; - err = drgn_object_subscript(&tmp, &tmp, cpu); - if (err) - goto out; - union drgn_value per_cpu_offset; - err = drgn_object_read_integer(&tmp, &per_cpu_offset); - if (err) - goto out; + if (!err) { + err = drgn_object_subscript(&tmp, &tmp, cpu); + if (err) + goto out; + union drgn_value per_cpu_offset; + err = drgn_object_read_integer(&tmp, &per_cpu_offset); + 
if (err) + goto out; - uint64_t ptr_value; - err = drgn_object_read_unsigned(ptr, &ptr_value); - if (err) - goto out; + uint64_t ptr_value; + err = drgn_object_read_unsigned(ptr, &ptr_value); + if (err) + goto out; - err = drgn_object_set_unsigned(res, drgn_object_qualified_type(ptr), - ptr_value + per_cpu_offset.uvalue, 0); + err = drgn_object_set_unsigned(res, + drgn_object_qualified_type(ptr), + ptr_value + per_cpu_offset.uvalue, + 0); + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + err = drgn_object_copy(res, ptr); + } out: drgn_object_deinit(&tmp); return err; diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index ec419c0fb..68df7c258 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -103,6 +103,10 @@ def proc_blocked(pid): return f.read() != "running\n" +def smp_enabled(): + return bool(re.search(r"\bSMP\b", os.uname().version)) + + def parse_range_list(s): values = set() s = s.strip() diff --git a/tests/helpers/linux/test_percpu.py b/tests/helpers/linux/test_percpu.py index 5f077f3d8..5efce8b09 100644 --- a/tests/helpers/linux/test_percpu.py +++ b/tests/helpers/linux/test_percpu.py @@ -3,10 +3,18 @@ from drgn.helpers.linux.cpumask import for_each_possible_cpu from drgn.helpers.linux.percpu import per_cpu -from tests.helpers.linux import LinuxHelperTestCase +from tests.helpers.linux import LinuxHelperTestCase, smp_enabled class TestPerCpu(LinuxHelperTestCase): def test_per_cpu(self): + smp = smp_enabled() for cpu in for_each_possible_cpu(self.prog): - self.assertEqual(per_cpu(self.prog["runqueues"], cpu).cpu, cpu) + if smp: + self.assertEqual(per_cpu(self.prog["runqueues"], cpu).cpu, cpu) + else: + # struct rq::cpu only exists if CONFIG_SMP=y, so just check + # that we get something valid. 
+ self.assertEqual( + per_cpu(self.prog["runqueues"], cpu).idle.comm.string_(), b"swapper" + ) diff --git a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index 6294cb3c2..841ca0173 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -11,6 +11,7 @@ LinuxHelperTestCase, fork_and_pause, proc_state, + smp_enabled, wait_until, ) @@ -37,12 +38,10 @@ def test_task_state_to_char(self): os.waitpid(pid, 0) def test_idle_task(self): - if self.prog.type("struct task_struct").has_member("wake_cpu"): - # SMP + if smp_enabled(): for cpu in for_each_possible_cpu(self.prog): self.assertEqual( idle_task(self.prog, cpu).comm.string_(), f"swapper/{cpu}".encode() ) else: - # UP self.assertEqual(idle_task(self.prog, 0).comm.string_(), b"swapper") From ba93fd5a71950a695b68c902cd402c188ede74da Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 17:32:46 -0800 Subject: [PATCH 135/139] vmtest: add kdump kernel config options We would like to test drgn against kernel core dumps (e.g., for #129). One option would be to include some vmcore files in the repository and test against those. But those can be huge, and we'd need a lot of them to test different kernel versions. Instead, we can run vmtest, enable kdump, and trigger a crash. To do that, we first need to enable a few kernel config options. Signed-off-by: Omar Sandoval --- vmtest/kbuild.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index b9186a4da..55c4b88f4 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -KERNEL_LOCALVERSION = "-vmtest6" +KERNEL_LOCALVERSION = "-vmtest7" def kconfig() -> str: @@ -62,21 +62,24 @@ def kconfig() -> str: CONFIG_HW_RANDOM=m CONFIG_HW_RANDOM_VIRTIO=m -# drgn needs /proc/kcore for live debugging. -CONFIG_PROC_KCORE=y -# In some cases, it also needs /proc/kallsyms. 
-CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y - # drgn needs debug info. CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y -# Before Linux kernel commit 8757dc970f55 ("x86/crash: Define -# arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y") (in v5.6), some -# important information in VMCOREINFO is initialized by the kexec code. +# For testing live kernel debugging with /proc/kcore. +CONFIG_PROC_KCORE=y +# drgn needs /proc/kallsyms in some cases. Some test cases also need it. +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y + +# For testing kernel core dumps with /proc/vmcore. CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PROC_VMCORE=y + +# So that we can trigger a crash with /proc/sysrq-trigger. +CONFIG_MAGIC_SYSRQ=y # For block tests. CONFIG_BLK_DEV_LOOP=m From 2a0b4c8848ad4bdb80f3fae26dae686d64053926 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Dec 2021 23:25:40 -0800 Subject: [PATCH 136/139] vmtest: also add kexec_file_load() syscall config options We can avoid the need for the kexec tool if we load the kdump kernel ourselves, which is much easier with kexec_file_load(). Add the config options to enable it. Signed-off-by: Omar Sandoval --- vmtest/kbuild.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index 55c4b88f4..b7ac4b14e 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -KERNEL_LOCALVERSION = "-vmtest7" +KERNEL_LOCALVERSION = "-vmtest8" def kconfig() -> str: @@ -74,9 +74,13 @@ def kconfig() -> str: CONFIG_KALLSYMS_ALL=y # For testing kernel core dumps with /proc/vmcore. -CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_PROC_VMCORE=y +CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y +# Needed for CONFIG_KEXEC_FILE. +CONFIG_CRYPTO=y +CONFIG_CRYPTO_SHA256=y # So that we can trigger a crash with /proc/sysrq-trigger. 
CONFIG_MAGIC_SYSRQ=y From 2ce41c22ae82ea6d9700d97a0ecaa5549288fc27 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 Jan 2022 18:21:14 -0800 Subject: [PATCH 137/139] CONTRIBUTING: mention that _destroy functions should allow NULL This is another undocumented convention. Signed-off-by: Omar Sandoval --- CONTRIBUTING.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index c31cf1b49..51571301a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -81,6 +81,7 @@ A few other guidelines/conventions: * The matching function to free an allocated structure should be suffixed with ``_destroy``, e.g., ``void foo_destroy(struct foo *foo)``. If possible, the definition should be placed directly after the definition of ``_create``. + ``_destroy`` should usually allow a ``NULL`` argument, just like ``free()``. * Functions that return a result in a ``struct drgn_object *`` parameter should only modify the object if the function succeeds. From 69c069b09faa9a69a1b48288440234a603a655c3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 6 Jan 2022 18:23:27 -0800 Subject: [PATCH 138/139] libdrgn: allow NULL argument to drgn_stack_trace_destroy() This is one place where I broke the convention that I just documented. 
Signed-off-by: Omar Sandoval --- libdrgn/stack_trace.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index d345ffbba..73299643f 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -79,16 +79,18 @@ static void drgn_stack_trace_shrink_to_fit(struct drgn_stack_trace **trace, LIBDRGN_PUBLIC void drgn_stack_trace_destroy(struct drgn_stack_trace *trace) { - struct drgn_register_state *regs = NULL; - for (size_t i = 0; i < trace->num_frames; i++) { - if (trace->frames[i].regs != regs) { - drgn_register_state_destroy(regs); - regs = trace->frames[i].regs; + if (trace) { + struct drgn_register_state *regs = NULL; + for (size_t i = 0; i < trace->num_frames; i++) { + if (trace->frames[i].regs != regs) { + drgn_register_state_destroy(regs); + regs = trace->frames[i].regs; + } + free(trace->frames[i].scopes); } - free(trace->frames[i].scopes); + drgn_register_state_destroy(regs); + free(trace); } - drgn_register_state_destroy(regs); - free(trace); } LIBDRGN_PUBLIC size_t From ac2cadabcd589b6ec0bdbfe0689bb048d19f942c Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 6 Jan 2022 16:49:55 -0800 Subject: [PATCH 139/139] Add framework for testing in kdump Now that the vmtest kernel supports kdump, add a script that can be used to crash and enter the kdump environment on demand. Use that to crash after running the normal test suite so that we can run tests against /proc/vmcore. vmcore tests live in their own directory; presently the only test is a simple sanity check that ensures we can attach to /proc/vmcore. 
Signed-off-by: Omar Sandoval Signed-off-by: Kevin Svetlitski --- setup.py | 17 ++++++++-- tests/linux_kernel/__init__.py | 0 tests/linux_kernel/vmcore/__init__.py | 0 tests/linux_kernel/vmcore/test_vmcore.py | 15 +++++++++ vmtest/enter_kdump.py | 40 ++++++++++++++++++++++++ vmtest/vm.py | 4 ++- 6 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 tests/linux_kernel/__init__.py create mode 100644 tests/linux_kernel/vmcore/__init__.py create mode 100644 tests/linux_kernel/vmcore/test_vmcore.py create mode 100644 vmtest/enter_kdump.py diff --git a/setup.py b/setup.py index df6940bfb..d53bd92e1 100755 --- a/setup.py +++ b/setup.py @@ -187,9 +187,20 @@ def _run_vm(self, kernel_dir): import vmtest.vm - command = fr"""cd {shlex.quote(os.getcwd())} && - DRGN_RUN_LINUX_HELPER_TESTS=1 {shlex.quote(sys.executable)} -Bm \ - unittest discover -t . -s tests/helpers/linux {"-v" if self.verbose else ""}""" + command = fr""" +set -e + +cd {shlex.quote(os.getcwd())} +if "$BUSYBOX" [ -e /proc/vmcore ]; then + "$PYTHON" -Bm unittest discover -t . -s tests/linux_kernel/vmcore {"-v" if self.verbose else ""} +else + DRGN_RUN_LINUX_HELPER_TESTS=1 "$PYTHON" -Bm \ + unittest discover -t . -s tests/helpers/linux {"-v" if self.verbose else ""} + "$PYTHON" vmtest/enter_kdump.py + # We should crash and not reach this. 
+ exit 1 +fi +""" try: returncode = vmtest.vm.run_in_vm( command, Path(kernel_dir), Path(self.vmtest_dir) diff --git a/tests/linux_kernel/__init__.py b/tests/linux_kernel/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/linux_kernel/vmcore/__init__.py b/tests/linux_kernel/vmcore/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/linux_kernel/vmcore/test_vmcore.py b/tests/linux_kernel/vmcore/test_vmcore.py new file mode 100644 index 000000000..73468d2dd --- /dev/null +++ b/tests/linux_kernel/vmcore/test_vmcore.py @@ -0,0 +1,15 @@ +from pathlib import Path +import unittest + +from drgn import Program, ProgramFlags + +VMCORE_PATH = Path("/proc/vmcore") + + +@unittest.skipUnless(VMCORE_PATH.exists(), "not running in kdump") +class TestAttachToVMCore(unittest.TestCase): + def test_attach_to_vmcore(self): + prog = Program() + prog.set_core_dump("/proc/vmcore") + self.assertFalse(prog.flags & ProgramFlags.IS_LIVE) + self.assertTrue(prog.flags & ProgramFlags.IS_LINUX_KERNEL) diff --git a/vmtest/enter_kdump.py b/vmtest/enter_kdump.py new file mode 100644 index 000000000..a34710996 --- /dev/null +++ b/vmtest/enter_kdump.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +# This isn't great: it's specific to x86-64, both by virtue of the syscall +# number and because kexec_file_load isn't implemented on many architectures, +# especially on older kernels. + +import ctypes +import os +import re + +SYS_kexec_file_load = 320 # On x86-64. 
+KEXEC_FILE_ON_CRASH = 2 +KEXEC_FILE_NO_INITRAMFS = 4 + +syscall = ctypes.CDLL(None, use_errno=True).syscall +syscall.restype = ctypes.c_long + +with open("/proc/cmdline", "rb") as f: + cmdline = f.read().rstrip(b"\n") + cmdline = re.sub(rb"(^|\s)crashkernel=\S+", b"", cmdline) + # `nosmp` is required to avoid QEMU sporadically failing an internal assertion + # `nokaslr` is required to avoid sporadically failing to reserve space for the + # capture kernel + cmdline += b" nosmp nokaslr" + +with open(f"/lib/modules/{os.uname().release}/vmlinuz", "rb") as kernel: + if syscall( + ctypes.c_long(SYS_kexec_file_load), + ctypes.c_int(kernel.fileno()), + ctypes.c_int(-1), + ctypes.c_ulong(len(cmdline) + 1), + ctypes.c_char_p(cmdline + b"\0"), + ctypes.c_ulong(KEXEC_FILE_ON_CRASH | KEXEC_FILE_NO_INITRAMFS), + ): + errno = ctypes.get_errno() + raise OSError(errno, os.strerror(errno)) + +with open("/proc/sysrq-trigger", "w") as f: + f.write("c") diff --git a/vmtest/vm.py b/vmtest/vm.py index 7928a87f9..971e108ca 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -24,6 +24,7 @@ set -eu export BUSYBOX={busybox} +export PYTHON={python} trap '"$BUSYBOX" poweroff -f' EXIT @@ -214,6 +215,7 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: _INIT_TEMPLATE.format( _9PFS_MSIZE=_9PFS_MSIZE, busybox=shlex.quote(busybox), + python=shlex.quote(sys.executable), command=shlex.quote(command), ) ) @@ -246,7 +248,7 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: "-kernel", str(kernel_dir / "vmlinuz"), "-append", - f"rootfstype=9p rootflags=trans=virtio,cache=loose,msize={_9PFS_MSIZE} ro console=0,115200 panic=-1 init={init}", + f"rootfstype=9p rootflags=trans=virtio,cache=loose,msize={_9PFS_MSIZE} ro console=0,115200 panic=-1 crashkernel=256M init={init}", # fmt: on ], env=env,