diff --git a/.travis.yml b/.travis.yml index 10bea5571..31f9a4dfd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ install: # Upstream defaults to world-read-writeable /dev/kvm. Debian/Ubuntu override # this; see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=892945. We want # the upstream default. - - sudo echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /lib/udev/rules.d/99-fix-kvm.rules > /dev/null + - echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /lib/udev/rules.d/99-fix-kvm.rules > /dev/null - sudo udevadm control --reload-rules # On systemd >= 238 we can use udevadm trigger -w and remove udevadm settle. - sudo udevadm trigger /dev/kvm diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 000000000..06c948848 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,88 @@ +Contributing +============ + +Thanks for your interest in drgn! See below for how to build, test, code, and +submit changes for drgn. + +Building +-------- + +The easiest way to develop drgn is by building and running it locally. See the +`installation documentation +`_. + +Testing +------- + +.. highlight:: console + +Tests should be added for all features and bug fixes. + +drgn's test suite can be run with:: + + $ python3 setup.py test + +To run Linux kernel helper tests in a virtual machine on all supported kernels, +add ``-K``. See `vmtest `_ for more details. + +Tests can also be run manually with `unittest +`_ +after building locally:: + + $ python3 -m unittest discover -v + +To run Linux kernel helper tests on the running kernel, this must be run as +root, and debug information for the running kernel must be available. + +Coding Guidelines +----------------- + +* Core functionality should be implemented in ``libdrgn`` and exposed to Python + via the `C extension `_. Only the CLI and helpers should be + in pure Python. +* Linux kernel helpers should work on all supported kernels if possible. + +C +^ + +C code in drgn mostly follows the `Linux kernel coding style +`_ except +that drgn requires C11 or newer, so declarations may be mixed with code. + +A few other guidelines: + +* Functions that can fail should return a ``struct drgn_error *`` (and return + their result via an out parameter if necessary). +* Out parameters should be named ``ret`` (or suffixed with ``_ret`` if there + are multiple). +* Constants should be defined as enums or ``static const`` variables rather + than macros. + +drgn assumes some `implementation-defined behavior +`_ for sanity: + +* Signed integers are represented with two's complement. +* Bitwise operators on signed integers operate on the two's complement + representation. +* Right shift of a signed integer type is arithmetic. +* Conversion to a signed integer type is modular. +* Casting between pointers and integers does not change the bit representation. + +Python +^^^^^^ + +Python code in drgn is formatted with `black `_. +Code should be compatible with Python 3.6 and newer. + +Type hints should be provided for all public interfaces other than helpers +(including the C extension) and most private interfaces. + +Submitting PRs +-------------- + +Pull requests and issues are always welcome. Feel free to start a discussion +with a prototype. + +All commits must be signed off (i.e., ``Signed-off-by: Jane Doe +``) as per the `Developer Certificate of Origin +`_. ``git commit -s`` can do this for you. diff --git a/README.rst b/README.rst index 6fc8d8330..c1d888465 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,8 @@ Installation .. start-install-dependencies +.. highlight:: console + Install dependencies: Arch Linux:: @@ -81,9 +83,7 @@ Optionally, install: .. end-install-dependencies -Then, run: - -.. code-block:: console +Then, run:: $ sudo pip3 install drgn diff --git a/docs/installation.rst b/docs/installation.rst index 0e09df42c..6c8c430aa 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,7 +1,10 @@ Installation ============ -.. highlight:: console +There are several options for installing drgn. + +Dependencies +------------ drgn depends on: @@ -30,8 +33,13 @@ The build requires: :start-after: start-install-dependencies :end-before: end-install-dependencies -The latest release of drgn can be installed globally with `pip -`_:: +Installation +------------ + +.. highlight:: console + +After installing dependencies, the latest release of drgn can be installed +globally with `pip `_:: $ sudo pip3 install drgn $ drgn --help @@ -53,9 +61,12 @@ drgn globally:: (drgenv) $ pip3 install drgn (drgenv) $ drgn --help +Development +----------- + For development, drgn can be built and run locally:: - $ python3 setup.py egg_info build_ext -i + $ CFLAGS="-Wall -Werror -g -O2" python3 setup.py egg_info build_ext -i $ python3 -m drgn --help libkdumpfile diff --git a/libdrgn/arch_x86_64.c.in b/libdrgn/arch_x86_64.c.in index a64a5d15d..c6e4399ca 100644 --- a/libdrgn/arch_x86_64.c.in +++ b/libdrgn/arch_x86_64.c.in @@ -257,67 +257,25 @@ out: static struct drgn_error * linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread, - const struct drgn_object *task_obj, - const void *prstatus, - size_t prstatus_size) + const struct drgn_object *task_obj) { struct drgn_error *err; struct drgn_program *prog = task_obj->prog; struct drgn_object sp_obj; - struct drgn_qualified_type frame_type; - uint64_t sp; - Dwarf_Word dwarf_reg; drgn_object_init(&sp_obj, prog); - if (prstatus) { - /* - * If the stack pointer in PRSTATUS is within this task's stack, - * then we can use it. Otherwise, the task either wasn't running - * or was in the middle of context switching. Either way, we - * should use the saved registers instead. - */ - uint64_t thread_size; - uint64_t stack; - - err = linux_kernel_get_thread_size(prog, &thread_size); - if (err) - goto out; - err = drgn_object_member_dereference(&sp_obj, task_obj, - "stack"); - if (err) - goto out; - err = drgn_object_read_unsigned(&sp_obj, &stack); - if (err) - goto out; - - if (prstatus_size < 272) { - err = drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "registers are truncated"); - goto out; - } - memcpy(&sp, (char *)prstatus + 264, sizeof(sp)); - if (drgn_program_bswap(prog)) - sp = bswap_64(sp); - if (sp > stack && sp <= stack + thread_size) { - err = prstatus_set_initial_registers_x86_64(prog, - thread, - prstatus, - prstatus_size); - goto out; - } - } - err = drgn_object_member_dereference(&sp_obj, task_obj, "thread"); if (err) goto out; err = drgn_object_member(&sp_obj, &sp_obj, "sp"); if (err) goto out; + uint64_t sp; err = drgn_object_read_unsigned(&sp_obj, &sp); if (err) goto out; - dwarf_reg = sp; + Dwarf_Word dwarf_reg = sp; /* rsp is register 7. */ if (!dwfl_thread_state_registers(thread, 7, 1, &dwarf_reg)) { err = drgn_error_libdwfl(); @@ -330,6 +288,7 @@ linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread, * inactive_task_frame, which we can use to get most registers. Before * that, it points to bp. */ + struct drgn_qualified_type frame_type; err = drgn_program_find_type(prog, "struct inactive_task_frame *", NULL, &frame_type); if (!err) { diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 92d8798a7..b78d84e3b 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -242,11 +242,7 @@ struct abbrev_table { struct uint8_vector insns; }; -static void abbrev_table_init(struct abbrev_table *abbrev) -{ - uint32_vector_init(&abbrev->decls); - uint8_vector_init(&abbrev->insns); -} +#define ABBREV_TABLE_INIT { VECTOR_INIT, VECTOR_INIT } static void abbrev_table_deinit(struct abbrev_table *abbrev) { @@ -1310,12 +1306,10 @@ static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex, #pragma omp parallel { - struct compilation_unit_vector cus; - size_t i; + struct compilation_unit_vector cus = VECTOR_INIT; - compilation_unit_vector_init(&cus); #pragma omp for schedule(dynamic) - for (i = 0; i < num_unindexed; i++) { + for (size_t i = 0; i < num_unindexed; i++) { struct drgn_error *module_err; const char *name; @@ -1723,25 +1717,22 @@ read_file_name_table(struct drgn_dwarf_index *dindex, Elf_Data *debug_line = cu->sections[SECTION_DEBUG_LINE]; const char *ptr = section_ptr(debug_line, stmt_list); const char *end = section_end(debug_line); - struct siphash_vector directories; - - siphash_vector_init(&directories); err = skip_lnp_header(cu, &ptr, end); if (err) return err; + struct siphash_vector directories = VECTOR_INIT; for (;;) { - struct siphash *hash; const char *path; size_t path_len; - if (!read_string(&ptr, end, &path, &path_len)) return drgn_eof(); if (!path_len) break; - hash = siphash_vector_append_entry(&directories); + struct siphash *hash = + siphash_vector_append_entry(&directories); if (!hash) { err = &drgn_enomem; goto out; @@ -1753,10 +1744,6 @@ read_file_name_table(struct drgn_dwarf_index *dindex, for (;;) { const char *path; size_t path_len; - uint64_t directory_index; - struct siphash hash; - uint64_t file_name_hash; - if (!read_string(&ptr, end, &path, &path_len)) { err = drgn_eof(); goto out; @@ -1764,6 +1751,7 @@ read_file_name_table(struct drgn_dwarf_index *dindex, if (!path_len) break; + uint64_t directory_index; if ((err = read_uleb128(&ptr, end, &directory_index))) goto out; /* mtime, size */ @@ -1779,13 +1767,14 @@ read_file_name_table(struct drgn_dwarf_index *dindex, goto out; } + struct siphash hash; if (directory_index) hash = directories.data[directory_index - 1]; else siphash_init(&hash, siphash_key); siphash_update(&hash, path, path_len); - file_name_hash = siphash_final(&hash); + uint64_t file_name_hash = siphash_final(&hash); if (!uint64_vector_append(file_name_table, &file_name_hash)) { err = &drgn_enomem; goto out; @@ -2059,8 +2048,8 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, struct compilation_unit *cu) { struct drgn_error *err; - struct abbrev_table abbrev; - struct uint64_vector file_name_table; + struct abbrev_table abbrev = ABBREV_TABLE_INIT; + struct uint64_vector file_name_table = VECTOR_INIT; Elf_Data *debug_abbrev = cu->sections[SECTION_DEBUG_ABBREV]; const char *debug_abbrev_end = section_end(debug_abbrev); const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11]; @@ -2073,9 +2062,6 @@ static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex, unsigned int depth = 0; uint64_t enum_die_offset = 0; - abbrev_table_init(&abbrev); - uint64_vector_init(&file_name_table); - if ((err = read_abbrev_table(section_ptr(debug_abbrev, cu->debug_abbrev_offset), debug_abbrev_end, cu, &abbrev))) @@ -2265,8 +2251,8 @@ drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex, bool report_from_dwfl) { struct drgn_error *err; - struct drgn_dwarf_module_vector unindexed; - struct compilation_unit_vector cus; + struct drgn_dwarf_module_vector unindexed = VECTOR_INIT; + struct compilation_unit_vector cus = VECTOR_INIT; dwfl_report_end(dindex->dwfl, NULL, NULL); if (report_from_dwfl && @@ -2275,8 +2261,6 @@ drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex, err = &drgn_enomem; goto err; } - drgn_dwarf_module_vector_init(&unindexed); - compilation_unit_vector_init(&cus); err = drgn_dwarf_index_get_unindexed(dindex, &unindexed); if (err) goto err; diff --git a/libdrgn/dwarf_info_cache.c b/libdrgn/dwarf_info_cache.c index 2292fe5bf..51b1f8a3c 100644 --- a/libdrgn/dwarf_info_cache.c +++ b/libdrgn/dwarf_info_cache.c @@ -534,19 +534,9 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, struct drgn_type **ret, bool *should_free) { struct drgn_error *err; - struct drgn_type *type; - struct drgn_type_member_vector members; + const char *dw_tag_str; uint64_t dw_tag; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - const char *tag; - bool declaration; - Dwarf_Die child; - int size; - bool little_endian; - int r; - switch (kind) { case DRGN_TYPE_STRUCT: dw_tag_str = "DW_TAG_structure_type"; @@ -564,7 +554,10 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, UNREACHABLE(); } - attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem); + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; if (attr) { tag = dwarf_formstring(attr); if (!tag) { @@ -576,6 +569,7 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, tag = NULL; } + bool declaration; if (dwarf_flag(die, DW_AT_declaration, &declaration)) { return drgn_error_format(DRGN_ERROR_OTHER, "%s has invalid DW_AT_declaration", @@ -593,7 +587,7 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } *should_free = true; - type = malloc(sizeof(*type)); + struct drgn_type *type = malloc(sizeof(*type)); if (!type) return &drgn_enomem; @@ -615,9 +609,8 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, return NULL; } - drgn_type_member_vector_init(&members); - - size = dwarf_bytesize(die); + struct drgn_type_member_vector members = VECTOR_INIT; + int size = dwarf_bytesize(die); if (size == -1) { err = drgn_error_format(DRGN_ERROR_OTHER, "%s has missing or invalid DW_AT_byte_size", @@ -625,13 +618,13 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, goto err; } - little_endian = dwarf_die_is_little_endian(die); - r = dwarf_child(die, &child); + bool little_endian = dwarf_die_is_little_endian(die); + Dwarf_Die child; + int r = dwarf_child(die, &child); while (r == 0) { if (dwarf_tag(&child) == DW_TAG_member) { - struct drgn_type_member *member; - - member = drgn_type_member_vector_append_entry(&members); + struct drgn_type_member *member = + drgn_type_member_vector_append_entry(&members); if (!member) { err = &drgn_enomem; goto err; @@ -670,19 +663,17 @@ drgn_compound_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, * up. */ if (members.size > 1) { - struct drgn_type_member *member; - - member = &drgn_type_members(type)[members.size - 1]; + struct drgn_type_member *member = + &drgn_type_members(type)[members.size - 1]; /* * The type may have already been evaluated if it's a * bit field. Arrays can't be bit fields, so it's okay * if we missed it. */ if (!drgn_lazy_type_is_evaluated(&member->type)) { - struct drgn_type_from_dwarf_thunk *thunk; - - thunk = container_of(member->type.thunk, - struct drgn_type_from_dwarf_thunk, + struct drgn_type_from_dwarf_thunk *thunk = + container_of(member->type.thunk, struct + drgn_type_from_dwarf_thunk, thunk); thunk->can_be_incomplete_array = true; } @@ -813,18 +804,11 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, struct drgn_type **ret, bool *should_free) { struct drgn_error *err; - struct drgn_type *type; - struct drgn_type_enumerator_vector enumerators; - struct drgn_type *compatible_type; + Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); const char *tag; - bool declaration; - Dwarf_Die child; - bool is_signed = false; - int r; - - attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem); if (attr) { tag = dwarf_formstring(attr); if (!tag) @@ -834,6 +818,7 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, tag = NULL; } + bool declaration; if (dwarf_flag(die, DW_AT_declaration, &declaration)) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_TAG_enumeration_type has invalid DW_AT_declaration"); @@ -851,7 +836,7 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } *should_free = true; - type = malloc(sizeof(*type)); + struct drgn_type *type = malloc(sizeof(*type)); if (!type) return &drgn_enomem; @@ -861,9 +846,11 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, return NULL; } - drgn_type_enumerator_vector_init(&enumerators); - - r = dwarf_child(die, &child); + struct drgn_type_enumerator_vector enumerators = + VECTOR_INIT; + bool is_signed = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); while (r == 0) { int tag; @@ -889,6 +876,7 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, } drgn_type_enumerator_vector_shrink_to_fit(&enumerators); + struct drgn_type *compatible_type; r = dwarf_type(die, &child); if (r == -1) { err = drgn_error_create(DRGN_ERROR_OTHER, @@ -901,7 +889,6 @@ drgn_enum_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, Dwarf_Die *die, goto err; } else { struct drgn_qualified_type qualified_compatible_type; - err = drgn_type_from_dwarf(dicache, &child, &qualified_compatible_type); if (err) @@ -1043,15 +1030,10 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, struct drgn_type **ret) { struct drgn_error *err; - struct drgn_type *type; - struct drgn_qualified_type element_type; - Dwarf_Die child; - struct array_dimension_vector dimensions; + struct array_dimension_vector dimensions = VECTOR_INIT; struct array_dimension *dimension; - int r; - - array_dimension_vector_init(&dimensions); - r = dwarf_child(die, &child); + Dwarf_Die child; + int r = dwarf_child(die, &child); while (r == 0) { if (dwarf_tag(&child) == DW_TAG_subrange_type) { dimension = array_dimension_vector_append_entry(&dimensions); @@ -1075,6 +1057,7 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, dimension->is_complete = false; } + struct drgn_qualified_type element_type; err = drgn_type_from_dwarf_child(dicache, die, drgn_language_or_default(lang), "DW_TAG_array_type", false, false, @@ -1083,6 +1066,7 @@ drgn_array_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, goto out; *is_incomplete_array_ret = !dimensions.data[0].is_complete; + struct drgn_type *type; do { dimension = array_dimension_vector_pop(&dimensions); if (dimension->is_complete) { @@ -1151,33 +1135,21 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, struct drgn_type **ret) { struct drgn_error *err; - const char *tag_name; - struct drgn_type *type; - struct drgn_type_parameter_vector parameters; - struct drgn_qualified_type return_type; - Dwarf_Die child; - bool is_variadic = false; - int r; - if (dwarf_tag(die) == DW_TAG_subroutine_type) - tag_name = "DW_TAG_subroutine_type"; - else - tag_name = "DW_TAG_subprogram"; - - type = malloc(sizeof(*type)); + struct drgn_type *type = malloc(sizeof(*type)); if (!type) return &drgn_enomem; - drgn_type_parameter_vector_init(¶meters); - - r = dwarf_child(die, &child); + const char *tag_name = + dwarf_tag(die) == DW_TAG_subroutine_type ? + "DW_TAG_subroutine_type" : "DW_TAG_subprogram"; + struct drgn_type_parameter_vector parameters = VECTOR_INIT; + bool is_variadic = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); while (r == 0) { - int tag; - - tag = dwarf_tag(&child); + int tag = dwarf_tag(&child); if (tag == DW_TAG_formal_parameter) { - struct drgn_type_parameter *parameter; - if (is_variadic) { err = drgn_error_format(DRGN_ERROR_OTHER, "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", @@ -1185,7 +1157,8 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, goto err; } - parameter = drgn_type_parameter_vector_append_entry(¶meters); + struct drgn_type_parameter *parameter = + drgn_type_parameter_vector_append_entry(¶meters); if (!parameter) { err = &drgn_enomem; goto err; @@ -1213,6 +1186,7 @@ drgn_function_type_from_dwarf(struct drgn_dwarf_info_cache *dicache, } drgn_type_parameter_vector_shrink_to_fit(¶meters); + struct drgn_qualified_type return_type; err = drgn_type_from_dwarf_child(dicache, die, drgn_language_or_default(lang), tag_name, true, true, NULL, diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 32652af7f..ccfbfac63 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -135,6 +135,8 @@ struct hash_pair hash_table_hash(const key_type *key); * * The new hash table is empty. It must be deinitialized with @ref * hash_table_deinit(). + * + * @sa HASH_TABLE_INIT */ void hash_table_init(struct hash_table *table); @@ -1188,6 +1190,15 @@ DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) DEFINE_HASH_SET_TYPE(table, key_type) \ DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) +/** + * Empty hash table initializer. + * + * This can be used to initialize a hash table when declaring it. + * + * @sa hash_table_init() + */ +#define HASH_TABLE_INIT { hash_table_empty_chunk } + /** * @defgroup HashTableHelpers Hash table helpers * @@ -1226,7 +1237,7 @@ static inline struct hash_pair hash_pair_from_non_avalanching_hash(size_t hash) { #if SIZE_MAX == 0xffffffffffffffff #ifdef __SSE4_2__ -/* 64-bit with SSE4.2 uses CRC32 */ + /* 64-bit with SSE4.2 uses CRC32 */ size_t c = _mm_crc32_u64(0, hash); return (struct hash_pair){ @@ -1234,7 +1245,7 @@ static inline struct hash_pair hash_pair_from_non_avalanching_hash(size_t hash) .second = (c >> 24) | 0x80, }; #else -/* 64-bit without SSE4.2 uses a 128-bit multiplication-based mixer */ + /* 64-bit without SSE4.2 uses a 128-bit multiplication-based mixer */ static const uint64_t multiplier = UINT64_C(0xc4ceb9fe1a85ec53); uint64_t hi, lo; @@ -1244,12 +1255,12 @@ static inline struct hash_pair hash_pair_from_non_avalanching_hash(size_t hash) hash *= multiplier; return (struct hash_pair){ .first = hash >> 22, - .second = (hash >> 15) | 0x80, + .second = ((hash >> 15) & 0x7f) | 0x80, }; #endif #elif SIZE_MAX == 0xffffffff -/* 32-bit with SSE4.2 uses CRC32 */ #ifdef __SSE4_2__ + /* 32-bit with SSE4.2 uses CRC32 */ size_t c = _mm_crc32_u32(0, hash); return (struct hash_pair){ @@ -1257,7 +1268,7 @@ static inline struct hash_pair hash_pair_from_non_avalanching_hash(size_t hash) .second = (uint8_t)(~(c >> 25)), }; #else -/* 32-bit without SSE4.2 uses the 32-bit Murmur2 finalizer */ + /* 32-bit without SSE4.2 uses the 32-bit Murmur2 finalizer */ hash ^= hash >> 13; hash *= 0x5bd1e995; hash ^= hash >> 15; diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index b74910c8d..df4f6800a 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1682,15 +1682,12 @@ static const char *token_spelling[] = { DEFINE_HASH_MAP(c_keyword_map, struct string, int, string_hash, string_eq); -static struct c_keyword_map c_keywords; +static struct c_keyword_map c_keywords = HASH_TABLE_INIT; __attribute__((constructor(101))) static void c_keywords_init(void) { - int i; - - c_keyword_map_init(&c_keywords); - for (i = MIN_KEYWORD_TOKEN; i <= MAX_KEYWORD_TOKEN; i++) { + for (int i = MIN_KEYWORD_TOKEN; i <= MAX_KEYWORD_TOKEN; i++) { struct c_keyword_map_entry entry = { .key = { .str = token_spelling[i], @@ -1698,7 +1695,6 @@ static void c_keywords_init(void) }, .value = i, }; - if (c_keyword_map_insert(&c_keywords, &entry, NULL) != 1) abort(); } diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 0c2d0bc22..964f541a3 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -820,15 +820,19 @@ static struct drgn_error *kmod_index_init(struct kmod_index *index, static const char *kmod_index_find(struct kmod_index *index, const char *key) { + static const uint32_t INDEX_NODE_MASK = UINT32_C(0x0fffffff); + static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); + static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); + static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); const char *ptr = index->ptr + 8; uint32_t offset; for (;;) { if (!read_be32(&ptr, index->end, &offset)) return NULL; - ptr = index->ptr + (offset & 0x0fffffffU); + ptr = index->ptr + (offset & INDEX_NODE_MASK); - if (offset & 0x80000000U) { + if (offset & INDEX_NODE_PREFIX) { const char *prefix; size_t prefix_len; @@ -840,7 +844,7 @@ static const char *kmod_index_find(struct kmod_index *index, const char *key) key += prefix_len; } - if (offset & 0x20000000U) { + if (offset & INDEX_NODE_CHILDS) { uint8_t first, last; if (!read_u8(&ptr, index->end, &first) || @@ -864,7 +868,7 @@ static const char *kmod_index_find(struct kmod_index *index, const char *key) break; } } - if (!(offset & 0x40000000U)) + if (!(offset & INDEX_NODE_VALUES)) return NULL; return ptr; } @@ -1042,23 +1046,16 @@ cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf, uint64_t *start_ret, uint64_t *end_ret) { struct drgn_error *err; - uint64_t start = UINT64_MAX, end = 0; - size_t shstrndx; - Elf_Scn *scn = NULL; - struct elf_scn_name_map scn_map; - struct kernel_module_section_iterator section_it; - const char *name; - uint64_t address; + size_t shstrndx; if (elf_getshdrstrndx(elf, &shstrndx)) return drgn_error_libelf(); - elf_scn_name_map_init(&scn_map); + struct elf_scn_name_map scn_map = HASH_TABLE_INIT; + Elf_Scn *scn = NULL; while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - struct elf_scn_name_map_entry entry; - - shdr = gelf_getshdr(scn, &shdr_mem); + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); if (!shdr) { err = drgn_error_libelf(); goto out_scn_map; @@ -1067,7 +1064,10 @@ cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf, if (!(shdr->sh_flags & SHF_ALLOC)) continue; - entry.key = elf_strptr(elf, shstrndx, shdr->sh_name); + struct elf_scn_name_map_entry entry = { + .key = elf_strptr(elf, shstrndx, shdr->sh_name), + .value = scn, + }; /* * .init sections are freed once the module is initialized, but * they remain in the section list. Ignore them so we don't get @@ -1075,7 +1075,6 @@ cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf, */ if (!entry.key || strstartswith(entry.key, ".init")) continue; - entry.value = scn; if (elf_scn_name_map_insert(&scn_map, &entry, NULL) == -1) { err = &drgn_enomem; @@ -1083,19 +1082,21 @@ cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf, } } + uint64_t start = UINT64_MAX, end = 0; + struct kernel_module_section_iterator section_it; err = kernel_module_section_iterator_init(§ion_it, kmod_it); if (err) goto out_scn_map; + const char *name; + uint64_t address; while (!(err = kernel_module_section_iterator_next(§ion_it, &name, &address))) { - struct elf_scn_name_map_iterator it; - - it = elf_scn_name_map_search(&scn_map, &name); + struct elf_scn_name_map_iterator it = + elf_scn_name_map_search(&scn_map, &name); if (it.entry) { - GElf_Shdr *shdr, shdr_mem; - uint64_t section_end; - - shdr = gelf_getshdr(it.entry->value, &shdr_mem); + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(it.entry->value, + &shdr_mem); if (!shdr) { err = drgn_error_libelf(); break; @@ -1105,6 +1106,7 @@ cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf, err = drgn_error_libelf(); break; } + uint64_t section_end; if (__builtin_add_overflow(address, shdr->sh_size, §ion_end)) section_end = UINT64_MAX; @@ -1340,11 +1342,6 @@ report_kernel_modules(struct drgn_program *prog, bool vmlinux_is_pending) { struct drgn_error *err; - struct kernel_module_table kmod_table; - struct depmod_index depmod; - size_t module_name_offset = 0; - size_t i; - struct kernel_module_table_iterator it; if (!num_kmods && !report_default) return NULL; @@ -1363,10 +1360,10 @@ report_kernel_modules(struct drgn_program *prog, return err; } + size_t module_name_offset = 0; if (need_module_definition) { struct drgn_qualified_type module_type; struct drgn_member_info name_member; - err = drgn_program_find_type(prog, "struct module", NULL, &module_type); if (!err) { @@ -1382,12 +1379,12 @@ report_kernel_modules(struct drgn_program *prog, module_name_offset = name_member.bit_offset / 8; } - kernel_module_table_init(&kmod_table); + struct kernel_module_table kmod_table = HASH_TABLE_INIT; + struct depmod_index depmod; + struct kernel_module_table_iterator it; depmod.modules_dep.ptr = NULL; - for (i = 0; i < num_kmods; i++) { + for (size_t i = 0; i < num_kmods; i++) { struct kernel_module_file *kmod = &kmods[i]; - struct hash_pair hp; - if (!kmod->name) { err = get_kernel_module_name_from_this_module(kmod->this_module_scn, module_name_offset, @@ -1411,7 +1408,7 @@ report_kernel_modules(struct drgn_program *prog, } } - hp = kernel_module_table_hash(&kmod->name); + struct hash_pair hp = kernel_module_table_hash(&kmod->name); it = kernel_module_table_search_hashed(&kmod_table, &kmod->name, hp); if (it.entry) { @@ -1437,7 +1434,6 @@ report_kernel_modules(struct drgn_program *prog, /* Anything left over was not loaded. */ for (it = kernel_module_table_first(&kmod_table); it.entry; ) { struct kernel_module_file *kmod = *it.entry; - it = kernel_module_table_delete_iterator(&kmod_table, it); do { err = drgn_dwarf_index_report_elf(dindex, kmod->path, diff --git a/libdrgn/object.c b/libdrgn/object.c index 5361dedee..c35d9932c 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -565,11 +565,9 @@ drgn_object_dereference_offset(struct drgn_object *res, /* * / and % truncate towards 0. Here, we want to truncate towards - * negative infinity. As long as we have an arithmetic right shift, we - * can accomplish that by replacing "/ 8" with ">> 3" and "% 8" with - * "& 7". + * negative infinity. We can accomplish that by replacing "/ 8" with an + * arithmetic shift ">> 3" and "% 8" with "& 7". */ - static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); address += bit_offset >> 3; bit_offset &= 7; return drgn_object_set_reference(res, qualified_type, address, @@ -2167,11 +2165,6 @@ struct drgn_error *drgn_op_rshift_impl(struct drgn_object *res, err = drgn_object_convert_signed(lhs, bit_size, &svalue); if (err) return err; - /* - * Right shift of a negative integer is implementation-defined. - * GCC always uses an arithmetic shift. - */ - static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); if (shift < bit_size) svalue >>= shift; else if (svalue >= 0) diff --git a/libdrgn/platform.h b/libdrgn/platform.h index c97608ec8..8510cc45a 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -65,35 +65,8 @@ struct drgn_architecture_info { Dwfl_Thread *, const void *, size_t); - /* - * Get a task's registers from the task_struct or PRSTATUS note as - * appropriate. - * - * The given PRSTATUS note is for the CPU that the task is assigned to, - * which may or may not be for the given task. This callback must - * determine that (typically by checking whether the stack pointer in - * PRSTATUS lies within the task's stack). - * - * We find the PRSTATUS note by CPU rather than by PID for two reasons: - * - * 1. The PID is populated by the kernel from "current" (the current - * task) via a non-maskable interrupt (NMI). During a context switch, - * the stack pointer and current are not updated atomically, so if - * the NMI arrives in the middle of a context switch, the stack - * pointer may not actually be that of current. Therefore, the stack - * pointer in PRSTATUS may not actually be for the PID in PRSTATUS. - * - * We go through all of this trouble because blindly trusting the PID - * could result in a stack trace for the wrong task, which we want to - * avoid at all costs. - * - * 2. There is an idle task with PID 0 for each CPU, so for an idle task - * we have no choice but to find the note by CPU. - */ struct drgn_error *(*linux_kernel_set_initial_registers)(Dwfl_Thread *, - const struct drgn_object *, - const void *prstatus, - size_t prstatus_size); + const struct drgn_object *); struct drgn_error *(*linux_kernel_get_page_offset)(struct drgn_program *, uint64_t *); struct drgn_error *(*linux_kernel_get_vmemmap)(struct drgn_program *, diff --git a/libdrgn/program.c b/libdrgn/program.c index 72fbfe832..bbd45f369 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -738,33 +738,37 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, return err; } +static uint32_t get_prstatus_pid(struct drgn_program *prog, const char *data, + size_t size) +{ + uint32_t pr_pid; + memcpy(&pr_pid, data + (drgn_program_is_64_bit(prog) ? 32 : 24), + sizeof(pr_pid)); + if (drgn_program_bswap(prog)) + pr_pid = bswap_32(pr_pid); + return pr_pid; +} + struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, - char *data, size_t size) + const char *data, + size_t size) { + if (size < (drgn_program_is_64_bit(prog) ? 36 : 28)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "NT_PRSTATUS is truncated"); + } if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - struct string *entry; - - entry = drgn_prstatus_vector_append_entry(&prog->prstatus_vector); + struct string *entry = + drgn_prstatus_vector_append_entry(&prog->prstatus_vector); if (!entry) return &drgn_enomem; entry->str = data; entry->len = size; } else { - struct drgn_prstatus_map_entry entry; - size_t pr_pid_offset; - uint32_t pr_pid; - - pr_pid_offset = drgn_program_is_64_bit(prog) ? 32 : 24; - if (size < pr_pid_offset + sizeof(pr_pid)) - return NULL; - - memcpy(&pr_pid, data + pr_pid_offset, sizeof(pr_pid)); - if (drgn_program_bswap(prog)) - pr_pid = bswap_32(pr_pid); - - entry.key = pr_pid; - entry.value.str = data; - entry.value.len = size; + struct drgn_prstatus_map_entry entry = { + .key = get_prstatus_pid(prog, data, size), + .value = { data, size }, + }; if (drgn_prstatus_map_insert(&prog->prstatus_map, &entry, NULL) == -1) return &drgn_enomem; @@ -856,7 +860,8 @@ static struct drgn_error *drgn_program_cache_prstatus(struct drgn_program *prog) struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, uint32_t cpu, - struct string *ret) + struct string *ret, + uint32_t *tid_ret) { struct drgn_error *err; @@ -867,6 +872,7 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, if (cpu < prog->prstatus_vector.size) { *ret = prog->prstatus_vector.data[cpu]; + *tid_ret = get_prstatus_pid(prog, ret->str, ret->len); } else { ret->str = NULL; ret->len = 0; @@ -1023,13 +1029,9 @@ drgn_program_read_c_string(struct drgn_program *prog, uint64_t address, bool physical, size_t max_size, char **ret) { struct drgn_error *err; - struct char_vector str; - - char_vector_init(&str); + struct char_vector str = VECTOR_INIT; for (;;) { - char *c; - - c = char_vector_append_entry(&str); + char *c = char_vector_append_entry(&str); if (!c) { char_vector_deinit(&str); return &drgn_enomem; diff --git a/libdrgn/program.h b/libdrgn/program.h index f4db36c6c..7efbdcf23 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -190,10 +190,12 @@ struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret); * * @param[out] ret Returned note data. If not found, ret->str is set to * @c NULL and ret->len is set to zero. + * @param[out] tid_ret Returned thread ID of note. */ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, uint32_t cpu, - struct string *ret); + struct string *ret, + uint32_t *tid_ret); /** * Find the @c NT_PRSTATUS note for the given thread ID. @@ -214,7 +216,8 @@ struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, * @param[in] size Size of data in note. */ struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, - char *data, size_t size); + const char *data, + size_t size); /* * Like @ref drgn_program_find_symbol_by_address(), but @p ret is already diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index bcd0d0959..479654551 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -426,17 +426,15 @@ static PyObject *Program_load_debug_info(Program *self, PyObject *args, static char *keywords[] = {"paths", "default", "main", NULL}; struct drgn_error *err; PyObject *paths_obj = Py_None; - struct path_arg_vector path_args; - const char **paths = NULL; int load_default = 0; int load_main = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Opp:load_debug_info", keywords, &paths_obj, &load_default, &load_main)) return NULL; - path_arg_vector_init(&path_args); + struct path_arg_vector path_args = VECTOR_INIT; + const char **paths = NULL; if (paths_obj != Py_None) { Py_ssize_t length_hint; PyObject *it, *item; diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index d3c344df8..4224e9a05 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -256,7 +256,6 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread, /* First, try pt_regs. */ if (prog->stack_trace_obj) { bool is_pt_regs; - err = drgn_get_stack_trace_obj(&obj, prog, &is_pt_regs); if (err) goto out; @@ -275,8 +274,6 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread, goto out; } } else if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - bool found; - err = drgn_program_find_object(prog, "init_pid_ns", NULL, DRGN_FIND_OBJECT_ANY, &tmp); if (err) @@ -287,6 +284,7 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread, err = linux_helper_find_task(&obj, &tmp, prog->stack_trace_tid); if (err) goto out; + bool found; err = drgn_object_bool(&obj, &found); if (err) goto out; @@ -319,29 +317,62 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread, } else { goto out; } - prstatus.str = NULL; - prstatus.len = 0; } else { + /* + * For kernel core dumps, we look up the PRSTATUS note + * by CPU rather than by PID. This is because there is + * an idle task with PID 0 for each CPU, so we must find + * the idle task by CPU. Rather than making PID 0 a + * special case, we handle all tasks this way. + */ union drgn_value value; - uint32_t cpu; - err = drgn_object_member_dereference(&tmp, &obj, "cpu"); if (!err) { err = drgn_object_read_integer(&tmp, &value); if (err) goto out; - cpu = value.uvalue; } else if (err->code == DRGN_ERROR_LOOKUP) { /* !SMP. Must be CPU 0. */ drgn_error_destroy(err); - cpu = 0; + value.uvalue = 0; } else { goto out; } - err = drgn_program_find_prstatus_by_cpu(prog, cpu, - &prstatus); + uint32_t prstatus_tid; + err = drgn_program_find_prstatus_by_cpu(prog, + value.uvalue, + &prstatus, + &prstatus_tid); if (err) goto out; + if (prstatus.str) { + /* + * The PRSTATUS note is for the CPU that the + * task is assigned to, but it is not + * necessarily for this task. Only use it if the + * PID matches. + * + * Note that this isn't perfect: the PID is + * populated by the kernel from "current" (the + * current task) via a non-maskable interrupt + * (NMI). During a context switch, the stack + * pointer and current are not updated + * atomically, so if the NMI arrives in the + * middle of a context switch, the stack pointer + * may not actually be that of current. + * Therefore, the stack pointer in PRSTATUS may + * not actually be for the PID in PRSTATUS. + * Unfortunately, we can't easily fix this. + */ + err = drgn_object_member_dereference(&tmp, &obj, "pid"); + if (err) + goto out; + err = drgn_object_read_integer(&tmp, &value); + if (err) + goto out; + if (prstatus_tid == value.uvalue) + goto prstatus; + } } if (!prog->platform.arch->linux_kernel_set_initial_registers) { err = drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, @@ -350,9 +381,7 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread, goto out; } err = prog->platform.arch->linux_kernel_set_initial_registers(thread, - &obj, - prstatus.str, - prstatus.len); + &obj); } else { err = drgn_program_find_prstatus_by_tid(prog, prog->stack_trace_tid, @@ -363,6 +392,7 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread, err = drgn_error_create(DRGN_ERROR_LOOKUP, "thread not found"); goto out; } +prstatus: if (!prog->platform.arch->prstatus_set_initial_registers) { err = drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, "core dump stack unwinding is not supported for %s architecture", diff --git a/libdrgn/type.c b/libdrgn/type.c index f09f0b104..c27863a6a 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -713,12 +713,9 @@ static struct drgn_error *drgn_type_eq_impl(struct drgn_type *a, LIBDRGN_PUBLIC struct drgn_error *drgn_type_eq(struct drgn_type *a, struct drgn_type *b, bool *ret) { - struct drgn_error *err; - struct drgn_type_pair_set cache; + struct drgn_type_pair_set cache = HASH_TABLE_INIT; int depth = 0; - - drgn_type_pair_set_init(&cache); - err = drgn_type_eq_impl(a, b, &cache, &depth, ret); + struct drgn_error *err = drgn_type_eq_impl(a, b, &cache, &depth, ret); drgn_type_pair_set_deinit(&cache); return err; } diff --git a/libdrgn/vector.h b/libdrgn/vector.h index 3e3a96b0a..f8771715c 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -64,6 +64,8 @@ struct vector { * Initialize a @ref vector. * * The new vector is empty. + * + * @sa VECTOR_INIT */ void vector_init(struct vector *vector); @@ -230,6 +232,15 @@ static vector##_entry_type *vector##_pop(struct vector *vector) \ DEFINE_VECTOR_TYPE(vector, entry_type) \ DEFINE_VECTOR_FUNCTIONS(vector) +/** + * Empty vector initializer. + * + * This can be used to initialize a vector when declaring it. + * + * @sa vector_init() + */ +#define VECTOR_INIT { NULL } + /** @} */ #endif /* DRGN_VECTOR_H */ diff --git a/setup.py b/setup.py index 2b21b9183..15fc4a53d 100755 --- a/setup.py +++ b/setup.py @@ -124,7 +124,7 @@ def run(self): class test(Command): description = "run unit tests after in-place build" - KERNELS = ["5.7", "5.6", "5.5", "5.4", "4.19", "4.14", "4.9", "4.4"] + KERNELS = ["5.8", "5.7", "5.6", "5.5", "5.4", "4.19", "4.14", "4.9", "4.4"] user_options = [ ( diff --git a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index 548b4c498..b0db70d06 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -4,6 +4,7 @@ import os import re import signal +import unittest from drgn.helpers.linux.pid import find_task from drgn.helpers.linux.sched import task_state_to_char @@ -40,6 +41,7 @@ def test_task_state_to_char(self): os.waitpid(pid, 0) + @unittest.skip("GCC 10 breaks THREAD_SIZE object finder") def test_thread_size(self): # As far as I can tell, there's no way to query this value from # userspace, so at least sanity check that it's a power-of-two multiple diff --git a/vmtest/README.rst b/vmtest/README.rst index f57c78f1c..b36ffc8d9 100644 --- a/vmtest/README.rst +++ b/vmtest/README.rst @@ -4,8 +4,8 @@ drgn VM Testing drgn has a significant amount of code (both core and in helpers) which is dependent on the Linux kernel version. This code is tested on multiple Linux kernel versions in a virtual machine. These tests can be run on all supported -kernels with ``python3 setup.py test -K``. This requires QEMU and zstd to be -installed. +kernels with ``python3 setup.py test -K``. This requires QEMU, BusyBox, and +zstd to be installed. Tests can also be run on specific kernels with ``-k``. This takes a comma-separated list of kernels which are either a wildcard pattern (e.g.,