diff --git a/ddprof-lib/src/main/cpp/codeCache.cpp b/ddprof-lib/src/main/cpp/codeCache.cpp index 408d92f2..6f88d9ac 100644 --- a/ddprof-lib/src/main/cpp/codeCache.cpp +++ b/ddprof-lib/src/main/cpp/codeCache.cpp @@ -22,12 +22,14 @@ char *NativeFunc::create(const char *name, short lib_index) { void NativeFunc::destroy(char *name) { free(from(name)); } CodeCache::CodeCache(const char *name, short lib_index, bool imports_patchable, - const void *min_address, const void *max_address) { + const void *min_address, const void *max_address, + const char* image_base) { _name = NativeFunc::create(name, -1); _lib_index = lib_index; _min_address = min_address; _max_address = max_address; _text_base = NULL; + _image_base = image_base; _plt_offset = 0; _plt_size = 0; diff --git a/ddprof-lib/src/main/cpp/codeCache.h b/ddprof-lib/src/main/cpp/codeCache.h index 2e595f76..093c2246 100644 --- a/ddprof-lib/src/main/cpp/codeCache.h +++ b/ddprof-lib/src/main/cpp/codeCache.h @@ -109,6 +109,7 @@ class CodeCache { const void *_min_address; const void *_max_address; const char *_text_base; + const char* _image_base; unsigned int _plt_offset; unsigned int _plt_size; @@ -132,7 +133,8 @@ class CodeCache { explicit CodeCache(const char *name, short lib_index = -1, bool imports_patchable = false, const void *min_address = NO_MIN_ADDRESS, - const void *max_address = NO_MAX_ADDRESS); + const void *max_address = NO_MAX_ADDRESS, + const char* image_base = NULL); // Copy constructor CodeCache(const CodeCache &other); // Copy assignment operator @@ -148,6 +150,8 @@ class CodeCache { const void *maxAddress() const { return _max_address; } + const char* imageBase() const { return _image_base; } + bool contains(const void *address) const { return address >= _min_address && address < _max_address; } diff --git a/ddprof-lib/src/main/cpp/symbols.h b/ddprof-lib/src/main/cpp/symbols.h index bf073831..749a2123 100644 --- a/ddprof-lib/src/main/cpp/symbols.h +++ b/ddprof-lib/src/main/cpp/symbols.h @@ -24,6 +24,7 @@ class Symbols { private: static Mutex _parse_lock; static bool _have_kernel_symbols; + static bool _libs_limit_reported; public: static void parseKernelSymbols(CodeCache *cc); @@ -37,4 +38,18 @@ class Symbols { static bool isRootSymbol(const void* address); }; +class UnloadProtection { + private: + void* _lib_handle; + bool _valid; + + public: + UnloadProtection(const CodeCache *cc); + ~UnloadProtection(); + + UnloadProtection& operator=(const UnloadProtection& other) = delete; + + bool isValid() const { return _valid; } +}; + #endif // _SYMBOLS_H diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp index 4e250e60..e27aebae 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.cpp +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -12,18 +12,21 @@ #include "log.h" #include "safeAccess.h" #include "symbols.h" +#include #include #include #include #include #include -#include #include #include #include +#include #include #include #include +#include +#include // make sure lseek will use 64 bits offset #define _FILE_OFFSET_BITS 64 @@ -481,13 +484,19 @@ void ElfParser::addRelocationSymbols(ElfSection *reltab, const char *plt) { } } +struct SharedLibrary { + char* file; + const char* map_start; + const char* map_end; + const char* image_base; +}; + Mutex Symbols::_parse_lock; bool Symbols::_have_kernel_symbols = false; -static std::set _parsed_libraries; -static std::set _parsed_inodes; +bool Symbols::_libs_limit_reported = false; +static std::unordered_set _parsed_inodes; void Symbols::clearParsingCaches() { - _parsed_libraries.clear(); _parsed_inodes.clear(); } @@ -531,22 +540,20 @@ void Symbols::parseKernelSymbols(CodeCache *cc) { fclose(f); } -static int parseLibrariesCallback(struct dl_phdr_info *info, size_t size, - void *data) { +static void collectSharedLibraries(std::unordered_map& libs, int max_count) { FILE *f = fopen("/proc/self/maps", "r"); if (f == NULL) { - return 1; + return; } - CodeCacheArray *array = (CodeCacheArray *)data; const char *image_base = NULL; u64 last_inode = 0; char *str = NULL; size_t str_size = 0; ssize_t len; - while ((len = getline(&str, &str_size, f)) > 0) { + while (max_count > 0 && (len = getline(&str, &str_size, f)) > 0) { str[len - 1] = 0; MemoryMapDesc map(str); if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { @@ -557,63 +564,46 @@ static int parseLibrariesCallback(struct dl_phdr_info *info, size_t size, continue; } - const char *map_start = map.addr(); - unsigned long map_offs = map.offs(); - - if (map_offs == 0) { - image_base = map_start; - last_inode = u64(map.dev()) << 32 | map.inode(); + u64 inode = u64(map.dev()) << 32 | map.inode(); + if (_parsed_inodes.find(inode) != _parsed_inodes.end()) { + continue; // shared object is already parsed } - - if (!map.isExecutable() || !_parsed_libraries.insert(map_start).second) { - // Not an executable segment or it has been already parsed - continue; + if (inode == 0 && strcmp(map.file(), "[vdso]") != 0) { + continue; // all shared libraries have inode, except vDSO } - int count = array->count(); - if (count >= MAX_NATIVE_LIBS) { - break; + const char* map_start = map.addr(); + const char *map_end = map.end(); + if (inode != last_inode && map.offs() == 0) { + image_base = map_start; + last_inode = inode; } - const char *map_end = map.end(); - // Do not try to parse pseudofiles like anon_inode:name, /memfd:name - if (strchr(map.file(), ':') == NULL) { - CodeCache *cc = new CodeCache(map.file(), count, false, map_start, map_end); - TEST_LOG("Procesing library: %s", map.file()); - u64 inode = u64(map.dev()) << 32 | map.inode(); - if (inode != 0) { - // Do not parse the same executable twice, e.g. on Alpine Linux - if (_parsed_inodes.insert(inode).second) { - if (inode == last_inode) { - // If last_inode is set, image_base is known to be valid and - // readable - ElfParser::parseFile(cc, image_base, map.file(), true); - // Parse program headers after the file to ensure debug symbols are - // parsed first - ElfParser::parseProgramHeaders(cc, image_base, map_end, MUSL); - } else if ((unsigned long)map_start > map_offs) { - // Unlikely case when image_base has not been found. - // Be careful: executable file is not always ELF, e.g. classes.jsa - ElfParser::parseFile(cc, map_start - map_offs, map.file(), true); - } + if (map.isExecutable()) { + SharedLibrary& lib = libs[inode]; + if (lib.file == nullptr) { + lib.file = strdup(map.file()); + lib.map_start = map_start; + lib.map_end = map_end; + lib.image_base = inode == last_inode ? image_base : NULL; + max_count--; + } else { + // The same library may have multiple executable segments mapped + lib.map_end = map_end; } - } else if (strcmp(map.file(), "[vdso]") == 0) { - ElfParser::parseProgramHeaders(cc, map_start, map_end, true); - } - - cc->sort(); - array->add(cc); } } - free(str); fclose(f); - return 1; // stop at first iteration } void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { MutexLocker ml(_parse_lock); + if (array->count() >= MAX_NATIVE_LIBS) { + return; + } + if (kernel_symbols && !haveKernelSymbols()) { CodeCache *cc = new CodeCache("[kernel]"); parseKernelSymbols(cc); @@ -625,12 +615,48 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { delete cc; } } + std::unordered_map libs; + collectSharedLibraries(libs, MAX_NATIVE_LIBS - array->count()); + + for (auto& it : libs) { + u64 inode = it.first; + _parsed_inodes.insert(inode); + + SharedLibrary& lib = it.second; + CodeCache* cc = new CodeCache(lib.file, array->count(), false, lib.map_start, lib.map_end, lib.image_base); + + // Strip " (deleted)" suffix so that removed library can be reopened + size_t len = strlen(lib.file); + if (len > 10 && strcmp(lib.file + len - 10, " (deleted)") == 0) { + lib.file[len - 10] = 0; + } + + if (strcmp(lib.file, "[vdso]") == 0) { + ElfParser::parseProgramHeaders(cc, lib.map_start, lib.map_end, true); + } else if (lib.image_base == NULL) { + // Unlikely case when image base has not been found: not safe to access program headers. + // Be careful: executable file is not always ELF, e.g. classes.jsa + ElfParser::parseFile(cc, lib.map_start, lib.file, true); + } else { + // Parse debug symbols first + ElfParser::parseFile(cc, lib.image_base, lib.file, true); + + UnloadProtection handle(cc); + if (handle.isValid()) { + ElfParser::parseProgramHeaders(cc, lib.image_base, lib.map_end, MUSL); + } + } + + free(lib.file); - // In glibc, dl_iterate_phdr() holds dl_load_write_lock, therefore preventing - // concurrent loading and unloading of shared libraries. - // Without it, we may access memory of a library that is being unloaded. - dl_iterate_phdr(parseLibrariesCallback, array); - TEST_LOG("Parsed %d libraries", array->count()); + cc->sort(); + array->add(cc); + } + + if (array->count() >= MAX_NATIVE_LIBS && !_libs_limit_reported) { + Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS); + _libs_limit_reported = true; + } } bool Symbols::isRootSymbol(const void* address) { @@ -642,4 +668,64 @@ bool Symbols::isRootSymbol(const void* address) { return false; } +// Check that the base address of the shared object has not changed +static bool verifyBaseAddress(const CodeCache* cc, void* lib_handle) { + Dl_info dl_info; + struct link_map* map; + + if (dlinfo(lib_handle, RTLD_DI_LINKMAP, &map) != 0 || dladdr(map->l_ld, &dl_info) == 0) { + return false; + } + + return cc->imageBase() == (const char*)dl_info.dli_fbase; +} + +static const void* getMainPhdr() { + void* main_phdr = NULL; + dl_iterate_phdr([](struct dl_phdr_info* info, size_t size, void* data) { + *(const void**)data = info->dlpi_phdr; + return 1; + }, &main_phdr); + return main_phdr; +} + +static const void* _main_phdr = getMainPhdr(); +static const char* _ld_base = (const char*)getauxval(AT_BASE); + +static bool isMainExecutable(const char* image_base, const void* map_end) { + return _main_phdr != NULL && _main_phdr >= image_base && _main_phdr < map_end; +} + +static bool isLoader(const char* image_base) { + return _ld_base == image_base; +} + +UnloadProtection::UnloadProtection(const CodeCache *cc) { + if (MUSL || isMainExecutable(cc->imageBase(), cc->maxAddress()) || isLoader(cc->imageBase())) { + _lib_handle = NULL; + _valid = true; + return; + } + + // dlopen() can reopen previously loaded libraries even if the underlying file has been deleted + const char* stripped_name = cc->name(); + size_t name_len = strlen(stripped_name); + if (name_len > 10 && strcmp(stripped_name + name_len - 10, " (deleted)") == 0) { + char* buf = (char*) alloca(name_len - 9); + *stpncpy(buf, stripped_name, name_len - 10) = 0; + stripped_name = buf; + } + + // Protect library from unloading while parsing in-memory ELF program headers. + // Also, dlopen() ensures the library is fully loaded. + _lib_handle = dlopen(stripped_name, RTLD_LAZY | RTLD_NOLOAD); + _valid = _lib_handle != NULL && verifyBaseAddress(cc, _lib_handle); +} + +UnloadProtection::~UnloadProtection() { + if (_lib_handle != NULL) { + dlclose(_lib_handle); + } +} + #endif // __linux__ diff --git a/ddprof-lib/src/main/cpp/symbols_macos.cpp b/ddprof-lib/src/main/cpp/symbols_macos.cpp index 6b458f83..3e6f6d3d 100644 --- a/ddprof-lib/src/main/cpp/symbols_macos.cpp +++ b/ddprof-lib/src/main/cpp/symbols_macos.cpp @@ -22,9 +22,22 @@ #include #include #include -#include +#include #include +UnloadProtection::UnloadProtection(const CodeCache *cc) { + // Protect library from unloading while parsing in-memory ELF program headers. + // Also, dlopen() ensures the library is fully loaded. + _lib_handle = dlopen(cc->name(), RTLD_LAZY | RTLD_NOLOAD); + _valid = _lib_handle != NULL; +} + +UnloadProtection::~UnloadProtection() { + if (_lib_handle != NULL) { + dlclose(_lib_handle); + } +} + class MachOParser { private: CodeCache *_cc; @@ -140,7 +153,8 @@ class MachOParser { Mutex Symbols::_parse_lock; bool Symbols::_have_kernel_symbols = false; -static std::set _parsed_libraries; +bool Symbols::_libs_limit_reported = false; +static std::unordered_set _parsed_libraries; void Symbols::clearParsingCaches() { _parsed_libraries.clear(); } void Symbols::parseKernelSymbols(CodeCache *cc) {} @@ -157,26 +171,28 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { int count = array->count(); if (count >= MAX_NATIVE_LIBS) { + if (!_libs_limit_reported) { + Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS); + _libs_limit_reported = true; + } break; } const char *path = _dyld_get_image_name(i); - // Protect the library from unloading while parsing symbols - void *handle = dlopen(path, RTLD_LAZY | RTLD_NOLOAD); - if (handle == NULL) { - continue; - } - CodeCache *cc = new CodeCache(path, count, true); - MachOParser parser(cc, image_base); - if (!parser.parse()) { - Log::warn("Could not parse symbols from %s", path); - } - dlclose(handle); - cc->sort(); - array->add(cc); + UnloadProtection handle(cc); + if (handle.isValid()) { + MachOParser parser(cc, image_base); + if (!parser.parse()) { + Log::warn("Could not parse symbols from %s", path); + } + cc->sort(); + array->add(cc); + } else { + delete cc; + } } }