diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index fd4ac3164c686..187642257cc52 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -187,13 +187,17 @@ struct FunctionInfo {
   ///
   /// \param Addr The address to lookup.
   ///
+  /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
+  /// non-null, will be set to the raw data of the MergedFunctionInfo, if
+  /// present.
+  ///
   /// \returns An LookupResult or an error describing the issue that was
   /// encountered during decoding. An error should only be returned if the
   /// address is not contained in the FunctionInfo or if the data is corrupted.
-  static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
-                                             const GsymReader &GR,
-                                             uint64_t FuncAddr,
-                                             uint64_t Addr);
+  static llvm::Expected<LookupResult>
+  lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
+         uint64_t Addr,
+         std::optional<DataExtractor> *MergedFuncsData = nullptr);
 
   uint64_t startAddress() const { return Range.start(); }
   uint64_t endAddress() const { return Range.end(); }
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 3d532588a7023..ee7929ae850fd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -127,10 +127,29 @@ class GsymReader {
   /// is much faster for lookups.
   ///
   /// \param Addr A virtual address from the orignal object file to lookup.
+  ///
+  /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
+  /// non-null, will be set to the raw data of the MergedFunctionInfo, if
+  /// present.
+  ///
   /// \returns An expected LookupResult that contains only the information
   /// needed for the current address, or an error object that indicates reason
   /// for failing to lookup the address.
-  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
+  llvm::Expected<LookupResult>
+  lookup(uint64_t Addr,
+         std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
+
+  /// Lookup all merged functions for a given address.
+  ///
+  /// This function performs a lookup for the specified address and then
+  /// retrieves additional LookupResults from any merged functions associated
+  /// with the primary LookupResult.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \returns A vector of LookupResult objects, where the first element is the
+  /// primary result, followed by results for any merged functions.
+  llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
 
   /// Get a string from the string table.
   ///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
index b68f9b6098d9e..203fb13cada10 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -31,6 +31,18 @@ struct MergedFunctionsInfo {
   /// \returns A boolean indicating if this FunctionInfo is valid.
   bool isValid() { return !MergedFunctions.empty(); }
 
+  /// Get a vector of DataExtractor objects for the functions in this
+  /// MergedFunctionsInfo object.
+  ///
+  /// \param Data The binary stream to read the data from. This object must have
+  /// the data for the MergedFunctionsInfo object starting at offset zero. The
+  /// data can contain more data than needed.
+  ///
+  /// \returns An llvm::Expected containing a vector of DataExtractor objects on
+  /// success, or an error object if parsing fails.
+  static llvm::Expected<std::vector<DataExtractor>>
+  getFuncsDataExtractors(DataExtractor &Data);
+
   /// Decode an MergedFunctionsInfo object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from. This object must have
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index dd754c701f624..785a8da64abe4 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -235,10 +235,10 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
   return FuncInfoOffset;
 }
 
-llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
-                                                  const GsymReader &GR,
-                                                  uint64_t FuncAddr,
-                                                  uint64_t Addr) {
+llvm::Expected<LookupResult>
+FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
+                     uint64_t FuncAddr, uint64_t Addr,
+                     std::optional<DataExtractor> *MergedFuncsData) {
   LookupResult LR;
   LR.LookupAddr = Addr;
   uint64_t Offset = 0;
@@ -289,6 +289,12 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
           return ExpectedLE.takeError();
         break;
 
+      case InfoType::MergedFunctionsInfo:
+        // Store the merged functions data for later parsing, if needed.
+        if (MergedFuncsData)
+          *MergedFuncsData = InfoData;
+        break;
+
       case InfoType::InlineInfo:
         // We will parse the inline info after our line table, but only if
         // we have a line entry.
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index fa5476db191ec..0a5bb7caaee8c 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -334,14 +334,54 @@ GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
   return ExpectedData.takeError();
 }
 
-llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
+llvm::Expected<LookupResult>
+GsymReader::lookup(uint64_t Addr,
+                   std::optional<DataExtractor> *MergedFunctionsData) const {
   uint64_t FuncStartAddr = 0;
   if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
-    return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);
+    return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
+                                MergedFunctionsData);
   else
     return ExpectedData.takeError();
 }
 
+llvm::Expected<std::vector<LookupResult>>
+GsymReader::lookupAll(uint64_t Addr) const {
+  std::vector<LookupResult> Results;
+  std::optional<DataExtractor> MergedFunctionsData;
+
+  // First perform a lookup to get the primary function info result.
+  auto MainResult = lookup(Addr, &MergedFunctionsData);
+  if (!MainResult)
+    return MainResult.takeError();
+
+  // Remember the function start address before MainResult is moved into
+  // Results, so the merged lookups below never read a moved-from object.
+  const uint64_t FuncStartAddr = MainResult->FuncRange.start();
+
+  // Add the main result as the first entry.
+  Results.push_back(std::move(*MainResult));
+
+  // Now process any merged functions data that was found during the lookup.
+  if (MergedFunctionsData) {
+    // Get data extractors for each merged function.
+    auto ExpectedMergedFuncExtractors =
+        MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
+    if (!ExpectedMergedFuncExtractors)
+      return ExpectedMergedFuncExtractors.takeError();
+
+    // Process each merged function data.
+    for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
+      auto FI = FunctionInfo::lookup(MergedData, *this, FuncStartAddr, Addr);
+      if (!FI)
+        return FI.takeError();
+      Results.push_back(std::move(*FI));
+    }
+  }
+
+  return Results;
+}
+
 void GsymReader::dump(raw_ostream &OS) {
   const auto &Header = getHeader();
   // Dump the GSYM header.
diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
index 4efae2262271d..d2c28f38799d3 100644
--- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -35,22 +35,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
 llvm::Expected<MergedFunctionsInfo>
 MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
   MergedFunctionsInfo MFI;
+  auto FuncExtractorsOrError = MFI.getFuncsDataExtractors(Data);
+
+  if (!FuncExtractorsOrError)
+    return FuncExtractorsOrError.takeError();
+
+  for (DataExtractor &FuncData : *FuncExtractorsOrError) {
+    llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FuncData, BaseAddr);
+    if (!FI)
+      return FI.takeError();
+    MFI.MergedFunctions.push_back(std::move(*FI));
+  }
+
+  return MFI;
+}
+
+llvm::Expected<std::vector<DataExtractor>>
+MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
+  std::vector<DataExtractor> Results;
   uint64_t Offset = 0;
+
+  // Ensure there is enough data to read the function count.
+  if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+    return createStringError(
+        std::errc::io_error,
+        "unable to read the function count at offset 0x%8.8" PRIx64, Offset);
+
   uint32_t Count = Data.getU32(&Offset);
 
   for (uint32_t i = 0; i < Count; ++i) {
+    // Ensure there is enough data to read the function size.
+    if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+      return createStringError(
+          std::errc::io_error,
+          "unable to read size of function %u at offset 0x%8.8" PRIx64, i,
+          Offset);
+
     uint32_t FnSize = Data.getU32(&Offset);
-    DataExtractor FnData(Data.getData().substr(Offset, FnSize),
+
+    // Ensure there is enough data for the function content.
+    if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
+      return createStringError(
+          std::errc::io_error,
+          "function data is truncated for function %u at offset 0x%8.8" PRIx64
+          ", expected size %u",
+          i, Offset, FnSize);
+
+    // Extract the function data.
+    Results.emplace_back(Data.getData().substr(Offset, FnSize),
                          Data.isLittleEndian(), Data.getAddressSize());
-    llvm::Expected<FunctionInfo> FI =
-        FunctionInfo::decode(FnData, BaseAddr + Offset);
-    if (!FI)
-      return FI.takeError();
-    MFI.MergedFunctions.push_back(std::move(*FI));
+
     Offset += FnSize;
   }
-
-  return MFI;
+  return Results;
 }
 
 bool operator==(const MergedFunctionsInfo &LHS,
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
index 94a162c5f2120..bcd3d7847da45 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
@@ -64,6 +64,17 @@
 # CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
 # CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
 
+## Test the lookup functionality for merged functions:
+# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
+# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
+
+## Each result line must directly follow the previous one; which of the three merged functions comes first is not pinned here.
+# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
+# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+
+# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
 
 
 --- !mach-o
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index d61b418d2d843..89cd3ce6fc413 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -17,7 +17,10 @@ defm convert :
   Eq<"convert",
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
-  FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+  FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n"
+  "Without this option one function per unique address range will be emitted.\n"
+  "When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n"
+  "Without this option only one function will be displayed.">;
 def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
 defm callsites_yaml_file :
   Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index aed4ae7c615fd..e6562b9ebf404 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -98,7 +98,7 @@ static uint64_t SegmentSize;
 static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
-static bool StoreMergedFunctionInfo = false;
+static bool UseMergedFunctions = false;
 static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
 
@@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) {
   }
 
   LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
-  StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
+  UseMergedFunctions = Args.hasArg(OPT_merged_functions);
 
   if (Args.hasArg(OPT_callsites_yaml_file_EQ)) {
     CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ);
@@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
   // functions in the first FunctionInfo with that address range. Do this right
   // after loading the DWARF data so we don't have to deal with functions from
   // the symbol table.
-  if (StoreMergedFunctionInfo)
+  if (UseMergedFunctions)
     Gsym.prepareMergedFunctions(Out);
 
   // Get the UUID and convert symbol table to GSYM.
@@ -508,24 +508,55 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
 }
 
 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
-  if (auto Result = Gsym.lookup(Addr)) {
-    // If verbose is enabled dump the full function info for the address.
-    if (Verbose) {
-      if (auto FI = Gsym.getFunctionInfo(Addr)) {
-        OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
-        Gsym.dump(OS, *FI);
-        OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
-      }
-    }
-    OS << Result.get();
-  } else {
-    if (Verbose)
-      OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
-    OS << HEX64(Addr) << ": ";
-    logAllUnhandledErrors(Result.takeError(), OS, "error: ");
-  }
-  if (Verbose)
-    OS << "\n";
+  // Report a failed lookup for Addr on the output stream.
+  auto logError = [Addr, &OS](Error E) {
+    OS << HEX64(Addr) << ": ";
+    logAllUnhandledErrors(std::move(E), OS, "error: ");
+  };
+
+  // In verbose mode, dump the full FunctionInfo and a header *before* the
+  // lookup result so the header actually precedes what it describes.
+  auto dumpVerboseInfo = [&Gsym, Addr, &OS]() {
+    if (!Verbose)
+      return;
+    if (auto FI = Gsym.getFunctionInfo(Addr)) {
+      OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
+      Gsym.dump(OS, *FI);
+      OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
+    } else {
+      // A failed Expected must be consumed before it is destroyed.
+      consumeError(FI.takeError());
+    }
+  };
+
+  if (UseMergedFunctions) {
+    auto Results = Gsym.lookupAll(Addr);
+    if (!Results) {
+      // Report the failure; an unchecked Expected must not be destroyed.
+      logError(Results.takeError());
+      return;
+    }
+    dumpVerboseInfo();
+    OS << "Found " << Results->size() << " functions at address "
+       << HEX64(Addr) << ":\n";
+    for (size_t i = 0; i < Results->size(); ++i) {
+      OS << " " << Results->at(i);
+
+      if (i != Results->size() - 1)
+        OS << "\n";
+    }
+  } else { /* UseMergedFunctions == false */
+    auto Result = Gsym.lookup(Addr);
+    if (!Result) {
+      logError(Result.takeError());
+      return;
+    }
+    dumpVerboseInfo();
+    OS << Result.get();
+  }
+
+  if (Verbose)
+    OS << "\n";
 }
 
 int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {