-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Modify llvm-dwp to be able to emit string tables over 4GB without losing data #167457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fccf37f
98b0ee5
604b555
011c2c1
18707c5
c3b53af
6e65e72
d4c35f6
96d894c
433eb76
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -413,33 +413,52 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) { | |
| } | ||
|
|
||
| static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data, | ||
| DenseMap<uint64_t, uint32_t> &OffsetRemapping, | ||
| uint64_t &Offset, uint64_t &Size) { | ||
|
|
||
| DenseMap<uint64_t, uint64_t> &OffsetRemapping, | ||
| uint64_t &Offset, const uint64_t Size, | ||
| uint32_t OldOffsetSize, uint32_t NewOffsetSize) { | ||
| // Create a mask so we don't trigger an emitIntValue() assert below if the | ||
| // NewOffset is over 4GB. | ||
| const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX; | ||
| while (Offset < Size) { | ||
| auto OldOffset = Data.getU32(&Offset); | ||
| auto NewOffset = OffsetRemapping[OldOffset]; | ||
| Out.emitIntValue(NewOffset, 4); | ||
| const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize); | ||
| const uint64_t NewOffset = OffsetRemapping[OldOffset]; | ||
| // Truncate the string offset like the old llvm-dwp would have if we aren't | ||
| // promoting the .debug_str_offsets to DWARF64. | ||
| Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize); | ||
| } | ||
| } | ||
|
|
||
| void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, | ||
| MCSection *StrOffsetSection, | ||
| StringRef CurStrSection, | ||
| StringRef CurStrOffsetSection, uint16_t Version) { | ||
| void writeStringsAndOffsets( | ||
| MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection, | ||
| StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version, | ||
| SectionLengths &SectionLength, | ||
| const Dwarf64StrOffsetsPromotion StrOffsetsOptValue) { | ||
| // Could possibly produce an error or warning if one of these was non-null but | ||
| // the other was null. | ||
| if (CurStrSection.empty() || CurStrOffsetSection.empty()) | ||
| return; | ||
|
|
||
| DenseMap<uint64_t, uint32_t> OffsetRemapping; | ||
| DenseMap<uint64_t, uint64_t> OffsetRemapping; | ||
|
|
||
| DataExtractor Data(CurStrSection, true, 0); | ||
| uint64_t LocalOffset = 0; | ||
| uint64_t PrevOffset = 0; | ||
|
|
||
| // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can | ||
| // emit a DWARF64 .debug_str_offsets table for this compile unit. If the | ||
| // \a StrOffsetsOptValue argument is Dwarf64StrOffsetsPromotion::Always, then | ||
| // force the emission of DWARF64 .debug_str_offsets for testing. | ||
| uint32_t OldOffsetSize = 4; | ||
| uint32_t NewOffsetSize = | ||
| StrOffsetsOptValue == Dwarf64StrOffsetsPromotion::Always ? 8 : 4; | ||
| while (const char *S = Data.getCStr(&LocalOffset)) { | ||
| OffsetRemapping[PrevOffset] = | ||
| Strings.getOffset(S, LocalOffset - PrevOffset); | ||
| uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset); | ||
| OffsetRemapping[PrevOffset] = NewOffset; | ||
| // Only promote the .debug_str_offsets to DWARF64 if our setting allows it. | ||
| if (StrOffsetsOptValue != Dwarf64StrOffsetsPromotion::Disabled && | ||
| NewOffset > UINT32_MAX) { | ||
|
Comment on lines
+458
to
+459
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should probably have an error message here if 32->64 promotion is disabled and the offset is too large for 32 bits. But that can/should be in a separate patch (since we didn't have a diagnostic for it before, this patch isn't regressing anything). |
||
| NewOffsetSize = 8; | ||
| } | ||
| PrevOffset = LocalOffset; | ||
| } | ||
|
|
||
|
|
@@ -451,7 +470,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, | |
| uint64_t Size = CurStrOffsetSection.size(); | ||
| if (Version > 4) { | ||
| while (Offset < Size) { | ||
| uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); | ||
| const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); | ||
| assert(HeaderSize <= Size - Offset && | ||
| "StrOffsetSection size is less than its header"); | ||
|
|
||
|
|
@@ -461,16 +480,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, | |
| if (HeaderSize == 8) { | ||
| ContributionSize = Data.getU32(&HeaderLengthOffset); | ||
| } else if (HeaderSize == 16) { | ||
| OldOffsetSize = 8; | ||
| HeaderLengthOffset += 4; // skip the dwarf64 marker | ||
| ContributionSize = Data.getU64(&HeaderLengthOffset); | ||
| } | ||
| ContributionEnd = ContributionSize + HeaderLengthOffset; | ||
| Out.emitBytes(Data.getBytes(&Offset, HeaderSize)); | ||
| writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd); | ||
|
|
||
| StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize); | ||
| if (OldOffsetSize == 4 && NewOffsetSize == 8) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps we could simplify this code by just emitting whatever the new offset section header is - there's probably not much performance to be saved by copying the old header in rather than emitting a new one. |
||
| // We had a DWARF32 .debug_str_offsets header, but we need to emit | ||
| // some string offsets that require 64 bit offsets on the .debug_str | ||
| // section. Emit the .debug_str_offsets header in DWARF64 format so we | ||
| // can emit string offsets that exceed UINT32_MAX without truncating | ||
| // the string offset. | ||
|
|
||
| // 2 bytes for DWARF version, 2 bytes pad. | ||
| const uint64_t VersionPadSize = 4; | ||
| const uint64_t NewLength = | ||
| (ContributionSize - VersionPadSize) * 2 + VersionPadSize; | ||
| // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64 | ||
| // value followed by the 8 byte updated length. | ||
| Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4); | ||
| Out.emitIntValue(NewLength, 8); | ||
| // Emit DWARF version as a 2 byte integer. | ||
| Out.emitIntValue(Version, 2); | ||
| // Emit 2 bytes of padding. | ||
| Out.emitIntValue(0, 2); | ||
| // Update the .debug_str_offsets section length contribution for | ||
| // this .dwo file. | ||
| for (auto &Pair : SectionLength) { | ||
| if (Pair.first == DW_SECT_STR_OFFSETS) { | ||
| Pair.second = NewLength + 12; | ||
| break; | ||
| } | ||
| } | ||
| } else { | ||
| // Just emit the same .debug_str_offsets header. | ||
| Out.emitBytes(HeaderBytes); | ||
| } | ||
| writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd, | ||
| OldOffsetSize, NewOffsetSize); | ||
| } | ||
|
|
||
| } else { | ||
| writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size); | ||
| assert(OldOffsetSize == NewOffsetSize); | ||
| writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize, | ||
| NewOffsetSize); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -562,7 +617,7 @@ Error handleSection( | |
| std::vector<StringRef> &CurTypesSection, | ||
| std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection, | ||
| StringRef &CurCUIndexSection, StringRef &CurTUIndexSection, | ||
| std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) { | ||
| SectionLengths &SectionLength) { | ||
| if (Section.isBSS()) | ||
| return Error::success(); | ||
|
|
||
|
|
@@ -620,7 +675,8 @@ Error handleSection( | |
| } | ||
|
|
||
| Error write(MCStreamer &Out, ArrayRef<std::string> Inputs, | ||
| OnCuIndexOverflow OverflowOptValue) { | ||
| OnCuIndexOverflow OverflowOptValue, | ||
| Dwarf64StrOffsetsPromotion StrOffsetsOptValue) { | ||
| const auto &MCOFI = *Out.getContext().getObjectFileInfo(); | ||
| MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); | ||
| MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection(); | ||
|
|
@@ -684,7 +740,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs, | |
| // This maps each section contained in this file to its length. | ||
| // This information is later on used to calculate the contributions, | ||
| // i.e. offset and length, of each compile/type unit to a section. | ||
| std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength; | ||
| SectionLengths SectionLength; | ||
|
|
||
| for (const auto &Section : Obj.sections()) | ||
| if (auto Err = handleSection( | ||
|
|
@@ -713,7 +769,8 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs, | |
| } | ||
|
|
||
| writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, | ||
| CurStrOffsetSection, Header.Version); | ||
| CurStrOffsetSection, Header.Version, SectionLength, | ||
| StrOffsetsOptValue); | ||
|
|
||
| for (auto Pair : SectionLength) { | ||
| auto Index = getContributionIndex(Pair.first, IndexVersion); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| # This test checks that llvm-dwp can successfully promote .debug_str_offsets to | ||
| # DWARF64. We do this by passing llvm-dwp the option | ||
| # "--dwarf64-str-offsets-promotion=always", which forces llvm-dwp to promote | ||
| # a DWARF32 version of .debug_str_offsets to a DWARF64 version even when no | ||
| # string offset exceeds UINT32_MAX. This allows us to test the functionality | ||
| # without having to create a 4GB .dwo file. | ||
|
|
||
| # RUN: yaml2obj %s -o %t.dwo | ||
| # RUN: llvm-dwp %t.dwo -o %t.dwp | ||
| # RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets-promotion | ||
| # RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets-promotion=disabled | ||
| # RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets-promotion=enabled | ||
| # RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets-promotion=always | ||
| # RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets-promotion=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s | ||
| # RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s | ||
| # RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s | ||
| # RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s | ||
| # RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s | ||
| # RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s | ||
|
|
||
| # DWARF32: .debug_str_offsets.dwo contents: | ||
| # DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5 | ||
| # DWARF32-NEXT: 0x00000008: 00000000 "main" | ||
| # DWARF32-NEXT: 0x0000000c: 00000005 "int" | ||
| # DWARF32-NEXT: 0x00000010: 00000009 "argc" | ||
| # DWARF32-NEXT: 0x00000014: 0000000e "argv" | ||
| # DWARF32-NEXT: 0x00000018: 00000013 "char" | ||
| # DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)" | ||
| # DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp" | ||
| # DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo" | ||
|
|
||
| # DWARF64: .debug_str_offsets.dwo contents: | ||
| # DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5 | ||
| # DWARF64-NEXT: 0x00000010: 0000000000000000 "main" | ||
| # DWARF64-NEXT: 0x00000018: 0000000000000005 "int" | ||
| # DWARF64-NEXT: 0x00000020: 0000000000000009 "argc" | ||
| # DWARF64-NEXT: 0x00000028: 000000000000000e "argv" | ||
| # DWARF64-NEXT: 0x00000030: 0000000000000013 "char" | ||
| # DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)" | ||
| # DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp" | ||
| # DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo" | ||
|
|
||
| # ERROR: invalid value for --dwarf64-str-offsets-promotion. Valid values are one of: "enabled", "disabled" or "always". | ||
|
|
||
| --- !ELF | ||
| FileHeader: | ||
| Class: ELFCLASS64 | ||
| Data: ELFDATA2LSB | ||
| Type: ET_REL | ||
| Machine: EM_X86_64 | ||
| SectionHeaderStringTable: .strtab | ||
| Sections: | ||
| - Name: .debug_str_offsets.dwo | ||
| Type: SHT_PROGBITS | ||
| Flags: [ SHF_EXCLUDE ] | ||
| AddressAlign: 0x1 | ||
| Content: '24000000050000000000000005000000090000000E00000013000000180000004600000051000000' | ||
| - Name: .debug_str.dwo | ||
| Type: SHT_PROGBITS | ||
| Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ] | ||
| AddressAlign: 0x1 | ||
| EntSize: 0x1 | ||
| Content: 6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00 | ||
| - Name: .debug_info.dwo | ||
| Type: SHT_PROGBITS | ||
| Flags: [ SHF_EXCLUDE ] | ||
| AddressAlign: 0x1 | ||
| Content: 540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100 | ||
| - Name: .debug_abbrev.dwo | ||
| Type: SHT_PROGBITS | ||
| Flags: [ SHF_EXCLUDE ] | ||
| AddressAlign: 0x1 | ||
| Content: 01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000 | ||
| - Type: SectionHeaderTable | ||
| Sections: | ||
| - Name: .strtab | ||
| - Name: .debug_str_offsets.dwo | ||
| - Name: .debug_str.dwo | ||
| - Name: .debug_info.dwo | ||
| - Name: .debug_abbrev.dwo | ||
| ... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could be an enum class