Skip to content

Commit a243106

Browse files
committed
Modify llvm-dwp to be able to emit string tables over 4GB without losing data.
This change lets llvm-dwp emit a DWARF64 version of the .debug_str_offsets table for a .dwo file in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp appends all strings from a .dwo file to the .debug_str section, and if any of the new string offsets exceeds UINT32_MAX, it upgrades that .dwo file's .debug_str_offsets table to a DWARF64 header so that each string offset in the table is emitted as a 64-bit value. LLDB is fixed so that it can successfully load the 64-bit string offsets tables in .dwp files. llvm-dwarfdump and the LLVM DWARF parsing code are fixed to correctly handle DWARF64 string offsets table headers.
1 parent 92e2404 commit a243106

File tree

6 files changed

+89
-27
lines changed

6 files changed

+89
-27
lines changed

lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -360,15 +360,21 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
360360
const DWARFDataExtractor &strOffsets =
361361
GetSymbolFileDWARF().GetDWARFContext().getOrLoadStrOffsetsData();
362362
uint64_t length = strOffsets.GetU32(&baseOffset);
363-
if (length == 0xffffffff)
363+
if (length == 0xffffffff) {
364364
length = strOffsets.GetU64(&baseOffset);
365+
m_str_offsets_size = 8;
366+
}
365367

366368
// Check version.
367369
if (strOffsets.GetU16(&baseOffset) < 5)
368370
return;
369371

370372
// Skip padding.
371373
baseOffset += 2;
374+
} else {
375+
// Size of offset for .debug_str_offsets is same as DWARF offset byte size
376+
// of the DWARFUnit for DWARF version 4 and earlier.
377+
m_str_offsets_size = m_header.getDwarfOffsetByteSize();
372378
}
373379

374380
SetStrOffsetsBase(baseOffset);
@@ -1079,10 +1085,9 @@ uint32_t DWARFUnit::GetHeaderByteSize() const { return m_header.getSize(); }
10791085

10801086
std::optional<uint64_t>
10811087
DWARFUnit::GetStringOffsetSectionItem(uint32_t index) const {
1082-
lldb::offset_t offset =
1083-
GetStrOffsetsBase() + index * m_header.getDwarfOffsetByteSize();
1088+
lldb::offset_t offset = GetStrOffsetsBase() + index * m_str_offsets_size;
10841089
return m_dwarf.GetDWARFContext().getOrLoadStrOffsetsData().GetMaxU64(
1085-
&offset, m_header.getDwarfOffsetByteSize());
1090+
&offset, m_str_offsets_size);
10861091
}
10871092

10881093
llvm::Expected<llvm::DWARFAddressRangesVector>

lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ class DWARFUnit : public DWARFExpression::Delegate, public UserID {
364364
dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
365365

366366
dw_offset_t m_str_offsets_base = 0; // Value of DW_AT_str_offsets_base.
367-
367+
dw_offset_t m_str_offsets_size = 4; // Size in bytes of the string offsets.
368368
std::optional<llvm::DWARFDebugRnglistTable> m_rnglist_table;
369369
bool m_rnglist_table_done = false;
370370
std::optional<llvm::DWARFListTableHeader> m_loclist_table_header;

llvm/include/llvm/DWP/DWP.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ struct CompileUnitIdentifiers {
7070
LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
7171
OnCuIndexOverflow OverflowOptValue);
7272

73+
typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
74+
7375
LLVM_ABI Error handleSection(
7476
const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
7577
const MCSection *StrSection, const MCSection *StrOffsetSection,
@@ -82,7 +84,7 @@ LLVM_ABI Error handleSection(
8284
std::vector<StringRef> &CurTypesSection,
8385
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
8486
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
85-
std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
87+
SectionLengths &SectionLength);
8688

8789
LLVM_ABI Expected<InfoSectionUnitHeader>
8890
parseInfoSectionUnitHeader(StringRef Info);

llvm/include/llvm/DWP/DWPStringPool.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ class DWPStringPool {
3232

3333
MCStreamer &Out;
3434
MCSection *Sec;
35-
DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
36-
uint32_t Offset = 0;
35+
DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
36+
uint64_t Offset = 0;
3737

3838
public:
3939
DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
4040

41-
uint32_t getOffset(const char *Str, unsigned Length) {
41+
uint64_t getOffset(const char *Str, unsigned Length) {
4242
assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
4343

4444
auto Pair = Pool.insert(std::make_pair(Str, Offset));

llvm/lib/DWP/DWP.cpp

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -413,33 +413,43 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
413413
}
414414

415415
static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
416-
DenseMap<uint64_t, uint32_t> &OffsetRemapping,
417-
uint64_t &Offset, uint64_t &Size) {
416+
DenseMap<uint64_t, uint64_t> &OffsetRemapping,
417+
uint64_t &Offset, const uint64_t Size,
418+
uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
418419

419420
while (Offset < Size) {
420-
auto OldOffset = Data.getU32(&Offset);
421-
auto NewOffset = OffsetRemapping[OldOffset];
422-
Out.emitIntValue(NewOffset, 4);
421+
const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
422+
const uint64_t NewOffset = OffsetRemapping[OldOffset];
423+
assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX);
424+
Out.emitIntValue(NewOffset, NewOffsetSize);
423425
}
424426
}
425427

426428
void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
427429
MCSection *StrOffsetSection,
428430
StringRef CurStrSection,
429-
StringRef CurStrOffsetSection, uint16_t Version) {
431+
StringRef CurStrOffsetSection, uint16_t Version,
432+
SectionLengths &SectionLength) {
430433
// Could possibly produce an error or warning if one of these was non-null but
431434
// the other was null.
432435
if (CurStrSection.empty() || CurStrOffsetSection.empty())
433436
return;
434437

435-
DenseMap<uint64_t, uint32_t> OffsetRemapping;
438+
DenseMap<uint64_t, uint64_t> OffsetRemapping;
436439

437440
DataExtractor Data(CurStrSection, true, 0);
438441
uint64_t LocalOffset = 0;
439442
uint64_t PrevOffset = 0;
443+
444+
// Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
445+
// emit a DWARF64 .debug_str_offsets table for this compile unit.
446+
uint32_t OldOffsetSize = 4;
447+
uint32_t NewOffsetSize = 4;
440448
while (const char *S = Data.getCStr(&LocalOffset)) {
441-
OffsetRemapping[PrevOffset] =
442-
Strings.getOffset(S, LocalOffset - PrevOffset);
449+
uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
450+
OffsetRemapping[PrevOffset] = NewOffset;
451+
if (NewOffset > UINT32_MAX)
452+
NewOffsetSize = 8;
443453
PrevOffset = LocalOffset;
444454
}
445455

@@ -451,7 +461,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
451461
uint64_t Size = CurStrOffsetSection.size();
452462
if (Version > 4) {
453463
while (Offset < Size) {
454-
uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
464+
const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
455465
assert(HeaderSize <= Size - Offset &&
456466
"StrOffsetSection size is less than its header");
457467

@@ -461,16 +471,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
461471
if (HeaderSize == 8) {
462472
ContributionSize = Data.getU32(&HeaderLengthOffset);
463473
} else if (HeaderSize == 16) {
474+
OldOffsetSize = 8;
464475
HeaderLengthOffset += 4; // skip the dwarf64 marker
465476
ContributionSize = Data.getU64(&HeaderLengthOffset);
466477
}
467478
ContributionEnd = ContributionSize + HeaderLengthOffset;
468-
Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
469-
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
479+
480+
StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
481+
if (OldOffsetSize == 4 && NewOffsetSize == 8) {
482+
// We had a DWARF32 .debug_str_offsets header, but we need to emit
483+
// some string offsets that require 64 bit offsets on the .debug_str
484+
// section. Emit the .debug_str_offsets header in DWARF64 format so we
485+
// can emit string offsets that exceed UINT32_MAX without truncating
486+
// the string offset.
487+
488+
// 2 bytes for DWARF version, 2 bytes pad.
489+
const uint64_t VersionPadSize = 4;
490+
const uint64_t NewLength =
491+
(ContributionSize - VersionPadSize) * 2 + VersionPadSize;
492+
// Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
493+
// value followed by the 8 byte updated length.
494+
Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
495+
Out.emitIntValue(NewLength, 8);
496+
// Emit DWARF version as a 2 byte integer.
497+
Out.emitIntValue(Version, 2);
498+
// Emit 2 bytes of padding.
499+
Out.emitIntValue(0, 2);
500+
// Update the .debug_str_offsets section length contribution for
501+
// this .dwo file.
502+
for (auto &Pair : SectionLength) {
503+
if (Pair.first == DW_SECT_STR_OFFSETS) {
504+
Pair.second = NewLength + 12;
505+
break;
506+
}
507+
}
508+
} else {
509+
// Just emit the same .debug_str_offsets header.
510+
Out.emitBytes(HeaderBytes);
511+
}
512+
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
513+
OldOffsetSize, NewOffsetSize);
470514
}
471515

472516
} else {
473-
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
517+
assert(OldOffsetSize == NewOffsetSize);
518+
writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
519+
NewOffsetSize);
474520
}
475521
}
476522

@@ -562,7 +608,7 @@ Error handleSection(
562608
std::vector<StringRef> &CurTypesSection,
563609
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
564610
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
565-
std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
611+
SectionLengths &SectionLength) {
566612
if (Section.isBSS())
567613
return Error::success();
568614

@@ -684,7 +730,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
684730
// This maps each section contained in this file to its length.
685731
// This information is later on used to calculate the contributions,
686732
// i.e. offset and length, of each compile/type unit to a section.
687-
std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
733+
SectionLengths SectionLength;
688734

689735
for (const auto &Section : Obj.sections())
690736
if (auto Err = handleSection(
@@ -713,7 +759,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
713759
}
714760

715761
writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
716-
CurStrOffsetSection, Header.Version);
762+
CurStrOffsetSection, Header.Version, SectionLength);
717763

718764
for (auto Pair : SectionLength) {
719765
auto Index = getContributionIndex(Pair.first, IndexVersion);

llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,9 +1187,18 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA) {
11871187
if (getVersion() >= 5) {
11881188
if (DA.getData().data() == nullptr)
11891189
return std::nullopt;
1190-
Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
1190+
// For .dwo files, the section contribution for the .debug_str_offsets
1191+
// points to the string offsets table header. Decode the format from this
1192+
// data as llvm-dwp has been modified to be able to emit a
1193+
// .debug_str_offsets table as DWARF64 even if the compile unit is DWARF32.
1194+
// This allows .dwp files to have string tables that exceed UINT32_MAX in
1195+
// size.
1196+
uint64_t Length = 0;
1197+
DwarfFormat Format = dwarf::DwarfFormat::DWARF32;
1198+
std::tie(Length, Format) = DA.getInitialLength(&Offset);
1199+
Offset += 4; // Skip the DWARF version uint16_t and the uint16_t padding.
11911200
// Look for a valid contribution at the given offset.
1192-
auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
1201+
auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Format, Offset);
11931202
if (!DescOrError)
11941203
return DescOrError.takeError();
11951204
return *DescOrError;

0 commit comments

Comments
 (0)