From 4204ca07e85470fa9169025f1c7a8172f4bb03c9 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Fri, 23 May 2025 12:36:07 +0800 Subject: [PATCH 1/7] [lld][COFF] Remove duplicate strtab entries String table size is too big for large binary when symbol table is enabled. Some strings in strtab is same so it can be reused. --- lld/COFF/Writer.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index db6133e20a037..6ec83a5f77e5a 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -285,6 +285,7 @@ class Writer { std::unique_ptr &buffer; std::map partialSections; std::vector strtab; + StringMap strtabMap; std::vector outputSymtab; std::vector codeMap; IdataContents idata; @@ -1439,10 +1440,13 @@ void Writer::assignOutputSectionIndices() { size_t Writer::addEntryToStringTable(StringRef str) { assert(str.size() > COFF::NameSize); - size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field - strtab.insert(strtab.end(), str.begin(), str.end()); - strtab.push_back('\0'); - return offsetOfEntry; + size_t newOffsetOfEntry = strtab.size() + 4; // +4 for the size field + auto res = strtabMap.try_emplace(str, newOffsetOfEntry); + if (res.second) { + strtab.insert(strtab.end(), str.begin(), str.end()); + strtab.push_back('\0'); + } + return res.first->getValue(); } std::optional Writer::createSymbol(Defined *def) { From a4a6c910a7578a895ed0c84ab588b7af49e97fbc Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Tue, 27 May 2025 14:15:27 +0800 Subject: [PATCH 2/7] Using prioritized StringTableBuilder --- lld/COFF/Writer.cpp | 53 +++++++++++++---------- llvm/include/llvm/MC/StringTableBuilder.h | 17 +++++--- llvm/lib/MC/StringTableBuilder.cpp | 37 ++++++++++++++-- 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 6ec83a5f77e5a..2807ab53d9aa1 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" @@ -204,7 +205,8 @@ struct ChunkRange { class Writer { public: Writer(COFFLinkerContext &c) - : buffer(c.e.outputBuffer), delayIdata(c), ctx(c) {} + : buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF), + delayIdata(c), ctx(c) {} void run(); private: @@ -284,8 +286,7 @@ class Writer { std::unique_ptr &buffer; std::map partialSections; - std::vector strtab; - StringMap strtabMap; + StringTableBuilder strtab; std::vector outputSymtab; std::vector codeMap; IdataContents idata; @@ -1438,17 +1439,6 @@ void Writer::assignOutputSectionIndices() { sc->setOutputSectionIdx(mc->getOutputSectionIdx()); } -size_t Writer::addEntryToStringTable(StringRef str) { - assert(str.size() > COFF::NameSize); - size_t newOffsetOfEntry = strtab.size() + 4; // +4 for the size field - auto res = strtabMap.try_emplace(str, newOffsetOfEntry); - if (res.second) { - strtab.insert(strtab.end(), str.begin(), str.end()); - strtab.push_back('\0'); - } - return res.first->getValue(); -} - std::optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { @@ -1489,7 +1479,8 @@ std::optional Writer::createSymbol(Defined *def) { StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; - sym.Name.Offset.Offset = addEntryToStringTable(name); + sym.Name.Offset.Offset = 0; // Filled in later. + strtab.add(name); } else { memset(sym.Name.ShortName, 0, COFF::NameSize); memcpy(sym.Name.ShortName, name.data(), name.size()); @@ -1521,6 +1512,7 @@ void Writer::createSymbolAndStringTable() { // solution where discardable sections have long names preserved and // non-discardable sections have their names truncated, to ensure that any // section which is mapped at runtime also has its name mapped at runtime. + SmallVector longNameSections; for (OutputSection *sec : ctx.outputSections) { if (sec->name.size() <= COFF::NameSize) continue; @@ -1532,9 +1524,13 @@ void Writer::createSymbolAndStringTable() { << " is longer than 8 characters and will use a non-standard string " "table"; } - sec->setStringTableOff(addEntryToStringTable(sec->name)); + // Put the section name in the begin of strtab so that its offset is less + // than Max7DecimalOffset otherwise lldb/gdb will not read it. + strtab.add(sec->name, /*Priority=*/UINT8_MAX); + longNameSections.push_back(sec); } + std::vector> longNameSymbols; if (ctx.config.writeSymtab) { for (ObjFile *file : ctx.objFileInstances) { for (Symbol *b : file->getSymbols()) { @@ -1549,15 +1545,22 @@ void Writer::createSymbolAndStringTable() { continue; } - if (std::optional sym = createSymbol(d)) + if (std::optional sym = createSymbol(d)) { + if (d->getName().size() > COFF::NameSize) + longNameSymbols.emplace_back(outputSymtab.size(), d->getName()); outputSymtab.push_back(*sym); + } if (auto *dthunk = dyn_cast(d)) { if (!dthunk->wrappedSym->writtenToSymtab) { dthunk->wrappedSym->writtenToSymtab = true; if (std::optional sym = - createSymbol(dthunk->wrappedSym)) + createSymbol(dthunk->wrappedSym)) { + if (d->getName().size() > COFF::NameSize) + longNameSymbols.emplace_back(outputSymtab.size(), + dthunk->wrappedSym->getName()); outputSymtab.push_back(*sym); + } } } } @@ -1567,11 +1570,19 @@ void Writer::createSymbolAndStringTable() { if (outputSymtab.empty() && strtab.empty()) return; + strtab.finalize(); + for (OutputSection *sec : longNameSections) + sec->setStringTableOff(strtab.getOffset(sec->name)); + for (auto P : longNameSymbols) { + coff_symbol16 &sym = outputSymtab[P.first]; + sym.Name.Offset.Offset = strtab.getOffset(P.second); + } + // We position the symbol table to be adjacent to the end of the last section. uint64_t fileOff = fileSize; pointerToSymbolTable = fileOff; fileOff += outputSymtab.size() * sizeof(coff_symbol16); - fileOff += 4 + strtab.size(); + fileOff += strtab.getSize(); fileSize = alignTo(fileOff, ctx.config.fileAlign); } @@ -1952,9 +1963,7 @@ template void Writer::writeHeader() { // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. buf = reinterpret_cast(&symbolTable[numberOfSymbols]); - write32le(buf, strtab.size() + 4); - if (!strtab.empty()) - memcpy(buf + 4, strtab.data(), strtab.size()); + strtab.write(buf); } void Writer::openFile(StringRef path) { diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h index a738683548cfa..d594df5acddaa 100644 --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -37,6 +37,8 @@ class StringTableBuilder { }; private: + // Only non-zero priority will be recorded. + DenseMap StringPriorityMap; DenseMap StringIndexMap; size_t Size = 0; Kind K; @@ -50,11 +52,15 @@ class StringTableBuilder { StringTableBuilder(Kind K, Align Alignment = Align(1)); ~StringTableBuilder(); - /// Add a string to the builder. Returns the position of S in the - /// table. The position will be changed if finalize is used. - /// Can only be used before the table is finalized. - size_t add(CachedHashStringRef S); - size_t add(StringRef S) { return add(CachedHashStringRef(S)); } + /// Add a string to the builder. Returns the position of S in the table. The + /// position will be changed if finalize is used. Can only be used before the + /// table is finalized. Priority is only useful with reordering. Strings with + /// same priority will be put together. Strings with higher priority are + /// placed closer to the begin of string table. + size_t add(CachedHashStringRef S, uint8_t Priority = 0); + size_t add(StringRef S, uint8_t Priority = 0) { + return add(CachedHashStringRef(S), Priority); + } /// Analyze the strings and build the final table. No more strings can /// be added after this point. @@ -77,6 +83,7 @@ class StringTableBuilder { bool contains(StringRef S) const { return contains(CachedHashStringRef(S)); } bool contains(CachedHashStringRef S) const { return StringIndexMap.count(S); } + bool empty() const { return StringIndexMap.empty(); } size_t getSize() const { return Size; } void clear(); diff --git a/llvm/lib/MC/StringTableBuilder.cpp b/llvm/lib/MC/StringTableBuilder.cpp index df316bae98cea..2cbd18ef7d1bd 100644 --- a/llvm/lib/MC/StringTableBuilder.cpp +++ b/llvm/lib/MC/StringTableBuilder.cpp @@ -138,13 +138,41 @@ void StringTableBuilder::finalizeInOrder() { void StringTableBuilder::finalizeStringTable(bool Optimize) { Finalized = true; - if (Optimize) { + if (Optimize && StringIndexMap.size()) { std::vector Strings; Strings.reserve(StringIndexMap.size()); for (StringPair &P : StringIndexMap) Strings.push_back(&P); - multikeySort(Strings, 0); + auto getStringPriority = [this](const CachedHashStringRef Str) -> uint8_t { + if (StringPriorityMap.contains(Str)) + return StringPriorityMap[Str]; + return 0; + }; + + size_t RangeBegin = 0; + MutableArrayRef StringsRef(Strings); + if (StringPriorityMap.size()) { + llvm::sort(Strings, + [&](const StringPair *LHS, const StringPair *RHS) -> bool { + return getStringPriority(LHS->first) > + getStringPriority(RHS->first); + }); + // Make sure ArrayRef is valid. Although std::sort implementaion is + // normally in-place , it is not guaranteed by standard. + StringsRef = Strings; + + uint8_t RangePriority = getStringPriority(Strings[0]->first); + for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) { + uint8_t Priority = getStringPriority(Strings[I]->first); + if (Priority != RangePriority) { + multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0); + RangePriority = Priority; + RangeBegin = I; + } + } + } + multikeySort(StringsRef.slice(RangeBegin), 0); initSize(); StringRef Previous; @@ -199,11 +227,14 @@ size_t StringTableBuilder::getOffset(CachedHashStringRef S) const { return I->second; } -size_t StringTableBuilder::add(CachedHashStringRef S) { +size_t StringTableBuilder::add(CachedHashStringRef S, uint8_t Priority) { if (K == WinCOFF) assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); assert(!isFinalized()); + if (Priority) + StringPriorityMap[S] = std::max(Priority, StringPriorityMap[S]); + auto P = StringIndexMap.insert(std::make_pair(S, 0)); if (P.second) { size_t Start = alignTo(Size, Alignment); From 47e5b2ce65f4ef1ed0c5f64d3cc010971ec3b104 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Tue, 3 Jun 2025 11:27:22 +0800 Subject: [PATCH 3/7] Add test --- lld/test/COFF/strtab.s | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 lld/test/COFF/strtab.s diff --git a/lld/test/COFF/strtab.s b/lld/test/COFF/strtab.s new file mode 100644 index 0000000000000..4d8fa39f56db6 --- /dev/null +++ b/lld/test/COFF/strtab.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -triple=x86_64-windows-msvc %s -filetype=obj -o %t.obj +# RUN: lld-link -out:%t.exe -entry:main %t.obj -debug:dwarf +# RUN: llvm-readobj --string-table %t.exe | FileCheck %s + +# CHECK: StringTable { +# CHECK-NEXT: Length: 87 +# CHECK-NEXT: [ 4] .debug_abbrev +# CHECK-NEXT: [ 12] .debug_line +# CHECK-NEXT: [ 1e] long_name_symbolz +# CHECK-NEXT: [ 30] .debug_abbrez +# CHECK-NEXT: [ 3e] __impl_long_name_symbolA +# CHECK-NEXT: } + + +.global main +.text +main: +long_name_symbolz: +long_name_symbolA: +__impl_long_name_symbolA: +name_symbolA: +.debug_abbrez: + ret + +.section .debug_abbrev,"dr" +.byte 0 + +.section .debug_line,"dr" +.byte 0 From 333a908186227d7e01995af14d9d3a3646b6aeb8 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Thu, 5 Jun 2025 10:13:44 +0800 Subject: [PATCH 4/7] Fix ObjCopy --- llvm/lib/ObjCopy/COFF/COFFWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp index 734a1698d515b..6446a2f52b76d 100644 --- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp +++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp @@ -121,7 +121,7 @@ void COFFWriter::layoutSections() { Expected COFFWriter::finalizeStringTable() { for (const auto &S : Obj.getSections()) if (S.Name.size() > COFF::NameSize) - StrTabBuilder.add(S.Name); + StrTabBuilder.add(S.Name, /*Priority=*/UINT8_MAX); for (const auto &S : Obj.getSymbols()) if (S.Name.size() > COFF::NameSize) From bd00abac8d36c279df171a91087978aaf7f9a4a7 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Fri, 13 Jun 2025 18:31:38 +0800 Subject: [PATCH 5/7] Address comments --- llvm/include/llvm/MC/StringTableBuilder.h | 3 ++- llvm/lib/MC/StringTableBuilder.cpp | 18 ++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h index d19f4cca04b1d..56b78b91a3879 100644 --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -57,7 +57,8 @@ class StringTableBuilder { /// position will be changed if finalize is used. Can only be used before the /// table is finalized. Priority is only useful with reordering. Strings with /// same priority will be put together. Strings with higher priority are - /// placed closer to the begin of string table. + /// placed closer to the begin of string table. When adding same string with + /// different priority, the maximum priority win. LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0); size_t add(StringRef S, uint8_t Priority = 0) { return add(CachedHashStringRef(S), Priority); diff --git a/llvm/lib/MC/StringTableBuilder.cpp b/llvm/lib/MC/StringTableBuilder.cpp index 43f210ee7ecbe..f2b82998f2457 100644 --- a/llvm/lib/MC/StringTableBuilder.cpp +++ b/llvm/lib/MC/StringTableBuilder.cpp @@ -144,27 +144,17 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) { for (StringPair &P : StringIndexMap) Strings.push_back(&P); - auto getStringPriority = [this](const CachedHashStringRef Str) -> uint8_t { - if (StringPriorityMap.contains(Str)) - return StringPriorityMap[Str]; - return 0; - }; - size_t RangeBegin = 0; MutableArrayRef StringsRef(Strings); if (StringPriorityMap.size()) { llvm::sort(Strings, [&](const StringPair *LHS, const StringPair *RHS) -> bool { - return getStringPriority(LHS->first) > - getStringPriority(RHS->first); + return StringPriorityMap.lookup(LHS->first) > + StringPriorityMap.lookup(RHS->first); }); - // Make sure ArrayRef is valid. Although std::sort implementaion is - // normally in-place , it is not guaranteed by standard. - StringsRef = Strings; - - uint8_t RangePriority = getStringPriority(Strings[0]->first); + uint8_t RangePriority = StringPriorityMap.lookup(Strings[0]->first); for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) { - uint8_t Priority = getStringPriority(Strings[I]->first); + uint8_t Priority = StringPriorityMap.lookup(Strings[I]->first); if (Priority != RangePriority) { multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0); RangePriority = Priority; From ff8fce20fbd7769346a8bae1298ba4a3e32bce63 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Tue, 17 Jun 2025 14:58:55 +0800 Subject: [PATCH 6/7] Update llvm/include/llvm/MC/StringTableBuilder.h Address comments Co-authored-by: James Henderson --- llvm/include/llvm/MC/StringTableBuilder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h index 56b78b91a3879..3f1c045fc0bdd 100644 --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -56,7 +56,7 @@ class StringTableBuilder { /// Add a string to the builder. Returns the position of S in the table. The /// position will be changed if finalize is used. Can only be used before the /// table is finalized. Priority is only useful with reordering. Strings with - /// same priority will be put together. Strings with higher priority are + /// the same priority will be put together. Strings with higher priority are /// placed closer to the begin of string table. When adding same string with /// different priority, the maximum priority win. LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0); From ac31e6836ad7264e01199f3292875177a8dfab42 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Tue, 17 Jun 2025 15:33:58 +0800 Subject: [PATCH 7/7] split llvm-copy changes to another PR --- llvm/lib/ObjCopy/COFF/COFFWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp index 6446a2f52b76d..734a1698d515b 100644 --- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp +++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp @@ -121,7 +121,7 @@ void COFFWriter::layoutSections() { Expected COFFWriter::finalizeStringTable() { for (const auto &S : Obj.getSections()) if (S.Name.size() > COFF::NameSize) - StrTabBuilder.add(S.Name, /*Priority=*/UINT8_MAX); + StrTabBuilder.add(S.Name); for (const auto &S : Obj.getSymbols()) if (S.Name.size() > COFF::NameSize)