From 76a14bb93bbc76b773e2e69dac9d4c435c728c5a Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Sun, 7 Sep 2025 20:30:27 +0200 Subject: [PATCH 1/8] [PDB] Add public symbol lookup by address --- .../llvm/DebugInfo/PDB/Native/PublicsStream.h | 18 ++++ .../DebugInfo/PDB/Native/PublicsStream.cpp | 91 ++++++++++++++++++ llvm/unittests/DebugInfo/PDB/CMakeLists.txt | 1 + .../DebugInfo/PDB/Inputs/PublicSymbols.cpp | 46 +++++++++ .../DebugInfo/PDB/Inputs/PublicSymbols.pdb | Bin 0 -> 53248 bytes .../DebugInfo/PDB/PublicsStreamTest.cpp | 62 ++++++++++++ 6 files changed, 218 insertions(+) create mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp create mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb create mode 100644 llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h index 2cb4bee8ca5df..c5fdad057e867 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h @@ -18,9 +18,13 @@ namespace llvm { namespace msf { class MappedBlockStream; } +namespace codeview { +class PublicSym32; +} namespace pdb { struct PublicsStreamHeader; struct SectionOffset; +class SymbolStream; class PublicsStream { public: @@ -42,6 +46,20 @@ class PublicsStream { return SectionOffsets; } + /// Find a public symbol by a segment and offset. + /// + /// In case there is more than one symbol (for example due to ICF), the first + /// one is returned. + /// + /// \return If a symbol was found, the symbol at the provided address is + /// returned as well as the index of this symbol in the address map. If + /// the binary was linked with ICF, there might be more symbols with the + /// same address after the returned one. If no symbol is found, + /// `std::nullopt` is returned. + LLVM_ABI std::optional> + findByAddress(const SymbolStream &Symbols, uint16_t Segment, + uint32_t Offset) const; + private: std::unique_ptr Stream; GSIHashTable PublicsTable; diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp index c350e0e0b3e19..984e6e70adba2 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -22,9 +22,12 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" +#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Error.h" #include @@ -96,3 +99,91 @@ Error PublicsStream::reload() { "Corrupted publics stream."); return Error::success(); } + +static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset, + uint16_t RhsSegment, uint32_t RhsOffset) { + if (LhsSegment == RhsSegment) + return LhsOffset - RhsOffset; + return LhsSegment - RhsSegment; +} + +static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst, + const codeview::PublicSym32 &Rhs) { + return compareSegmentOffset(LhsSegment, LhsOffst, Rhs.Segment, Rhs.Offset); +} + +// This is a reimplementation of NearestSym: +// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581 +std::optional> +PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, + uint32_t Offset) const { + // The address map is sorted by address, so we do binary search. + // Each element is an offset into the symbols for a public symbol. + auto Lo = AddressMap.begin(); + auto Hi = AddressMap.end(); + Hi -= 1; + + while (Lo < Hi) { + auto Cur = Lo + ((Hi - Lo + 1) / 2); + auto Sym = Symbols.readRecord(Cur->value()); + if (Sym.kind() != codeview::S_PUB32) + return std::nullopt; // this is most likely corrupted debug info + + auto Psym = + codeview::SymbolDeserializer::deserializeAs(Sym); + if (!Psym) { + consumeError(Psym.takeError()); + return std::nullopt; + } + + uint32_t Cmp = compareSegmentOffset(Segment, Offset, *Psym); + if (Cmp < 0) { + Cur -= 1; + Hi = Cur; + } else if (Cmp == 0) + Lo = Hi = Cur; + else + Lo = Cur; + } + + auto Sym = Symbols.readRecord(Lo->value()); + if (Sym.kind() != codeview::S_PUB32) + return std::nullopt; // this is most likely corrupted debug info + + auto MaybePsym = + codeview::SymbolDeserializer::deserializeAs(Sym); + if (!MaybePsym) { + consumeError(MaybePsym.takeError()); + return std::nullopt; + } + codeview::PublicSym32 Psym = std::move(*MaybePsym); + + uint32_t Cmp = compareSegmentOffset(Segment, Offset, Psym); + if (Cmp != 0) + return std::nullopt; + + // We found a symbol. Due to ICF, multiple symbols can have the same + // address, so return the first one + while (Lo != AddressMap.begin()) { + --Lo; + Sym = Symbols.readRecord(Lo->value()); + if (Sym.kind() != codeview::S_PUB32) + return std::nullopt; + MaybePsym = + codeview::SymbolDeserializer::deserializeAs(Sym); + if (!MaybePsym) { + consumeError(MaybePsym.takeError()); + return std::nullopt; + } + + if (MaybePsym->Segment != Segment || MaybePsym->Offset != Offset) { + ++Lo; + break; + } + + Psym = std::move(*MaybePsym); + } + + std::ptrdiff_t IterOffset = Lo - AddressMap.begin(); + return std::pair{Psym, static_cast(IterOffset)}; +} diff --git a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt index ba2a732848f4d..b1b9d2d98c944 100644 --- a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests StringTableBuilderTest.cpp PDBApiTest.cpp PDBVariantTest.cpp + PublicsStreamTest.cpp ) target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp new file mode 100644 index 0000000000000..0aeab04543caf --- /dev/null +++ b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp @@ -0,0 +1,46 @@ +// clang-format off + +// Compile with +// cl /Z7 /GR- /GS- PublicSymbols.cpp -c /Gy +// link .\PublicSymbols.obj /DEBUG /NODEFAULTLIB /out:PublicSymbols.exe /ENTRY:main /OPT:ICF +// llvm-pdbutil pdb2yaml --publics-stream PublicSymbols.pdb > PublicSymbols.yaml +// llvm-pdbutil yaml2pdb PublicSymbols.yaml +// +// rm PublicSymbols.exe && rm PublicSymbols.obj && rm PublicSymbols.yaml + +int foobar(int i){ return i + 1; } +// these should be merged with ICF +int dup1(int i){ return i + 2; } +int dup2(int i){ return i + 2; } +int dup3(int i){ return i + 2; } + +class AClass { +public: + void AMethod(int, char*) {} + static bool Something(char c) { + return c == ' '; + } +}; + +struct Base { + virtual ~Base() = default; +}; +struct Derived : public Base {}; +struct Derived2 : public Base {}; +struct Derived3 : public Derived2, public Derived {}; + +int AGlobal; + +void operator delete(void *,unsigned __int64) {} + +int main() { + foobar(1); + dup1(1); + dup2(1); + dup3(1); + AClass a; + a.AMethod(1, nullptr); + AClass::Something(' '); + Derived3 d3; + return AGlobal; +} diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb new file mode 100644 index 0000000000000000000000000000000000000000..ffa3275d58d7b77cb6aad0b89ffb0dceeb1ad39e GIT binary patch literal 53248 zcmeI*PiP!v9LDk4G-}g75~WC*gN!1gR+?P{O#(t@la1M8A}MKUDnhcF#4K)hC7Y@T z4>@@7(1HgK3LfIkgMx>8_n;SvptpL9;8AG>3m$6q`OUnuvzg6e2(%{g`@*yD?!3Qw z_npsf4zrs~Cd;p_)atdxRe!W^^wcSTa{2{-;B+>7{77$M+EeTHybg88l?i*Sb|~t& zMF0T=5I_I{1Q0*~0R#|00D*lX@HY(s1Q0*~0R#|0009ILKmY**4!nT3dEgI`14IA; z1Q0*~0R#|0009ILK;VBbaBQFao&w86009ILKmY**5I_I{1Q0*~f!zz(2Ozy&eZSib zo%V%k@9I{x?_uqpSPE z^$)rIKX!Fj-S#23{hZtWlY4x@&GVS68*_ce)#+i<0zbKL_q6X$ny>39M$M1!?6TkW z{%iJxhwUb5J6pZ2T~_T{u;#6`8~fMoc45iX-g-<9c6o=~#N^GZ=UugobGtNhv5-l+TqvlpmBH)y!$RzpB5czFWDXTvwjbdw5X2U9_{J ze58D%Y$~^vo=%NhwC{OkLs?bUl_`zW5I_I{1Q0*~0R#|0;NS>6>OE+u%VEoS!^7E; zQhgx^&W+{s!OY8^_qZGPRS}LC7FNn{EzCc+P0duQrhl87^gK-^Uc8odH%~5F%Z2n> zzKq9{wY1dOd6BI*=W4W;mYVcDO(l)BXp(TA{@_x6W@;=yGy7s77Oz(>>cZBc=b0Ud z7JFtS2=}!CJJ*iJl7-s%Gj8AR^tcvzW+bT%G#=O5dFPh7nrnl=O4~oLO{T+tghgXqr$pZJ#@OVd5!Uk7IlX z$1%RcAHJ4mu^Cz9Cl^+iYV$#Uv{I_q zqaTEsqK>kl*CBSh8P!hLmQ`G?zTT*w&dWHAqY1JAb#LTQ7wWnQo&HgvVtd&i3|Kp&bcOdk z?>^VlA%Fk^2q1s}0tg_000IagfWV#+uv2$ymlV4WVBPiVyc0Ut_jRs6q1fO2A64S( p1K57{F@T-!+s6R$>A&vM5I_I{1Q0*~0R#|0009ILKwysv`~zW}Izs>e literal 0 HcmV?d00001 diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp new file mode 100644 index 0000000000000..bac4901073cd0 --- /dev/null +++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/MemoryBuffer.h" + +#include "llvm/Testing/Support/SupportHelpers.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::pdb; + +extern const char *TestMainArgv0; + +static std::string getExePath() { + SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0); + llvm::sys::path::append(InputsDir, "PublicSymbols.pdb"); + return std::string(InputsDir); +} + +TEST(PublicsStreamTest, FindByAddress) { + std::string ExePath = getExePath(); + auto Buffer = MemoryBuffer::getFile(ExePath, /*IsText=*/false, + /*RequiresNullTerminator=*/false); + ASSERT_TRUE(bool(Buffer)); + auto Stream = std::make_unique( + std::move(*Buffer), llvm::endianness::little); + + BumpPtrAllocator Alloc; + PDBFile File(ExePath, std::move(Stream), Alloc); + ASSERT_FALSE(bool(File.parseFileHeaders())); + ASSERT_FALSE(bool(File.parseStreamData())); + + auto Publics = File.getPDBPublicsStream(); + ASSERT_TRUE(bool(Publics)); + auto Symbols = File.getPDBSymbolStream(); + ASSERT_TRUE(bool(Symbols)); + + auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8); + ASSERT_TRUE(VTableDerived.has_value()); + // both derived and derived2 have their vftables there - but derived2 is first + // (due to ICF) + ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@"); + ASSERT_EQ(VTableDerived->second, 26); + + ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value()); + ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value()); + + auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0); + ASSERT_TRUE(GlobalSym.has_value()); + ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA"); + ASSERT_EQ(GlobalSym->second, 30); +} From ce02ae15f2d7fe06bcc2c2f5459f8cedcb1edb3e Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Sun, 7 Sep 2025 22:31:13 +0200 Subject: [PATCH 2/8] fix: make unsigned --- llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp index bac4901073cd0..0aa6a95001b7b 100644 --- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp @@ -50,7 +50,7 @@ TEST(PublicsStreamTest, FindByAddress) { // both derived and derived2 have their vftables there - but derived2 is first // (due to ICF) ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@"); - ASSERT_EQ(VTableDerived->second, 26); + ASSERT_EQ(VTableDerived->second, 26u); ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value()); ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value()); @@ -58,5 +58,5 @@ TEST(PublicsStreamTest, FindByAddress) { auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0); ASSERT_TRUE(GlobalSym.has_value()); ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA"); - ASSERT_EQ(GlobalSym->second, 30); + ASSERT_EQ(GlobalSym->second, 30u); } From 51fde81d412d2aac6052e6b51fa7f3b547b6bdf5 Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Mon, 8 Sep 2025 11:09:36 +0200 Subject: [PATCH 3/8] fix: use lower_bound to find the element --- .../DebugInfo/PDB/Native/PublicsStream.cpp | 78 +++++++------------ 1 file changed, 26 insertions(+), 52 deletions(-) diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 984e6e70adba2..8827bc4a5b329 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -117,36 +117,32 @@ static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst, std::optional> PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, uint32_t Offset) const { - // The address map is sorted by address, so we do binary search. - // Each element is an offset into the symbols for a public symbol. - auto Lo = AddressMap.begin(); - auto Hi = AddressMap.end(); - Hi -= 1; - - while (Lo < Hi) { - auto Cur = Lo + ((Hi - Lo + 1) / 2); - auto Sym = Symbols.readRecord(Cur->value()); - if (Sym.kind() != codeview::S_PUB32) - return std::nullopt; // this is most likely corrupted debug info - - auto Psym = - codeview::SymbolDeserializer::deserializeAs(Sym); - if (!Psym) { - consumeError(Psym.takeError()); - return std::nullopt; - } - - uint32_t Cmp = compareSegmentOffset(Segment, Offset, *Psym); - if (Cmp < 0) { - Cur -= 1; - Hi = Cur; - } else if (Cmp == 0) - Lo = Hi = Cur; - else - Lo = Cur; - } + // The address map is sorted by address, so we can use lower_bound to find the + // position. Each element is an offset into the symbols for a public symbol. + auto It = llvm::lower_bound( + AddressMap, std::pair(Segment, Offset), + [&](support::ulittle32_t Cur, auto Addr) { + auto Sym = Symbols.readRecord(Cur.value()); + if (Sym.kind() != codeview::S_PUB32) + return false; // stop here, this is most likely corrupted debug info + + auto Psym = + codeview::SymbolDeserializer::deserializeAs( + Sym); + if (!Psym) { + consumeError(Psym.takeError()); + return false; + } + + if (Psym->Segment == Addr.first) + return Psym->Offset < Addr.second; + return Psym->Segment < Addr.first; + }); + + if (It == AddressMap.end()) + return std::nullopt; - auto Sym = Symbols.readRecord(Lo->value()); + auto Sym = Symbols.readRecord(It->value()); if (Sym.kind() != codeview::S_PUB32) return std::nullopt; // this is most likely corrupted debug info @@ -162,28 +158,6 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, if (Cmp != 0) return std::nullopt; - // We found a symbol. Due to ICF, multiple symbols can have the same - // address, so return the first one - while (Lo != AddressMap.begin()) { - --Lo; - Sym = Symbols.readRecord(Lo->value()); - if (Sym.kind() != codeview::S_PUB32) - return std::nullopt; - MaybePsym = - codeview::SymbolDeserializer::deserializeAs(Sym); - if (!MaybePsym) { - consumeError(MaybePsym.takeError()); - return std::nullopt; - } - - if (MaybePsym->Segment != Segment || MaybePsym->Offset != Offset) { - ++Lo; - break; - } - - Psym = std::move(*MaybePsym); - } - - std::ptrdiff_t IterOffset = Lo - AddressMap.begin(); + std::ptrdiff_t IterOffset = It - AddressMap.begin(); return std::pair{Psym, static_cast(IterOffset)}; } From d2bddebdf1fba47b7dcfdaf8beefe6dc53a47535 Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Tue, 9 Sep 2025 17:03:18 +0200 Subject: [PATCH 4/8] fix: use tuples --- llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 8827bc4a5b329..73c00538d3f4c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -120,7 +120,7 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, // The address map is sorted by address, so we can use lower_bound to find the // position. Each element is an offset into the symbols for a public symbol. auto It = llvm::lower_bound( - AddressMap, std::pair(Segment, Offset), + AddressMap, std::tuple(Segment, Offset), [&](support::ulittle32_t Cur, auto Addr) { auto Sym = Symbols.readRecord(Cur.value()); if (Sym.kind() != codeview::S_PUB32) @@ -134,9 +134,7 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, return false; } - if (Psym->Segment == Addr.first) - return Psym->Offset < Addr.second; - return Psym->Segment < Addr.first; + return std::tie(Psym->Segment, Psym->Offset) < Addr; }); if (It == AddressMap.end()) From e5ec29b647fbf7648f34e122d0d6157882b687d3 Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Tue, 9 Sep 2025 17:04:11 +0200 Subject: [PATCH 5/8] refactor: generate publics programmatically --- .../DebugInfo/PDB/Inputs/PublicSymbols.cpp | 46 ----- .../DebugInfo/PDB/Inputs/PublicSymbols.pdb | Bin 53248 -> 0 bytes .../DebugInfo/PDB/PublicsStreamTest.cpp | 195 +++++++++++++++--- 3 files changed, 171 insertions(+), 70 deletions(-) delete mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp delete mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp deleted file mode 100644 index 0aeab04543caf..0000000000000 --- a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// clang-format off - -// Compile with -// cl /Z7 /GR- /GS- PublicSymbols.cpp -c /Gy -// link .\PublicSymbols.obj /DEBUG /NODEFAULTLIB /out:PublicSymbols.exe /ENTRY:main /OPT:ICF -// llvm-pdbutil pdb2yaml --publics-stream PublicSymbols.pdb > PublicSymbols.yaml -// llvm-pdbutil yaml2pdb PublicSymbols.yaml -// -// rm PublicSymbols.exe && rm PublicSymbols.obj && rm PublicSymbols.yaml - -int foobar(int i){ return i + 1; } -// these should be merged with ICF -int dup1(int i){ return i + 2; } -int dup2(int i){ return i + 2; } -int dup3(int i){ return i + 2; } - -class AClass { -public: - void AMethod(int, char*) {} - static bool Something(char c) { - return c == ' '; - } -}; - -struct Base { - virtual ~Base() = default; -}; -struct Derived : public Base {}; -struct Derived2 : public Base {}; -struct Derived3 : public Derived2, public Derived {}; - -int AGlobal; - -void operator delete(void *,unsigned __int64) {} - -int main() { - foobar(1); - dup1(1); - dup2(1); - dup3(1); - AClass a; - a.AMethod(1, nullptr); - AClass::Something(' '); - Derived3 d3; - return AGlobal; -} diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb deleted file mode 100644 index ffa3275d58d7b77cb6aad0b89ffb0dceeb1ad39e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI*PiP!v9LDk4G-}g75~WC*gN!1gR+?P{O#(t@la1M8A}MKUDnhcF#4K)hC7Y@T z4>@@7(1HgK3LfIkgMx>8_n;SvptpL9;8AG>3m$6q`OUnuvzg6e2(%{g`@*yD?!3Qw z_npsf4zrs~Cd;p_)atdxRe!W^^wcSTa{2{-;B+>7{77$M+EeTHybg88l?i*Sb|~t& zMF0T=5I_I{1Q0*~0R#|00D*lX@HY(s1Q0*~0R#|0009ILKmY**4!nT3dEgI`14IA; z1Q0*~0R#|0009ILK;VBbaBQFao&w86009ILKmY**5I_I{1Q0*~f!zz(2Ozy&eZSib zo%V%k@9I{x?_uqpSPE z^$)rIKX!Fj-S#23{hZtWlY4x@&GVS68*_ce)#+i<0zbKL_q6X$ny>39M$M1!?6TkW z{%iJxhwUb5J6pZ2T~_T{u;#6`8~fMoc45iX-g-<9c6o=~#N^GZ=UugobGtNhv5-l+TqvlpmBH)y!$RzpB5czFWDXTvwjbdw5X2U9_{J ze58D%Y$~^vo=%NhwC{OkLs?bUl_`zW5I_I{1Q0*~0R#|0;NS>6>OE+u%VEoS!^7E; zQhgx^&W+{s!OY8^_qZGPRS}LC7FNn{EzCc+P0duQrhl87^gK-^Uc8odH%~5F%Z2n> zzKq9{wY1dOd6BI*=W4W;mYVcDO(l)BXp(TA{@_x6W@;=yGy7s77Oz(>>cZBc=b0Ud z7JFtS2=}!CJJ*iJl7-s%Gj8AR^tcvzW+bT%G#=O5dFPh7nrnl=O4~oLO{T+tghgXqr$pZJ#@OVd5!Uk7IlX z$1%RcAHJ4mu^Cz9Cl^+iYV$#Uv{I_q zqaTEsqK>kl*CBSh8P!hLmQ`G?zTT*w&dWHAqY1JAb#LTQ7wWnQo&HgvVtd&i3|Kp&bcOdk z?>^VlA%Fk^2q1s}0tg_000IagfWV#+uv2$ymlV4WVBPiVyc0Ut_jRs6q1fO2A64S( p1K57{F@T-!+s6R$>A&vM5I_I{1Q0*~0R#|0009ILKwysv`~zW}Izs>e diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp index 0aa6a95001b7b..0b12285a16d89 100644 --- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp @@ -7,43 +7,180 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" +#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/MemoryBuffer.h" - -#include "llvm/Testing/Support/SupportHelpers.h" #include "gtest/gtest.h" using namespace llvm; using namespace llvm::pdb; -extern const char *TestMainArgv0; +namespace { +struct PublicSym { + llvm::StringRef Name; + uint16_t Segment; + uint32_t Offset; +}; + +class MockPublics { +public: + MockPublics(size_t StreamSize, BumpPtrAllocator &Alloc, + msf::MSFBuilder Builder); + static Expected> + create(BumpPtrAllocator &Allocator, size_t StreamSize); + + void addPublics(ArrayRef Syms); + Error finish(); + + PublicsStream *publicsStream(); + SymbolStream *symbolStream(); + +private: + MutableBinaryByteStream Stream; + + msf::MSFBuilder MsfBuilder; + std::optional MsfLayout; + + GSIStreamBuilder Gsi; -static std::string getExePath() { - SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0); - llvm::sys::path::append(InputsDir, "PublicSymbols.pdb"); - return std::string(InputsDir); + std::unique_ptr Publics; + std::unique_ptr Symbols; +}; + +MockPublics::MockPublics(size_t StreamSize, BumpPtrAllocator &Allocator, + msf::MSFBuilder Builder) + : Stream({Allocator.Allocate(StreamSize), StreamSize}, + llvm::endianness::little), + MsfBuilder(std::move(Builder)), Gsi(this->MsfBuilder) {} + +Expected> +MockPublics::create(BumpPtrAllocator &Allocator, size_t StreamSize) { + auto ExpectedMsf = msf::MSFBuilder::create(Allocator, 4096); + if (!ExpectedMsf) + return ExpectedMsf.takeError(); + return std::make_unique(StreamSize, Allocator, + std::move(*ExpectedMsf)); +} + +void MockPublics::addPublics(ArrayRef Publics) { + std::vector Bulks; + for (const auto &Sym : Publics) { + BulkPublic BP; + BP.Name = Sym.Name.data(); + BP.NameLen = Sym.Name.size(); + BP.Offset = Sym.Offset; + BP.Segment = Sym.Segment; + Bulks.emplace_back(BP); + } + Gsi.addPublicSymbols(std::move(Bulks)); +} + +Error MockPublics::finish() { + auto Err = Gsi.finalizeMsfLayout(); + if (Err) + return Err; + + auto ExpectedLayout = MsfBuilder.generateLayout(); + if (!ExpectedLayout) + return ExpectedLayout.takeError(); + MsfLayout = std::move(*ExpectedLayout); + + return Gsi.commit(*MsfLayout, Stream); +} + +PublicsStream *MockPublics::publicsStream() { + if (!Publics) { + Publics = std::make_unique( + msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream, + Gsi.getPublicsStreamIndex(), + MsfBuilder.getAllocator())); + } + return Publics.get(); +} + +SymbolStream *MockPublics::symbolStream() { + if (!Symbols) { + Symbols = std::make_unique( + msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream, + Gsi.getRecordStreamIndex(), + MsfBuilder.getAllocator())); + } + return Symbols.get(); +} + +std::array GSymbols{ + PublicSym{"??0Base@@QEAA@XZ", /*Segment=*/1, /*Offset=*/0}, + PublicSym{"??0Derived@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32}, + PublicSym{"??0Derived2@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32}, + PublicSym{"??0Derived3@@QEAA@XZ", /*Segment=*/1, /*Offset=*/80}, + PublicSym{"??1Base@@UEAA@XZ", /*Segment=*/1, /*Offset=*/160}, + PublicSym{"??1Derived@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176}, + PublicSym{"??1Derived2@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176}, + PublicSym{"??1Derived3@@UEAA@XZ", /*Segment=*/1, /*Offset=*/208}, + PublicSym{"??3@YAXPEAX_K@Z", /*Segment=*/1, /*Offset=*/256}, + PublicSym{"??_EDerived3@@W7EAAPEAXI@Z", /*Segment=*/1, /*Offset=*/268}, + PublicSym{"??_GBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288}, + PublicSym{"??_EBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288}, + PublicSym{"??_EDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352}, + PublicSym{"??_EDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352}, + PublicSym{"??_GDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352}, + PublicSym{"??_GDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352}, + PublicSym{"??_EDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416}, + PublicSym{"??_GDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416}, + PublicSym{"?AMethod@AClass@@QEAAXHPEAD@Z", /*Segment=*/1, /*Offset=*/480}, + PublicSym{"?Something@AClass@@SA_ND@Z", /*Segment=*/1, /*Offset=*/496}, + PublicSym{"?dup1@@YAHH@Z", /*Segment=*/1, /*Offset=*/544}, + PublicSym{"?dup3@@YAHH@Z", /*Segment=*/1, /*Offset=*/544}, + PublicSym{"?dup2@@YAHH@Z", /*Segment=*/1, /*Offset=*/544}, + PublicSym{"?foobar@@YAHH@Z", /*Segment=*/1, /*Offset=*/560}, + PublicSym{"main", /*Segment=*/1, /*Offset=*/576}, + PublicSym{"??_7Base@@6B@", /*Segment=*/2, /*Offset=*/0}, + PublicSym{"??_7Derived@@6B@", /*Segment=*/2, /*Offset=*/8}, + PublicSym{"??_7Derived2@@6B@", /*Segment=*/2, /*Offset=*/8}, + PublicSym{"??_7Derived3@@6BDerived2@@@", /*Segment=*/2, /*Offset=*/16}, + PublicSym{"??_7Derived3@@6BDerived@@@", /*Segment=*/2, /*Offset=*/24}, + PublicSym{"?AGlobal@@3HA", /*Segment=*/3, /*Offset=*/0}, +}; + +} // namespace + +static std::pair +nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) { + auto Index = Publics->getAddressMap()[N].value(); + codeview::CVSymbol Sym = Symbols->readRecord(Index); + auto ExpectedPub = + codeview::SymbolDeserializer::deserializeAs(Sym); + if (!ExpectedPub) + return std::pair(0, 0); + return std::pair(ExpectedPub->Segment, ExpectedPub->Offset); } TEST(PublicsStreamTest, FindByAddress) { - std::string ExePath = getExePath(); - auto Buffer = MemoryBuffer::getFile(ExePath, /*IsText=*/false, - /*RequiresNullTerminator=*/false); - ASSERT_TRUE(bool(Buffer)); - auto Stream = std::make_unique( - std::move(*Buffer), llvm::endianness::little); - - BumpPtrAllocator Alloc; - PDBFile File(ExePath, std::move(Stream), Alloc); - ASSERT_FALSE(bool(File.parseFileHeaders())); - ASSERT_FALSE(bool(File.parseStreamData())); - - auto Publics = File.getPDBPublicsStream(); - ASSERT_TRUE(bool(Publics)); - auto Symbols = File.getPDBSymbolStream(); - ASSERT_TRUE(bool(Symbols)); + BumpPtrAllocator Allocator; + auto ExpectedMock = MockPublics::create(Allocator, 1 << 20); + ASSERT_TRUE(bool(ExpectedMock)); + std::unique_ptr Mock = std::move(*ExpectedMock); + + Mock->addPublics(GSymbols); + Error Err = Mock->finish(); + ASSERT_FALSE(Err) << Err; + + auto *Publics = Mock->publicsStream(); + ASSERT_NE(Publics, nullptr); + Err = Publics->reload(); + ASSERT_FALSE(Err) << Err; + + auto *Symbols = Mock->symbolStream(); + ASSERT_NE(Symbols, nullptr); + Err = Symbols->reload(); + ASSERT_FALSE(Err) << Err; auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8); ASSERT_TRUE(VTableDerived.has_value()); @@ -52,6 +189,16 @@ TEST(PublicsStreamTest, FindByAddress) { ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@"); ASSERT_EQ(VTableDerived->second, 26u); + // Again, make sure that we find the first symbol + auto VectorDtorDerived = Publics->findByAddress(*Symbols, 1, 352); + ASSERT_TRUE(VectorDtorDerived.has_value()); + ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z"); + ASSERT_EQ(VectorDtorDerived->second, 12u); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1, 352)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1, 352)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1, 352)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1, 416)); + ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value()); ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value()); From c17d0cf3f4760b3440e561c960d29455fa32d5dc Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Tue, 9 Sep 2025 17:06:21 +0200 Subject: [PATCH 6/8] fix: use tuples in the last equality check --- llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 73c00538d3f4c..0453eea26605b 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -100,18 +100,6 @@ Error PublicsStream::reload() { return Error::success(); } -static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset, - uint16_t RhsSegment, uint32_t RhsOffset) { - if (LhsSegment == RhsSegment) - return LhsOffset - RhsOffset; - return LhsSegment - RhsSegment; -} - -static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst, - const codeview::PublicSym32 &Rhs) { - return compareSegmentOffset(LhsSegment, LhsOffst, Rhs.Segment, Rhs.Offset); -} - // This is a reimplementation of NearestSym: // https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581 std::optional> @@ -152,8 +140,7 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment, } codeview::PublicSym32 Psym = std::move(*MaybePsym); - uint32_t Cmp = compareSegmentOffset(Segment, Offset, Psym); - if (Cmp != 0) + if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset)) return std::nullopt; std::ptrdiff_t IterOffset = It - AddressMap.begin(); From b27edebfafa82c69d5acb13291b7b933f4f279cc Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Tue, 9 Sep 2025 17:14:49 +0200 Subject: [PATCH 7/8] test: add check for corrupted debug info --- .../DebugInfo/PDB/PublicsStreamTest.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp index 0b12285a16d89..e7d108eb10e1a 100644 --- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp @@ -42,6 +42,8 @@ class MockPublics { PublicsStream *publicsStream(); SymbolStream *symbolStream(); + MutableBinaryByteStream &stream() { return Stream; } + private: MutableBinaryByteStream Stream; @@ -206,4 +208,19 @@ TEST(PublicsStreamTest, FindByAddress) { ASSERT_TRUE(GlobalSym.has_value()); ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA"); ASSERT_EQ(GlobalSym->second, 30u); + + // test corrupt debug info + codeview::CVSymbol GlobalCVSym = + Symbols->readRecord(Publics->getAddressMap()[30]); + ASSERT_EQ(GlobalCVSym.kind(), codeview::S_PUB32); + // CVSymbol::data returns a pointer to const data, so we modify the backing + // data + uint8_t *PDBData = Mock->stream().data().data(); + auto Offset = GlobalCVSym.data().data() - PDBData; + reinterpret_cast(PDBData + Offset)->RecordKind = + codeview::S_GDATA32; + ASSERT_EQ(GlobalCVSym.kind(), codeview::S_GDATA32); + + GlobalSym = Publics->findByAddress(*Symbols, 3, 0); + ASSERT_FALSE(GlobalSym.has_value()); } From 413ebd125c46e942dc53f7de440ba35352c540f6 Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Tue, 9 Sep 2025 18:03:08 +0200 Subject: [PATCH 8/8] fix: comparison --- llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp index e7d108eb10e1a..4b89280cbdb93 100644 --- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp @@ -153,7 +153,7 @@ std::array GSymbols{ } // namespace -static std::pair +static std::pair nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) { auto Index = Publics->getAddressMap()[N].value(); codeview::CVSymbol Sym = Symbols->readRecord(Index); @@ -196,10 +196,10 @@ TEST(PublicsStreamTest, FindByAddress) { ASSERT_TRUE(VectorDtorDerived.has_value()); ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z"); ASSERT_EQ(VectorDtorDerived->second, 12u); - ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1, 352)); - ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1, 352)); - ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1, 352)); - ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1, 416)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1u, 352u)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1u, 352u)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1u, 352u)); + ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1u, 416u)); ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value()); ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());