From 4edf9d8559339a12108d9c4d1e2f3bb062a5a768 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 20 Sep 2023 17:30:45 -0700 Subject: [PATCH 1/9] [clang][modules] Move `SLocEntry` search into `ASTReader` In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for each visited entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%. --- clang/include/clang/Basic/SourceManager.h | 3 + clang/include/clang/Serialization/ASTReader.h | 6 ++ clang/lib/Basic/SourceManager.cpp | 70 +------------------ clang/lib/Serialization/ASTReader.cpp | 63 +++++++++++++++++ 4 files changed, 75 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 2f846502d6f33..a4c7facddd53d 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -533,6 +533,9 @@ class ExternalSLocEntrySource { /// entry from being loaded. virtual bool ReadSLocEntry(int ID) = 0; + /// Get the index ID for the loaded SourceLocation offset. + virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; + /// Retrieve the module import location and name for the given ID, if /// in fact it was loaded from a module (rather than, say, a precompiled /// header). diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dc1eb21c27801..e643fcf4c930f 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2153,6 +2153,12 @@ class ASTReader /// Read the source location entry with index ID. 
bool ReadSLocEntry(int ID) override; + /// Get the index ID for the loaded SourceLocation offset. + int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; + /// Read the offset of the SLocEntry at the given index in the given module + /// file. + std::optional readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 0521ac7b30339..f881afc2e46c5 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - if (SLocOffset < CurrentLoadedOffset) { - assert(0 && "Invalid SLocOffset or bad function choice"); - return FileID(); - } - - // Essentially the same as the local case, but the loaded array is sorted - // in the other direction (decreasing order). - // GreaterIndex is the one where the offset is greater, which is actually a - // lower index! - unsigned GreaterIndex = 0; - unsigned LessIndex = LoadedSLocEntryTable.size(); - if (LastFileIDLookup.ID < 0) { - // Prune the search space. - int LastID = LastFileIDLookup.ID; - if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset) - GreaterIndex = - (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache - else - LessIndex = -LastID - 2; - } - - // First do a linear scan from the last lookup position, if possible. - unsigned NumProbes; + int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); bool Invalid = false; - for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) { - // Make sure the entry is loaded! 
- const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid); - if (Invalid) - return FileID(); // invalid entry. - if (E.getOffset() <= SLocOffset) { - FileID Res = FileID::get(-int(GreaterIndex) - 2); - LastFileIDLookup = Res; - NumLinearScans += NumProbes + 1; - return Res; - } - } - - // Linear scan failed. Do the binary search. - NumProbes = 0; - while (true) { - ++NumProbes; - unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex; - const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid); - if (Invalid) - return FileID(); // invalid entry. - - if (E.getOffset() > SLocOffset) { - if (GreaterIndex == MiddleIndex) { - assert(0 && "binary search missed the entry"); - return FileID(); - } - GreaterIndex = MiddleIndex; - continue; - } - - if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) { - FileID Res = FileID::get(-int(MiddleIndex) - 2); - LastFileIDLookup = Res; - NumBinaryProbes += NumProbes; - return Res; - } - - if (LessIndex == MiddleIndex) { - assert(0 && "binary search missed the entry"); - return FileID(); - } - LessIndex = MiddleIndex; - } + (void)getLoadedSLocEntryByID(ID, &Invalid); + return Invalid ? 
FileID() : FileID::get(ID); } SourceLocation SourceManager:: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0952244d037a7..fdf89dce41aab 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1444,6 +1444,69 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) { } } +std::optional +ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) { + BitstreamCursor &Cursor = F->SLocEntryCursor; + SavedStreamPosition SavedPosition(Cursor); + if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase + + F->SLocEntryOffsets[Index])) { + Error(std::move(Err)); + return std::nullopt; + } + + Expected MaybeEntry = Cursor.advance(); + if (!MaybeEntry) { + Error(MaybeEntry.takeError()); + return std::nullopt; + } + llvm::BitstreamEntry Entry = MaybeEntry.get(); + + if (Entry.Kind != llvm::BitstreamEntry::Record) { + Error("incorrectly-formatted source location entry in AST file"); + return std::nullopt; + } + + RecordData Record; + StringRef Blob; + Expected MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob); + if (!MaybeSLOC) { + Error(MaybeSLOC.takeError()); + return std::nullopt; + } + switch (MaybeSLOC.get()) { + default: + Error("incorrectly-formatted source location entry in AST file"); + return std::nullopt; + case SM_SLOC_FILE_ENTRY: + case SM_SLOC_BUFFER_ENTRY: + case SM_SLOC_EXPANSION_ENTRY: + return F->SLocEntryBaseOffset + Record[0]; + } +} + +int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) { + auto SLocMapI = + GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1); + assert(SLocMapI != GlobalSLocOffsetMap.end() && + "Corrupted global sloc offset map"); + ModuleFile *F = SLocMapI->second; + + std::vector Indices(F->LocalNumSLocEntries); + for (unsigned I = 0; I != F->LocalNumSLocEntries; ++I) + Indices[I] = I; + + auto It = llvm::upper_bound(Indices, SLocOffset, + [&](SourceLocation::UIntTy Offset, unsigned Index) { + 
auto EntryOffset = readSLocOffset(F, Index); + assert(EntryOffset && "Corrupted AST file"); + return Offset < *EntryOffset; + }); + // The iterator points to the first entry with start offset greater than the + // offset of interest. The previous entry must contain the offset of interest. + It = std::prev(It); + return F->SLocEntryBaseID + *It; +} + bool ASTReader::ReadSLocEntry(int ID) { if (ID == 0) return false; From d793bbdfa0e738545b584d31c53186d53452ce65 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 22 Sep 2023 10:27:30 -0700 Subject: [PATCH 2/9] [clang][modules] Cache deserialized `SLocEntry` offsets --- clang/include/clang/Serialization/ModuleFile.h | 6 ++++-- clang/lib/Serialization/ASTReader.cpp | 10 +++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 0af5cae6aebc3..8284d8410732d 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -288,10 +288,12 @@ class ModuleFile { /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. uint64_t SLocEntryOffsetsBase = 0; - /// Offsets for all of the source location entries in the - /// AST file. + /// Stream bit offsets for all of the source location entries in the AST file. const uint32_t *SLocEntryOffsets = nullptr; + /// SLocEntry offsets that have been loaded from the AST file. + std::vector SLocEntryOffsetLoaded; + /// SLocEntries that we're going to preload. 
SmallVector PreloadSLocEntries; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index fdf89dce41aab..d9276582f5792 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1497,9 +1497,12 @@ int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) { auto It = llvm::upper_bound(Indices, SLocOffset, [&](SourceLocation::UIntTy Offset, unsigned Index) { - auto EntryOffset = readSLocOffset(F, Index); - assert(EntryOffset && "Corrupted AST file"); - return Offset < *EntryOffset; + if (F->SLocEntryOffsetLoaded[Index] == -1U) { + auto MaybeEntryOffset = readSLocOffset(F, Index); + assert(MaybeEntryOffset && "Corrupted AST file"); + F->SLocEntryOffsetLoaded[Index] = *MaybeEntryOffset; + } + return Offset < F->SLocEntryOffsetLoaded[Index]; }); // The iterator points to the first entry with start offset greater than the // offset of interest. The previous entry must contain the offset of interest. @@ -3606,6 +3609,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, return llvm::createStringError(std::errc::invalid_argument, "ran out of source locations"); } + F.SLocEntryOffsetLoaded.resize(F.LocalNumSLocEntries, -1U); // Make our entry in the range map. BaseID is negative and growing, so // we invert it. Because we invert it, though, we need the other end of // the range. 
From 9981f50a130019c3b50d0a6b5d09ba81f7a6e936 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 22 Sep 2023 10:28:06 -0700 Subject: [PATCH 3/9] [clang][modules] Don't allocate when searching for FileID --- clang/lib/Serialization/ASTReader.cpp | 23 ++++---- llvm/include/llvm/ADT/STLExtras.h | 77 ++++++++++++++++++--------- 2 files changed, 61 insertions(+), 39 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index d9276582f5792..b8c7611f1913c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1491,19 +1491,16 @@ int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) { "Corrupted global sloc offset map"); ModuleFile *F = SLocMapI->second; - std::vector Indices(F->LocalNumSLocEntries); - for (unsigned I = 0; I != F->LocalNumSLocEntries; ++I) - Indices[I] = I; - - auto It = llvm::upper_bound(Indices, SLocOffset, - [&](SourceLocation::UIntTy Offset, unsigned Index) { - if (F->SLocEntryOffsetLoaded[Index] == -1U) { - auto MaybeEntryOffset = readSLocOffset(F, Index); - assert(MaybeEntryOffset && "Corrupted AST file"); - F->SLocEntryOffsetLoaded[Index] = *MaybeEntryOffset; - } - return Offset < F->SLocEntryOffsetLoaded[Index]; - }); + auto It = llvm::upper_bound( + llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset, + [&](SourceLocation::UIntTy Offset, std::size_t Index) { + if (F->SLocEntryOffsetLoaded[Index] == -1U) { + auto MaybeEntryOffset = readSLocOffset(F, Index); + assert(MaybeEntryOffset && "Corrupted AST file"); + F->SLocEntryOffsetLoaded[Index] = *MaybeEntryOffset; + } + return Offset < F->SLocEntryOffsetLoaded[Index]; + }); // The iterator points to the first entry with start offset greater than the // offset of interest. The previous entry must contain the offset of interest. 
It = std::prev(It); diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 5b926864f0cc4..8384dedf6365a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -2261,43 +2261,68 @@ template struct enumerator_result { mutable range_reference_tuple Storage; }; -/// Infinite stream of increasing 0-based `size_t` indices. -struct index_stream { - struct iterator : iterator_facade_base { - iterator &operator++() { - assert(Index != std::numeric_limits::max() && - "Attempting to increment end iterator"); - ++Index; - return *this; - } +struct index_iterator + : llvm::iterator_facade_base { + index_iterator(std::size_t Index) : Index(Index) {} + + index_iterator &operator+=(std::ptrdiff_t N) { + Index += N; + return *this; + } - // Note: This dereference operator returns a value instead of a reference - // and does not strictly conform to the C++17's definition of forward - // iterator. However, it satisfies all the forward_iterator requirements - // that the `zip_common` depends on and fully conforms to the C++20 - // definition of forward iterator. - std::size_t operator*() const { return Index; } + index_iterator &operator-=(std::ptrdiff_t N) { + Index -= N; + return *this; + } - friend bool operator==(const iterator &Lhs, const iterator &Rhs) { - return Lhs.Index == Rhs.Index; - } + std::ptrdiff_t operator-(const index_iterator &R) const { + return Index - R.Index; + } - std::size_t Index = 0; - }; + // Note: This dereference operator returns a value instead of a reference + // and does not strictly conform to the C++17's definition of forward + // iterator. However, it satisfies all the forward_iterator requirements + // that the `zip_common` depends on and fully conforms to the C++20 + // definition of forward iterator. 
+ std::size_t operator*() const { return Index; } - iterator begin() const { return {}; } - iterator end() const { + friend bool operator==(const index_iterator &Lhs, const index_iterator &Rhs) { + return Lhs.Index == Rhs.Index; + } + + friend bool operator<(const index_iterator &Lhs, const index_iterator &Rhs) { + return Lhs.Index < Rhs.Index; + } + +private: + std::size_t Index; +}; + +/// Infinite stream of increasing 0-based `size_t` indices. +struct index_stream { + index_iterator begin() const { return {0}; } + index_iterator end() const { // We approximate 'infinity' with the max size_t value, which should be good // enough to index over any container. - iterator It; - It.Index = std::numeric_limits::max(); - return It; + return index_iterator{std::numeric_limits::max()}; } }; } // end namespace detail +/// Increasing range of `size_t` indices. +class index_range { + std::size_t Begin; + std::size_t End; + +public: + index_range(std::size_t Begin, std::size_t End) : Begin(Begin), End(End) {} + detail::index_iterator begin() const { return {Begin}; } + detail::index_iterator end() const { return {End}; } +}; + /// Given two or more input ranges, returns a new range whose values are are /// tuples (A, B, C, ...), such that A is the 0-based index of the item in the /// sequence, and B, C, ..., are the values from the original input ranges. 
All From efedf1270722a5a96f83be0bddbceb0fc693f9be Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 22 Sep 2023 11:16:18 -0700 Subject: [PATCH 4/9] [clang][modules] Report failures from lightweight deserialization, save potentially unnecessary heavyweight deserialization --- clang/include/clang/Basic/SourceManager.h | 3 +++ clang/include/clang/Serialization/ASTReader.h | 4 ++-- clang/lib/Basic/SourceManager.cpp | 5 +---- clang/lib/Serialization/ASTReader.cpp | 14 +++++++++++--- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index a4c7facddd53d..71e3256ffa55f 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -534,6 +534,9 @@ class ExternalSLocEntrySource { virtual bool ReadSLocEntry(int ID) = 0; /// Get the index ID for the loaded SourceLocation offset. + /// + /// \returns Invalid index ID (0) if an error occurred that prevented the + /// SLocEntry from being loaded. virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0; /// Retrieve the module import location and name for the given ID, if diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index e643fcf4c930f..315e1d6afa3a5 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2155,8 +2155,8 @@ class ASTReader bool ReadSLocEntry(int ID) override; /// Get the index ID for the loaded SourceLocation offset. int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; - /// Read the offset of the SLocEntry at the given index in the given module - /// file. + /// Try to read the offset of the SLocEntry at the given index in the given + /// module file. 
std::optional readSLocOffset(ModuleFile *F, unsigned Index); diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index f881afc2e46c5..298d4d605c18b 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,10 +864,7 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { - int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset); - bool Invalid = false; - (void)getLoadedSLocEntryByID(ID, &Invalid); - return Invalid ? FileID() : FileID::get(ID); + return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset)); } SourceLocation SourceManager:: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b8c7611f1913c..f25a0665f18d2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1491,20 +1491,28 @@ int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) { "Corrupted global sloc offset map"); ModuleFile *F = SLocMapI->second; + bool Invalid = false; + auto It = llvm::upper_bound( llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset, [&](SourceLocation::UIntTy Offset, std::size_t Index) { if (F->SLocEntryOffsetLoaded[Index] == -1U) { auto MaybeEntryOffset = readSLocOffset(F, Index); - assert(MaybeEntryOffset && "Corrupted AST file"); + if (!MaybeEntryOffset) { + Invalid = true; + return true; + } F->SLocEntryOffsetLoaded[Index] = *MaybeEntryOffset; } return Offset < F->SLocEntryOffsetLoaded[Index]; }); + + if (Invalid) + return 0; + // The iterator points to the first entry with start offset greater than the // offset of interest. The previous entry must contain the offset of interest. 
- It = std::prev(It); - return F->SLocEntryBaseID + *It; + return F->SLocEntryBaseID + *std::prev(It); } bool ASTReader::ReadSLocEntry(int ID) { From 7365dcbd8ae6fcc320e11372b4d7714ba4a2a44a Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 22 Sep 2023 15:16:36 -0700 Subject: [PATCH 5/9] [clang][modules] Cache offsets even when deserializing the full SLocEntry --- clang/lib/Serialization/ASTReader.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f25a0665f18d2..573389af221b4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1575,9 +1575,9 @@ bool ASTReader::ReadSLocEntry(int ID) { }; ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; + unsigned Index = ID - F->SLocEntryBaseID; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( - F->SLocEntryOffsetsBase + - F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { + F->SLocEntryOffsetsBase + F->SLocEntryOffsets[Index])) { Error(std::move(Err)); return true; } @@ -1612,6 +1612,9 @@ bool ASTReader::ReadSLocEntry(int ID) { return true; case SM_SLOC_FILE_ENTRY: { + SourceLocation::UIntTy Offset = BaseOffset + Record[0]; + F->SLocEntryOffsetLoaded[Index] = Offset; + // We will detect whether a file changed and return 'Failure' for it, but // we will also try to fail gracefully by setting up the SLocEntry. 
unsigned InputID = Record[4]; @@ -1632,8 +1635,8 @@ bool ASTReader::ReadSLocEntry(int ID) { } SrcMgr::CharacteristicKind FileCharacter = (SrcMgr::CharacteristicKind)Record[2]; - FileID FID = SourceMgr.createFileID(*File, IncludeLoc, FileCharacter, ID, - BaseOffset + Record[0]); + FileID FID = + SourceMgr.createFileID(*File, IncludeLoc, FileCharacter, ID, Offset); SrcMgr::FileInfo &FileInfo = const_cast(SourceMgr.getSLocEntry(FID).getFile()); FileInfo.NumCreatedFIDs = Record[5]; @@ -1663,8 +1666,10 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_BUFFER_ENTRY: { + SourceLocation::UIntTy Offset = BaseOffset + Record[0]; + F->SLocEntryOffsetLoaded[Index] = Offset; + const char *Name = Blob.data(); - unsigned Offset = Record[0]; SrcMgr::CharacteristicKind FileCharacter = (SrcMgr::CharacteristicKind)Record[2]; SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]); @@ -1676,7 +1681,7 @@ bool ASTReader::ReadSLocEntry(int ID) { if (!Buffer) return true; FileID FID = SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID, - BaseOffset + Offset, IncludeLoc); + Offset, IncludeLoc); if (Record[3]) { auto &FileInfo = const_cast(SourceMgr.getSLocEntry(FID).getFile()); @@ -1686,13 +1691,15 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_EXPANSION_ENTRY: { + SourceLocation::UIntTy Offset = BaseOffset + Record[0]; + F->SLocEntryOffsetLoaded[Index] = Offset; + LocSeq::State Seq; SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq); SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq); SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd, - Record[5], Record[4], ID, - BaseOffset + Record[0]); + Record[5], Record[4], ID, Offset); break; } } From 751c0c6a9594116b78a5c33dbbf4d08725df57a3 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 25 Sep 2023 13:35:19 -0700 Subject: [PATCH 6/9] [clang][modules] Amend failing test --- 
clang/test/Modules/explicit-build-missing-files.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Modules/explicit-build-missing-files.cpp b/clang/test/Modules/explicit-build-missing-files.cpp index e36b5051e8319..3ea881d34c6b2 100644 --- a/clang/test/Modules/explicit-build-missing-files.cpp +++ b/clang/test/Modules/explicit-build-missing-files.cpp @@ -50,6 +50,7 @@ int y = a2; // CHECK: In module 'a': // CHECK-NEXT: a.h:1:45: error: +int z = b; // MISSING-B: could not find file '{{.*}}b.h' // MISSING-B-NOT: please delete the module cache #endif From 974ed1955cc53c50e3f9296428435cf7b3c85239 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 2 Oct 2023 15:44:24 -0700 Subject: [PATCH 7/9] Feedback --- clang/include/clang/Serialization/ASTReader.h | 4 +- clang/lib/Basic/SourceManager.cpp | 5 +++ clang/lib/Serialization/ASTReader.cpp | 37 +++++++++---------- llvm/include/llvm/ADT/STLExtras.h | 3 +- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 315e1d6afa3a5..32fa6f773647f 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2157,8 +2157,8 @@ class ASTReader int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override; /// Try to read the offset of the SLocEntry at the given index in the given /// module file. - std::optional readSLocOffset(ModuleFile *F, - unsigned Index); + llvm::Expected readSLocOffset(ModuleFile *F, + unsigned Index); /// Retrieve the module import location and module name for the /// given source manager entry ID. 
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 298d4d605c18b..47ae37392ef0a 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -864,6 +864,11 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const { /// This function knows that the SourceLocation is in a loaded buffer, not a /// local one. FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const { + if (SLocOffset < CurrentLoadedOffset) { + assert(0 && "Invalid SLocOffset or bad function choice"); + return FileID(); + } + return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset)); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 573389af221b4..3947c37bb63e4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1444,39 +1444,35 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) { } } -std::optional +llvm::Expected ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) { BitstreamCursor &Cursor = F->SLocEntryCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase + - F->SLocEntryOffsets[Index])) { - Error(std::move(Err)); - return std::nullopt; - } + F->SLocEntryOffsets[Index])) + return Err; Expected MaybeEntry = Cursor.advance(); - if (!MaybeEntry) { - Error(MaybeEntry.takeError()); - return std::nullopt; - } - llvm::BitstreamEntry Entry = MaybeEntry.get(); + if (!MaybeEntry) + return MaybeEntry.takeError(); - if (Entry.Kind != llvm::BitstreamEntry::Record) { - Error("incorrectly-formatted source location entry in AST file"); - return std::nullopt; - } + llvm::BitstreamEntry Entry = MaybeEntry.get(); + if (Entry.Kind != llvm::BitstreamEntry::Record) + return llvm::createStringError( + std::errc::illegal_byte_sequence, + "incorrectly-formatted source location entry in AST file"); RecordData Record; StringRef Blob; 
Expected MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob); - if (!MaybeSLOC) { - Error(MaybeSLOC.takeError()); - return std::nullopt; - } + if (!MaybeSLOC) + return MaybeSLOC.takeError(); + switch (MaybeSLOC.get()) { default: - Error("incorrectly-formatted source location entry in AST file"); - return std::nullopt; + return llvm::createStringError( + std::errc::illegal_byte_sequence, + "incorrectly-formatted source location entry in AST file"); case SM_SLOC_FILE_ENTRY: case SM_SLOC_BUFFER_ENTRY: case SM_SLOC_EXPANSION_ENTRY: @@ -1499,6 +1495,7 @@ int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) { if (F->SLocEntryOffsetLoaded[Index] == -1U) { auto MaybeEntryOffset = readSLocOffset(F, Index); if (!MaybeEntryOffset) { + Error(MaybeEntryOffset.takeError()); Invalid = true; return true; } diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 8384dedf6365a..c7d417324c94f 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -2263,8 +2263,7 @@ template struct enumerator_result { struct index_iterator : llvm::iterator_facade_base { + std::random_access_iterator_tag, std::size_t> { index_iterator(std::size_t Index) : Index(Index) {} index_iterator &operator+=(std::ptrdiff_t N) { From 46acf7029f9dba524b264e3688e93b2b3d7fd137 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 4 Oct 2023 14:35:47 -0700 Subject: [PATCH 8/9] Reuse `SourceManager::LoadedSLocEntryTable` to store partially deserialized SLocEntry offsets --- clang/include/clang/Basic/SourceManager.h | 14 ++++++++++++++ .../include/clang/Serialization/ModuleFile.h | 3 --- clang/lib/Basic/SourceManager.cpp | 6 ++++-- clang/lib/Serialization/ASTReader.cpp | 19 ++++++++++--------- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 71e3256ffa55f..794bfc5df2d2a 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ 
b/clang/include/clang/Basic/SourceManager.h @@ -499,6 +499,14 @@ class SLocEntry { return Expansion; } + /// Creates an incomplete SLocEntry that is only able to report its offset. + static SLocEntry getOffsetOnly(SourceLocation::UIntTy Offset) { + assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large"); + SLocEntry E; + E.Offset = Offset; + return E; + } + static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI) { assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large"); SLocEntry E; @@ -729,6 +737,12 @@ class SourceManager : public RefCountedBase { /// Same indexing as LoadedSLocEntryTable. llvm::BitVector SLocEntryLoaded; + /// A bitmap that indicates whether the entries of LoadedSLocEntryTable + /// have already had their offset loaded from the external source. + /// + /// Superset of SLocEntryLoaded. Same indexing as SLocEntryLoaded. + llvm::BitVector SLocEntryOffsetLoaded; + /// An external source for source location entries. ExternalSLocEntrySource *ExternalSLocEntries = nullptr; diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 8284d8410732d..7fdaf6c699a95 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -291,9 +291,6 @@ class ModuleFile { /// Stream bit offsets for all of the source location entries in the AST file. const uint32_t *SLocEntryOffsets = nullptr; - /// SLocEntry offsets that have been loaded from the AST file. - std::vector SLocEntryOffsetLoaded; - /// SLocEntries that we're going to preload. 
SmallVector PreloadSLocEntries; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 47ae37392ef0a..c2068940e7bd2 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -338,6 +338,7 @@ void SourceManager::clearIDTables() { LocalSLocEntryTable.clear(); LoadedSLocEntryTable.clear(); SLocEntryLoaded.clear(); + SLocEntryOffsetLoaded.clear(); LastLineNoFileIDQuery = FileID(); LastLineNoContentCache = nullptr; LastFileIDLookup = FileID(); @@ -460,6 +461,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries, } LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries); SLocEntryLoaded.resize(LoadedSLocEntryTable.size()); + SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size()); CurrentLoadedOffset -= TotalSize; int ID = LoadedSLocEntryTable.size(); return std::make_pair(-ID - 1, CurrentLoadedOffset); @@ -608,7 +610,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename, assert(!SLocEntryLoaded[Index] && "FileID already loaded"); LoadedSLocEntryTable[Index] = SLocEntry::get( LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename)); - SLocEntryLoaded[Index] = true; + SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true; return FileID::get(LoadedID); } unsigned FileSize = File.getSize(); @@ -668,7 +670,7 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info, assert(Index < LoadedSLocEntryTable.size() && "FileID out of range"); assert(!SLocEntryLoaded[Index] && "FileID already loaded"); LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info); - SLocEntryLoaded[Index] = true; + SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true; return SourceLocation::getMacroLoc(LoadedOffset); } LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info)); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 3947c37bb63e4..4507c2b746672 100644 --- 
a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1491,17 +1491,22 @@ int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) { auto It = llvm::upper_bound( llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset, - [&](SourceLocation::UIntTy Offset, std::size_t Index) { - if (F->SLocEntryOffsetLoaded[Index] == -1U) { - auto MaybeEntryOffset = readSLocOffset(F, Index); + [&](SourceLocation::UIntTy Offset, std::size_t LocalIndex) { + int ID = F->SLocEntryBaseID + LocalIndex; + std::size_t Index = -ID - 2; + if (!SourceMgr.SLocEntryOffsetLoaded[Index]) { + assert(!SourceMgr.SLocEntryLoaded[Index]); + auto MaybeEntryOffset = readSLocOffset(F, LocalIndex); if (!MaybeEntryOffset) { Error(MaybeEntryOffset.takeError()); Invalid = true; return true; } - F->SLocEntryOffsetLoaded[Index] = *MaybeEntryOffset; + SourceMgr.LoadedSLocEntryTable[Index] = + SrcMgr::SLocEntry::getOffsetOnly(*MaybeEntryOffset); + SourceMgr.SLocEntryOffsetLoaded[Index] = true; } - return Offset < F->SLocEntryOffsetLoaded[Index]; + return Offset < SourceMgr.LoadedSLocEntryTable[Index].getOffset(); }); if (Invalid) @@ -1610,7 +1615,6 @@ bool ASTReader::ReadSLocEntry(int ID) { case SM_SLOC_FILE_ENTRY: { SourceLocation::UIntTy Offset = BaseOffset + Record[0]; - F->SLocEntryOffsetLoaded[Index] = Offset; // We will detect whether a file changed and return 'Failure' for it, but // we will also try to fail gracefully by setting up the SLocEntry. 
@@ -1664,7 +1668,6 @@ bool ASTReader::ReadSLocEntry(int ID) { case SM_SLOC_BUFFER_ENTRY: { SourceLocation::UIntTy Offset = BaseOffset + Record[0]; - F->SLocEntryOffsetLoaded[Index] = Offset; const char *Name = Blob.data(); SrcMgr::CharacteristicKind @@ -1689,7 +1692,6 @@ bool ASTReader::ReadSLocEntry(int ID) { case SM_SLOC_EXPANSION_ENTRY: { SourceLocation::UIntTy Offset = BaseOffset + Record[0]; - F->SLocEntryOffsetLoaded[Index] = Offset; LocSeq::State Seq; SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); @@ -3618,7 +3620,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, return llvm::createStringError(std::errc::invalid_argument, "ran out of source locations"); } - F.SLocEntryOffsetLoaded.resize(F.LocalNumSLocEntries, -1U); // Make our entry in the range map. BaseID is negative and growing, so // we invert it. Because we invert it, though, we need the other end of // the range. From a7ff132938ba11b67c306b960c854cb88c176a08 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 4 Oct 2023 14:38:16 -0700 Subject: [PATCH 9/9] Remove benign diffs --- .../include/clang/Serialization/ModuleFile.h | 3 ++- clang/lib/Serialization/ASTReader.cpp | 20 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 7fdaf6c699a95..0af5cae6aebc3 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -288,7 +288,8 @@ class ModuleFile { /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. uint64_t SLocEntryOffsetsBase = 0; - /// Stream bit offsets for all of the source location entries in the AST file. + /// Offsets for all of the source location entries in the + /// AST file. const uint32_t *SLocEntryOffsets = nullptr; /// SLocEntries that we're going to preload. 
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 4507c2b746672..439cb8ac1a27e 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1577,9 +1577,9 @@ bool ASTReader::ReadSLocEntry(int ID) { }; ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; - unsigned Index = ID - F->SLocEntryBaseID; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( - F->SLocEntryOffsetsBase + F->SLocEntryOffsets[Index])) { + F->SLocEntryOffsetsBase + + F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { Error(std::move(Err)); return true; } @@ -1614,8 +1614,6 @@ bool ASTReader::ReadSLocEntry(int ID) { return true; case SM_SLOC_FILE_ENTRY: { - SourceLocation::UIntTy Offset = BaseOffset + Record[0]; - // We will detect whether a file changed and return 'Failure' for it, but // we will also try to fail gracefully by setting up the SLocEntry. unsigned InputID = Record[4]; @@ -1636,8 +1634,8 @@ bool ASTReader::ReadSLocEntry(int ID) { } SrcMgr::CharacteristicKind FileCharacter = (SrcMgr::CharacteristicKind)Record[2]; - FileID FID = - SourceMgr.createFileID(*File, IncludeLoc, FileCharacter, ID, Offset); + FileID FID = SourceMgr.createFileID(*File, IncludeLoc, FileCharacter, ID, + BaseOffset + Record[0]); SrcMgr::FileInfo &FileInfo = const_cast(SourceMgr.getSLocEntry(FID).getFile()); FileInfo.NumCreatedFIDs = Record[5]; @@ -1667,9 +1665,8 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_BUFFER_ENTRY: { - SourceLocation::UIntTy Offset = BaseOffset + Record[0]; - const char *Name = Blob.data(); + unsigned Offset = Record[0]; SrcMgr::CharacteristicKind FileCharacter = (SrcMgr::CharacteristicKind)Record[2]; SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]); @@ -1681,7 +1678,7 @@ bool ASTReader::ReadSLocEntry(int ID) { if (!Buffer) return true; FileID FID = SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID, - Offset, IncludeLoc); + BaseOffset + Offset, IncludeLoc); if (Record[3]) { 
auto &FileInfo = const_cast(SourceMgr.getSLocEntry(FID).getFile()); @@ -1691,14 +1688,13 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_EXPANSION_ENTRY: { - SourceLocation::UIntTy Offset = BaseOffset + Record[0]; - LocSeq::State Seq; SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq); SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq); SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd, - Record[5], Record[4], ID, Offset); + Record[5], Record[4], ID, + BaseOffset + Record[0]); break; } }