From ea7ab2797f221205c66b9a0eb0073417dbe8aaec Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 9 Aug 2023 10:02:51 -0700 Subject: [PATCH] [clang][modules][deps] Create more efficient API for visitation of `ModuleFile` inputs The current `ASTReader::visitInputFiles()` function calls into `FileManager` to create `FileEntryRef` objects. This ends up being fairly costly in `clang-scan-deps`, where we mostly only care about file paths. This patch introduces new `ASTReader` API that gives clients access to just the serialized paths. Since the scanner needs both the as-requested path and the on-disk one (and doesn't want to transform the former into the latter via `FileManager`), this patch starts serializing both of them into the PCM file if they differ. This increases the size of scanning PCMs by 0.1% and speeds up scanning by 5%. Reviewed By: benlangmuir, vsapsai Differential Revision: https://reviews.llvm.org/D157066 --- .../include/clang/Serialization/ASTBitCodes.h | 2 +- clang/include/clang/Serialization/ASTReader.h | 7 +++ .../include/clang/Serialization/ModuleFile.h | 4 +- clang/lib/Serialization/ASTReader.cpp | 45 +++++++++++++++---- clang/lib/Serialization/ASTWriter.cpp | 27 ++++++++--- .../DependencyScanning/ModuleDepCollector.cpp | 21 +++++---- 6 files changed, 82 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index db34cabd4f13e..ac989417c9958 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 27; +const unsigned VERSION_MAJOR = 28; /// AST file minor version number supported by this version of /// Clang. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index ec6273f3856cf..9fa03a7f25dd9 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2356,6 +2356,13 @@ class ASTReader /// Loads comments ranges. void ReadComments() override; + /// Visit all the input file infos of the given module file. + void visitInputFileInfos( + serialization::ModuleFile &MF, bool IncludeSystem, + llvm::function_ref + Visitor); + /// Visit all the input files of the given module file. void visitInputFiles(serialization::ModuleFile &MF, bool IncludeSystem, bool Complain, diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index c94b089bc7936..b1e334ceae871 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -61,13 +61,15 @@ enum ModuleKind { /// The input file info that has been loaded from an AST file. struct InputFileInfo { + std::string FilenameAsRequested; std::string Filename; uint64_t ContentHash; off_t StoredSize; time_t StoredTime; bool Overridden; bool Transient; - bool TopLevelModuleMap; + bool TopLevel; + bool ModuleMap; }; /// The input file that has been loaded from this AST file, along with diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2c5dd3e522d4f..1d9b01cc462b2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2305,9 +2305,22 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) { R.StoredTime = static_cast(Record[2]); R.Overridden = static_cast(Record[3]); R.Transient = static_cast(Record[4]); - R.TopLevelModuleMap = static_cast(Record[5]); - R.Filename = std::string(Blob); - ResolveImportedPath(F, R.Filename); + R.TopLevel = static_cast(Record[5]); + R.ModuleMap = static_cast(Record[6]); + std::tie(R.FilenameAsRequested, R.Filename) = [&]() { + uint16_t AsRequestedLength = Record[7]; + + std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str(); + std::string Name = Blob.substr(AsRequestedLength).str(); + + ResolveImportedPath(F, NameAsRequested); + ResolveImportedPath(F, Name); + + if (Name.empty()) + Name = NameAsRequested; + + return std::make_pair(std::move(NameAsRequested), std::move(Name)); + }(); Expected MaybeEntry = Cursor.advance(); if (!MaybeEntry) // FIXME this drops errors on the floor. @@ -2358,7 +2371,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { time_t StoredTime = FI.StoredTime; bool Overridden = FI.Overridden; bool Transient = FI.Transient; - StringRef Filename = FI.Filename; + StringRef Filename = FI.FilenameAsRequested; uint64_t StoredContentHash = FI.ContentHash; OptionalFileEntryRefDegradesToFileEntryPtr File = @@ -2697,9 +2710,9 @@ ASTReader::ReadControlBlock(ModuleFile &F, for (unsigned I = 0; I < N; ++I) { bool IsSystem = I >= NumUserInputs; InputFileInfo FI = getInputFileInfo(F, I + 1); - Listener->visitInputFile(FI.Filename, IsSystem, FI.Overridden, - F.Kind == MK_ExplicitModule || - F.Kind == MK_PrebuiltModule); + Listener->visitInputFile( + FI.FilenameAsRequested, IsSystem, FI.Overridden, + F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule); } } @@ -9248,6 +9261,22 @@ void ASTReader::ReadComments() { } } +void ASTReader::visitInputFileInfos( + serialization::ModuleFile &MF, bool IncludeSystem, + llvm::function_ref + Visitor) { + unsigned NumUserInputs = MF.NumUserInputFiles; + unsigned NumInputs = MF.InputFilesLoaded.size(); + assert(NumUserInputs <= NumInputs); + unsigned N = IncludeSystem ? NumInputs : NumUserInputs; + for (unsigned I = 0; I < N; ++I) { + bool IsSystem = I >= NumUserInputs; + InputFileInfo IFI = getInputFileInfo(MF, I+1); + Visitor(IFI, IsSystem); + } +} + void ASTReader::visitInputFiles(serialization::ModuleFile &MF, bool IncludeSystem, bool Complain, llvm::function_refAdd(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient + IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map - IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name + IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len + IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev)); // Create input file hash abbreviation. @@ -1613,8 +1616,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.IsSystemFile = isSystem(File.getFileCharacteristic()); Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; - Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) && - File.getIncludeLoc().isInvalid(); + Entry.IsTopLevel = File.getIncludeLoc().isInvalid(); + Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic()); auto ContentHash = hash_code(-1); if (PP->getHeaderSearchInfo() @@ -1662,6 +1665,15 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, // Emit size/modification time for this file. // And whether this file was overridden. { + SmallString<128> NameAsRequested = Entry.File.getNameAsRequested(); + SmallString<128> Name = Entry.File.getName(); + + PreparePathForOutput(NameAsRequested); + PreparePathForOutput(Name); + + if (Name == NameAsRequested) + Name.clear(); + RecordData::value_type Record[] = { INPUT_FILE, InputFileOffsets.size(), @@ -1669,9 +1681,12 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, (uint64_t)getTimestampForOutput(Entry.File), Entry.BufferOverridden, Entry.IsTransient, - Entry.IsTopLevelModuleMap}; + Entry.IsTopLevel, + Entry.IsModuleMap, + NameAsRequested.size()}; - EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getNameAsRequested()); + Stream.EmitRecordWithBlob(IFAbbrevCode, Record, + (NameAsRequested + Name).str()); } // Emit content hash for this file. diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index eff46eafa0d7b..77ab2898e2cc2 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -464,18 +464,19 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { serialization::ModuleFile *MF = MDC.ScanInstance.getASTReader()->getModuleManager().lookup( M->getASTFile()); - MDC.ScanInstance.getASTReader()->visitInputFiles( - *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) { + MDC.ScanInstance.getASTReader()->visitInputFileInfos( + *MF, /*IncludeSystem=*/true, + [&](const serialization::InputFileInfo &IFI, bool IsSystem) { // __inferred_module.map is the result of the way in which an implicit // module build handles inferred modules. It adds an overlay VFS with // this file in the proper directory and relies on the rest of Clang to // handle it like normal. With explicitly built modules we don't need // to play VFS tricks, so replace it with the correct module map. - if (IF.getFile()->getName().endswith("__inferred_module.map")) { + if (StringRef(IFI.Filename).endswith("__inferred_module.map")) { MDC.addFileDep(MD, ModuleMap->getName()); return; } - MDC.addFileDep(MD, IF.getFile()->getName()); + MDC.addFileDep(MD, IFI.Filename); }); llvm::DenseSet SeenDeps; @@ -483,11 +484,15 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { addAllSubmoduleDeps(M, MD, SeenDeps); addAllAffectingModules(M, MD, SeenDeps); - MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( - *MF, [&](FileEntryRef FE) { - if (FE.getNameAsRequested().endswith("__inferred_module.map")) + MDC.ScanInstance.getASTReader()->visitInputFileInfos( + *MF, /*IncludeSystem=*/true, + [&](const serialization::InputFileInfo &IFI, bool IsSystem) { + if (!(IFI.TopLevel && IFI.ModuleMap)) return; - MD.ModuleMapFileDeps.emplace_back(FE.getNameAsRequested()); + if (StringRef(IFI.FilenameAsRequested) + .endswith("__inferred_module.map")) + return; + MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested); }); if (!MF->IncludeTreeID.empty())