jansvoboda11 created this revision. jansvoboda11 added reviewers: Bigcheese, benlangmuir. Herald added a subscriber: ributzka. Herald added a project: All. jansvoboda11 requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
In `clang-scan-deps` contexts, the number of interesting identifiers in PCM files is fairly low (only macros), while the number of identifiers in the importing instance is high (builtins). Marking the whole identifier table out-of-date triggers lots of benign and expensive calls to `ASTReader::updateOutOfDateIdentifiers()`. (That unfortunately happens even for unused identifiers due to `SemaRef.IdResolver.begin(II)` line in `ASTWriter::WriteASTCore()`.) This patch makes the main code path more similar to C++ modules, where the PCM files have `INTERESTING_IDENTIFIERS` section which lists identifiers that get created in the identifier table of the importing instance and marked as out-of-date. The only difference is that the main code path doesn't *create* identifiers in the table and relies on the importing instance calling `ASTReader::get()` when creating new identifier on-demand. It only marks existing identifiers as out-of-date. This speeds up `clang-scan-deps` by 5-10%. Impact on explicitly built modules TBD. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D151277 Files: clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp
Index: clang/lib/Serialization/ASTWriter.cpp =================================================================== --- clang/lib/Serialization/ASTWriter.cpp +++ clang/lib/Serialization/ASTWriter.cpp @@ -3544,14 +3544,16 @@ // the mapping from persistent IDs to strings. Writer.SetIdentifierOffset(II, Out.tell()); + auto MacroOffset = Writer.getMacroDirectivesOffset(II); + // Emit the offset of the key/data length information to the interesting // identifiers table if necessary. - if (InterestingIdentifierOffsets && isInterestingIdentifier(II)) + if (InterestingIdentifierOffsets && + isInterestingIdentifier(II, MacroOffset)) InterestingIdentifierOffsets->push_back(Out.tell()); unsigned KeyLen = II->getLength() + 1; unsigned DataLen = 4; // 4 bytes for the persistent ID << 1 - auto MacroOffset = Writer.getMacroDirectivesOffset(II); if (isInterestingIdentifier(II, MacroOffset)) { DataLen += 2; // 2 bytes for builtin ID DataLen += 2; // 2 bytes for flags @@ -3630,9 +3632,8 @@ // strings. { llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator; - ASTIdentifierTableTrait Trait( - *this, PP, IdResolver, IsModule, - (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr); + ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule, + IsModule ? &InterestingIdents : nullptr); // Look for any identifiers that were named while processing the // headers, but are otherwise not needed. We add these to the hash Index: clang/lib/Serialization/ASTReader.cpp =================================================================== --- clang/lib/Serialization/ASTReader.cpp +++ clang/lib/Serialization/ASTReader.cpp @@ -4385,27 +4385,56 @@ if (F.OriginalSourceFileID.isValid()) F.OriginalSourceFileID = TranslateFileID(F, F.OriginalSourceFileID); - // Preload all the pending interesting identifiers by marking them out of - // date. for (auto Offset : F.PreloadIdentifierOffsets) { const unsigned char *Data = F.IdentifierTableData + Offset; ASTIdentifierLookupTrait Trait(*this, F); auto KeyDataLen = Trait.ReadKeyDataLength(Data); auto Key = Trait.ReadKey(Data, KeyDataLen.first); - auto &II = PP.getIdentifierTable().getOwn(Key); - II.setOutOfDate(true); + + IdentifierInfo *II; + if (!PP.getLangOpts().CPlusPlus) { + // Identifiers present in both the module file and the importing + // instance are marked out-of-date so that they can be deserialized + // on next use via ASTReader::updateOutOfDateIdentifier(). + // Identifiers present in the module file but not in the importing + // instance are ignored for now, preventing growth of the identifier + // table. They will be deserialized on first use via ASTReader::get(). + auto It = PP.getIdentifierTable().find(Key); + if (It == PP.getIdentifierTable().end()) + continue; + II = It->second; + } else { + // With C++ modules, not many identifiers are considered interesting. + // All identifiers in the module file can be placed into the identifier + // table of the importing instance and marked as out-of-date. This makes + // ASTReader::get() a no-op, and deserialization will take place on + // first/next use via ASTReader::updateOutOfDateIdentifier(). + II = &PP.getIdentifierTable().get(Key); + } + + II->setOutOfDate(true); // Mark this identifier as being from an AST file so that we can track // whether we need to serialize it. - markIdentifierFromAST(*this, II); + markIdentifierFromAST(*this, *II); // Associate the ID with the identifier so that the writer can reuse it. auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first); - SetIdentifierInfo(ID, &II); + SetIdentifierInfo(ID, II); } } + // Builtins and library builtins have already been initialized. Mark all + // identifiers as out-of-date, so that they are deserialized on first use. + if (Type == MK_PCH || Type == MK_Preamble || Type == MK_MainFile) + for (auto &Id : PP.getIdentifierTable()) + Id.second->setOutOfDate(true); + + // Mark selectors as out of date. + for (auto Sel : SelectorGeneration) + SelectorOutOfDate[Sel.first] = true; + // Setup the import locations and notify the module manager that we've // committed to these module files. for (ImportedModule &M : Loaded) { @@ -4423,25 +4452,6 @@ F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc); } - if (!PP.getLangOpts().CPlusPlus || - (Type != MK_ImplicitModule && Type != MK_ExplicitModule && - Type != MK_PrebuiltModule)) { - // Mark all of the identifiers in the identifier table as being out of date, - // so that various accessors know to check the loaded modules when the - // identifier is used. - // - // For C++ modules, we don't need information on many identifiers (just - // those that provide macros or are poisoned), so we mark all of - // the interesting ones via PreloadIdentifierOffsets. - for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), - IdEnd = PP.getIdentifierTable().end(); - Id != IdEnd; ++Id) - Id->second->setOutOfDate(true); - } - // Mark selectors as out of date. - for (const auto &Sel : SelectorGeneration) - SelectorOutOfDate[Sel.first] = true; - // Resolve any unresolved module exports. for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits