On Wed, Jul 22, 2015 at 3:37 PM, Adam Nemet <ane...@apple.com> wrote:
> Hi Richard, > > Looks like this caused some ASan failures: > > http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/5677 > > Can you please take a look? > Thanks, should be fixed in r242960. > Thanks, > Adam > > On Jul 21, 2015, at 7:08 PM, Richard Smith <richard-l...@metafoo.co.uk> > wrote: > > Author: rsmith > Date: Tue Jul 21 21:08:40 2015 > New Revision: 242868 > > URL: http://llvm.org/viewvc/llvm-project?rev=242868&view=rev > Log: > [modules] Stop performing PCM lookups for all identifiers when building > with C++ modules. Instead, serialize a list of interesting identifiers and > mark those ones out of date on module import. Avoiding the identifier > lookups here gives a 20-30% speedup in builds with large numbers of > modules. No functionality change intended. > > Modified: > cfe/trunk/include/clang/Serialization/ASTBitCodes.h > cfe/trunk/include/clang/Serialization/Module.h > cfe/trunk/include/clang/Serialization/ModuleManager.h > cfe/trunk/lib/Sema/Sema.cpp > cfe/trunk/lib/Serialization/ASTReader.cpp > cfe/trunk/lib/Serialization/ASTWriter.cpp > cfe/trunk/lib/Serialization/ModuleManager.cpp > > Modified: cfe/trunk/include/clang/Serialization/ASTBitCodes.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTBitCodes.h?rev=242868&r1=242867&r2=242868&view=diff > > ============================================================================== > --- cfe/trunk/include/clang/Serialization/ASTBitCodes.h (original) > +++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h Tue Jul 21 > 21:08:40 2015 > @@ -543,7 +543,10 @@ namespace clang { > /// macro definition. > MACRO_OFFSET = 47, > > - // ID 48 used to be a table of macros. > + /// \brief A list of "interesting" identifiers. Only used in C++ > (where we > + /// don't normally do lookups into the serialized identifier > table). These > + /// are eagerly deserialized. > + INTERESTING_IDENTIFIERS = 48, > > /// \brief Record code for undefined but used functions and > variables that > /// need a definition in this TU. > > Modified: cfe/trunk/include/clang/Serialization/Module.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/Module.h?rev=242868&r1=242867&r2=242868&view=diff > > ============================================================================== > --- cfe/trunk/include/clang/Serialization/Module.h (original) > +++ cfe/trunk/include/clang/Serialization/Module.h Tue Jul 21 21:08:40 2015 > @@ -270,6 +270,10 @@ public: > /// IdentifierHashTable. > void *IdentifierLookupTable; > > + /// \brief Offsets of identifiers that we're going to preload within > + /// IdentifierTableData. > + std::vector<unsigned> PreloadIdentifierOffsets; > + > // === Macros === > > /// \brief The cursor to the start of the preprocessor block, which > stores > > Modified: cfe/trunk/include/clang/Serialization/ModuleManager.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ModuleManager.h?rev=242868&r1=242867&r2=242868&view=diff > > > ============================================================================== > --- cfe/trunk/include/clang/Serialization/ModuleManager.h (original) > +++ cfe/trunk/include/clang/Serialization/ModuleManager.h Tue Jul 21 > 21:08:40 2015 > @@ -30,14 +30,19 @@ namespace serialization { > > /// \brief Manages the set of modules loaded by an AST reader. > class ModuleManager { > - /// \brief The chain of AST files. The first entry is the one named by > the > - /// user, the last one is the one that doesn't depend on anything > further. > + /// \brief The chain of AST files, in the order in which we started to > load > + /// them (this order isn't really useful for anything). > SmallVector<ModuleFile *, 2> Chain; > > + /// \brief The chain of non-module PCH files. The first entry is the > one named > + /// by the user, the last one is the one that doesn't depend on anything > + /// further. > + SmallVector<ModuleFile *, 2> PCHChain; > + > // \brief The roots of the dependency DAG of AST files. This is used > // to implement short-circuiting logic when running DFS over the > dependencies. > SmallVector<ModuleFile *, 2> Roots; > - > + > /// \brief All loaded modules, indexed by name. > llvm::DenseMap<const FileEntry *, ModuleFile *> Modules; > > @@ -138,6 +143,11 @@ public: > ModuleReverseIterator rbegin() { return Chain.rbegin(); } > /// \brief Reverse iterator end-point to traverse all loaded modules. > ModuleReverseIterator rend() { return Chain.rend(); } > + > + /// \brief A range covering the PCH and preamble module files loaded. > + llvm::iterator_range<ModuleConstIterator> pch_modules() const { > + return llvm::make_range(PCHChain.begin(), PCHChain.end()); > + } > > /// \brief Returns the primary module associated with the manager, that > is, > /// the first module loaded > > Modified: cfe/trunk/lib/Sema/Sema.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.cpp?rev=242868&r1=242867&r2=242868&view=diff > > ============================================================================== > --- cfe/trunk/lib/Sema/Sema.cpp (original) > +++ cfe/trunk/lib/Sema/Sema.cpp Tue Jul 21 21:08:40 2015 > @@ -154,6 +154,9 @@ void Sema::Initialize() { > // will not be able to merge any duplicate __va_list_tag decls correctly. > VAListTagName = PP.getIdentifierInfo("__va_list_tag"); > > + if (!TUScope) > + return; > + > // Initialize predefined 128-bit integer types, if needed. > if (Context.getTargetInfo().hasInt128Type()) { > // If either of the 128-bit integer types are unavailable to name > lookup, > > Modified: cfe/trunk/lib/Serialization/ASTReader.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=242868&r1=242867&r2=242868&view=diff > > > ============================================================================== > --- cfe/trunk/lib/Serialization/ASTReader.cpp (original) > +++ cfe/trunk/lib/Serialization/ASTReader.cpp Tue Jul 21 21:08:40 2015 > @@ -2564,6 +2564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, u > break; > } > > + case INTERESTING_IDENTIFIERS: > + F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end()); > + break; > + > case EAGERLY_DESERIALIZED_DECLS: > // FIXME: Skip reading this record if our ASTConsumer doesn't care > // about "interesting" decls (for instance, if we're building a > module). > @@ -3410,6 +3414,18 @@ ASTReader::ASTReadResult ASTReader::Read > // SourceManager. > SourceMgr.getLoadedSLocEntryByID(Index); > } > + > + // Preload all the pending interesting identifiers by marking them > out of > + // date. > + for (auto Offset : F.PreloadIdentifierOffsets) { > + const unsigned char *Data = reinterpret_cast<const unsigned char *>( > + F.IdentifierTableData + Offset); > + > + ASTIdentifierLookupTrait Trait(*this, F); > + auto KeyDataLen = Trait.ReadKeyDataLength(Data); > + auto Key = Trait.ReadKey(Data, KeyDataLen.first); > + PP.getIdentifierTable().getOwn(Key).setOutOfDate(true); > + } > } > > // Setup the import locations and notify the module manager that we've > @@ -3430,13 +3446,20 @@ ASTReader::ASTReadResult ASTReader::Read > M->ImportLoc.getRawEncoding()); > } > > - // Mark all of the identifiers in the identifier table as being out of > date, > - // so that various accessors know to check the loaded modules when the > - // identifier is used. > - for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), > - IdEnd = PP.getIdentifierTable().end(); > - Id != IdEnd; ++Id) > - Id->second->setOutOfDate(true); > + if (!Context.getLangOpts().CPlusPlus || > + (Type != MK_ImplicitModule && Type != MK_ExplicitModule)) { > + // Mark all of the identifiers in the identifier table as being out > of date, > + // so that various accessors know to check the loaded modules when the > + // identifier is used. > + // > + // For C++ modules, we don't need information on many identifiers > (just > + // those that provide macros or are poisoned), so we mark all of > + // the interesting ones via PreloadIdentifierOffsets. > + for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), > + IdEnd = PP.getIdentifierTable().end(); > + Id != IdEnd; ++Id) > + Id->second->setOutOfDate(true); > + } > > // Resolve any unresolved module exports. > for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { > @@ -6827,19 +6850,32 @@ IdentifierInfo *ASTReader::get(StringRef > // Note that we are loading an identifier. > Deserializing AnIdentifier(this); > > - // If there is a global index, look there first to determine which > modules > - // provably do not have any results for this identifier. > - GlobalModuleIndex::HitSet Hits; > - GlobalModuleIndex::HitSet *HitsPtr = nullptr; > - if (!loadGlobalIndex()) { > - if (GlobalIndex->lookupIdentifier(Name, Hits)) { > - HitsPtr = &Hits; > - } > - } > IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0, > NumIdentifierLookups, > NumIdentifierLookupHits); > - ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr); > + > + // We don't need to do identifier table lookups in C++ modules (we > preload > + // all interesting declarations, and don't need to use the scope for > name > + // lookups). Perform the lookup in PCH files, though, since we don't > build > + // a complete initial identifier table if we're carrying on from a PCH. > + if (Context.getLangOpts().CPlusPlus) { > + for (auto F : ModuleMgr.pch_modules()) > + if (Visitor.visit(*F, &Visitor)) > + break; > + } else { > + // If there is a global index, look there first to determine which > modules > + // provably do not have any results for this identifier. > + GlobalModuleIndex::HitSet Hits; > + GlobalModuleIndex::HitSet *HitsPtr = nullptr; > + if (!loadGlobalIndex()) { > + if (GlobalIndex->lookupIdentifier(Name, Hits)) { > + HitsPtr = &Hits; > + } > + } > + > + ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr); > + } > + > IdentifierInfo *II = Visitor.getIdentifierInfo(); > markIdentifierUpToDate(II); > return II; > > Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=242868&r1=242867&r2=242868&view=diff > > > ============================================================================== > --- cfe/trunk/lib/Serialization/ASTWriter.cpp (original) > +++ cfe/trunk/lib/Serialization/ASTWriter.cpp Tue Jul 21 21:08:40 2015 > @@ -3104,6 +3104,7 @@ class ASTIdentifierTableTrait { > IdentifierResolver &IdResolver; > bool IsModule; > bool NeedDecls; > + ASTWriter::RecordData *InterestingIdentifierOffsets; > > /// \brief Determines whether this is an "interesting" identifier that > needs a > /// full IdentifierInfo structure written into the hash table. Notably, > this > @@ -3131,14 +3132,20 @@ public: > typedef unsigned offset_type; > > ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP, > - IdentifierResolver &IdResolver, bool IsModule) > + IdentifierResolver &IdResolver, bool IsModule, > + ASTWriter::RecordData > *InterestingIdentifierOffsets) > : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule), > - NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus) {} > + NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus), > + InterestingIdentifierOffsets(InterestingIdentifierOffsets) {} > > static hash_value_type ComputeHash(const IdentifierInfo* II) { > return llvm::HashString(II->getName()); > } > > + bool isInterestingIdentifier(const IdentifierInfo *II) { > + auto MacroOffset = Writer.getMacroDirectivesOffset(II); > + return isInterestingIdentifier(II, MacroOffset); > + } > bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) { > return isInterestingIdentifier(II, 0); > } > @@ -3178,6 +3185,12 @@ public: > // Record the location of the key data. This is used when generating > // the mapping from persistent IDs to strings. > Writer.SetIdentifierOffset(II, Out.tell()); > + > + // Emit the offset of the key/data length information to the > interesting > + // identifiers table if necessary. > + if (InterestingIdentifierOffsets && isInterestingIdentifier(II)) > + InterestingIdentifierOffsets->push_back(Out.tell() - 4); > + > Out.write(II->getNameStart(), KeyLen); > } > > @@ -3238,11 +3251,15 @@ void ASTWriter::WriteIdentifierTable(Pre > bool IsModule) { > using namespace llvm; > > + RecordData InterestingIdents; > + > // Create and write out the blob that contains the identifier > // strings. > { > llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> > Generator; > - ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule); > + ASTIdentifierTableTrait Trait( > + *this, PP, IdResolver, IsModule, > + (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : > nullptr); > > // Look for any identifiers that were named while processing the > // headers, but are otherwise not needed. We add these to the hash > @@ -3316,6 +3333,11 @@ void ASTWriter::WriteIdentifierTable(Pre > Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS); > Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record, > bytes(IdentifierOffsets)); > + > + // In C++, write the list of interesting identifiers (those that are > + // defined as macros, poisoned, or similar unusual things). > + if (!InterestingIdents.empty()) > + Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents); > } > > > //===----------------------------------------------------------------------===// > > Modified: cfe/trunk/lib/Serialization/ModuleManager.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ModuleManager.cpp?rev=242868&r1=242867&r2=242868&view=diff > > > ============================================================================== > --- cfe/trunk/lib/Serialization/ModuleManager.cpp (original) > +++ cfe/trunk/lib/Serialization/ModuleManager.cpp Tue Jul 21 21:08:40 2015 > @@ -95,6 +95,8 @@ ModuleManager::addModule(StringRef FileN > New->File = Entry; > New->ImportLoc = ImportLoc; > Chain.push_back(New); > + if (!New->isModule()) > + PCHChain.push_back(New); > if (!ImportedBy) > Roots.push_back(New); > NewModule = true; > @@ -159,6 +161,8 @@ ModuleManager::addModule(StringRef FileN > Modules.erase(Entry); > assert(Chain.back() == ModuleEntry); > Chain.pop_back(); > + if (!ModuleEntry->isModule()) > + PCHChain.pop_back(); > if (Roots.back() == ModuleEntry) > Roots.pop_back(); > else > @@ -225,6 +229,15 @@ void ModuleManager::removeModules( > > // Remove the modules from the chain. > Chain.erase(first, last); > + > + // Also remove them from the PCH chain. > + for (auto I = first; I != last; ++I) { > + if (!(*I)->isModule()) { > + PCHChain.erase(std::find(PCHChain.begin(), PCHChain.end(), *I), > + PCHChain.end()); > + break; > + } > + } > } > > void > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > > >
_______________________________________________ cfe-commits mailing list cfe-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits