kadircet updated this revision to Diff 421847. kadircet added a comment. - Address comments and more cleanups
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D123289/new/ https://reviews.llvm.org/D123289 Files: clang-tools-extra/clangd/index/SymbolCollector.cpp clang-tools-extra/clangd/index/SymbolCollector.h clang-tools-extra/clangd/index/SymbolID.cpp clang-tools-extra/clangd/index/SymbolID.h clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
Index: clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1013,10 +1013,21 @@ )cpp", "Foo::Foo" /// constructor. }, + { // Unclean identifiers + R"cpp( + struct Foo {}; + )cpp", + R"cpp( + $spelled[[Fo\ +o]] f{}; + )cpp", + "Foo", + }, }; CollectorOpts.RefFilter = RefKind::All; CollectorOpts.RefsInHeaders = false; for (const auto& T : TestCases) { + SCOPED_TRACE(T.Header + "\n---\n" + T.Main); Annotations Header(T.Header); Annotations Main(T.Main); // Reset the file system. @@ -1039,10 +1050,14 @@ } const auto SpelledRefs = std::move(SpelledSlabBuilder).build(), ImplicitRefs = std::move(ImplicitSlabBuilder).build(); - EXPECT_THAT(SpelledRefs, - Contains(Pair(TargetID, haveRanges(SpelledRanges)))); - EXPECT_THAT(ImplicitRefs, - Contains(Pair(TargetID, haveRanges(ImplicitRanges)))); + EXPECT_EQ(SpelledRanges.empty(), SpelledRefs.empty()); + EXPECT_EQ(ImplicitRanges.empty(), ImplicitRefs.empty()); + if (!SpelledRanges.empty()) + EXPECT_THAT(SpelledRefs, + Contains(Pair(TargetID, haveRanges(SpelledRanges)))); + if (!ImplicitRanges.empty()) + EXPECT_THAT(ImplicitRefs, + Contains(Pair(TargetID, haveRanges(ImplicitRanges)))); } } Index: clang-tools-extra/clangd/index/SymbolID.h =================================================================== --- clang-tools-extra/clangd/index/SymbolID.h +++ clang-tools-extra/clangd/index/SymbolID.h @@ -14,6 +14,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" #include <array> +#include <cstddef> #include <cstdint> #include <string> @@ -36,9 +37,7 @@ bool operator==(const SymbolID &Sym) const { return HashValue == Sym.HashValue; } - bool operator!=(const SymbolID &Sym) const { - return !(*this == Sym); - } + bool operator!=(const SymbolID &Sym) const { return !(*this == Sym); } bool operator<(const SymbolID &Sym) const { return HashValue < Sym.HashValue; } @@ -60,7 +59,14 @@ std::array<uint8_t, RawSize> HashValue{}; }; -llvm::hash_code hash_value(const SymbolID &ID); +inline llvm::hash_code hash_value(const SymbolID &ID) { + // We already have a good hash, just return the first bytes. + static_assert(sizeof(size_t) <= SymbolID::RawSize, + "size_t longer than SHA1!"); + size_t Result; + memcpy(&Result, ID.raw().data(), sizeof(size_t)); + return llvm::hash_code(Result); +} // Write SymbolID into the given stream. SymbolID is encoded as ID.str(). llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID); Index: clang-tools-extra/clangd/index/SymbolID.cpp =================================================================== --- clang-tools-extra/clangd/index/SymbolID.cpp +++ clang-tools-extra/clangd/index/SymbolID.cpp @@ -20,8 +20,7 @@ } llvm::StringRef SymbolID::raw() const { - return llvm::StringRef(reinterpret_cast<const char *>(HashValue.data()), - RawSize); + return llvm::StringRef(reinterpret_cast<const char *>(&HashValue), RawSize); } SymbolID SymbolID::fromRaw(llvm::StringRef Raw) { @@ -46,14 +45,5 @@ return OS << llvm::toHex(ID.raw()); } -llvm::hash_code hash_value(const SymbolID &ID) { - // We already have a good hash, just return the first bytes. - static_assert(sizeof(size_t) <= SymbolID::RawSize, - "size_t longer than SHA1!"); - size_t Result; - memcpy(&Result, ID.raw().data(), sizeof(size_t)); - return llvm::hash_code(Result); -} - } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/index/SymbolCollector.h =================================================================== --- clang-tools-extra/clangd/index/SymbolCollector.h +++ clang-tools-extra/clangd/index/SymbolCollector.h @@ -13,6 +13,7 @@ #include "index/Ref.h" #include "index/Relation.h" #include "index/Symbol.h" +#include "index/SymbolID.h" #include "index/SymbolOrigin.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -21,6 +22,8 @@ #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Sema/CodeCompleteConsumer.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include <functional> @@ -142,6 +145,10 @@ llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID); + SymbolID getSymbolIDCached(const Decl *D); + SymbolID getSymbolIDCached(const llvm::StringRef MacroName, + const MacroInfo *MI, const SourceManager &SM); + // All Symbols collected from the AST. SymbolSlab::Builder Symbols; // File IDs for Symbol.IncludeHeaders. @@ -164,14 +171,14 @@ Options Opts; struct SymbolRef { SourceLocation Loc; + FileID FID; index::SymbolRoleSet Roles; const Decl *Container; + bool Spelled; }; + void addRef(SymbolID ID, const SymbolRef &SR); // Symbols referenced from the current TU, flushed on finish(). - llvm::DenseSet<const NamedDecl *> ReferencedDecls; - llvm::DenseSet<const IdentifierInfo *> ReferencedMacros; - llvm::DenseMap<const NamedDecl *, std::vector<SymbolRef>> DeclRefs; - llvm::DenseMap<SymbolID, std::vector<SymbolRef>> MacroRefs; + llvm::DenseSet<SymbolID> ReferencedSymbols; // Maps canonical declaration provided by clang to canonical declaration for // an index symbol, if clangd prefers a different declaration than that // provided by clang. For example, friend declaration might be considered @@ -184,6 +191,8 @@ // to insert for which symbol, etc. class HeaderFileURICache; std::unique_ptr<HeaderFileURICache> HeaderFileURIs; + llvm::DenseMap<const Decl *, SymbolID> DeclToIDCache; + llvm::DenseMap<const MacroInfo *, SymbolID> MacroToIDCache; }; } // namespace clangd Index: clang-tools-extra/clangd/index/SymbolCollector.cpp =================================================================== --- clang-tools-extra/clangd/index/SymbolCollector.cpp +++ clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -21,11 +21,14 @@ #include "clang/AST/DeclBase.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/DeclarationName.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -171,6 +174,23 @@ return Enclosing; } +// Check if there is an exact spelling of \p ND at \p Loc. \p Tokens are the +// spelled tokens in \p FID. +bool isSpelled(SourceLocation Loc, FileID FID, const NamedDecl &ND) { + auto Name = ND.getDeclName(); + const auto NameKind = Name.getNameKind(); + if (NameKind != DeclarationName::Identifier && + NameKind != DeclarationName::CXXConstructorName) + return false; + const auto &AST = ND.getASTContext(); + const auto &SM = AST.getSourceManager(); + const auto &LO = AST.getLangOpts(); + clang::Token Tok; + if (clang::Lexer::getRawToken(Loc, Tok, SM, LO)) + return false; + auto StrName = Name.getAsString(); + return clang::Lexer::getSpelling(Tok, SM, LO) == StrName; +} } // namespace // Encapsulates decisions about how to record header paths in the index, @@ -417,7 +437,8 @@ llvm::Optional<SymbolLocation> SymbolCollector::getTokenLocation(SourceLocation TokLoc) { const auto &SM = ASTCtx->getSourceManager(); - auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc)); + auto FID = SM.getFileID(TokLoc); + auto *FE = SM.getFileEntryForID(FID); if (!FE) return None; @@ -544,17 +565,17 @@ if (!ND) return true; + auto ID = getSymbolIDCached(ND); + if (!ID) + return true; + // Mark D as referenced if this is a reference coming from the main file. // D may not be an interesting symbol, but it's cheaper to check at the end. auto &SM = ASTCtx->getSourceManager(); if (Opts.CountReferences && (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) && SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID()) - ReferencedDecls.insert(ND); - - auto ID = getSymbolID(ND); - if (!ID) - return true; + ReferencedSymbols.insert(ID); // ND is the canonical (i.e. first) declaration. If it's in the main file // (which is not a header), then no public declaration was visible, so assume @@ -575,13 +596,6 @@ processRelations(*ND, ID, Relations); bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles)); - bool IsOnlyRef = - !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) | - static_cast<unsigned>(index::SymbolRole::Definition))); - - if (IsOnlyRef && !CollectRef) - return true; - // Unlike other fields, e.g. Symbols (which use spelling locations), we use // file locations for references (as it aligns the behavior of clangd's // AST-based xref). @@ -589,13 +603,18 @@ if (CollectRef && (!IsMainFileOnly || Opts.CollectMainFileRefs || ND->isExternallyVisible()) && - !isa<NamespaceDecl>(ND) && - (Opts.RefsInHeaders || - SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID())) - DeclRefs[ND].push_back(SymbolRef{SM.getFileLoc(Loc), Roles, - getRefContainer(ASTNode.Parent, Opts)}); + !isa<NamespaceDecl>(ND)) { + auto FileLoc = SM.getFileLoc(Loc); + auto FID = SM.getFileID(FileLoc); + if (Opts.RefsInHeaders || FID == SM.getMainFileID()) { + addRef(ID, SymbolRef{FileLoc, FID, Roles, + getRefContainer(ASTNode.Parent, Opts), + isSpelled(FileLoc, FID, *ND)}); + } + } // Don't continue indexing if this is a mere reference. - if (IsOnlyRef) + if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) | + static_cast<unsigned>(index::SymbolRole::Definition)))) return true; // FIXME: ObjCPropertyDecl are not properly indexed here: @@ -681,7 +700,7 @@ Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM") return true; - auto ID = getSymbolID(Name->getName(), MI, SM); + auto ID = getSymbolIDCached(Name->getName(), MI, SM); if (!ID) return true; @@ -692,9 +711,13 @@ ASTCtx->getLangOpts()); // Do not store references to main-file macros. if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly && - (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) + (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) { // FIXME: Populate container information for macro references. - MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr}); + // FIXME: All MacroRefs are marked as Spelled now, but this should be + // checked. + addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr, + /*Spelled=*/true}); + } // Collect symbols. if (!Opts.CollectMacro) @@ -710,7 +733,7 @@ if (Opts.CountReferences && (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) && SM.getFileID(SpellingLoc) == SM.getMainFileID()) - ReferencedMacros.insert(Name); + ReferencedSymbols.insert(ID); // Don't continue indexing if this is a mere reference. // FIXME: remove macro with ID if it is undefined. @@ -760,7 +783,7 @@ continue; const Decl *Object = R.RelatedSymbol; - auto ObjectID = getSymbolID(Object); + auto ObjectID = getSymbolIDCached(Object); if (!ObjectID) continue; @@ -791,16 +814,13 @@ void SymbolCollector::finish() { // At the end of the TU, add 1 to the refcount of all referenced symbols. - auto IncRef = [this](const SymbolID &ID) { + for (const auto &ID : ReferencedSymbols) { if (const auto *S = Symbols.find(ID)) { - Symbol Inc = *S; - ++Inc.References; - Symbols.insert(Inc); - } - }; - for (const NamedDecl *ND : ReferencedDecls) { - if (auto ID = getSymbolID(ND)) { - IncRef(ID); + // SymbolSlab::Builder returns const symbols because strings are interned + // and modifying returned symbols without inserting again wouldn't go + // well. const_cast is safe here as we're modifying a data owned by the + // Symbol. This reduces time spent in SymbolCollector by ~1%. + ++const_cast<Symbol *>(S)->References; } } if (Opts.CollectMacro) { @@ -808,16 +828,11 @@ // First, drop header guards. We can't identify these until EOF. for (const IdentifierInfo *II : IndexedMacros) { if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) - if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager())) + if (auto ID = + getSymbolIDCached(II->getName(), MI, PP->getSourceManager())) if (MI->isUsedForHeaderGuard()) Symbols.erase(ID); } - // Now increment refcounts. - for (const IdentifierInfo *II : ReferencedMacros) { - if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) - if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager())) - IncRef(ID); - } } // Fill in IncludeHeaders. // We delay this until end of TU so header guards are all resolved. @@ -851,58 +866,7 @@ } } - const auto &SM = ASTCtx->getSourceManager(); - auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole, - bool Spelled = false) { - auto FileID = SM.getFileID(LocAndRole.Loc); - // FIXME: use the result to filter out references. - shouldIndexFile(FileID); - if (const auto *FE = SM.getFileEntryForID(FileID)) { - auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts()); - Ref R; - R.Location.Start = Range.first; - R.Location.End = Range.second; - R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str(); - R.Kind = toRefKind(LocAndRole.Roles, Spelled); - R.Container = getSymbolID(LocAndRole.Container); - Refs.insert(ID, R); - } - }; - // Populate Refs slab from MacroRefs. - // FIXME: All MacroRefs are marked as Spelled now, but this should be checked. - for (const auto &IDAndRefs : MacroRefs) - for (const auto &LocAndRole : IDAndRefs.second) - CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true); - // Populate Refs slab from DeclRefs. - llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache; - for (auto &DeclAndRef : DeclRefs) { - if (auto ID = getSymbolID(DeclAndRef.first)) { - for (auto &LocAndRole : DeclAndRef.second) { - const auto FileID = SM.getFileID(LocAndRole.Loc); - // FIXME: It's better to use TokenBuffer by passing spelled tokens from - // the caller of SymbolCollector. - if (!FilesToTokensCache.count(FileID)) - FilesToTokensCache[FileID] = - syntax::tokenize(FileID, SM, ASTCtx->getLangOpts()); - llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID]; - // Check if the referenced symbol is spelled exactly the same way the - // corresponding NamedDecl is. If it is, mark this reference as spelled. - const auto *IdentifierToken = - spelledIdentifierTouching(LocAndRole.Loc, Tokens); - DeclarationName Name = DeclAndRef.first->getDeclName(); - const auto NameKind = Name.getNameKind(); - bool IsTargetKind = NameKind == DeclarationName::Identifier || - NameKind == DeclarationName::CXXConstructorName; - bool Spelled = IdentifierToken && IsTargetKind && - Name.getAsString() == IdentifierToken->text(SM); - CollectRef(ID, LocAndRole, Spelled); - } - } - } - - ReferencedDecls.clear(); - ReferencedMacros.clear(); - DeclRefs.clear(); + ReferencedSymbols.clear(); IncludeFiles.clear(); } @@ -982,16 +946,18 @@ const Symbol &DeclSym) { if (DeclSym.Definition) return; + const auto &SM = ND.getASTContext().getSourceManager(); + auto Loc = nameLocation(ND, SM); + shouldIndexFile(SM.getFileID(Loc)); + auto DefLoc = getTokenLocation(Loc); // If we saw some forward declaration, we end up copying the symbol. // This is not ideal, but avoids duplicating the "is this a definition" check // in clang::index. We should only see one definition. + if (!DefLoc) + return; Symbol S = DeclSym; - const auto &SM = ND.getASTContext().getSourceManager(); - auto Loc = nameLocation(ND, SM); // FIXME: use the result to filter out symbols. - shouldIndexFile(SM.getFileID(Loc)); - if (auto DefLoc = getTokenLocation(Loc)) - S.Definition = *DefLoc; + S.Definition = *DefLoc; Symbols.insert(S); } @@ -1004,5 +970,36 @@ return I.first->second; } +void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) { + const auto &SM = ASTCtx->getSourceManager(); + // FIXME: use the result to filter out references. + shouldIndexFile(SR.FID); + if (const auto *FE = SM.getFileEntryForID(SR.FID)) { + auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts()); + Ref R; + R.Location.Start = Range.first; + R.Location.End = Range.second; + R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str(); + R.Kind = toRefKind(SR.Roles, SR.Spelled); + R.Container = getSymbolIDCached(SR.Container); + Refs.insert(ID, R); + } +} + +SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) { + auto It = DeclToIDCache.try_emplace(D, SymbolID{}); + if (It.second) + It.first->second = getSymbolID(D); + return It.first->second; +} + +SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName, + const MacroInfo *MI, + const SourceManager &SM) { + auto It = MacroToIDCache.try_emplace(MI, SymbolID{}); + if (It.second) + It.first->second = getSymbolID(MacroName, MI, SM); + return It.first->second; +} } // namespace clangd } // namespace clang
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits