kadircet updated this revision to Diff 421847.
kadircet added a comment.

- Address comments and more cleanups


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123289/new/

https://reviews.llvm.org/D123289

Files:
  clang-tools-extra/clangd/index/SymbolCollector.cpp
  clang-tools-extra/clangd/index/SymbolCollector.h
  clang-tools-extra/clangd/index/SymbolID.cpp
  clang-tools-extra/clangd/index/SymbolID.h
  clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp

Index: clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -1013,10 +1013,21 @@
       )cpp",
       "Foo::Foo" /// constructor.
     },
+    { // Unclean identifiers
+      R"cpp(
+        struct Foo {};
+      )cpp",
+      R"cpp(
+        $spelled[[Fo\
+o]] f{};
+      )cpp",
+      "Foo",
+    },
   };
   CollectorOpts.RefFilter = RefKind::All;
   CollectorOpts.RefsInHeaders = false;
   for (const auto& T : TestCases) {
+    SCOPED_TRACE(T.Header + "\n---\n" + T.Main);
     Annotations Header(T.Header);
     Annotations Main(T.Main);
     // Reset the file system.
@@ -1039,10 +1050,14 @@
     }
     const auto SpelledRefs = std::move(SpelledSlabBuilder).build(),
                ImplicitRefs = std::move(ImplicitSlabBuilder).build();
-    EXPECT_THAT(SpelledRefs,
-                Contains(Pair(TargetID, haveRanges(SpelledRanges))));
-    EXPECT_THAT(ImplicitRefs,
-                Contains(Pair(TargetID, haveRanges(ImplicitRanges))));
+    EXPECT_EQ(SpelledRanges.empty(), SpelledRefs.empty());
+    EXPECT_EQ(ImplicitRanges.empty(), ImplicitRefs.empty());
+    if (!SpelledRanges.empty())
+      EXPECT_THAT(SpelledRefs,
+                  Contains(Pair(TargetID, haveRanges(SpelledRanges))));
+    if (!ImplicitRanges.empty())
+      EXPECT_THAT(ImplicitRefs,
+                  Contains(Pair(TargetID, haveRanges(ImplicitRanges))));
   }
 }
 
Index: clang-tools-extra/clangd/index/SymbolID.h
===================================================================
--- clang-tools-extra/clangd/index/SymbolID.h
+++ clang-tools-extra/clangd/index/SymbolID.h
@@ -14,6 +14,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/raw_ostream.h"
 #include <array>
+#include <cstddef>
 #include <cstdint>
 #include <string>
 
@@ -36,9 +37,7 @@
   bool operator==(const SymbolID &Sym) const {
     return HashValue == Sym.HashValue;
   }
-  bool operator!=(const SymbolID &Sym) const {
-    return !(*this == Sym);
-  }
+  bool operator!=(const SymbolID &Sym) const { return !(*this == Sym); }
   bool operator<(const SymbolID &Sym) const {
     return HashValue < Sym.HashValue;
   }
@@ -60,7 +59,14 @@
   std::array<uint8_t, RawSize> HashValue{};
 };
 
-llvm::hash_code hash_value(const SymbolID &ID);
+inline llvm::hash_code hash_value(const SymbolID &ID) {
+  // We already have a good hash, just return the first bytes.
+  static_assert(sizeof(size_t) <= SymbolID::RawSize,
+                "size_t longer than SHA1!");
+  size_t Result;
+  memcpy(&Result, ID.raw().data(), sizeof(size_t));
+  return llvm::hash_code(Result);
+}
 
 // Write SymbolID into the given stream. SymbolID is encoded as ID.str().
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
Index: clang-tools-extra/clangd/index/SymbolID.cpp
===================================================================
--- clang-tools-extra/clangd/index/SymbolID.cpp
+++ clang-tools-extra/clangd/index/SymbolID.cpp
@@ -20,8 +20,7 @@
 }
 
 llvm::StringRef SymbolID::raw() const {
-  return llvm::StringRef(reinterpret_cast<const char *>(HashValue.data()),
-                         RawSize);
+  return llvm::StringRef(reinterpret_cast<const char *>(&HashValue), RawSize);
 }
 
 SymbolID SymbolID::fromRaw(llvm::StringRef Raw) {
@@ -46,14 +45,5 @@
   return OS << llvm::toHex(ID.raw());
 }
 
-llvm::hash_code hash_value(const SymbolID &ID) {
-  // We already have a good hash, just return the first bytes.
-  static_assert(sizeof(size_t) <= SymbolID::RawSize,
-                "size_t longer than SHA1!");
-  size_t Result;
-  memcpy(&Result, ID.raw().data(), sizeof(size_t));
-  return llvm::hash_code(Result);
-}
-
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/SymbolCollector.h
===================================================================
--- clang-tools-extra/clangd/index/SymbolCollector.h
+++ clang-tools-extra/clangd/index/SymbolCollector.h
@@ -13,6 +13,7 @@
 #include "index/Ref.h"
 #include "index/Relation.h"
 #include "index/Symbol.h"
+#include "index/SymbolID.h"
 #include "index/SymbolOrigin.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
@@ -21,6 +22,8 @@
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include <functional>
 
@@ -142,6 +145,10 @@
 
   llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID);
 
+  SymbolID getSymbolIDCached(const Decl *D);
+  SymbolID getSymbolIDCached(const llvm::StringRef MacroName,
+                             const MacroInfo *MI, const SourceManager &SM);
+
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;
   // File IDs for Symbol.IncludeHeaders.
@@ -164,14 +171,14 @@
   Options Opts;
   struct SymbolRef {
     SourceLocation Loc;
+    FileID FID;
     index::SymbolRoleSet Roles;
     const Decl *Container;
+    bool Spelled;
   };
+  void addRef(SymbolID ID, const SymbolRef &SR);
   // Symbols referenced from the current TU, flushed on finish().
-  llvm::DenseSet<const NamedDecl *> ReferencedDecls;
-  llvm::DenseSet<const IdentifierInfo *> ReferencedMacros;
-  llvm::DenseMap<const NamedDecl *, std::vector<SymbolRef>> DeclRefs;
-  llvm::DenseMap<SymbolID, std::vector<SymbolRef>> MacroRefs;
+  llvm::DenseSet<SymbolID> ReferencedSymbols;
   // Maps canonical declaration provided by clang to canonical declaration for
   // an index symbol, if clangd prefers a different declaration than that
   // provided by clang. For example, friend declaration might be considered
@@ -184,6 +191,8 @@
   // to insert for which symbol, etc.
   class HeaderFileURICache;
   std::unique_ptr<HeaderFileURICache> HeaderFileURIs;
+  llvm::DenseMap<const Decl *, SymbolID> DeclToIDCache;
+  llvm::DenseMap<const MacroInfo *, SymbolID> MacroToIDCache;
 };
 
 } // namespace clangd
Index: clang-tools-extra/clangd/index/SymbolCollector.cpp
===================================================================
--- clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -21,11 +21,14 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
@@ -171,6 +174,23 @@
   return Enclosing;
 }
 
+// Check if there is an exact spelling of \p ND at \p Loc. \p Tokens are the
+// spelled tokens in \p FID.
+bool isSpelled(SourceLocation Loc, FileID FID, const NamedDecl &ND) {
+  auto Name = ND.getDeclName();
+  const auto NameKind = Name.getNameKind();
+  if (NameKind != DeclarationName::Identifier &&
+      NameKind != DeclarationName::CXXConstructorName)
+    return false;
+  const auto &AST = ND.getASTContext();
+  const auto &SM = AST.getSourceManager();
+  const auto &LO = AST.getLangOpts();
+  clang::Token Tok;
+  if (clang::Lexer::getRawToken(Loc, Tok, SM, LO))
+    return false;
+  auto StrName = Name.getAsString();
+  return clang::Lexer::getSpelling(Tok, SM, LO) == StrName;
+}
 } // namespace
 
 // Encapsulates decisions about how to record header paths in the index,
@@ -417,7 +437,8 @@
 llvm::Optional<SymbolLocation>
 SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
   const auto &SM = ASTCtx->getSourceManager();
-  auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
+  auto FID = SM.getFileID(TokLoc);
+  auto *FE = SM.getFileEntryForID(FID);
   if (!FE)
     return None;
 
@@ -544,17 +565,17 @@
   if (!ND)
     return true;
 
+  auto ID = getSymbolIDCached(ND);
+  if (!ID)
+    return true;
+
   // Mark D as referenced if this is a reference coming from the main file.
   // D may not be an interesting symbol, but it's cheaper to check at the end.
   auto &SM = ASTCtx->getSourceManager();
   if (Opts.CountReferences &&
       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
       SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
-    ReferencedDecls.insert(ND);
-
-  auto ID = getSymbolID(ND);
-  if (!ID)
-    return true;
+    ReferencedSymbols.insert(ID);
 
   // ND is the canonical (i.e. first) declaration. If it's in the main file
   // (which is not a header), then no public declaration was visible, so assume
@@ -575,13 +596,6 @@
   processRelations(*ND, ID, Relations);
 
   bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
-  bool IsOnlyRef =
-      !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
-                 static_cast<unsigned>(index::SymbolRole::Definition)));
-
-  if (IsOnlyRef && !CollectRef)
-    return true;
-
   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
   // file locations for references (as it aligns the behavior of clangd's
   // AST-based xref).
@@ -589,13 +603,18 @@
   if (CollectRef &&
       (!IsMainFileOnly || Opts.CollectMainFileRefs ||
        ND->isExternallyVisible()) &&
-      !isa<NamespaceDecl>(ND) &&
-      (Opts.RefsInHeaders ||
-       SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
-    DeclRefs[ND].push_back(SymbolRef{SM.getFileLoc(Loc), Roles,
-                                     getRefContainer(ASTNode.Parent, Opts)});
+      !isa<NamespaceDecl>(ND)) {
+    auto FileLoc = SM.getFileLoc(Loc);
+    auto FID = SM.getFileID(FileLoc);
+    if (Opts.RefsInHeaders || FID == SM.getMainFileID()) {
+      addRef(ID, SymbolRef{FileLoc, FID, Roles,
+                           getRefContainer(ASTNode.Parent, Opts),
+                           isSpelled(FileLoc, FID, *ND)});
+    }
+  }
   // Don't continue indexing if this is a mere reference.
-  if (IsOnlyRef)
+  if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
+                 static_cast<unsigned>(index::SymbolRole::Definition))))
     return true;
 
   // FIXME: ObjCPropertyDecl are not properly indexed here:
@@ -681,7 +700,7 @@
       Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
     return true;
 
-  auto ID = getSymbolID(Name->getName(), MI, SM);
+  auto ID = getSymbolIDCached(Name->getName(), MI, SM);
   if (!ID)
     return true;
 
@@ -692,9 +711,13 @@
                     ASTCtx->getLangOpts());
   // Do not store references to main-file macros.
   if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
-      (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
+      (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) {
     // FIXME: Populate container information for macro references.
-    MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
+    // FIXME: All MacroRefs are marked as Spelled now, but this should be
+    // checked.
+    addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr,
+                         /*Spelled=*/true});
+  }
 
   // Collect symbols.
   if (!Opts.CollectMacro)
@@ -710,7 +733,7 @@
   if (Opts.CountReferences &&
       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
       SM.getFileID(SpellingLoc) == SM.getMainFileID())
-    ReferencedMacros.insert(Name);
+    ReferencedSymbols.insert(ID);
 
   // Don't continue indexing if this is a mere reference.
   // FIXME: remove macro with ID if it is undefined.
@@ -760,7 +783,7 @@
       continue;
     const Decl *Object = R.RelatedSymbol;
 
-    auto ObjectID = getSymbolID(Object);
+    auto ObjectID = getSymbolIDCached(Object);
     if (!ObjectID)
       continue;
 
@@ -791,16 +814,13 @@
 
 void SymbolCollector::finish() {
   // At the end of the TU, add 1 to the refcount of all referenced symbols.
-  auto IncRef = [this](const SymbolID &ID) {
+  for (const auto &ID : ReferencedSymbols) {
     if (const auto *S = Symbols.find(ID)) {
-      Symbol Inc = *S;
-      ++Inc.References;
-      Symbols.insert(Inc);
-    }
-  };
-  for (const NamedDecl *ND : ReferencedDecls) {
-    if (auto ID = getSymbolID(ND)) {
-      IncRef(ID);
+      // SymbolSlab::Builder returns const symbols because strings are interned
+      // and modifying returned symbols without inserting again wouldn't go
+      // well. const_cast is safe here as we're modifying a data owned by the
+      // Symbol. This reduces time spent in SymbolCollector by ~1%.
+      ++const_cast<Symbol *>(S)->References;
     }
   }
   if (Opts.CollectMacro) {
@@ -808,16 +828,11 @@
     // First, drop header guards. We can't identify these until EOF.
     for (const IdentifierInfo *II : IndexedMacros) {
       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
-        if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
+        if (auto ID =
+                getSymbolIDCached(II->getName(), MI, PP->getSourceManager()))
           if (MI->isUsedForHeaderGuard())
             Symbols.erase(ID);
     }
-    // Now increment refcounts.
-    for (const IdentifierInfo *II : ReferencedMacros) {
-      if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
-        if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
-          IncRef(ID);
-    }
   }
   // Fill in IncludeHeaders.
   // We delay this until end of TU so header guards are all resolved.
@@ -851,58 +866,7 @@
     }
   }
 
-  const auto &SM = ASTCtx->getSourceManager();
-  auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
-                        bool Spelled = false) {
-    auto FileID = SM.getFileID(LocAndRole.Loc);
-    // FIXME: use the result to filter out references.
-    shouldIndexFile(FileID);
-    if (const auto *FE = SM.getFileEntryForID(FileID)) {
-      auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
-      Ref R;
-      R.Location.Start = Range.first;
-      R.Location.End = Range.second;
-      R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
-      R.Kind = toRefKind(LocAndRole.Roles, Spelled);
-      R.Container = getSymbolID(LocAndRole.Container);
-      Refs.insert(ID, R);
-    }
-  };
-  // Populate Refs slab from MacroRefs.
-  // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
-  for (const auto &IDAndRefs : MacroRefs)
-    for (const auto &LocAndRole : IDAndRefs.second)
-      CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
-  // Populate Refs slab from DeclRefs.
-  llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
-  for (auto &DeclAndRef : DeclRefs) {
-    if (auto ID = getSymbolID(DeclAndRef.first)) {
-      for (auto &LocAndRole : DeclAndRef.second) {
-        const auto FileID = SM.getFileID(LocAndRole.Loc);
-        // FIXME: It's better to use TokenBuffer by passing spelled tokens from
-        // the caller of SymbolCollector.
-        if (!FilesToTokensCache.count(FileID))
-          FilesToTokensCache[FileID] =
-              syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
-        llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
-        // Check if the referenced symbol is spelled exactly the same way the
-        // corresponding NamedDecl is. If it is, mark this reference as spelled.
-        const auto *IdentifierToken =
-            spelledIdentifierTouching(LocAndRole.Loc, Tokens);
-        DeclarationName Name = DeclAndRef.first->getDeclName();
-        const auto NameKind = Name.getNameKind();
-        bool IsTargetKind = NameKind == DeclarationName::Identifier ||
-                            NameKind == DeclarationName::CXXConstructorName;
-        bool Spelled = IdentifierToken && IsTargetKind &&
-                       Name.getAsString() == IdentifierToken->text(SM);
-        CollectRef(ID, LocAndRole, Spelled);
-      }
-    }
-  }
-
-  ReferencedDecls.clear();
-  ReferencedMacros.clear();
-  DeclRefs.clear();
+  ReferencedSymbols.clear();
   IncludeFiles.clear();
 }
 
@@ -982,16 +946,18 @@
                                     const Symbol &DeclSym) {
   if (DeclSym.Definition)
     return;
+  const auto &SM = ND.getASTContext().getSourceManager();
+  auto Loc = nameLocation(ND, SM);
+  shouldIndexFile(SM.getFileID(Loc));
+  auto DefLoc = getTokenLocation(Loc);
   // If we saw some forward declaration, we end up copying the symbol.
   // This is not ideal, but avoids duplicating the "is this a definition" check
   // in clang::index. We should only see one definition.
+  if (!DefLoc)
+    return;
   Symbol S = DeclSym;
-  const auto &SM = ND.getASTContext().getSourceManager();
-  auto Loc = nameLocation(ND, SM);
   // FIXME: use the result to filter out symbols.
-  shouldIndexFile(SM.getFileID(Loc));
-  if (auto DefLoc = getTokenLocation(Loc))
-    S.Definition = *DefLoc;
+  S.Definition = *DefLoc;
   Symbols.insert(S);
 }
 
@@ -1004,5 +970,36 @@
   return I.first->second;
 }
 
+void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) {
+  const auto &SM = ASTCtx->getSourceManager();
+  // FIXME: use the result to filter out references.
+  shouldIndexFile(SR.FID);
+  if (const auto *FE = SM.getFileEntryForID(SR.FID)) {
+    auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts());
+    Ref R;
+    R.Location.Start = Range.first;
+    R.Location.End = Range.second;
+    R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
+    R.Kind = toRefKind(SR.Roles, SR.Spelled);
+    R.Container = getSymbolIDCached(SR.Container);
+    Refs.insert(ID, R);
+  }
+}
+
+SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) {
+  auto It = DeclToIDCache.try_emplace(D, SymbolID{});
+  if (It.second)
+    It.first->second = getSymbolID(D);
+  return It.first->second;
+}
+
+SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName,
+                                            const MacroInfo *MI,
+                                            const SourceManager &SM) {
+  auto It = MacroToIDCache.try_emplace(MI, SymbolID{});
+  if (It.second)
+    It.first->second = getSymbolID(MacroName, MI, SM);
+  return It.first->second;
+}
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to