kbobyrev updated this revision to Diff 239545.
kbobyrev added a comment.

Attempt to drop collected reference before doing more computation to improve
performance.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72746/new/

https://reviews.llvm.org/D72746

Files:
  clang-tools-extra/clangd/index/Ref.h
  clang-tools-extra/clangd/index/SymbolCollector.cpp
  clang-tools-extra/clangd/index/SymbolCollector.h
  clang-tools-extra/clangd/refactor/Rename.cpp
  clang-tools-extra/clangd/unittests/RenameTests.cpp
  clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp

Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -256,21 +256,27 @@
 
 llvm::ArrayRef<syntax::Token>
 syntax::spelledTokensTouching(SourceLocation Loc,
-                              const syntax::TokenBuffer &Tokens) {
+                              llvm::ArrayRef<syntax::Token> Tokens) {
   assert(Loc.isFileID());
-  llvm::ArrayRef<syntax::Token> All =
-      Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc));
   auto *Right = llvm::partition_point(
-      All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
-  bool AcceptRight = Right != All.end() && Right->location() <= Loc;
-  bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc;
+      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
+  bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
+  bool AcceptLeft =
+      Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
   return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
                             Right + (AcceptRight ? 1 : 0));
 }
 
+llvm::ArrayRef<syntax::Token>
+syntax::spelledTokensTouching(SourceLocation Loc,
+                              const syntax::TokenBuffer &Tokens) {
+  return spelledTokensTouching(
+      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
+}
+
 const syntax::Token *
 syntax::spelledIdentifierTouching(SourceLocation Loc,
-                                  const syntax::TokenBuffer &Tokens) {
+                                  llvm::ArrayRef<syntax::Token> Tokens) {
   for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
     if (Tok.kind() == tok::identifier)
       return &Tok;
@@ -278,6 +284,13 @@
   return nullptr;
 }
 
+const syntax::Token *
+syntax::spelledIdentifierTouching(SourceLocation Loc,
+                                  const syntax::TokenBuffer &Tokens) {
+  return spelledIdentifierTouching(
+      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
+}
+
 std::vector<const syntax::Token *>
 TokenBuffer::macroExpansions(FileID FID) const {
   auto FileIt = Files.find(FID);
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -313,11 +313,22 @@
   const SourceManager *SourceMgr;
 };
 
+/// The spelled tokens that overlap or touch a spelling location Loc.
+/// This always returns 0-2 tokens.
+llvm::ArrayRef<syntax::Token>
+spelledTokensTouching(SourceLocation Loc, llvm::ArrayRef<syntax::Token> Tokens);
+
 /// The spelled tokens that overlap or touch a spelling location Loc.
 /// This always returns 0-2 tokens.
 llvm::ArrayRef<syntax::Token>
 spelledTokensTouching(SourceLocation Loc, const syntax::TokenBuffer &Tokens);
 
+/// The identifier token that overlaps or touches a spelling location Loc.
+/// If there is none, returns nullptr.
+const syntax::Token *
+spelledIdentifierTouching(SourceLocation Loc,
+                          llvm::ArrayRef<syntax::Token> Tokens);
+
 /// The identifier token that overlaps or touches a spelling location Loc.
 /// If there is none, returns nullptr.
 const syntax::Token *
Index: clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -42,6 +42,7 @@
 using ::testing::IsEmpty;
 using ::testing::Not;
 using ::testing::Pair;
+using ::testing::SizeIs;
 using ::testing::UnorderedElementsAre;
 using ::testing::UnorderedElementsAreArray;
 
@@ -700,6 +701,40 @@
   EXPECT_THAT(Refs, IsEmpty());
 }
 
+TEST_F(SymbolCollectorTest, SpelledReference) {
+  Annotations Header(R"cpp(
+  class Foo {};
+  #define MACRO Foo
+  )cpp");
+  Annotations Main(R"cpp(
+  $spelled[[Foo]] Variable1;
+  $macro[[MACRO]] Variable2;
+  )cpp");
+  CollectorOpts.RefFilter = RefKind::All;
+  CollectorOpts.DropImplicitReferences = true;
+  runSymbolCollector(Header.code(), Main.code());
+  EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Foo").ID,
+                                  HaveRanges(Main.ranges("spelled")))));
+  EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Main.ranges("macro")))));
+  EXPECT_THAT(Refs, SizeIs(2));
+}
+
+TEST_F(SymbolCollectorTest, ImplicitReference) {
+  Annotations Header(R"cpp(
+  class Foo {};
+  #define MACRO Foo
+  )cpp");
+  Annotations Main(R"cpp(
+  $implicit[[MACRO]] Variable2;
+  )cpp");
+  CollectorOpts.RefFilter = RefKind::All;
+  runSymbolCollector(Header.code(), Main.code());
+  EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Foo").ID,
+                                  HaveRanges(Main.ranges("implicit")))));
+  EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Main.ranges("implicit")))));
+  EXPECT_THAT(Refs, SizeIs(2));
+}
+
 TEST_F(SymbolCollectorTest, NameReferences) {
   CollectorOpts.RefFilter = RefKind::All;
   CollectorOpts.RefsInHeaders = true;
Index: clang-tools-extra/clangd/unittests/RenameTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -33,7 +33,7 @@
 // Convert a Range to a Ref.
 Ref refWithRange(const clangd::Range &Range, const std::string &URI) {
   Ref Result;
-  Result.Kind = RefKind::Reference;
+  Result.Kind = RefKind::Reference | RefKind::Spelled;
   Result.Location.Start.setLine(Range.start.line);
   Result.Location.Start.setColumn(Range.start.character);
   Result.Location.End.setLine(Range.end.line);
@@ -837,7 +837,7 @@
       {
           // variables.
           R"cpp(
-      static const int [[VA^R]] = 123;
+        static const int [[VA^R]] = 123;
       )cpp",
           R"cpp(
         #include "foo.h"
Index: clang-tools-extra/clangd/refactor/Rename.cpp
===================================================================
--- clang-tools-extra/clangd/refactor/Rename.cpp
+++ clang-tools-extra/clangd/refactor/Rename.cpp
@@ -296,6 +296,8 @@
   bool HasMore = Index.refs(RQuest, [&](const Ref &R) {
     if (AffectedFiles.size() > MaxLimitFiles)
       return;
+    if (!static_cast<unsigned>(R.Kind & RefKind::Spelled))
+      return;
     if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) {
       if (*RefFilePath != MainFile)
         AffectedFiles[*RefFilePath].push_back(toRange(R.Location));
Index: clang-tools-extra/clangd/index/SymbolCollector.h
===================================================================
--- clang-tools-extra/clangd/index/SymbolCollector.h
+++ clang-tools-extra/clangd/index/SymbolCollector.h
@@ -19,6 +19,7 @@
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Regex.h"
 #include <functional>
@@ -85,6 +86,9 @@
     /// If this is set, only collect symbols/references from a file if
     /// `FileFilter(SM, FID)` is true. If not set, all files are indexed.
     std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr;
+    /// If set to true, SymbolCollector will return only references that
+    /// explicitly spell out symbol's name.
+    bool DropImplicitReferences = false;
   };
 
   SymbolCollector(Options Opts);
Index: clang-tools-extra/clangd/index/SymbolCollector.cpp
===================================================================
--- clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -28,6 +28,7 @@
 #include "clang/Index/IndexingAction.h"
 #include "clang/Index/USRGeneration.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -180,7 +181,16 @@
 }
 
 RefKind toRefKind(index::SymbolRoleSet Roles) {
-  return static_cast<RefKind>(static_cast<unsigned>(RefKind::All) & Roles);
+  RefKind Result = RefKind::Unknown;
+  if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
+    Result |= RefKind::Declaration;
+  if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
+    Result |= RefKind::Definition;
+  if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
+    Result |= RefKind::Reference;
+  if (!(Roles & static_cast<unsigned>(index::SymbolRole::Implicit)))
+    Result |= RefKind::Spelled;
+  return Result;
 }
 
 bool shouldIndexRelation(const index::SymbolRelation &R) {
@@ -291,7 +301,7 @@
   // occurrence inside the base-specifier.
   processRelations(*ND, *ID, Relations);
 
-  bool CollectRef = static_cast<unsigned>(Opts.RefFilter) & Roles;
+  bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
   bool IsOnlyRef =
       !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
                  static_cast<unsigned>(index::SymbolRole::Definition)));
@@ -557,6 +567,9 @@
   auto CollectRef =
       [&](SymbolID ID,
           const std::pair<SourceLocation, index::SymbolRoleSet> &LocAndRole) {
+        if (Opts.DropImplicitReferences &&
+            !static_cast<bool>(toRefKind(LocAndRole.second) & RefKind::Spelled))
+          return;
         auto FileID = SM.getFileID(LocAndRole.first);
         // FIXME: use the result to filter out references.
         shouldIndexFile(FileID);
@@ -577,11 +590,30 @@
       CollectRef(IDAndRefs.first, LocAndRole);
   }
   // Populate Refs slab from DeclRefs.
-  if (auto MainFileURI = GetURI(SM.getMainFileID())) {
-    for (const auto &It : DeclRefs) {
-      if (auto ID = getSymbolID(It.first)) {
-        for (const auto &LocAndRole : It.second)
-          CollectRef(*ID, LocAndRole);
+  llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
+  for (auto &DeclAndRef : DeclRefs) {
+    if (auto ID = getSymbolID(DeclAndRef.first)) {
+      for (auto &LocAndRole : DeclAndRef.second) {
+        const auto FileID = SM.getFileID(LocAndRole.first);
+        // FIXME: It's better to use TokenBuffer by passing spelled tokens from
+        // the caller of SymbolCollector.
+        if (!FilesToTokensCache.count(FileID))
+          FilesToTokensCache[FileID] =
+              syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
+        llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
+        // Check if the referenced symbol is spelled exactly the same way the
+        // corresponding NamedDecl is. If it isn't, mark this reference as
+        // implicit. An example of implicit reference (implicit = !spelled)
+        // would be a macro expansion.
+        const auto *IdentifierToken =
+            spelledIdentifierTouching(LocAndRole.first, Tokens);
+        DeclarationName Name = DeclAndRef.first->getDeclName();
+        if (!IdentifierToken ||
+            (Name.isIdentifier() &&
+             Name.getAsString() != IdentifierToken->text(SM)))
+          LocAndRole.second |=
+              static_cast<uint32_t>(index::SymbolRole::Implicit);
+        CollectRef(*ID, LocAndRole);
       }
     }
   }
Index: clang-tools-extra/clangd/index/Ref.h
===================================================================
--- clang-tools-extra/clangd/index/Ref.h
+++ clang-tools-extra/clangd/index/Ref.h
@@ -27,10 +27,43 @@
 /// This is a bitfield which can be combined from different kinds.
 enum class RefKind : uint8_t {
   Unknown = 0,
-  Declaration = static_cast<uint8_t>(index::SymbolRole::Declaration),
-  Definition = static_cast<uint8_t>(index::SymbolRole::Definition),
-  Reference = static_cast<uint8_t>(index::SymbolRole::Reference),
-  All = Declaration | Definition | Reference,
+  // Points to symbol declaration. Example:
+  //
+  // class Foo {};
+  //       ^ Foo declaration
+  // Foo foo;
+  // ^ this does not reference Foo declaration
+  Declaration = 1 << 0,
+  // Points to symbol definition. Example:
+  //
+  // int foo();
+  //     ^ references foo declaration, but not foo definition
+  // int foo() { return 42; }
+  //     ^ references foo definition, but not declaration
+  // bool bar() { return true; }
+  //      ^ references both definition and declaration
+  Definition = 1 << 1,
+  // Points to symbol reference. Example:
+  //
+  // int Foo = 42;
+  // int Bar = Foo + 1;
+  //           ^ this is a reference to Foo
+  Reference = 1 << 2,
+  // The reference explicitly spells out declaration's name. Such references can
+  // not come from macro expansions or implicit AST nodes.
+  //
+  // class Foo {};
+  //       ^ references declaration, definition and explicitly spells out name
+  // #define MACRO Foo
+  //
+  // Foo foo;
+  // ^ this reference explicitly spells out Foo's name
+  // struct Bar {
+  //   MACRO Internal;
+  //   ^ this references Foo, but does not explicitly spell out its name
+  // };
+  Spelled = 1 << 3,
+  All = Declaration | Definition | Reference | Spelled,
 };
 
 inline RefKind operator|(RefKind L, RefKind R) {
@@ -72,6 +105,7 @@
   using value_type = std::pair<SymbolID, llvm::ArrayRef<Ref>>;
   using const_iterator = std::vector<value_type>::const_iterator;
   using iterator = const_iterator;
+  using size_type = size_t;
 
   RefSlab() = default;
   RefSlab(RefSlab &&Slab) = default;
@@ -80,11 +114,11 @@
   const_iterator begin() const { return Refs.begin(); }
   const_iterator end() const { return Refs.end(); }
   /// Gets the number of symbols.
-  size_t size() const { return Refs.size(); }
-  size_t numRefs() const { return NumRefs; }
+  size_type size() const { return Refs.size(); }
+  size_type numRefs() const { return NumRefs; }
   bool empty() const { return Refs.empty(); }
 
-  size_t bytes() const {
+  size_type bytes() const {
     return sizeof(*this) + Arena.getTotalMemory() +
            sizeof(value_type) * Refs.capacity();
   }
@@ -106,13 +140,13 @@
 
 private:
   RefSlab(std::vector<value_type> Refs, llvm::BumpPtrAllocator Arena,
-          size_t NumRefs)
+          size_type NumRefs)
       : Arena(std::move(Arena)), Refs(std::move(Refs)), NumRefs(NumRefs) {}
 
   llvm::BumpPtrAllocator Arena;
   std::vector<value_type> Refs;
   /// Number of all references.
-  size_t NumRefs = 0;
+  size_type NumRefs = 0;
 };
 
 } // namespace clangd
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to