ioeric updated this revision to Diff 126775.
ioeric added a comment.

- Remove everything except for SymbolIndex interface and MemIndex
- Added unit tests for MemIndex


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D40548

Files:
  clangd/CMakeLists.txt
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/MemIndex.cpp
  clangd/index/MemIndex.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/IndexTests.cpp

Index: unittests/clangd/IndexTests.cpp
===================================================================
--- /dev/null
+++ unittests/clangd/IndexTests.cpp
@@ -0,0 +1,118 @@
+//===-- IndexTests.cpp  -------------------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) {
+  Symbol Sym;
+  Sym.ID = SymbolID(ID);
+  Sym.QualifiedName = ID;
+  return Sym;
+}
+
+struct CountedSymbolSlab {
+  CountedSymbolSlab() = delete;
+
+  explicit CountedSymbolSlab(int &Counter) : Counter(Counter) { ++Counter; }
+
+  ~CountedSymbolSlab() { Counter--; }
+
+  SymbolSlab Slab;
+  // A counter for the number of living slabs.
+  int &Counter;
+};
+
+class MemIndexTest : public ::testing::Test {
+protected:
+  std::vector<std::string>
+  match(std::shared_ptr<std::vector<const Symbol *>> Symbols,
+        const FuzzyFindRequest &Req) {
+    Index.build(std::move(Symbols));
+    std::vector<std::string> Matches;
+    Index.fuzzyFind(
+        Req, [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
+    return Matches;
+  }
+
+  // Build a CountedSymbolSlab containing symbols with IDs and names [Begin,
+  // End]. The life time of the slab is managed by the returned shared_ptr,
+  // which points to a vector of pointers pointing to all symbols in the slab.
+  std::shared_ptr<std::vector<const Symbol *>>
+  generateNumSymbols(int Begin, int End) {
+    auto Slab = std::make_shared<CountedSymbolSlab>(SlabCounter);
+
+    for (int i = Begin; i <= End; i++)
+      Slab->Slab.insert(symbol(std::to_string(i)));
+    auto *Symbols = new std::vector<const Symbol *>();
+
+    for (const auto &Sym : Slab->Slab)
+      Symbols->push_back(&Sym.second);
+    return std::shared_ptr<std::vector<const Symbol *>>(std::move(Slab),
+                                                        Symbols);
+  }
+
+  int SlabCounter = 0;
+  MemIndex Index;
+};
+
+TEST_F(MemIndexTest, MemIndexSymbolsRecycled) {
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  auto Matches = match(generateNumSymbols(0, 10), Req);
+  EXPECT_THAT(Matches, UnorderedElementsAre("7"));
+
+  EXPECT_EQ(SlabCounter, 1);
+  // Release old symbols.
+  Index.build(std::make_shared<std::vector<const Symbol *>>());
+  EXPECT_EQ(SlabCounter, 0);
+}
+
+TEST_F(MemIndexTest, MemIndexMatchSubstring) {
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  auto Matches = match(generateNumSymbols(5, 25), Req);
+  EXPECT_THAT(Matches, UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST_F(MemIndexTest, MemIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject some duplicates and make sure we only match the same symbol once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  auto Matches = match(std::move(Symbols), Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST_F(MemIndexTest, MemIndexLimitedNumMatches) {
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  auto Matches = match(generateNumSymbols(0, 100), Req);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===================================================================
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -13,6 +13,7 @@
   CodeCompleteTests.cpp
   ContextTests.cpp
   FuzzyMatchTests.cpp
+  IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
Index: clangd/index/MemIndex.h
===================================================================
--- /dev/null
+++ clangd/index/MemIndex.h
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. -------------- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index for a (relatively small) set of symbols that
+/// can be easily managed in memory.
+class MemIndex : public SymbolIndex {
+public:
+  /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
+  /// accessible as long as `Symbols` is kept alive.
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+  bool fuzzyFind(const FuzzyFindRequest &Req,
+                 std::function<void(const Symbol &)> Callback) const override;
+
+private:
+  std::shared_ptr<std::vector<const Symbol *>> Symbols;
+  // Index is a set of symbols that are deduplicated by symbol IDs.
+  // FIXME: build smarter index structure.
+  llvm::DenseMap<SymbolID, const Symbol *> Index;
+  mutable std::mutex Mutex;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
Index: clangd/index/MemIndex.cpp
===================================================================
--- /dev/null
+++ clangd/index/MemIndex.cpp
@@ -0,0 +1,51 @@
+//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+
+#include "MemIndex.h"
+
+namespace clang {
+namespace clangd {
+
+void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+
+  llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
+  for (const Symbol *Sym : *Syms)
+    TempIndex[Sym->ID] = Sym;
+
+  // Swap out the old symbols and index.
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    Index = std::move(TempIndex);
+    Symbols = std::move(Syms); // Relase old symbols.
+  }
+}
+
+bool MemIndex::fuzzyFind(const FuzzyFindRequest &Req,
+                 std::function<void(const Symbol &)> Callback) const {
+  std::string LoweredQuery = llvm::StringRef(Req.Query).lower();
+  unsigned Matched = 0;
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    for (const auto Pair : Index) {
+      const Symbol *Sym = Pair.second;
+      // Find all symbols that contain the query, igoring cases.
+      // FIXME: use better matching algorithm, e.g. fuzzy matcher.
+      if (StringRef(StringRef(Sym->QualifiedName).lower())
+              .contains(LoweredQuery)) {
+        if (++Matched > Req.MaxCandidateCount)
+          return false;
+        Callback(*Sym);
+      }
+    }
+  }
+  return true;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -110,6 +110,33 @@
   llvm::DenseMap<SymbolID, Symbol> Symbols;
 };
 
+struct FuzzyFindRequest {
+  /// \brief A query string for the fuzzy find. This is matched against symbols'
+  /// qualfified names.
+  std::string Query;
+  /// \brief The maxinum number of candidates to return.
+  size_t MaxCandidateCount = UINT_MAX;
+};
+
+/// \brief Interface for symbol indexes that can be used for searching or
+/// matching symbols among a set of symbols based on names or unique IDs.
+class SymbolIndex {
+public:
+  virtual ~SymbolIndex() = default;
+
+  /// \brief Matches symbols in the index fuzzily and applies \p Callback on
+  /// each matched symbol.
+  ///
+  /// Returns true if all candidates are matched.
+  virtual bool
+  fuzzyFind(const FuzzyFindRequest &Req,
+            std::function<void(const Symbol &)> Callback) const = 0;
+
+  // FIXME: add interfaces for more index use cases:
+  //  - Symbol getSymbolInfo(llvm::StringRef USR);
+  //  - getAllOccurrences(llvm::StringRef USR);
+};
+
 } // namespace clangd
 } // namespace clang
 
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Index.h"
-
+#include "clang/Sema/CodeCompleteConsumer.h"
 #include "llvm/Support/SHA1.h"
 
 namespace clang {
@@ -20,6 +20,7 @@
 }
 } // namespace
 
+
 SymbolID::SymbolID(llvm::StringRef USR)
     : HashValue(llvm::SHA1::hash(toArrayRef(USR))) {}
 
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -19,6 +19,7 @@
   Protocol.cpp
   ProtocolHandlers.cpp
   Trace.cpp
+  index/MemIndex.cpp
   index/Index.cpp
   index/SymbolCollector.cpp
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to