kbobyrev updated this revision to Diff 161273.
kbobyrev marked 3 inline comments as done.
kbobyrev added a comment.
Address all the comment, except the one about True iterators.
https://reviews.llvm.org/D50337
Files:
clang-tools-extra/clangd/CMakeLists.txt
clang-tools-extra/clangd/index/dex/DexIndex.cpp
clang-tools-extra/clangd/index/dex/DexIndex.h
clang-tools-extra/clangd/index/dex/Token.h
clang-tools-extra/unittests/clangd/CMakeLists.txt
clang-tools-extra/unittests/clangd/DexIndexTests.cpp
clang-tools-extra/unittests/clangd/IndexTests.cpp
clang-tools-extra/unittests/clangd/TestIndex.cpp
clang-tools-extra/unittests/clangd/TestIndex.h
Index: clang-tools-extra/unittests/clangd/TestIndex.h
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/TestIndex.h
@@ -0,0 +1,57 @@
+//===-- IndexHelpers.h ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
+#include "index/dex/Iterator.h"
+#include "index/dex/Token.h"
+#include "index/dex/Trigram.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName);
+
+struct SlabAndPointers {
+ SymbolSlab Slab;
+ std::vector<const Symbol *> Pointers;
+};
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+ std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+ std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+std::string getQualifiedName(const Symbol &Sym);
+
+std::vector<std::string> match(const SymbolIndex &I,
+ const FuzzyFindRequest &Req,
+ bool *Incomplete = nullptr);
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+ llvm::ArrayRef<SymbolID> IDs);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/unittests/clangd/TestIndex.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/TestIndex.cpp
@@ -0,0 +1,89 @@
+//===-- IndexHelpers.cpp ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestIndex.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName) {
+ Symbol Sym;
+ Sym.ID = SymbolID(QName.str());
+ size_t Pos = QName.rfind("::");
+ if (Pos == llvm::StringRef::npos) {
+ Sym.Name = QName;
+ Sym.Scope = "";
+ } else {
+ Sym.Name = QName.substr(Pos + 2);
+ Sym.Scope = QName.substr(0, Pos + 2);
+ }
+ return Sym;
+}
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+ std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+ SymbolSlab::Builder Slab;
+ for (llvm::StringRef QName : QualifiedNames)
+ Slab.insert(symbol(QName));
+
+ auto Storage = std::make_shared<SlabAndPointers>();
+ Storage->Slab = std::move(Slab).build();
+ for (const auto &Sym : Storage->Slab)
+ Storage->Pointers.push_back(&Sym);
+ if (WeakSymbols)
+ *WeakSymbols = Storage;
+ auto *Pointers = &Storage->Pointers;
+ return {std::move(Storage), Pointers};
+}
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+ std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+ std::vector<std::string> Names;
+ for (int i = Begin; i <= End; i++)
+ Names.push_back(std::to_string(i));
+ return generateSymbols(Names, WeakSymbols);
+}
+
+std::string getQualifiedName(const Symbol &Sym) {
+ return (Sym.Scope + Sym.Name).str();
+}
+
+std::vector<std::string> match(const SymbolIndex &I,
+ const FuzzyFindRequest &Req, bool *Incomplete) {
+ std::vector<std::string> Matches;
+ bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+ Matches.push_back(clang::clangd::getQualifiedName(Sym));
+ });
+ if (Incomplete)
+ *Incomplete = IsIncomplete;
+ return Matches;
+}
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+ llvm::ArrayRef<SymbolID> IDs) {
+ LookupRequest Req;
+ Req.IDs.insert(IDs.begin(), IDs.end());
+ std::vector<std::string> Results;
+ I.lookup(Req, [&](const Symbol &Sym) {
+ Results.push_back(getQualifiedName(Sym));
+ });
+ return Results;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/unittests/clangd/IndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/IndexTests.cpp
+++ clang-tools-extra/unittests/clangd/IndexTests.cpp
@@ -7,33 +7,20 @@
//
//===----------------------------------------------------------------------===//
+#include "TestIndex.h"
#include "index/Index.h"
#include "index/MemIndex.h"
#include "index/Merge.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-using testing::UnorderedElementsAre;
using testing::Pointee;
+using testing::UnorderedElementsAre;
namespace clang {
namespace clangd {
namespace {
-Symbol symbol(llvm::StringRef QName) {
- Symbol Sym;
- Sym.ID = SymbolID(QName.str());
- size_t Pos = QName.rfind("::");
- if (Pos == llvm::StringRef::npos) {
- Sym.Name = QName;
- Sym.Scope = "";
- } else {
- Sym.Name = QName.substr(Pos + 2);
- Sym.Scope = QName.substr(0, Pos + 2);
- }
- return Sym;
-}
-
MATCHER_P(Named, N, "") { return arg.Name == N; }
TEST(SymbolSlab, FindAndIterate) {
@@ -52,59 +39,6 @@
EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
}
-struct SlabAndPointers {
- SymbolSlab Slab;
- std::vector<const Symbol *> Pointers;
-};
-
-// Create a slab of symbols with the given qualified names as both IDs and
-// names. The life time of the slab is managed by the returned shared pointer.
-// If \p WeakSymbols is provided, it will be pointed to the managed object in
-// the returned shared pointer.
-std::shared_ptr<std::vector<const Symbol *>>
-generateSymbols(std::vector<std::string> QualifiedNames,
- std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
- SymbolSlab::Builder Slab;
- for (llvm::StringRef QName : QualifiedNames)
- Slab.insert(symbol(QName));
-
- auto Storage = std::make_shared<SlabAndPointers>();
- Storage->Slab = std::move(Slab).build();
- for (const auto &Sym : Storage->Slab)
- Storage->Pointers.push_back(&Sym);
- if (WeakSymbols)
- *WeakSymbols = Storage;
- auto *Pointers = &Storage->Pointers;
- return {std::move(Storage), Pointers};
-}
-
-// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
-// to the `generateSymbols` above.
-std::shared_ptr<std::vector<const Symbol *>>
-generateNumSymbols(int Begin, int End,
- std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
- std::vector<std::string> Names;
- for (int i = Begin; i <= End; i++)
- Names.push_back(std::to_string(i));
- return generateSymbols(Names, WeakSymbols);
-}
-
-std::string getQualifiedName(const Symbol &Sym) {
- return (Sym.Scope + Sym.Name).str();
-}
-
-std::vector<std::string> match(const SymbolIndex &I,
- const FuzzyFindRequest &Req,
- bool *Incomplete = nullptr) {
- std::vector<std::string> Matches;
- bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
- Matches.push_back(getQualifiedName(Sym));
- });
- if (Incomplete)
- *Incomplete = IsIncomplete;
- return Matches;
-}
-
TEST(MemIndexTest, MemIndexSymbolsRecycled) {
MemIndex I;
std::weak_ptr<SlabAndPointers> Symbols;
@@ -212,18 +146,6 @@
EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
}
-// Returns qualified names of symbols with any of IDs in the index.
-std::vector<std::string> lookup(const SymbolIndex &I,
- llvm::ArrayRef<SymbolID> IDs) {
- LookupRequest Req;
- Req.IDs.insert(IDs.begin(), IDs.end());
- std::vector<std::string> Results;
- I.lookup(Req, [&](const Symbol &Sym) {
- Results.push_back(getQualifiedName(Sym));
- });
- return Results;
-}
-
TEST(MemIndexTest, Lookup) {
MemIndex I;
I.build(generateSymbols({"ns::abc", "ns::xyz"}));
@@ -269,7 +191,7 @@
TEST(MergeTest, Merge) {
Symbol L, R;
L.ID = R.ID = SymbolID("hello");
- L.Name = R.Name = "Foo"; // same in both
+ L.Name = R.Name = "Foo"; // same in both
L.CanonicalDeclaration.FileURI = "file:///left.h"; // differs
R.CanonicalDeclaration.FileURI = "file:///right.h";
L.References = 1;
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,10 @@
//
//===----------------------------------------------------------------------===//
+#include "TestIndex.h"
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
#include "index/dex/Iterator.h"
#include "index/dex/Token.h"
#include "index/dex/Trigram.h"
@@ -17,11 +21,13 @@
#include <string>
#include <vector>
+using ::testing::ElementsAre;
+using ::testing::UnorderedElementsAre;
+
namespace clang {
namespace clangd {
namespace dex {
-
-using ::testing::ElementsAre;
+namespace {
TEST(DexIndexIterators, DocumentIterator) {
const PostingList L = {4, 7, 8, 20, 42, 100};
@@ -346,6 +352,175 @@
"hij", "ijk", "jkl", "klm"}));
}
+TEST(DexIndex, Lookup) {
+ DexIndex I;
+ I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+ EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+ EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::abc", "ns::xyz"));
+ EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::xyz"));
+ EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+TEST(DexIndex, FuzzyFind) {
+ DexIndex Index;
+ Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
+ "other::ABC", "other::A"}));
+ FuzzyFindRequest Req;
+ Req.Query = "ABC";
+ Req.Scopes = {"ns::"};
+ EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC"));
+ Req.Scopes = {"ns::", "ns::nested::"};
+ EXPECT_THAT(match(Index, Req),
+ UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
+ Req.Query = "A";
+ Req.Scopes = {"other::"};
+ EXPECT_THAT(match(Index, Req),
+ UnorderedElementsAre("other::A", "other::ABC"));
+ Req.Query = "";
+ Req.Scopes = {};
+ EXPECT_THAT(match(Index, Req),
+ UnorderedElementsAre("ns::ABC", "ns::BCD", "::ABC",
+ "ns::nested::ABC", "other::ABC",
+ "other::A"));
+}
+
+TEST(DexIndexTest, FuzzyMatchQ) {
+ DexIndex I;
+ I.build(
+ generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+ FuzzyFindRequest Req;
+ Req.Query = "lol";
+ Req.MaxCandidateCount = 2;
+ EXPECT_THAT(match(I, Req),
+ UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexIndexTest, DexIndexSymbolsRecycled) {
+ DexIndex I;
+ std::weak_ptr<SlabAndPointers> Symbols;
+ I.build(generateNumSymbols(0, 10, &Symbols));
+ FuzzyFindRequest Req;
+ Req.Query = "7";
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+ EXPECT_FALSE(Symbols.expired());
+ // Release old symbols.
+ I.build(generateNumSymbols(0, 0));
+ EXPECT_TRUE(Symbols.expired());
+}
+
+// FIXME(kbobyrev): This test is different for DexIndex and MemIndex: while
+// MemIndex manages response deduplication, DexIndex simply returns all matched
+// symbols which means there might be equivalent symbols in the response.
+// Before drop-in replacement of MemIndex with DexIndex happens, FileIndex
+// should handle deduplication instead.
+TEST(DexIndexTest, DexIndexDeduplicate) {
+ auto Symbols = generateNumSymbols(0, 10);
+
+ // Inject duplicates.
+ auto Sym = symbol("7");
+ Symbols->push_back(&Sym);
+ Symbols->push_back(&Sym);
+ Symbols->push_back(&Sym);
+
+ FuzzyFindRequest Req;
+ Req.Query = "7";
+ DexIndex I;
+ I.build(std::move(Symbols));
+ auto Matches = match(I, Req);
+ EXPECT_EQ(Matches.size(), 4u);
+}
+
+TEST(DexIndexTest, DexIndexLimitedNumMatches) {
+ DexIndex I;
+ I.build(generateNumSymbols(0, 100));
+ FuzzyFindRequest Req;
+ Req.Query = "5";
+ Req.MaxCandidateCount = 3;
+ bool Incomplete;
+ auto Matches = match(I, Req, &Incomplete);
+ EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+ EXPECT_TRUE(Incomplete);
+}
+
+TEST(DexIndexTest, FuzzyMatch) {
+ DexIndex I;
+ I.build(
+ generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+ FuzzyFindRequest Req;
+ Req.Query = "lol";
+ Req.MaxCandidateCount = 2;
+ EXPECT_THAT(match(I, Req),
+ UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
+ DexIndex I;
+ I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
+ DexIndex I;
+ I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {""};
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
+ DexIndex I;
+ I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {"a::"};
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
+ DexIndex I;
+ I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {"a::", "b::"};
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
+}
+
+TEST(DexIndexTest, NoMatchNestedScopes) {
+ DexIndex I;
+ I.build(generateSymbols({"a::y1", "a::b::y2"}));
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {"a::"};
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
+}
+
+TEST(DexIndexTest, IgnoreCases) {
+ DexIndex I;
+ I.build(generateSymbols({"ns::ABC", "ns::abc"}));
+ FuzzyFindRequest Req;
+ Req.Query = "AB";
+ Req.Scopes = {"ns::"};
+ EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
+}
+
+TEST(DexIndexTest, Lookup) {
+ DexIndex I;
+ I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+ EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+ EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::abc", "ns::xyz"));
+ EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::xyz"));
+ EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+} // namespace
} // namespace dex
} // namespace clangd
} // namespace clang
Index: clang-tools-extra/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/unittests/clangd/CMakeLists.txt
@@ -30,6 +30,7 @@
SyncAPI.cpp
TUSchedulerTests.cpp
TestFS.cpp
+ TestIndex.cpp
TestTU.cpp
ThreadingTests.cpp
TraceTests.cpp
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -22,9 +22,9 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
+#include "../Index.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/raw_ostream.h"
-
#include <string>
#include <vector>
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -0,0 +1,76 @@
+//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines Dex - a symbol index implementation based on query iterators
+// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
+// While consuming more memory and having longer build stage due to
+// preprocessing, Dex will have substantially lower latency. It will also allow
+// efficient symbol searching which is crucial for operations like code
+// completion, and can be very important for a number of different code
+// transformations which will be eventually supported by Clangd.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+
+#include "../Index.h"
+#include "../MemIndex.h"
+#include "Iterator.h"
+#include "Token.h"
+#include "Trigram.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// In-memory Dex trigram-based index implementation.
+// FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
+// index so that it can be loaded from the disk. Since static index is not
+// changed frequently, it's safe to assume that it has to be built only once
+// (when the clangd process starts). Therefore, it can be easier to store built
+// index on disk and then load it if available.
+class DexIndex : public SymbolIndex {
+public:
+ /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
+ /// accessible as long as `Symbols` is kept alive.
+ void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+ bool
+ fuzzyFind(const FuzzyFindRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+ void lookup(const LookupRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+ void findOccurrences(const OccurrencesRequest &Req,
+ llvm::function_ref<void(const SymbolOccurrence &)>
+ Callback) const override;
+
+private:
+ mutable std::mutex Mutex;
+
+ std::shared_ptr<std::vector<const Symbol *>> Symbols /*GUARDED_BY(Mutex)*/;
+ llvm::DenseMap<SymbolID, const Symbol *> LookupTable /*GUARDED_BY(Mutex)*/;
+ llvm::DenseMap<const Symbol *, float> SymbolQuality /*GUARDED_BY(Mutex)*/;
+ // Inverted index is a mapping from the search token to the posting list,
+ // which contains all items which can be characterized by such search token.
+ // For example, if the search token is scope "std::", the corresponding
+ // posting list would contain all indices of symbols defined in namespace std.
+ // Inverted index is used to retrieve posting lists which are processed during
+ // the fuzzyFind process.
+ llvm::DenseMap<Token, PostingList> InvertedIndex /*GUARDED_BY(Mutex)*/;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -0,0 +1,163 @@
+//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DexIndex.h"
+#include "../../FuzzyMatch.h"
+#include "../../Logger.h"
+#include <algorithm>
+#include <queue>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+namespace {
+
+// Returns the tokens which are given symbol's characteristics. Currently, the
+// generated tokens only contain fuzzy matching trigrams and symbol's scope,
+// but in the future this will also return path proximity tokens and other
+// types of tokens such as symbol type (if applicable).
+// Returns the tokens which are given symbols's characteristics. For example,
+// trigrams and scopes.
+// FIXME(kbobyrev): Support more token types:
+// * Path proximity
+// * Types
+std::vector<Token> generateSearchTokens(const Symbol &Sym) {
+ std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
+ Result.push_back(Token(Token::Kind::Scope, Sym.Scope));
+ return Result;
+}
+
+} // namespace
+
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+ llvm::DenseMap<SymbolID, const Symbol *> TempLookupTable;
+ llvm::DenseMap<const Symbol *, float> TempSymbolQuality;
+ for (const Symbol *Sym : *Syms) {
+ TempLookupTable[Sym->ID] = Sym;
+ TempSymbolQuality[Sym] = quality(*Sym);
+ }
+
+ // Symbols are sorted by symbol qualities so that items in the posting lists
+ // are stored in the descending order of symbol quality.
+ std::sort(begin(*Syms), end(*Syms),
+ [&](const Symbol *LHS, const Symbol *RHS) {
+ return TempSymbolQuality[LHS] > TempSymbolQuality[RHS];
+ });
+ llvm::DenseMap<Token, PostingList> TempInvertedIndex;
+ // Populate TempInvertedIndex with posting lists for index symbols.
+ for (DocID SymbolRank = 0; SymbolRank < Syms->size(); ++SymbolRank) {
+ const auto *Sym = (*Syms)[SymbolRank];
+ for (const auto &Token : generateSearchTokens(*Sym))
+ TempInvertedIndex[Token].push_back(SymbolRank);
+ }
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ // Replace outdated index with the new one.
+ LookupTable = std::move(TempLookupTable);
+ Symbols = std::move(Syms);
+ InvertedIndex = std::move(TempInvertedIndex);
+ SymbolQuality = std::move(TempSymbolQuality);
+ }
+}
+
+/// Constructs iterators over tokens extracted from the query and exhausts it
+/// while applying Callback to each symbol in the order of decreasing quality
+/// of the matched symbols.
+bool DexIndex::fuzzyFind(
+ const FuzzyFindRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const {
+ assert(!StringRef(Req.Query).contains("::") &&
+ "There must be no :: in query.");
+ FuzzyMatcher Filter(Req.Query);
+ bool More = false;
+
+ std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+ const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ // Generate query trigrams and construct AND iterator over all query
+ // trigrams.
+ std::vector<std::unique_ptr<Iterator>> TrigramIterators;
+ for (const auto &Trigram : TrigramTokens) {
+ const auto It = InvertedIndex.find(Trigram);
+ if (It != InvertedIndex.end())
+ TrigramIterators.push_back(create(It->second));
+ }
+ // FIXME(kbobyrev): Add True iterator as soon as it's implemented. Right
+ // now, the trigram generator algorithm guarantees that TrigramIterators
+ // will have at least one item, but that might be changed in the future.
+ TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
+
+ // Generate scope tokens for search query.
+ std::vector<std::unique_ptr<Iterator>> ScopeIterators;
+ for (const auto &Scope : Req.Scopes) {
+ const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
+ if (It != InvertedIndex.end())
+ ScopeIterators.push_back(create(It->second));
+ }
+ // Add OR iterator for scopes if there are any Scope Iterators.
+ if (!ScopeIterators.empty())
+ TopLevelChildren.push_back(createOr(move(ScopeIterators)));
+
+ auto QueryIterator = createAnd(move(TopLevelChildren));
+ // Retrieve more items than it was requested: some of the items with high
+ // final score might not be retrieved otherwise.
+ // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
+ // using 100x of the requested number might not be good in practice, e.g.
+ // when the requested number of items is small.
+ const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+ std::vector<DocID> SymbolDocIDs = consume(*QueryIterator, ItemsToRetrieve);
+
+ // Retrieve top Req.MaxCandidateCount items.
+ std::priority_queue<std::pair<float, const Symbol *>> Top;
+ for (const auto &SymbolDocID : SymbolDocIDs) {
+ const auto *Sym = (*Symbols)[SymbolDocID];
+ const llvm::Optional<float> Score = Filter.match(Sym->Name);
+ if (!Score.hasValue())
+ continue;
+ // Multiply score by a negative factor so that Top stores items with the
+ // highest actual score.
+ Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym);
+ if (Top.size() > Req.MaxCandidateCount) {
+ More = true;
+ Top.pop();
+ }
+ }
+
+ // Apply callback to the top Req.MaxCandidateCount items.
+ for (; !Top.empty(); Top.pop())
+ Callback(*Top.top().second);
+ }
+
+ return More;
+}
+
+void DexIndex::lookup(const LookupRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const {
+ for (const auto &ID : Req.IDs) {
+ auto I = LookupTable.find(ID);
+ if (I != LookupTable.end())
+ Callback(*I->second);
+ }
+}
+
+void DexIndex::findOccurrences(
+ const OccurrencesRequest &Req,
+ llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
+ log("findOccurrences is not implemented.");
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -43,6 +43,7 @@
index/SymbolCollector.cpp
index/SymbolYAML.cpp
+ index/dex/DexIndex.cpp
index/dex/Iterator.cpp
index/dex/Trigram.cpp
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits