kbobyrev updated this revision to Diff 163507.
kbobyrev added a comment.

Canonicalize URIs, slightly simplify code structure.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===================================================================
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
       FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector<std::string> URISchemes = {"unittest"};
+
 std::vector<DocID> consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector<DocID> IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher<std::vector<Token>>
-trigramsAre(std::initializer_list<std::string> Trigrams) {
+tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) {
   std::vector<Token> Tokens;
-  for (const auto &Symbols : Trigrams) {
-    Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+    Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher<std::vector<Token>>
+trigramsAre(std::initializer_list<std::string> Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher<std::vector<Token>>
+pathsAre(std::initializer_list<std::string> Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher<std::vector<std::pair<Token, unsigned>>> proximityPathsAre(
+    std::initializer_list<std::pair<std::string, unsigned>> ProximityPaths) {
+  std::vector<std::pair<Token, unsigned>> Result;
+  for (const auto &P : ProximityPaths) {
+    Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
               trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,9 +430,29 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+                  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+              pathsAre({"unittest:clang-tools-extra/clangd/index/dex/",
+                        "unittest:clang-tools-extra/clangd/index/",
+                        "unittest:clang-tools-extra/clangd/",
+                        "unittest:clang-tools-extra/",
+                        "unittest:"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(
+      generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+      proximityPathsAre({{"unittest:a/b/c/d/e/f/", 0},
+                         {"unittest:a/b/c/d/e/", 1},
+                         {"unittest:a/b/c/d/", 2},
+                         {"unittest:a/b/c/", 3},
+                         {"unittest:a/b/", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
   DexIndex I;
-  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +464,8 @@
 TEST(DexIndex, FuzzyFind) {
   DexIndex Index;
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-                               "other::ABC", "other::A"}));
+                               "other::ABC", "other::A"}),
+              URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -444,7 +488,8 @@
 TEST(DexIndexTest, FuzzyMatchQ) {
   DexIndex I;
   I.build(
-      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -455,14 +500,14 @@
 TEST(DexIndexTest, DexIndexSymbolsRecycled) {
   DexIndex I;
   std::weak_ptr<SlabAndPointers> Symbols;
-  I.build(generateNumSymbols(0, 10, &Symbols));
+  I.build(generateNumSymbols(0, 10, &Symbols), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "7";
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
 
   EXPECT_FALSE(Symbols.expired());
   // Release old symbols.
-  I.build(generateNumSymbols(0, 0));
+  I.build(generateNumSymbols(0, 0), URISchemes);
   EXPECT_TRUE(Symbols.expired());
 }
 
@@ -483,14 +528,14 @@
   FuzzyFindRequest Req;
   Req.Query = "7";
   DexIndex I;
-  I.build(std::move(Symbols));
+  I.build(std::move(Symbols), URISchemes);
   auto Matches = match(I, Req);
   EXPECT_EQ(Matches.size(), 4u);
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
   DexIndex I;
-  I.build(generateNumSymbols(0, 100));
+  I.build(generateNumSymbols(0, 100), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "5";
   Req.MaxCandidateCount = 3;
@@ -503,7 +548,8 @@
 TEST(DexIndexTest, FuzzyMatch) {
   DexIndex I;
   I.build(
-      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -513,68 +559,119 @@
 
 TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
   DexIndex I;
-  I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  I.build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
   DexIndex I;
-  I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  I.build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {""};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
   DexIndex I;
-  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
+  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}),
+          URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
   DexIndex I;
-  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
+  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}),
+          URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::", "b::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
 }
 
 TEST(DexIndexTest, NoMatchNestedScopes) {
   DexIndex I;
-  I.build(generateSymbols({"a::y1", "a::b::y2"}));
+  I.build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
 }
 
 TEST(DexIndexTest, IgnoreCases) {
   DexIndex I;
-  I.build(generateSymbols({"ns::ABC", "ns::abc"}));
+  I.build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "AB";
   Req.Scopes = {"ns::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
 TEST(DexIndexTest, Lookup) {
   DexIndex I;
-  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::abc", "ns::xyz"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::xyz"));
   EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
 }
 
+TEST(DexIndexTest, ProximityPathsBoosting) {
+  DexIndex I;
+
+  // All strings should be stored in local variables to ensure their lifetime is
+  // the same as Slab's.
+  auto RootURI = URI::create(testRoot() + "/file.h", "unittest");
+  assert(static_cast<bool>(RootURI));
+  auto CloseFileURI = URI::create(testRoot() + "/a/b/c/d/e/file.h", "unittest");
+  assert(static_cast<bool>(CloseFileURI));
+
+  // Populate Symbol Slab
+  std::vector<const Symbol *> Slab;
+  // Symbol in the root will perfectly match the request, but its canonical
+  // declaration is in the root directory.
+  Symbol RootSymbol;
+  std::string RootSymbolName = "abc";
+  RootSymbol.Name = RootSymbolName;
+  std::string RootFilename = RootURI->toString();
+  RootSymbol.CanonicalDeclaration.FileURI = RootFilename;
+  Slab.push_back(&RootSymbol);
+
+  // Another symbol has much lower Fuzzy Matching Score, but it is very close to
+  // the directory the request would come from.
+  Symbol CloseSymbol;
+  std::string CloseSymbolName = "AbsolutelyBizzareCoincidenceMatch";
+  CloseSymbol.Name = CloseSymbolName;
+  std::string CloseFileName = CloseFileURI->toString();
+  CloseSymbol.CanonicalDeclaration.FileURI = CloseFileName;
+
+  FuzzyFindRequest Req;
+  Req.Query = "abc";
+  Req.MaxCandidateCount = 1;
+  // FuzzyFind request comes from the file which is far from the root.
+  Req.ProximityPaths.push_back(testRoot() + "/a/b/c/d/e/f/file.h");
+
+  FuzzyMatcher Matcher(Req.Query);
+  EXPECT_TRUE(Matcher.match(RootSymbolName) > Matcher.match(CloseSymbolName));
+
+  // Insert duplicates so that only the boosted symbols end up in the scoring
+  // stage.
+  for (size_t Index = 0; Index < I.getScoringItemsMultiplier(); ++Index)
+    Slab.push_back(&CloseSymbol);
+
+  I.build(std::make_shared<std::vector<const Symbol *>>(Slab), URISchemes);
+
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("AbsolutelyBizzareCoincidenceMatch"));
+}
+
 } // namespace
 } // namespace dex
 } // namespace clangd
Index: clang-tools-extra/clangd/tool/ClangdMain.cpp
===================================================================
--- clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -42,7 +42,9 @@
 // Build an in-memory static index for global symbols from a YAML-format file.
 // The size of global symbols should be relatively small, so that all symbols
 // can be managed in memory.
-std::unique_ptr<SymbolIndex> buildStaticIndex(llvm::StringRef YamlSymbolFile) {
+std::unique_ptr<SymbolIndex>
+buildStaticIndex(llvm::StringRef YamlSymbolFile,
+                 std::vector<std::string> &URISchemes) {
   auto Buffer = llvm::MemoryBuffer::getFile(YamlSymbolFile);
   if (!Buffer) {
     llvm::errs() << "Can't open " << YamlSymbolFile << "\n";
@@ -53,8 +55,9 @@
   for (auto Sym : Slab)
     SymsBuilder.insert(Sym);
 
-  return UseDex ? dex::DexIndex::build(std::move(SymsBuilder).build())
-                : MemIndex::build(std::move(SymsBuilder).build());
+  return UseDex
+             ? dex::DexIndex::build(std::move(SymsBuilder).build(), URISchemes)
+             : MemIndex::build(std::move(SymsBuilder).build());
 }
 
 } // namespace
@@ -298,7 +301,7 @@
   Opts.BuildDynamicSymbolIndex = EnableIndex;
   std::unique_ptr<SymbolIndex> StaticIdx;
   if (EnableIndex && !YamlSymbolFile.empty()) {
-    StaticIdx = buildStaticIndex(YamlSymbolFile);
+    StaticIdx = buildStaticIndex(YamlSymbolFile, Opts.URISchemes);
     Opts.StaticIndex = StaticIdx.get();
   }
   Opts.AsyncThreadsCount = WorkerThreadsCount;
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -48,15 +48,28 @@
     Trigram,
     /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
     ///
-    /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global
+    /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
     /// scope).
     Scope,
+    /// Path to symbol declaration.
+    ///
+    /// Data stores canonical URI (without trailing '/' after the "scheme:") to
+    /// the directory with the trailing '/', e.g.
+    /// "file:path/to/clang-tools-extra/clangd/index/dex/".
+    // FIXME(kbobyrev): Path tokens could store reference to the full path
+    // string and the number of segment to avoid data duplication (which
+    // results in quadratic memory consumption).
+    //
+    // For example, now there are path tokens "/a/b/c/", "/a/b/", "/a/".
+    // They can be replaced with (&"/a/b/c/", 3), (&"/a/b/c/", 2),
+    // (&"/a/b/c/", 1). That would save space, but make the search logic harder.
+    // Also, there should be an accessible storage of all these strings which
+    // would be available to proximity path generators.
+    Path,
     /// Internal Token type for invalid/special tokens, e.g. empty tokens for
     /// llvm::DenseMap.
     Sentinel,
     /// FIXME(kbobyrev): Add other Token Kinds
-    /// * Path with full or relative path to the directory in which symbol is
-    ///   defined
     /// * Type with qualified type name or its USR
   };
 
@@ -81,6 +94,21 @@
   }
 };
 
+/// Returns Search Token for each parent directory of given Path. Should be used
+/// within the index build process.
+///
+/// When Limit is set, returns no more than Limit tokens.
+std::vector<Token>
+generateProximityPaths(llvm::StringRef Path,
+                       size_t Limit = std::numeric_limits<size_t>::max());
+
+/// Returns Search Token and its distance from the absolute path for the first
+/// URI scheme allows valid conversions from AbsolutePath.
+std::vector<std::pair<Token, unsigned>>
+generateQueryProximityPaths(llvm::StringRef AbsolutePath,
+                            llvm::ArrayRef<std::string> URISchemes,
+                            size_t Count = 5);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.h
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -41,10 +41,12 @@
 public:
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Syms);
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols,
+             llvm::ArrayRef<std::string> URISchemes);
 
   /// \brief Build index from a symbol slab.
-  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
+  static std::unique_ptr<SymbolIndex>
+  build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes);
 
   bool
   fuzzyFind(const FuzzyFindRequest &Req,
@@ -59,8 +61,16 @@
 
   size_t estimateMemoryUsage() const override;
 
-private:
+  /// For fuzzyFind() Dex retrieves getRetrievalItemsMultiplier() more items
+  /// than requested via Req.MaxCandidateCount in the first stage of filtering.
+  size_t getRetrievalItemsMultiplier() const;
+
+  /// For fuzzyFind() Dex scores getScoringItemsMultiplier() more items
+  /// than requested via Req.MaxCandidateCount in the second stage of filtering.
+  size_t getScoringItemsMultiplier() const;
 
+private:
+private:
   mutable std::mutex Mutex;
 
   std::shared_ptr<std::vector<const Symbol *>> Symbols /*GUARDED_BY(Mutex)*/;
@@ -73,10 +83,15 @@
   // Inverted index is used to retrieve posting lists which are processed during
   // the fuzzyFind process.
   llvm::DenseMap<Token, PostingList> InvertedIndex /*GUARDED_BY(Mutex)*/;
+
+  std::vector<std::string> URISchemes /*GUARDED BY(Mutex)*/;
+
+  const static size_t RETRIEVAL_ITEMS_MULTIPLIER = 100;
+  const static size_t SCORING_ITEMS_MULTIPLIER = 10;
 };
 
 } // namespace dex
 } // namespace clangd
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -26,17 +26,21 @@
 // Returns the tokens which are given symbols's characteristics. For example,
 // trigrams and scopes.
 // FIXME(kbobyrev): Support more token types:
-// * Path proximity
 // * Types
 std::vector<Token> generateSearchTokens(const Symbol &Sym) {
   std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
   Result.emplace_back(Token::Kind::Scope, Sym.Scope);
+  for (const auto &PathToken :
+       generateProximityPaths(Sym.CanonicalDeclaration.FileURI)) {
+    Result.push_back(PathToken);
+  }
   return Result;
 }
 
 } // namespace
 
-void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms,
+                     llvm::ArrayRef<std::string> ServerURISchemes) {
   llvm::DenseMap<SymbolID, const Symbol *> TempLookupTable;
   llvm::DenseMap<const Symbol *, float> TempSymbolQuality;
   for (const Symbol *Sym : *Syms) {
@@ -66,15 +70,17 @@
     Symbols = std::move(Syms);
     InvertedIndex = std::move(TempInvertedIndex);
     SymbolQuality = std::move(TempSymbolQuality);
+    URISchemes = ServerURISchemes;
   }
 
   vlog("Built DexIndex with estimated memory usage {0} bytes.",
        estimateMemoryUsage());
 }
 
-std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
+std::unique_ptr<SymbolIndex>
+DexIndex::build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes) {
   auto Idx = llvm::make_unique<DexIndex>();
-  Idx->build(getSymbolsFromSlab(std::move(Slab)));
+  Idx->build(getSymbolsFromSlab(std::move(Slab)), URISchemes);
   return std::move(Idx);
 }
 
@@ -117,6 +123,26 @@
     if (!ScopeIterators.empty())
       TopLevelChildren.push_back(createOr(move(ScopeIterators)));
 
+    // Add proximity paths boosting.
+    std::vector<std::unique_ptr<Iterator>> BoostingIterators;
+    // This should generate proximity path boosting iterators for each different
+    // URI Scheme the Index is aware of.
+    for (const auto &ProximityPath : Req.ProximityPaths) {
+      for (const auto &P :
+           generateQueryProximityPaths(ProximityPath, URISchemes)) {
+        const auto It = InvertedIndex.find(P.first);
+        if (It != InvertedIndex.end())
+          // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
+          BoostingIterators.push_back(
+              createBoost(create(It->second), P.second + 10));
+      }
+    }
+    // Boosting iterators do not actually filter symbols. In order to preserve
+    // the validity of resulting query, TRUE iterator should be added along
+    // BOOSTs.
+    BoostingIterators.push_back(createTrue(Symbols->size()));
+    TopLevelChildren.push_back(createOr(move(BoostingIterators)));
+
     // Use TRUE iterator if both trigrams and scopes from the query are not
     // present in the symbol index.
     auto QueryIterator = TopLevelChildren.empty()
@@ -127,16 +153,25 @@
     // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
     // using 100x of the requested number might not be good in practice, e.g.
     // when the requested number of items is small.
-    const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+    const size_t ItemsToRetrieve =
+        getRetrievalItemsMultiplier() * Req.MaxCandidateCount;
     auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
-    // FIXME(kbobyrev): Add boosting to the query and utilize retrieved
-    // boosting scores.
+
     std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
 
+    // Sort items using boosting score as the key.
+    std::sort(begin(SymbolDocIDs), end(SymbolDocIDs),
+              [](const std::pair<DocID, float> &LHS,
+                 const std::pair<DocID, float> &RHS) {
+                return LHS.second > RHS.second;
+              });
+
     // Retrieve top Req.MaxCandidateCount items.
+    const size_t ItemsToScore =
+        getScoringItemsMultiplier() * Req.MaxCandidateCount;
     std::priority_queue<std::pair<float, const Symbol *>> Top;
-    for (const auto &P : SymbolDocIDs) {
-      const DocID SymbolDocID = P.first;
+    for (size_t I = 0; I < std::min(SymbolDocIDs.size(), ItemsToScore); ++I) {
+      const DocID SymbolDocID = SymbolDocIDs[I].first;
       const auto *Sym = (*Symbols)[SymbolDocID];
       const llvm::Optional<float> Score = Filter.match(Sym->Name);
       if (!Score)
@@ -188,6 +223,14 @@
   return Bytes;
 }
 
+size_t DexIndex::getRetrievalItemsMultiplier() const {
+  return RETRIEVAL_ITEMS_MULTIPLIER;
+}
+
+size_t DexIndex::getScoringItemsMultiplier() const {
+  return SCORING_ITEMS_MULTIPLIER;
+}
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -46,6 +46,7 @@
 
   index/dex/DexIndex.cpp
   index/dex/Iterator.cpp
+  index/dex/Token.cpp
   index/dex/Trigram.cpp
 
   LINK_LIBS
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to