kbobyrev updated this revision to Diff 163667.
kbobyrev marked 2 inline comments as done.
kbobyrev edited the summary of this revision.
kbobyrev added a comment.

Rebase on top of `master`, s/Path/PathURI/g to be more explicit about the token 
contents and origin.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/;
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===================================================================
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
       FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector<std::string> URISchemes = {"unittest"};
+
 std::vector<DocID> consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector<DocID> IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher<std::vector<Token>>
-trigramsAre(std::initializer_list<std::string> Trigrams) {
+tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) {
   std::vector<Token> Tokens;
-  for (const auto &Symbols : Trigrams) {
-    Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+    Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher<std::vector<Token>>
+trigramsAre(std::initializer_list<std::string> Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher<std::vector<Token>>
+pathsAre(std::initializer_list<std::string> Paths) {
+  return tokensAre(Paths, Token::Kind::PathURI);
+}
+
+testing::Matcher<std::vector<std::pair<Token, unsigned>>> proximityPathsAre(
+    std::initializer_list<std::pair<std::string, unsigned>> ProximityPaths) {
+  std::vector<std::pair<Token, unsigned>> Result;
+  for (const auto &P : ProximityPaths) {
+    Result.push_back({Token(Token::Kind::PathURI, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
               trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,8 +430,28 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+                  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+              pathsAre({"unittest:///clang-tools-extra/clangd/index/dex",
+                        "unittest:///clang-tools-extra/clangd/index",
+                        "unittest:///clang-tools-extra/clangd",
+                        "unittest:///clang-tools-extra",
+                        "unittest:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(
+      generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+      proximityPathsAre({{"unittest:///a/b/c/d/e/f", 0},
+                         {"unittest:///a/b/c/d/e", 1},
+                         {"unittest:///a/b/c/d", 2},
+                         {"unittest:///a/b/c", 3},
+                         {"unittest:///a/b", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
@@ -419,9 +462,10 @@
 }
 
 TEST(DexIndex, FuzzyFind) {
-  DexIndex Index;
+  DexIndex Index(URISchemes);
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-                               "other::ABC", "other::A"}));
+                               "other::ABC", "other::A"})
+              );
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -442,7 +486,7 @@
 }
 
 TEST(DexIndexTest, FuzzyMatchQ) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(
       generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
   FuzzyFindRequest Req;
@@ -453,7 +497,7 @@
 }
 
 TEST(DexIndexTest, DexIndexSymbolsRecycled) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   std::weak_ptr<SlabAndPointers> Symbols;
   I.build(generateNumSymbols(0, 10, &Symbols));
   FuzzyFindRequest Req;
@@ -482,14 +526,14 @@
 
   FuzzyFindRequest Req;
   Req.Query = "7";
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(std::move(Symbols));
   auto Matches = match(I, Req);
   EXPECT_EQ(Matches.size(), 4u);
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateNumSymbols(0, 100));
   FuzzyFindRequest Req;
   Req.Query = "5";
@@ -501,7 +545,7 @@
 }
 
 TEST(DexIndexTest, FuzzyMatch) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(
       generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
   FuzzyFindRequest Req;
@@ -512,60 +556,60 @@
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
   FuzzyFindRequest Req;
   Req.Query = "y";
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {""};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::", "b::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
 }
 
 TEST(DexIndexTest, NoMatchNestedScopes) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"a::y1", "a::b::y2"}));
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
 }
 
 TEST(DexIndexTest, IgnoreCases) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::ABC", "ns::abc"}));
   FuzzyFindRequest Req;
   Req.Query = "AB";
   Req.Scopes = {"ns::"};
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
 TEST(DexIndexTest, Lookup) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
@@ -575,6 +619,58 @@
   EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
 }
 
+TEST(DexIndexTest, ProximityPathsBoosting) {
+  DexIndex I(URISchemes);
+
+  // All strings should be stored in local variables to ensure their lifetime is
+  // the same as Slab's.
+  auto RootURI = URI::create(testRoot() + "/file.h", "unittest");
+  assert(static_cast<bool>(RootURI));
+  auto CloseFileURI = URI::create(testRoot() + "/a/b/c/d/e/file.h", "unittest");
+  assert(static_cast<bool>(CloseFileURI));
+
+  // Populate Symbol Slab
+  std::vector<const Symbol *> Slab;
+  // Symbol in the root will perfectly match the request, but its canonical
+  // declaration is in the root directory.
+  Symbol RootSymbol;
+  std::string RootSymbolName = "abc";
+  RootSymbol.Name = RootSymbolName;
+  std::string RootFilename = RootURI->toString();
+  RootSymbol.CanonicalDeclaration.FileURI = RootFilename;
+  Slab.push_back(&RootSymbol);
+
+  // Another symbol has much lower Fuzzy Matching Score, but it is very close to
+  // the directory the request would come from.
+  Symbol CloseSymbol;
+  std::string CloseSymbolName = "AbsolutelyBizzareCoincidenceMatch";
+  CloseSymbol.Name = CloseSymbolName;
+  std::string CloseFileName = CloseFileURI->toString();
+  CloseSymbol.CanonicalDeclaration.FileURI = CloseFileName;
+
+  FuzzyFindRequest Req;
+  Req.Query = "abc";
+  Req.MaxCandidateCount = 1;
+  // FuzzyFind request comes from the file which is far from the root.
+  Req.ProximityPaths.push_back(testRoot() + "/a/b/c/d/e/f/file.h");
+
+  FuzzyMatcher Matcher(Req.Query);
+  EXPECT_TRUE(Matcher.match(RootSymbolName) > Matcher.match(CloseSymbolName));
+
+  // Insert duplicates so that only the boosted symbols end up in the scoring
+  // stage.
+  // FIXME(kbobyrev): 10 is the number of items passed to the last stage which
+  // involves fuzzy scoring. Would having access to that information be better
+  // here?
+  for (size_t Index = 0; Index < 10; ++Index)
+    Slab.push_back(&CloseSymbol);
+
+  I.build(std::make_shared<std::vector<const Symbol *>>(Slab));
+
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("AbsolutelyBizzareCoincidenceMatch"));
+}
+
 } // namespace
 } // namespace dex
 } // namespace clangd
Index: clang-tools-extra/clangd/tool/ClangdMain.cpp
===================================================================
--- clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -42,7 +42,9 @@
 // Build an in-memory static index for global symbols from a YAML-format file.
 // The size of global symbols should be relatively small, so that all symbols
 // can be managed in memory.
-std::unique_ptr<SymbolIndex> buildStaticIndex(llvm::StringRef YamlSymbolFile) {
+std::unique_ptr<SymbolIndex>
+buildStaticIndex(llvm::StringRef YamlSymbolFile,
+                 std::vector<std::string> &URISchemes) {
   auto Buffer = llvm::MemoryBuffer::getFile(YamlSymbolFile);
   if (!Buffer) {
     llvm::errs() << "Can't open " << YamlSymbolFile << "\n";
@@ -53,9 +55,10 @@
   for (auto Sym : Slab)
     SymsBuilder.insert(Sym);
 
-  return UseDex ? dex::DexIndex::build(std::move(SymsBuilder).build())
-                : MemIndex::build(std::move(SymsBuilder).build(),
-                                  SymbolOccurrenceSlab::createEmpty());
+  return UseDex
+             ? dex::DexIndex::build(std::move(SymsBuilder).build(), URISchemes)
+             : MemIndex::build(std::move(SymsBuilder).build(),
+                               SymbolOccurrenceSlab::createEmpty());
 }
 
 } // namespace
@@ -299,7 +302,7 @@
   Opts.BuildDynamicSymbolIndex = EnableIndex;
   std::unique_ptr<SymbolIndex> StaticIdx;
   if (EnableIndex && !YamlSymbolFile.empty()) {
-    StaticIdx = buildStaticIndex(YamlSymbolFile);
+    StaticIdx = buildStaticIndex(YamlSymbolFile, Opts.URISchemes);
     Opts.StaticIndex = StaticIdx.get();
   }
   Opts.AsyncThreadsCount = WorkerThreadsCount;
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -41,22 +41,30 @@
   /// Kind specifies Token type which defines semantics for the internal
   /// representation. Each Kind has different representation stored in Data
   /// field.
+  // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
+  // strings.
   enum class Kind {
     /// Represents trigram used for fuzzy search of unqualified symbol names.
     ///
     /// Data contains 3 bytes with trigram contents.
     Trigram,
     /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
     ///
-    /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global
+    /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
     /// scope).
     Scope,
+    /// Path URI to symbol declaration.
+    ///
+    /// Data stores path URI of the parent directory of symbol declaration
+    /// location.
+    ///
+    /// Example:
+    /// "file:///path/to/clang-tools-extra/clangd/index/dex/".
+    PathURI,
     /// Internal Token type for invalid/special tokens, e.g. empty tokens for
     /// llvm::DenseMap.
     Sentinel,
     /// FIXME(kbobyrev): Add other Token Kinds
-    /// * Path with full or relative path to the directory in which symbol is
-    ///   defined
     /// * Type with qualified type name or its USR
   };
 
@@ -81,6 +89,21 @@
   }
 };
 
+/// Returns Search Token for each parent directory of given Path. Should be used
+/// within the index build process.
+///
+/// When Limit is set, returns no more than Limit tokens.
+std::vector<Token>
+generateProximityPaths(llvm::StringRef Path,
+                       size_t Limit = std::numeric_limits<size_t>::max());
+
+/// Returns Search Token and its distance from the absolute path for the first
+/// URI scheme allows valid conversions from AbsolutePath.
+std::vector<std::pair<Token, unsigned>>
+generateQueryProximityPaths(llvm::StringRef AbsolutePath,
+                            llvm::ArrayRef<std::string> URISchemes,
+                            size_t Count = 5);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/dex/Token.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/Token.cpp
@@ -0,0 +1,68 @@
+//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+#include "URI.h"
+#include "llvm/Support/Path.h"
+#include <cassert>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+std::vector<Token> generateProximityPaths(llvm::StringRef URIPath,
+                                          size_t Limit) {
+  std::vector<Token> Result;
+  // Don't generate any tokens for empty paths.
+  if (URIPath.empty())
+    return Result;
+  auto ParsedURI = URI::parse(URIPath);
+  assert(
+      static_cast<bool>(ParsedURI) &&
+      "Non-empty argument of generateProximityPaths() should be a valid URI.");
+  const auto Scheme = ParsedURI.get().scheme();
+  const auto Authority = ParsedURI.get().authority();
+  StringRef Path = ParsedURI.get().body();
+  // Get parent directory of the file with symbol declaration.
+  Path = llvm::sys::path::parent_path(Path);
+  while (!Path.empty() && Limit--) {
+    URI TokenURI(Scheme, Authority, Path);
+    Result.emplace_back(Token::Kind::PathURI, TokenURI.toString());
+    Path = llvm::sys::path::parent_path(Path);
+  }
+  return Result;
+}
+
+std::vector<std::pair<Token, unsigned>>
+generateQueryProximityPaths(llvm::StringRef AbsolutePath,
+                            llvm::ArrayRef<std::string> URISchemes,
+                            size_t Count) {
+  std::vector<std::pair<Token, unsigned>> Result;
+  for (const auto Scheme : URISchemes) {
+    unsigned Distance = 0;
+    auto URI = URI::create(AbsolutePath, Scheme);
+    // For some paths, conversion to different URI schemes is impossible. These
+    // should be just skipped.
+    if (!static_cast<bool>(URI)) {
+      // Ignore the error.
+      handleAllErrors(URI.takeError(), [](const llvm::ErrorInfoBase &) {});
+      continue;
+    }
+    for (const auto ProximityPath :
+         generateProximityPaths(URI.get().toString(), Count))
+      Result.emplace_back(ProximityPath, Distance++);
+    // As soon as one valid URI scheme for given absolute path is found, the
+    // token generation process should be stopped.
+    break;
+  }
+  return Result;
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.h
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -39,12 +39,15 @@
 // index on disk and then load it if available.
 class DexIndex : public SymbolIndex {
 public:
+  DexIndex(llvm::ArrayRef<std::string> URISchemes) : URISchemes(URISchemes) {}
+
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Syms);
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
 
   /// \brief Build index from a symbol slab.
-  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
+  static std::unique_ptr<SymbolIndex>
+  build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes);
 
   bool
   fuzzyFind(const FuzzyFindRequest &Req,
@@ -60,7 +63,6 @@
   size_t estimateMemoryUsage() const override;
 
 private:
-
   mutable std::mutex Mutex;
 
   std::shared_ptr<std::vector<const Symbol *>> Symbols /*GUARDED_BY(Mutex)*/;
@@ -73,10 +75,12 @@
   // Inverted index is used to retrieve posting lists which are processed during
   // the fuzzyFind process.
   llvm::DenseMap<Token, PostingList> InvertedIndex /*GUARDED_BY(Mutex)*/;
+
+  const std::vector<std::string> URISchemes;
 };
 
 } // namespace dex
 } // namespace clangd
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -26,11 +26,14 @@
 // Returns the tokens which are given symbols's characteristics. For example,
 // trigrams and scopes.
 // FIXME(kbobyrev): Support more token types:
-// * Path proximity
 // * Types
 std::vector<Token> generateSearchTokens(const Symbol &Sym) {
   std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
   Result.emplace_back(Token::Kind::Scope, Sym.Scope);
+  for (const auto &PathToken :
+       generateProximityPaths(Sym.CanonicalDeclaration.FileURI)) {
+    Result.push_back(PathToken);
+  }
   return Result;
 }
 
@@ -72,8 +75,9 @@
        estimateMemoryUsage());
 }
 
-std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
-  auto Idx = llvm::make_unique<DexIndex>();
+std::unique_ptr<SymbolIndex>
+DexIndex::build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes) {
+  auto Idx = llvm::make_unique<DexIndex>(URISchemes);
   Idx->build(getSymbolsFromSlab(std::move(Slab)));
   return std::move(Idx);
 }
@@ -117,6 +121,28 @@
     if (!ScopeIterators.empty())
       TopLevelChildren.push_back(createOr(move(ScopeIterators)));
 
+    // Add proximity paths boosting.
+    std::vector<std::unique_ptr<Iterator>> BoostingIterators;
+    // This should generate proximity path boosting iterators for each different
+    // URI Scheme the Index is aware of.
+    const size_t PARENTS_TO_BOOST = 5;
+    for (const auto &ProximityPath : Req.ProximityPaths) {
+      for (const auto &P : generateQueryProximityPaths(
+               ProximityPath, URISchemes, PARENTS_TO_BOOST)) {
+        const auto It = InvertedIndex.find(P.first);
+        if (It != InvertedIndex.end())
+          // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
+          // Each parent directory is boosted by PARENTS_TO_BOOST + 1 - Level.
+          BoostingIterators.push_back(
+              createBoost(create(It->second), PARENTS_TO_BOOST + 1 - P.second));
+      }
+    }
+    // Boosting iterators do not actually filter symbols. In order to preserve
+    // the validity of resulting query, TRUE iterator should be added along
+    // BOOSTs.
+    BoostingIterators.push_back(createTrue(Symbols->size()));
+    TopLevelChildren.push_back(createOr(move(BoostingIterators)));
+
     // Use TRUE iterator if both trigrams and scopes from the query are not
     // present in the symbol index.
     auto QueryIterator = TopLevelChildren.empty()
@@ -127,16 +153,30 @@
     // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
     // using 100x of the requested number might not be good in practice, e.g.
     // when the requested number of items is small.
-    const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+    const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount;
     auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
-    // FIXME(kbobyrev): Add boosting to the query and utilize retrieved
-    // boosting scores.
+
     std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
 
+    // Sort items using boosting score as the key.
+    // FIXME(kbobyrev): Consider merging this stage with the next one? This is
+    // a performance/quality trade-off and if the performance is not an issue
+    // there's plenty of quality to be bought by sacrificing part of
+    // performance.
+    std::sort(begin(SymbolDocIDs), end(SymbolDocIDs),
+              [&](const std::pair<DocID, float> &LHS,
+                  const std::pair<DocID, float> &RHS) {
+                return SymbolQuality.find((*Symbols)[LHS.first])->second *
+                           LHS.second >
+                       SymbolQuality.find((*Symbols)[RHS.first])->second *
+                           RHS.second;
+              });
+
     // Retrieve top Req.MaxCandidateCount items.
+    const size_t ItemsToScore = 10 * Req.MaxCandidateCount;
     std::priority_queue<std::pair<float, const Symbol *>> Top;
-    for (const auto &P : SymbolDocIDs) {
-      const DocID SymbolDocID = P.first;
+    for (size_t I = 0; I < std::min(SymbolDocIDs.size(), ItemsToScore); ++I) {
+      const DocID SymbolDocID = SymbolDocIDs[I].first;
       const auto *Sym = (*Symbols)[SymbolDocID];
       const llvm::Optional<float> Score = Filter.match(Sym->Name);
       if (!Score)
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -46,6 +46,7 @@
 
   index/dex/DexIndex.cpp
   index/dex/Iterator.cpp
+  index/dex/Token.cpp
   index/dex/Trigram.cpp
 
   LINK_LIBS
Index: clang-tools-extra/;
===================================================================
--- /dev/null
+++ clang-tools-extra/;
@@ -0,0 +1,68 @@
+//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+#include "URI.h"
+#include "llvm/Support/Path.h"
+#include <cassert>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+std::vector<Token> generateProximityPaths(llvm::StringRef URIPath,
+                                          size_t Limit) {
+  std::vector<Token> Result;
+  // Don't generate any tokens for empty paths.
+  if (URIPath.empty())
+    return Result;
+  auto ParsedURI = URI::parse(URIPath);
+  assert(
+      static_cast<bool>(ParsedURI) &&
+      "Non-empty argument of generateProximityPaths() should be a valid URI.");
+  const auto Scheme = ParsedURI.get().scheme();
+  const auto Authority = ParsedURI.get().authority();
+  StringRef Path = ParsedURI.get().body();
+  // Get parent directory of the file with symbol declaration.
+  Path = llvm::sys::path::parent_path(Path);
+  while (!Path.empty() && Limit--) {
+    URI TokenURI(Scheme, Authority, Path);
+    Result.emplace_back(Token::Kind::PathURI, TokenURI.toString());
+    Path = llvm::sys::path::parent_path(Path);
+  }
+  return Result;
+}
+
+std::vector<std::pair<Token, unsigned>>
+generateQueryProximityPaths(llvm::StringRef AbsolutePath,
+                            llvm::ArrayRef<std::string> URISchemes,
+                            size_t Count) {
+  std::vector<std::pair<Token, unsigned>> Result;
+  for (const auto Scheme : URISchemes) {
+    unsigned Distance = 0;
+    auto URI = URI::create(AbsolutePath, Scheme);
+    // For some paths, conversion to different URI schemes is impossible. These
+    // should be just skipped.
+    if (!static_cast<bool>(URI)) {
+      // Ignore the error.
+      handleAllErrors(URI.takeError(), [](const llvm::ErrorInfoBase &) {});
+      continue;
+    }
+    for (const auto ProximityPath :
+         generateProximityPaths(URI.get().toString(), Count))
+      Result.emplace_back(ProximityPath, Distance++);
+    // As soon as one valid URI scheme for given absolute path is found, the
+    // token generation process should be stopped.
+    break;
+  }
+  return Result;
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to