kbobyrev updated this revision to Diff 164062.
kbobyrev marked 18 inline comments as done.
kbobyrev added a comment.

Address a round of comments


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/URI.cpp
  clang-tools-extra/clangd/URI.h
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector<std::string> URISchemes = {"unittest"};
+
+//===----------------------------------------------------------------------===//
+// Query iterator tests.
+//===----------------------------------------------------------------------===//
+
 std::vector<DocID> consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector<DocID> IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===----------------------------------------------------------------------===//
+// Search token tests.
+//===----------------------------------------------------------------------===//
+
 testing::Matcher<std::vector<Token>>
-trigramsAre(std::initializer_list<std::string> Trigrams) {
+tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) {
   std::vector<Token> Tokens;
-  for (const auto &Symbols : Trigrams) {
-    Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+    Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher<std::vector<Token>>
+trigramsAre(std::initializer_list<std::string> Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
               trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,25 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+                  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+              ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+                          "unittest:///clang-tools-extra/clangd/index",
+                          "unittest:///clang-tools-extra/clangd",
+                          "unittest:///clang-tools-extra", "unittest:///"));
+
+  EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+              ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+                          "unittest:///a", "unittest:///"));
+}
+
+//===----------------------------------------------------------------------===//
+// Index tests.
+//===----------------------------------------------------------------------===//
+
 TEST(DexIndex, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +455,8 @@
 TEST(DexIndex, FuzzyFind) {
   auto Index = DexIndex::build(
       generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-                       "other::ABC", "other::A"}));
+                       "other::ABC", "other::A"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -443,7 +478,8 @@
 
 TEST(DexIndexTest, FuzzyMatchQ) {
   auto I = DexIndex::build(
-      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -461,12 +497,12 @@
                                  symbol("2") /* duplicate */};
   FuzzyFindRequest Req;
   Req.Query = "2";
-  DexIndex I(Symbols);
+  DexIndex I(Symbols, URISchemes);
   EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  auto I = DexIndex::build(generateNumSymbols(0, 100));
+  auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "5";
   Req.MaxCandidateCount = 3;
@@ -478,73 +514,107 @@
 
 TEST(DexIndexTest, FuzzyMatch) {
   auto I = DexIndex::build(
-      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
   EXPECT_THAT(match(*I, Req),
               UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
-  auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  auto I =
+      DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
-  auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  auto I =
+      DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {""};
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
   auto I = DexIndex::build(
-      generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
+      generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
   auto I = DexIndex::build(
-      generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
+      generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::", "b::"};
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
 }
 
 TEST(DexIndexTest, NoMatchNestedScopes) {
-  auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}));
+  auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1"));
 }
 
 TEST(DexIndexTest, IgnoreCases) {
-  auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}));
+  auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "AB";
   Req.Scopes = {"ns::"};
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
 TEST(DexIndexTest, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::abc", "ns::xyz"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::xyz"));
   EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
 }
 
+TEST(DexIndexTest, ProximityPathsBoosting) {
+  // Populate Symbol Slab: generate two symbols with the same characteristics
+  // and hence quality, but different CanonicalDeclaration file.
+  SymbolSlab::Builder Builder;
+
+  auto RootSymbol = symbol("root::abc");
+  RootSymbol.CanonicalDeclaration.FileURI = "unittest:///file.h";
+  auto CloseSymbol = symbol("close::abc");
+  CloseSymbol.CanonicalDeclaration.FileURI = "unittest:////a/b/c/d/e/f/file.h";
+
+  Builder.insert(RootSymbol);
+  Builder.insert(CloseSymbol);
+
+  auto I = DexIndex::build(std::move(Builder).build(), URISchemes);
+
+  FuzzyFindRequest Req;
+  Req.Query = "abc";
+  // The best candidate can change depending on the proximity paths.
+  Req.MaxCandidateCount = 1;
+
+  // FuzzyFind request comes from the file which is far from the root: expect
+  // CloseSymbol to come out.
+  Req.ProximityPaths = {testPath("/a/b/c/d/e/f/file.h")};
+  EXPECT_THAT(match(*I, Req), ElementsAre("close::abc"));
+
+  // FuzzyFind request comes from the file which is close to the root: expect
+  // RootSymbol to come out.
+  Req.ProximityPaths = {testPath("file.h")};
+  EXPECT_THAT(match(*I, Req), ElementsAre("root::abc"));
+}
+
 } // namespace
 } // namespace dex
 } // namespace clangd
Index: clang-tools-extra/clangd/tool/ClangdMain.cpp
===================================================================
--- clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -275,8 +275,8 @@
     // Load the index asynchronously. Meanwhile SwapIndex returns no results.
     SwapIndex *Placeholder;
     StaticIdx.reset(Placeholder = new SwapIndex(llvm::make_unique<MemIndex>()));
-    runAsync<void>([Placeholder] {
-      if (auto Idx = loadIndex(YamlSymbolFile))
+    runAsync<void>([Placeholder, &Opts] {
+      if (auto Idx = loadIndex(YamlSymbolFile, Opts.URISchemes, UseDex))
         Placeholder->reset(std::move(Idx));
     });
   }
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -41,22 +41,31 @@
   /// Kind specifies Token type which defines semantics for the internal
   /// representation. Each Kind has different representation stored in Data
   /// field.
+  // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
+  // strings. For example, PathURI store URIs of each directory and its parents,
+  // which induces a lot of overhead because these paths tend to be long and
+  // each parent directory is a prefix.
   enum class Kind {
     /// Represents trigram used for fuzzy search of unqualified symbol names.
     ///
     /// Data contains 3 bytes with trigram contents.
     Trigram,
     /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
     ///
-    /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global
+    /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
     /// scope).
     Scope,
+    /// Path Proximity URI to symbol declaration.
+    ///
+    /// Data stores path URI of symbol declaration file or its parent.
+    ///
+    /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
+    /// and some amount of its parents.
+    ProximityURI,
     /// Internal Token type for invalid/special tokens, e.g. empty tokens for
     /// llvm::DenseMap.
     Sentinel,
     /// FIXME(kbobyrev): Add other Token Kinds
-    /// * Path with full or relative path to the directory in which symbol is
-    ///   defined
     /// * Type with qualified type name or its USR
   };
 
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -392,7 +392,7 @@
 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
   std::vector<std::pair<DocID, float>> Result;
   for (; !It.reachedEnd(); It.advance())
-    Result.emplace_back(It.peek(), It.consume());
+    Result.emplace_back({It.peek(), It.consume()});
   return Result;
 }
 
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.h
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -39,22 +39,26 @@
 class DexIndex : public SymbolIndex {
 public:
   // All symbols must outlive this index.
-  template <typename Range> DexIndex(Range &&Symbols) {
+  template <typename Range>
+  DexIndex(Range &&Symbols, llvm::ArrayRef<std::string> URISchemes)
+      : URISchemes(URISchemes) {
     for (auto &&Sym : Symbols)
       this->Symbols.push_back(&Sym);
     buildIndex();
   }
   // Symbols are owned by BackingData, Index takes ownership.
   template <typename Range, typename Payload>
-  DexIndex(Range &&Symbols, Payload &&BackingData)
-      : DexIndex(std::forward<Range>(Symbols)) {
+  DexIndex(Range &&Symbols, Payload &&BackingData,
+           llvm::ArrayRef<std::string> URISchemes)
+      : DexIndex(std::forward<Range>(Symbols), URISchemes) {
     KeepAlive = std::shared_ptr<void>(
         std::make_shared<Payload>(std::move(BackingData)), nullptr);
   }
 
   /// Builds an index from a slab. The index takes ownership of the slab.
-  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab) {
-    return llvm::make_unique<DexIndex>(Slab, std::move(Slab));
+  static std::unique_ptr<SymbolIndex>
+  build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes) {
+    return llvm::make_unique<DexIndex>(Slab, std::move(Slab), URISchemes);
   }
 
   bool
@@ -72,21 +76,31 @@
 private:
   void buildIndex();
 
+  /// Stores symbols sorted in the descending order of symbol quality..
   std::vector<const Symbol *> Symbols;
+  /// SymbolQuality[I] is the quality of Symbols[I].
+  std::vector<float> SymbolQuality;
   llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
-  llvm::DenseMap<const Symbol *, float> SymbolQuality;
-  // Inverted index is a mapping from the search token to the posting list,
-  // which contains all items which can be characterized by such search token.
-  // For example, if the search token is scope "std::", the corresponding
-  // posting list would contain all indices of symbols defined in namespace std.
-  // Inverted index is used to retrieve posting lists which are processed during
-  // the fuzzyFind process.
+  /// Inverted index is a mapping from the search token to the posting list,
+  /// which contains all items which can be characterized by such search token.
+  /// For example, if the search token is scope "std::", the corresponding
+  /// posting list would contain all indices of symbols defined in namespace
+  /// std. Inverted index is used to retrieve posting lists which are processed
+  /// during the fuzzyFind process.
   llvm::DenseMap<Token, PostingList> InvertedIndex;
   std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
+
+  const std::vector<std::string> URISchemes;
 };
 
+/// Returns Search Token for a number of parent directories of given Path.
+/// Should be used within the index build process.
+///
+/// This function is exposed for testing only.
+std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -8,8 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "DexIndex.h"
+#include "../../FileDistance.h"
 #include "../../FuzzyMatch.h"
 #include "../../Logger.h"
+#include "../../Quality.h"
+#include "llvm/ADT/StringSet.h"
 #include <algorithm>
 #include <queue>
 
@@ -26,28 +29,82 @@
 // Returns the tokens which are given symbols's characteristics. For example,
 // trigrams and scopes.
 // FIXME(kbobyrev): Support more token types:
-// * Path proximity
 // * Types
+// * Namespace proximity
 std::vector<Token> generateSearchTokens(const Symbol &Sym) {
   std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
   Result.emplace_back(Token::Kind::Scope, Sym.Scope);
+  for (const auto &ProximityURI :
+       generateProximityURIs(Sym.CanonicalDeclaration.FileURI))
+    Result.emplace_back(Token::Kind::ProximityURI, ProximityURI);
   return Result;
 }
 
+// Constructs BOOST iterators for Path Proximities.
+std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
+    llvm::ArrayRef<std::string> ProximityPaths,
+    llvm::ArrayRef<std::string> URISchemes,
+    const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
+  std::vector<std::unique_ptr<Iterator>> BoostingIterators;
+  // Deduplicate parent URIs extracted from the ProximityPaths.
+  llvm::StringSet<> ParentURIs;
+  // Use ProximityPaths converted to URIs as proximity sources.
+  llvm::StringMap<SourceParams> RequestURIs;
+  for (const auto &Path : ProximityPaths) {
+    const auto PathProximityURIs =
+        generateProximityURIs(URI::create(Path, URISchemes)->toString());
+    if (PathProximityURIs.empty())
+      continue;
+    RequestURIs[Path] = SourceParams();
+    for (const auto &ProximityURI : PathProximityURIs)
+      ParentURIs.insert(ProximityURI);
+  }
+  // Use SymbolRelevanceSignals for symbol relevance evaluation: use defaults
+  // for all parameters except for Proximity Path distance signal.
+  SymbolRelevanceSignals PathProximitySignals;
+  // DistanceCalculator will find the shortest distance from requested URI to
+  // any URI extracted from the ProximityPaths.
+  URIDistance DistanceCalculator(RequestURIs);
+  PathProximitySignals.FileProximityMatch = &DistanceCalculator;
+  // Try to build BOOST iterator for each Proximity Path provided by
+  // ProximityPaths. Boosting factor should depend on the distance to the
+  // Proximity Path: the closer processed path is, the higher boosting factor.
+  for (const auto &ParentURI : ParentURIs.keys()) {
+    const auto It =
+        InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI));
+    if (It != InvertedIndex.end()) {
+      // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
+      PathProximitySignals.SymbolURI = ParentURI;
+      BoostingIterators.push_back(
+          createBoost(create(It->second), PathProximitySignals.evaluate()));
+    }
+  }
+  return BoostingIterators;
+}
+
 } // namespace
 
 void DexIndex::buildIndex() {
-  for (const Symbol *Sym : Symbols) {
+  std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
+
+  for (size_t I = 0; I < Symbols.size(); ++I) {
+    const Symbol *Sym = Symbols[I];
     LookupTable[Sym->ID] = Sym;
-    SymbolQuality[Sym] = quality(*Sym);
+    ScoredSymbols[I] = {quality(*Sym), Sym};
   }
 
   // Symbols are sorted by symbol qualities so that items in the posting lists
   // are stored in the descending order of symbol quality.
-  std::sort(begin(Symbols), end(Symbols),
-            [&](const Symbol *LHS, const Symbol *RHS) {
-              return SymbolQuality[LHS] > SymbolQuality[RHS];
-            });
+  std::sort(begin(ScoredSymbols), end(ScoredSymbols),
+            std::greater<std::pair<float, const Symbol *>>());
+
+  // SymbolQuality was empty up until now.
+  SymbolQuality.resize(Symbols.size());
+  // Populate internal storage using Symbol + Score pairs.
+  for (size_t I = 0; I < ScoredSymbols.size(); ++I) {
+    SymbolQuality[I] = ScoredSymbols[I].first;
+    Symbols[I] = ScoredSymbols[I].second;
+  }
 
   // Populate TempInvertedIndex with posting lists for index symbols.
   for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
@@ -96,6 +153,17 @@
   if (!ScopeIterators.empty())
     TopLevelChildren.push_back(createOr(move(ScopeIterators)));
 
+  // Add proximity paths boosting.
+  auto BoostingIterators = createFileProximityIterators(
+      Req.ProximityPaths, URISchemes, InvertedIndex);
+  // Boosting iterators do not actually filter symbols. In order to preserve
+  // the validity of resulting query, TRUE iterator should be added along
+  // BOOSTs.
+  if (!BoostingIterators.empty()) {
+    BoostingIterators.push_back(createTrue(Symbols.size()));
+    TopLevelChildren.push_back(createOr(move(BoostingIterators)));
+  }
+
   // Use TRUE iterator if both trigrams and scopes from the query are not
   // present in the symbol index.
   auto QueryIterator = TopLevelChildren.empty()
@@ -106,32 +174,36 @@
   // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
   // using 100x of the requested number might not be good in practice, e.g.
   // when the requested number of items is small.
-  const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+  const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount;
   auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
-  // FIXME(kbobyrev): Add boosting to the query and utilize retrieved
-  // boosting scores.
-  std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
-
-  // Retrieve top Req.MaxCandidateCount items.
-  std::priority_queue<std::pair<float, const Symbol *>> Top;
-  for (const auto &P : SymbolDocIDs) {
-    const DocID SymbolDocID = P.first;
+
+  using IDAndScore = std::pair<DocID, float>;
+  std::vector<IDAndScore> IDAndScores = consume(*Root);
+
+  auto Compare = [](const IDAndScore &LHS, const IDAndScore &RHS) {
+    return LHS.second > RHS.second;
+  };
+  TopN<IDAndScore, decltype(Compare)> Top(Req.MaxCandidateCount, Compare);
+  for (const auto &IDAndScore : IDAndScores) {
+    const DocID SymbolDocID = IDAndScore.first;
     const auto *Sym = Symbols[SymbolDocID];
     const llvm::Optional<float> Score = Filter.match(Sym->Name);
     if (!Score)
       continue;
-    // Multiply score by a negative factor so that Top stores items with the
-    // highest actual score.
-    Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym);
-    if (Top.size() > Req.MaxCandidateCount) {
+    // Combine Fuzzy Matching score, precomputed symbol quality and boosting
+    // score for a cumulative final symbol score.
+    const float FinalScore =
+        (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second;
+    // If Top.push(...) returns true, it means that it had to pop an item. In
+    // this case, it is possible to retrieve more symbols.
+    if (Top.push({SymbolDocID, FinalScore}))
       More = true;
-      Top.pop();
-    }
   }
 
-  // Apply callback to the top Req.MaxCandidateCount items.
-  for (; !Top.empty(); Top.pop())
-    Callback(*Top.top().second);
+  // Apply callback to the top Req.MaxCandidateCount items in the descending
+  // order of cumulative score.
+  for (const auto &Item : std::move(Top).items())
+    Callback(*Symbols[Item.first]);
   return More;
 }
 
@@ -161,6 +233,36 @@
   return Bytes;
 }
 
+std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
+  std::vector<std::string> Result;
+  // Don't generate any tokens for empty paths.
+  if (URIPath.empty())
+    return Result;
+  auto ParsedURI = URI::parse(URIPath);
+  assert(ParsedURI &&
+         "Non-empty argument of generateProximityURIs() should be a valid "
+         "URI.");
+  StringRef Body = ParsedURI->body();
+  // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
+  // size of resulting vector. Some projects might want to have higher limit if
+  // the file hierarchy is deeper. For the generic case, it would be useful to
+  // calculate Limit in the index build stage by calculating the maximum depth
+  // of the project source tree at runtime.
+  size_t Limit = 5;
+  // Insert original URI before the loop: this would save a redundant iteration
+  // with a URI parse.
+  Result.emplace_back(ParsedURI->toString());
+  while (!Body.empty() && --Limit > 0) {
+    // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
+    // could be optimized.
+    Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
+    URI TokenURI(ParsedURI->scheme(), ParsedURI->authority(), Body);
+    if (!Body.empty())
+      Result.emplace_back(TokenURI.toString());
+  }
+  return Result;
+}
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/SymbolYAML.h
===================================================================
--- clang-tools-extra/clangd/index/SymbolYAML.h
+++ clang-tools-extra/clangd/index/SymbolYAML.h
@@ -45,6 +45,7 @@
 // The size of global symbols should be relatively small, so that all symbols
 // can be managed in memory.
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
+                                       llvm::ArrayRef<std::string> URISchemes,
                                        bool UseDex = true);
 
 } // namespace clangd
Index: clang-tools-extra/clangd/index/SymbolYAML.cpp
===================================================================
--- clang-tools-extra/clangd/index/SymbolYAML.cpp
+++ clang-tools-extra/clangd/index/SymbolYAML.cpp
@@ -185,6 +185,7 @@
 }
 
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
+                                       llvm::ArrayRef<std::string> URISchemes,
                                        bool UseDex) {
   trace::Span OverallTracer("LoadIndex");
   auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
@@ -209,7 +210,7 @@
   if (!Slab)
     return nullptr;
   trace::Span Tracer("BuildIndex");
-  return UseDex ? dex::DexIndex::build(std::move(*Slab))
+  return UseDex ? dex::DexIndex::build(std::move(*Slab), URISchemes)
                 : MemIndex::build(std::move(*Slab), RefSlab());
 }
 
Index: clang-tools-extra/clangd/URI.h
===================================================================
--- clang-tools-extra/clangd/URI.h
+++ clang-tools-extra/clangd/URI.h
@@ -45,6 +45,11 @@
   static llvm::Expected<URI> create(llvm::StringRef AbsolutePath,
                                     llvm::StringRef Scheme);
 
+  /// Creates a URI for a file using the first valid scheme from \p Schemes.
+  /// \p Schemes entries must be registered. The URI is percent-encoded.
+  static llvm::Expected<URI> create(llvm::StringRef AbsolutePath,
+                                    const std::vector<std::string> &Schemes);
+
   /// This creates a file:// URI for \p AbsolutePath. The path must be absolute.
   static URI createFile(llvm::StringRef AbsolutePath);
 
Index: clang-tools-extra/clangd/URI.cpp
===================================================================
--- clang-tools-extra/clangd/URI.cpp
+++ clang-tools-extra/clangd/URI.cpp
@@ -181,6 +181,25 @@
   return S->get()->uriFromAbsolutePath(AbsolutePath);
 }
 
+llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
+                                const std::vector<std::string> &Schemes) {
+  if (!llvm::sys::path::is_absolute(AbsolutePath))
+    return make_string_error("Not a valid absolute path: " + AbsolutePath);
+  for (const auto &Scheme : Schemes) {
+    auto URI = URI::create(AbsolutePath, Scheme);
+    // For some paths, conversion to different URI schemes is impossible. These
+    // should be just skipped.
+    if (!URI) {
+      // Ignore the error.
+      llvm::consumeError(URI.takeError());
+      continue;
+    }
+    return URI;
+  }
+  return make_string_error("Couldn't convert " + AbsolutePath +
+                           " to any given scheme.");
+}
+
 URI URI::createFile(llvm::StringRef AbsolutePath) {
   auto U = create(AbsolutePath, "file");
   if (!U)
Index: clang-tools-extra/clangd/FileDistance.h
===================================================================
--- clang-tools-extra/clangd/FileDistance.h
+++ clang-tools-extra/clangd/FileDistance.h
@@ -37,6 +37,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
+
 #include "URI.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
@@ -107,3 +110,5 @@
 
 } // namespace clangd
 } // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to