[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-06 Thread Kirill Bobyrev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rCTE341542: [clangd] Implement proximity path boosting for Dex 
(authored by omtcyfz, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D51481?vs=164176&id=164195#toc

Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D51481

Files:
  clangd/FileDistance.h
  clangd/URI.cpp
  clangd/URI.h
  clangd/index/SymbolYAML.cpp
  clangd/index/SymbolYAML.h
  clangd/index/dex/DexIndex.cpp
  clangd/index/dex/DexIndex.h
  clangd/index/dex/Token.h
  clangd/tool/ClangdMain.cpp
  unittests/clangd/DexIndexTests.cpp

Index: unittests/clangd/DexIndexTests.cpp
===
--- unittests/clangd/DexIndexTests.cpp
+++ unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,25 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+  "unittest:///clang-tools-extra/clangd/index",
+  "unittest:///clang-tools-extra/clangd",
+  "unittest:///clang-tools-extra", "unittest:///"));
+
+  EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+  ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+  "unittest:///a", "unittest:///"));
+}
+
+//===--===//
+// Index tests.
+//===--===//
+
 TEST(DexIndex, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +455,8 @@
 TEST(DexIndex, FuzzyFind) {
   auto Index = DexIndex::build(
   generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -443,7 +478,8 @@
 
 TEST(DexIndexTest, FuzzyMatchQ) {
   auto I = DexIndex::build(
-  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -461,12 +497,12 @@
  symbol("2") /* duplicate */};
   FuzzyFindRequest Req;
   Req.Query = "2";
-  DexIndex I(Symbols);
+  DexIndex I(Symbols, URISchemes);
   EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  auto I = DexIndex::build(generateNumSymbols(0, 100));
+  auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "5";
   Req.MaxCandidateCount = 3;
@@ -478,73 +514,101 @@
 
 TEST(DexIndexTest, FuzzyMatch) {
   auto I = DexIndex::build(
-  generateSymbols({"LaughingOutLou

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-06 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164176.
kbobyrev marked 3 inline comments as done.
kbobyrev added a comment.

Resolve the issue with `SymbolSlab::Builder` and make sure passed vector has 
correct lifetime.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/URI.cpp
  clang-tools-extra/clangd/URI.h
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,25 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+  "unittest:///clang-tools-extra/clangd/index",
+  "unittest:///clang-tools-extra/clangd",
+  "unittest:///clang-tools-extra", "unittest:///"));
+
+  EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+  ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+  "unittest:///a", "unittest:///"));
+}
+
+//===--===//
+// Index tests.
+//===--===//
+
 TEST(DexIndex, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +455,8 @@
 TEST(DexIndex, FuzzyFind) {
   auto Index = DexIndex::build(
   generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -443,7 +478,8 @@
 
 TEST(DexIndexTest, FuzzyMatchQ) {
   auto I = DexIndex::build(
-  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -461,12 +497,12 @@
  symbol("2") /* duplicate */};
   FuzzyFindRequest Req;
   Req.Query = "2";
-  DexIndex I(Symbols);
+  DexIndex I(Symbols, URISchemes);
   EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  auto I = DexIndex::build(generateNumSymbols(0, 100));
+  auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "5";
   Req.MaxCandidateCount

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-06 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164172.
kbobyrev marked 10 inline comments as done.
kbobyrev added a comment.

This should address the last round of comments.

There was still some hassle with the `SymbolSlab::Builder` in the unit tests 
and I decided to leave it as it is due to some weird ownership semantics 
introduced by `vector` of `Symbol`s.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/URI.cpp
  clang-tools-extra/clangd/URI.h
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,25 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+  "unittest:///clang-tools-extra/clangd/index",
+  "unittest:///clang-tools-extra/clangd",
+  "unittest:///clang-tools-extra", "unittest:///"));
+
+  EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+  ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+  "unittest:///a", "unittest:///"));
+}
+
+//===--===//
+// Index tests.
+//===--===//
+
 TEST(DexIndex, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +455,8 @@
 TEST(DexIndex, FuzzyFind) {
   auto Index = DexIndex::build(
   generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -443,7 +478,8 @@
 
 TEST(DexIndexTest, FuzzyMatchQ) {
   auto I = DexIndex::build(
-  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -461,12 +497,12 @@
  symbol("2") /* duplicate */};
   FuzzyFindRequest Req;
   Req.Query = "2";
-  DexIndex I(Symbols);
+  DexIndex I(Symbols, URISchemes);
   EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  auto I = DexIndex::build(generateNumSymbols(0, 10

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Eric Liu via Phabricator via cfe-commits
ioeric accepted this revision.
ioeric added inline comments.
This revision is now accepted and ready to land.



Comment at: clang-tools-extra/clangd/URI.cpp:200
+  return make_string_error("Couldn't convert " + AbsolutePath +
+   " to any given scheme.");
+}

maybe also include the schemes in the error message. with `llvm::join`?



Comment at: clang-tools-extra/clangd/URI.h:48
 
+  /// Creates a URI for a file using the first valid scheme from \p Schemes.
+  /// \p Schemes entries must be registered. The URI is percent-encoded.

nit: `// Similar to above except this uses the first scheme in \p Schemes that 
works.`



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:51
+  llvm::StringSet<> ParentURIs;
+  // Use ProximityPaths converted to URIs as proximity sources.
+  llvm::StringMap RequestURIs;

this comment is no longer true.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:52
+  // Use ProximityPaths converted to URIs as proximity sources.
+  llvm::StringMap RequestURIs;
+  for (const auto &Path : ProximityPaths) {

These are not URIs anymore. Consider calling it `Sources`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:55
+const auto PathProximityURIs =
+generateProximityURIs(URI::create(Path, URISchemes)->toString());
+if (PathProximityURIs.empty())

Result of `URI::create` must be explicitly checked; it would trigger assertion 
if it is not checked or contains error.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:58
+  continue;
+RequestURIs[Path] = SourceParams();
+for (const auto &ProximityURI : PathProximityURIs)

We should still add `Path` to `RequestURIs` even if there is no proximity URI.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:65
+  SymbolRelevanceSignals PathProximitySignals;
+  // DistanceCalculator will find the shortest distance from requested URI to
+  // any URI extracted from the ProximityPaths.

nit: `s/request URI/ProximityPaths/`



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:242
+  auto ParsedURI = URI::parse(URIPath);
+  assert(ParsedURI &&
+ "Non-empty argument of generateProximityURIs() should be a valid "

I'd suggesting avoiding assertion here. Consider `elog` the error and returning 
empty result.



Comment at: clang-tools-extra/clangd/index/dex/Iterator.cpp:395
   for (; !It.reachedEnd(); It.advance())
-Result.emplace_back(It.peek(), It.consume());
+Result.emplace_back({It.peek(), It.consume()});
   return Result;

Why do we need this change?



Comment at: clang-tools-extra/clangd/tool/ClangdMain.cpp:278
 StaticIdx.reset(Placeholder = new 
SwapIndex(llvm::make_unique()));
-runAsync([Placeholder] {
-  if (auto Idx = loadIndex(YamlSymbolFile))
+runAsync([Placeholder, &Opts] {
+  if (auto Idx = loadIndex(YamlSymbolFile, Opts.URISchemes, UseDex))

(Have you rebased on HEAD? The code has been changed.)



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:623
+
+  auto I = DexIndex::build(std::move(Builder).build(), URISchemes);
+

kbobyrev wrote:
> ioeric wrote:
> > We could use the constructor that doesn't take ownership e.g. 
> > `DexIndex({Sym1, Sym2}, URISchemes)`
> That would try to convert `{Sym1, Sym2}` to `SymbolSlab` and that's not 
> possible. IIUC your suggestion is about using the constructor with ranges:
> 
> ```
> template 
> DexIndex(Range &&Symbols, llvm::ArrayRef URISchemes)
> ```
> 
> I could use `llvm::make_range(begin(Symbols), end(Symbols))`, but then I'd 
> have to put everything into `Symbols` container first and that might be the 
> same amount of code. I'm not sure that would be better than having a 
> `Builder`. Did I miss something?
Have you tried std::vector{Sym1, Sym2}? or `push_back` twice? Using 
`Builder` seems unnecessary but not too bad.


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev added inline comments.



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:623
+
+  auto I = DexIndex::build(std::move(Builder).build(), URISchemes);
+

ioeric wrote:
> We could use the constructor that doesn't take ownership e.g. 
> `DexIndex({Sym1, Sym2}, URISchemes)`
That would try to convert `{Sym1, Sym2}` to `SymbolSlab` and that's not 
possible. IIUC your suggestion is about using the constructor with ranges:

```
template 
DexIndex(Range &&Symbols, llvm::ArrayRef URISchemes)
```

I could use `llvm::make_range(begin(Symbols), end(Symbols))`, but then I'd have 
to put everything into `Symbols` container first and that might be the same 
amount of code. I'm not sure that would be better than having a `Builder`. Did 
I miss something?


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164062.
kbobyrev marked 18 inline comments as done.
kbobyrev added a comment.

Address a round of comments


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/URI.cpp
  clang-tools-extra/clangd/URI.h
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,25 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+  "unittest:///clang-tools-extra/clangd/index",
+  "unittest:///clang-tools-extra/clangd",
+  "unittest:///clang-tools-extra", "unittest:///"));
+
+  EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+  ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+  "unittest:///a", "unittest:///"));
+}
+
+//===--===//
+// Index tests.
+//===--===//
+
 TEST(DexIndex, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +455,8 @@
 TEST(DexIndex, FuzzyFind) {
   auto Index = DexIndex::build(
   generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -443,7 +478,8 @@
 
 TEST(DexIndexTest, FuzzyMatchQ) {
   auto I = DexIndex::build(
-  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -461,12 +497,12 @@
  symbol("2") /* duplicate */};
   FuzzyFindRequest Req;
   Req.Query = "2";
-  DexIndex I(Symbols);
+  DexIndex I(Symbols, URISchemes);
   EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  auto I = DexIndex::build(generateNumSymbols(0, 100));
+  auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "5";
   Req.MaxCandidateCount = 3;
@@ -478,73 

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Eric Liu via Phabricator via cfe-commits
ioeric requested changes to this revision.
ioeric added a comment.
This revision now requires changes to proceed.

This should be the last round! ;)




Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:45
+std::vector>
+createPathIterators(llvm::ArrayRef ProximityPaths,
+llvm::ArrayRef URISchemes,

To be clearer, maybe `createFileProximityIterators`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:50
+  // Deduplicate parent URIs extracted from the FuzzyFindRequest.
+  llvm::StringSet UniqueURIs;
+  // Store all root URIs converted from the original FuzzyFindRequest absolute

nit: just `llvm::StringSet<>`?

`s/UniqueURIs/ParentURIs/`? As this is a set, we don't need to be explicit 
about uniqueness here. 

`s/FuzzyFindRequest/ProximityPaths` (in comment). FuzzyFindRequest shouldn't be 
relevant in this function, and we should avoid mentioning it here. same below.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:51
+  llvm::StringSet UniqueURIs;
+  // Store all root URIs converted from the original FuzzyFindRequest absolute
+  // paths within ProximityPath vector.

nit: maybe `// Use ProximityPaths as proximity sources`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:55
+  for (const auto &Path : ProximityPaths) {
+const auto PathProximityURIs = generateQueryProximityURIs(Path, 
URISchemes);
+// Use default distance parameters for the first URI in ProximityURIs, 
which

I'd double check `PathProximityURIs` is not empty.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:62
+  }
+  // Use SymbolRelevanceSignals for symbol quality evaluation: use defaults for
+  // all parameters except for Proximity Path distance signal.

nit: this is `symbol *relevance* evaluation`



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:77
+  PathProximitySignals.SymbolURI = URI;
+  float BoostFactor = 1 + PathProximitySignals.evaluate();
+  BoostingIterators.push_back(createBoost(create(It->second), 
BoostFactor));

`evaluate()` should return an appropriate boosting score (<1 for down-boosting 
and >1 for up-boosting), so there is no need to plus 1 here.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:182
+
+  auto Compare = [](IDAndScore LHS, IDAndScore RHS) {
+return LHS.second > RHS.second;

`const IDAndScore&`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:244
+ "URI.");
+  const auto Scheme = ParsedURI->scheme();
+  const auto Authority = ParsedURI->authority();

nit: `Scheme` and `Authority` are both used once. I'd just inline them.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:246
+  const auto Authority = ParsedURI->authority();
+  StringRef Path = ParsedURI->body();
+  // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum

nit: I'd call this `Body` for clarity.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:98
+/// Should be used within the index build process.
+std::vector generateProximityURIs(llvm::StringRef Path);
+

nit: s/Path/URIPath/



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:98
+/// Should be used within the index build process.
+std::vector generateProximityURIs(llvm::StringRef Path);
+

ioeric wrote:
> nit: s/Path/URIPath/
nit: maybe mention that these functions are exposed for testing only?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:104
+std::vector
+generateQueryProximityURIs(llvm::StringRef AbsolutePath,
+   llvm::ArrayRef URISchemes);

How about changing this one to just convert `AbsolutePath` to a `URI` of the 
preferred scheme and having the users call `generateProximityURIs`? This would 
simply the contract. And it can probably be a library function (e.g. 
`makePreferredURI(AbsPath, URISchemes)`) in URI.h.



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:430
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",

Could you add a case where `generateProximityURIs` generates fewer than 5 
proximity URIs? 



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:601
+
+  Symbol RootSymbol;
+  RootSymbol.Name = Name;

Could you add a helper (e.g. lambda) for creating the symbol? The code looks 
almost the same.

Alternatively, this can be:
```
auto Sym1 = symbol("na::X");
Sym1.CanonicalDeclaration.FileURI = "unittest:///...";
au

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164024.
kbobyrev added a comment.

Keep 2 minor refactorings out of the scope of this patch.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestIndex.cpp
  clang-tools-extra/unittests/clangd/TestIndex.h

Index: clang-tools-extra/unittests/clangd/TestIndex.h
===
--- clang-tools-extra/unittests/clangd/TestIndex.h
+++ clang-tools-extra/unittests/clangd/TestIndex.h
@@ -39,6 +39,13 @@
const FuzzyFindRequest &Req,
bool *Incomplete = nullptr);
 
+// Performs fuzzy matching-based symbol lookup given a query and an index.
+// Incomplete is set true if more items than requested can be retrieved, false
+// otherwise. Returns filename URIs of matched symbols canonical declarations.
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete = nullptr);
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs);
Index: clang-tools-extra/unittests/clangd/TestIndex.cpp
===
--- clang-tools-extra/unittests/clangd/TestIndex.cpp
+++ clang-tools-extra/unittests/clangd/TestIndex.cpp
@@ -55,6 +55,18 @@
   return Matches;
 }
 
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete) {
+  std::vector Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+Matches.push_back(Sym.CanonicalDeclaration.FileURI);
+  });
+  if (Incomplete)
+*Incomplete = IsIncomplete;
+  return Matches;
+}
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs) {
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,30 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+  "unittest:///clang-tools-extra/clangd/index",
+  "unittest:///clang-tools-extra/clangd",
+  "unittest:///clang-tools-extra", "unittest:///"));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  llvm::SmallSt

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164023.
kbobyrev marked 5 inline comments as done.
kbobyrev added a comment.

Address another round of comments.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/Quality.cpp
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestIndex.cpp
  clang-tools-extra/unittests/clangd/TestIndex.h

Index: clang-tools-extra/unittests/clangd/TestIndex.h
===
--- clang-tools-extra/unittests/clangd/TestIndex.h
+++ clang-tools-extra/unittests/clangd/TestIndex.h
@@ -39,6 +39,13 @@
const FuzzyFindRequest &Req,
bool *Incomplete = nullptr);
 
+// Performs fuzzy matching-based symbol lookup given a query and an index.
+// Incomplete is set true if more items than requested can be retrieved, false
+// otherwise. Returns filename URIs of matched symbols canonical declarations.
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete = nullptr);
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs);
Index: clang-tools-extra/unittests/clangd/TestIndex.cpp
===
--- clang-tools-extra/unittests/clangd/TestIndex.cpp
+++ clang-tools-extra/unittests/clangd/TestIndex.cpp
@@ -55,6 +55,18 @@
   return Matches;
 }
 
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete) {
+  std::vector Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+Matches.push_back(Sym.CanonicalDeclaration.FileURI);
+  });
+  if (Incomplete)
+*Incomplete = IsIncomplete;
+  return Matches;
+}
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs) {
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,24 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,30 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+  "unittest:///clang-tools-extra/clangd/index",
+  "unittest:///clang-tools-extra/clangd",
+  "unittest:///clang-tools-extra", "unittest:///"));
+}
+
+TEST(D

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:59
 LookupTable[Sym->ID] = Sym;
-SymbolQuality[Sym] = quality(*Sym);
+SymbolAndScores[I] = {Sym, static_cast(quality(*Sym))};
   }

ioeric wrote:
> ioeric wrote:
> > We should fix `quality` to return `float` for consistency. 
> nit: Instead of reconstruct the structure, I'd probably set the fields 
> manually.
We shouldn't need the `static_cast` anymore?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:55
   // are stored in the descending order of symbol quality.
-  std::sort(begin(Symbols), end(Symbols),
-[&](const Symbol *LHS, const Symbol *RHS) {
-  return SymbolQuality[LHS] > SymbolQuality[RHS];
-});
+  std::sort(begin(ScoredSymbols), end(ScoredSymbols));
+

This sorts in ascending order by default?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:118
+  SymbolRelevanceSignals PathProximitySignals;
+  for (const auto &ProximityPath : Req.ProximityPaths) {
+const auto PathProximityTokens =

As chatted offline, we only need a single URIDistacne structure for all 
proximity paths. And we could also deduplicate parent directories.

`generateQueryPathProximityTokens` should return URIs instead of tokens as the 
token data doesn't need to be raw URIs (e.g. hash). And the function should 
probably live in DexIndex.cpp instead of Token.cpp.

I'd also suggest pulling this code into a helper.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:159
+
+  std::vector> IDAndScores = consume(*Root);
+

I think `using IDAndScore = decltype(IDAndScores.fron())` might help here.



Comment at: clang-tools-extra/clangd/tool/ClangdMain.cpp:287
 StaticIdx.reset(Placeholder = new 
SwapIndex(llvm::make_unique()));
-runAsync([Placeholder] {
-  if (auto Idx = loadIndex(YamlSymbolFile))
+runAsync([Placeholder, Opts] {
+  if (auto Idx = loadIndex(YamlSymbolFile, Opts.URISchemes, UseDex))

This could take `Opts` by reference.


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-05 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163987.
kbobyrev marked 14 inline comments as done.
kbobyrev added a comment.

- Rebase on top of master
- Address suggestions Eric wrote here
- Address suggestions from the offline discussion by reusing `Quality.cpp` 
infrastructure for path proximity boosting calculation


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/FileDistance.h
  clang-tools-extra/clangd/Quality.cpp
  clang-tools-extra/clangd/index/SymbolYAML.cpp
  clang-tools-extra/clangd/index/SymbolYAML.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestIndex.cpp
  clang-tools-extra/unittests/clangd/TestIndex.h

Index: clang-tools-extra/unittests/clangd/TestIndex.h
===
--- clang-tools-extra/unittests/clangd/TestIndex.h
+++ clang-tools-extra/unittests/clangd/TestIndex.h
@@ -39,6 +39,13 @@
const FuzzyFindRequest &Req,
bool *Incomplete = nullptr);
 
+// Performs fuzzy matching-based symbol lookup given a query and an index.
+// Incomplete is set true if more items than requested can be retrieved, false
+// otherwise. Returns filename URIs of matched symbols canonical declarations.
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete = nullptr);
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs);
Index: clang-tools-extra/unittests/clangd/TestIndex.cpp
===
--- clang-tools-extra/unittests/clangd/TestIndex.cpp
+++ clang-tools-extra/unittests/clangd/TestIndex.cpp
@@ -55,6 +55,18 @@
   return Matches;
 }
 
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete) {
+  std::vector Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+Matches.push_back(Sym.CanonicalDeclaration.FileURI);
+  });
+  if (Incomplete)
+*Incomplete = IsIncomplete;
+  return Matches;
+}
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs) {
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,15 +333,38 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::ProximityURI);
+}
+
+testing::Matcher>
+proximityPathsAre(std::initializer_list ProximityPaths) {
+  std::vector Result;
+  for (const auto &Path : ProximityPaths) {
+Result.emplace_back(Token::Kind::ProximityURI, Path);
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-04 Thread Eric Liu via Phabricator via cfe-commits
ioeric added a comment.

There are a few changes/refactorings which I would suggest splitting from this 
patch, as they would require more thoughts and seem unrelated in this patch. 
Please see the inline comments :)




Comment at: clang-tools-extra/clangd/Quality.h:130
+/// directories URIs.
+float parentProximityScore(unsigned Distance);
+

This should take in a `FileDistanceOptions` to allow customized up/down costs. 
To support general cases, this should probably take `UpDistance`, 
`DownDistance`, and `FileDistanceOptions`.



Comment at: clang-tools-extra/clangd/Quality.h:130
+/// directories URIs.
+float parentProximityScore(unsigned Distance);
+

ioeric wrote:
> This should take in a `FileDistanceOptions` to allow customized up/down 
> costs. To support general cases, this should probably take `UpDistance`, 
> `DownDistance`, and `FileDistanceOptions`.
Not sure if this is the right name. Two paths don't need to be parent and child 
(if we also consider down cost).  Maybe just `proximityScore`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:42
 
+struct SymbolAndScore {
+  const Symbol *Sym;

Maybe `ScoredSymbol`? Is this used outside of `buildIndex`? If not, maybe just 
inline it? Or simply use a `std::pair`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:59
 LookupTable[Sym->ID] = Sym;
-SymbolQuality[Sym] = quality(*Sym);
+SymbolAndScores[I] = {Sym, static_cast(quality(*Sym))};
   }

We should fix `quality` to return `float` for consistency. 



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:59
 LookupTable[Sym->ID] = Sym;
-SymbolQuality[Sym] = quality(*Sym);
+SymbolAndScores[I] = {Sym, static_cast(quality(*Sym))};
   }

ioeric wrote:
> We should fix `quality` to return `float` for consistency. 
nit: Instead of reconstruct the structure, I'd probably set the fields manually.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:65
+  std::sort(begin(SymbolAndScores), end(SymbolAndScores),
+std::greater());
+

Is the `operator>` overload used anywhere besides here? If not, I'd suggest 
inlining the overload.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:129
+const auto ProximityURIs =
+generateQueryProximityURIs(ProximityPath, URISchemes);
+for (size_t Distance = 0; Distance < ProximityURIs.size(); ++Distance) {

IIUC, the assumption here is that `generateQueryProximityURIs` returns 
proximity tokens of parent directories sorted by distance. This seems to be 
making assumption about the implementation. `generateQueryProximityURIs` should 
return the distance explicitly along with the tokens.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:176
+  std::sort(begin(IDAndScores), end(IDAndScores),
+std::greater());
 

Just inline the comparator for clarity.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:179
   // Retrieve top Req.MaxCandidateCount items.
-  std::priority_queue> Top;
-  for (const auto &P : SymbolDocIDs) {
-const DocID SymbolDocID = P.first;
+  const size_t ItemsToScore = 10 * Req.MaxCandidateCount;
+  TopN Top(Req.MaxCandidateCount);

This is adding another staging of filtering. It seems to be an optimization 
that's not specific to the proximity path change and would probably require 
more careful review. Can we split this out of this patch for now?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:80
 
+  /// Stores symbols in the descending order using symbol quality as the key.
   std::vector Symbols;

nit: `Sorted in the descending order of symbol quality.`



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:83
+  /// Maps DocID to the quality of underlying symbol.
+  std::vector SymbolQuality;
   llvm::DenseMap LookupTable;

nit: `SymbolQuality[I] is the quality of Symbols[I]. `



Comment at: clang-tools-extra/clangd/index/dex/Iterator.h:139
 /// to acquire preliminary scores of requested items.
-std::vector> consume(Iterator &It);
+std::vector consume(Iterator &It);
 

I'm not sure about this refactoring. It seems to be out of the scope. Can we 
split it out and still return pair for now?



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:36
+  size_t Limit = 5;
+  while (!Path.empty() && Limit--) {
+// FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and

ioeric wrote:
> For the original URI, we could simply add it to the result outside of the 
> loop (and `--Limit`) to save one iteration?
nit: `(--Limit > 0)` for clarity.



Comment a

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-04 Thread Eric Liu via Phabricator via cfe-commits
ioeric requested changes to this revision.
ioeric added a comment.
This revision now requires changes to proceed.

(and forgot to request changes ;)


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-04 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163773.
kbobyrev marked 12 inline comments as done.
kbobyrev added a comment.

- Rebase on top of the parent patch
- Apply many refactorings where appropriate
- Write more comments and documentation
- Abstract pieces of code which are shared by multiple pieces of Clangd


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/Quality.cpp
  clang-tools-extra/clangd/Quality.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Iterator.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestIndex.cpp
  clang-tools-extra/unittests/clangd/TestIndex.h

Index: clang-tools-extra/unittests/clangd/TestIndex.h
===
--- clang-tools-extra/unittests/clangd/TestIndex.h
+++ clang-tools-extra/unittests/clangd/TestIndex.h
@@ -39,6 +39,13 @@
const FuzzyFindRequest &Req,
bool *Incomplete = nullptr);
 
+// Performs fuzzy matching-based symbol lookup given a query and an index.
+// Incomplete is set true if more items than requested can be retrieved, false
+// otherwise. Returns filename URIs of matched symbols canonical declarations.
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete = nullptr);
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs);
Index: clang-tools-extra/unittests/clangd/TestIndex.cpp
===
--- clang-tools-extra/unittests/clangd/TestIndex.cpp
+++ clang-tools-extra/unittests/clangd/TestIndex.cpp
@@ -55,6 +55,18 @@
   return Matches;
 }
 
+std::vector matchDeclURIs(const SymbolIndex &I,
+   const FuzzyFindRequest &Req,
+   bool *Incomplete) {
+  std::vector Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+Matches.push_back(Sym.CanonicalDeclaration.FileURI);
+  });
+  if (Incomplete)
+*Incomplete = IsIncomplete;
+  return Matches;
+}
+
 // Returns qualified names of symbols with any of IDs in the index.
 std::vector lookup(const SymbolIndex &I,
 llvm::ArrayRef IDs) {
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,11 +32,17 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
+//===--===//
+// Query iterator tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
   for (size_t I = 0; I < IDAndScore.size(); ++I)
-IDs[I] = IDAndScore[I].first;
+IDs[I] = IDAndScore[I].ID;
   return IDs;
 }
 
@@ -325,15 +333,38 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::ProximityURI);
+}
+
+testing::Matcher>
+proximityPathsAre(std::initializer_list ProximityPaths) {
+  std::vector Result;
+  for (const auto &Path : ProximityPaths) {
+Result.emplace_back(Token::Kind::ProximityURI, Path);
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THA

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-03 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clang-tools-extra/;:1
+//===--- Token.cpp - Symbol Search primitive *- C++ 
-*-===//
+// The LLVM Compiler Infrastructure

Unintended change?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:137
+  BoostingIterators.push_back(
+  createBoost(create(It->second), PARENTS_TO_BOOST + 1 - 
P.second));
+  }

ioeric wrote:
> `PARENTS_TO_BOOST + 1 - P.second` seems to be too much boosting. We should 
> align the boosting function with the one we use in clangd/Quality.cpp, e.g. 
> proximity score should be would be in the range [0, 1], and we can boost by 
> `1+proximity`. 
The `FIXME` is now outdated?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:143
+  float BoostFactor =
+  std::exp(Distance * 0.4f / FileDistanceOptions().UpCost);
+  BoostingIterators.push_back(

`x` in `std::exp(x)` should be negated so that the range is (0, 1]. And 
`BoostFactor` should be `1+(0,1]`.

I'm not sure if dividing by UpCost is correct here (and similarly in 
Quality.cpp), because you would want lower score for larger UpCost?



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:36
+  size_t Limit = 5;
+  while (!Path.empty() && Limit--) {
+// FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and

For the original URI, we could simply add it to the result outside of the loop 
(and `--Limit`) to save one iteration?



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:54
+// should be just skipped.
+if (!static_cast(URI)) {
+  // Ignore the error.

nit: just `if (!URI)`



Comment at: clang-tools-extra/clangd/index/dex/Token.h:64
+/// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
+/// and all its parents.
+PathURI,

nit: not necessarily *all* parents right?



Comment at: clang-tools-extra/clangd/index/dex/Token.h:65
+/// and all its parents.
+PathURI,
 /// Internal Token type for invalid/special tokens, e.g. empty tokens for

Maybe just `URI`? Or `ProximityURI`?



Comment at: clang-tools-extra/clangd/index/dex/Token.h:96
+/// Should be used within the index build process.
+std::vector generateProximityPathURIs(llvm::StringRef Path);
+

nit: s/Path/URIStr/ for the argument.
nit: Just `generateProximityURIs`? Same below.



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:443
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(generateQueryProximityPathURIs(testRoot() + "/a/b/c/d/e/f/g.h",
+ URISchemes),

This doesn't seem to work on windows?



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:620
 
+TEST(DexIndexTest, ProximityPathsBoosting) {
+  DexIndex I(URISchemes);

It's unclear what this is testing. Intuitively, you would want a smoke test 
that checks a symbol with better proximity is ranked higher (when all other 
signals are the same).



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:653
+  // FuzzyFind request comes from the file which is far from the root.
+  Req.ProximityPaths.push_back(testRoot() + "/a/b/c/d/e/f/file.h");
+

again, watch out for windows when hard-coding paths :)



Comment at: clang-tools-extra/unittests/clangd/TestFS.cpp:67
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32

Why this change?


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-03 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163680.
kbobyrev marked 17 inline comments as done.
kbobyrev added a comment.

Address a round of comments, refactor code.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/;
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
   FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::PathURI);
+}
+
+testing::Matcher>
+proximityPathsAre(std::initializer_list ProximityPaths) {
+  std::vector Result;
+  for (const auto &Path : ProximityPaths) {
+Result.emplace_back(Token::Kind::PathURI, Path);
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,8 +430,27 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPathURIs(
+  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+  pathsAre({"unittest:///clang-tools-extra/clangd/index/Token.h",
+"unittest:///clang-tools-extra/clangd/index",
+"unittest:///clang-tools-extra/clangd",
+"unittest:///clang-tools-extra", "unittest:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(generateQueryProximityPathURIs(testRoot() + "/a/b/c/d/e/f/g.h",
+ URISchemes),
+  proximityPathsAre({{"unittest:///a/b/c/d/e/f/g.h"},
+ {"unittest:///a/b/c/d/e/f"},
+ {"unittest:///a/b/c/d/e"},
+ {"unittest:///a/b/c/d"},
+ {"unittest:///a/b/c"}}));
+}
+
 TEST(DexIndex, Lookup) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
@@ -419,7 +461,7 @@
 }
 
 TEST(DexIndex, FuzzyFind) {
-  DexIndex Index;
+  DexIndex Index(URISchemes);
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
"other::ABC", "other::A"}));
   FuzzyFindRequest Req;
@@ -442,7 +484,7 @@
 }
 
 TEST(DexIndexTest, FuzzyMatchQ) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(
   generateSymbols({"LaughingOutLoud", "LionPopulation

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-03 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163667.
kbobyrev marked 2 inline comments as done.
kbobyrev edited the summary of this revision.
kbobyrev added a comment.

Rebase on top of `master`, s/Path/PathURI/g to be more explicit about the token 
contents and origin.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/;
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
   FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::PathURI);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::PathURI, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,8 +430,28 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"unittest:///clang-tools-extra/clangd/index/dex",
+"unittest:///clang-tools-extra/clangd/index",
+"unittest:///clang-tools-extra/clangd",
+"unittest:///clang-tools-extra",
+"unittest:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(
+  generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"unittest:///a/b/c/d/e/f", 0},
+ {"unittest:///a/b/c/d/e", 1},
+ {"unittest:///a/b/c/d", 2},
+ {"unittest:///a/b/c", 3},
+ {"unittest:///a/b", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
@@ -419,9 +462,10 @@
 }
 
 TEST(DexIndex, FuzzyFind) {
-  DexIndex Index;
+  DexIndex Index(URISchemes);
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"})
+  );
   FuzzyFindRequest Req;
   Req.Query = "ABC";
 

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-09-03 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:36
+Result.push_back(PathToken);
+  }
   return Result;

nit: no need for braces. [llvm-style]



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:126
+std::vector> BoostingIterators;
+// This should generate proximity path boosting iterators for each 
different
+// URI Scheme the Index is aware of.

I thought we wanted to take the first scheme that works, given that 
`URISchemes` is sorted by preference? 



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:137
+  BoostingIterators.push_back(
+  createBoost(create(It->second), PARENTS_TO_BOOST + 1 - 
P.second));
+  }

`PARENTS_TO_BOOST + 1 - P.second` seems to be too much boosting. We should 
align the boosting function with the one we use in clangd/Quality.cpp, e.g. 
proximity score should be would be in the range [0, 1], and we can boost by 
`1+proximity`. 



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:161
 
+// Sort items using boosting score as the key.
+// FIXME(kbobyrev): Consider merging this stage with the next one? This is

nit: in general, this should be a combination of relevance score (e.g. 
proximity) and quality score (e.g. #references).



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:169
+  const std::pair &RHS) {
+return SymbolQuality.find((*Symbols)[LHS.first])->second *
+   LHS.second >

nit: `SymbolQuality[(*Symbols)[LHS.first]]`?

For performance reason, could we instead make `SymbolQuality` a vector indexed 
by `DocID`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:42
 public:
+  DexIndex(llvm::ArrayRef URISchemes) : URISchemes(URISchemes) {}
+

nit: explicit?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:42
 public:
+  DexIndex(llvm::ArrayRef URISchemes) : URISchemes(URISchemes) {}
+

ioeric wrote:
> nit: explicit?
Add documentation about `URISchemes`? 



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:18
+
+std::vector generateProximityPaths(llvm::StringRef URIPath,
+  size_t Limit) {

Add `FIXME` mentioning that the parsing and encoding here are not necessary and 
could potentially be optimized. 



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:28
+  "Non-empty argument of generateProximityPaths() should be a valid URI.");
+  const auto Scheme = ParsedURI.get().scheme();
+  const auto Authority = ParsedURI.get().authority();

nit: `ParsedURI->scheme();`



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:32
+  // Get parent directory of the file with symbol declaration.
+  Path = llvm::sys::path::parent_path(Path);
+  while (!Path.empty() && Limit--) {

I think you need to set `posix` style explicitly here. On windows, the 
separator is '/'.



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:32
+  // Get parent directory of the file with symbol declaration.
+  Path = llvm::sys::path::parent_path(Path);
+  while (!Path.empty() && Limit--) {

ioeric wrote:
> I think you need to set `posix` style explicitly here. On windows, the 
> separator is '/'.
I think the original path should also be used as a proximity path.



Comment at: clang-tools-extra/clangd/index/dex/Token.cpp:51
+// should be just skipped.
+if (!static_cast(URI)) {
+  // Ignore the error.

You could use `llvm::consumeError(URI.takeError())`.



Comment at: clang-tools-extra/clangd/index/dex/Token.h:45
+  // FIXME(kbobyrev): Storing Data hash would be more efficient than storing 
raw
+  // strings.
   enum class Kind {

nit: For example,  (URI is an good example I think)



Comment at: clang-tools-extra/clangd/index/dex/Token.h:58
+///
+/// Data stores URI the parent directory of symbol declaration location.
+///

Do we also consider the original URI as a proximity path? 



Comment at: clang-tools-extra/clangd/index/dex/Token.h:94
+///
+/// When Limit is set, returns no more than Limit tokens.
+std::vector

nit: also explain how Limit is used? e.g. closer parent directories are 
preferred?



Comment at: clang-tools-extra/clangd/index/dex/Token.h:96
+std::vector
+generateProximityPaths(llvm::StringRef Path,
+   size_t Limit = std::numeric_limits::max());

As we are assuming that `Path` is an URI. We should be more explicit in the 
function names.




[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-31 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/Token.h:54
 Scope,
+/// Path to symbol declaration.
+///

ioeric wrote:
> As this is called `Path`, I'd try to decouple it from URIs, so that we don't 
> need special handling of `scheme` etc in the implementation. We might want to 
> canonicalize URI to "path" like `/file:/path/to/something/` so that we could 
> simply treat it as normal paths, and the token could potentially handle 
> actual actual file paths.
Should probably rename it to URI and be more explicit about its nature.


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-31 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163538.
kbobyrev marked an inline comment as not done.
kbobyrev added a comment.

Fix tests


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
   FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,8 +430,28 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"unittest:///clang-tools-extra/clangd/index/dex",
+"unittest:///clang-tools-extra/clangd/index",
+"unittest:///clang-tools-extra/clangd",
+"unittest:///clang-tools-extra",
+"unittest:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(
+  generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"unittest:///a/b/c/d/e/f", 0},
+ {"unittest:///a/b/c/d/e", 1},
+ {"unittest:///a/b/c/d", 2},
+ {"unittest:///a/b/c", 3},
+ {"unittest:///a/b", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
@@ -419,9 +462,10 @@
 }
 
 TEST(DexIndex, FuzzyFind) {
-  DexIndex Index;
+  DexIndex Index(URISchemes);
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"})
+  );
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -442,7 +486,7 @@
 }
 
 TEST(DexIndexTest, FuzzyMatchQ) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(
   generateSymbols(

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-31 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163537.
kbobyrev marked 14 inline comments as done.

https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
   FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,8 +430,28 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"unittest:///clang-tools-extra/clangd/index/dex/",
+"unittest:///clang-tools-extra/clangd/index/",
+"unittest:///clang-tools-extra/clangd/",
+"unittest:///clang-tools-extra/",
+"unittest:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(
+  generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"unittest:///a/b/c/d/e/f/", 0},
+ {"unittest:///a/b/c/d/e/", 1},
+ {"unittest:///a/b/c/d/", 2},
+ {"unittest:///a/b/c/", 3},
+ {"unittest:///a/b/", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
@@ -419,9 +462,10 @@
 }
 
 TEST(DexIndex, FuzzyFind) {
-  DexIndex Index;
+  DexIndex Index(URISchemes);
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"})
+  );
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -442,7 +486,7 @@
 }
 
 TEST(DexIndexTest, FuzzyMatchQ) {
-  DexIndex I;
+  DexIndex I(URISchemes);
   I.build(
   generateSymbols({"LaughingOutLoud", "LionPopulat

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-31 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:137
+  BoostingIterators.push_back(
+  createBoost(create(It->second), P.second + 10));
+  }

Could you comment on `P.second + 10` here? It sounds like a lot of boost.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:157
+const size_t ItemsToRetrieve =
+getRetrievalItemsMultiplier() * Req.MaxCandidateCount;
 auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);

Again, the multiplier change seems irrelevant in this patch.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.cpp:163
+// Sort items using boosting score as the key.
+std::sort(begin(SymbolDocIDs), end(SymbolDocIDs),
+  [](const std::pair &LHS,

Shouldn't we sort them by `Quality * boost`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:64
 
-private:
+  /// For fuzzyFind() Dex retrieves getRetrievalItemsMultiplier() more items
+  /// than requested via Req.MaxCandidateCount in the first stage of filtering.

kbobyrev wrote:
> ioeric wrote:
> > Why are values of multipliers interesting to users? Could these be 
> > implementation details in the cpp file?
> Actually, my understanding is that users might want to have full access to 
> the multipliers at some point to control the performance/quality ratio.
> 
> And it's also useful for the tests: otherwise the last one would have to 
> hard-code number of generated symbols to ensure only boosted ones are in the 
> returned list. It would have to be updated each time these internal 
> multipliers are and we might update them often/make logic less clear (by 
> allowing users to control these parameters).
> 
> What do you think?
I'm not sure if users can usefully control this multipliers without 
understanding the whole implementation.  Do we have a use case for these APIs 
now? If not, I'd suggesting removing them from this patch. It also seems to be 
out of the scope of this patch.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:73
+private:
+private:
   mutable std::mutex Mutex;

Double `private`?



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:87
+
+  std::vector URISchemes /*GUARDED BY(Mutex)*/;
+

I think `URISchemes` should be initialized when creating a `DexIndex` and 
remain the same. So this could be `const` without mutex guard.



Comment at: clang-tools-extra/clangd/index/dex/Token.h:54
 Scope,
+/// Path to symbol declaration.
+///

As this is called `Path`, I'd try to decouple it from URIs, so that we don't 
need special handling of `scheme` etc in the implementation. We might want to 
canonicalize URI to "path" like `/file:/path/to/something/` so that we could 
simply treat it as normal paths, and the token could potentially handle actual 
actual file paths.



Comment at: clang-tools-extra/clangd/index/dex/Token.h:97
 
+/// Returns Search Token for each parent directory of given Path. Should be 
used
+/// within the index build process.

I couldn't find implementations of these two functions in this patch?


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-31 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163507.
kbobyrev added a comment.

Canonicalize URIs, slightly simplify code structure.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
   FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,8 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,9 +430,29 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"unittest:clang-tools-extra/clangd/index/dex/",
+"unittest:clang-tools-extra/clangd/index/",
+"unittest:clang-tools-extra/clangd/",
+"unittest:clang-tools-extra/",
+"unittest:"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(
+  generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"unittest:a/b/c/d/e/f/", 0},
+ {"unittest:a/b/c/d/e/", 1},
+ {"unittest:a/b/c/d/", 2},
+ {"unittest:a/b/c/", 3},
+ {"unittest:a/b/", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
   DexIndex I;
-  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +464,8 @@
 TEST(DexIndex, FuzzyFind) {
   DexIndex Index;
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -444,7 +488,8 @@
 TEST(DexIndexTest, FuzzyMatchQ) {
   DexIndex I;
   I.build(
-  generateSymbols({"LaughingOutLoud", "LionPopulat

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:45
+  void build(std::shared_ptr> Symbols,
+ llvm::ArrayRef URISchemes);
 

sammccall wrote:
> ioeric wrote:
> > URI schemes are property of `Symbols`. We shouldn't need to pass specify 
> > the schemes again. Dex can collect all possible URI schemes when building 
> > the index.
> > 
> > I think we could generate proximity paths for index symbols without knowing 
> > about schemes (when building the index). For example, we could 
> > `canonicalize` the URI (as in `FileDistance.cpp`), for example, by 
> > converting `test:///x/y/z` to `/test:/x/y/z`.  For a query, we would first 
> > convert query proximity paths to URIs (of all possible schemes), perform 
> > the same canonicalization on URIs, and then use the canonicalized paths to 
> > compute proximity.
> We had an offline discussion about URIs which might be interesting.
> 
> Fetching posting lists for all URI schemes at query time seems wasteful, 
> @ilya-biryukov pointed out that in practice each file is going to exist in 
> some preferred URI space, and we should only compare that one.
> The only obstacle to doing that is that the preference order for URI schemes 
> is not global, each place we need it it's accepted as configuration.
> 
> Maybe we should change that: as part of registering the URI schemes, we 
> define a preferred order. And instead of the current operation `makeURI(File, 
> scheme)` we should just offer APIs like `makePreferredURI(File)` and 
> `makeFileURI(File)`.
I also thought about extracting schemes during the build stage earlier today, 
couldn't figure out whether it's a good thing: it's probably tiny overhead for 
the build stage (if we just pass schemes the server is aware of we avoid 
looking for new `scheme:` in the prefixes), but it might be worth considering 
the architecture (and the fact that even though the server might be aware of 
many schemes, some of them might not occur in the index and that would induce 
small overhead on the query side).

Could you please elaborate your suggestion about the `canonicalize`? I'm not 
sure I caught that: IIUC the current approach (which doesn't `canonicalize` 
URIs) should be sufficient for our needs. I'm not sure why we would want 
`/test:/x/y/z/` over `test:///x/y/z/` if the extracted tokens are `test:///`, 
`test:///x/`, `test:///x/y/`, `test:///x/y/z/` (which is the case right now).



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:64
 
-private:
+  /// For fuzzyFind() Dex retrieves getRetrievalItemsMultiplier() more items
+  /// than requested via Req.MaxCandidateCount in the first stage of filtering.

ioeric wrote:
> Why are values of multipliers interesting to users? Could these be 
> implementation details in the cpp file?
Actually, my understanding is that users might want to have full access to the 
multipliers at some point to control the performance/quality ratio.

And it's also useful for the tests: otherwise the last one would have to 
hard-code number of generated symbols to ensure only boosted ones are in the 
returned list. It would have to be updated each time these internal multipliers 
are and we might update them often/make logic less clear (by allowing users to 
control these parameters).

What do you think?


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163332.
kbobyrev marked 2 inline comments as done.
kbobyrev added a comment.

Address couple of comments.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/TestFS.cpp
  clang-tools-extra/unittests/clangd/TestFS.h

Index: clang-tools-extra/unittests/clangd/TestFS.h
===
--- clang-tools-extra/unittests/clangd/TestFS.h
+++ clang-tools-extra/unittests/clangd/TestFS.h
@@ -59,7 +59,7 @@
 };
 
 // Returns an absolute (fake) test directory for this OS.
-const char *testRoot();
+const std::string testRoot();
 
 // Returns a suitable absolute path for this OS.
 std::string testPath(PathRef File);
Index: clang-tools-extra/unittests/clangd/TestFS.cpp
===
--- clang-tools-extra/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/unittests/clangd/TestFS.cpp
@@ -64,7 +64,7 @@
   FileName, std::move(CommandLine), "")};
 }
 
-const char *testRoot() {
+const std::string testRoot() {
 #ifdef _WIN32
   return "C:\\clangd-test";
 #else
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,8 +7,10 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
 #include "TestIndex.h"
 #include "index/Index.h"
+#include "TestFS.h"
 #include "index/Merge.h"
 #include "index/dex/DexIndex.h"
 #include "index/dex/Iterator.h"
@@ -29,6 +31,8 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"unittest"};
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -325,14 +329,33 @@
 }
 
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
@@ -407,9 +430,27 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "unittest:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"unittest:///clang-tools-extra/clangd/index/dex/",
+"unittest:///clang-tools-extra/clangd/index/",
+"unittest:///clang-tools-extra/clangd/",
+"unittest:///clang-tools-extra/", "unittest:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityDistances) {
+  EXPECT_THAT(generateQueryProximityPaths(testRoot() + "/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"unittest:///a/b/c/d/e/f/", 0},
+ {"unittest:///a/b/c/d/e/", 1},
+ {"unittest:///a/b/c/d/", 2},
+ {"unittest:///a/b/c/", 3},
+ {"unittest:///a/b/", 4}}));
+}
+
 TEST(DexIndex, Lookup) {
   DexIndex I;
-  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +462,8 @@
 TEST(DexIndex, FuzzyFind) {
   DexIndex Index;
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   R

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:45
+  void build(std::shared_ptr> Symbols,
+ llvm::ArrayRef URISchemes);
 

sammccall wrote:
> ioeric wrote:
> > URI schemes are property of `Symbols`. We shouldn't need to pass specify 
> > the schemes again. Dex can collect all possible URI schemes when building 
> > the index.
> > 
> > I think we could generate proximity paths for index symbols without knowing 
> > about schemes (when building the index). For example, we could 
> > `canonicalize` the URI (as in `FileDistance.cpp`), for example, by 
> > converting `test:///x/y/z` to `/test:/x/y/z`.  For a query, we would first 
> > convert query proximity paths to URIs (of all possible schemes), perform 
> > the same canonicalization on URIs, and then use the canonicalized paths to 
> > compute proximity.
> We had an offline discussion about URIs which might be interesting.
> 
> Fetching posting lists for all URI schemes at query time seems wasteful, 
> @ilya-biryukov pointed out that in practice each file is going to exist in 
> some preferred URI space, and we should only compare that one.
> The only obstacle to doing that is that the preference order for URI schemes 
> is not global, each place we need it it's accepted as configuration.
> 
> Maybe we should change that: as part of registering the URI schemes, we 
> define a preferred order. And instead of the current operation `makeURI(File, 
> scheme)` we should just offer APIs like `makePreferredURI(File)` and 
> `makeFileURI(File)`.
That sounds like a good idea. 

We just need to be careful that indexes do not use non-preferred schemes (e.g. 
standalone indexer doesn't have the preferred scheme registered). This 
shouldn't be a problem in practice though.


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:45
+  void build(std::shared_ptr> Symbols,
+ llvm::ArrayRef URISchemes);
 

ioeric wrote:
> URI schemes are property of `Symbols`. We shouldn't need to pass specify the 
> schemes again. Dex can collect all possible URI schemes when building the 
> index.
> 
> I think we could generate proximity paths for index symbols without knowing 
> about schemes (when building the index). For example, we could `canonicalize` 
> the URI (as in `FileDistance.cpp`), for example, by converting 
> `test:///x/y/z` to `/test:/x/y/z`.  For a query, we would first convert query 
> proximity paths to URIs (of all possible schemes), perform the same 
> canonicalization on URIs, and then use the canonicalized paths to compute 
> proximity.
We had an offline discussion about URIs which might be interesting.

Fetching posting lists for all URI schemes at query time seems wasteful, 
@ilya-biryukov pointed out that in practice each file is going to exist in some 
preferred URI space, and we should only compare that one.
The only obstacle to doing that is that the preference order for URI schemes is 
not global, each place we need it it's accepted as configuration.

Maybe we should change that: as part of registering the URI schemes, we define 
a preferred order. And instead of the current operation `makeURI(File, scheme)` 
we should just offer APIs like `makePreferredURI(File)` and `makeFileURI(File)`.


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Eric Liu via Phabricator via cfe-commits
ioeric added a comment.

Some high-level comments :)




Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:45
+  void build(std::shared_ptr> Symbols,
+ llvm::ArrayRef URISchemes);
 

URI schemes are property of `Symbols`. We shouldn't need to pass specify the 
schemes again. Dex can collect all possible URI schemes when building the index.

I think we could generate proximity paths for index symbols without knowing 
about schemes (when building the index). For example, we could `canonicalize` 
the URI (as in `FileDistance.cpp`), for example, by converting `test:///x/y/z` 
to `/test:/x/y/z`.  For a query, we would first convert query proximity paths 
to URIs (of all possible schemes), perform the same canonicalization on URIs, 
and then use the canonicalized paths to compute proximity.



Comment at: clang-tools-extra/clangd/index/dex/DexIndex.h:64
 
-private:
+  /// For fuzzyFind() Dex retrieves getRetrievalItemsMultiplier() more items
+  /// than requested via Req.MaxCandidateCount in the first stage of filtering.

Why are values of multipliers interesting to users? Could these be 
implementation details in the cpp file?



Comment at: clang-tools-extra/clangd/index/dex/Token.h:99
+/// Boost starts with Count and decreases by 1 for each parent directory token.
+std::vector>
+generateQueryProximityPaths(llvm::StringRef AbsolutePath,

Note that `boost != up traversal distance` in most cases. We could return the 
distance here and let users decide what the exact boost score is.



Comment at: clang-tools-extra/unittests/clangd/DexIndexTests.cpp:33
 
+std::vector URISchemes = {"file"};
+

Use `unittest` scheme defined in TestFS.cpp


https://reviews.llvm.org/D51481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163294.
kbobyrev added a comment.

Stop query generation as soon as one valid URI scheme was found.


https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,7 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +30,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"file"};
+
+//===--===//
+// Query Iterators tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -324,16 +331,39 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search Token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
-TEST(DexIndexTrigrams, IdentifierTrigrams) {
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
+TEST(DexSearchTokens, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
 
@@ -374,7 +404,7 @@
"hkl", "ijk", "ikl", "jkl", "klm", "ab$", "ad$"}));
 }
 
-TEST(DexIndexTrigrams, QueryTrigrams) {
+TEST(DexSearchTokens, QueryTrigrams) {
   EXPECT_THAT(generateQueryTrigrams("c"), trigramsAre({"c$$"}));
   EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl$"}));
   EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"}));
@@ -407,9 +437,31 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "file:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"file:///clang-tools-extra/clangd/index/dex/",
+"file:///clang-tools-extra/clangd/index/",
+"file:///clang-tools-extra/clangd/",
+"file:///clang-tools-extra/", "file:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityPaths) {
+  EXPECT_THAT(generateQueryProximityPaths("/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"file:///a/b/c/d/e/f/", 6.},
+ {"file:///a/b/c/d/e/", 5.},
+ {"file:///a/b/c/d/", 4.},
+ {"file:///a/b/c/", 3.},
+ {"file:///a/b/", 2.}}));
+}
+
+//===--===//
+// Index tests.
+//===--===//
+
 TEST(DexIndex, Lookup) {
   DexIndex I;
-  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +473,8 @@
 TEST(DexIndex, FuzzyFind) {
   DexIndex Index;
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -444,7 +497,8 @@
 

[PATCH] D51481: [clangd] Implement proximity path boosting for Dex

2018-08-30 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev created this revision.
kbobyrev added reviewers: ioeric, ilya-biryukov, sammccall.
kbobyrev added a project: clang-tools-extra.
Herald added subscribers: kadircet, arphaman, mgrang, jkorous, MaskRay, mgorny.

https://reviews.llvm.org/D51481

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,7 @@
 //
 //===--===//
 
+#include "FuzzyMatch.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -29,6 +30,12 @@
 namespace dex {
 namespace {
 
+std::vector URISchemes = {"file"};
+
+//===--===//
+// Query Iterators tests.
+//===--===//
+
 std::vector consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector IDs(IDAndScore.size());
@@ -324,16 +331,39 @@
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===--===//
+// Search Token tests.
+//===--===//
+
 testing::Matcher>
-trigramsAre(std::initializer_list Trigrams) {
+tokensAre(std::initializer_list Strings, Token::Kind Kind) {
   std::vector Tokens;
-  for (const auto &Symbols : Trigrams) {
-Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
-TEST(DexIndexTrigrams, IdentifierTrigrams) {
+testing::Matcher>
+trigramsAre(std::initializer_list Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+testing::Matcher>
+pathsAre(std::initializer_list Paths) {
+  return tokensAre(Paths, Token::Kind::Path);
+}
+
+testing::Matcher>> proximityPathsAre(
+std::initializer_list> ProximityPaths) {
+  std::vector> Result;
+  for (const auto &P : ProximityPaths) {
+Result.push_back({Token(Token::Kind::Path, P.first), P.second});
+  }
+  return testing::UnorderedElementsAreArray(Result);
+}
+
+TEST(DexSearchTokens, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
   trigramsAre({"x86", "x$$", "x8$"}));
 
@@ -374,7 +404,7 @@
"hkl", "ijk", "ikl", "jkl", "klm", "ab$", "ad$"}));
 }
 
-TEST(DexIndexTrigrams, QueryTrigrams) {
+TEST(DexSearchTokens, QueryTrigrams) {
   EXPECT_THAT(generateQueryTrigrams("c"), trigramsAre({"c$$"}));
   EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl$"}));
   EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"}));
@@ -407,9 +437,31 @@
"hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityPaths(
+  "file:///clang-tools-extra/clangd/index/dex/Token.h"),
+  pathsAre({"file:///clang-tools-extra/clangd/index/dex/",
+"file:///clang-tools-extra/clangd/index/",
+"file:///clang-tools-extra/clangd/",
+"file:///clang-tools-extra/", "file:///"}));
+}
+
+TEST(DexSearchTokens, QueryProximityPaths) {
+  EXPECT_THAT(generateQueryProximityPaths("/a/b/c/d/e/f/g.h", URISchemes),
+  proximityPathsAre({{"file:///a/b/c/d/e/f/", 6.},
+ {"file:///a/b/c/d/e/", 5.},
+ {"file:///a/b/c/d/", 4.},
+ {"file:///a/b/c/", 3.},
+ {"file:///a/b/", 2.}}));
+}
+
+//===--===//
+// Index tests.
+//===--===//
+
 TEST(DexIndex, Lookup) {
   DexIndex I;
-  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
   UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +473,8 @@
 TEST(DexIndex, FuzzyFind) {
   DexIndex Index;
   Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-   "other::ABC", "other::A"}));
+   "other::ABC", "other::A"}),
+  URISchemes);
   FuzzyFindReque