[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-12 Thread Kirill Bobyrev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL342025: [clangd] Implement a Proof-of-Concept tool for 
symbol index exploration (authored by omtcyfz, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D51628?vs=165016&id=165017#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/trunk/clangd/CMakeLists.txt
  clang-tools-extra/trunk/clangd/index/dex/dexp/CMakeLists.txt
  clang-tools-extra/trunk/clangd/index/dex/dexp/Dexp.cpp

Index: clang-tools-extra/trunk/clangd/index/dex/dexp/Dexp.cpp
===
--- clang-tools-extra/trunk/clangd/index/dex/dexp/Dexp.cpp
+++ clang-tools-extra/trunk/clangd/index/dex/dexp/Dexp.cpp
@@ -0,0 +1,161 @@
+//===--- Dexp.cpp - Dex EXPloration tool *- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../../../index/SymbolYAML.h"
+#include "../Dex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+
+using clang::clangd::FuzzyFindRequest;
+using clang::clangd::loadIndex;
+using clang::clangd::Symbol;
+using clang::clangd::SymbolIndex;
+using llvm::StringRef;
+
+namespace {
+
+llvm::cl::opt
+SymbolCollection("symbol-collection-file",
+ llvm::cl::desc("Path to the file with symbol collection"),
+ llvm::cl::Positional, llvm::cl::Required);
+
+static const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+
+Type use "help" request to get information about the details.
+)";
+
+void reportTime(StringRef Name, llvm::function_ref F) {
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  F();
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  const auto Duration = std::chrono::duration_cast(
+  TimerStop - TimerStart);
+  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
+}
+
+void fuzzyFind(llvm::StringRef UnqualifiedName, const SymbolIndex &Index) {
+  FuzzyFindRequest Request;
+  Request.MaxCandidateCount = 10;
+  Request.Query = UnqualifiedName;
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
+  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
+"Symbol Name");
+  size_t Rank = 0;
+  Index.fuzzyFind(Request, [&](const Symbol &Sym) {
+llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), Sym.Name);
+  });
+}
+
+static const std::string HelpMessage = R"(dexp commands:
+
+> find Name
+
+Constructs fuzzy find request given unqualified symbol name and returns top 10
+symbols retrieved from index.
+
+> lookup SymbolID
+
+Retrieves symbol names given USR.
+)";
+
+void help() { llvm::outs() << HelpMessage; }
+
+void lookup(StringRef USR, const SymbolIndex &Index) {
+  llvm::DenseSet IDs{clang::clangd::SymbolID{USR}};
+  clang::clangd::LookupRequest Request{IDs};
+  bool FoundSymbol = false;
+  Index.lookup(Request, [&](const Symbol &Sym) {
+if (!FoundSymbol)
+  FoundSymbol = true;
+llvm::outs() << SymbolToYAML(Sym);
+  });
+  if (!FoundSymbol)
+llvm::outs() << "not found\n";
+}
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * find symbol references: print set of reference locations
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+void dispatch(StringRef Request, const SymbolIndex &Index) {
+  llvm::SmallVector Arguments;
+  Request.split(Arguments, ' ');
+  if (Arguments.emp

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-12 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 165016.
kbobyrev marked an inline comment as done.
kbobyrev added a comment.

Remove artifact comment.


https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp

Index: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp
@@ -0,0 +1,161 @@
+//===--- Dexp.cpp - Dex EXPloration tool *- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../../../index/SymbolYAML.h"
+#include "../Dex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+
+using clang::clangd::FuzzyFindRequest;
+using clang::clangd::loadIndex;
+using clang::clangd::Symbol;
+using clang::clangd::SymbolIndex;
+using llvm::StringRef;
+
+namespace {
+
+llvm::cl::opt
+SymbolCollection("symbol-collection-file",
+ llvm::cl::desc("Path to the file with symbol collection"),
+ llvm::cl::Positional, llvm::cl::Required);
+
+static const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+
+Type use "help" request to get information about the details.
+)";
+
+void reportTime(StringRef Name, llvm::function_ref F) {
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  F();
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  const auto Duration = std::chrono::duration_cast(
+  TimerStop - TimerStart);
+  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
+}
+
+void fuzzyFind(llvm::StringRef UnqualifiedName, const SymbolIndex &Index) {
+  FuzzyFindRequest Request;
+  Request.MaxCandidateCount = 10;
+  Request.Query = UnqualifiedName;
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
+  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
+"Symbol Name");
+  size_t Rank = 0;
+  Index.fuzzyFind(Request, [&](const Symbol &Sym) {
+llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), Sym.Name);
+  });
+}
+
+static const std::string HelpMessage = R"(dexp commands:
+
+> find Name
+
+Constructs fuzzy find request given unqualified symbol name and returns top 10
+symbols retrieved from index.
+
+> lookup SymbolID
+
+Retrieves symbol names given USR.
+)";
+
+void help() { llvm::outs() << HelpMessage; }
+
+void lookup(StringRef USR, const SymbolIndex &Index) {
+  llvm::DenseSet IDs{clang::clangd::SymbolID{USR}};
+  clang::clangd::LookupRequest Request{IDs};
+  bool FoundSymbol = false;
+  Index.lookup(Request, [&](const Symbol &Sym) {
+if (!FoundSymbol)
+  FoundSymbol = true;
+llvm::outs() << SymbolToYAML(Sym);
+  });
+  if (!FoundSymbol)
+llvm::outs() << "not found\n";
+}
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * find symbol references: print set of reference locations
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+void dispatch(StringRef Request, const SymbolIndex &Index) {
+  llvm::SmallVector Arguments;
+  Request.split(Arguments, ' ');
+  if (Arguments.empty()) {
+llvm::outs() << "Request can not be empty.\n";
+help();
+return;
+  }
+
+  if (Arguments.front() == "find") {
+if (Arguments.size() != 2) {
+  llvm::outs() << "find request must specify unqualified symbol name.\n";
+  return;
+}
+reportTime("fuzzy fin

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:55
+
+// llvm::formatv string pattern for pretty-printing symbols.
+void fuzzyFind(llvm::StringRef UnqualifiedName, const SymbolIndex &Index) {

Is this a leftover from previous changes?


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164893.
kbobyrev marked 9 inline comments as done.
kbobyrev added a comment.

Address comments


https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp

Index: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp
@@ -0,0 +1,162 @@
+//===--- Dexp.cpp - Dex EXPloration tool *- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../../../index/SymbolYAML.h"
+#include "../Dex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+
+using clang::clangd::FuzzyFindRequest;
+using clang::clangd::loadIndex;
+using clang::clangd::Symbol;
+using clang::clangd::SymbolIndex;
+using llvm::StringRef;
+
+namespace {
+
+llvm::cl::opt
+SymbolCollection("symbol-collection-file",
+ llvm::cl::desc("Path to the file with symbol collection"),
+ llvm::cl::Positional, llvm::cl::Required);
+
+static const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+
+Type use "help" request to get information about the details.
+)";
+
+void reportTime(StringRef Name, llvm::function_ref F) {
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  F();
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  const auto Duration = std::chrono::duration_cast(
+  TimerStop - TimerStart);
+  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
+}
+
+// llvm::formatv string pattern for pretty-printing symbols.
+void fuzzyFind(llvm::StringRef UnqualifiedName, const SymbolIndex &Index) {
+  FuzzyFindRequest Request;
+  Request.MaxCandidateCount = 10;
+  Request.Query = UnqualifiedName;
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
+  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
+"Symbol Name");
+  size_t Rank = 0;
+  Index.fuzzyFind(Request, [&](const Symbol &Sym) {
+llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), Sym.Name);
+  });
+}
+
+static const std::string HelpMessage = R"(dexp commands:
+
+> find Name
+
+Constructs fuzzy find request given unqualified symbol name and returns top 10
+symbols retrieved from index.
+
+> lookup SymbolID
+
+Retrieves symbol names given USR.
+)";
+
+void help() { llvm::outs() << HelpMessage; }
+
+void lookup(StringRef USR, const SymbolIndex &Index) {
+  llvm::DenseSet IDs{clang::clangd::SymbolID{USR}};
+  clang::clangd::LookupRequest Request{IDs};
+  bool FoundSymbol = false;
+  Index.lookup(Request, [&](const Symbol &Sym) {
+if (!FoundSymbol)
+  FoundSymbol = true;
+llvm::outs() << SymbolToYAML(Sym);
+  });
+  if (!FoundSymbol)
+llvm::outs() << "not found\n";
+}
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * find symbol references: print set of reference locations
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+void dispatch(StringRef Request, const SymbolIndex &Index) {
+  llvm::SmallVector Arguments;
+  Request.split(Arguments, ' ');
+  if (Arguments.empty()) {
+llvm::outs() << "Request can not be empty.\n";
+help();
+return;
+  }
+
+  if (Arguments.front() == "find") {
+if (Arguments.size() != 2) {
+  llvm::outs() << "find request must specify unqualified symbol name

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt:10
+  Dexp.cpp
+  )
+

Should we indent closing parens to the opening one or keep at the start of the 
line?
Let's pick one style and be consistent (the best option is being consistent 
with the rest of LLVM/clangd)



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:46
+
+template  void reportTime(StringRef Name, Function F) {
+  const auto TimerStart = std::chrono::high_resolution_clock::now();

NIT: Now that we ignore the return value, we could even remove templates:
```
void reportTime(StringRef Name, llvm::function_ref F);
```


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added a comment.
This revision is now accepted and ready to land.

Really just details now.




Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:56
+// llvm::formatv string pattern for pretty-printing symbols.
+static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
+

(I don't think you want to share this between fuzzyfind and lookup, see below)



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:62
+  Request.Query = UnqualifiedName;
+  std::vector Symbols;
+  llvm::outs() << '\n';

unused



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:63
+  std::vector Symbols;
+  llvm::outs() << '\n';
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.

why? (and below)

If you want all commands to be separated by a  blank line, the dispatcher can 
do it



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:64
+  llvm::outs() << '\n';
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",

you have this comment twice



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:90
+void lookup(StringRef USR, const SymbolIndex &Index) {
+  llvm::DenseSet IDs{clang::clangd::SymbolID{USR}};
+  clang::clangd::LookupRequest Request{IDs};

note that you're looking up one ID so you'll always get 0 or 1 result



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:95
+"Symbol Name");
+  size_t Rank = 0;
+  Index.lookup(Request, [&](const Symbol &Sym) {

Rank is never meaningful for lookup, especially when only looking up one id



Comment at: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp:97
+  Index.lookup(Request, [&](const Symbol &Sym) {
+llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), 
Sym.Name);
+  });

this isn't enough detail to be really useful. I'd suggest dumping the whole 
symbol as YAML, or printing 'not found' if no result.



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:125
+// * print out tokens with least dense posting lists
+void dispatchRequest(const std::unique_ptr &Index,
+ StringRef Request) {

sammccall wrote:
> ilya-biryukov wrote:
> > This function does too many things:
> > - parses the input.
> > - dispatches on different kinds of requests.
> > - handles user input errors.
> > - actually runs the commands.
> > 
> > This turns out somewhat unreadable at the end, we might want to separate 
> > those into multiple functions. Will need a bit of time to think on how to 
> > actually do it best. Will come back after I have concrete suggestions.
> For now can we keep it simple:
>  - this function can split the command from the rest, and pass the rest of 
> the args to `fuzzyFindRequest` etc
>  - if the command is unknown, the dispatcher reports the error
>  - if the command is known, the command handler reports input errors (this 
> could be cleaner/more declarative, but we don't want to build a big framework 
> in this patch)
>  - this function could do the timing - the request construction is negligible 
> and that keeps the command handlers focused on their job
this is not done (dispatchRequest is still doing validation for each command)



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:130
+  if (Arguments.empty()) {
+llvm::outs() << "ERROR: Request can not be empty.\n";
+helpRequest();

sammccall wrote:
> REPLs don't usually make this an error, just return
this is not done



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:180
+
+  llvm::LineEditor LE("dexplorer");
+

dexp?


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164865.
kbobyrev marked 19 inline comments as done.
kbobyrev added a comment.

Address comments.


https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp

Index: clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp
@@ -0,0 +1,167 @@
+//===--- Dexp.cpp - Dex EXPloration tool *- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../../../index/SymbolYAML.h"
+#include "../Dex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+
+using clang::clangd::FuzzyFindRequest;
+using clang::clangd::loadIndex;
+using clang::clangd::Symbol;
+using clang::clangd::SymbolIndex;
+using llvm::StringRef;
+
+namespace {
+
+llvm::cl::opt
+SymbolCollection("symbol-collection-file",
+ llvm::cl::desc("Path to the file with symbol collection"),
+ llvm::cl::Positional, llvm::cl::Required);
+
+static const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+
+Type use "help" request to get information about the details.
+)";
+
+template  void reportTime(StringRef Name, Function F) {
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  F();
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  const auto Duration = std::chrono::duration_cast(
+  TimerStop - TimerStart);
+  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
+}
+
+// llvm::formatv string pattern for pretty-printing symbols.
+static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
+
+void fuzzyFind(llvm::StringRef UnqualifiedName, const SymbolIndex &Index) {
+  FuzzyFindRequest Request;
+  Request.MaxCandidateCount = 10;
+  Request.Query = UnqualifiedName;
+  std::vector Symbols;
+  llvm::outs() << '\n';
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
+"Symbol Name");
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  size_t Rank = 0;
+  Index.fuzzyFind(Request, [&](const Symbol &Sym) {
+llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), Sym.Name);
+  });
+  llvm::outs() << '\n';
+}
+
+static const std::string HelpMessage = R"(dexp commands:
+
+> find Name
+
+Constructs fuzzy find request given unqualified symbol name and returns top 10
+symbols retrieved from index.
+
+> lookup SymbolID
+
+Retrieves symbol names given USR.
+)";
+
+void help() { llvm::outs() << HelpMessage; }
+
+void lookup(StringRef USR, const SymbolIndex &Index) {
+  llvm::DenseSet IDs{clang::clangd::SymbolID{USR}};
+  clang::clangd::LookupRequest Request{IDs};
+  llvm::outs() << '\n';
+  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
+"Symbol Name");
+  size_t Rank = 0;
+  Index.lookup(Request, [&](const Symbol &Sym) {
+llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), Sym.Name);
+  });
+  llvm::outs() << '\n';
+}
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * find symbol references: print set of reference locations
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+void dispatch(StringRef Request, const SymbolIndex &Index) {
+  llvm::SmallVector Arguments;
+  Request.split(Arguments, ' ');
+  if (Argument

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:23
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/YAMLTraits.h"
+#include 

why?



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:56
+
+template 
+void reportTime(StringRef Name, Function F) {

ilya-biryukov wrote:
> Why do we want `TimeUnit`?
> It adds complexity, if we want to make large values more readable we have 
> other alternatives:
> - printing adaptive units, e.g. "when it's larger than 5000ms, start printing 
> seconds", "when it's larger than 5minutes, start printing minutes"
> - provide separators (`5ms`  is pretty much unreadable, `500 000 
> 000ms` is much better)
> 
+1 just pick ms or us for now

You can use `formatv({0:ms}, Duration)` to print neatly, no need for the traits.



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:79
+
+static const std::string HelpMessage = R"(
+DExplorer commands:

nit: this starts with a newline



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:82
+
+> fuzzy-find Name
+

nit: just "find" for convenience?



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:87
+
+> lookup-id SymbolID
+

examples use `lookup-id`, code says `lookup`.
(I prefer `lookup` FWIW)



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:90
+Retrieves symbol names given USR.
+
+> help

(Not sure the "help" doc or "press ctrl-d to exit" are particularly useful)



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:100
+
+void lookupRequest(const std::unique_ptr &Index,
+   clang::clangd::LookupRequest &Request) {

SymbolIndex&, you don't need the unique_ptr



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:100
+
+void lookupRequest(const std::unique_ptr &Index,
+   clang::clangd::LookupRequest &Request) {

sammccall wrote:
> SymbolIndex&, you don't need the unique_ptr
nit: lookup (the "request" is the arg)



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:103
+  std::vector Symbols;
+  Index->lookup(Request, [&](const Symbol &S) { Symbols.push_back(S); });
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.

why not print them on-the-fly?
(this isn't safe, the data backing the symbols may not live long enough)



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:106
+  llvm::outs() << "\nRetrieved Symbols:\n";
+  llvm::outs() << "Rank. Symbol Name | Symbol ID\n";
+  for (size_t Rank = 0; Rank < Symbols.size(); ++Rank)

please be consistent in using . or | as separator



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:106
+  llvm::outs() << "\nRetrieved Symbols:\n";
+  llvm::outs() << "Rank. Symbol Name | Symbol ID\n";
+  for (size_t Rank = 0; Rank < Symbols.size(); ++Rank)

sammccall wrote:
> please be consistent in using . or | as separator
I'd suggest putting the ID first, as it's fixed-width so the columns will 
actually be columns



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:125
+// * print out tokens with least dense posting lists
+void dispatchRequest(const std::unique_ptr &Index,
+ StringRef Request) {

ilya-biryukov wrote:
> This function does too many things:
> - parses the input.
> - dispatches on different kinds of requests.
> - handles user input errors.
> - actually runs the commands.
> 
> This turns out somewhat unreadable at the end, we might want to separate 
> those into multiple functions. Will need a bit of time to think on how to 
> actually do it best. Will come back after I have concrete suggestions.
For now can we keep it simple:
 - this function can split the command from the rest, and pass the rest of the 
args to `fuzzyFindRequest` etc
 - if the command is unknown, the dispatcher reports the error
 - if the command is known, the command handler reports input errors (this 
could be cleaner/more declarative, but we don't want to build a big framework 
in this patch)
 - this function could do the timing - the request construction is negligible 
and that keeps the command handlers focused on their job



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:130
+  if (Arguments.empty()) {
+llvm::outs() << "ERROR: Request can not be empty.\n";
+helpRequest();

REPLs don't usually make this an error, just return



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DEx

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added a comment.

Dexplorer is a long name. How about shortening it to `dexp`?




Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:56
+
+template 
+void reportTime(StringRef Name, Function F) {

Why do we want `TimeUnit`?
It adds complexity, if we want to make large values more readable we have other 
alternatives:
- printing adaptive units, e.g. "when it's larger than 5000ms, start printing 
seconds", "when it's larger than 5minutes, start printing minutes"
- provide separators (`5ms`  is pretty much unreadable, `500 000 000ms` 
is much better)




Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:125
+// * print out tokens with least dense posting lists
+void dispatchRequest(const std::unique_ptr &Index,
+ StringRef Request) {

This function does too many things:
- parses the input.
- dispatches on different kinds of requests.
- handles user input errors.
- actually runs the commands.

This turns out somewhat unreadable at the end, we might want to separate those 
into multiple functions. Will need a bit of time to think on how to actually do 
it best. Will come back after I have concrete suggestions.



Comment at: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp:139
+  "symbol name.\n";
+  helpRequest();
+  return;

This produces a ton of output.
Could we simply produce a message to run 'help' to get a list of supported 
commands instead?

It could be really annoying to see large help messages on every typo.


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev added inline comments.



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:39
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.

ilya-biryukov wrote:
> kbobyrev wrote:
> > ilya-biryukov wrote:
> > > Maybe we could expose `CommandLineParser` from 
> > > `llvm/lib/Support/CommandLine.cpp` as a public API and use it here?
> > > Not sure if there are any obstacles to doing so or how much work is it, 
> > > though.
> > > E.g. `cl::opt` seem to rely on being registered in the global parser and 
> > > I'm not sure if there's an easy way out of it.
> > Yes, that might be tricky. I have thought about a few options, e.g. 
> > consuming the first token from the input string as the command and 
> > dispatching arguments parsed via `CommandLineParser` manually (discarding 
> > those which are not accepted by provided command, etc). There are still few 
> > complications: help wouldn't be separate and easily accessible (unless 
> > hardcoded, which is suboptimal).
> > 
> > Another approach I could think of would be simplifying the interface of 
> > each command so that it would be easily parsed:
> > 
> > * `> fuzzy-find request.json`: this would require `FuzzyFindRequest` JSON 
> > (de)serialization implementation, but that would be useful for the 
> > benchmark tool, too
> > * `> lookup-id symbolid`
> > * `> load symbol.yaml`/`swap symbol.yaml`
> > * `> density-hist`
> > * `> tokens-distribution`
> > * `> dense-tokens`
> > * `> sparse-tokens`
> > 
> > And also a generic `> help` for the short reference of each command. Out of 
> > all these commands, only Fuzzy Find request would benefit a lot from a 
> > proper parsing of arguments, having JSON file representation might not be 
> > ideal, but it looks OK for the first iteration for me. Fuzzy Find request 
> > would presumably be one of the most used commands, though, but then again, 
> > we could iterate if that is not really convinient.
> The single-argument and no-arg commands certainly look nice. However, 
> creating a separate file for each fuzzy-find request feels like a pain.
> 
> Have you tried prototyping parsing with `CommandLineParser `? What are the 
> complications to exposing it from LLVM? 
I didn't, but I looked closely at `clang-query` which decided not to do that 
IIUC due to the complications with the `CommandLineParser` and also the 
specifics of the tool and `clang-refactor` which utilizes 
`CommandLineRefactoringOptionVisitor` and `RefactoringActionSubcommand` 
internal implementations, but overall I don't think it looks very simple.

We had a discussion with Eric and Sam, the consensus was that we should start 
with something very simple and iterate on that if we decide that more 
complicated interface is necessary.


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-11 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 164834.
kbobyrev marked 8 inline comments as done.
kbobyrev added a comment.

Address a round of comments; implement a dummy ad-hoc subcommand parser.


https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexplorer/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp

Index: clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/dexplorer/DExplorer.cpp
@@ -0,0 +1,186 @@
+//===--- DExplorer.cpp - Dex Exploration tool ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../../../index/SymbolYAML.h"
+#include "../Dex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/YAMLTraits.h"
+#include 
+
+using clang::clangd::FuzzyFindRequest;
+using clang::clangd::loadIndex;
+using clang::clangd::Symbol;
+using clang::clangd::SymbolIndex;
+using llvm::StringRef;
+
+namespace {
+
+llvm::cl::opt
+SymbolCollection("symbol-collection-file",
+ llvm::cl::desc("Path to the file with symbol collection"),
+ llvm::cl::Positional, llvm::cl::Required);
+
+static const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+
+Type use "help" request to get information about the details.
+)";
+
+template  std::string unitToString() { return ""; }
+
+template <> std::string unitToString() {
+  return "ms";
+}
+
+template <> std::string unitToString() { return "s"; }
+
+template 
+void reportTime(StringRef Name, Function F) {
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  F();
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  llvm::outs()
+  << Name << " took "
+  << std::chrono::duration_cast(TimerStop - TimerStart).count()
+  << ' ' << unitToString() << ".\n";
+}
+
+void fuzzyFindRequest(const std::unique_ptr &Index,
+  const FuzzyFindRequest &Request) {
+  std::vector Symbols;
+  Index->fuzzyFind(Request, [&](const Symbol &S) { Symbols.push_back(S); });
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  llvm::outs() << "\nRetrieved Symbols:\n";
+  llvm::outs() << "Symbol Name\n";
+  for (const auto &Symbol : Symbols)
+llvm::outs() << Symbol.Name << '\n';
+  llvm::outs() << '\n';
+}
+
+static const std::string HelpMessage = R"(
+DExplorer commands:
+
+> fuzzy-find Name
+
+Constructs fuzzy find request given unqualified symbol name and returns top 10
+symbols retrieved from index.
+
+> lookup-id SymbolID
+
+Retrieves symbol names given USR.
+
+> help
+
+Shows this message.
+
+Press Ctrl-D to exit.
+)";
+
+void helpRequest() { llvm::outs() << HelpMessage; }
+
+void lookupRequest(const std::unique_ptr &Index,
+   clang::clangd::LookupRequest &Request) {
+  std::vector Symbols;
+  Index->lookup(Request, [&](const Symbol &S) { Symbols.push_back(S); });
+  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
+  llvm::outs() << "\nRetrieved Symbols:\n";
+  llvm::outs() << "Rank. Symbol Name | Symbol ID\n";
+  for (size_t Rank = 0; Rank < Symbols.size(); ++Rank)
+llvm::outs() << Rank << ". " << Symbols[Rank].Name << " | "
+ << Symbols[Rank].ID.str() << '\n';
+  llvm::outs() << '\n';
+}
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * find symbol references: print set of reference locations
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense 

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-10 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

A tool like this will be a useful addition!

I think the big high-level questions are:

- UI: a REPL seems like a good model, but we need a more user-friendly way to 
read commands
- Testing: need a plan for either a) testing the commands or b) keeping the 
commands simple/readable enough that we can get away without testing them.

I suspect a good design links these questions together (e.g. by establishing a 
simple pattern for reading/printing to keep the eval part simple, and 
read/print is most of the UI).

I'm not sure I agree this should be deferred until later.




Comment at: clang-tools-extra/clangd/CMakeLists.txt:75
 add_subdirectory(global-symbol-builder)
+add_subdirectory(dexplorer)

if this is coupled to dex, and dex has its own directory, this should be a 
subdirectory I think



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:27
+
+llvm::cl::opt YAMLSymbolCollection(
+"symbol-collection-file",

please avoid mentioning YAML, as we now have multiple formats.
"index file"?



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:41
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * fuzzy find symbol given a set of properties

find references



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:62
+
+  llvm::outs() << "Scopes (comma-separated list):\n";
+  if (llvm::Optional Line = LE.readLine()) {

Agree with the concerns about the usability of this model, a command-like model 
would be nicer.
If we want to start with something simple without worrying too much about 
complex inputs, maybe just accept queries one-per-line and don't allow the 
other options to be set yet?

Otherwise it seems like all of this is likely to need to be replaced in a 
backwards-incompatible way...



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:122
+   << " ms.\n";
+  // FIXME(kbobyrev): Allow specifying which fields the user wants to see: e.g.
+  // origin of the symbol (CanonicalDeclaration path), #References, etc.

I'm not sure this will actually be a good UX.
Maybe just make sure you always include the symbol ID so the user can get 
details?



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:147
+
+  // FIXME(kbobyrev): Wrap time measurements into something like
+  // measureTime(Function, Arguments...).

kbobyrev wrote:
> ilya-biryukov wrote:
> > +1 to this FIXME.
> > 
> > Something like:
> > ```
> > template 
> > auto reportTime(StringRef Name, Func F) -> decltype(F()) {
> >   auto Result = F();
> >   llvm::outs() << Name << " took " << ...
> >   return Result;
> > } 
> > ```
> > 
> > The code calling this API would be quite readable:
> > ```
> > auto Index = reportTime("Build stage", []() { 
> >   return buildStaticIndex(...);
> > });
> > ```
> Ah, this looks really clean! I had few ideas of how to do that, but I 
> couldn't converge to a reasonable solution which wouldn't look like abusing 
> C++ to me :) Thanks!
+1 - could you add this in this patch? would improve readability already


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-10 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:39
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.

kbobyrev wrote:
> ilya-biryukov wrote:
> > Maybe we could expose `CommandLineParser` from 
> > `llvm/lib/Support/CommandLine.cpp` as a public API and use it here?
> > Not sure if there are any obstacles to doing so or how much work is it, 
> > though.
> > E.g. `cl::opt` seem to rely on being registered in the global parser and 
> > I'm not sure if there's an easy way out of it.
> Yes, that might be tricky. I have thought about a few options, e.g. consuming 
> the first token from the input string as the command and dispatching 
> arguments parsed via `CommandLineParser` manually (discarding those which are 
> not accepted by provided command, etc). There are still few complications: 
> help wouldn't be separate and easily accessible (unless hardcoded, which is 
> suboptimal).
> 
> Another approach I could think of would be simplifying the interface of each 
> command so that it would be easily parsed:
> 
> * `> fuzzy-find request.json`: this would require `FuzzyFindRequest` JSON 
> (de)serialization implementation, but that would be useful for the benchmark 
> tool, too
> * `> lookup-id symbolid`
> * `> load symbol.yaml`/`swap symbol.yaml`
> * `> density-hist`
> * `> tokens-distribution`
> * `> dense-tokens`
> * `> sparse-tokens`
> 
> And also a generic `> help` for the short reference of each command. Out of 
> all these commands, only Fuzzy Find request would benefit a lot from a proper 
> parsing of arguments, having JSON file representation might not be ideal, but 
> it looks OK for the first iteration for me. Fuzzy Find request would 
> presumably be one of the most used commands, though, but then again, we could 
> iterate if that is not really convinient.
The single-argument and no-arg commands certainly look nice. However, creating 
a separate file for each fuzzy-find request feels like a pain.

Have you tried prototyping parsing with `CommandLineParser `? What are the 
complications to exposing it from LLVM? 


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-09 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev added inline comments.



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:39
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.

ilya-biryukov wrote:
> Maybe we could expose `CommandLineParser` from 
> `llvm/lib/Support/CommandLine.cpp` as a public API and use it here?
> Not sure if there are any obstacles to doing so or how much work is it, 
> though.
> E.g. `cl::opt` seem to rely on being registered in the global parser and I'm 
> not sure if there's an easy way out of it.
Yes, that might be tricky. I have thought about a few options, e.g. consuming 
the first token from the input string as the command and dispatching arguments 
parsed via `CommandLineParser` manually (discarding those which are not 
accepted by provided command, etc). There are still few complications: help 
wouldn't be separate and easily accessible (unless hardcoded, which is 
suboptimal).

Another approach I could think of would be simplifying the interface of each 
command so that it would be easily parsed:

* `> fuzzy-find request.json`: this would require `FuzzyFindRequest` JSON 
(de)serialization implementation, but that would be useful for the benchmark 
tool, too
* `> lookup-id symbolid`
* `> load symbol.yaml`/`swap symbol.yaml`
* `> density-hist`
* `> tokens-distribution`
* `> dense-tokens`
* `> sparse-tokens`

And also a generic `> help` for the short reference of each command. Out of all 
these commands, only Fuzzy Find request would benefit a lot from a proper 
parsing of arguments, having JSON file representation might not be ideal, but 
it looks OK for the first iteration for me. Fuzzy Find request would presumably 
be one of the most used commands, though, but then again, we could iterate if 
that is not really convinient.



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:147
+
+  // FIXME(kbobyrev): Wrap time measurements into something like
+  // measureTime(Function, Arguments...).

ilya-biryukov wrote:
> +1 to this FIXME.
> 
> Something like:
> ```
> template 
> auto reportTime(StringRef Name, Func F) -> decltype(F()) {
>   auto Result = F();
>   llvm::outs() << Name << " took " << ...
>   return Result;
> } 
> ```
> 
> The code calling this API would be quite readable:
> ```
> auto Index = reportTime("Build stage", []() { 
>   return buildStaticIndex(...);
> });
> ```
Ah, this looks really clean! I had few ideas of how to do that, but I couldn't 
converge to a reasonable solution which wouldn't look like abusing C++ to me :) 
Thanks!


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-07 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added a comment.

Definitely like the idea of the tool. The main complication seems to be parsing 
of user input at this point.
I suggest we explore an option proposed before, that is reusing the LLVM 
command-line parser (see inline comment too). 
If that turns out to be much work, we could explore rolling out a simple parser 
for commands on our own.




Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:39
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.

Maybe we could expose `CommandLineParser` from 
`llvm/lib/Support/CommandLine.cpp` as a public API and use it here?
Not sure if there are any obstacles to doing so or how much work is it, though.
E.g. `cl::opt` seem to rely on being registered in the global parser and I'm 
not sure if there's an easy way out of it.



Comment at: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp:147
+
+  // FIXME(kbobyrev): Wrap time measurements into something like
+  // measureTime(Function, Arguments...).

+1 to this FIXME.

Something like:
```
template 
auto reportTime(StringRef Name, Func F) -> decltype(F()) {
  auto Result = F();
  llvm::outs() << Name << " took " << ...
  return Result;
} 
```

The code calling this API would be quite readable:
```
auto Index = reportTime("Build stage", []() { 
  return buildStaticIndex(...);
});
```


https://reviews.llvm.org/D51628



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-04 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev updated this revision to Diff 163800.
kbobyrev added a comment.

Cleaned up few minor issues.


https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/dexplorer/CMakeLists.txt
  clang-tools-extra/clangd/dexplorer/Dexplorer.cpp

Index: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/dexplorer/Dexplorer.cpp
@@ -0,0 +1,170 @@
+//===--- Dexplorer.cpp - Helper Index Exploration tool --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../index/SymbolYAML.h"
+#include "../index/dex/DexIndex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/YAMLTraits.h"
+#include 
+
+namespace {
+
+llvm::cl::opt YAMLSymbolCollection(
+"symbol-collection-file",
+llvm::cl::desc("Path to the file with YAML symbol collection"),
+llvm::cl::Positional, llvm::cl::Required);
+
+const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+)";
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * fuzzy find symbol given a set of properties
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+llvm::Optional
+readRequest(const llvm::LineEditor &LE) {
+  clang::clangd::FuzzyFindRequest Result;
+
+  // By default, show 10 results. Otherwise, stdout might be polluted.
+  Result.MaxCandidateCount = 10;
+
+  llvm::outs() << "Query:\n";
+  if (llvm::Optional Line = LE.readLine())
+Result.Query = Line.getValue();
+
+  llvm::outs() << "Scopes (comma-separated list):\n";
+  if (llvm::Optional Line = LE.readLine()) {
+llvm::SmallVector Scopes;
+llvm::StringRef S = Line.getValue();
+S.split(Scopes, ',');
+for (auto Scope : Scopes)
+  Result.Scopes.push_back(Scope);
+  }
+
+  llvm::outs() << "Numbers of symbol to return (default: 10):\n";
+  if (llvm::Optional Line = LE.readLine())
+if (unsigned long MaxCandidateCount =
+std::strtoul(Line.getValue().c_str(), nullptr, /*base=*/10))
+  Result.MaxCandidateCount = MaxCandidateCount;
+
+  llvm::outs() << "Proximity paths (comma-separated list):\n";
+  if (llvm::Optional Line = LE.readLine()) {
+llvm::SmallVector Paths;
+llvm::StringRef S = Line.getValue();
+S.split(Paths, ',');
+for (auto Path : Paths)
+  Result.ProximityPaths.push_back(Path);
+  }
+
+  llvm::outs() << "FuzzyFindRequest {\n";
+  llvm::outs() << "  Query = " << Result.Query << '\n';
+  llvm::outs() << "  Scopes = [";
+  for (size_t ScopeID = 0; ScopeID < Result.Scopes.size(); ++ScopeID) {
+llvm::outs() << '"' << Result.Scopes[ScopeID] << '"';
+if (ScopeID != Result.Scopes.size() - 1)
+  llvm::outs() << ", ";
+  }
+  llvm::outs() << "  ]\n";
+
+  llvm::outs() << "  MaxCandidateCount = " << Result.MaxCandidateCount << '\n';
+  llvm::outs() << "  ProximityPaths = [";
+  for (size_t PathID = 0; PathID < Result.ProximityPaths.size(); ++PathID) {
+llvm::outs() << Result.ProximityPaths[PathID];
+if (PathID != Result.ProximityPaths.size() - 1)
+  llvm::outs() << ", ";
+  }
+  llvm::outs() << '\n';
+  llvm::outs() << "  ]\n";
+  llvm::outs() << "}\n";
+
+  return Result;
+}
+
+void processRequest(const std::unique_ptr &Index,
+const clang::clangd::FuzzyFindRequest &Request) {
+  std::vector Symbols;
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  Index->fuzzyFind(
+  Request, [&](const clang::clangd::Symbol &S) { Symbols.push_back(S); });
+  const auto TimerStop = std::chrono::high_resolution_clock::now();
+  llvm::outs

[PATCH] D51628: [clangd] Implement a Proof-of-Concept tool for symbol index exploration

2018-09-04 Thread Kirill Bobyrev via Phabricator via cfe-commits
kbobyrev created this revision.
kbobyrev added reviewers: ioeric, sammccall, ilya-biryukov.
kbobyrev added a project: clang-tools-extra.
Herald added subscribers: kadircet, arphaman, jkorous, MaskRay, mgorny.

https://reviews.llvm.org/D51628

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/dexplorer/CMakeLists.txt
  clang-tools-extra/clangd/dexplorer/Dexplorer.cpp

Index: clang-tools-extra/clangd/dexplorer/Dexplorer.cpp
===
--- /dev/null
+++ clang-tools-extra/clangd/dexplorer/Dexplorer.cpp
@@ -0,0 +1,170 @@
+//===--- Dexplorer.cpp - Helper Index Exploration tool --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// This file implements a simple interactive tool which can be used to manually
+// evaluate symbol search quality of Clangd index.
+//
+//===--===//
+
+#include "../index/SymbolYAML.h"
+#include "../index/dex/DexIndex.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/LineEditor/LineEditor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/YAMLTraits.h"
+#include 
+
+namespace {
+
+llvm::cl::opt YAMLSymbolCollection(
+"symbol-collection-file",
+llvm::cl::desc("Path to the file with YAML symbol collection"),
+llvm::cl::Positional, llvm::cl::Required);
+
+const std::string Overview = R"(
+This is an **experimental** interactive tool to process user-provided search
+queries over given symbol collection obtained via global-symbol-builder. The
+tool can be used to evaluate search quality of existing index implementations
+and manually construct non-trivial test cases.
+)";
+
+// FIXME(kbobyrev): Make this an actual REPL: probably use LLVM Command Line
+// library for parsing flags and arguments.
+// FIXME(kbobyrev): Ideas for commands:
+// * fuzzy find symbol given a set of properties
+// * symbol lookup: print out symbol in YAML format given SymbolID
+// * load/swap/reload index: this would make it possible to get rid of llvm::cl
+//   usages in the tool driver and actually use llvm::cl library in the REPL.
+// * show posting list density histogram (our dump data somewhere so that user
+//   could build one)
+// * show number of tokens of each kind
+// * print out tokens with the most dense posting lists
+// * print out tokens with least dense posting lists
+llvm::Optional
+readRequest(llvm::LineEditor &LE) {
+  clang::clangd::FuzzyFindRequest Result;
+
+  // By default, show 10 results. Otherwise, stdout might be polluted.
+  Result.MaxCandidateCount = 10;
+
+  llvm::outs() << "Query:\n";
+  if (llvm::Optional Line = LE.readLine())
+Result.Query = Line.getValue();
+
+  llvm::outs() << "Scopes (comma-separated list):\n";
+  if (llvm::Optional Line = LE.readLine()) {
+llvm::SmallVector Scopes;
+llvm::StringRef S = Line.getValue();
+S.split(Scopes, ',');
+for (auto Scope : Scopes)
+  Result.Scopes.push_back(Scope);
+  }
+
+  llvm::outs() << "Numbers of symbol to return (default: 10):\n";
+  if (llvm::Optional Line = LE.readLine())
+if (unsigned long long MaxCandidateCount =
+std::strtoul(Line.getValue().c_str(), nullptr, 10))
+  Result.MaxCandidateCount = MaxCandidateCount;
+
+  llvm::outs() << "Proximity paths (comma-separated list):\n";
+  if (llvm::Optional Line = LE.readLine()) {
+llvm::SmallVector Paths;
+llvm::StringRef S = Line.getValue();
+S.split(Paths, ',');
+for (auto Path : Paths)
+  Result.ProximityPaths.push_back(Path);
+  }
+
+  llvm::outs() << "FuzzyFindRequest {\n";
+  llvm::outs() << "  Query = " << Result.Query << '\n';
+  llvm::outs() << "  Scopes = [";
+  for (size_t ScopeID = 0; ScopeID < Result.Scopes.size(); ++ScopeID) {
+llvm::outs() << '"' << Result.Scopes[ScopeID] << '"';
+if (ScopeID != Result.Scopes.size() - 1)
+  llvm::outs() << ", ";
+  }
+  llvm::outs() << "  ]\n";
+
+  llvm::outs() << "  MaxCandidateCount = " << Result.MaxCandidateCount << '\n';
+  llvm::outs() << "  ProximityPaths = [";
+  for (size_t PathID = 0; PathID < Result.ProximityPaths.size(); ++PathID) {
+llvm::outs() << Result.ProximityPaths[PathID];
+if (PathID != Result.ProximityPaths.size() - 1)
+  llvm::outs() << ", ";
+  }
+  llvm::outs() << '\n';
+  llvm::outs() << "  ]\n";
+  llvm::outs() << "}\n";
+
+  return Result;
+}
+
+void processRequest(const std::unique_ptr &Index,
+clang::clangd::FuzzyFindRequest &Request) {
+  std::vector Symbols;
+  const auto TimerStart = std::chrono::high_resolution_clock::now();
+  Index->fuzzyFind(
+  Request, [&](const clang::clangd::Symbol &S) { Symbols.push_back(S