sammccall created this revision.
sammccall added a reviewer: ioeric.
Herald added subscribers: cfe-commits, jfb, kadircet, arphaman, jkorous, 
MaskRay, ilya-biryukov, mgorny.

See tinyurl.com/clangd-automatic-index for design and goals.

Lots of limitations to keep this patch smallish, TODOs everywhere:

- no serialization to disk
- no changes to dynamic index, which now has a much simpler job
- no partitioning of symbols by file to avoid duplication of header symbols
- no reindexing of edited files
- only a single worker thread
- compilation database is slurped synchronously (doesn't scale)
- uses memindex, rebuilds after every file (should be dex, periodically)

Still needs tests, but should be ready for review of the basic shape.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D53032

Files:
  clangd/CMakeLists.txt
  clangd/ClangdLSPServer.cpp
  clangd/ClangdLSPServer.h
  clangd/ClangdServer.cpp
  clangd/Compiler.cpp
  clangd/Compiler.h
  clangd/GlobalCompilationDatabase.cpp
  clangd/GlobalCompilationDatabase.h
  clangd/index/Background.cpp
  clangd/index/Background.h
  clangd/tool/ClangdMain.cpp

Index: clangd/tool/ClangdMain.cpp
===================================================================
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -167,6 +167,14 @@
         "eventually. Don't rely on it."),
     llvm::cl::init(""), llvm::cl::Hidden);
 
+static llvm::cl::opt<bool> AutoIndex(
+    "auto-index",
+    llvm::cl::desc(
+        "Build a full index for the codebase containing edited files. "
+        "Indexing will occur in the background. "
+        "This option is still experimental, as the indexing is inefficient."),
+    llvm::cl::init(false), llvm::cl::Hidden);
+
 enum CompileArgsFrom { LSPCompileArgs, FilesystemCompileArgs };
 static llvm::cl::opt<CompileArgsFrom> CompileArgsFrom(
     "compile_args_from", llvm::cl::desc("The source of compile commands"),
@@ -316,9 +324,10 @@
   CCOpts.AllScopes = AllScopesCompletion;
 
   // Initialize and run ClangdLSPServer.
-  ClangdLSPServer LSPServer(
-      Out, CCOpts, CompileCommandsDirPath,
-      /*ShouldUseInMemoryCDB=*/CompileArgsFrom == LSPCompileArgs, Opts);
+  ClangdLSPServer LSPServer(Out, CCOpts, CompileCommandsDirPath,
+                            /*ShouldUseInMemoryCDB=*/CompileArgsFrom ==
+                                LSPCompileArgs,
+                            AutoIndex, Opts);
   constexpr int NoShutdownRequestErrorCode = 1;
   llvm::set_thread_name("clangd.main");
   // Change stdin to binary to not lose \r\n on windows.
Index: clangd/index/Background.h
===================================================================
--- /dev/null
+++ clangd/index/Background.h
@@ -0,0 +1,73 @@
+//===--- Background.h - Build an index in a background thread ----*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H
+
+#include "Context.h"
+#include "FSProvider.h"
+#include "index/Index.h"
+#include "index/FileIndex.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/Support/SHA1.h"
+#include <condition_variable>
+#include <deque>
+#include <thread>
+
+namespace clang {
+namespace clangd {
+
+// Builds an in-memory index by running the static indexer action over
+// all commands in a compilation database. Indexing happens in the background.
+// TODO: it should also persist its state on disk for fast start.
+class BackgroundIndex : public SwapIndex {
+public:
+  // TODO: FileSystemProvider is not const-correct.
+  // TODO: resource-dir injection should be hoisted somewhere common.
+  BackgroundIndex(Context BackgroundContext,
+                  StringRef ResourceDir, FileSystemProvider *);
+  ~BackgroundIndex(); // Blocks while the current task finishes.
+
+  // Index all TUs described in the compilation database.
+  // The indexing happens in a background thread, so after enqueueing files
+  // for indexing their symbols will be available sometime later.
+  void enqueueAll(llvm::StringRef Directory,
+                  const tooling::CompilationDatabase &);
+
+  // Cause background threads to stop after their current task; any remaining
+  // tasks will be discarded.
+  void stop();
+
+private:
+  // configuration
+  std::string ResourceDir;
+  FileSystemProvider *FSProvider;
+  Context BackgroundContext;
+
+  // index state
+  llvm::Error index(tooling::CompileCommand);
+  FileSymbols IndexedSymbols; // Index contents.
+  using Hash = decltype(llvm::SHA1::hash({}));
+  llvm::StringMap<Hash> FileHash; // Digest of indexed file.
+
+  // queue management
+  using Task = std::function<void()>; // TODO: use multiple worker threads.
+  void run(); // Main loop executed by Thread. Runs tasks from Queue.
+  void enqueueLocked(tooling::CompileCommand Cmd);
+  std::thread Thread;
+  std::mutex QueueMu;
+  std::condition_variable QueueCV;
+  bool ShouldStop = false;
+  std::deque<Task> Queue;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clangd/index/Background.cpp
===================================================================
--- /dev/null
+++ clangd/index/Background.cpp
@@ -0,0 +1,168 @@
+//===-- Background.cpp - Build an index in a background thread ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/Background.h"
+#include "index/MemIndex.h"
+#include "index/Serialization.h"
+#include "index/IndexAction.h"
+#include "Compiler.h"
+#include "ClangdUnit.h"
+#include "Logger.h"
+#include "Trace.h"
+#include "llvm/Support/SHA1.h"
+#include <random>
+
+using namespace llvm;
+namespace clang {
+namespace clangd {
+
+BackgroundIndex::BackgroundIndex(Context BackgroundContext,
+                                 StringRef ResourceDir,
+                                 FileSystemProvider *FSProvider)
+    : SwapIndex(llvm::make_unique<MemIndex>()), ResourceDir(ResourceDir),
+      FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
+      Thread([this] { run(); }) {}
+
+BackgroundIndex::~BackgroundIndex() {
+  stop();
+  Thread.join();
+}
+
+void BackgroundIndex::stop() {
+  {
+    std::lock_guard<std::mutex> Lock(QueueMu);
+    ShouldStop = true;
+  }
+  QueueCV.notify_all();
+}
+
+void BackgroundIndex::run() {
+  WithContext Background(std::move(BackgroundContext));
+  while(true) {
+    llvm::Optional<Task> Task;
+    {
+      std::unique_lock<std::mutex> Lock(QueueMu);
+      QueueCV.wait(Lock, [&] { return ShouldStop || !Queue.empty(); });
+      if (ShouldStop)
+        return;
+      Task = std::move(Queue.front());
+      Queue.pop_front();
+    }
+    (*Task)();
+  }
+}
+
+void BackgroundIndex::enqueueAll(StringRef Directory,
+                                 const tooling::CompilationDatabase &CDB) {
+  // TODO: this function may be slow. Perhaps enqueue a task to re-read the CDB
+  // from disk and enqueue the commands asynchronously?
+  auto Cmds = CDB.getAllCompileCommands();
+  std::mt19937 Generator(std::random_device{}());
+  std::shuffle(Cmds.begin(), Cmds.end(), Generator);
+  log("Enqueueing {0} commands for indexing from {1}", Cmds.size(), Directory);
+  {
+    std::lock_guard<std::mutex> Lock(QueueMu);
+    for (auto &Cmd : Cmds)
+      enqueueLocked(std::move(Cmd));
+  }
+  QueueCV.notify_all();
+}
+
+void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
+  Queue.push_back(Bind(
+      [this](tooling::CompileCommand Cmd) {
+        std::string Filename = Cmd.Filename;
+        Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
+        if (auto Error = index(std::move(Cmd)))
+          log("Indexing {0} failed: {1}", Filename, std::move(Error));
+      },
+      std::move(Cmd)));
+}
+
+llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
+  trace::Span Tracer("BackgroundIndex");
+  SPAN_ATTACH(Tracer, "file", Cmd.Filename);
+  SmallString<128> AbsolutePath;
+  if (llvm::sys::path::is_absolute(Cmd.Filename)) {
+    AbsolutePath = Cmd.Filename;
+  } else {
+    AbsolutePath = Cmd.Directory;
+    llvm::sys::path::append(AbsolutePath, Cmd.Filename);
+  }
+  llvm::sys::path::native(AbsolutePath);
+
+  auto FS = FSProvider->getFileSystem();
+  auto Buf = FS->getBufferForFile(AbsolutePath);
+  if (!Buf)
+    return errorCodeToError(Buf.getError());
+  StringRef Contents = Buf->get()->getBuffer();
+  auto Hash = SHA1::hash({(const uint8_t *)Contents.data(), Contents.size()});
+
+  if (FileHash.lookup(AbsolutePath) == Hash) {
+    vlog("No need to index {0}, already up to date", AbsolutePath);
+    return Error::success();
+  }
+
+  log("Indexing {0}", Cmd.Filename, toHex(Hash));
+  ParseInputs Inputs;
+  Inputs.FS = std::move(FS);
+  Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory);
+  Inputs.CompileCommand = std::move(Cmd);
+  auto CI = buildCompilerInvocation(Inputs);
+  if (!CI)
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Couldn't build compiler invocation");
+  IgnoreDiagnostics IgnoreDiags;
+  auto Clang = prepareCompilerInstance(
+      std::move(CI), /*Preamble=*/nullptr, std::move(*Buf),
+      std::make_shared<PCHContainerOperations>(), Inputs.FS, IgnoreDiags);
+  if (!Clang)
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Couldn't build compiler instance");
+
+  SymbolCollector::Options IndexOpts;
+  SymbolSlab Symbols;
+  RefSlab Refs;
+  IndexFileIn IndexData;
+  auto Action = createStaticIndexingAction(
+      IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
+      [&](RefSlab R) { Refs = std::move(R); });
+
+  // We're going to run clang here, and it could potentially crash.
+  // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
+  // but the leaky "recovery" is pretty scary too in a long-running process.
+  // If crashes are a real problem, maybe we should fork a child process.
+
+  const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
+  if (!Action->BeginSourceFile(*Clang, Input))
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "BeginSourceFile() failed");
+  if (!Action->Execute())
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Execute() failed");
+  Action->EndSourceFile();
+
+  log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename,
+      Symbols.size(), Refs.size());
+  SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
+  SPAN_ATTACH(Tracer, "refs", int(Refs.size()));
+  // TODO: partition the symbols by file rather than TU, to avoid duplication.
+  IndexedSymbols.update(AbsolutePath,
+                        llvm::make_unique<SymbolSlab>(std::move(Symbols)),
+                        llvm::make_unique<RefSlab>(std::move(Refs)));
+  FileHash[AbsolutePath] = Hash;
+
+  // TODO: this should rebuild once-in-a-while, not after every file.
+  vlog("Rebuilding automatic index");
+  reset(IndexedSymbols.buildMemIndex());
+  return Error::success();
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/GlobalCompilationDatabase.h
===================================================================
--- clangd/GlobalCompilationDatabase.h
+++ clangd/GlobalCompilationDatabase.h
@@ -51,7 +51,10 @@
     : public GlobalCompilationDatabase {
 public:
   DirectoryBasedGlobalCompilationDatabase(
-      llvm::Optional<Path> CompileCommandsDir);
+      llvm::Optional<Path> CompileCommandsDir,
+      std::function<void(llvm::StringRef,
+                         const clang::tooling::CompilationDatabase &)>
+          OnNewCDB = nullptr);
   ~DirectoryBasedGlobalCompilationDatabase() override;
 
   /// Scans File's parents looking for compilation databases.
@@ -84,6 +87,8 @@
   /// Used for command argument pointing to folder where compile_commands.json
   /// is located.
   llvm::Optional<Path> CompileCommandsDir;
+  std::function<void(llvm::StringRef, const tooling::CompilationDatabase &)>
+      OnNewCDB;
 };
 
 /// A wrapper around GlobalCompilationDatabase that caches the compile commands.
Index: clangd/GlobalCompilationDatabase.cpp
===================================================================
--- clangd/GlobalCompilationDatabase.cpp
+++ clangd/GlobalCompilationDatabase.cpp
@@ -32,8 +32,12 @@
 
 DirectoryBasedGlobalCompilationDatabase::
     DirectoryBasedGlobalCompilationDatabase(
-        llvm::Optional<Path> CompileCommandsDir)
-    : CompileCommandsDir(std::move(CompileCommandsDir)) {}
+        llvm::Optional<Path> CompileCommandsDir,
+        std::function<void(llvm::StringRef,
+                           const tooling::CompilationDatabase &)>
+            OnNewCDB)
+    : CompileCommandsDir(std::move(CompileCommandsDir)),
+      OnNewCDB(std::move(OnNewCDB)) {}
 
 DirectoryBasedGlobalCompilationDatabase::
     ~DirectoryBasedGlobalCompilationDatabase() = default;
@@ -97,6 +101,11 @@
   auto CDB = tooling::CompilationDatabase::loadFromDirectory(Dir, Error);
   auto Result = CDB.get();
   CompilationDatabases.insert(std::make_pair(Dir, std::move(CDB)));
+  if (Result && OnNewCDB) {
+    Mutex.unlock();
+    OnNewCDB(Dir, *Result);
+    Mutex.lock();
+  }
   return Result;
 }
 
Index: clangd/Compiler.h
===================================================================
--- clangd/Compiler.h
+++ clangd/Compiler.h
@@ -48,6 +48,8 @@
     std::shared_ptr<PCHContainerOperations>,
     IntrusiveRefCntPtr<vfs::FileSystem>, DiagnosticConsumer &);
 
+std::string getStandardResourceDir();
+
 } // namespace clangd
 } // namespace clang
 
Index: clangd/Compiler.cpp
===================================================================
--- clangd/Compiler.cpp
+++ clangd/Compiler.cpp
@@ -80,5 +80,10 @@
   return Clang;
 }
 
+std::string getStandardResourceDir() {
+  static int Dummy; // Just an address in this process.
+  return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy);
+}
+
 } // namespace clangd
 } // namespace clang
Index: clangd/ClangdServer.cpp
===================================================================
--- clangd/ClangdServer.cpp
+++ clangd/ClangdServer.cpp
@@ -9,6 +9,7 @@
 
 #include "ClangdServer.h"
 #include "CodeComplete.h"
+#include "Compiler.h"
 #include "FindSymbols.h"
 #include "Headers.h"
 #include "SourceCode.h"
@@ -43,11 +44,6 @@
   handleAllErrors(std::move(Err), [](const llvm::ErrorInfoBase &) {});
 }
 
-std::string getStandardResourceDir() {
-  static int Dummy; // Just an address in this process.
-  return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy);
-}
-
 class RefactoringResultCollector final
     : public tooling::RefactoringResultConsumer {
 public:
Index: clangd/ClangdLSPServer.h
===================================================================
--- clangd/ClangdLSPServer.h
+++ clangd/ClangdLSPServer.h
@@ -17,6 +17,7 @@
 #include "Path.h"
 #include "Protocol.h"
 #include "ProtocolHandlers.h"
+#include "index/Background.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/ADT/Optional.h"
 #include <memory>
@@ -39,7 +40,8 @@
   /// for compile_commands.json in all parent directories of each file.
   ClangdLSPServer(JSONOutput &Out, const clangd::CodeCompleteOptions &CCOpts,
                   llvm::Optional<Path> CompileCommandsDir,
-                  bool ShouldUseInMemoryCDB, const ClangdServer::Options &Opts);
+                  bool ShouldUseInMemoryCDB, bool AutoIndex,
+                  const ClangdServer::Options &Opts);
 
   /// Run LSP server loop, receiving input for it from \p In. \p In must be
   /// opened in binary mode. Output will be written using Out variable passed to
@@ -110,8 +112,10 @@
   class CompilationDB {
   public:
     static CompilationDB makeInMemory();
-    static CompilationDB
-    makeDirectoryBased(llvm::Optional<Path> CompileCommandsDir);
+    static CompilationDB makeDirectoryBased(
+        llvm::Optional<Path> CompileCommandsDir,
+        std::function<void(llvm::StringRef,
+                           const tooling::CompilationDatabase &)>);
 
     void invalidate(PathRef File);
 
@@ -167,6 +171,8 @@
   /// The supported completion item kinds of the client.
   CompletionItemKindBitset SupportedCompletionItemKinds;
 
+  llvm::Optional<BackgroundIndex> BackgroundIdx;
+
   // Store of the current versions of the open documents.
   DraftStore DraftMgr;
 
Index: clangd/ClangdLSPServer.cpp
===================================================================
--- clangd/ClangdLSPServer.cpp
+++ clangd/ClangdLSPServer.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClangdLSPServer.h"
+#include "Compiler.h"
 #include "Diagnostics.h"
 #include "JSONRPCDispatcher.h"
 #include "SourceCode.h"
@@ -149,6 +150,8 @@
 void ClangdLSPServer::onShutdown(ShutdownParams &Params) {
   // Do essentially nothing, just say we're ready to exit.
   ShutdownRequestReceived = true;
+  if (BackgroundIdx)
+    BackgroundIdx->stop();
   reply(nullptr);
 }
 
@@ -472,15 +475,32 @@
 ClangdLSPServer::ClangdLSPServer(JSONOutput &Out,
                                  const clangd::CodeCompleteOptions &CCOpts,
                                  llvm::Optional<Path> CompileCommandsDir,
-                                 bool ShouldUseInMemoryCDB,
+                                 bool ShouldUseInMemoryCDB, bool AutoIndex,
                                  const ClangdServer::Options &Opts)
-    : Out(Out), CDB(ShouldUseInMemoryCDB ? CompilationDB::makeInMemory()
-                                         : CompilationDB::makeDirectoryBased(
-                                               std::move(CompileCommandsDir))),
+    : Out(Out), CDB(ShouldUseInMemoryCDB
+                        ? CompilationDB::makeInMemory()
+                        : CompilationDB::makeDirectoryBased(
+                              std::move(CompileCommandsDir),
+                              [&](StringRef Dir,
+                                  const tooling::CompilationDatabase &CDB) {
+                                if (BackgroundIdx)
+                                  BackgroundIdx->enqueueAll(Dir, CDB);
+                              })),
       CCOpts(CCOpts), SupportedSymbolKinds(defaultSymbolKinds()),
-      SupportedCompletionItemKinds(defaultCompletionItemKinds()),
-      Server(new ClangdServer(CDB.getCDB(), FSProvider, /*DiagConsumer=*/*this,
-                              Opts)) {}
+      SupportedCompletionItemKinds(defaultCompletionItemKinds()) {
+  ClangdServer::Options SOpts = Opts;
+  if (AutoIndex) {
+    log("Static index will be built in the background from compilation DB.");
+    BackgroundIdx.emplace(Context::empty(),
+                          Opts.ResourceDir
+                              ? *Opts.ResourceDir
+                              : StringRef(getStandardResourceDir()),
+                          &FSProvider);
+    SOpts.StaticIndex = BackgroundIdx.getPointer();
+  }
+  Server.reset(new ClangdServer(CDB.getCDB(), FSProvider,
+                                /*DiagConsumer=*/*this, SOpts));
+}
 
 bool ClangdLSPServer::run(std::FILE *In, JSONStreamStyle InputStyle) {
   assert(!IsDone && "Run was called before");
@@ -585,9 +605,11 @@
 
 ClangdLSPServer::CompilationDB
 ClangdLSPServer::CompilationDB::makeDirectoryBased(
-    llvm::Optional<Path> CompileCommandsDir) {
+    llvm::Optional<Path> CompileCommandsDir,
+    std::function<void(StringRef, const tooling::CompilationDatabase &)>
+        OnNewCDB) {
   auto CDB = llvm::make_unique<DirectoryBasedGlobalCompilationDatabase>(
-      std::move(CompileCommandsDir));
+      std::move(CompileCommandsDir), std::move(OnNewCDB));
   auto CachingCDB = llvm::make_unique<CachingCompilationDb>(*CDB);
   return CompilationDB(std::move(CDB), std::move(CachingCDB),
                        /*IsDirectoryBased=*/true);
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -38,6 +38,7 @@
   URI.cpp
   XRefs.cpp
 
+  index/Background.cpp
   index/CanonicalIncludes.cpp
   index/FileIndex.cpp
   index/Index.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to