kadircet created this revision.
kadircet added a reviewer: sammccall.
Herald added subscribers: cfe-commits, arphaman, jkorous, MaskRay, 
ilya-biryukov, mgorny.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D64712

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/Background.cpp
  clang-tools-extra/clangd/index/Background.h
  clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp
  clang-tools-extra/clangd/index/BackgroundIndexLoader.h

Index: clang-tools-extra/clangd/index/BackgroundIndexLoader.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/BackgroundIndexLoader.h
@@ -0,0 +1,59 @@
+//===--- Background.h - Build an index in a background thread ----*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_INDEX_LOADER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_INDEX_LOADER_H
+
+#include "Path.h"
+#include "index/Background.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <memory>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+
+struct ShardInfo {
+  Path AbsolutePath;
+  FileDigest Digest = {};
+  bool CountReferences = false;
+  bool HadErrors = false;
+  std::unique_ptr<IndexFileIn> Shard;
+};
+
+class BackgroundIndexLoader {
+public:
+  BackgroundIndexLoader(llvm::vfs::FileSystem *FS) : FS(FS) {}
+  /// Loads all shards for the TU \p MainFile from \p Storage. Returns false if
+  /// any shards that are first seen for this TU are stale.
+  bool load(PathRef MainFile, BackgroundIndexStorage *Storage);
+
+  /// Consumes the loader and returns all shards.
+  std::vector<ShardInfo> takeShards() &&;
+
+private:
+  struct CachedShard {
+    ShardInfo SI;
+    // Strings points to ShardInfo.
+    std::vector<Path> Edges;
+    bool IsStale = false;
+  };
+  std::pair<const CachedShard &, /*WasCached=*/bool>
+  loadShard(PathRef ShardIdentifier, BackgroundIndexStorage *Storage);
+
+  // Cache for Storage lookups.
+  llvm::StringMap<CachedShard> LoadedShards;
+
+  llvm::vfs::FileSystem *FS;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp
@@ -0,0 +1,124 @@
+//===-- BackgroundIndexLoader.cpp - ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/BackgroundIndexLoader.h"
+#include "Logger.h"
+#include "Path.h"
+#include "index/Background.h"
+#include "llvm/ADT/DenseSet.h"
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace {
+
+llvm::Optional<Path> uriToAbsolutePath(llvm::StringRef URI, PathRef HintPath) {
+  auto U = URI::parse(URI);
+  if (!U)
+    return llvm::None;
+  auto AbsolutePath = URI::resolve(*U, HintPath);
+  if (!AbsolutePath)
+    return llvm::None;
+  return *AbsolutePath;
+}
+
+bool hasChanged(llvm::vfs::FileSystem *FS, const ShardInfo &SI) {
+  auto Buf = FS->getBufferForFile(SI.AbsolutePath);
+  if (!Buf) {
+    elog("Couldn't get buffer for file: {0}: {1}", SI.AbsolutePath,
+         Buf.getError().message());
+    // There is no point in indexing an unreadable file.
+    return false;
+  }
+  return digest(Buf->get()->getBuffer()) != SI.Digest;
+}
+
+} // namespace
+
+std::pair<const BackgroundIndexLoader::CachedShard &, /*WasCached=*/bool>
+BackgroundIndexLoader::loadShard(PathRef ShardIdentifier,
+                                 BackgroundIndexStorage *Storage) {
+  auto It = LoadedShards.try_emplace(ShardIdentifier);
+  CachedShard &CS = It.first->getValue();
+  if (!It.second)
+    return {CS, true};
+
+  ShardInfo &SI = CS.SI;
+  SI.AbsolutePath = ShardIdentifier;
+  auto Shard = Storage->loadShard(ShardIdentifier);
+  if (!Shard || !Shard->Sources) {
+    vlog("Failed to load shard: {0}", ShardIdentifier);
+    SI.HadErrors = true;
+    CS.IsStale = true;
+    return {CS, false};
+  }
+
+  SI.Shard = std::move(Shard);
+  SI.AbsolutePath = ShardIdentifier;
+  for (const auto &It : *SI.Shard->Sources) {
+    auto AbsPath = uriToAbsolutePath(It.getKey(), ShardIdentifier);
+    if (!AbsPath || *AbsPath != ShardIdentifier) {
+      CS.Edges.push_back(*AbsPath);
+      continue;
+    }
+
+    const IncludeGraphNode &IGN = It.getValue();
+    SI.Digest = IGN.Digest;
+    SI.CountReferences = IGN.Flags & IncludeGraphNode::SourceFlag::IsTU;
+    SI.HadErrors = IGN.Flags & IncludeGraphNode::SourceFlag::HadErrors;
+  }
+  assert(SI.Digest != FileDigest{{0}} && "Digest is empty?");
+  CS.IsStale = hasChanged(FS, SI);
+  return {CS, false};
+}
+
+bool BackgroundIndexLoader::load(PathRef MainFile,
+                                 BackgroundIndexStorage *Storage) {
+  bool IsStale = false;
+  // Following containers points to strings inside LoadedShards.
+  llvm::DenseSet<PathRef> InQueue;
+  std::queue<PathRef> ToVisit;
+  auto LoadResult = loadShard(MainFile, Storage);
+
+  const CachedShard &MainShard = LoadResult.first;
+  if (!LoadResult.second && MainShard.IsStale)
+    IsStale = true;
+
+  for (PathRef Edge : MainShard.Edges) {
+    InQueue.insert(Edge);
+    ToVisit.push(Edge);
+  }
+  while (!ToVisit.empty()) {
+    PathRef ShardIdentifier = ToVisit.front();
+    ToVisit.pop();
+
+    auto LoadResult = loadShard(ShardIdentifier, Storage);
+    const CachedShard &CS = LoadResult.first;
+    // Report the file as stale if it is the first time we see it.
+    // FIXME: We should still update staleness for main file, if it had errors.
+    if (!LoadResult.second && CS.IsStale)
+      IsStale = true;
+    for (PathRef Edge : CS.Edges) {
+      if (InQueue.insert(Edge).second)
+        ToVisit.push(Edge);
+    }
+  }
+
+  return !IsStale;
+}
+
+std::vector<ShardInfo> BackgroundIndexLoader::takeShards() && {
+  std::vector<ShardInfo> Shards;
+  Shards.reserve(LoadedShards.size());
+  for (auto &It : LoadedShards)
+    Shards.push_back(std::move(It.getValue().SI));
+  return Shards;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/Background.h
===================================================================
--- clang-tools-extra/clangd/index/Background.h
+++ clang-tools-extra/clangd/index/Background.h
@@ -12,6 +12,7 @@
 #include "Context.h"
 #include "FSProvider.h"
 #include "GlobalCompilationDatabase.h"
+#include "Path.h"
 #include "SourceCode.h"
 #include "Threading.h"
 #include "index/BackgroundRebuild.h"
@@ -165,20 +166,9 @@
   std::mutex ShardVersionsMu;
 
   BackgroundIndexStorage::Factory IndexStorageFactory;
-  struct Source {
-    std::string Path;
-    bool NeedsReIndexing;
-    Source(llvm::StringRef Path, bool NeedsReIndexing)
-        : Path(Path), NeedsReIndexing(NeedsReIndexing) {}
-  };
-  // Loads the shards for a single TU and all of its dependencies. Returns the
-  // list of sources and whether they need to be re-indexed.
-  std::vector<Source> loadShard(const tooling::CompileCommand &Cmd,
-                                BackgroundIndexStorage *IndexStorage,
-                                llvm::StringSet<> &LoadedShards);
-  // Tries to load shards for the ChangedFiles.
+  // Tries to load shards for the MainFiles.
   std::vector<std::pair<tooling::CompileCommand, BackgroundIndexStorage *>>
-  loadShards(std::vector<std::string> ChangedFiles);
+  loadProject(std::vector<std::string> MainFiles);
 
   BackgroundQueue::Task
   changedFilesTask(const std::vector<std::string> &ChangedFiles);
Index: clang-tools-extra/clangd/index/Background.cpp
===================================================================
--- clang-tools-extra/clangd/index/Background.cpp
+++ clang-tools-extra/clangd/index/Background.cpp
@@ -10,6 +10,7 @@
 #include "ClangdUnit.h"
 #include "Compiler.h"
 #include "Context.h"
+#include "FSProvider.h"
 #include "Headers.h"
 #include "Logger.h"
 #include "Path.h"
@@ -18,6 +19,7 @@
 #include "Threading.h"
 #include "Trace.h"
 #include "URI.h"
+#include "index/BackgroundIndexLoader.h"
 #include "index/FileIndex.h"
 #include "index/IndexAction.h"
 #include "index/MemIndex.h"
@@ -41,6 +43,7 @@
 #include <atomic>
 #include <chrono>
 #include <condition_variable>
+#include <cstddef>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -48,6 +51,8 @@
 #include <random>
 #include <string>
 #include <thread>
+#include <utility>
+#include <vector>
 
 namespace clang {
 namespace clangd {
@@ -118,6 +123,7 @@
   }
   return AbsolutePath;
 }
+
 } // namespace
 
 BackgroundIndex::BackgroundIndex(
@@ -155,7 +161,7 @@
     log("Enqueueing {0} commands for indexing", ChangedFiles.size());
     SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
 
-    auto NeedsReIndexing = loadShards(std::move(ChangedFiles));
+    auto NeedsReIndexing = loadProject(std::move(ChangedFiles));
     // Run indexing for files that need to be updated.
     std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(),
                  std::mt19937(std::random_device{}()));
@@ -415,142 +421,18 @@
   return llvm::Error::success();
 }
 
-std::vector<BackgroundIndex::Source>
-BackgroundIndex::loadShard(const tooling::CompileCommand &Cmd,
-                           BackgroundIndexStorage *IndexStorage,
-                           llvm::StringSet<> &LoadedShards) {
-  struct ShardInfo {
-    std::string AbsolutePath;
-    std::unique_ptr<IndexFileIn> Shard;
-    FileDigest Digest = {};
-    bool CountReferences = false;
-    bool HadErrors = false;
-  };
-  std::vector<ShardInfo> IntermediateSymbols;
-  // Make sure we don't have duplicate elements in the queue. Keys are absolute
-  // paths.
-  llvm::StringSet<> InQueue;
-  auto FS = FSProvider.getFileSystem();
-  // Dependencies of this TU, paired with the information about whether they
-  // need to be re-indexed or not.
-  std::vector<Source> Dependencies;
-  std::queue<Source> ToVisit;
-  std::string AbsolutePath = getAbsolutePath(Cmd).str();
-  // Up until we load the shard related to a dependency it needs to be
-  // re-indexed.
-  ToVisit.emplace(AbsolutePath, true);
-  InQueue.insert(AbsolutePath);
-  // Goes over each dependency.
-  while (!ToVisit.empty()) {
-    Dependencies.push_back(std::move(ToVisit.front()));
-    // Dependencies is not modified during the rest of the loop, so it is safe
-    // to keep the reference.
-    auto &CurDependency = Dependencies.back();
-    ToVisit.pop();
-    // If we have already seen this shard before(either loaded or failed) don't
-    // re-try again. Since the information in the shard won't change from one TU
-    // to another.
-    if (!LoadedShards.try_emplace(CurDependency.Path).second) {
-      // If the dependency needs to be re-indexed, first occurence would already
-      // have detected that, so we don't need to issue it again.
-      CurDependency.NeedsReIndexing = false;
-      continue;
-    }
-
-    auto Shard = IndexStorage->loadShard(CurDependency.Path);
-    if (!Shard || !Shard->Sources) {
-      // File will be returned as requiring re-indexing to caller.
-      vlog("Failed to load shard: {0}", CurDependency.Path);
-      continue;
-    }
-    // These are the edges in the include graph for current dependency.
-    for (const auto &I : *Shard->Sources) {
-      auto U = URI::parse(I.getKey());
-      if (!U)
-        continue;
-      auto AbsolutePath = URI::resolve(*U, CurDependency.Path);
-      if (!AbsolutePath)
-        continue;
-      // Add file as dependency if haven't seen before.
-      if (InQueue.try_emplace(*AbsolutePath).second)
-        ToVisit.emplace(*AbsolutePath, true);
-      // The node contains symbol information only for current file, the rest is
-      // just edges.
-      if (*AbsolutePath != CurDependency.Path)
-        continue;
-
-      // We found source file info for current dependency.
-      assert(I.getValue().Digest != FileDigest{{0}} && "Digest is empty?");
-      ShardInfo SI;
-      SI.AbsolutePath = CurDependency.Path;
-      SI.Shard = std::move(Shard);
-      SI.Digest = I.getValue().Digest;
-      SI.CountReferences =
-          I.getValue().Flags & IncludeGraphNode::SourceFlag::IsTU;
-      SI.HadErrors =
-          I.getValue().Flags & IncludeGraphNode::SourceFlag::HadErrors;
-      IntermediateSymbols.push_back(std::move(SI));
-      // Check if the source needs re-indexing.
-      // Get the digest, skip it if file doesn't exist.
-      auto Buf = FS->getBufferForFile(CurDependency.Path);
-      if (!Buf) {
-        elog("Couldn't get buffer for file: {0}: {1}", CurDependency.Path,
-             Buf.getError().message());
-        continue;
-      }
-      // If digests match then dependency doesn't need re-indexing.
-      // FIXME: Also check for dependencies(sources) of this shard and compile
-      // commands for cache invalidation.
-      CurDependency.NeedsReIndexing =
-          digest(Buf->get()->getBuffer()) != I.getValue().Digest;
-    }
-  }
-  // Load shard information into background-index.
-  {
-    std::lock_guard<std::mutex> Lock(ShardVersionsMu);
-    // This can override a newer version that is added in another thread,
-    // if this thread sees the older version but finishes later. This
-    // should be rare in practice.
-    for (const ShardInfo &SI : IntermediateSymbols) {
-      auto SS =
-          SI.Shard->Symbols
-              ? llvm::make_unique<SymbolSlab>(std::move(*SI.Shard->Symbols))
-              : nullptr;
-      auto RS = SI.Shard->Refs
-                    ? llvm::make_unique<RefSlab>(std::move(*SI.Shard->Refs))
-                    : nullptr;
-      auto RelS =
-          SI.Shard->Relations
-              ? llvm::make_unique<RelationSlab>(std::move(*SI.Shard->Relations))
-              : nullptr;
-      ShardVersion &SV = ShardVersions[SI.AbsolutePath];
-      SV.Digest = SI.Digest;
-      SV.HadErrors = SI.HadErrors;
-
-      IndexedSymbols.update(SI.AbsolutePath, std::move(SS), std::move(RS),
-                            std::move(RelS), SI.CountReferences);
-    }
-  }
-  if (!IntermediateSymbols.empty())
-    Rebuilder.loadedTU();
-
-  return Dependencies;
-}
-
 // Goes over each changed file and loads them from index. Returns the list of
 // TUs that had out-of-date/no shards.
 std::vector<std::pair<tooling::CompileCommand, BackgroundIndexStorage *>>
-BackgroundIndex::loadShards(std::vector<std::string> ChangedFiles) {
+BackgroundIndex::loadProject(std::vector<std::string> MainFiles) {
   std::vector<std::pair<tooling::CompileCommand, BackgroundIndexStorage *>>
       NeedsReIndexing;
-  // Keeps track of the files that will be reindexed, to make sure we won't
-  // re-index same dependencies more than once. Keys are AbsolutePaths.
-  llvm::StringSet<> FilesToIndex;
-  // Keeps track of the loaded shards to make sure we don't perform redundant
-  // disk IO. Keys are absolute paths.
-  llvm::StringSet<> LoadedShards;
+
+  auto FS = FSProvider.getFileSystem();
+  BackgroundIndexLoader Loader(FS.get());
+
   Rebuilder.startLoading();
-  for (const auto &File : ChangedFiles) {
+  for (const auto &File : MainFiles) {
     auto Cmd = CDB.getCompileCommand(File);
     if (!Cmd)
       continue;
@@ -560,23 +442,38 @@
       ProjectRoot = std::move(PI->SourceRoot);
 
     BackgroundIndexStorage *IndexStorage = IndexStorageFactory(ProjectRoot);
-    auto Dependencies = loadShard(*Cmd, IndexStorage, LoadedShards);
-    for (const auto &Dependency : Dependencies) {
-      if (!Dependency.NeedsReIndexing || FilesToIndex.count(Dependency.Path))
-        continue;
-      // FIXME: Currently, we simply schedule indexing on a TU whenever any of
-      // its dependencies needs re-indexing. We might do it smarter by figuring
-      // out a minimal set of TUs that will cover all the stale dependencies.
-      vlog("Enqueueing TU {0} because its dependency {1} needs re-indexing.",
-           Cmd->Filename, Dependency.Path);
+    Path AbsPath = getAbsolutePath(*Cmd).str().str();
+
+    if (!Loader.load(AbsPath, IndexStorage))
       NeedsReIndexing.push_back({std::move(*Cmd), IndexStorage});
-      // Mark all of this TU's dependencies as to-be-indexed so that we won't
-      // try to re-index those.
-      for (const auto &Dependency : Dependencies)
-        FilesToIndex.insert(Dependency.Path);
-      break;
-    }
   }
+
+  auto Shards = std::move(Loader).takeShards();
+  std::lock_guard<std::mutex> Lock(ShardVersionsMu);
+  for (auto &SI : Shards) {
+    if (!SI.Shard)
+      continue;
+    auto SS = SI.Shard->Symbols
+                  ? llvm::make_unique<SymbolSlab>(std::move(*SI.Shard->Symbols))
+                  : nullptr;
+    auto RS = SI.Shard->Refs
+                  ? llvm::make_unique<RefSlab>(std::move(*SI.Shard->Refs))
+                  : nullptr;
+    auto RelS =
+        SI.Shard->Relations
+            ? llvm::make_unique<RelationSlab>(std::move(*SI.Shard->Relations))
+            : nullptr;
+    ShardVersion &SV = ShardVersions[SI.AbsolutePath];
+    SV.Digest = SI.Digest;
+    SV.HadErrors = SI.HadErrors;
+
+    IndexedSymbols.update(SI.AbsolutePath, std::move(SS), std::move(RS),
+                          std::move(RelS), SI.CountReferences);
+  }
+  // FIXME: Should we call it for each TU?
+  if (!Shards.empty())
+    Rebuilder.loadedTU();
+
   Rebuilder.doneLoading();
   return NeedsReIndexing;
 }
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -73,6 +73,7 @@
   XRefs.cpp
 
   index/Background.cpp
+  index/BackgroundIndexLoader.cpp
   index/BackgroundIndexStorage.cpp
   index/BackgroundQueue.cpp
   index/BackgroundRebuild.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to