ilya-biryukov updated this revision to Diff 162194.
ilya-biryukov added a comment.

- Don't merge on-the-fly for multi-process executors


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D51155

Files:
  clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp

Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
===================================================================
--- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
+++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
@@ -49,9 +49,33 @@
                    "not given, such headers will have relative paths."),
     llvm::cl::init(""));
 
+static llvm::cl::opt<bool> MergeOnTheFly(
+    "merge-on-the-fly",
+    llvm::cl::desc(
+        "Merges symbols for each processed translation unit as soon "
+        "they become available. This results in a smaller memory "
+        "usage and an almost instant reduce stage. Optimal for running as a "
+        "standalone tool, but cannot be used with multi-process executors like "
+        "MapReduce."),
+    llvm::cl::init(true));
+
+/// Responsible for aggregating symbols from each processed file and producing
+/// the final results. All methods in this class must be thread-safe,
+/// 'consumeSymbols' may be called from multiple threads.
+class SymbolsConsumer {
+public:
+  virtual ~SymbolsConsumer() = default;
+
+  /// Consume a SymbolSlab build for a file.
+  virtual void consumeSymbols(SymbolSlab Symbols) = 0;
+  /// Produce a resulting symbol slab, by combining  occurrences of the same
+  /// symbols across translation units.
+  virtual SymbolSlab mergeResults() = 0;
+};
+
 class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
 public:
-  SymbolIndexActionFactory(tooling::ExecutionContext *Ctx) : Ctx(Ctx) {}
+  SymbolIndexActionFactory(SymbolsConsumer &Consumer) : Consumer(Consumer) {}
 
   clang::FrontendAction *create() override {
     // Wraps the index action and reports collected symbols to the execution
@@ -61,10 +85,10 @@
       WrappedIndexAction(std::shared_ptr<SymbolCollector> C,
                          std::unique_ptr<CanonicalIncludes> Includes,
                          const index::IndexingOptions &Opts,
-                         tooling::ExecutionContext *Ctx)
+                         SymbolsConsumer &Consumer)
           : WrapperFrontendAction(
                 index::createIndexingAction(C, Opts, nullptr)),
-            Ctx(Ctx), Collector(C), Includes(std::move(Includes)),
+            Consumer(Consumer), Collector(C), Includes(std::move(Includes)),
             PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {}
 
       std::unique_ptr<ASTConsumer>
@@ -91,14 +115,11 @@
           return;
         }
 
-        auto Symbols = Collector->takeSymbols();
-        for (const auto &Sym : Symbols) {
-          Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym));
-        }
+        Consumer.consumeSymbols(Collector->takeSymbols());
       }
 
     private:
-      tooling::ExecutionContext *Ctx;
+      SymbolsConsumer &Consumer;
       std::shared_ptr<SymbolCollector> Collector;
       std::unique_ptr<CanonicalIncludes> Includes;
       std::unique_ptr<CommentHandler> PragmaHandler;
@@ -118,30 +139,72 @@
     CollectorOpts.Includes = Includes.get();
     return new WrappedIndexAction(
         std::make_shared<SymbolCollector>(std::move(CollectorOpts)),
-        std::move(Includes), IndexOpts, Ctx);
+        std::move(Includes), IndexOpts, Consumer);
   }
 
-  tooling::ExecutionContext *Ctx;
+  SymbolsConsumer &Consumer;
 };
 
-// Combine occurrences of the same symbol across translation units.
-SymbolSlab mergeSymbols(tooling::ToolResults *Results) {
-  SymbolSlab::Builder UniqueSymbols;
-  llvm::BumpPtrAllocator Arena;
-  Symbol::Details Scratch;
-  Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) {
-    Arena.Reset();
-    llvm::yaml::Input Yin(Value, &Arena);
-    auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
-    clang::clangd::SymbolID ID;
-    Key >> ID;
-    if (const auto *Existing = UniqueSymbols.find(ID))
-      UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
-    else
-      UniqueSymbols.insert(Sym);
-  });
-  return std::move(UniqueSymbols).build();
-}
+/// Stashes per-file results inside ExecutionContext, merges all of them at the
+/// end. Useful for running on MapReduce infrastructure to avoid keeping symbols
+/// from multiple files in memory.
+class ToolExecutorConsumer : public SymbolsConsumer {
+public:
+  ToolExecutorConsumer(ToolExecutor &Executor) : Executor(Executor) {}
+
+  void consumeSymbols(SymbolSlab Symbols) override {
+    for (const auto &Sym : Symbols)
+      Executor.getExecutionContext()->reportResult(Sym.ID.str(),
+                                                   SymbolToYAML(Sym));
+  }
+
+  SymbolSlab mergeResults() override {
+    SymbolSlab::Builder UniqueSymbols;
+    llvm::BumpPtrAllocator Arena;
+    Symbol::Details Scratch;
+    Executor.getToolResults()->forEachResult(
+        [&](llvm::StringRef Key, llvm::StringRef Value) {
+          Arena.Reset();
+          llvm::yaml::Input Yin(Value, &Arena);
+          auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena);
+          clang::clangd::SymbolID ID;
+          Key >> ID;
+          if (const auto *Existing = UniqueSymbols.find(ID))
+            UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
+          else
+            UniqueSymbols.insert(Sym);
+        });
+    return std::move(UniqueSymbols).build();
+  }
+
+private:
+  ToolExecutor &Executor;
+};
+
+/// Merges symbols for each translation unit as soon as the file is processed.
+/// Optimal choice for standalone tools.
+class OnTheFlyConsumer : public SymbolsConsumer {
+public:
+  void consumeSymbols(SymbolSlab Symbols) override {
+    std::lock_guard<std::mutex> Lock(Mut);
+    for (auto &&Sym : Symbols) {
+      Symbol::Details Scratch;
+      if (const auto *Existing = Result.find(Sym.ID))
+        Result.insert(mergeSymbol(*Existing, Sym, &Scratch));
+      else
+        Result.insert(Sym);
+    }
+  }
+
+  SymbolSlab mergeResults() override {
+    std::lock_guard<std::mutex> Lock(Mut);
+    return std::move(Result).build();
+  }
+
+private:
+  std::mutex Mut;
+  SymbolSlab::Builder Result;
+};
 
 } // namespace
 } // namespace clangd
@@ -181,18 +244,28 @@
     return 1;
   }
 
+  if (clang::clangd::MergeOnTheFly && !Executor->get()->isSingleProcess()) {
+    llvm::errs()
+        << "Found multi-process executor, forcing the use of intermediate YAML "
+           "serialization instead of the on-the-fly merge.\n";
+    clang::clangd::MergeOnTheFly = false;
+  }
+
+  std::unique_ptr<clang::clangd::SymbolsConsumer> Consumer;
+  if (clang::clangd::MergeOnTheFly)
+    Consumer = llvm::make_unique<clang::clangd::OnTheFlyConsumer>();
+  else
+    Consumer =
+        llvm::make_unique<clang::clangd::ToolExecutorConsumer>(**Executor);
+
   // Map phase: emit symbols found in each translation unit.
   auto Err = Executor->get()->execute(
-      llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(
-          Executor->get()->getExecutionContext()));
+      llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(*Consumer));
   if (Err) {
     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
   }
-
-  // Reduce phase: combine symbols using the ID as a key.
-  auto UniqueSymbols =
-      clang::clangd::mergeSymbols(Executor->get()->getToolResults());
-
+  // Reduce phase: combine symbols with the same IDs.
+  auto UniqueSymbols = Consumer->mergeResults();
   // Output phase: emit YAML for result symbols.
   SymbolsToYAML(UniqueSymbols, llvm::outs());
   return 0;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to