llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangd

@llvm/pr-subscribers-clang-tools-extra

Author: Jason Williams (jasonwilliams)

<details>
<summary>Changes</summary>

Add support for generating background index shards directly from 
clangd-indexer, enabling offline pre-indexing of projects for use with clangd's 
background index.

Fixes https://github.com/clangd/clangd/issues/587

## Motivation:

Currently, clangd's background index builds shards incrementally, either as 
files are opened or on a background thread while you are working in the IDE. 
For large projects, it can take significant time before the index is fully 
populated. This change allows generating all index shards upfront using 
`clangd-indexer`, so that clangd can load them on startup.

## Changes:

- Add a `--format=background` option to `clangd-indexer` that writes per-file 
index shards to `.cache/clangd/index/` instead of a single merged index to stdout
- Add a `--project-root` option to specify where shards are stored (defaults to 
the current directory)
- Add `BackgroundIndexActionFactory` class that writes shards using 
`BackgroundIndexStorage` after processing each translation unit
- Add `IndexFileFormat::BACKGROUND` enum value to Serialization.h


## Usage:
- Generate background index shards for a project:
  `clangd-indexer --format=background --executor=all-TUs build/`
- Shards are written to `./.cache/clangd/index/`
- clangd will automatically load these shards on startup

---
Full diff: https://github.com/llvm/llvm-project/pull/175209.diff


2 Files Affected:

- (modified) clang-tools-extra/clangd/index/Serialization.h (+3-2) 
- (modified) clang-tools-extra/clangd/indexer/IndexerMain.cpp (+188-17) 


``````````diff
diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index bf8e036afcb6c..1553e702a5881 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -35,8 +35,9 @@ namespace clang {
 namespace clangd {
 
 enum class IndexFileFormat {
-  RIFF, // Versioned binary format, suitable for production use.
-  YAML, // Human-readable format, suitable for experiments and debugging.
+  RIFF,      // Versioned binary format, suitable for production use.
+  YAML,      // Human-readable format, suitable for experiments and debugging.
+  BACKGROUND // Background index format, suitable for language server use.
 };
 
 // Holds the contents of an index file that was read.
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index bc5d1a7408991..5c9e540fee0ba 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -12,6 +12,8 @@
 
 #include "CompileCommands.h"
 #include "Compiler.h"
+#include "GlobalCompilationDatabase.h"
+#include "index/Background.h"
 #include "index/IndexAction.h"
 #include "index/Merge.h"
 #include "index/Ref.h"
@@ -30,13 +32,14 @@ namespace clang {
 namespace clangd {
 namespace {
 
-static llvm::cl::opt<IndexFileFormat>
-    Format("format", llvm::cl::desc("Format of the index to be written"),
-           llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
-                                       "human-readable YAML format"),
-                            clEnumValN(IndexFileFormat::RIFF, "binary",
-                                       "binary RIFF format")),
-           llvm::cl::init(IndexFileFormat::RIFF));
+static llvm::cl::opt<IndexFileFormat> Format(
+    "format", llvm::cl::desc("Format of the index to be written"),
+    llvm::cl::values(
+        clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"),
+        clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format"),
+        clEnumValN(IndexFileFormat::BACKGROUND, "background",
+                   "background index format for language servers")),
+    llvm::cl::init(IndexFileFormat::RIFF));
 
 static llvm::cl::list<std::string> QueryDriverGlobs{
     "query-driver",
@@ -48,6 +51,16 @@ static llvm::cl::list<std::string> QueryDriverGlobs{
     llvm::cl::CommaSeparated,
 };
 
+static llvm::cl::opt<std::string> ProjectRoot{
+    "project-root",
+    llvm::cl::desc(
+        "Path to the project root for --format=background. "
+        "Determines where to store index shards. Shards are stored in "
+        "<project-root>/.cache/clangd/index/. "
+        "Defaults to current directory if not specified."),
+};
+
+// Action factory that merges all symbols into a single index (for YAML/RIFF).
 class IndexActionFactory : public tooling::FrontendActionFactory {
 public:
   IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
@@ -123,6 +136,117 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
   RelationSlab::Builder Relations;
 };
 
+// Action factory that writes per-file shards (for background index format).
+class BackgroundIndexActionFactory : public tooling::FrontendActionFactory {
+public:
+  BackgroundIndexActionFactory(BackgroundIndexStorage &Storage)
+      : Storage(Storage), Symbols(std::make_unique<SymbolSlab::Builder>()),
+        Refs(std::make_unique<RefSlab::Builder>()),
+        Relations(std::make_unique<RelationSlab::Builder>()) {}
+
+  std::unique_ptr<FrontendAction> create() override {
+    SymbolCollector::Options Opts;
+    Opts.CountReferences = true;
+    Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
+      const auto F = SM.getFileEntryRefForID(FID);
+      if (!F)
+        return false;
+      auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
+      if (!AbsPath)
+        return false;
+      std::lock_guard<std::mutex> Lock(FilesMu);
+      return Files.insert(*AbsPath).second;
+    };
+    return createStaticIndexingAction(
+        Opts,
+        [&](SymbolSlab S) {
+          std::lock_guard<std::mutex> Lock(SymbolsMu);
+          for (const auto &Sym : S) {
+            if (const auto *Existing = Symbols->find(Sym.ID))
+              Symbols->insert(mergeSymbol(*Existing, Sym));
+            else
+              Symbols->insert(Sym);
+          }
+        },
+        [&](RefSlab S) {
+          std::lock_guard<std::mutex> Lock(RefsMu);
+          for (const auto &Sym : S) {
+            for (const auto &Ref : Sym.second)
+              Refs->insert(Sym.first, Ref);
+          }
+        },
+        [&](RelationSlab S) {
+          std::lock_guard<std::mutex> Lock(RelsMu);
+          for (const auto &R : S)
+            Relations->insert(R);
+        },
+        /*IncludeGraphCallback=*/nullptr);
+  }
+
+  bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+                     FileManager *Files,
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+                     DiagnosticConsumer *DiagConsumer) override {
+    disableUnsupportedOptions(*Invocation);
+
+    // Get the main file path before running.
+    std::string MainFile;
+    if (!Invocation->getFrontendOpts().Inputs.empty())
+      MainFile = Invocation->getFrontendOpts().Inputs[0].getFile().str();
+
+    bool Success = tooling::FrontendActionFactory::runInvocation(
+        std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer);
+
+    // After processing, write a shard for this file.
+    if (Success && !MainFile.empty())
+      writeShardForFile(MainFile);
+
+    return Success;
+  }
+
+private:
+  void writeShardForFile(llvm::StringRef MainFile) {
+    IndexFileIn Data;
+    {
+      std::lock_guard<std::mutex> Lock(SymbolsMu);
+      Data.Symbols = std::move(*Symbols).build();
+      Symbols = std::make_unique<SymbolSlab::Builder>();
+    }
+    {
+      std::lock_guard<std::mutex> Lock(RefsMu);
+      Data.Refs = std::move(*Refs).build();
+      Refs = std::make_unique<RefSlab::Builder>();
+    }
+    {
+      std::lock_guard<std::mutex> Lock(RelsMu);
+      Data.Relations = std::move(*Relations).build();
+      Relations = std::make_unique<RelationSlab::Builder>();
+    }
+
+    IndexFileOut Out(Data);
+    Out.Format = IndexFileFormat::RIFF; // Shards use RIFF format.
+
+    if (auto Err = Storage.storeShard(MainFile, Out)) {
+      elog("Failed to write shard for {0}: {1}", MainFile, std::move(Err));
+    } else {
+      std::lock_guard<std::mutex> Lock(FilesMu);
+      ++ShardsWritten;
+      log("Wrote shard for {0} ({1} total)", MainFile, ShardsWritten);
+    }
+  }
+
+  BackgroundIndexStorage &Storage;
+  std::mutex FilesMu;
+  llvm::StringSet<> Files;
+  unsigned ShardsWritten = 0;
+  std::mutex SymbolsMu;
+  std::unique_ptr<SymbolSlab::Builder> Symbols;
+  std::mutex RefsMu;
+  std::unique_ptr<RefSlab::Builder> Refs;
+  std::mutex RelsMu;
+  std::unique_ptr<RelationSlab::Builder> Relations;
+};
+
 } // namespace
 } // namespace clangd
 } // namespace clang
@@ -141,6 +265,13 @@ int main(int argc, const char **argv) {
 
   $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
 
+  Example usage for background index format (writes shards to disk):
+
+  $ clangd-indexer --format=background --executor=all-TUs build/
+
+  This writes index shards to .cache/clangd/index/ in the current directory.
+  Use --project-root to specify a different location for the shards.
+
   Note: only symbols from header files will be indexed.
   )";
 
@@ -152,23 +283,63 @@ int main(int argc, const char **argv) {
     return 1;
   }
 
-  // Collect symbols found in each translation unit, merging as we go.
-  clang::clangd::IndexFileIn Data;
   auto Mangler = std::make_shared<clang::clangd::CommandMangler>(
       clang::clangd::CommandMangler::detect());
   Mangler->SystemIncludeExtractor = clang::clangd::getSystemIncludeExtractor(
       static_cast<llvm::ArrayRef<std::string>>(
           clang::clangd::QueryDriverGlobs));
+
+  auto Adjuster = clang::tooling::ArgumentsAdjuster(
+      [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
+                                     llvm::StringRef File) {
+        clang::tooling::CompileCommand Cmd;
+        Cmd.CommandLine = Args;
+        Mangler->operator()(Cmd, File);
+        return Cmd.CommandLine;
+      });
+
+  // Handle background index format separately - writes per-file shards.
+  if (clang::clangd::Format == clang::clangd::IndexFileFormat::BACKGROUND) {
+    // Default to current directory if --project-root not specified.
+    std::string Root = clang::clangd::ProjectRoot;
+    if (Root.empty()) {
+      llvm::SmallString<256> CurrentDir;
+      if (auto EC = llvm::sys::fs::current_path(CurrentDir)) {
+        llvm::errs() << "Error: Failed to get current directory: "
+                     << EC.message() << "\n";
+        return 1;
+      }
+      Root = std::string(CurrentDir);
+    }
+
+    // Create storage factory for disk-backed index shards.
+    auto IndexStorageFactory =
+        clang::clangd::BackgroundIndexStorage::createDiskBackedStorageFactory(
+            [Root](clang::clangd::PathRef) {
+              return clang::clangd::ProjectInfo{Root};
+            });
+
+    // Get storage for the project root.
+    clang::clangd::BackgroundIndexStorage *Storage = IndexStorageFactory(Root);
+
+    auto Err = Executor->get()->execute(
+        std::make_unique<clang::clangd::BackgroundIndexActionFactory>(*Storage),
+        std::move(Adjuster));
+    if (Err) {
+      clang::clangd::elog("{0}", std::move(Err));
+      return 1;
+    }
+
+    llvm::errs() << "Background index shards written to " << Root
+                 << "/.cache/clangd/index/\n";
+    return 0;
+  }
+
+  // Standard mode: collect and merge symbols, then emit to stdout.
+  clang::clangd::IndexFileIn Data;
   auto Err = Executor->get()->execute(
       std::make_unique<clang::clangd::IndexActionFactory>(Data),
-      clang::tooling::ArgumentsAdjuster(
-          [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
-                                         llvm::StringRef File) {
-            clang::tooling::CompileCommand Cmd;
-            Cmd.CommandLine = Args;
-            Mangler->operator()(Cmd, File);
-            return Cmd.CommandLine;
-          }));
+      std::move(Adjuster));
   if (Err) {
     clang::clangd::elog("{0}", std::move(Err));
   }

``````````

</details>


https://github.com/llvm/llvm-project/pull/175209
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to