kadircet updated this revision to Diff 175412.
kadircet added a comment.

- Rebase to get changes in background index.


Repository:
  rCTE Clang Tools Extra

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D54817/new/

https://reviews.llvm.org/D54817

Files:
  clangd/index/Background.cpp
  clangd/index/Background.h
  clangd/index/Serialization.cpp
  clangd/index/Serialization.h
  unittests/clangd/BackgroundIndexTests.cpp
  unittests/clangd/SerializationTests.cpp

Index: unittests/clangd/SerializationTests.cpp
===================================================================
--- unittests/clangd/SerializationTests.cpp
+++ unittests/clangd/SerializationTests.cpp
@@ -173,31 +173,44 @@
               UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
 }
 
-TEST(SerializationTest, HashTest) {
+TEST(SerializationTest, SrcsTest) {
   auto In = readIndexFile(YAML);
   EXPECT_TRUE(bool(In)) << In.takeError();
 
-  std::string TestContent("TESTCONTENT");
-  auto Digest =
+  std::string TestContent("TestContent");
+  IndexFileIn::SourceFile SF;
+  SF.Digest =
       llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
                         TestContent.size()});
+  SF.DirectIncludes = {"inc1", "inc2"};
+  SF.FileURI = "FileURI";
+  SF.IsTU = true;
+  llvm::StringMap<IndexFileIn::SourceFile> Sources;
+  Sources[SF.FileURI] = SF;
   // Write to binary format, and parse again.
   IndexFileOut Out(*In);
   Out.Format = IndexFileFormat::RIFF;
-  Out.Digest = &Digest;
-  std::string Serialized = to_string(Out);
-
-  auto In2 = readIndexFile(Serialized);
-  ASSERT_TRUE(bool(In2)) << In.takeError();
-  ASSERT_EQ(In2->Digest, Digest);
-  ASSERT_TRUE(In2->Symbols);
-  ASSERT_TRUE(In2->Refs);
-
-  // Assert the YAML serializations match, for nice comparisons and diffs.
-  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
-              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
-  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
-              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+  Out.Sources = &Sources;
+  {
+    std::string Serialized = to_string(Out);
+
+    auto In = readIndexFile(Serialized);
+    ASSERT_TRUE(bool(In)) << In.takeError();
+    ASSERT_TRUE(In->Symbols);
+    ASSERT_TRUE(In->Refs);
+    ASSERT_TRUE(In->Sources);
+    ASSERT_TRUE(In->Sources->count(SF.FileURI));
+    // Assert the YAML serializations match, for nice comparisons and diffs.
+    EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
+                UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+    EXPECT_THAT(YAMLFromRefs(*In->Refs),
+                UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+    auto SFDeserialized = In->Sources->lookup(SF.FileURI);
+    EXPECT_EQ(SFDeserialized.Digest, SF.Digest);
+    EXPECT_EQ(SFDeserialized.DirectIncludes, SF.DirectIncludes);
+    EXPECT_EQ(SFDeserialized.FileURI, SF.FileURI);
+    EXPECT_EQ(SFDeserialized.IsTU, SF.IsTU);
+  }
 }
 
 } // namespace
Index: unittests/clangd/BackgroundIndexTests.cpp
===================================================================
--- unittests/clangd/BackgroundIndexTests.cpp
+++ unittests/clangd/BackgroundIndexTests.cpp
@@ -160,7 +160,51 @@
   EXPECT_NE(ShardSource, nullptr);
   EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
   EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
-  EXPECT_EQ(*ShardSource->Digest, Digest);
+  EXPECT_EQ(ShardSource->Sources->lookup("unittest:///root/A.cc").Digest,
+            Digest);
+}
+
+TEST(BackgroundIndexTest, DirectIncludesTest) {
+  MockFSProvider FS;
+  FS.Files[testPath("root/B.h")] = "";
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      #include "B.h"
+      void common();
+      void f_b();
+      class A_CC {};
+      )cpp";
+  std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }";
+  FS.Files[testPath("root/A.cc")] = A_CC;
+
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  MemoryShardStorage MSS(Storage, CacheHits);
+
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = testPath("root/A.cc");
+  Cmd.Directory = testPath("root");
+  Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+  {
+    OverlayCDB CDB(/*Base=*/nullptr);
+    BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+                        [&](llvm::StringRef) { return &MSS; });
+    CDB.setCompileCommand(testPath("root"), Cmd);
+    ASSERT_TRUE(Idx.blockUntilIdleForTest());
+  }
+
+  auto ShardSource = MSS.loadShard(testPath("root/A.cc"));
+  EXPECT_TRUE(ShardSource->Sources);
+  EXPECT_EQ(ShardSource->Sources->size(), 3U); // A.cc, A.h, B.h
+  EXPECT_THAT(
+      ShardSource->Sources->lookup("unittest:///root/A.cc").DirectIncludes,
+      UnorderedElementsAre("unittest:///root/A.h"));
+
+  auto ShardHeader = MSS.loadShard(testPath("root/A.h"));
+  EXPECT_TRUE(ShardHeader->Sources);
+  EXPECT_EQ(ShardHeader->Sources->size(), 2U); // A.h B.h
+  EXPECT_THAT(
+      ShardHeader->Sources->lookup("unittest:///root/A.h").DirectIncludes,
+      UnorderedElementsAre("unittest:///root/B.h"));
 }
 
 } // namespace clangd
Index: clangd/index/Serialization.h
===================================================================
--- clangd/index/Serialization.h
+++ clangd/index/Serialization.h
@@ -38,10 +38,19 @@
 // Holds the contents of an index file that was read.
 struct IndexFileIn {
   using FileDigest = std::array<uint8_t, 20>;
+  // Contains information about one source file that participates in current
+  // index file.
+  struct SourceFile {
+    bool IsTU;
+    llvm::StringRef FileURI;
+    FileDigest Digest;
+    std::vector<llvm::StringRef> DirectIncludes;
+  };
+
   llvm::Optional<SymbolSlab> Symbols;
   llvm::Optional<RefSlab> Refs;
-  // Digest of the source file that generated the contents.
-  llvm::Optional<FileDigest> Digest;
+  // Keys are URIs of the source files.
+  llvm::Optional<llvm::StringMap<SourceFile>> Sources;
 };
 // Parse an index file. The input must be a RIFF or YAML file.
 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
@@ -50,8 +59,9 @@
 struct IndexFileOut {
   const SymbolSlab *Symbols = nullptr;
   const RefSlab *Refs = nullptr;
-  // Digest of the source file that generated the contents.
-  const IndexFileIn::FileDigest *Digest = nullptr;
+  // Information about the source files participated in current index file.
+  // Keys are URIs of the source files.
+  const llvm::StringMap<IndexFileIn::SourceFile> *Sources = nullptr;
   // TODO: Support serializing Dex posting lists.
   IndexFileFormat Format = IndexFileFormat::RIFF;
 
Index: clangd/index/Serialization.cpp
===================================================================
--- clangd/index/Serialization.cpp
+++ clangd/index/Serialization.cpp
@@ -247,6 +247,32 @@
   return Loc;
 }
 
+IndexFileIn::SourceFile
+readSourceFile(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
+  IndexFileIn::SourceFile SF;
+  SF.IsTU = Data.consume8();
+  SF.FileURI = Data.consumeString(Strings);
+  llvm::StringRef Digest = Data.consume(SF.Digest.size());
+  std::copy(Digest.bytes_begin(), Digest.bytes_end(), SF.Digest.begin());
+  SF.DirectIncludes.resize(Data.consumeVar());
+
+  for (llvm::StringRef& Include : SF.DirectIncludes)
+    Include = Data.consumeString(Strings);
+  return SF;
+}
+
+void writeSourceFile(const IndexFileIn::SourceFile &SF,
+                     const StringTableOut &Strings, raw_ostream &OS) {
+  OS.write(SF.IsTU);
+  writeVar(Strings.index(SF.FileURI), OS);
+  llvm::StringRef Hash(reinterpret_cast<const char *>(SF.Digest.data()),
+                       SF.Digest.size());
+  OS << Hash;
+  writeVar(SF.DirectIncludes.size(), OS);
+  for (llvm::StringRef Include : SF.DirectIncludes)
+    writeVar(Strings.index(Include), OS);
+}
+
 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
                  raw_ostream &OS) {
   OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
@@ -333,7 +359,7 @@
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
 //   - meta: version number
-//   - srcs: checksum of the source file
+//   - srcs: information related to source files
 //   - stri: string table
 //   - symb: symbols
 //   - refs: references to symbols
@@ -367,10 +393,18 @@
 
   IndexFileIn Result;
   if (Chunks.count("srcs")) {
-    Reader Hash(Chunks.lookup("srcs"));
-    Result.Digest.emplace();
-    llvm::StringRef Digest = Hash.consume(Result.Digest->size());
-    std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin());
+    Reader SrcsReader(Chunks.lookup("srcs"));
+    Result.Sources.emplace();
+    while (!SrcsReader.eof()) {
+      auto SF = readSourceFile(SrcsReader, Strings->Strings);
+      auto Entry = Result.Sources->try_emplace(SF.FileURI).first;
+      Entry->getValue() = std::move(SF);
+      Entry->getValue().FileURI = Entry->getKey();
+      for(auto &Include : Entry->getValue().DirectIncludes)
+        Include = Result.Sources->try_emplace(Include).first->getKey();
+    }
+    if (SrcsReader.err())
+      return makeError("malformed or truncated include uri");
   }
 
   if (Chunks.count("symb")) {
@@ -397,6 +431,13 @@
   return std::move(Result);
 }
 
+template <class Callback>
+void visitStrings(IndexFileIn::SourceFile &SF, const Callback &CB) {
+  CB(SF.FileURI);
+  for (llvm::StringRef &Include : SF.DirectIncludes)
+    CB(Include);
+}
+
 void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
   assert(Data.Symbols && "An index file without symbols makes no sense!");
   riff::File RIFF;
@@ -409,18 +450,19 @@
   }
   RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
 
-  if (Data.Digest) {
-    llvm::StringRef Hash(reinterpret_cast<const char *>(Data.Digest->data()),
-                         Data.Digest->size());
-    RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash});
-  }
-
   StringTableOut Strings;
   std::vector<Symbol> Symbols;
   for (const auto &Sym : *Data.Symbols) {
     Symbols.emplace_back(Sym);
     visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
   }
+  std::vector<IndexFileIn::SourceFile> Sources;
+  if (Data.Sources)
+    for (const auto &Source : *Data.Sources) {
+      Sources.push_back(Source.getValue());
+      visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); });
+    }
+
   std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
   if (Data.Refs) {
     for (const auto &Sym : *Data.Refs) {
@@ -458,6 +500,16 @@
     RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
   }
 
+  std::string SrcsSection;
+  {
+    {
+      raw_string_ostream SrcsOS(SrcsSection);
+      for (const auto &SF : Sources)
+        writeSourceFile(SF, Strings, SrcsOS);
+    }
+    RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
+  }
+
   OS << RIFF;
 }
 
Index: clangd/index/Background.h
===================================================================
--- clangd/index/Background.h
+++ clangd/index/Background.h
@@ -88,7 +88,8 @@
 
 private:
   /// Given index results from a TU, only update files in \p FilesToUpdate.
-  void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
+  /// Also stores new index information on IndexStorage.
+  void update(llvm::StringRef MainFile, IndexFileIn Index,
               const llvm::StringMap<FileDigest> &FilesToUpdate,
               BackgroundIndexStorage *IndexStorage);
 
Index: clangd/index/Background.cpp
===================================================================
--- clangd/index/Background.cpp
+++ clangd/index/Background.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SHA1.h"
+#include "llvm/Support/ScopedPrinter.h"
 
 #include <memory>
 #include <numeric>
@@ -192,9 +193,36 @@
   llvm::StringMap<std::string> URIToPathCache;
 };
 
+llvm::StringMap<IndexFileIn::SourceFile>
+FilterSources(llvm::StringRef Path,
+              const llvm::StringMap<IndexFileIn::SourceFile> &FullSources) {
+  llvm::StringMap<IndexFileIn::SourceFile> Filtered;
+
+  std::string FileURI = URI::create(Path).toString();
+  std::queue<llvm::StringRef> Dependencies;
+  Dependencies.push(FileURI);
+  while (!Dependencies.empty()) {
+    llvm::StringRef Dependency = Dependencies.front();
+    Dependencies.pop();
+
+    auto Entry = Filtered.try_emplace(Dependency).first;
+    auto &SF = Entry->getValue();
+    SF = FullSources.lookup(Entry->getKey());
+    SF.FileURI = Entry->getKey();
+
+    for (auto &Include : SF.DirectIncludes) {
+      auto I = Filtered.try_emplace(Include);
+      Include = I.first->getKey();
+      if (I.second) // Insert each dependency only once.
+        Dependencies.push(Include);
+    }
+  }
+
+  return Filtered;
+}
+
 /// Given index results from a TU, only update files in \p FilesToUpdate.
-void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
-                             RefSlab Refs,
+void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index,
                              const StringMap<FileDigest> &FilesToUpdate,
                              BackgroundIndexStorage *IndexStorage) {
   // Partition symbols/references into files.
@@ -204,7 +232,7 @@
   };
   StringMap<File> Files;
   URIToFileCache URICache(MainFile);
-  for (const auto &Sym : Symbols) {
+  for (const auto &Sym : *Index.Symbols) {
     if (Sym.CanonicalDeclaration) {
       auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
       if (FilesToUpdate.count(DeclPath) != 0)
@@ -222,7 +250,7 @@
     }
   }
   DenseMap<const Ref *, SymbolID> RefToIDs;
-  for (const auto &SymRefs : Refs) {
+  for (const auto &SymRefs : *Index.Refs) {
     for (const auto &R : SymRefs.second) {
       auto Path = URICache.resolve(R.Location.FileURI);
       if (FilesToUpdate.count(Path) != 0) {
@@ -246,16 +274,21 @@
 
     auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
     auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
+    std::unique_ptr<llvm::StringMap<IndexFileIn::SourceFile>> Srcs;
 
     auto Hash = FilesToUpdate.lookup(Path);
     // We need to store shards before updating the index, since the latter
     // consumes slabs.
-    // FIXME: Store Hash in the Shard.
     if (IndexStorage) {
       IndexFileOut Shard;
       Shard.Symbols = SS.get();
       Shard.Refs = RS.get();
-      Shard.Digest = &Hash;
+      if (Index.Sources) {
+        Srcs = llvm::make_unique<llvm::StringMap<IndexFileIn::SourceFile>>(
+            FilterSources(Path, Index.Sources.getValue()));
+        Shard.Sources = Srcs.get();
+      }
+
       if (auto Error = IndexStorage->storeShard(Path, Shard))
         elog("Failed to write background-index shard for file {0}: {1}", Path,
              std::move(Error));
@@ -302,6 +335,43 @@
   };
 }
 
+llvm::StringMap<IndexFileIn::SourceFile>
+IncludeStructureToSources(llvm::StringRef MainFile,
+                          const IncludeStructure &Includes,
+                          llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
+  llvm::StringMap<IndexFileIn::SourceFile> Sources;
+
+  std::queue<std::string> Dependencies;
+  Dependencies.push(MainFile);
+  while (!Dependencies.empty()) {
+    std::string Dependency = std::move(Dependencies.front());
+    Dependencies.pop();
+
+    std::string FileURI = URI::create(Dependency).toString();
+    auto Buf = FS->getBufferForFile(Dependency);
+    if (!Buf)
+      continue;
+    auto Digest = digest(Buf->get()->getBuffer());
+
+    auto Entry = Sources.try_emplace(FileURI).first;
+    auto &SF = Entry->getValue();
+    SF.IsTU = Dependency == MainFile;
+    SF.FileURI = Entry->getKey();
+    SF.Digest = std::move(Digest);
+    for (const auto &Include : Includes.includeDepth(Dependency)) {
+      if (Include.getValue() != 1) // Skip transitive includes.
+        continue;
+      FileURI = URI::create(Include.getKey()).toString();
+      auto I = Sources.try_emplace(FileURI);
+      SF.DirectIncludes.push_back(I.first->getKey());
+      if (I.second) // Insert each dependency only once.
+        Dependencies.push(Include.getKey());
+    }
+  }
+
+  return Sources;
+}
+
 Error BackgroundIndex::index(tooling::CompileCommand Cmd,
                              BackgroundIndexStorage *IndexStorage) {
   trace::Span Tracer("BackgroundIndex");
@@ -367,16 +437,28 @@
   if (!Action->BeginSourceFile(*Clang, Input))
     return createStringError(inconvertibleErrorCode(),
                              "BeginSourceFile() failed");
+
+  IncludeStructure Includes;
+  Clang->getPreprocessor().addPPCallbacks(
+      collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
+
   if (!Action->Execute())
     return createStringError(inconvertibleErrorCode(), "Execute() failed");
   Action->EndSourceFile();
 
+  llvm::StringMap<IndexFileIn::SourceFile> Sources =
+      IncludeStructureToSources(AbsolutePath, Includes, Inputs.FS);
+
   log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename,
       Symbols.size(), Refs.numRefs());
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
-  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
-         IndexStorage);
+  IndexFileIn Index;
+  Index.Symbols = std::move(Symbols);
+  Index.Refs = std::move(Refs);
+  Index.Sources = std::move(Sources);
+
+  update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage);
   {
     // Make sure hash for the main file is always updated even if there is no
     // index data in it.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to