dang created this revision.
dang added a reviewer: Bigcheese.
Herald added subscribers: cfe-commits, dexonsmith.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.
dang edited the summary of this revision.

This places the control block after the AST block in PCMs so that the AST block
can be more easily hashed.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D79993

Files:
  clang/include/clang/Serialization/ASTWriter.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp

Index: clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -10,14 +10,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/OpenMPClause.h"
-#include "clang/Serialization/ASTRecordWriter.h"
 #include "ASTCommon.h"
 #include "ASTReaderInternals.h"
 #include "MultiOnDiskHashTable.h"
-#include "clang/AST/AbstractTypeWriter.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTUnresolvedSet.h"
+#include "clang/AST/AbstractTypeWriter.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
@@ -31,6 +29,7 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/LambdaCapture.h"
 #include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/OpenMPClause.h"
 #include "clang/AST/RawCommentList.h"
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
@@ -66,6 +65,7 @@
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/Weak.h"
 #include "clang/Serialization/ASTReader.h"
+#include "clang/Serialization/ASTRecordWriter.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
 #include "clang/Serialization/ModuleFile.h"
 #include "clang/Serialization/ModuleFileExtension.h"
@@ -80,6 +80,7 @@
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -1138,9 +1139,6 @@
   }
 
   if (WritingModule && WritingModule->Directory) {
-    SmallString<128> BaseDir(WritingModule->Directory->getName());
-    cleanPathForOutput(Context.getSourceManager().getFileManager(), BaseDir);
-
     // If the home of the module is the current working directory, then we
     // want to pick up the cwd of the build process loading the module, not
     // our cwd, when we load this module.
@@ -1155,14 +1153,8 @@
       unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
 
       RecordData::value_type Record[] = {MODULE_DIRECTORY};
-      Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir);
+      Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDirectory);
     }
-
-    // Write out all other paths relative to the base directory if possible.
-    BaseDirectory.assign(BaseDir.begin(), BaseDir.end());
-  } else if (!isysroot.empty()) {
-    // Write out paths relative to the sysroot if possible.
-    BaseDirectory = std::string(isysroot);
   }
 
   // Module map file
@@ -1388,23 +1380,8 @@
   Stream.ExitBlock();
 }
 
-namespace  {
-
-/// An input file.
-struct InputFileEntry {
-  const FileEntry *File;
-  bool IsSystemFile;
-  bool IsTransient;
-  bool BufferOverridden;
-  bool IsTopLevelModuleMap;
-  uint32_t ContentHash[2];
-};
-
-} // namespace
-
 void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
-                                HeaderSearchOptions &HSOpts,
-                                bool Modules) {
+                                HeaderSearchOptions &HSOpts, bool Modules) {
   using namespace llvm;
 
   Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
@@ -1428,67 +1405,13 @@
   IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev));
 
-  // Get all ContentCache objects for files, sorted by whether the file is a
-  // system one or not. System files go at the back, users files at the front.
-  std::deque<InputFileEntry> SortedFiles;
-  for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) {
-    // Get this source location entry.
-    const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I);
-    assert(&SourceMgr.getSLocEntry(FileID::get(I)) == SLoc);
-
-    // We only care about file entries that were not overridden.
-    if (!SLoc->isFile())
-      continue;
-    const SrcMgr::FileInfo &File = SLoc->getFile();
-    const SrcMgr::ContentCache *Cache = File.getContentCache();
-    if (!Cache->OrigEntry)
-      continue;
-
-    InputFileEntry Entry;
-    Entry.File = Cache->OrigEntry;
-    Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
-    Entry.IsTransient = Cache->IsTransient;
-    Entry.BufferOverridden = Cache->BufferOverridden;
-    Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
-                                File.getIncludeLoc().isInvalid();
-
-    auto ContentHash = hash_code(-1);
-    if (PP->getHeaderSearchInfo()
-            .getHeaderSearchOpts()
-            .ValidateASTInputFilesContent) {
-      auto *MemBuff = Cache->getRawBuffer();
-      if (MemBuff)
-        ContentHash = hash_value(MemBuff->getBuffer());
-      else
-        // FIXME: The path should be taken from the FileEntryRef.
-        PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
-            << Entry.File->getName();
-    }
-    auto CH = llvm::APInt(64, ContentHash);
-    Entry.ContentHash[0] =
-        static_cast<uint32_t>(CH.getLoBits(32).getZExtValue());
-    Entry.ContentHash[1] =
-        static_cast<uint32_t>(CH.getHiBits(32).getZExtValue());
-
-    if (Entry.IsSystemFile)
-      SortedFiles.push_back(Entry);
-    else
-      SortedFiles.push_front(Entry);
-  }
-
   unsigned UserFilesNum = 0;
   // Write out all of the input files.
   std::vector<uint64_t> InputFileOffsets;
-  for (const auto &Entry : SortedFiles) {
-    uint32_t &InputFileID = InputFileIDs[Entry.File];
-    if (InputFileID != 0)
-      continue; // already recorded this file.
-
+  for (const auto &Entry : SortedInputFiles) {
     // Record this entry's offset.
     InputFileOffsets.push_back(Stream.GetCurrentBitNo());
 
-    InputFileID = InputFileOffsets.size();
-
     if (!Entry.IsSystemFile)
       ++UserFilesNum;
 
@@ -1497,7 +1420,7 @@
     {
       RecordData::value_type Record[] = {
           INPUT_FILE,
-          InputFileOffsets.size(),
+          InputFileIDs[Entry.File],
           (uint64_t)Entry.File->getSize(),
           (uint64_t)getTimestampForOutput(Entry.File),
           Entry.BufferOverridden,
@@ -2473,6 +2396,70 @@
   return SubmoduleIDs[Mod] = NextSubmoduleID++;
 }
 
+void ASTWriter::populateInputFileIDs(SourceManager &SourceMgr) {
+  for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) {
+    // Get this source location entry.
+    const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I);
+    assert(&SourceMgr.getSLocEntry(FileID::get(I)) == SLoc);
+
+    // We only care about file entries that were not overridden.
+    if (!SLoc->isFile())
+      continue;
+    const SrcMgr::FileInfo &File = SLoc->getFile();
+    const SrcMgr::ContentCache *Cache = File.getContentCache();
+    if (!Cache->OrigEntry)
+      continue;
+
+    InputFileEntry Entry;
+    Entry.File = Cache->OrigEntry;
+    Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
+    Entry.IsTransient = Cache->IsTransient;
+    Entry.BufferOverridden = Cache->BufferOverridden;
+    Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
+                                File.getIncludeLoc().isInvalid();
+
+    auto ContentHash = llvm::hash_code(-1);
+    if (PP->getHeaderSearchInfo()
+            .getHeaderSearchOpts()
+            .ValidateASTInputFilesContent) {
+      auto *MemBuff = Cache->getRawBuffer();
+      if (MemBuff)
+        ContentHash = hash_value(MemBuff->getBuffer());
+      else
+        // FIXME: The path should be taken from the FileEntryRef.
+        PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
+            << Entry.File->getName();
+    }
+    auto CH = llvm::APInt(64, ContentHash);
+    Entry.ContentHash[0] =
+        static_cast<uint32_t>(CH.getLoBits(32).getZExtValue());
+    Entry.ContentHash[1] =
+        static_cast<uint32_t>(CH.getHiBits(32).getZExtValue());
+
+    if (Entry.IsSystemFile)
+      SortedInputFiles.push_back(Entry);
+    else
+      SortedInputFiles.push_front(Entry);
+  }
+
+  llvm::SmallPtrSet<const FileEntry *, 32> SeenFileEntries;
+  auto NewEnd =
+      std::remove_if(SortedInputFiles.begin(), SortedInputFiles.end(),
+                     [&SeenFileEntries](const InputFileEntry &Entry) {
+                       return !SeenFileEntries.insert(Entry.File).second;
+                     });
+  SortedInputFiles.erase(NewEnd, SortedInputFiles.end());
+
+  uint32_t NextID = 1;
+  for (const auto &Entry : SortedInputFiles) {
+    uint32_t &InputFileID = InputFileIDs[Entry.File];
+    if (InputFileID != 0)
+      continue; // already generated an ID for this file.
+
+    InputFileID = NextID++;
+  }
+}
+
 unsigned ASTWriter::getSubmoduleID(Module *Mod) {
   // FIXME: This can easily happen, if we have a reference to a submodule that
   // did not result in us loading a module file for that submodule. For
@@ -4548,8 +4535,17 @@
     }
   }
 
-  // Write the control block
-  WriteControlBlock(PP, Context, isysroot, OutputFile);
+  if (WritingModule && WritingModule->Directory) {
+    SmallString<128> BaseDir(WritingModule->Directory->getName());
+    cleanPathForOutput(Context.getSourceManager().getFileManager(), BaseDir);
+    // Write out all other paths relative to the base directory if possible.
+    BaseDirectory.assign(BaseDir.begin(), BaseDir.end());
+  } else if (!isysroot.empty()) {
+    // Write out paths relative to the sysroot if possible.
+    BaseDirectory = std::string(isysroot);
+  }
+
+  populateInputFileIDs(Context.SourceMgr);
 
   // Write the remaining AST contents.
   Stream.EnterSubblock(AST_BLOCK_ID, 5);
@@ -4914,6 +4910,9 @@
   Stream.EmitRecord(STATISTICS, Record);
   Stream.ExitBlock();
 
+  // Write the control block
+  WriteControlBlock(PP, Context, isysroot, OutputFile);
+
   // Write the module file extension blocks.
   for (const auto &ExtWriter : ModuleFileExtensionWriters)
     WriteModuleFileExtension(SemaRef, *ExtWriter);
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -2568,11 +2568,6 @@
                             unsigned ClientLoadCapabilities) {
   BitstreamCursor &Stream = F.Stream;
 
-  if (llvm::Error Err = Stream.EnterSubBlock(CONTROL_BLOCK_ID)) {
-    Error(std::move(Err));
-    return Failure;
-  }
-
   // Lambda to read the unhashed control block the first time it's called.
   //
   // For PCM files, the unhashed control block cannot be read until after the
@@ -4549,8 +4544,45 @@
     return Failure;
   }
 
-  // This is used for compatibility with older PCH formats.
-  bool HaveReadControlBlock = false;
+  {
+    // The control block needs to be read first but the AST block comes before
+    // it in the PCM, so lookahead to the control block and read it now.
+    BitstreamCursor &PreviousCursor = F.Stream;
+    SavedStreamPosition SavedPosition(PreviousCursor);
+    if (SkipCursorToBlock(Stream, CONTROL_BLOCK_ID))
+      return Failure;
+    switch (ReadControlBlock(F, Loaded, ImportedBy, ClientLoadCapabilities)) {
+    case Success:
+      // Check that we didn't try to load a non-module AST file as a module.
+      //
+      // FIXME: Should we also perform the converse check? Loading a module as
+      // a PCH file sort of works, but it's a bit wonky.
+      if ((Type == MK_ImplicitModule || Type == MK_ExplicitModule ||
+           Type == MK_PrebuiltModule) &&
+          F.ModuleName.empty()) {
+        auto Result = (Type == MK_ImplicitModule) ? OutOfDate : Failure;
+        if (Result != OutOfDate ||
+            (ClientLoadCapabilities & ARR_OutOfDate) == 0)
+          Diag(diag::err_module_file_not_module) << FileName;
+        return Result;
+      }
+      break;
+
+    case Failure:
+      return Failure;
+    case Missing:
+      return Missing;
+    case OutOfDate:
+      return OutOfDate;
+    case VersionMismatch:
+      return VersionMismatch;
+    case ConfigurationMismatch:
+      return ConfigurationMismatch;
+    case HadErrors:
+      return HadErrors;
+    }
+  }
+
   while (true) {
     Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
     if (!MaybeEntry) {
@@ -4571,41 +4603,7 @@
     }
 
     switch (Entry.ID) {
-    case CONTROL_BLOCK_ID:
-      HaveReadControlBlock = true;
-      switch (ReadControlBlock(F, Loaded, ImportedBy, ClientLoadCapabilities)) {
-      case Success:
-        // Check that we didn't try to load a non-module AST file as a module.
-        //
-        // FIXME: Should we also perform the converse check? Loading a module as
-        // a PCH file sort of works, but it's a bit wonky.
-        if ((Type == MK_ImplicitModule || Type == MK_ExplicitModule ||
-             Type == MK_PrebuiltModule) &&
-            F.ModuleName.empty()) {
-          auto Result = (Type == MK_ImplicitModule) ? OutOfDate : Failure;
-          if (Result != OutOfDate ||
-              (ClientLoadCapabilities & ARR_OutOfDate) == 0)
-            Diag(diag::err_module_file_not_module) << FileName;
-          return Result;
-        }
-        break;
-
-      case Failure: return Failure;
-      case Missing: return Missing;
-      case OutOfDate: return OutOfDate;
-      case VersionMismatch: return VersionMismatch;
-      case ConfigurationMismatch: return ConfigurationMismatch;
-      case HadErrors: return HadErrors;
-      }
-      break;
-
     case AST_BLOCK_ID:
-      if (!HaveReadControlBlock) {
-        if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
-          Diag(diag::err_pch_version_too_old);
-        return VersionMismatch;
-      }
-
       // Record that we've loaded this module.
       Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc));
       ShouldFinalizePCM = true;
Index: clang/include/clang/Serialization/ASTWriter.h
===================================================================
--- clang/include/clang/Serialization/ASTWriter.h
+++ clang/include/clang/Serialization/ASTWriter.h
@@ -443,6 +443,23 @@
   std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
     ModuleFileExtensionWriters;
 
+  /// An input file.
+  struct InputFileEntry {
+    const FileEntry *File;
+    bool IsSystemFile;
+    bool IsTransient;
+    bool BufferOverridden;
+    bool IsTopLevelModuleMap;
+    uint32_t ContentHash[2];
+  };
+
+  /// All the ContentCache objects for files, sorted by whether the file is a
+  /// system one or not. System files go at the back, user files at the front.
+  std::deque<InputFileEntry> SortedInputFiles;
+
+  /// Generate IDs for the input files
+  void populateInputFileIDs(SourceManager &SourceMgr);
+
   /// Retrieve or create a submodule ID for this module.
   unsigned getSubmoduleID(Module *Mod);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D79993: Place cont... Daniel Grumberg via Phabricator via cfe-commits

Reply via email to