jansvoboda11 created this revision.
jansvoboda11 added reviewers: Bigcheese, dexonsmith, arphaman.
Herald added a subscriber: hiraditya.
jansvoboda11 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

The minimizing and caching filesystem used by the dependency scanner keeps 
minimized and original files in separate caches.

This setup is not well suited for dealing with files that are sometimes 
minimized and sometimes not. Such files are being stat-ed and read twice, which 
is wasteful and also means the two versions of the file can get "out of sync".

This patch squashes the two caches together. When a file is stat-ed or read, 
its original contents are populated. If a file needs to be minimized, we give 
the minimizer the already loaded contents instead of reading the file again.

Depends on D115043 <https://reviews.llvm.org/D115043>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D115346

Files:
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  llvm/include/llvm/Support/VirtualFileSystem.h
  llvm/lib/Support/VirtualFileSystem.cpp

Index: llvm/lib/Support/VirtualFileSystem.cpp
===================================================================
--- llvm/lib/Support/VirtualFileSystem.cpp
+++ llvm/lib/Support/VirtualFileSystem.cpp
@@ -75,6 +75,12 @@
     : Name(Name.str()), UID(UID), MTime(MTime), User(User), Group(Group),
       Size(Size), Type(Type), Perms(Perms) {}
 
+Status Status::copyWithNewSize(const Status &In, uint64_t NewSize) {
+  return Status(In.getName(), In.getUniqueID(), In.getLastModificationTime(),
+                In.getUser(), In.getGroup(), NewSize, In.getType(),
+                In.getPermissions());
+}
+
 Status Status::copyWithNewName(const Status &In, const Twine &NewName) {
   return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
                 In.getUser(), In.getGroup(), In.getSize(), In.getType(),
Index: llvm/include/llvm/Support/VirtualFileSystem.h
===================================================================
--- llvm/include/llvm/Support/VirtualFileSystem.h
+++ llvm/include/llvm/Support/VirtualFileSystem.h
@@ -64,6 +64,8 @@
          uint64_t Size, llvm::sys::fs::file_type Type,
          llvm::sys::fs::perms Perms);
 
+  /// Get a copy of a Status with a different size.
+  static Status copyWithNewSize(const Status &In, uint64_t NewSize);
   /// Get a copy of a Status with a different name.
   static Status copyWithNewName(const Status &In, const Twine &NewName);
   static Status copyWithNewName(const llvm::sys::fs::file_status &In,
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -17,7 +17,7 @@
 using namespace dependencies;
 
 CachedFileSystemEntry CachedFileSystemEntry::createFileEntry(
-    StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize) {
+    StringRef Filename, llvm::vfs::FileSystem &FS) {
   // Load the file and its content from the file system.
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile =
       FS.openFileForRead(Filename);
@@ -33,31 +33,29 @@
   if (!MaybeBuffer)
     return MaybeBuffer.getError();
 
+  CachedFileSystemEntry Result;
+  Result.MaybeStat = std::move(*Stat);
+  Result.OriginalContents = std::move(*MaybeBuffer);
+  return Result;
+}
+
+void CachedFileSystemEntry::minimize(CachedFileSystemEntry &Result) {
   llvm::SmallString<1024> MinimizedFileContents;
   // Minimize the file down to directives that might affect the dependencies.
-  const auto &Buffer = *MaybeBuffer;
   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
-  if (!Minimize || minimizeSourceToDependencyDirectives(
-                       Buffer->getBuffer(), MinimizedFileContents, Tokens)) {
-    // Use the original file unless requested otherwise, or
-    // if the minimization failed.
-    // FIXME: Propage the diagnostic if desired by the client.
-    CachedFileSystemEntry Result;
-    Result.MaybeStat = std::move(*Stat);
-    Result.Contents = std::move(*MaybeBuffer);
-    return Result;
+  if (minimizeSourceToDependencyDirectives(
+      Result.OriginalContents->getBuffer(), MinimizedFileContents, Tokens)) {
+    // FIXME: Propagate the diagnostic if desired by the client.
+    // Use the original file if the minimization failed.
+    Result.MinimizedContents =
+        llvm::MemoryBuffer::getMemBuffer(*Result.OriginalContents);
+    return;
   }
 
-  CachedFileSystemEntry Result;
-  size_t Size = MinimizedFileContents.size();
-  Result.MaybeStat = llvm::vfs::Status(Stat->getName(), Stat->getUniqueID(),
-                                       Stat->getLastModificationTime(),
-                                       Stat->getUser(), Stat->getGroup(), Size,
-                                       Stat->getType(), Stat->getPermissions());
   // The contents produced by the minimizer must be null terminated.
   assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
          "not null terminated contents");
-  Result.Contents = std::make_unique<llvm::SmallVectorMemoryBuffer>(
+  Result.MinimizedContents = std::make_unique<llvm::SmallVectorMemoryBuffer>(
       std::move(MinimizedFileContents));
 
   // Compute the skipped PP ranges that speedup skipping over inactive
@@ -77,8 +75,6 @@
     Mapping[Range.Offset] = Range.Length;
   }
   Result.PPSkippedRangeMapping = std::move(Mapping);
-
-  return Result;
 }
 
 CachedFileSystemEntry
@@ -89,7 +85,8 @@
   return Result;
 }
 
-DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() {
+DependencyScanningFilesystemSharedCache::
+    DependencyScanningFilesystemSharedCache() {
   // This heuristic was chosen using a empirical testing on a
   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
   // sharding gives a performance edge by reducing the lock contention.
@@ -101,19 +98,13 @@
 }
 
 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) {
+DependencyScanningFilesystemSharedCache::get(StringRef Key) {
   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
   std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
   auto It = Shard.Cache.try_emplace(Key);
   return It.first->getValue();
 }
 
-DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
-  SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal;
-  return Cache.get(Key);
-}
-
 /// Whitelist file extensions that should be minimized, treating no extension as
 /// a source file that should be minimized.
 ///
@@ -158,31 +149,27 @@
 }
 
 CachedFileSystemEntry DependencyScanningWorkerFilesystem::createFileSystemEntry(
-    llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
-    bool ShouldMinimize) {
+    llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename) {
   if (!MaybeStatus)
     return CachedFileSystemEntry(MaybeStatus.getError());
 
   if (MaybeStatus->isDirectory())
     return CachedFileSystemEntry::createDirectoryEntry(std::move(*MaybeStatus));
 
-  return CachedFileSystemEntry::createFileEntry(Filename, getUnderlyingFS(),
-                                                ShouldMinimize);
+  return CachedFileSystemEntry::createFileEntry(Filename, getUnderlyingFS());
 }
 
 llvm::ErrorOr<const CachedFileSystemEntry *>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
-    const StringRef Filename) {
-  bool ShouldMinimize = shouldMinimize(Filename);
-
-  if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
+    const StringRef Filename, bool ShouldBeMinimized) {
+  if (const auto *Entry = Cache.getCachedEntry(Filename))
     return Entry;
 
   // FIXME: Handle PCM/PCH files.
   // FIXME: Handle module map files.
 
   DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
-      &SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize);
+      &SharedCacheEntry = SharedCache.get(Filename);
   const CachedFileSystemEntry *Result;
   {
     std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
@@ -196,15 +183,18 @@
         //   files before building them, and then looks for them again. If we
         //   cache the stat failure, it won't see them the second time.
         return MaybeStatus.getError();
-      CacheEntry = createFileSystemEntry(std::move(MaybeStatus), Filename,
-                                         ShouldMinimize);
+      CacheEntry = createFileSystemEntry(std::move(MaybeStatus), Filename);
     }
 
+    if (ShouldBeMinimized && CacheEntry.hasOriginalContents() &&
+        !CacheEntry.hasMinimizedContents())
+      CachedFileSystemEntry::minimize(CacheEntry);
+
     Result = &CacheEntry;
   }
 
   // Store the result in the local cache.
-  Cache.setCachedEntry(Filename, ShouldMinimize, Result);
+  Cache.setCachedEntry(Filename, Result);
   return Result;
 }
 
@@ -212,11 +202,12 @@
 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
   SmallString<256> OwnedFilename;
   StringRef Filename = Path.toStringRef(OwnedFilename);
+  bool ShouldBeMinimized = shouldMinimize(Filename);
   const llvm::ErrorOr<const CachedFileSystemEntry *> Result =
-      getOrCreateFileSystemEntry(Filename);
+      getOrCreateFileSystemEntry(Filename, ShouldBeMinimized);
   if (!Result)
     return Result.getError();
-  return (*Result)->getStatus();
+  return (*Result)->getStatus(ShouldBeMinimized);
 }
 
 namespace {
@@ -230,7 +221,7 @@
       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
 
   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
-  create(const CachedFileSystemEntry *Entry,
+  create(const CachedFileSystemEntry *Entry, bool ShouldBeMinimized,
          ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);
 
   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
@@ -251,18 +242,18 @@
 } // end anonymous namespace
 
 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
-    const CachedFileSystemEntry *Entry,
+    const CachedFileSystemEntry *Entry, bool ShouldBeMinimized,
     ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
   if (Entry->isDirectory())
     return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
         std::make_error_code(std::errc::is_a_directory));
-  llvm::ErrorOr<StringRef> Contents = Entry->getContents();
+  llvm::ErrorOr<StringRef> Contents = Entry->getContents(ShouldBeMinimized);
   if (!Contents)
     return Contents.getError();
   auto Result = std::make_unique<MinimizedVFSFile>(
       llvm::MemoryBuffer::getMemBuffer(*Contents, Entry->getName(),
                                        /*RequiresNullTerminator=*/false),
-      *Entry->getStatus());
+      *Entry->getStatus(ShouldBeMinimized));
   if (!Entry->getPPSkippedRangeMapping().empty() && PPSkipMappings)
     (*PPSkipMappings)[Result->Buffer->getBufferStart()] =
         &Entry->getPPSkippedRangeMapping();
@@ -275,9 +266,11 @@
   SmallString<256> OwnedFilename;
   StringRef Filename = Path.toStringRef(OwnedFilename);
 
+  bool ShouldBeMinimized = shouldMinimize(Filename);
   const llvm::ErrorOr<const CachedFileSystemEntry *> Result =
-      getOrCreateFileSystemEntry(Filename);
+      getOrCreateFileSystemEntry(Filename, ShouldBeMinimized);
   if (!Result)
     return Result.getError();
-  return MinimizedVFSFile::create(Result.get(), PPSkipMappings);
+  return MinimizedVFSFile::create(Result.get(), ShouldBeMinimized,
+                                  PPSkipMappings);
 }
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -38,15 +38,17 @@
 
   CachedFileSystemEntry(std::error_code Error) : MaybeStat(std::move(Error)) {}
 
-  /// Create an entry that represents an opened source file with minimized or
-  /// original contents.
+  /// Create an entry that represents an opened source file with original
+  /// contents.
   ///
   /// The filesystem opens the file even for `stat` calls open to avoid the
   /// issues with stat + open of minimized files that might lead to a
   /// mismatching size of the file.
   static CachedFileSystemEntry createFileEntry(StringRef Filename,
-                                               llvm::vfs::FileSystem &FS,
-                                               bool Minimize = true);
+                                               llvm::vfs::FileSystem &FS);
+
+  /// Minimize contents of the file.
+  static void minimize(CachedFileSystemEntry &Entry);
 
   /// Create an entry that represents a directory on the filesystem.
   static CachedFileSystemEntry createDirectoryEntry(llvm::vfs::Status &&Stat);
@@ -57,19 +59,28 @@
   /// \returns True if the current entry points to a directory.
   bool isDirectory() const { return MaybeStat && MaybeStat->isDirectory(); }
 
+  /// \returns True if the file contents have already been read.
+  bool hasOriginalContents() const { return OriginalContents != nullptr; }
+
+  /// \returns True if the file contents have already been minimized.
+  bool hasMinimizedContents() const { return MinimizedContents != nullptr; }
+
   /// \returns The error or the file's contents.
-  llvm::ErrorOr<StringRef> getContents() const {
+  llvm::ErrorOr<StringRef> getContents(bool Minimized) const {
+    assert(isValid() && "not initialized");
     if (!MaybeStat)
       return MaybeStat.getError();
     assert(!MaybeStat->isDirectory() && "not a file");
-    assert(isValid() && "not initialized");
-    return Contents->getBuffer();
+    return (Minimized ? MinimizedContents : OriginalContents)->getBuffer();
   }
 
   /// \returns The error or the status of the entry.
-  llvm::ErrorOr<llvm::vfs::Status> getStatus() const {
+  llvm::ErrorOr<llvm::vfs::Status> getStatus(bool Minimized) const {
     assert(isValid() && "not initialized");
-    return MaybeStat;
+    if (!MaybeStat || MaybeStat->isDirectory())
+      return MaybeStat;
+    return llvm::vfs::Status::copyWithNewSize(*MaybeStat,
+                                              getContents(Minimized)->size());
   }
 
   /// \returns the name of the file.
@@ -92,7 +103,8 @@
 
 private:
   llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
-  std::unique_ptr<llvm::MemoryBuffer> Contents;
+  std::unique_ptr<llvm::MemoryBuffer> OriginalContents;
+  std::unique_ptr<llvm::MemoryBuffer> MinimizedContents;
   PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
 };
 
@@ -109,30 +121,21 @@
     CachedFileSystemEntry Value;
   };
 
+  DependencyScanningFilesystemSharedCache();
+
   /// Returns a cache entry for the corresponding key.
   ///
   /// A new cache entry is created if the key is not in the cache. This is a
   /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key, bool Minimized);
+  SharedFileSystemEntry &get(StringRef Key);
 
 private:
-  class SingleCache {
-  public:
-    SingleCache();
-
-    SharedFileSystemEntry &get(StringRef Key);
-
-  private:
-    struct CacheShard {
-      std::mutex CacheLock;
-      llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
-    };
-    std::unique_ptr<CacheShard[]> CacheShards;
-    unsigned NumShards;
+  struct CacheShard {
+    std::mutex CacheLock;
+    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
   };
-
-  SingleCache CacheMinimized;
-  SingleCache CacheOriginal;
+  std::unique_ptr<CacheShard[]> CacheShards;
+  unsigned NumShards;
 };
 
 /// This class is a local cache, that caches the 'stat' and 'open' calls to the
@@ -140,28 +143,16 @@
 /// files.
 class DependencyScanningFilesystemLocalCache {
 private:
-  using SingleCache =
-      llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>;
-
-  SingleCache CacheMinimized;
-  SingleCache CacheOriginal;
-
-  SingleCache &selectCache(bool Minimized) {
-    return Minimized ? CacheMinimized : CacheOriginal;
-  }
+  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
 
 public:
-  void setCachedEntry(StringRef Filename, bool Minimized,
-                      const CachedFileSystemEntry *Entry) {
-    SingleCache &Cache = selectCache(Minimized);
+  void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
     bool IsInserted = Cache.try_emplace(Filename, Entry).second;
     (void)IsInserted;
     assert(IsInserted && "local cache is updated more than once");
   }
 
-  const CachedFileSystemEntry *getCachedEntry(StringRef Filename,
-                                              bool Minimized) {
-    SingleCache &Cache = selectCache(Minimized);
+  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
     auto It = Cache.find(Filename);
     return It == Cache.end() ? nullptr : It->getValue();
   }
@@ -199,12 +190,12 @@
   bool shouldMinimize(StringRef Filename);
 
   llvm::ErrorOr<const CachedFileSystemEntry *>
-  getOrCreateFileSystemEntry(const StringRef Filename);
+  getOrCreateFileSystemEntry(const StringRef Filename, bool ShouldBeMinimized);
 
   /// Create a cached file system entry based on the initial status result.
   CachedFileSystemEntry
   createFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
-                        StringRef Filename, bool ShouldMinimize);
+                        StringRef Filename);
 
   /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to