sammccall updated this revision to Diff 167126.
sammccall added a comment.

remove commented code


Repository:
  rC Clang

https://reviews.llvm.org/D52549

Files:
  include/clang/Basic/VirtualFileSystem.h
  lib/Basic/AvoidStatsVFS.cpp
  lib/Basic/CMakeLists.txt
  lib/Basic/VirtualFileSystem.cpp

Index: lib/Basic/VirtualFileSystem.cpp
===================================================================
--- lib/Basic/VirtualFileSystem.cpp
+++ lib/Basic/VirtualFileSystem.cpp
@@ -303,11 +303,6 @@
   return llvm::sys::fs::real_path(Path, Output);
 }
 
-IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
-  static IntrusiveRefCntPtr<FileSystem> FS = new RealFileSystem();
-  return FS;
-}
-
 namespace {
 
 class RealFSDirIter : public clang::vfs::detail::DirIterImpl {
@@ -2141,3 +2136,75 @@
 
   return *this;
 }
+
+namespace {
+// Debug wrapper: forwards to FS, counts each operation, reports to stderr.
+class StatFS : public FileSystem {
+public:
+  StatFS(const char *Name, llvm::IntrusiveRefCntPtr<FileSystem> FS)
+      : Name(Name), FS(std::move(FS)) {
+    llvm::errs() << "created statfs " << Name << "\n";
+  }
+
+  llvm::ErrorOr<Status> status(const Twine &Path) override {
+    auto Ret = FS->status(Path);
+    bump(Ret ? StatusOK : StatusErr); // successes and failures counted apart
+    return Ret;
+  }
+
+  llvm::ErrorOr<std::unique_ptr<File>>
+  openFileForRead(const Twine &Path) override {
+    auto Ret = FS->openFileForRead(Path);
+    bump(Ret ? OpenOK : OpenErr);
+    return Ret;
+  }
+
+  directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
+    bump(DirBegin); // counted before the call, whatever EC ends up being
+    return FS->dir_begin(Dir, EC);
+  }
+
+  std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
+    bump(SetCWD);
+    return FS->setCurrentWorkingDirectory(Path);
+  }
+
+  llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+    bump(GetCWD);
+    return FS->getCurrentWorkingDirectory();
+  }
+
+  std::error_code
+  getRealPath(const Twine &Path, SmallVectorImpl<char> &Output) const override {
+    bump(GetRealPath);
+    return FS->getRealPath(Path, Output);
+  }
+
+private:
+  void bump(std::atomic<unsigned> &I) const { // count I, then dump all counters
+    ++I;
+    if (++All % 1 == 0) { // modulus is the report interval: 1 = every operation
+      llvm::errs() << "== FILESYSTEM " << Name << " ==\n"
+                   << "Status: " << StatusOK << "+" << StatusErr << "\n"
+                   << "Open: " << OpenOK << "+" << OpenErr << "\n"
+                   << "Dir: " << DirBegin << "\n"
+                   << "CWD: " << SetCWD << "+" << GetCWD << "\n"
+                   << "GetRealPath: " << GetRealPath << "\n"
+                   << "===========================\n";
+    }
+  }
+
+  const char *Name; // label printed in every report; not owned by this class
+  llvm::IntrusiveRefCntPtr<FileSystem> FS;
+  mutable std::atomic<unsigned> StatusOK = {0}, StatusErr = {0}, OpenOK = {0},
+                        OpenErr = {0}, DirBegin = {0}, GetRealPath = {0},
+                        SetCWD = {0}, GetCWD = {0}, All = {0};
+};
+
+} // namespace
+
+IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
+  static IntrusiveRefCntPtr<FileSystem> FS = new StatFS( // built on first call
+      "outer", avoidStats(new StatFS("Real", new RealFileSystem())).release());
+  return FS; // layering: StatFS "outer" -> avoidStats wrapper -> StatFS "Real"
+}
Index: lib/Basic/CMakeLists.txt
===================================================================
--- lib/Basic/CMakeLists.txt
+++ lib/Basic/CMakeLists.txt
@@ -46,6 +46,7 @@
 
 add_clang_library(clangBasic
   Attributes.cpp
+  AvoidStatsVFS.cpp
   Builtins.cpp
   CharInfo.cpp
   Cuda.cpp
Index: lib/Basic/AvoidStatsVFS.cpp
===================================================================
--- /dev/null
+++ lib/Basic/AvoidStatsVFS.cpp
@@ -0,0 +1,296 @@
+//===- AvoidStatsVFS.cpp - Implements a stat-reducing VFS wrapper ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// We implement a VFS wrapper that uses directory listings to prune status() and
+// open() operations for files that *do not exist*.
+//
+// These operations are common in clang because of include paths.
+// With an include path of {A, B, ...}, an #include <foo/bar> directive results
+// in attempts to open or stat {A/foo/bar, B/foo/bar, ...}.
+//
+// This is expensive because this typically takes one syscall per path, so we
+// have O(NumIncludes * IncludePathLen) syscalls.
+//
+// To optimize this, we can list parent directories such as A/.
+// If A only contains {config.h}, attempts to open A/foo/bar, A/foo/baz, A/qux
+// can all fail instantly.
+// Listing a directory tends to be a single syscall, and in practice most
+// missing files can be recognized by looking at the same few directories.
+// The number of syscalls then drops to O(NumIncludes + IncludePathLen).
+//
+// Our constant factor is higher, but this improves performance for large TUs
+// with many include paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/VirtualFileSystem.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Path.h"
+#include <mutex>
+
+namespace clang {
+namespace vfs {
+namespace {
+using namespace llvm;
+namespace path = llvm::sys::path;
+using llvm::sys::fs::file_type;
+
+class StatLessFS : public ProxyFileSystem {
+public:
+  // In fact we only read a directory once we've wanted its contents several
+  // times. This avoids a common pattern:
+  //   - we read files under /some/long/path/...
+  //   - we don't read anything else under /some/long/...
+  //   - we readdir(/some/long), which tells us that /some/long/path is a dir
+  //   - we still need to readdir(/some/long/path) to get its contents
+  //   - that also tells us it's a directory, so readdir(/some/long) was useless
+  constexpr static unsigned ReadDirThreshold = 3;
+
+  explicit StatLessFS(IntrusiveRefCntPtr<FileSystem> Base)
+      : ProxyFileSystem(std::move(Base)) {}
+
+  llvm::ErrorOr<std::unique_ptr<vfs::File>>
+  openFileForRead(const Twine &Path) override {
+    auto NormPath = normalizePath(Path);
+    if (!mayBeFile</*OrDir=*/false>(NormPath))
+      return std::make_error_code(std::errc::no_such_file_or_directory);
+    auto Result = ProxyFileSystem::openFileForRead(Path);
+    recordState(NormPath, Result ? File : MissingOrDir); // failure: maybe a dir
+    return Result;
+  }
+
+  llvm::ErrorOr<Status> status(const Twine &Path) override {
+    auto NormPath = normalizePath(Path);
+    if (!mayBeFile</*OrDir=*/true>(NormPath))
+      return std::make_error_code(std::errc::no_such_file_or_directory);
+    auto Result = ProxyFileSystem::status(Path);
+    recordState(NormPath, Result
+                              ? (Result->getType() == file_type::directory_file
+                                     ? DirUnknownChildren
+                                     : File)
+                              : Missing);
+    return Result;
+  }
+
+private:
+  // What we know about a path, which may be partial information.
+  // Values are ordered: higher values have more info and "upgrade" lower ones.
+  enum PathState {
+    // Ambiguous states.
+    Unknown,
+    MissingOrDir,
+    MissingOrFile,
+    // Type known, but not all relevant details.
+    DirUnknownChildren,
+    DirUnenumerable, // unknown children, but don't try to enumerate again!
+    // Complete information.
+    DirKnownChildren,
+    File,
+    Missing,
+  };
+
+  template <bool OrDir>
+  bool mayBeFile(StringRef NormPath) { // false only if known not file (or dir)
+    // First, just see if we can answer from the cache.
+    {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      switch (Cache.lookup(NormPath)) {
+        case Unknown:
+        case MissingOrFile:
+          break;
+        case MissingOrDir:
+          if (!OrDir) {
+            llvm::errs() << "hitB1: " << NormPath << "\n";
+            return false;
+          }
+          break;
+        case DirUnknownChildren:
+        case DirUnenumerable:
+        case DirKnownChildren:
+          if (!OrDir) llvm::errs() << "hitB1: " << NormPath << "\n";
+          return OrDir;
+        case File:
+          return true;
+        case Missing:
+          llvm::errs() << "hitB1: " << NormPath << "\n";
+          return false;
+      }
+    }
+
+    // Next, maybe we can get an answer based on the parent directory.
+    auto Parent = path::parent_path(NormPath);
+    if (Parent.empty())
+      return OrDir; // Root is a directory.
+    // We may need to populate its cache entry.
+    if (!populateCacheForDir(Parent))
+      return true; // No more information.
+
+    std::lock_guard<std::mutex> Lock(CacheMu);
+    switch (Cache.lookup(Parent)) {
+    case Unknown:
+    case MissingOrDir:
+    case DirUnknownChildren:
+      llvm_unreachable("populateCacheForDir didn't provide enough info");
+    case MissingOrFile:
+    case File:
+    case Missing:
+      llvm::errs() << "hitB2: " << Parent << "\n";
+      return false;
+    case DirUnenumerable:
+      return true;
+    case DirKnownChildren:
+      // The point: we listed the parent, all children are now in cache.
+      switch (Cache.lookup(NormPath)) {
+      case MissingOrDir:
+      case MissingOrFile:
+        // Listing upgraded every child that exists, so these now mean missing.
+      case Unknown:
+      case Missing:
+        llvm::errs() << "hitB3: " << Parent << "\n";
+        return false;
+      case DirUnknownChildren:
+      case DirUnenumerable:
+      case DirKnownChildren:
+        return OrDir;
+      case File:
+        return true;
+      }
+    }
+  }
+
+  void recordState(StringRef NormPath, PathState State) {
+    std::lock_guard<std::mutex> Lock(CacheMu);
+    auto& Current = Cache[NormPath];
+    // Sherlock Holmes would be proud of this special case.
+    if ((Current == MissingOrDir && State == MissingOrFile) ||
+        (Current == MissingOrFile && State == MissingOrDir)) {
+      Current = Missing;
+      return;
+    }
+    Current = std::max(Current, State); // never downgrade what is already known
+  }
+
+  // Roughly: if the directory children are not known, readdir() to fill it.
+  // But details are important. If we know the directory doesn't exist, we
+  // shouldn't read from it. And we must populate parent caches to know that.
+  // Postcondition: if returning true, cache will indicate either that:
+  //   - NormPath is not a directory
+  //   - NormPath is a directory, and all its children are cached
+  //   - NormPath is a directory whose children can't be listed
+  bool populateCacheForDir(StringRef NormPath) {
+    // First, just see if we have any work to do.
+    {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      switch (Cache.lookup(NormPath)) {
+        case Unknown:
+        case MissingOrDir:
+        case DirUnknownChildren:
+          break; // Need to populate cache with more info.
+        case MissingOrFile:
+        case DirUnenumerable:
+        case DirKnownChildren:
+        case File:
+        case Missing:
+          llvm::errs() << "hitA1: " << NormPath << "\n";
+          return true; // Cache already satisfies postcondition.
+      }
+    }
+    // Next, populate parent info and see if that determines our state.
+    auto Parent = path::parent_path(NormPath);
+    if (!Parent.empty() && populateCacheForDir(Parent)) {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      switch (Cache.lookup(Parent)) {
+      case Unknown:
+      case MissingOrDir:
+      case DirUnknownChildren:
+        llvm_unreachable("populateCacheForDir didn't provide enough info");
+      case MissingOrFile:
+      case File:
+      case Missing:
+        // Child can't exist if parent is not a directory.
+        Cache[NormPath] = Missing;
+        return true;
+      case DirUnenumerable:
+        break; // No info about child, need to read it.
+      case DirKnownChildren:
+        // Parent is a directory, and we can tell whether child is.
+        switch (Cache.lookup(NormPath)) {
+        case MissingOrDir:
+        case MissingOrFile:
+          // Listing upgraded every child that exists, so these mean missing.
+        case Unknown:
+          Cache[NormPath] = Missing;
+          llvm::errs() << "hitA2: " << Parent << "\n";
+          return true;
+        case DirUnknownChildren:
+          break; // Need to list children.
+        case DirUnenumerable:
+        case DirKnownChildren:
+          // populateCacheForDir shouldn't do this, but we might be racing.
+          return true;
+        case File:
+        case Missing:
+          llvm::errs() << "hitA2: " << Parent << "\n";
+          return true; // Cache now satisfies postcondition.
+        }
+        break;
+      }
+    }
+    // Finally, we need to read the directory.
+    {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      if (++ReadDirAttempts[NormPath] < ReadDirThreshold) // not hot enough
+        return false;
+    }
+    std::error_code EC;
+    llvm::errs() << "Induced readdir " << NormPath << "\n";
+    auto It = dir_begin(NormPath, EC);
+    if (EC) {
+      recordState(NormPath, MissingOrFile);
+      return true;
+    }
+    for (unsigned I = 0; I < 100; ++I) { // cap the work on huge directories
+      if (It == clang::vfs::directory_iterator()) { // at end of list
+        recordState(NormPath, DirKnownChildren);
+        return true;
+      }
+      const file_type Type = It->type(); // NOTE(review): may not follow links
+      if (Type != file_type::directory_file && Type != file_type::regular_file)
+        break; // e.g. symlink to a dir: can't classify, don't claim a listing
+      recordState(It->path(),
+                  Type == file_type::directory_file ? DirUnknownChildren : File);
+      It.increment(EC);
+      if (EC) break; // listing is incomplete
+    }
+    recordState(NormPath, DirUnenumerable); // partial info: never retry listing
+    return true;
+  }
+
+  SmallString<256> normalizePath(const Twine &Path) {
+    SmallString<256> Result;
+    Path.toVector(Result);
+    makeAbsolute(Result); // NOTE(review): any result of this call is dropped
+    path::remove_dots(Result, /*remove_dot_dot=*/true);
+    return Result;
+  }
+
+  std::mutex CacheMu; // taken around every access to the two maps below
+  StringMap<PathState> Cache;
+  StringMap<unsigned> ReadDirAttempts;
+};
+} // namespace
+
+std::unique_ptr<FileSystem>
+avoidStats(llvm::IntrusiveRefCntPtr<FileSystem> FS) {
+  return llvm::make_unique<StatLessFS>(std::move(FS)); // FS becomes the base
+}
+
+} // namespace vfs
+} // namespace clang
Index: include/clang/Basic/VirtualFileSystem.h
===================================================================
--- include/clang/Basic/VirtualFileSystem.h
+++ include/clang/Basic/VirtualFileSystem.h
@@ -461,6 +461,9 @@
   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
 };
 
+/// Wrap a filesystem in a cache to avoid stats on missing files.
+std::unique_ptr<FileSystem> avoidStats(llvm::IntrusiveRefCntPtr<FileSystem>);
+
 /// Get a globally unique ID for a virtual file or directory.
 llvm::sys::fs::UniqueID getNextVirtualUniqueID();
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to