sammccall updated this revision to Diff 167126.
sammccall added a comment.
Removed commented-out code.
Repository:
rC Clang
https://reviews.llvm.org/D52549
Files:
include/clang/Basic/VirtualFileSystem.h
lib/Basic/AvoidStatsVFS.cpp
lib/Basic/CMakeLists.txt
lib/Basic/VirtualFileSystem.cpp
Index: lib/Basic/VirtualFileSystem.cpp
===================================================================
--- lib/Basic/VirtualFileSystem.cpp
+++ lib/Basic/VirtualFileSystem.cpp
@@ -303,11 +303,6 @@
return llvm::sys::fs::real_path(Path, Output);
}
-IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
- static IntrusiveRefCntPtr<FileSystem> FS = new RealFileSystem();
- return FS;
-}
-
namespace {
class RealFSDirIter : public clang::vfs::detail::DirIterImpl {
@@ -2141,3 +2136,75 @@
return *this;
}
+
+namespace {
+// Debugging aid: a FileSystem decorator that forwards every operation to the
+// wrapped filesystem, counts how often each operation is invoked (for status
+// and open, split by success/failure), and dumps the counters to stderr.
+class StatFS : public FileSystem {
+public:
+  // Name labels this instance in the log output. Only the pointer is stored,
+  // so the string must outlive the StatFS. FS is the filesystem that all
+  // operations are forwarded to.
+  StatFS(const char *Name, llvm::IntrusiveRefCntPtr<FileSystem> FS)
+      : Name(Name), FS(std::move(FS)) {
+    llvm::errs() << "created statfs " << Name << "\n";
+  }
+
+  // Forwards to the wrapped filesystem and counts the call by outcome.
+  llvm::ErrorOr<Status> status(const Twine &Path) override {
+    auto Ret = FS->status(Path);
+    bump(Ret ? StatusOK : StatusErr);
+    return Ret;
+  }
+
+  // Forwards to the wrapped filesystem and counts the call by outcome.
+  llvm::ErrorOr<std::unique_ptr<File>>
+  openFileForRead(const Twine &Path) override {
+    auto Ret = FS->openFileForRead(Path);
+    bump(Ret ? OpenOK : OpenErr);
+    return Ret;
+  }
+
+  // The remaining operations are counted before being forwarded; their
+  // outcome is not tracked.
+  directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
+    bump(DirBegin);
+    return FS->dir_begin(Dir, EC);
+  }
+
+  std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
+    bump(SetCWD);
+    return FS->setCurrentWorkingDirectory(Path);
+  }
+
+  llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+    bump(GetCWD);
+    return FS->getCurrentWorkingDirectory();
+  }
+
+  std::error_code getRealPath(const Twine &Path,
+                              SmallVectorImpl<char> &Output) const override {
+    bump(GetRealPath);
+    return FS->getRealPath(Path, Output);
+  }
+
+private:
+  // Number of operations between two dumps of the counters. With a value of 1
+  // the counters are dumped after every operation, which is what the previous
+  // `++All % 1 == 0` condition amounted to.
+  constexpr static unsigned ReportInterval = 1;
+
+  // Increments the counter I and the total; every ReportInterval operations
+  // all counters are written to stderr. const (with mutable counters) so that
+  // it can be called from the const FileSystem methods.
+  void bump(std::atomic<unsigned> &I) const {
+    ++I;
+    if (++All % ReportInterval == 0) {
+      // Each counter is read separately, so concurrent callers may observe a
+      // snapshot that mixes values from different moments.
+      llvm::errs() << "== FILESYSTEM " << Name << " ==\n"
+                   << "Status: " << StatusOK << "+" << StatusErr << "\n"
+                   << "Open: " << OpenOK << "+" << OpenErr << "\n"
+                   << "Dir: " << DirBegin << "\n"
+                   << "GetRealPath: " << GetRealPath << "\n"
+                   << "SetCWD: " << SetCWD << "\n"
+                   << "GetCWD: " << GetCWD << "\n"
+                   << "===========================\n";
+    }
+  }
+
+  const char *Name;
+  llvm::IntrusiveRefCntPtr<FileSystem> FS;
+  // Per-operation counters; "OK"/"Err" pairs are printed as "<ok>+<err>".
+  mutable std::atomic<unsigned> StatusOK = {0}, StatusErr = {0}, OpenOK = {0},
+                                OpenErr = {0}, DirBegin = {0},
+                                GetRealPath = {0}, SetCWD = {0}, GetCWD = {0};
+  // Total number of operations seen, used to decide when to dump.
+  mutable std::atomic<unsigned> All = {0};
+};
+
+} // namespace
+
+// Returns the process-wide filesystem stack, built once on first use:
+// RealFileSystem, wrapped in a StatFS labelled "Real", wrapped by
+// avoidStats(), wrapped in a StatFS labelled "outer".
+IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
+  static IntrusiveRefCntPtr<FileSystem> FS = [] {
+    // Assemble the layers innermost first.
+    IntrusiveRefCntPtr<FileSystem> Counted(
+        new StatFS("Real", new RealFileSystem()));
+    // avoidStats() hands back a unique_ptr; release it into the ref-counted
+    // pointer that takes over ownership.
+    IntrusiveRefCntPtr<FileSystem> Pruned(
+        avoidStats(std::move(Counted)).release());
+    return IntrusiveRefCntPtr<FileSystem>(
+        new StatFS("outer", std::move(Pruned)));
+  }();
+  return FS;
+}
Index: lib/Basic/CMakeLists.txt
===================================================================
--- lib/Basic/CMakeLists.txt
+++ lib/Basic/CMakeLists.txt
@@ -46,6 +46,7 @@
add_clang_library(clangBasic
Attributes.cpp
+ AvoidStatsVFS.cpp
Builtins.cpp
CharInfo.cpp
Cuda.cpp
Index: lib/Basic/AvoidStatsVFS.cpp
===================================================================
--- /dev/null
+++ lib/Basic/AvoidStatsVFS.cpp
@@ -0,0 +1,296 @@
+//===- AvoidStatsVFS.cpp - Implements a stat-reducing VFS wrapper ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// We implement a VFS wrapper that uses directory listings to prune status() and
+// open() operations for files that *do not exist*.
+//
+// These operations are common in clang because of include paths.
+// With an include path of {A, B, ...}, an #include <foo/bar> directive results
+// in attempts to open or stat {A/foo/bar, B/foo/bar, ...}.
+//
+// This is expensive because this typically takes one syscall per path, so we
+// have O(NumIncludes * IncludePathLen) syscalls.
+//
+// To optimize this, we can list parent directories such as A/.
+// If A only contains {config.h}, attempts to open A/foo/bar, A/foo/baz, A/qux
+// can all fail instantly.
+// Listing a directory tends to be a single syscall, and in practice most
+// missing files can be recognized by looking at the same few directories.
+// In practice the number of syscalls is O(NumIncludes + IncludePathLen).
+//
+// Our constant factor is higher, but this improves performance for large TUs
+// with many include paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/VirtualFileSystem.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Path.h"
+#include <mutex>
+
+namespace clang {
+namespace vfs {
+namespace {
+using namespace llvm;
+namespace path = llvm::sys::path;
+using llvm::sys::fs::file_type;
+
+// A ProxyFileSystem that remembers what it has learned about paths, so that
+// status() and openFileForRead() on paths known not to exist can fail without
+// calling into the underlying filesystem.
+//
+// Knowledge comes from the results of forwarded status()/open calls and from
+// listing directories that are asked about often enough (ReadDirThreshold).
+// Nothing in this class removes cache entries, so changes made to the
+// underlying filesystem after a path was cached are not observed.
+//
+// Cache and ReadDirAttempts are guarded by CacheMu.
+class StatLessFS : public ProxyFileSystem {
+public:
+  // In fact we only read a directory once we've wanted its contents several
+  // times. This avoids a common pattern:
+  //  - we read files under /some/long/path/...
+  //  - we don't read anything else under /some/long/...
+  //  - we readdir(/some/long), which tells us that /some/long/path is a dir
+  //  - we still need to readdir(/some/long/path) to get its contents
+  //  - that also tells us it's a directory, so readdir(/some/long) was useless
+  constexpr static unsigned ReadDirThreshold = 3;
+
+  StatLessFS(IntrusiveRefCntPtr<FileSystem> Base)
+      : ProxyFileSystem(std::move(Base)) {}
+
+  // Opens Path for reading. Fails immediately with no_such_file_or_directory
+  // if the cache shows that Path cannot be a file; otherwise forwards to the
+  // underlying filesystem and records the outcome.
+  llvm::ErrorOr<std::unique_ptr<vfs::File>>
+  openFileForRead(const Twine &Path) override {
+    auto NormPath = normalizePath(Path);
+    if (!mayBeFile</*OrDir=*/false>(NormPath))
+      return std::make_error_code(std::errc::no_such_file_or_directory);
+    auto Result = ProxyFileSystem::openFileForRead(Path);
+    // NOTE(review): every failure is recorded as "missing or a directory",
+    // whatever the error code was (e.g. permission errors) - confirm intended.
+    recordState(NormPath, Result ? File : MissingOrDir);
+    return Result;
+  }
+
+  // Stats Path. Fails immediately with no_such_file_or_directory if the cache
+  // shows that Path is neither a file nor a directory; otherwise forwards to
+  // the underlying filesystem and records the outcome.
+  llvm::ErrorOr<Status> status(const Twine &Path) override {
+    auto NormPath = normalizePath(Path);
+    if (!mayBeFile</*OrDir=*/true>(NormPath))
+      return std::make_error_code(std::errc::no_such_file_or_directory);
+    auto Result = ProxyFileSystem::status(Path);
+    // NOTE(review): every failure is recorded as Missing, whatever the error
+    // code was - confirm intended.
+    PathState Observed = Missing;
+    if (Result)
+      Observed = Result->getType() == file_type::directory_file
+                     ? DirUnknownChildren
+                     : File;
+    recordState(NormPath, Observed);
+    return Result;
+  }
+
+private:
+  // Upper bound on the number of entries read when listing a directory.
+  // Directories that do not reach their end within this many entries are
+  // marked DirUnenumerable instead of being listed completely.
+  constexpr static unsigned MaxListedEntries = 100;
+
+  // What we know about a path, which may be partial information.
+  // Values are ordered: higher values have more info and "upgrade" lower ones.
+  enum PathState {
+    // Ambiguous states.
+    Unknown,
+    MissingOrDir,
+    MissingOrFile,
+    // Type known, but not all relevant details.
+    DirUnknownChildren,
+    DirUnenumerable, // unknown children, but don't try to enumerate again!
+    // Complete information.
+    DirKnownChildren,
+    File,
+    Missing,
+  };
+
+  // Returns false only if the cache proves that NormPath is not a file (or,
+  // when OrDir is set, neither a file nor a directory). Returns true when the
+  // path may exist and the underlying filesystem has to be consulted.
+  // May list parent directories as a side effect (see populateCacheForDir).
+  template <bool OrDir> bool mayBeFile(StringRef NormPath) {
+    // First, just see if we can answer from the cache.
+    {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      switch (Cache.lookup(NormPath)) {
+      case Unknown:
+      case MissingOrFile:
+        break;
+      case MissingOrDir:
+        if (!OrDir) {
+          llvm::errs() << "hitB1: " << NormPath << "\n";
+          return false;
+        }
+        break;
+      case DirUnknownChildren:
+      case DirUnenumerable:
+      case DirKnownChildren:
+        if (!OrDir)
+          llvm::errs() << "hitB1: " << NormPath << "\n";
+        return OrDir;
+      case File:
+        return true;
+      case Missing:
+        llvm::errs() << "hitB1: " << NormPath << "\n";
+        return false;
+      }
+    }
+
+    // Next, maybe we can get an answer based on the parent directory.
+    auto Parent = path::parent_path(NormPath);
+    if (Parent.empty())
+      return OrDir; // Root is a directory.
+    // We may need to populate its cache entry.
+    if (!populateCacheForDir(Parent))
+      return true; // No more information.
+
+    std::lock_guard<std::mutex> Lock(CacheMu);
+    switch (Cache.lookup(Parent)) {
+    case Unknown:
+    case MissingOrDir:
+    case DirUnknownChildren:
+      // populateCacheForDir's postcondition excludes these states, but the
+      // lock was dropped in between, so another thread may have changed the
+      // entry. Treat it as "no information" and ask the real filesystem.
+      return true;
+    case MissingOrFile:
+    case File:
+    case Missing:
+      llvm::errs() << "hitB2: " << Parent << "\n";
+      return false;
+    case DirUnenumerable:
+      return true;
+    case DirKnownChildren:
+      break; // Answer from the child's own entry, below.
+    }
+
+    // The point: we listed the parent, all children are now in cache.
+    switch (Cache.lookup(NormPath)) {
+    case Unknown:
+    case MissingOrDir:
+    case MissingOrFile:
+    case Missing:
+      // Listing the parent upgrades every existing child to a Dir*/File
+      // state. A child that is still Unknown or ambiguous (e.g. MissingOrDir
+      // left behind by an earlier failed open) was not in the listing, so it
+      // does not exist.
+      llvm::errs() << "hitB3: " << Parent << "\n";
+      return false;
+    case DirUnknownChildren:
+    case DirUnenumerable:
+    case DirKnownChildren:
+      return OrDir;
+    case File:
+      return true;
+    }
+    llvm_unreachable("unhandled PathState");
+  }
+
+  // Merges State into the cache entry for NormPath, keeping whichever state
+  // is more informative according to the PathState ordering.
+  void recordState(StringRef NormPath, PathState State) {
+    std::lock_guard<std::mutex> Lock(CacheMu);
+    auto &Current = Cache[NormPath];
+    // Sherlock Holmes would be proud of this special case: "missing or dir"
+    // combined with "missing or file" leaves only "missing".
+    if ((Current == MissingOrDir && State == MissingOrFile) ||
+        (Current == MissingOrFile && State == MissingOrDir)) {
+      Current = Missing;
+      return;
+    }
+    Current = std::max(Current, State);
+  }
+
+  // Records that listing NormPath as a directory failed.
+  // If the path is already known to be a directory this means its children
+  // cannot be listed (DirUnenumerable); recording MissingOrFile would be
+  // dropped by the state ordering and leave the entry at DirUnknownChildren,
+  // breaking populateCacheForDir's postcondition. For every other current
+  // state the result is the same as recordState(NormPath, MissingOrFile).
+  void recordListFailure(StringRef NormPath) {
+    std::lock_guard<std::mutex> Lock(CacheMu);
+    auto &Current = Cache[NormPath];
+    switch (Current) {
+    case Unknown:
+      Current = MissingOrFile;
+      break;
+    case MissingOrDir:
+      Current = Missing; // Not a directory after all.
+      break;
+    case DirUnknownChildren:
+      Current = DirUnenumerable;
+      break;
+    case MissingOrFile:
+    case DirUnenumerable:
+    case DirKnownChildren:
+    case File:
+    case Missing:
+      break; // Already at least as informative.
+    }
+  }
+
+  // Roughly: if the directory children are not known, readdir() to fill it.
+  // But details are important. If we know the directory doesn't exist, we
+  // shouldn't read from it. And we must populate parent caches to know that.
+  // Returns false if the directory has not been requested often enough yet
+  // (ReadDirThreshold) and the cache was left without the needed information.
+  // Postcondition: if returning true, cache will indicate either that:
+  //  - NormPath is not a directory
+  //  - NormPath is a directory, and all its children are cached
+  //  - NormPath is a directory whose children can't be listed
+  bool populateCacheForDir(StringRef NormPath) {
+    // First, just see if we have any work to do.
+    {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      switch (Cache.lookup(NormPath)) {
+      case Unknown:
+      case MissingOrDir:
+      case DirUnknownChildren:
+        break; // Need to populate cache with more info.
+      case MissingOrFile:
+      case DirUnenumerable:
+      case DirKnownChildren:
+      case File:
+      case Missing:
+        llvm::errs() << "hitA1: " << NormPath << "\n";
+        return true; // Cache already satisfies postcondition.
+      }
+    }
+    // Next, populate parent info and see if that determines our state.
+    auto Parent = path::parent_path(NormPath);
+    if (!Parent.empty() && populateCacheForDir(Parent)) {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      switch (Cache.lookup(Parent)) {
+      case MissingOrFile:
+      case File:
+      case Missing:
+        // Child can't exist if parent is not a directory.
+        Cache[NormPath] = Missing;
+        return true;
+      case Unknown:
+      case MissingOrDir:
+      case DirUnknownChildren:
+        // Excluded by the recursive call's postcondition, but the lock was
+        // dropped in between and another thread may have changed the entry.
+        // Handle it like a parent that tells us nothing about its children.
+      case DirUnenumerable:
+        break; // No info about child, need to read it.
+      case DirKnownChildren:
+        // Parent is a directory, and we can tell whether child is.
+        switch (Cache.lookup(NormPath)) {
+        case Unknown:
+        case MissingOrDir:
+        case MissingOrFile:
+          // Not upgraded by the parent's listing, so it is not there. The
+          // ambiguous states can be left over from a failed open or readdir
+          // that happened before the parent was listed.
+          Cache[NormPath] = Missing;
+          llvm::errs() << "hitA2: " << Parent << "\n";
+          return true;
+        case DirUnknownChildren:
+          break; // Need to list children.
+        case DirUnenumerable:
+        case DirKnownChildren:
+          // populateCacheForDir shouldn't do this, but we might be racing.
+          return true;
+        case File:
+        case Missing:
+          llvm::errs() << "hitA2: " << Parent << "\n";
+          return true; // Cache now satisfies postcondition.
+        }
+        break;
+      }
+    }
+    // Finally, we need to read the directory.
+    {
+      std::lock_guard<std::mutex> Lock(CacheMu);
+      if (++ReadDirAttempts[NormPath] < ReadDirThreshold) // not hot enough
+        return false;
+    }
+    std::error_code EC;
+    llvm::errs() << "Induced readdir " << NormPath << "\n";
+    auto It = dir_begin(NormPath, EC);
+    if (EC) {
+      recordListFailure(NormPath);
+      return true;
+    }
+    for (unsigned I = 0; I < MaxListedEntries; ++I) {
+      if (It == clang::vfs::directory_iterator()) { // at end of list
+        recordState(NormPath, DirKnownChildren);
+        return true;
+      }
+      // NOTE(review): every entry that is not reported as a directory is
+      // cached as File. If the iterator can report other types (symlinks,
+      // unknown), paths beneath such an entry would be treated as missing -
+      // confirm what It->type() yields for symlinks to directories.
+      recordState(It->path(), It->type() == file_type::directory_file
+                                  ? DirUnknownChildren
+                                  : File);
+      It.increment(EC);
+      if (EC) {
+        recordState(NormPath, DirUnenumerable);
+        return true;
+      }
+    }
+    // Too many entries: give up, and remember not to try again.
+    recordState(NormPath, DirUnenumerable);
+    return true;
+  }
+
+  // Produces the cache key for Path: made absolute via makeAbsolute(), with
+  // "." and ".." components removed. The error code returned by
+  // makeAbsolute() is ignored.
+  SmallString<256> normalizePath(const Twine &Path) {
+    SmallString<256> Result;
+    Path.toVector(Result);
+    makeAbsolute(Result);
+    path::remove_dots(Result, /*remove_dot_dot=*/true);
+    return Result;
+  }
+
+  std::mutex CacheMu;
+  // Normalized path -> best known state. Absent entries read as Unknown.
+  StringMap<PathState> Cache;
+  // Normalized directory path -> number of times a listing was wanted.
+  StringMap<unsigned> ReadDirAttempts;
+};
+} // namespace
+
+// Wraps FS in a StatLessFS and returns the wrapper as a plain FileSystem.
+std::unique_ptr<FileSystem>
+avoidStats(llvm::IntrusiveRefCntPtr<FileSystem> FS) {
+  std::unique_ptr<FileSystem> Wrapper =
+      llvm::make_unique<StatLessFS>(std::move(FS));
+  return Wrapper;
+}
+
+} // namespace vfs
+} // namespace clang
Index: include/clang/Basic/VirtualFileSystem.h
===================================================================
--- include/clang/Basic/VirtualFileSystem.h
+++ include/clang/Basic/VirtualFileSystem.h
@@ -461,6 +461,9 @@
std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
};
+/// Wrap a filesystem in a cache that lets status() and openFileForRead() fail
+/// for files known to be missing without querying the wrapped filesystem.
+std::unique_ptr<FileSystem> avoidStats(llvm::IntrusiveRefCntPtr<FileSystem>);
+
/// Get a globally unique ID for a virtual file or directory.
llvm::sys::fs::UniqueID getNextVirtualUniqueID();
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits