Looks like my comments on the review got completely ignored and not even acknowledged.
On Thu, Jan 19, 2023 at 1:35 AM Fred Riss via cfe-commits <cfe-commits@lists.llvm.org> wrote: > > > Author: Fred Riss > Date: 2023-01-18T14:31:27-08:00 > New Revision: a033dbbe5c43247b60869b008e67ed86ed230eaa > > URL: > https://github.com/llvm/llvm-project/commit/a033dbbe5c43247b60869b008e67ed86ed230eaa > DIFF: > https://github.com/llvm/llvm-project/commit/a033dbbe5c43247b60869b008e67ed86ed230eaa.diff > > LOG: [Clang] Give Clang the ability to use a shared stat cache > > Every Clang instance uses an internal FileSystemStatCache to avoid > stating the same content multiple times. However, different instances > of Clang will contend for filesystem access for their initial stats > during HeaderSearch or module validation. > > On some workloads, the time spent in the kernel in these concurrent > stat calls has been measured to be over 20% of the overall compilation > time. This is extremely wasteful when most of the stat calls target > mostly immutable content like an SDK. > > This commit introduces a new tool `clang-stat-cache` able to generate > an OnDiskHashmap containing the stat data for a given filesystem > hierarchy. > > The driver part of this has been modeled after -ivfsoverlay given > the similarities with what it influences. It introduces a new > -ivfsstatcache driver option to instruct Clang to use a stat cache > generated by `clang-stat-cache`. These stat caches are inserted at > the bottom of the VFS stack (right above the real filesystem). 
> > Differential Revision: https://reviews.llvm.org/D136651 > > Added: > clang/test/Driver/vfsstatcache.c > clang/test/clang-stat-cache/cache-effects.c > clang/test/clang-stat-cache/errors.test > clang/tools/clang-stat-cache/CMakeLists.txt > clang/tools/clang-stat-cache/clang-stat-cache.cpp > llvm/include/llvm/Support/StatCacheFileSystem.h > llvm/lib/Support/StatCacheFileSystem.cpp > > Modified: > clang/include/clang/Basic/DiagnosticFrontendKinds.td > clang/include/clang/Driver/Options.td > clang/include/clang/Frontend/CompilerInvocation.h > clang/include/clang/Lex/HeaderSearchOptions.h > clang/lib/Frontend/ASTUnit.cpp > clang/lib/Frontend/CompilerInvocation.cpp > clang/test/CMakeLists.txt > clang/tools/CMakeLists.txt > llvm/lib/Support/CMakeLists.txt > llvm/unittests/Support/VirtualFileSystemTest.cpp > > Removed: > > > > ################################################################################ > diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td > b/clang/include/clang/Basic/DiagnosticFrontendKinds.td > index d0f672ae5a1bd..e106858688ac7 100644 > --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td > +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td > @@ -256,6 +256,11 @@ def err_test_module_file_extension_version : Error< > "test module file extension '%0' has > diff erent version (%1.%2) than expected " > "(%3.%4)">; > > +def err_missing_vfs_stat_cache_file : Error< > + "stat cache file '%0' not found">, DefaultFatal; > +def err_invalid_vfs_stat_cache : Error< > + "invalid stat cache file '%0'">, DefaultFatal; > + > def err_missing_vfs_overlay_file : Error< > "virtual filesystem overlay file '%0' not found">, DefaultFatal; > def err_invalid_vfs_overlay : Error< > > diff --git a/clang/include/clang/Driver/Options.td > b/clang/include/clang/Driver/Options.td > index ba49b335cf287..9334e6319d57b 100644 > --- a/clang/include/clang/Driver/Options.td > +++ b/clang/include/clang/Driver/Options.td > @@ -3357,6 +3357,8 @@ def 
iwithsysroot : JoinedOrSeparate<["-"], > "iwithsysroot">, Group<clang_i_Group> > HelpText<"Add directory to SYSTEM include search path, " > "absolute paths are relative to -isysroot">, > MetaVarName<"<directory>">, > Flags<[CC1Option]>; > +def ivfsstatcache : JoinedOrSeparate<["-"], "ivfsstatcache">, > Group<clang_i_Group>, Flags<[CC1Option]>, > + HelpText<"Use the stat data cached in file instead of doing filesystem > syscalls. See clang-stat-cache utility.">; > def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, > Group<clang_i_Group>, Flags<[CC1Option]>, > HelpText<"Overlay the virtual filesystem described by file over the real > file system">; > def imultilib : Separate<["-"], "imultilib">, Group<gfortran_Group>; > > diff --git a/clang/include/clang/Frontend/CompilerInvocation.h > b/clang/include/clang/Frontend/CompilerInvocation.h > index 254f048ed3c7e..9cc6aa5c4d8a1 100644 > --- a/clang/include/clang/Frontend/CompilerInvocation.h > +++ b/clang/include/clang/Frontend/CompilerInvocation.h > @@ -296,6 +296,7 @@ IntrusiveRefCntPtr<llvm::vfs::FileSystem> > createVFSFromCompilerInvocation( > > IntrusiveRefCntPtr<llvm::vfs::FileSystem> > createVFSFromOverlayFiles(ArrayRef<std::string> VFSOverlayFiles, > + ArrayRef<std::string> VFSStatCacheFiles, > DiagnosticsEngine &Diags, > IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS); > > > diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h > b/clang/include/clang/Lex/HeaderSearchOptions.h > index 6436a9b3bde20..548f7d4493de4 100644 > --- a/clang/include/clang/Lex/HeaderSearchOptions.h > +++ b/clang/include/clang/Lex/HeaderSearchOptions.h > @@ -181,6 +181,9 @@ class HeaderSearchOptions { > /// of computing the module hash. > llvm::SmallSetVector<llvm::CachedHashString, 16> ModulesIgnoreMacros; > > + /// The set of user-provided stat cache files. > + std::vector<std::string> VFSStatCacheFiles; > + > /// The set of user-provided virtual filesystem overlay files. 
> std::vector<std::string> VFSOverlayFiles; > > @@ -250,6 +253,10 @@ class HeaderSearchOptions { > SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader); > } > > + void AddVFSStatCacheFile(StringRef Name) { > + VFSStatCacheFiles.push_back(std::string(Name)); > + } > + > void AddVFSOverlayFile(StringRef Name) { > VFSOverlayFiles.push_back(std::string(Name)); > } > > diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp > index 3b4f25182ac95..dbf55d95746e9 100644 > --- a/clang/lib/Frontend/ASTUnit.cpp > +++ b/clang/lib/Frontend/ASTUnit.cpp > @@ -574,7 +574,7 @@ class ASTInfoCollector : public ASTReaderListener { > // performs the initialization too late (once both target and language > // options are read). > PP.getFileManager().setVirtualFileSystem(createVFSFromOverlayFiles( > - HSOpts.VFSOverlayFiles, PP.getDiagnostics(), > + HSOpts.VFSOverlayFiles, HSOpts.VFSStatCacheFiles, > PP.getDiagnostics(), > PP.getFileManager().getVirtualFileSystemPtr())); > > InitializedHeaderSearchPaths = true; > > diff --git a/clang/lib/Frontend/CompilerInvocation.cpp > b/clang/lib/Frontend/CompilerInvocation.cpp > index 0bb9c8c83c63b..b0ef37fad2227 100644 > --- a/clang/lib/Frontend/CompilerInvocation.cpp > +++ b/clang/lib/Frontend/CompilerInvocation.cpp > @@ -83,6 +83,7 @@ > #include "llvm/Support/Path.h" > #include "llvm/Support/Process.h" > #include "llvm/Support/Regex.h" > +#include "llvm/Support/StatCacheFileSystem.h" > #include "llvm/Support/VersionTuple.h" > #include "llvm/Support/VirtualFileSystem.h" > #include "llvm/Support/raw_ostream.h" > @@ -3084,6 +3085,9 @@ static void > GenerateHeaderSearchArgs(HeaderSearchOptions &Opts, > GenerateArg(Args, Opt, P.Prefix, SA); > } > > + for (const std::string &F : Opts.VFSStatCacheFiles) > + GenerateArg(Args, OPT_ivfsstatcache, F, SA); > + > for (const std::string &F : Opts.VFSOverlayFiles) > GenerateArg(Args, OPT_ivfsoverlay, F, SA); > } > @@ -3217,6 +3221,9 @@ static bool 
ParseHeaderSearchArgs(HeaderSearchOptions > &Opts, ArgList &Args, > Opts.AddSystemHeaderPrefix( > A->getValue(), A->getOption().matches(OPT_system_header_prefix)); > > + for (const auto *A : Args.filtered(OPT_ivfsstatcache)) > + Opts.AddVFSStatCacheFile(A->getValue()); > + > for (const auto *A : Args.filtered(OPT_ivfsoverlay)) > Opts.AddVFSOverlayFile(A->getValue()); > > @@ -4747,12 +4754,31 @@ clang::createVFSFromCompilerInvocation( > const CompilerInvocation &CI, DiagnosticsEngine &Diags, > IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) { > return createVFSFromOverlayFiles(CI.getHeaderSearchOpts().VFSOverlayFiles, > + > CI.getHeaderSearchOpts().VFSStatCacheFiles, > Diags, std::move(BaseFS)); > } > > IntrusiveRefCntPtr<llvm::vfs::FileSystem> clang::createVFSFromOverlayFiles( > - ArrayRef<std::string> VFSOverlayFiles, DiagnosticsEngine &Diags, > + ArrayRef<std::string> VFSOverlayFiles, > + ArrayRef<std::string> VFSStatCacheFiles, DiagnosticsEngine &Diags, > IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) { > + for (const auto &File : VFSStatCacheFiles) { > + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer = > + BaseFS->getBufferForFile(File); > + if (!Buffer) { > + Diags.Report(diag::err_missing_vfs_stat_cache_file) << File; > + continue; > + } > + > + auto StatCache = > + llvm::vfs::StatCacheFileSystem::create(std::move(*Buffer), BaseFS); > + > + if (errorToBool(StatCache.takeError())) > + Diags.Report(diag::err_invalid_vfs_stat_cache) << File; > + else > + BaseFS = std::move(*StatCache); > + } > + > if (VFSOverlayFiles.empty()) > return BaseFS; > > > diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt > index 1d6377b5f2d80..cd3775b55a559 100644 > --- a/clang/test/CMakeLists.txt > +++ b/clang/test/CMakeLists.txt > @@ -71,6 +71,7 @@ list(APPEND CLANG_TEST_DEPS > clang-refactor > clang- > diff > clang-scan-deps > + clang-stat-cache > diagtool > hmaptool > ) > > diff --git a/clang/test/Driver/vfsstatcache.c > 
b/clang/test/Driver/vfsstatcache.c > new file mode 100644 > index 0000000000000..ec3c279d53d08 > --- /dev/null > +++ b/clang/test/Driver/vfsstatcache.c > @@ -0,0 +1,5 @@ > +// RUN: %clang -ivfsstatcache foo.h -### %s 2>&1 | FileCheck %s > +// CHECK: "-ivfsstatcache" "foo.h" > + > +// RUN: not %clang -ivfsstatcache foo.h %s 2>&1 | FileCheck > -check-prefix=CHECK-MISSING %s > +// CHECK-MISSING: stat cache file 'foo.h' not found > > diff --git a/clang/test/clang-stat-cache/cache-effects.c > b/clang/test/clang-stat-cache/cache-effects.c > new file mode 100644 > index 0000000000000..bf2e2db447e2a > --- /dev/null > +++ b/clang/test/clang-stat-cache/cache-effects.c > @@ -0,0 +1,63 @@ > +#include "foo.h" > + > +// Testing the effects of a cache is tricky, because it's just supposed to > speed > +// things up, not change the behavior. In this test, we are using an outdated > +// cache to trick HeaderSearch into finding the wrong module and show that > it is > +// being used. > + > +// Clear the module cache. > +// RUN: rm -rf %t > +// RUN: mkdir -p %t/Inputs > +// RUN: mkdir -p %t/Inputs/Foo1 > +// RUN: mkdir -p %t/Inputs/Foo2 > +// RUN: mkdir -p %t/modules-to-compare > + > +// === > +// Create a Foo module in the Foo1 direcotry. > +// RUN: echo 'void meow(void);' > %t/Inputs/Foo1/foo.h > +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo1/module.map > + > +// === > +// Compile the module. Note that the compiler has 2 header search paths: > +// Foo2 and Foo1 in that order. The module has been created in Foo1, and > +// it is the only version available now. 
> +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps > -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I > %t/Inputs/Foo2 -I %t/Inputs/Foo1 -Rmodule-build %s 2>&1 > +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-before.pcm > + > +// === > +// Create a stat cache for our inputs directory > +// RUN: clang-stat-cache %t/Inputs -o %t/stat.cache > + > +// === > +// As a sanity check, re-run the same compilation with the cache and check > that > +// the module does not change. > +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps > -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I > %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache %s > -Rmodule-build 2>&1 > +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm > + > +// RUN: > diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm > + > +// === > +// Now introduce a > diff erent Foo module in the Foo2 directory which is before > +// Foo1 in the search paths. > +// RUN: echo 'void meow2(void);' > %t/Inputs/Foo2/foo.h > +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo2/module.map > + > +// === > +// Because we're using the (now-outdated) stat cache, this compilation > +// should still be using the first module. It will not see the new one > +// which is earlier in the search paths. 
> +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps > -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I > %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build > -Rmodule-import %s 2>&1 > +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm > + > +// RUN: > diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm > + > +// === > +// Regenerate the stat cache for our Inputs directory > +// RUN: clang-stat-cache -f %t/Inputs -o %t/stat.cache 2>&1 > + > +// === > +// Use the module and now see that we are recompiling the new one. > +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps > -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I > %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build > %s 2>&1 > +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm > + > +// RUN: not > diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm > > diff --git a/clang/test/clang-stat-cache/errors.test > b/clang/test/clang-stat-cache/errors.test > new file mode 100644 > index 0000000000000..ab73a1348f10e > --- /dev/null > +++ b/clang/test/clang-stat-cache/errors.test > @@ -0,0 +1,42 @@ > +RUN: rm -rf %t > +RUN: mkdir -p %t > + > +RUN: not clang-stat-cache %t/not-there -o %t/stat.cache 2>&1 | FileCheck > --check-prefix=NO-SUCH-DIR %s > +NO-SUCH-DIR: Failed to stat the target directory: {{[Nn]}}o such file or > directory > + > +RUN: not clang-stat-cache %t -o %t/not-there/stat.cache 2>&1 | FileCheck > --check-prefix=NO-SUCH-FILE %s > +NO-SUCH-FILE: Failed to open cache file: '{{.*}}': {{[Nn]}}o such file or > directory > + > +# Use mixed-case directories to exercise the case insensitive implementation. 
> +RUN: mkdir -p %t/Dir > +RUN: mkdir -p %t/Dir2 > + > +# Try to overwrite a few invalid caches > +RUN: echo "Not a stat cache" > %t/stat.cache > +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck > --check-prefix=INVALID-CACHE %s > +RUN: echo "Not a stat cache, but bigger than the stat cache header" > > %t/stat.cache > +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck > --check-prefix=INVALID-CACHE %s > +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > > %t/stat.cache > +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck > --check-prefix=INVALID-CACHE %s > + > +INVALID-CACHE: The output cache file exists and is not a valid stat cache. > Aborting. > + > +# Test the force flag > +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > > %t/stat.cache > +RUN: clang-stat-cache %t/Dir -f -o %t/stat.cache 2>&1 | FileCheck > --check-prefix=INVALID-CACHE-FORCE %s > +INVALID-CACHE-FORCE: The output cache file exists and is not a valid stat > cache. Forced update. > + > +# Generate a valid cache for dir > +RUN: rm %t/stat.cache > +RUN: clang-stat-cache %t/Dir -o %t/stat.cache > +RUN: cp %t/stat.cache %t/stat.cache.save > + > +# Try with same base direcotry but with extraneous separators > +RUN: clang-stat-cache %t/Dir/// -v -o %t/stat.cache | FileCheck > --check-prefix=EXTRA-SEP %s > +EXTRA-SEP-NOT: Existing cache has > diff erent directory. Regenerating... > +EXTRA-SEP: Cache up-to-date, exiting > + > +# Rewrite the cache with a > diff erent base directory > +RUN: clang-stat-cache %t/Dir2 -o %t/stat.cache 2>&1 | FileCheck > --check-prefix=OTHER-DIR %s > +OTHER-DIR: Existing cache has > diff erent directory. Regenerating... 
> + > > diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt > index f60db6ef0ba34..147555d87dfc5 100644 > --- a/clang/tools/CMakeLists.txt > +++ b/clang/tools/CMakeLists.txt > @@ -15,6 +15,7 @@ add_clang_subdirectory(clang-scan-deps) > if(HAVE_CLANG_REPL_SUPPORT) > add_clang_subdirectory(clang-repl) > endif() > +add_clang_subdirectory(clang-stat-cache) > > add_clang_subdirectory(c-index-test) > > > diff --git a/clang/tools/clang-stat-cache/CMakeLists.txt > b/clang/tools/clang-stat-cache/CMakeLists.txt > new file mode 100644 > index 0000000000000..ab93d8b3d0f0e > --- /dev/null > +++ b/clang/tools/clang-stat-cache/CMakeLists.txt > @@ -0,0 +1,19 @@ > +set(LLVM_LINK_COMPONENTS > + Core > + Support > + ) > + > +add_clang_tool(clang-stat-cache > + clang-stat-cache.cpp > + ) > + > +if(APPLE) > +set(CLANG_STAT_CACHE_LIB_DEPS > + "-framework CoreServices" > + ) > +endif() > + > +clang_target_link_libraries(clang-stat-cache > + PRIVATE > + ${CLANG_STAT_CACHE_LIB_DEPS} > + ) > > diff --git a/clang/tools/clang-stat-cache/clang-stat-cache.cpp > b/clang/tools/clang-stat-cache/clang-stat-cache.cpp > new file mode 100644 > index 0000000000000..183d0d79fadd8 > --- /dev/null > +++ b/clang/tools/clang-stat-cache/clang-stat-cache.cpp > @@ -0,0 +1,318 @@ > +//===- clang-stat-cache.cpp > -----------------------------------------------===// > +// > +// Part of the LLVM Project, under the Apache License v2.0 with LLVM > Exceptions. > +// See https://llvm.org/LICENSE.txt for license information. 
> +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception > +// > +//===----------------------------------------------------------------------===// > + > +#include "llvm/Support/CommandLine.h" > +#include "llvm/Support/FileSystem.h" > +#include "llvm/Support/MemoryBuffer.h" > +#include "llvm/Support/Path.h" > +#include "llvm/Support/StatCacheFileSystem.h" > +#include "llvm/Support/Timer.h" > +#include "llvm/Support/raw_ostream.h" > + > +#include <assert.h> > + > +#ifdef __APPLE__ > +#include <CoreServices/CoreServices.h> > + > +#include <sys/mount.h> > +#include <sys/param.h> > +#endif // __APPLE__ > + > +// The clang-stat-cache utility creates an on-disk cache for the stat data > +// of a file-system tree which is expected to be immutable during a build. > + > +using namespace llvm; > +using llvm::vfs::StatCacheFileSystem; > + > +cl::OptionCategory StatCacheCategory("clang-stat-cache options"); > + > +cl::opt<std::string> OutputFilename("o", cl::Required, > + cl::desc("Specify output filename"), > + cl::value_desc("filename"), > + cl::cat(StatCacheCategory)); > + > +cl::opt<std::string> TargetDirectory(cl::Positional, cl::Required, > + cl::value_desc("dirname"), > + cl::cat(StatCacheCategory)); > + > +cl::opt<bool> Verbose("v", cl::desc("More verbose output")); > +cl::opt<bool> Force("f", cl::desc("Force cache generation")); > + > +#if __APPLE__ > +// Used by checkContentsValidity. See below. > +struct CallbackInfo { > + bool SeenChanges = false; > +}; > + > +// Used by checkContentsValidity. See below. > +static void FSEventsCallback(ConstFSEventStreamRef streamRef, void *CtxInfo, > + size_t numEvents, void *eventPaths, > + const FSEventStreamEventFlags *eventFlags, > + const FSEventStreamEventId *eventIds) { > + CallbackInfo *Info = static_cast<CallbackInfo *>(CtxInfo); > + for (size_t i = 0; i < numEvents; ++i) { > + // The kFSEventStreamEventFlagHistoryDone is set on the last 'historical' > + // event passed to the callback. 
This means it is passed after the > callback > + // all the relevant activity between the StartEvent of the stream and the > + // point the stream was created. > + // If the callback didn't see any other event, it means there haven't > been > + // any alterations to the target directory hierarchy and the cache > contents > + // is still up-to-date. > + if (eventFlags[i] & kFSEventStreamEventFlagHistoryDone) { > + // Let's stop the main queue and go back to our non-queue code. > + CFRunLoopStop(CFRunLoopGetCurrent()); > + break; > + } > + > + // If we see any event outisde of the kFSEventStreamEventFlagHistoryDone > + // one, there have been changes to the target directory. > + Info->SeenChanges = true; > + } > +} > + > +// FSEvents-based check for cache contents validity. We store the latest > +// FSEventStreamEventId in the cache as a ValidityToken and check if any > +// file system events affected the base directory since the cache was > +// generated. > +static bool checkContentsValidity(uint64_t &ValidityToken) { > + CFStringRef TargetDir = CFStringCreateWithCStringNoCopy( > + kCFAllocatorDefault, TargetDirectory.c_str(), kCFStringEncodingASCII, > + kCFAllocatorNull); > + CFArrayRef PathsToWatch = > + CFArrayCreate(nullptr, (const void **)&TargetDir, 1, nullptr); > + CallbackInfo Info; > + FSEventStreamContext Ctx = {0, &Info, nullptr, nullptr, nullptr}; > + FSEventStreamRef Stream; > + CFAbsoluteTime Latency = 0; // Latency in seconds. Do not wait. > + > + // Start at the latest event stored in the cache. > + FSEventStreamEventId StartEvent = ValidityToken; > + // Update the Validity token with the current latest event. > + ValidityToken = FSEventsGetCurrentEventId(); > + > + // Create the stream > + Stream = > + FSEventStreamCreate(NULL, &FSEventsCallback, &Ctx, PathsToWatch, > + StartEvent, Latency, kFSEventStreamCreateFlagNone); > + > + // Associate the stream with the main queue. 
> + FSEventStreamSetDispatchQueue(Stream, dispatch_get_main_queue()); > + // Start the stream (needs the queue to run to do anything). > + if (!FSEventStreamStart(Stream)) { > + errs() << "Failed to create FS event stream. " > + << "Considering the cache up-to-date.\n"; > + return true; > + } > + > + // Start the main queue. It will be exited by our callback when it got > + // confirmed it processed all events. > + CFRunLoopRun(); > + > + return !Info.SeenChanges; > +} > + > +#else // __APPLE__ > + > +// There is no cross-platform way to implement a validity check. If this > +// platform doesn't support it, just consider the cache contents always > +// valid. When that's the case, the tool running cache generation needs > +// to have the knowledge to do it only when needed. > +static bool checkContentsValidity(uint64_t &ValidityToken) { return true; } > + > +#endif // __APPLE__ > + > +// Populate Generator with the stat cache data for the filesystem tree > +// rooted at BasePath. > +static std::error_code > +populateHashTable(StringRef BasePath, > + StatCacheFileSystem::StatCacheWriter &Generator) { > + using namespace llvm; > + using namespace sys::fs; > + > + std::error_code ErrorCode; > + > + // Just loop over the target directory using a recursive iterator. > + // This invocation follows symlinks, so we are going to potentially > + // store the status of the same file multiple times with > diff erent > + // names. > + for (recursive_directory_iterator I(BasePath, ErrorCode), E; > + I != E && !ErrorCode; I.increment(ErrorCode)) { > + StringRef Path = I->path(); > + sys::fs::file_status s; > + // This can fail (broken symlink) and leave the file_status with > + // its default values. The reader knows this. 
> + status(Path, s); > + > + Generator.addEntry(Path, s); > + } > + > + return ErrorCode; > +} > + > +static bool checkCacheValid(int FD, raw_fd_ostream &Out, > + uint64_t &ValidityToken) { > + sys::fs::file_status Status; > + auto EC = sys::fs::status(FD, Status); > + if (EC) { > + llvm::errs() << "fstat failed: " > + << llvm::toString(llvm::errorCodeToError(EC)) << "\n"; > + return false; > + } > + > + auto Size = Status.getSize(); > + if (Size == 0) { > + // New file. > +#ifdef __APPLE__ > + // Get the current (global) FSEvent id and use this as ValidityToken. > + ValidityToken = FSEventsGetCurrentEventId(); > +#endif > + return false; > + } > + > + auto ErrorOrBuffer = MemoryBuffer::getOpenFile( > + sys::fs::convertFDToNativeFile(FD), OutputFilename, Status.getSize()); > + > + // Refuse to write to this cache file if it exists but its contents do > + // not look like a valid cache file. > + StringRef BaseDir; > + bool IsCaseSensitive; > + bool VersionMatch; > + if (auto E = StatCacheFileSystem::validateCacheFile( > + (*ErrorOrBuffer)->getMemBufferRef(), BaseDir, IsCaseSensitive, > + VersionMatch, ValidityToken)) { > + llvm::errs() << "The output cache file exists and is not a valid stat " > + "cache."; > + if (!Force) { > + llvm::errs() << " Aborting.\n"; > + exit(1); > + } > + > + consumeError(std::move(E)); > + llvm::errs() << " Forced update.\n"; > + return false; > + } > + > + if (BaseDir != TargetDirectory && > + (IsCaseSensitive || !BaseDir.equals_insensitive(TargetDirectory))) { > + llvm::errs() << "Existing cache has > diff erent directory. Regenerating...\n"; > + return false; > + } > + > + if (!VersionMatch) { > + llvm::errs() > + << "Exisitng cache has > diff erent version number. Regenerating...\n"; > + return false; > + } > + > + // Basic structure checks have passed. Lets see if we can prove that the > cache > + // contents are still valid. 
> + bool IsValid = checkContentsValidity(ValidityToken); > + if (IsValid) { > + // The cache is valid, but we might have gotten an updated ValidityToken. > + // Update the cache with it as clang-stat-cache is just going to exit > after > + // returning from this function. > + StatCacheFileSystem::updateValidityToken(Out, ValidityToken); > + } > + return IsValid && !Force; > +} > + > +int main(int argc, char *argv[]) { > + cl::ParseCommandLineOptions(argc, argv); > + > + llvm::SmallString<128> CanonicalDirectory = StringRef(TargetDirectory); > + > + // Remove extraneous separators from the end of the basename. > + while (!CanonicalDirectory.empty() && > + sys::path::is_separator(CanonicalDirectory.back())) > + CanonicalDirectory.pop_back(); > + // Canonicalize separators on Windows > + llvm::sys::path::make_preferred(CanonicalDirectory); > + TargetDirectory = std::string(CanonicalDirectory); > + > + StringRef Dirname(TargetDirectory); > + > + std::error_code EC; > + int FD; > + EC = sys::fs::openFileForReadWrite( > + OutputFilename, FD, llvm::sys::fs::CD_OpenAlways, > llvm::sys::fs::OF_None); > + if (EC) { > + llvm::errs() << "Failed to open cache file: " > + << toString(llvm::createFileError(OutputFilename, EC)) << > "\n"; > + return 1; > + } > + > + raw_fd_ostream Out(FD, /* ShouldClose=*/true); > + > + uint64_t ValidityToken = 0; > + // Check if the cache is valid and up-to-date. > + if (checkCacheValid(FD, Out, ValidityToken)) { > + if (Verbose) > + outs() << "Cache up-to-date, exiting\n"; > + return 0; > + } > + > + if (Verbose) > + outs() << "Building a stat cache for '" << TargetDirectory << "' into '" > + << OutputFilename << "'\n"; > + > + // Do not generate a cache for NFS. Iterating huge directory hierarchies > + // over NFS will be very slow. Better to let the compiler search only the > + // pieces that it needs than use a cache that takes ages to populate. 
> + bool IsLocal; > + EC = sys::fs::is_local(Dirname, IsLocal); > + if (EC) { > + errs() << "Failed to stat the target directory: " > + << llvm::toString(llvm::errorCodeToError(EC)) << "\n"; > + return 1; > + } > + > + if (!IsLocal && !Force) { > + errs() << "Target directory is not a local filesystem. " > + << "Not populating the cache.\n"; > + return 0; > + } > + > + sys::fs::file_status BaseDirStatus; > + if (std::error_code EC = status(Dirname, BaseDirStatus)) { > + errs() << "Failed to stat the target directory: " > + << llvm::toString(llvm::errorCodeToError(EC)) << "\n"; > + return 1; > + } > + > + // Check if the filesystem hosting the target directory is case sensitive. > + bool IsCaseSensitive = true; > +#ifdef _PC_CASE_SENSITIVE > + IsCaseSensitive = > + ::pathconf(TargetDirectory.c_str(), _PC_CASE_SENSITIVE) == 1; > +#endif > + StatCacheFileSystem::StatCacheWriter Generator( > + Dirname, BaseDirStatus, IsCaseSensitive, ValidityToken); > + > + // Populate the cache. > + auto startTime = llvm::TimeRecord::getCurrentTime(); > + populateHashTable(Dirname, Generator); > + auto duration = llvm::TimeRecord::getCurrentTime(); > + duration -= startTime; > + > + if (Verbose) > + errs() << "populateHashTable took: " << duration.getWallTime() << "s\n"; > + > + // Write the cache to disk. > + startTime = llvm::TimeRecord::getCurrentTime(); > + int Size = Generator.writeStatCache(Out); > + duration = llvm::TimeRecord::getCurrentTime(); > + duration -= startTime; > + > + if (Verbose) > + errs() << "writeStatCache took: " << duration.getWallTime() << "s\n"; > + > + // We might have opened a pre-exising cache which was bigger. 
> + llvm::sys::fs::resize_file(FD, Size); > + > + return 0; > +} > > diff --git a/llvm/include/llvm/Support/StatCacheFileSystem.h > b/llvm/include/llvm/Support/StatCacheFileSystem.h > new file mode 100644 > index 0000000000000..cf2e06768acd2 > --- /dev/null > +++ b/llvm/include/llvm/Support/StatCacheFileSystem.h > @@ -0,0 +1,110 @@ > +//===- StatCacheFileSystem.h - Status Caching Proxy File System -*- C++ > -*-===// > +// > +// Part of the LLVM Project, under the Apache License v2.0 with LLVM > Exceptions. > +// See https://llvm.org/LICENSE.txt for license information. > +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception > +// > +//===----------------------------------------------------------------------===// > + > +#ifndef LLVM_SUPPORT_STATCACHEFILESYSTEM_H > +#define LLVM_SUPPORT_STATCACHEFILESYSTEM_H > + > +#include "llvm/Support/VirtualFileSystem.h" > + > +#include <list> > + > +namespace llvm { > +template <typename T> class OnDiskIterableChainedHashTable; > +template <typename T> class OnDiskChainedHashTableGenerator; > + > +namespace vfs { > + > +/// A ProxyFileSystem using cached information for status() rather than > going to > +/// the underlying filesystem. > +/// > +/// When dealing with a huge tree of (mostly) immutable filesystem content > +/// like an SDK, it can be very costly to ask the underlying filesystem for > +/// `stat` data. Even when caching the `stat`s internally, having many > +/// concurrent Clangs accessing the same tree in a similar way causes > +/// contention. As SDK files are mostly immutable, we can pre-compute the > status > +/// information using clang-stat-cache and use that information directly > without > +/// accessing the real filesystem until Clang needs to open a file. This can > +/// speed up module verification and HeaderSearch by significant amounts. 
> +class StatCacheFileSystem : public llvm::vfs::ProxyFileSystem { > + class StatCacheLookupInfo; > + using StatCacheType = > + llvm::OnDiskIterableChainedHashTable<StatCacheLookupInfo>; > + > + class StatCacheGenerationInfo; > + using StatCacheGeneratorType = > + llvm::OnDiskChainedHashTableGenerator<StatCacheGenerationInfo>; > + > + explicit StatCacheFileSystem(std::unique_ptr<llvm::MemoryBuffer> CacheFile, > + IntrusiveRefCntPtr<FileSystem> FS, > + bool IsCaseSensitive); > + > +public: > + /// Create a StatCacheFileSystem from the passed \a CacheBuffer, a > + /// MemoryBuffer representing the contents of the \a CacheFilename file. > The > + /// returned filesystem will be overlaid on top of \a FS. > + static Expected<IntrusiveRefCntPtr<StatCacheFileSystem>> > + create(std::unique_ptr<llvm::MemoryBuffer> CacheBuffer, > + IntrusiveRefCntPtr<FileSystem> FS); > + > + /// The status override which will consult the cache if \a Path is in the > + /// cached filesystem tree. > + llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; > + > +public: > + /// A helper class to generate stat caches. > + class StatCacheWriter { > + llvm::SmallString<128> BaseDir; > + bool IsCaseSensitive; > + uint64_t ValidityToken; > + StatCacheGeneratorType *Generator; > + std::list<std::string> PathStorage; > + > + public: > + /// Create a StatCacheWriter > + /// > + /// \param BaseDir The base directory for the path. Every filename > passed to > + /// addEntry() needs to start with this base directory. > + /// \param Status The status entry for the base directory. > + /// \param IsCaseSensitive Whether the cache is case sensitive. > + /// \param ValidityToken A 64 bits token that gets embedded in the cache > and > + /// can be used by generator tools to check for the > + /// cache validity in a platform-specific way. 
> + StatCacheWriter(StringRef BaseDir, const sys::fs::file_status &Status, > + bool IsCaseSensitive, uint64_t ValidityToken = 0); > + ~StatCacheWriter(); > + > + /// Add a cache entry storing \a Status for the file at \a Path. > + void addEntry(StringRef Path, const sys::fs::file_status &Status); > + > + /// Write the cache file to \a Out. > + size_t writeStatCache(raw_fd_ostream &Out); > + }; > + > +public: > + /// Validate that the file content in \a Buffer is a valid stat cache file. > + /// \a BaseDir, \a IsCaseSensitive and \a ValidityToken are output > parameters > + /// that get populated by this call. > + static Error validateCacheFile(llvm::MemoryBufferRef Buffer, > + StringRef &BaseDir, bool &IsCaseSensitive, > + bool &VersionMatch, uint64_t > &ValidityToken); > + > + /// Update the ValidityToken data in \a CacheFile. > + static void updateValidityToken(raw_fd_ostream &CacheFile, > + uint64_t ValidityToken); > + > +private: > + std::unique_ptr<llvm::MemoryBuffer> StatCacheFile; > + llvm::StringRef StatCachePrefix; > + std::unique_ptr<StatCacheType> StatCache; > + bool IsCaseSensitive = true; > +}; > + > +} // namespace vfs > +} // namespace llvm > + > +#endif // LLVM_SUPPORT_STATCACHEFILESYSTEM_H > > diff --git a/llvm/lib/Support/CMakeLists.txt > b/llvm/lib/Support/CMakeLists.txt > index 9b5402fa54f0f..16531c4ec78d2 100644 > --- a/llvm/lib/Support/CMakeLists.txt > +++ b/llvm/lib/Support/CMakeLists.txt > @@ -212,6 +212,7 @@ add_llvm_component_library(LLVMSupport > SmallVector.cpp > SourceMgr.cpp > SpecialCaseList.cpp > + StatCacheFileSystem.cpp > Statistic.cpp > StringExtras.cpp > StringMap.cpp > > diff --git a/llvm/lib/Support/StatCacheFileSystem.cpp > b/llvm/lib/Support/StatCacheFileSystem.cpp > new file mode 100644 > index 0000000000000..96fd32bf5082c > --- /dev/null > +++ b/llvm/lib/Support/StatCacheFileSystem.cpp > @@ -0,0 +1,306 @@ > +//===- StatCacheFileSystem.cpp - Status Caching Proxy File System > ---------===// > +// > +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM > Exceptions. > +// See https://llvm.org/LICENSE.txt for license information. > +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception > +// > +//===----------------------------------------------------------------------===// > + > +#include "llvm/Support/StatCacheFileSystem.h" > + > +#include "llvm/ADT/IntrusiveRefCntPtr.h" > +#include "llvm/Support/ErrorOr.h" > +#include "llvm/Support/OnDiskHashTable.h" > + > +namespace llvm { > +namespace vfs { > + > +class StatCacheFileSystem::StatCacheLookupInfo { > +public: > + typedef StringRef external_key_type; > + typedef StringRef internal_key_type; > + typedef llvm::sys::fs::file_status data_type; > + typedef uint32_t hash_value_type; > + typedef uint32_t offset_type; > + > + static bool EqualKey(const internal_key_type &a, const internal_key_type > &b) { > + return a == b; > + } > + > + static hash_value_type ComputeHash(const internal_key_type &a) { > + return hash_value(a); > + } > + > + static std::pair<unsigned, unsigned> > + ReadKeyDataLength(const unsigned char *&d) { > + using namespace llvm::support; > + unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d); > + unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d); > + return std::make_pair(KeyLen, DataLen); > + } > + > + static const internal_key_type &GetInternalKey(const external_key_type &x) > { > + return x; > + } > + > + static const external_key_type &GetExternalKey(const internal_key_type &x) > { > + return x; > + } > + > + static internal_key_type ReadKey(const unsigned char *d, unsigned n) { > + return StringRef((const char *)d, n); > + } > + > + static data_type ReadData(const internal_key_type &k, const unsigned char > *d, > + unsigned DataLen) { > + data_type Result; > + memcpy(&Result, d, sizeof(Result)); > + return Result; > + } > +}; > + > +class StatCacheFileSystem::StatCacheGenerationInfo { > +public: > + typedef StringRef key_type; > + typedef const StringRef 
&key_type_ref; > + typedef sys::fs::file_status data_type; > + typedef const sys::fs::file_status &data_type_ref; > + typedef uint32_t hash_value_type; > + typedef uint32_t offset_type; > + > + /// Calculate the hash for Key > + static hash_value_type ComputeHash(key_type_ref Key) { > + return static_cast<size_t>(hash_value(Key)); > + } > + > + /// Return the lengths, in bytes, of the given Key/Data pair. > + static std::pair<unsigned, unsigned> > + EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data) { > + using namespace llvm::support; > + endian::Writer LE(Out, little); > + unsigned KeyLen = Key.size(); > + unsigned DataLen = sizeof(Data); > + LE.write<uint16_t>(KeyLen); > + LE.write<uint16_t>(DataLen); > + return std::make_pair(KeyLen, DataLen); > + } > + > + static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen) { > + Out.write(Key.data(), KeyLen); > + } > + > + /// Write Data to Out. DataLen is the length from EmitKeyDataLength. > + static void EmitData(raw_ostream &Out, key_type_ref Key, data_type_ref > Data, > + unsigned Len) { > + Out.write((const char *)&Data, Len); > + } > + > + static bool EqualKey(key_type_ref Key1, key_type_ref Key2) { > + return Key1 == Key2; > + } > +}; > + > +// The format of the stat cache is (pseudo-code): > +// struct stat_cache { > +// char Magic[4]; // "STAT" or "Stat" > +// uint32_t BucketOffset; // See BucketOffset in OnDiskHashTable.h > +// uint64_t ValidityToken; // Platform specific data allowing to check > +// // whether the cache is up-to-date. > +// uint32_t Version; // The stat cache format version. > +// char BaseDir[N]; // Zero terminated path to the base directory > +// < OnDiskHashtable Data > // Data for the hash table. The keys are the > +// // relative paths under BaseDir. The data is > +// // llvm::sys::fs::file_status structures.
> +// }; > + > +#define MAGIC_CASE_SENSITIVE "Stat" > +#define MAGIC_CASE_INSENSITIVE "STAT" > +#define STAT_CACHE_VERSION 1 > + > +namespace { > +struct StatCacheHeader { > + char Magic[4]; > + uint32_t BucketOffset; > + uint64_t ValidityToken; > + uint32_t Version; > + char BaseDir[1]; > +}; > +} // namespace > + > +StatCacheFileSystem::StatCacheFileSystem( > + std::unique_ptr<MemoryBuffer> CacheFile, IntrusiveRefCntPtr<FileSystem> > FS, > + bool IsCaseSensitive) > + : ProxyFileSystem(std::move(FS)), StatCacheFile(std::move(CacheFile)), > + IsCaseSensitive(IsCaseSensitive) { > + const char *CacheFileStart = StatCacheFile->getBufferStart(); > + auto *Header = reinterpret_cast<const StatCacheHeader *>(CacheFileStart); > + > + uint32_t BucketOffset = Header->BucketOffset; > + StatCachePrefix = StringRef(Header->BaseDir); > + // HashTableStart points at the beginning of the data emitted by the > + // OnDiskHashTable. > + const unsigned char *HashTableStart = (const unsigned char > *)CacheFileStart + > + StatCachePrefix.size() + > + sizeof(StatCacheHeader); > + StatCache.reset(StatCacheType::Create( > + (const unsigned char *)CacheFileStart + BucketOffset, HashTableStart, > + (const unsigned char *)CacheFileStart)); > +} > + > +Expected<IntrusiveRefCntPtr<StatCacheFileSystem>> > +StatCacheFileSystem::create(std::unique_ptr<MemoryBuffer> CacheBuffer, > + IntrusiveRefCntPtr<FileSystem> FS) { > + StringRef BaseDir; > + bool IsCaseSensitive; > + bool VersionMatch; > + uint64_t ValidityToken; > + if (auto E = validateCacheFile(*CacheBuffer, BaseDir, IsCaseSensitive, > + VersionMatch, ValidityToken)) > + return E; > + if (!VersionMatch) { > + return createStringError(inconvertibleErrorCode(), > + CacheBuffer->getBufferIdentifier() + > + ": Mismatched cache file version"); > + } > + return new StatCacheFileSystem(std::move(CacheBuffer), FS, > IsCaseSensitive); > +} > + > +ErrorOr<Status> StatCacheFileSystem::status(const Twine &Path) { > + SmallString<180> StringPath; > + 
Path.toVector(StringPath); > + // If the cache is not case sensitive, do all operations on lower-cased > paths. > + if (!IsCaseSensitive) > + std::transform(StringPath.begin(), StringPath.end(), StringPath.begin(), > + toLower); > + > + // Canonicalize the path. This removes single dot path components, > + // but it also gets rid of repeating separators. > + llvm::sys::path::remove_dots(StringPath); > + > + // If on Windows, canonicalize separators. > + llvm::sys::path::make_preferred(StringPath); > + > + // Check if the requested path falls into the cache. > + StringRef SuffixPath(StringPath); > + if (!SuffixPath.consume_front(StatCachePrefix)) > + return ProxyFileSystem::status(Path); > + > + auto It = StatCache->find(SuffixPath); > + if (It == StatCache->end()) { > + // We didn't find the file in the cache even though it started with the > + // cache prefix. It could be that the file doesn't exist, or the spelling > + // of the path is > different. `remove_dots` canonicalizes the path by removing > + // `.` and excess separators, but leaves `..` since it isn't semantically > + // preserving to remove them in the presence of symlinks. If the path > + // does not contain '..' we can safely say it doesn't exist. > + if (std::find(sys::path::begin(SuffixPath), sys::path::end(SuffixPath), > + "..") == sys::path::end(SuffixPath)) { > + return llvm::errc::no_such_file_or_directory; > + } > + return ProxyFileSystem::status(Path); > + } > + > + // clang-stat-cache will record entries for broken symlinks with a default- > + // constructed Status. This will have a default-constructed UniqueID. > + if ((*It).getUniqueID() == llvm::sys::fs::UniqueID()) > + return llvm::errc::no_such_file_or_directory; > + > + return llvm::vfs::Status::copyWithNewName(*It, Path); > +} > + > +StatCacheFileSystem::StatCacheWriter::StatCacheWriter( > + StringRef BaseDir, const sys::fs::file_status &Status, bool > IsCaseSensitive, > + uint64_t ValidityToken) > + : BaseDir(IsCaseSensitive ?
BaseDir.str() : BaseDir.lower()), > + IsCaseSensitive(IsCaseSensitive), ValidityToken(ValidityToken), > + Generator(new StatCacheGeneratorType()) { > + addEntry(BaseDir, Status); > + // If on Windows, canonicalize separators. > + llvm::sys::path::make_preferred(this->BaseDir); > +} > + > +StatCacheFileSystem::StatCacheWriter::~StatCacheWriter() { delete Generator; > } > + > +void StatCacheFileSystem::StatCacheWriter::addEntry( > + StringRef Path, const sys::fs::file_status &Status) { > + llvm::SmallString<128> StoredPath; > + > +#if defined(_WIN32) > + StoredPath = Path; > + llvm::sys::path::make_preferred(StoredPath); > + Path = StoredPath; > +#endif > + > + if (!IsCaseSensitive) { > + StoredPath = Path.lower(); > + Path = StoredPath; > + } > + > + LLVM_ATTRIBUTE_UNUSED bool Consumed = Path.consume_front(BaseDir); > + assert(Consumed && "Path does not start with expected prefix."); > + > + PathStorage.emplace_back(Path.str()); > + Generator->insert(PathStorage.back(), Status); > +} > + > +size_t > +StatCacheFileSystem::StatCacheWriter::writeStatCache(raw_fd_ostream &Out) { > + const uint32_t Version = STAT_CACHE_VERSION; > + // Magic value. > + if (IsCaseSensitive) > + Out.write(MAGIC_CASE_SENSITIVE, 4); > + else > + Out.write(MAGIC_CASE_INSENSITIVE, 4); > + // Placeholder for BucketOffset, filled in below. > + Out.write("\0\0\0\0", 4); > + // Write out the validity token. > + Out.write((const char *)&ValidityToken, sizeof(ValidityToken)); > + // Write out the version. > + Out.write((const char *)&Version, sizeof(Version)); > + // Write out the base directory for the cache. > + Out.write(BaseDir.c_str(), BaseDir.size() + 1); > + // Write out the hashtable data. 
> + uint32_t BucketOffset = Generator->Emit(Out); > + int Size = Out.tell(); > + // Move back to right after the Magic to insert BucketOffset > + Out.seek(4); > + Out.write((const char *)&BucketOffset, sizeof(BucketOffset)); > + return Size; > +} > + > +Error StatCacheFileSystem::validateCacheFile(MemoryBufferRef Buffer, > + StringRef &BaseDir, > + bool &IsCaseSensitive, > + bool &VersionMatch, > + uint64_t &ValidityToken) { > + auto *Header = > + reinterpret_cast<const StatCacheHeader *>(Buffer.getBufferStart()); > + if (Buffer.getBufferSize() < sizeof(StatCacheHeader) || > + (memcmp(Header->Magic, MAGIC_CASE_INSENSITIVE, sizeof(Header->Magic)) > && > + memcmp(Header->Magic, MAGIC_CASE_SENSITIVE, sizeof(Header->Magic))) || > + Header->BucketOffset > Buffer.getBufferSize()) > + return createStringError(inconvertibleErrorCode(), "Invalid cache file"); > + > + auto PathLen = > + strnlen(Header->BaseDir, > + Buffer.getBufferSize() - offsetof(StatCacheHeader, BaseDir)); > + if (Header->BaseDir[PathLen] != 0) > + return createStringError(inconvertibleErrorCode(), "Invalid cache file"); > + > + IsCaseSensitive = Header->Magic[1] == MAGIC_CASE_SENSITIVE[1]; > + VersionMatch = Header->Version == STAT_CACHE_VERSION; > + BaseDir = StringRef(Header->BaseDir, PathLen); > + ValidityToken = Header->ValidityToken; > + > + return ErrorSuccess(); > +} > + > +void StatCacheFileSystem::updateValidityToken(raw_fd_ostream &CacheFile, > + uint64_t ValidityToken) { > + CacheFile.pwrite(reinterpret_cast<char *>(&ValidityToken), > + sizeof(ValidityToken), > + offsetof(StatCacheHeader, ValidityToken)); > +} > + > +} // namespace vfs > +} // namespace llvm > > diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp > b/llvm/unittests/Support/VirtualFileSystemTest.cpp > index 242bb76865b2c..89fd0aac17f2f 100644 > --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp > +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp > @@ -14,9 +14,11 @@ > #include "llvm/Support/MemoryBuffer.h" > 
#include "llvm/Support/Path.h" > #include "llvm/Support/SourceMgr.h" > +#include "llvm/Support/StatCacheFileSystem.h" > #include "llvm/Testing/Support/SupportHelpers.h" > #include "gmock/gmock.h" > #include "gtest/gtest.h" > +#include <list> > #include <map> > #include <string> > > @@ -3228,3 +3230,306 @@ TEST(RedirectingFileSystemTest, PrintOutput) { > " DummyFileSystem (RecursiveContents)\n", > Output); > } > + > +class StatCacheFileSystemTest : public ::testing::Test { > +public: > + void SetUp() override {} > + > + template <typename StringCollection> > + void createStatCacheFileSystem( > + StringRef OutputFile, StringRef BaseDir, bool IsCaseSensitive, > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result, > + StringCollection &Filenames, > + IntrusiveRefCntPtr<vfs::FileSystem> Lower = new ErrorDummyFileSystem(), > + uint64_t ValidityToken = 0) { > + sys::fs::file_status s; > + status(BaseDir, s); > + vfs::StatCacheFileSystem::StatCacheWriter Generator( > + BaseDir, s, IsCaseSensitive, ValidityToken); > + std::error_code ErrorCode; > + > + Result.reset(); > + > + // Base path should be present in the stat cache. 
> + Filenames.push_back(std::string(BaseDir)); > + > + for (sys::fs::recursive_directory_iterator I(BaseDir, ErrorCode), E; > + I != E && !ErrorCode; I.increment(ErrorCode)) { > + Filenames.push_back(I->path()); > + StringRef Path(Filenames.back().c_str()); > + status(Path, s); > + Generator.addEntry(Path, s); > + } > + > + { > + raw_fd_ostream StatCacheFile(OutputFile, ErrorCode); > + ASSERT_FALSE(ErrorCode); > + Generator.writeStatCache(StatCacheFile); > + } > + > + loadCacheFile(OutputFile, ValidityToken, Lower, Result); > + } > + > + void loadCacheFile(StringRef OutputFile, uint64_t ExpectedValidityToken, > + IntrusiveRefCntPtr<vfs::FileSystem> Lower, > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result) { > + auto ErrorOrBuffer = MemoryBuffer::getFile(OutputFile); > + EXPECT_TRUE(ErrorOrBuffer); > + StringRef CacheBaseDir; > + bool IsCaseSensitive; > + bool VersionMatch; > + uint64_t FileValidityToken; > + auto E = vfs::StatCacheFileSystem::validateCacheFile( > + (*ErrorOrBuffer)->getMemBufferRef(), CacheBaseDir, IsCaseSensitive, > + VersionMatch, FileValidityToken); > + ASSERT_FALSE(E); > + EXPECT_TRUE(VersionMatch); > + EXPECT_EQ(FileValidityToken, ExpectedValidityToken); > + auto ExpectedCache = > + vfs::StatCacheFileSystem::create(std::move(*ErrorOrBuffer), Lower); > + ASSERT_FALSE(ExpectedCache.takeError()); > + Result = *ExpectedCache; > + } > + > + template <typename StringCollection> > + void > + compareStatCacheToRealFS(IntrusiveRefCntPtr<vfs::StatCacheFileSystem> > CacheFS, > + const StringCollection &Files) { > + IntrusiveRefCntPtr<vfs::FileSystem> RealFS = vfs::getRealFileSystem(); > + > + for (auto &File : Files) { > + auto ErrorOrStatus1 = RealFS->status(File); > + auto ErrorOrStatus2 = CacheFS->status(File); > + > + EXPECT_EQ((bool)ErrorOrStatus1, (bool)ErrorOrStatus2); > + if (!ErrorOrStatus1 || !ErrorOrStatus2) > + continue; > + > + vfs::Status s1 = *ErrorOrStatus1, s2 = *ErrorOrStatus2; > + EXPECT_EQ(s1.getName(), s2.getName()); > + 
EXPECT_EQ(s1.getType(), s2.getType()); > + EXPECT_EQ(s1.getPermissions(), s2.getPermissions()); > + EXPECT_EQ(s1.getLastModificationTime(), s2.getLastModificationTime()); > + EXPECT_EQ(s1.getUniqueID(), s2.getUniqueID()); > + EXPECT_EQ(s1.getUser(), s2.getUser()); > + EXPECT_EQ(s1.getGroup(), s2.getGroup()); > + EXPECT_EQ(s1.getSize(), s2.getSize()); > + } > + } > +}; > + > +TEST_F(StatCacheFileSystemTest, Basic) { > + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); > + TempDir _a(TestDirectory.path("a")); > + TempFile _ab(TestDirectory.path("a/b")); > + TempDir _ac(TestDirectory.path("a/c")); > + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); > + TempFile _ace(TestDirectory.path("a/c/e")); > + TempFile _acf(TestDirectory.path("a/c/f"), "", "More dummy contents"); > + TempDir _ag(TestDirectory.path("a/g")); > + TempFile _agh(TestDirectory.path("a/g/h")); > + > + StringRef BaseDir(_a.path()); > + > + SmallVector<std::string, 10> Filenames; > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > + Filenames); > + ASSERT_TRUE(StatCacheFS); > + compareStatCacheToRealFS(StatCacheFS, Filenames); > +} > + > +TEST_F(StatCacheFileSystemTest, CaseSensitivity) { > + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); > + TempDir _a(TestDirectory.path("a")); > + TempDir _ac(TestDirectory.path("a/c")); > + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); > + TempDir _b(TestDirectory.path("B")); > + TempDir _bc(TestDirectory.path("B/c")); > + TempFile _bcd(TestDirectory.path("B/c/D"), "", "Dummy contents"); > + > + StringRef BaseDir(TestDirectory.path()); > + SmallVector<std::string, 10> Filenames; > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > + 
Filenames); > + ASSERT_TRUE(StatCacheFS); > + > + auto ErrorOrStatus = StatCacheFS->status(_acd.path()); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(_bcd.path()); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d")); > + EXPECT_FALSE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d")); > + EXPECT_FALSE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D")); > + EXPECT_FALSE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d")); > + EXPECT_FALSE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d")); > + EXPECT_FALSE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D")); > + EXPECT_FALSE(ErrorOrStatus); > + > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ false, StatCacheFS, > + Filenames); > + ASSERT_TRUE(StatCacheFS); > + ErrorOrStatus = StatCacheFS->status(_acd.path()); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(_bcd.path()); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d")); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d")); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D")); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d")); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d")); > + EXPECT_TRUE(ErrorOrStatus); > + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D")); > + EXPECT_TRUE(ErrorOrStatus); > +} > + > +TEST_F(StatCacheFileSystemTest, DotDot) { > + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); > + TempDir _a(TestDirectory.path("a")); > + TempDir 
_ab(TestDirectory.path("a/b")); > + TempFile _abd(TestDirectory.path("a/b/d")); > + TempDir _ac(TestDirectory.path("a/c")); > + TempFile _acd(TestDirectory.path("a/c/d")); > + > + StringRef BaseDir(_a.path()); > + SmallVector<std::string, 10> Filenames; > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; > + auto RealFS = vfs::getRealFileSystem(); > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > Filenames, > + RealFS); > + ASSERT_TRUE(StatCacheFS); > + > + // Create a file in the cached prefix after the cache was created. > + TempFile _abe(TestDirectory.path("a/b/e")); > + // Verify the cache is kicking in. > + ASSERT_FALSE(StatCacheFS->status(_abe.path())); > + // We can access the new file using a ".." because the StatCache will > + // just pass that request to the FileSystem below it. > + const SmallString<128> PathsToTest[] = { > + TestDirectory.path("a/b/../e"), > + TestDirectory.path("a/b/../c/d"), > + TestDirectory.path("a/b/.."), > + }; > + compareStatCacheToRealFS(StatCacheFS, PathsToTest); > +} > + > +#ifdef LLVM_ON_UNIX > +TEST_F(StatCacheFileSystemTest, Links) { > + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); > + TempDir _a(TestDirectory.path("a")); > + TempLink _ab("d", TestDirectory.path("a/b")); > + TempFile _ac(TestDirectory.path("a/c")); > + TempDir _ad(TestDirectory.path("a/d")); > + TempFile _add(TestDirectory.path("a/d/d"), "", "Dummy contents"); > + TempFile _ade(TestDirectory.path("a/d/e")); > + TempFile _adf(TestDirectory.path("a/d/f"), "", "More dummy contents"); > + TempLink _adg(_ad.path(), TestDirectory.path("a/d/g")); > + TempDir _ah(TestDirectory.path("a/h")); > + TempLink _ahi(_ad.path(), TestDirectory.path("a/h/i")); > + TempLink _ahj("no_such_file", TestDirectory.path("a/h/j")); > + > + StringRef BaseDir(_a.path()); > + > + SmallVector<std::string, 10> Filenames; > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; > + 
createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > + Filenames); > + ASSERT_TRUE(StatCacheFS); > + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), > + TestDirectory.path("a/d/g/g")), > + Filenames.end()); > + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), > + TestDirectory.path("a/b/e")), > + Filenames.end()); > + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), > + TestDirectory.path("a/h/i/f")), > + Filenames.end()); > + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), > + TestDirectory.path("a/h/j")), > + Filenames.end()); > + compareStatCacheToRealFS(StatCacheFS, Filenames); > + > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > Filenames, > + vfs::getRealFileSystem()); > + const SmallString<128> PathsToTest[] = { > + TestDirectory.path("a/h/i/../c"), > + TestDirectory.path("a/b/../d"), > + TestDirectory.path("a/g/g/../c"), > + TestDirectory.path("a/b/.."), > + }; > + compareStatCacheToRealFS(StatCacheFS, PathsToTest); > +} > +#endif > + > +TEST_F(StatCacheFileSystemTest, Canonical) { > + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); > + TempDir _a(TestDirectory.path("a")); > + TempFile _ab(TestDirectory.path("a/b")); > + TempDir _ac(TestDirectory.path("a/c")); > + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); > + > + StringRef BaseDir(_a.path()); > + SmallVector<std::string, 10> Filenames; > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > + Filenames); > + ASSERT_TRUE(StatCacheFS); > + > + const SmallString<128> PathsToTest[] = { > + TestDirectory.path("./a/b"), TestDirectory.path("a//./b"), > + TestDirectory.path("a///b"), TestDirectory.path("a//c//d"), > + TestDirectory.path("a//c/./d"), TestDirectory.path("a/./././b"), > 
+ TestDirectory.path("a/.//.//.//b"), > + }; > + compareStatCacheToRealFS(StatCacheFS, PathsToTest); > +} > + > +TEST_F(StatCacheFileSystemTest, ValidityToken) { > + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); > + TempDir _a(TestDirectory.path("a")); > + TempFile _ab(TestDirectory.path("a/b")); > + TempDir _ac(TestDirectory.path("a/c")); > + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); > + > + StringRef BaseDir(_a.path()); > + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; > + { > + SmallVector<std::string, 10> Filenames; > + uint64_t ValidityToken = 0x1234567890abcfef; > + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, > + /* IsCaseSensitive= */ true, StatCacheFS, > + Filenames, new DummyFileSystem(), > ValidityToken); > + ASSERT_TRUE(StatCacheFS); > + } > + > + uint64_t UpdatedValidityToken = 0xabcdef0123456789; > + { > + std::error_code EC; > + raw_fd_ostream CacheFile(TestDirectory.path("stat.cache"), EC, > + sys::fs::CD_OpenAlways); > + ASSERT_FALSE(EC); > + vfs::StatCacheFileSystem::updateValidityToken(CacheFile, > + UpdatedValidityToken); > + } > + > + loadCacheFile(TestDirectory.path("stat.cache"), UpdatedValidityToken, > + new DummyFileSystem(), StatCacheFS); > + EXPECT_TRUE(StatCacheFS); > +} > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits