sammccall created this revision.
sammccall added a reviewer: kbobyrev.
Herald added subscribers: cfe-commits, usaxena95, kadircet, jfb, arphaman, 
mgorny.
Herald added a project: clang.
sammccall requested review of this revision.
Herald added subscribers: MaskRay, ilya-biryukov.

The plan is to use this to use this for .clang-format, .clang-tidy, and
compile_commands.json. (Currently the former two are reparsed every
time, and the latter is cached forever and changes are never seen).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88172

Files:
  clang-tools-extra/clangd/ConfigProvider.cpp
  clang-tools-extra/clangd/ConfigProvider.h
  clang-tools-extra/clangd/support/CMakeLists.txt
  clang-tools-extra/clangd/support/FileCache.cpp
  clang-tools-extra/clangd/support/FileCache.h
  clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
  clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp

Index: clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
@@ -0,0 +1,83 @@
+//===-- FileCacheTests.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/FileCache.h"
+
+#include "TestFS.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <atomic>
+#include <chrono>
+
+namespace clang {
+namespace clangd {
+namespace config {
+namespace {
+
+class TestCache : public FileCache {
+  MockFS FS;
+  mutable std::string Value;
+
+public:
+  TestCache() : FileCache(testPath("foo.cc")) {}
+
+  void setContents(const char *C) {
+    if (C)
+      FS.Files[testPath("foo.cc")] = C;
+    else
+      FS.Files.erase(testPath("foo.cc"));
+  }
+
+  std::string get(std::chrono::steady_clock::time_point FreshTime,
+                  bool ExpectParse) const {
+    bool GotParse = false;
+    bool GotRead;
+    std::string Result;
+    read(
+        FS, FreshTime,
+        [&](llvm::Optional<llvm::StringRef> Data) {
+          GotParse = true;
+          Value = Data.getValueOr("").str();
+        },
+        [&]() {
+          GotRead = true;
+          Result = Value;
+        });
+    EXPECT_EQ(GotParse, ExpectParse);
+    EXPECT_TRUE(GotRead);
+    return Result;
+  }
+};
+
+TEST(FileCacheTest, Invalidation) {
+  TestCache C;
+
+  auto StaleOK = std::chrono::steady_clock::now();
+  auto MustBeFresh = StaleOK + std::chrono::hours(1);
+
+  C.setContents("a");
+  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/true)) << "Parsed first time";
+  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("a", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)";
+  C.setContents("bb");
+  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
+  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Cached (stat)";
+  C.setContents(nullptr);
+  EXPECT_EQ("bb", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Stat failed";
+  EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (404)";
+  C.setContents("bb"); // Match the previous stat values!
+  EXPECT_EQ("", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
+}
+
+} // namespace
+} // namespace config
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
+++ clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
@@ -150,6 +150,7 @@
   EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz"));
 }
 
+// FIXME: delete this test, it's covered by FileCacheTests.
 TEST(ProviderTest, Staleness) {
   MockFS FS;
 
Index: clang-tools-extra/clangd/support/FileCache.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/support/FileCache.h
@@ -0,0 +1,77 @@
+//===--- FileCache.h - Revalidating cache of data from disk ------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
+
+#include "ThreadsafeFS.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+
+/// Base class for threadsafe cache of data read from a file on disk.
+///
+/// We want configuration files to be "live" as much as possible.
+/// Reading them every time is simplest, but caching solves a few problems:
+///  - reading and parsing is cheap but not free (and happens on hot paths)
+///  - we can ignore invalid data and use the old value (we may see truncated
+///    compile_commands.json from non-atomic writers)
+///  - we avoid reporting the same errors repeatedly
+///
+/// We still read and parse the data synchronously on demand, but skip as much
+/// work as possible:
+///  - if not enough wall-time has elapsed, assume the data is still up-to-date
+///  - if we stat the file and it has the same mtime + size, don't read it
+///  - obviously we only have to parse when we re-read the file
+/// (Tracking OS change events is an alternative, but difficult to do portably.)
+///
+/// Caches for particular data (e.g. compilation databases) should inherit and:
+///  - add mutable storage for the cached parsed data
+///  - add a public interface implemented on top of read()
+class FileCache {
+protected:
+  // Path must be absolute.
+  FileCache(llvm::StringRef Path);
+
+  // Updates the cached value if needed, then provides threadsafe access to it.
+  //
+  // Specifically:
+  // - Parse() may be called (if the cache was not up-to-date)
+  //   The lock is held, so cache storage may be safely written.
+  //   Parse(None) means the file doesn't exist.
+  // - Read() will always be called, to provide access to the value.
+  //   The lock is again held, so the value can be copied or used.
+  //
+  // If the last Parse is newer than FreshTime, we don't check metadata.
+  //   - time_point::min() means we only do IO if we never read the file before
+  //   - time_point::max() means we always at least stat the file
+  void read(const ThreadsafeFS &TFS,
+            std::chrono::steady_clock::time_point FreshTime,
+            llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
+            llvm::function_ref<void()> Read) const;
+
+  llvm::StringRef path() const { return Path; }
+
+private:
+  std::string Path;
+  // Members are mutable so read() can present a const interface.
+  // (It is threadsafe and approximates read-through to TFS).
+  mutable std::mutex Mu;
+  mutable std::chrono::steady_clock::time_point ValidTime;
+  mutable llvm::sys::TimePoint<> MTime;
+  mutable uint64_t Size;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/support/FileCache.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/support/FileCache.cpp
@@ -0,0 +1,78 @@
+//===--- FileCache.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/FileCache.h"
+
+namespace clang {
+namespace clangd {
+
+// Sentinel values for the Size cache key. In both cases, a successful stat of
+// the file will never result in the cached value being reused.
+
+// The cached value does not reflect the current content on disk.
+static constexpr uint64_t CacheDiskMismatch = -1;
+// The cached value reflects that the file doesn't exist.
+static constexpr uint64_t FileNotFound = -2;
+
+FileCache::FileCache(llvm::StringRef Path)
+    : Path(Path), ValidTime(std::chrono::steady_clock::time_point::min()),
+      MTime(), Size(CacheDiskMismatch) {
+  assert(llvm::sys::path::is_absolute(Path));
+}
+
+void FileCache::read(
+    const ThreadsafeFS &TFS, std::chrono::steady_clock::time_point FreshTime,
+    llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
+    llvm::function_ref<void()> Read) const {
+
+  std::lock_guard<std::mutex> Lock(Mu);
+  // We're going to update the cache and return whatever's in it.
+  auto Return = llvm::make_scope_exit(Read);
+
+  // Return any sufficiently recent result without doing any further work.
+  if (ValidTime > FreshTime)
+    return;
+
+  // Ensure we always bump ValidTime, so that FreshTime imposes a hard limit on
+  // how often we do IO.
+  auto BumpValidTime = llvm::make_scope_exit(
+      [&] { ValidTime = std::chrono::steady_clock::now(); });
+
+  // stat is cheaper than opening the file. It's usually unchanged.
+  assert(llvm::sys::path::is_absolute(Path));
+  auto FS = TFS.view(/*CWD=*/llvm::None);
+  auto Stat = FS->status(Path);
+  if (!Stat || !Stat->isRegularFile()) {
+    if (Size != FileNotFound) // Allow "not found" value to be cached.
+      Parse(llvm::None);
+    // Ensure the cache key won't match any future stat().
+    Size = FileNotFound;
+    return;
+  }
+  // If the modified-time and size match, assume the content does too.
+  if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime())
+    return;
+
+  // OK, the file has actually changed. Update cache key, compute new value.
+  Size = Stat->getSize();
+  MTime = Stat->getLastModificationTime();
+  // Now read the file from disk.
+  if (auto Buf = FS->getBufferForFile(Path)) {
+    Parse(Buf->get()->getBuffer());
+    // Result is cacheable if the actual read size matches the new cache key.
+    // (We can't update the cache key, because we don't know the new mtime).
+    if (Buf->get()->getBufferSize() != Size)
+      Size = CacheDiskMismatch;
+  } else {
+    // File was unreadable. Keep the old value and try again next time.
+    Size = CacheDiskMismatch;
+  }
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/support/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/support/CMakeLists.txt
+++ clang-tools-extra/clangd/support/CMakeLists.txt
@@ -19,6 +19,7 @@
 add_clang_library(clangdSupport
   Cancellation.cpp
   Context.cpp
+  FileCache.cpp
   Logger.cpp
   Markup.cpp
   Shutdown.cpp
Index: clang-tools-extra/clangd/ConfigProvider.h
===================================================================
--- clang-tools-extra/clangd/ConfigProvider.h
+++ clang-tools-extra/clangd/ConfigProvider.h
@@ -37,8 +37,9 @@
   llvm::StringRef Path;
   /// Hint that stale data is OK to improve performance (e.g. avoid IO).
   /// FreshTime sets a bound for how old the data can be.
-  /// If not set, providers should validate caches against the data source.
-  llvm::Optional<std::chrono::steady_clock::time_point> FreshTime;
+  /// By default, providers should validate caches against the data source.
+  std::chrono::steady_clock::time_point FreshTime =
+      std::chrono::steady_clock::time_point::max();
 };
 
 /// Used to report problems in parsing or interpreting a config.
Index: clang-tools-extra/clangd/ConfigProvider.cpp
===================================================================
--- clang-tools-extra/clangd/ConfigProvider.cpp
+++ clang-tools-extra/clangd/ConfigProvider.cpp
@@ -9,6 +9,7 @@
 #include "ConfigProvider.h"
 #include "Config.h"
 #include "ConfigFragment.h"
+#include "support/FileCache.h"
 #include "support/ThreadsafeFS.h"
 #include "support/Trace.h"
 #include "llvm/ADT/ScopeExit.h"
@@ -22,85 +23,24 @@
 namespace config {
 
 // Threadsafe cache around reading a YAML config file from disk.
-class FileConfigCache {
-  std::mutex Mu;
-  std::chrono::steady_clock::time_point ValidTime = {};
-  llvm::SmallVector<CompiledFragment, 1> CachedValue;
-  llvm::sys::TimePoint<> MTime = {};
-  unsigned Size = -1;
-
-  // Called once we are sure we want to read the file.
-  // REQUIRES: Cache keys are set. Mutex must be held.
-  void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) {
-    CachedValue.clear();
-
-    auto Buf = FS.getBufferForFile(Path);
-    // If we failed to read (but stat succeeded), don't cache failure.
-    if (!Buf) {
-      Size = -1;
-      MTime = {};
-      return;
-    }
-
-    // If file changed between stat and open, we don't know its mtime.
-    // For simplicity, don't cache the value in this case (use a bad key).
-    if (Buf->get()->getBufferSize() != Size) {
-      Size = -1;
-      MTime = {};
-    }
-
-    // Finally parse and compile the actual fragments.
-    for (auto &Fragment :
-         Fragment::parseYAML(Buf->get()->getBuffer(), Path, DC))
-      CachedValue.push_back(std::move(Fragment).compile(DC));
-  }
+class FileConfigCache : public FileCache {
+  mutable llvm::SmallVector<CompiledFragment, 1> CachedValue;
 
 public:
-  // Must be set before the cache is used. Not a constructor param to allow
-  // computing ancestor-relative paths to be deferred.
-  std::string Path;
-
-  // Retrieves up-to-date config fragments from disk.
-  // A cached result may be reused if the mtime and size are unchanged.
-  // (But several concurrent read()s can miss the cache after a single change).
-  // Future performance ideas:
-  // - allow caches to be reused based on short elapsed walltime
-  // - allow latency-sensitive operations to skip revalidating the cache
-  void read(const ThreadsafeFS &TFS, DiagnosticCallback DC,
-            llvm::Optional<std::chrono::steady_clock::time_point> FreshTime,
-            std::vector<CompiledFragment> &Out) {
-    std::lock_guard<std::mutex> Lock(Mu);
-    // We're going to update the cache and return whatever's in it.
-    auto Return = llvm::make_scope_exit(
-        [&] { llvm::copy(CachedValue, std::back_inserter(Out)); });
-
-    // Return any sufficiently recent result without doing any further work.
-    if (FreshTime && ValidTime >= FreshTime)
-      return;
-
-    // Ensure we bump the ValidTime at the end to allow for reuse.
-    auto MarkTime = llvm::make_scope_exit(
-        [&] { ValidTime = std::chrono::steady_clock::now(); });
-
-    // Stat is cheaper than opening the file, it's usually unchanged.
-    assert(llvm::sys::path::is_absolute(Path));
-    auto FS = TFS.view(/*CWD=*/llvm::None);
-    auto Stat = FS->status(Path);
-    // If there's no file, the result is empty. Ensure we have an invalid key.
-    if (!Stat || !Stat->isRegularFile()) {
-      MTime = {};
-      Size = -1;
-      CachedValue.clear();
-      return;
-    }
-    // If the modified-time and size match, assume the content does too.
-    if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime())
-      return;
-
-    // OK, the file has actually changed. Update cache key, compute new value.
-    Size = Stat->getSize();
-    MTime = Stat->getLastModificationTime();
-    fillCacheFromDisk(*FS, DC);
+  FileConfigCache(llvm::StringRef Path) : FileCache(Path) {}
+
+  void get(const ThreadsafeFS &TFS, DiagnosticCallback DC,
+           std::chrono::steady_clock::time_point FreshTime,
+           std::vector<CompiledFragment> &Out) const {
+    read(
+        TFS, FreshTime,
+        [&](llvm::Optional<llvm::StringRef> Data) {
+          CachedValue.clear();
+          if (Data)
+            for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC))
+              CachedValue.push_back(std::move(Fragment).compile(DC));
+        },
+        [&]() { llvm::copy(CachedValue, std::back_inserter(Out)); });
   }
 };
 
@@ -113,15 +53,13 @@
     std::vector<CompiledFragment>
     getFragments(const Params &P, DiagnosticCallback DC) const override {
       std::vector<CompiledFragment> Result;
-      Cache.read(FS, DC, P.FreshTime, Result);
+      Cache.get(FS, DC, P.FreshTime, Result);
       return Result;
     };
 
   public:
-    AbsFileProvider(llvm::StringRef Path, const ThreadsafeFS &FS) : FS(FS) {
-      assert(llvm::sys::path::is_absolute(Path));
-      Cache.Path = Path.str();
-    }
+    AbsFileProvider(llvm::StringRef Path, const ThreadsafeFS &FS)
+        : Cache(Path), FS(FS) {}
   };
 
   return std::make_unique<AbsFileProvider>(AbsPath, FS);
@@ -164,21 +102,21 @@
       {
         std::lock_guard<std::mutex> Lock(Mu);
         for (llvm::StringRef Ancestor : Ancestors) {
-          auto R = Cache.try_emplace(Ancestor);
+          auto It = Cache.find(Ancestor);
           // Assemble the actual config file path only once.
-          if (R.second) {
+          if (It == Cache.end()) {
             llvm::SmallString<256> ConfigPath = Ancestor;
             path::append(ConfigPath, RelPath);
-            R.first->second.Path = ConfigPath.str().str();
+            It = Cache.try_emplace(Ancestor, ConfigPath.str()).first;
           }
-          Caches.push_back(&R.first->second);
+          Caches.push_back(&It->second);
         }
       }
       // Finally query each individual file.
       // This will take a (per-file) lock for each file that actually exists.
       std::vector<CompiledFragment> Result;
       for (FileConfigCache *Cache : Caches)
-        Cache->read(FS, DC, P.FreshTime, Result);
+        Cache->get(FS, DC, P.FreshTime, Result);
       return Result;
     };
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D88172: [clangd] Extrac... Sam McCall via Phabricator via cfe-commits

Reply via email to