This is an automated email from the ASF dual-hosted git repository.
zwoop pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 51a277dd98 Adds Cache Groups concepts to Cripts (#12743)
51a277dd98 is described below
commit 51a277dd98b29bba3052acb7dc8a7a38d7d77b60
Author: Leif Hedstrom <[email protected]>
AuthorDate: Wed Mar 11 13:27:53 2026 -0700
Adds Cache Groups concepts to Cripts (#12743)
* Adds Cache Groups concepts to Cripts
- cleans up the notion around cached URLs and headers, and cache keys.
- adds APIs to set the lookup status as well
* Changes from code review
* Address Bryan's review issues
WriteToDisk previously updated last_sync and cleared the transaction
log before confirming syncMap succeeded. On rename failure the log
would be lost even though the map was never written to disk.
syncMap now returns bool; WriteToDisk reverts last_sync on failure and
only calls clearLog when all dirty slots have been successfully synced.
Adds Catch2 unit tests covering basic insert/lookup, persist-and-reload,
transaction log replay across restarts, corrupt/truncated/wrong-version
map file handling, sync failure log preservation, and map rotation.
More unit tests will be done in a future PR.
* Address Copilot review comments
Add missing standard headers in CacheGroup.cc. Use std::error_code
overload for filesystem::remove in syncMap error paths. Add <filesystem>
and <system_error> to CacheGroup.hpp. Use fixed-width types in _MapHeader
for a stable on-disk format. Fix max_to_process calculation in the sync
continuation to spread groups evenly across the sync window.
* Address bryancall's review comments
Fix sync retry logic: revert last_sync to its previous value on
syncMap failure instead of setting it to last_write. The old code
made the slot appear clean, preventing retries until a new Insert
bumped last_write again.
---
doc/developer-guide/cripts/cripts-misc.en.rst | 62 ++++
example/cripts/cache_groups.cc | 106 ++++++
include/cripts/CacheGroup.hpp | 217 ++++++++++++
src/cripts/CMakeLists.txt | 10 +-
src/cripts/CacheGroup.cc | 453 ++++++++++++++++++++++++++
src/cripts/unit_tests/stub.cc | 78 +++++
src/cripts/unit_tests/test_CacheGroup.cc | 224 +++++++++++++
7 files changed, 1149 insertions(+), 1 deletion(-)
diff --git a/doc/developer-guide/cripts/cripts-misc.en.rst
b/doc/developer-guide/cripts/cripts-misc.en.rst
index dd582e6eda..67c19adb19 100644
--- a/doc/developer-guide/cripts/cripts-misc.en.rst
+++ b/doc/developer-guide/cripts/cripts-misc.en.rst
@@ -414,3 +414,65 @@ Debug logging uses the same format string syntax as
``fmt::format()`` in ``libfm
debug tags in your ATS configuration to enable debug output for your
Cripts.
The default debug tag for Cripts is the name of the Cript itself, either
the Cript source file, or the compiled plugin name.
+
+.. _cripts-misc-cache-groups:
+
+Cache Groups
+============
+
+As a way to manage association between cache entries, Cripts provides an
infrastructure
+for cache groups. A cache group is a set of cache entries that are logically
+associated with each other via custom identifiers.
+
+Example implementation of the Cache Groups RFC:
+
+.. code-block:: cpp
+
+ do_create_instance()
+ {
+ // Create a cache-group for this site / remap rule(s). They can be shared.
+ instance.data[0] = cripts::Cache::Group::Manager::Factory("example_site");
+ }
+
+ do_delete_instance()
+ {
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr) {
+ delete static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr);
+ instance.data[0] = nullptr;
+ }
+ }
+
+ do_cache_lookup()
+ {
+ if (cached.response.lookupstatus != cripts::LookupStatus::MISS) {
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr) {
+ auto date = cached.response.AsDate("Date");
+ if (date != 0) {
+ auto cache_groups = cached.response["Cache-Groups"];
+ if (!cache_groups.empty()) {
+ borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group>
*>(ptr);
+ if (cg->Lookup(cache_groups.split(','), date)) {
+ cached.response.lookupstatus = cripts::LookupStatus::HIT_STALE;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ do_read_response()
+ {
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr) {
+ auto invalidation = client.request["Cache-Group-Invalidation"];
+ if (!invalidation.empty()) {
+ borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group>
*>(ptr);
+ cg->Insert(invalidation.split(','));
+ }
+ }
+ }
diff --git a/example/cripts/cache_groups.cc b/example/cripts/cache_groups.cc
new file mode 100644
index 0000000000..b51c1ae27c
--- /dev/null
+++ b/example/cripts/cache_groups.cc
@@ -0,0 +1,106 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#define CRIPTS_CONVENIENCE_APIS 1
+
+#include <cripts/CacheGroup.hpp>
+#include <cripts/Preamble.hpp>
+
+do_create_instance()
+{
+ // Create a cache-group for this site / remap rule(s). They can be shared.
+ instance.data[0] = cripts::Cache::Group::Manager::Factory("example");
+}
+
+do_delete_instance()
+{
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr) {
+ delete static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr);
+ instance.data[0] = nullptr;
+ }
+}
+
+do_cache_lookup()
+{
+ if (cached.response.lookupstatus != cripts::LookupStatus::MISS) {
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr) {
+ auto date = cached.response.AsDate("Date");
+
+ if (date != 0) {
+ auto cache_groups = cached.response["Cache-Groups"];
+
+ CDebug("Looking up {}", cache_groups);
+ if (!cache_groups.empty()) {
+ borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group>
*>(ptr);
+
+ if (cg->Lookup(cache_groups.split(','), date)) {
+ CDebug("Cache Group hit, forcing revalidation for request");
+ cached.response.lookupstatus = cripts::LookupStatus::HIT_STALE;
+ }
+ }
+ }
+ }
+ }
+}
+
+do_read_response()
+{
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr) {
+ auto invalidation = client.request["Cache-Group-Invalidation"];
+
+ if (!invalidation.empty()) {
+ borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr);
+
+ cg->Insert(invalidation.split(','));
+ }
+ }
+
+// This is just for simulating origin responses that would include
cache-groups.
+#if 0
+ server.response["Cache-Groups"] = "\"foo\", \"bar\"";
+#endif
+}
+
+// The RFC draft does not support / provide definitions for this. It is useful,
+// but should be protected with appropriate ACLs / authentication.
+#if 0
+do_remap()
+{
+ void *ptr = AsPointer(instance.data[0]);
+
+ if (ptr && urls.pristine.path == ".well-known/Cache-Groups") {
+ auto invalidation = client.request["Cache-Group-Invalidation"];
+
+ if (!invalidation.empty()) {
+ borrow cg = *static_cast<std::shared_ptr<cripts::Cache::Group> *>(ptr);
+
+ cg->Insert(invalidation.split(','));
+ CDebug("Forcing a cache miss for cache-groups: {}", invalidation);
+ StatusCode(202);
+ }
+ }
+}
+#endif
+
+#include <cripts/Epilogue.hpp>
diff --git a/include/cripts/CacheGroup.hpp b/include/cripts/CacheGroup.hpp
new file mode 100644
index 0000000000..1c97e33777
--- /dev/null
+++ b/include/cripts/CacheGroup.hpp
@@ -0,0 +1,217 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+#pragma once
+
+#include <unordered_map>
+#include <string>
+#include <vector>
+#include <chrono>
+#include <filesystem>
+#include <mutex>
+#include <shared_mutex>
+#include <fstream>
+#include <memory>
+#include <cstdint>
+#include <system_error>
+
+#include "cripts/Context.hpp"
+#include "cripts/Time.hpp"
+
+// Implemented in the .cc file
+int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata);
+
+namespace cripts::Cache
+{
+
+class Group
+{
+private:
+ using self_type = Group;
+
+ struct _Entry {
+ cripts::Time::Point timestamp; // Timestamp of when the entry was created
+ size_t length; // Length of the group ID
+ uint32_t prefix; // First 4 characters of the group ID
+ uint64_t hash; // Hash value of the group ID, needed when
writing to disk
+ };
+
+ // Header structure for on-disk map files (after VERSION field)
+ struct _MapHeader {
+ int64_t created_ts;
+ int64_t last_write_ts;
+ int64_t last_sync_ts;
+ uint64_t count;
+ };
+
+ using _MapType = std::unordered_map<uint64_t, _Entry>;
+
+ struct _MapSlot {
+ std::unique_ptr<_MapType> map;
+ std::string path;
+ cripts::Time::Point created;
+ cripts::Time::Point last_write;
+ cripts::Time::Point last_sync;
+ };
+
+public:
+ static constexpr uint64_t VERSION = (static_cast<uint64_t>('C') << 56) |
(static_cast<uint64_t>('G') << 48) |
+ (static_cast<uint64_t>('M') << 40) |
(static_cast<uint64_t>('A') << 32) |
+ (static_cast<uint64_t>('P') << 24) |
(static_cast<uint64_t>('S') << 16) |
+ (static_cast<uint64_t>('0') << 8) |
0x00; // Change this on version bump
+
+ static constexpr std::chrono::seconds DEFAULT_MAX_AGE{63072000}; // 2 Years,
max cache lifetime in ATS as well
+
+ Group(const std::string &name, const std::string &base_dir, size_t
max_entries = 1024, size_t num_maps = 3)
+ {
+ Initialize(name, base_dir, max_entries, num_maps, DEFAULT_MAX_AGE);
+ }
+
+ // Not used at the moment.
+ Group() = default;
+
+ ~Group() { WriteToDisk(); }
+
+ Group(const self_type &) = delete;
+ self_type &operator=(const self_type &) = delete;
+
+ void Initialize(const std::string &name, const std::string &base_dir, size_t
max_entries = 1024, size_t num_maps = 3,
+ std::chrono::seconds max_age = DEFAULT_MAX_AGE);
+
+ void
+ SetMaxEntries(size_t max_entries)
+ {
+ std::unique_lock lock(_mutex);
+ _max_entries = max_entries;
+ }
+
+ void
+ SetMaxAge(std::chrono::seconds max_age)
+ {
+ std::unique_lock lock(_mutex);
+ _max_age = max_age;
+ }
+
+ void Insert(cripts::string_view key);
+ void Insert(const std::vector<cripts::string_view> &keys);
+ bool Lookup(cripts::string_view key, cripts::Time::Point age) const;
+ bool Lookup(const std::vector<cripts::string_view> &keys,
cripts::Time::Point age) const;
+
+ bool
+ Lookup(cripts::string_view key, time_t age) const
+ {
+ return Lookup(key, cripts::Time::Clock::from_time_t(age));
+ }
+
+ bool
+ Lookup(const std::vector<cripts::string_view> &keys, time_t age) const
+ {
+ return Lookup(keys, cripts::Time::Clock::from_time_t(age));
+ }
+
+ cripts::Time::Point
+ LastSync() const
+ {
+ std::shared_lock lock(_mutex);
+ return _last_sync;
+ }
+
+ void WriteToDisk();
+ void LoadFromDisk();
+
+private:
+ mutable std::shared_mutex _mutex;
+ std::string _name = "CacheGroup";
+ size_t _num_maps = 3;
+ size_t _max_entries = 1024;
+ std::chrono::seconds _max_age = DEFAULT_MAX_AGE;
+ size_t _map_index = 0;
+ cripts::Time::Point _last_sync = cripts::Time::Point{};
+
+ std::vector<_MapSlot> _slots;
+ std::ofstream _txn_log;
+ std::string _log_path;
+ std::string _base_dir;
+
+ void appendLog(const _Entry &entry);
+ void clearLog();
+ bool syncMap(size_t index);
+
+public:
+ class Manager
+ {
+ friend int ::_cripts_cache_group_sync(TSCont cont, TSEvent event, void
*edata);
+ using self_type = Manager;
+
+ public:
+ static void *Factory(const std::string &name, size_t max_entries = 1024,
size_t num_maps = 3);
+
+ Manager(const self_type &) = delete;
+ self_type &operator=(const self_type &) = delete;
+
+ protected:
+ void _scheduleCont();
+
+ std::unordered_map<std::string, std::weak_ptr<Group>> _groups;
+ std::mutex _mutex;
+
+ private:
+ Manager()
+ {
+ _base_dir = TSRuntimeDirGet();
+
+ if (std::filesystem::exists(_base_dir)) {
+ _base_dir += "/cache_groups";
+ if (!std::filesystem::exists(_base_dir)) {
+ std::error_code ec;
+
+ std::filesystem::create_directories(_base_dir, ec);
+ if (ec) {
+ TSError("cripts::Cache::Group::Manager: Failed to create directory
`%s': %s", _base_dir.c_str(), ec.message().c_str());
+ } else {
+ std::filesystem::permissions(_base_dir,
std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec);
+
+ if (ec) {
+ TSWarning("cripts::Cache::Group::Manager: Failed to set
permissions on `%s': %s", _base_dir.c_str(),
+ ec.message().c_str());
+ }
+ }
+ }
+ }
+ _scheduleCont(); // Kick it off
+ }
+
+ ~Manager()
+ {
+ if (_action) {
+ TSActionCancel(_action);
+ _action = nullptr;
+ }
+ if (_cont) {
+ TSContDestroy(_cont);
+ _cont = nullptr;
+ }
+ }
+
+ static self_type &_instance();
+
+ TSCont _cont = nullptr;
+ TSAction _action = nullptr;
+ std::string _base_dir;
+ };
+};
+} // namespace cripts::Cache
diff --git a/src/cripts/CMakeLists.txt b/src/cripts/CMakeLists.txt
index 6e151f46f3..dbe17a3f18 100644
--- a/src/cripts/CMakeLists.txt
+++ b/src/cripts/CMakeLists.txt
@@ -23,11 +23,12 @@ pkg_check_modules(PCRE2 REQUIRED IMPORTED_TARGET libpcre2-8)
# The source files, globbed so we can drop in local / custom Bundles and
extensions.
file(GLOB CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/*.cc
${PROJECT_SOURCE_DIR}/src/cripts/*/*.cc)
-file(GLOB TEST_CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/tests/*.cc)
+file(GLOB TEST_CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/tests/*.cc
${PROJECT_SOURCE_DIR}/src/cripts/unit_tests/*.cc)
list(REMOVE_ITEM CPP_FILES ${TEST_CPP_FILES})
set(CRIPTS_PUBLIC_HEADERS
${PROJECT_SOURCE_DIR}/include/cripts/Bundle.hpp
+ ${PROJECT_SOURCE_DIR}/include/cripts/CacheGroup.hpp
${PROJECT_SOURCE_DIR}/include/cripts/Certs.hpp
${PROJECT_SOURCE_DIR}/include/cripts/Configs.hpp
${PROJECT_SOURCE_DIR}/include/cripts/ConfigsBase.hpp
@@ -92,3 +93,10 @@ add_custom_target(
)
clang_tidy_check(cripts)
+
+if(BUILD_TESTING)
+ add_executable(test_cripts unit_tests/test_CacheGroup.cc CacheGroup.cc
unit_tests/stub.cc)
+ target_include_directories(test_cripts PRIVATE ${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/src)
+ target_link_libraries(test_cripts PRIVATE libswoc::libswoc fmt::fmt
Catch2::Catch2WithMain)
+ add_test(NAME test_cripts COMMAND test_cripts)
+endif()
diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc
new file mode 100644
index 0000000000..a20343ab69
--- /dev/null
+++ b/src/cripts/CacheGroup.cc
@@ -0,0 +1,453 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <filesystem>
+#include <unordered_set>
+
+#include "ts/ts.h"
+#include "cripts/CacheGroup.hpp"
+
+inline static uint32_t
+_make_prefix_int(cripts::string_view key)
+{
+ uint32_t prefix = 0;
+
+ std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size()));
+ return prefix;
+}
+
+// Stuff around the disk sync continuation
+constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation
+constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s
+
+int
+_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */)
+{
+ auto *mgr = static_cast<cripts::Cache::Group::Manager
*>(TSContDataGet(cont));
+ std::lock_guard lock(mgr->_mutex);
+ auto &groups = mgr->_groups;
+
+ constexpr size_t runs_per_window = _SYNC_GROUP_EVERY / _CONT_SYNC_INTERVAL;
+ const size_t max_to_process = (groups.size() + (runs_per_window - 1)) /
runs_per_window;
+ size_t processed = 0;
+ auto now = cripts::Time::Clock::now();
+
+ for (auto it = groups.begin(); it != groups.end() && processed <
max_to_process;) {
+ if (auto group = it->second.lock()) {
+ if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) {
+ group->WriteToDisk();
+ ++processed;
+ }
+ ++it;
+ } else {
+ // The group has been deleted, remove it from the map
+ it = groups.erase(it);
+ }
+ }
+
+ return TS_SUCCESS;
+}
+
+namespace cripts
+{
+
+void
+Cache::Group::Initialize(const std::string &name, const std::string &base_dir,
size_t max_entries, size_t num_maps,
+ std::chrono::seconds max_age)
+{
+ cripts::Time::Point zero = cripts::Time::Point{};
+ cripts::Time::Point now = cripts::Time::Clock::now();
+
+ _name = name;
+ _num_maps = num_maps;
+ _max_entries = max_entries;
+ _max_age = max_age;
+
+ _base_dir = base_dir + "/" + _name;
+ _log_path = _base_dir + "/" + "txn.log";
+
+ if (!std::filesystem::exists(_base_dir)) {
+ std::error_code ec;
+
+ std::filesystem::create_directories(_base_dir, ec);
+ if (ec) {
+ TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s",
_base_dir.c_str(), ec.message().c_str());
+ } else {
+ std::filesystem::permissions(_base_dir,
std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec);
+ if (ec) {
+ TSWarning("cripts::Cache::Group: Failed to set permissions on `%s':
%s", _base_dir.c_str(), ec.message().c_str());
+ }
+ }
+ }
+
+ for (size_t ix = 0; ix < _num_maps; ++ix) {
+ _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(),
+ .path = _base_dir + "/map_" +
std::to_string(ix) + ".bin",
+ .created = zero,
+ .last_write = zero,
+ .last_sync = zero});
+ }
+
+ _slots[0].created = now;
+ _slots[0].last_write = now;
+
+ LoadFromDisk();
+ _last_sync = now;
+}
+
+void
+Cache::Group::Insert(cripts::string_view key)
+{
+ static const std::hash<cripts::string_view> hasher;
+
+ // Trim any "'s and white spaces from the key
+ key.trim_if(&isspace).trim('"');
+
+ auto now = cripts::Time::Clock::now();
+ auto hash = static_cast<uint64_t>(hasher(key));
+ uint32_t prefix = _make_prefix_int(key);
+
+ std::unique_lock lock(_mutex);
+
+ auto &slot = _slots[_map_index];
+ auto it = slot.map->find(hash);
+
+ if (it != slot.map->end() && it->second.length == key.size() &&
it->second.prefix == prefix) {
+ it->second.timestamp = now;
+ slot.last_write = now;
+ appendLog(it->second);
+
+ return;
+ }
+
+ Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix =
prefix, .hash = hash};
+
+ slot.map->insert_or_assign(hash, entry);
+ slot.last_write = now;
+ appendLog(entry);
+
+ if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) {
+ _map_index = (_map_index + 1) % _slots.size();
+
+ auto &next_slot = _slots[_map_index];
+
+ if (next_slot.last_write > next_slot.last_sync) {
+ TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This
may lead to data loss.", _name.c_str());
+ }
+ next_slot.map->clear();
+ next_slot.created = now;
+ next_slot.last_write = now;
+ syncMap(_map_index); // Sync to disk will make sure it's empty on disk
+ }
+}
+
+void
+Cache::Group::Insert(const std::vector<cripts::string_view> &keys)
+{
+ for (auto &key : keys) {
+ Insert(key);
+ }
+}
+
+bool
+Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const
+{
+ // Trim any "'s and whitespaces from the key
+ key.trim_if(&isspace).trim('"');
+
+ uint64_t hash =
static_cast<uint64_t>(std::hash<cripts::string_view>{}(key));
+ std::shared_lock lock(_mutex);
+
+ for (size_t i = 0; i < _slots.size(); ++i) {
+ size_t map_index = (_map_index + _slots.size() - i) % _slots.size();
+ const auto &slot = _slots[map_index];
+
+ if (slot.last_write < age) {
+ continue; // Skip maps that haven't been written to since this time
+ }
+
+ const auto &map = *slot.map;
+ auto it = map.find(hash);
+
+ if (it != map.end()) {
+ const Cache::Group::_Entry &entry = it->second;
+
+ if (entry.timestamp < age || entry.length != key.size() || entry.prefix
!= _make_prefix_int(key)) {
+ continue;
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool
+Cache::Group::Lookup(const std::vector<cripts::string_view> &keys,
cripts::Time::Point age) const
+{
+ for (auto &key : keys) {
+ if (Lookup(key, age)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void
+Cache::Group::LoadFromDisk()
+{
+ std::unique_lock lock(_mutex);
+ std::ifstream log(_log_path, std::ios::binary);
+ std::unordered_set<uint64_t> loaded_hashes;
+
+ for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) {
+ auto &slot = _slots[slot_ix];
+ uint64_t version_id = 0;
+ _MapHeader header{};
+ std::ifstream file(slot.path, std::ios::binary);
+
+ if (!file) {
+ continue;
+ }
+
+ file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id));
+ if (version_id != VERSION) {
+ TSWarning("cripts::Cache::Group: Version mismatch for map file: %s,
expected %llu, got %llu. Skipping this map.",
+ slot.path.c_str(), static_cast<unsigned long long>(VERSION),
static_cast<unsigned long long>(version_id));
+ continue;
+ }
+
+ file.read(reinterpret_cast<char *>(&header), sizeof(header));
+ if (!file) {
+ TSWarning("cripts::Cache::Group: Failed to read header from map file:
%s. Skipping this map.", slot.path.c_str());
+ continue;
+ }
+
+ slot.created = cripts::Time::Clock::from_time_t(header.created_ts);
+ slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts);
+ slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts);
+
+ for (size_t i = 0; i < header.count; ++i) {
+ Cache::Group::_Entry entry;
+
+ file.read(reinterpret_cast<char *>(&entry), sizeof(entry));
+ if (!file) {
+ TSWarning("cripts::Cache::Group: Failed to read entry %zu from map
file: %s. Stopping entry load.", i, slot.path.c_str());
+ break;
+ }
+ if (!loaded_hashes.contains(entry.hash)) {
+ slot.map->insert_or_assign(entry.hash, entry);
+ loaded_hashes.insert(entry.hash);
+ }
+ }
+ }
+
+ // Sort the slots by creation time, newest first, since we'll start with
index 0 upon loading
+ std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return
a.created > b.created; });
+
+ // Replay any entries from the transaction log, and then truncate it
+ if (log) {
+ Cache::Group::_Entry entry;
+ auto last_write = cripts::Time::Clock::from_time_t(0);
+
+ while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) {
+ _slots[0].map->insert_or_assign(entry.hash, entry);
+ last_write = std::max(last_write, entry.timestamp);
+ }
+ _slots[0].last_write = last_write;
+ clearLog();
+ }
+}
+
+void
+Cache::Group::WriteToDisk()
+{
+ std::unique_lock lock(_mutex);
+
+ auto now = cripts::Time::Clock::now();
+ bool any_dirty = false;
+ bool all_synced = true;
+
+ for (size_t ix = 0; ix < _slots.size(); ++ix) {
+ if (_slots[ix].last_write > _slots[ix].last_sync) {
+ auto old_sync = _slots[ix].last_sync;
+ any_dirty = true;
+ _slots[ix].last_sync = now;
+ if (syncMap(ix)) {
+ _last_sync = now;
+ } else {
+ _slots[ix].last_sync = old_sync; // revert so next call retries
+ all_synced = false;
+ }
+ }
+ }
+
+ if (any_dirty && all_synced) {
+ clearLog();
+ }
+}
+
+void
+Cache::Group::appendLog(const Cache::Group::_Entry &entry)
+{
+ if (!_txn_log.is_open() || !_txn_log.good()) {
+ _txn_log.open(_log_path, std::ios::app | std::ios::out | std::ios::binary);
+ if (!_txn_log) {
+ TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.",
_log_path.c_str());
+ return;
+ }
+ }
+
+ _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry));
+ _txn_log.flush();
+}
+
+bool
+Cache::Group::syncMap(size_t index)
+{
+ constexpr size_t BUFFER_SIZE = 64 * 1024;
+ std::array<std::byte, BUFFER_SIZE> buffer;
+ size_t buf_pos = 0;
+ bool write_failed = false;
+ const auto &slot = _slots[index];
+ const std::string tmp_path = slot.path + ".tmp";
+ std::ofstream tmp_file(tmp_path, std::ios::binary |
std::ios::trunc);
+
+ if (!tmp_file) {
+ TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.",
tmp_path.c_str());
+ return false;
+ }
+
+ auto _appendToBuffer = [&](const void *data, size_t size) {
+ if (write_failed) {
+ return;
+ }
+ if (buf_pos + size > buffer.size()) {
+ tmp_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos);
+ if (!tmp_file) {
+ write_failed = true;
+ return;
+ }
+ buf_pos = 0;
+ }
+ std::memcpy(buffer.data() + buf_pos, static_cast<const std::byte *>(data),
size);
+ buf_pos += size;
+ };
+
+ _MapHeader header{.created_ts =
cripts::Time::Clock::to_time_t(slot.created),
+ .last_write_ts =
cripts::Time::Clock::to_time_t(slot.last_write),
+ .last_sync_ts =
cripts::Time::Clock::to_time_t(slot.last_sync),
+ .count = slot.map->size()};
+
+ _appendToBuffer(&VERSION, sizeof(VERSION));
+ _appendToBuffer(&header, sizeof(header));
+
+ for (const auto &[_, entry] : *slot.map) {
+ _appendToBuffer(&entry, sizeof(entry));
+ }
+
+ if (buf_pos > 0 && !write_failed) {
+ tmp_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos);
+ }
+ tmp_file.flush();
+ tmp_file.close();
+
+ if (write_failed || !tmp_file) {
+ TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.",
tmp_path.c_str());
+ std::error_code ec;
+ std::filesystem::remove(tmp_path, ec);
+ return false;
+ }
+
+ if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) {
+ TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to
`%s'.", tmp_path.c_str(), slot.path.c_str());
+ std::error_code ec;
+ std::filesystem::remove(tmp_path, ec);
+ return false;
+ }
+
+ return true;
+}
+
+void
+Cache::Group::clearLog()
+{
+ std::error_code ec;
+
+ _txn_log.close();
+ std::filesystem::remove(_log_path, ec);
+ if (ec) {
+ TSWarning("cripts::Cache::Group: Failed to clear transaction log `%s':
%s", _log_path.c_str(), ec.message().c_str());
+ }
+}
+
+// Singleton instance for the Cache::Group::Manager
+Cache::Group::Manager &
+Cache::Group::Manager::_instance()
+{
+ static Cache::Group::Manager inst;
+ return inst;
+}
+
+void *
+Cache::Group::Manager::Factory(const std::string &name, size_t max_entries,
size_t num_maps)
+{
+ std::lock_guard lock(_instance()._mutex);
+ auto &groups = _instance()._groups;
+
+ if (auto it = groups.find(name); it != groups.end()) {
+ if (auto group = it->second.lock()) {
+ return new std::shared_ptr<Group>(std::move(group));
+ }
+ }
+
+ if (!_instance()._base_dir.empty()) {
+ auto group = std::make_shared<Group>(name, _instance()._base_dir,
max_entries, num_maps);
+
+ groups[name] = group;
+ return new std::shared_ptr<Group>(std::move(group));
+ } else {
+ TSError("cripts::Cache::Group: Failed to get runtime directory for
initialization.");
+ return nullptr;
+ }
+}
+
+void
+Cache::Group::Manager::_scheduleCont()
+{
+ if (!_cont) {
+ _cont = TSContCreate(_cripts_cache_group_sync, TSMutexCreate());
+ TSContDataSet(_cont, this);
+ }
+
+ if (_action) {
+ TSActionCancel(_action);
+ _action = nullptr;
+ }
+
+ _action = TSContScheduleEveryOnPool(_cont, _CONT_SYNC_INTERVAL * 1000,
TS_THREAD_POOL_TASK);
+}
+
+} // namespace cripts
diff --git a/src/cripts/unit_tests/stub.cc b/src/cripts/unit_tests/stub.cc
new file mode 100644
index 0000000000..65680db4a1
--- /dev/null
+++ b/src/cripts/unit_tests/stub.cc
@@ -0,0 +1,78 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Stubs for the ATS plugin API symbols referenced by CacheGroup.cc.
+// Only the Group class (not Manager) is exercised in unit tests, so the
+// TSCont/TSAction/TSMutex stubs never execute — they just satisfy the linker.
+
+#include "ts/ts.h"
+
+void
+TSWarning(const char * /* fmt */, ...)
+{
+}
+
+void
+TSError(const char * /* fmt */, ...)
+{
+}
+
+TSCont
+TSContCreate(TSEventFunc, TSMutex)
+{
+ return nullptr;
+}
+
+void
+TSContDataSet(TSCont, void *)
+{
+}
+
+void *
+TSContDataGet(TSCont)
+{
+ return nullptr;
+}
+
+TSMutex
+TSMutexCreate()
+{
+ return nullptr;
+}
+
+TSAction
+TSContScheduleEveryOnPool(TSCont, TSHRTime, TSThreadPool)
+{
+ return nullptr;
+}
+
+void
+TSActionCancel(TSAction)
+{
+}
+
+void
+TSContDestroy(TSCont)
+{
+}
+
+const char *
+TSRuntimeDirGet()
+{
+ return "/tmp";
+}
diff --git a/src/cripts/unit_tests/test_CacheGroup.cc
b/src/cripts/unit_tests/test_CacheGroup.cc
new file mode 100644
index 0000000000..aa46e01162
--- /dev/null
+++ b/src/cripts/unit_tests/test_CacheGroup.cc
@@ -0,0 +1,224 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "cripts/CacheGroup.hpp"
+
+#include <catch2/catch_test_macros.hpp>
+#include <filesystem>
+#include <fstream>
+
+// RAII temp directory that cleans up after each test
+struct TempDir {
+ std::filesystem::path path;
+
+ TempDir()
+ {
+ path = std::filesystem::temp_directory_path() /
+ ("cg_test_" +
std::to_string(std::chrono::steady_clock::now().time_since_epoch().count()));
+ std::filesystem::create_directories(path);
+ }
+
+ ~TempDir() { std::filesystem::remove_all(path); }
+
+ std::string
+ str() const
+ {
+ return path.string();
+ }
+};
+
+TEST_CASE("CacheGroup: basic insert and lookup", "[cripts][CacheGroup]")
+{
+ TempDir dir;
+ cripts::Cache::Group g("test", dir.str());
+
+ g.Insert("url1");
+ g.Insert("url2");
+
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+ CHECK(g.Lookup("url1", epoch));
+ CHECK(g.Lookup("url2", epoch));
+ CHECK_FALSE(g.Lookup("url3", epoch));
+}
+
+TEST_CASE("CacheGroup: persist and reload", "[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ {
+ cripts::Cache::Group g("test", dir.str());
+ g.Insert("key_a");
+ g.Insert("key_b");
+ g.WriteToDisk();
+ }
+
+ cripts::Cache::Group g2("test", dir.str());
+ CHECK(g2.Lookup("key_a", epoch));
+ CHECK(g2.Lookup("key_b", epoch));
+ CHECK_FALSE(g2.Lookup("key_c", epoch));
+}
+
+TEST_CASE("CacheGroup: transaction log replay on restart",
"[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ {
+ cripts::Cache::Group g("test", dir.str());
+ g.Insert("persisted");
+ g.WriteToDisk();
+
+ // Insert more keys — these go to the txn log but maps are not re-synced
+ g.Insert("in_log_only");
+ }
+ // The txn log should still exist since WriteToDisk was not called after the
second Insert
+
+ // Reload: log should be replayed
+ cripts::Cache::Group g2("test", dir.str());
+ CHECK(g2.Lookup("persisted", epoch));
+ CHECK(g2.Lookup("in_log_only", epoch));
+}
+
+TEST_CASE("CacheGroup: corrupt map file is skipped", "[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ {
+ cripts::Cache::Group g("test", dir.str(), 1024, 2);
+ g.Insert("good_key");
+ g.WriteToDisk();
+ }
+
+ // Corrupt the first map file
+ auto map_path = dir.path / "test" / "map_0.bin";
+ if (std::filesystem::exists(map_path)) {
+ std::ofstream corrupt(map_path, std::ios::binary | std::ios::trunc);
+ corrupt << "JUNK_DATA_GARBAGE";
+ }
+
+ // Reload — corrupt map should be skipped; good_key is lost (log was cleared
by
+ // WriteToDisk), but the group must still accept new inserts without
crashing.
+ cripts::Cache::Group g2("test", dir.str(), 1024, 2);
+ CHECK_FALSE(g2.Lookup("good_key", epoch));
+ g2.Insert("new_key");
+ CHECK(g2.Lookup("new_key", epoch));
+}
+
+TEST_CASE("CacheGroup: truncated map file is handled gracefully",
"[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ {
+ cripts::Cache::Group g("test", dir.str(), 1024, 2);
+ g.Insert("truncated_key");
+ g.WriteToDisk();
+ }
+
+ // Truncate the map file to just the version field (incomplete header)
+ auto map_path = dir.path / "test" / "map_0.bin";
+ if (std::filesystem::exists(map_path)) {
+ auto size = std::filesystem::file_size(map_path);
+ if (size > sizeof(uint64_t)) {
+ std::filesystem::resize_file(map_path, sizeof(uint64_t) + 1); // version
+ 1 byte of header
+ }
+ }
+
+ // Reload — truncated header should be skipped; truncated_key is lost (log
was
+ // cleared by WriteToDisk), but the group must recover and accept new
inserts.
+ cripts::Cache::Group g2("test", dir.str(), 1024, 2);
+ CHECK_FALSE(g2.Lookup("truncated_key", epoch));
+ g2.Insert("after_truncation");
+ CHECK(g2.Lookup("after_truncation", epoch));
+}
+
+TEST_CASE("CacheGroup: wrong version map file is skipped",
"[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ {
+ cripts::Cache::Group g("test", dir.str(), 1024, 2);
+ g.Insert("versioned_key");
+ g.WriteToDisk();
+ }
+
+ // Overwrite the version field with a bad value
+ auto map_path = dir.path / "test" / "map_0.bin";
+ if (std::filesystem::exists(map_path)) {
+ std::fstream f(map_path, std::ios::in | std::ios::out | std::ios::binary);
+ uint64_t bad_version = 0xDEADBEEFCAFEBABEULL;
+ f.write(reinterpret_cast<const char *>(&bad_version), sizeof(bad_version));
+ }
+
+ // Reload — version mismatch should be skipped; versioned_key is lost (log
was
+ // cleared by WriteToDisk), but the group must recover and accept new
inserts.
+ cripts::Cache::Group g2("test", dir.str(), 1024, 2);
+ CHECK_FALSE(g2.Lookup("versioned_key", epoch));
+ g2.Insert("post_version_check");
+ CHECK(g2.Lookup("post_version_check", epoch));
+}
+
+TEST_CASE("CacheGroup: WriteToDisk does not clear log on sync failure",
"[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ cripts::Cache::Group g("test", dir.str(), 1024, 2);
+ g.Insert("before_fail");
+ g.WriteToDisk(); // initial successful sync + log clear
+
+ g.Insert("after_initial_sync");
+
+ // Make the map directory read-only so syncMap will fail on rename
+ auto group_dir = dir.path / "test";
+ std::filesystem::permissions(group_dir, std::filesystem::perms::owner_read |
std::filesystem::perms::owner_exec);
+
+ g.WriteToDisk(); // should fail to sync; log must NOT be cleared
+
+ // Restore permissions so cleanup works
+ std::filesystem::permissions(group_dir, std::filesystem::perms::owner_all);
+
+ // The txn log should still contain "after_initial_sync" — verify via reload
+ cripts::Cache::Group g2("test", dir.str(), 1024, 2);
+ CHECK(g2.Lookup("before_fail", epoch));
+ CHECK(g2.Lookup("after_initial_sync", epoch));
+}
+
+TEST_CASE("CacheGroup: map rotation writes empty map to disk",
"[cripts][CacheGroup]")
+{
+ TempDir dir;
+ auto epoch = cripts::Time::Clock::from_time_t(0);
+
+ // max_entries=2 to trigger rotation after 2 inserts
+ {
+ cripts::Cache::Group g("test", dir.str(), 2, 3);
+ g.Insert("key1");
+ g.Insert("key2");
+ g.Insert("key3"); // triggers rotation
+ g.WriteToDisk();
+ }
+
+ cripts::Cache::Group g2("test", dir.str(), 2, 3);
+ // All three keys were in slot 0 at WriteToDisk time and survive the reload.
+ CHECK(g2.Lookup("key1", epoch));
+ CHECK(g2.Lookup("key2", epoch));
+ CHECK(g2.Lookup("key3", epoch));
+}