Copilot commented on code in PR #12743: URL: https://github.com/apache/trafficserver/pull/12743#discussion_r2621121798
########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + 
++processed; + } + } else { + // The group has been deleted, remove it from the map ?? + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + 
appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t count = 0; + time_t 
created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast<char *>(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast<char *>(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast<char *>(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + + for (size_t i = 0; i < count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock 
unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already hodling an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + std::cerr << "Failed to open temp file for sync: " << tmp_path << "\n"; Review Comment: Inconsistent error reporting: This function uses std::cerr for error output, while the rest of the codebase consistently uses TSWarning for similar errors (e.g., lines 140, 224, 298, 355, 368). This should use TSWarning to maintain consistency with the codebase's error reporting pattern. ```suggestion TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s", tmp_path.c_str()); ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? 
+ it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + 
_map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t count = 0; + time_t created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, 
std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast<char *>(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast<char *>(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast<char *>(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + + for (size_t i = 0; i < count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + 
bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already hodling an exclusive lock on the mutex. Review Comment: Spelling error: "hodling" should be "holding". ```suggestion // already holding an exclusive lock on the mutex. ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? 
+ it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); Review Comment: Missing error handling: The std::filesystem::create_directories and std::filesystem::permissions calls can throw exceptions or fail silently. Consider using the error_code overloads and checking for errors, similar to how it's done in the clearLog method (line 363). ```suggestion std::error_code ec; std::filesystem::create_directories(_base_dir, ec); if (ec) { TSError("[cripts][Cache::Group::Initialize] Failed to create directory '%s': %s", _base_dir.c_str(), ec.message().c_str()); } ec.clear(); std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); if (ec) { TSError("[cripts][Cache::Group::Initialize] Failed to set permissions on directory '%s': %s", _base_dir.c_str(), ec.message().c_str()); } ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? 
+ it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + 
_map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t count = 0; + time_t created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, 
std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast<char *>(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast<char *>(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast<char *>(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + + for (size_t i = 0; i < count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } Review Comment: Missing error checking: The file.read() operation on line 241 within the loop does not check if the read succeeded. If a read fails mid-loop (e.g., corrupted file), the code continues processing with uninitialized data. Consider checking file.good() or the read result before using the entry. ```suggestion if (!file) { TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. 
Stopping further reads from this file.", i, slot.path.c_str()); break; } if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { slot.map->insert_or_assign(entry.hash, entry); } ``` ########## include/cripts/Matcher.hpp: ########## @@ -23,6 +23,7 @@ #include <algorithm> #include <vector> #include <tuple> +#include <algorithm> Review Comment: Duplicate include of <algorithm>. This header is already included on line 23. ```suggestion ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? + it = groups.erase(it); + } + } Review Comment: Iterator increment bug: When erase() is called on line 58, it returns an iterator to the element that follows the erased one. However, the loop's `++it` at line 50 then advances past that element, so it is never examined; and if the erased element was the last one, erase() returns end() and incrementing it is undefined behavior. The iterator must not be incremented after erase. Note that a `continue` statement would not help here, because a for loop's increment expression still runs after `continue`. Instead, remove the increment from the for statement and advance the iterator only in the branch that does not erase: ```suggestion for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ) { if (auto group = it->second.lock()) { if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { group->WriteToDisk(); ++processed; } ++it; } else { // The group has been deleted, remove it from the map ?? 
it = groups.erase(it); } ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + 
group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + 
slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t 
count = 0; + time_t created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast<char *>(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast<char *>(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast<char *>(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); Review Comment: Missing error checking: The file.read() operations on lines 229-232 do not check if the reads succeeded. If any read fails (e.g., due to a corrupted or truncated file), the code will use uninitialized or partially read data. Consider checking the stream state after reads or using a single read operation with validation. ```suggestion // Read all four fields in a single operation for error checking struct { time_t created_ts; time_t last_write_ts; time_t last_sync_ts; size_t count; } header; file.read(reinterpret_cast<char *>(&header), sizeof(header)); if (!file) { TSWarning("cripts::Cache::Group: Failed to read header for map file: %s. 
Skipping this map.", slot.path.c_str()); continue; } slot.created = cripts::Time::Clock::from_time_t(header.created_ts); slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); count = header.count; ``` ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? 
+ it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + 
_map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t count = 0; + time_t created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, 
std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast<char *>(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast<char *>(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast<char *>(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + + for (size_t i = 0; i < count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { Review Comment: Performance concern: This line performs a linear search through all slots for every entry being loaded. With large maps, this becomes O(n*m) complexity where n is the number of entries and m is the number of slots. Consider using a more efficient approach, such as building a set of loaded hashes first, or checking only the current slot's map. ########## doc/developer-guide/cripts/cripts-misc.en.rst: ########## @@ -414,3 +414,63 @@ Debug logging uses the same format string syntax as ``fmt::format()`` in ``libfm debug tags in your ATS configuration to enable debug output for your Cripts. The default debug tag for Cripts is the name of the Cript itself, either the Cript source file, or the compiled plugin name. 
+ +Cache Groups +============ + +As a way to manage assosication between cache entries, Cripts provides an infrastructure Review Comment: Spelling error: "assosication" should be "association". ```suggestion As a way to manage association between cache entries, Cripts provides an infrastructure ``` ########## include/cripts/CacheGroup.hpp: ########## @@ -0,0 +1,195 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#pragma once + +#include <unordered_map> +#include <string> +#include <vector> +#include <chrono> +#include <mutex> +#include <shared_mutex> +#include <fstream> +#include <atomic> +#include <memory> +#include <cstdint> + +#include "cripts/Context.hpp" +#include "cripts/Time.hpp" + +// Implemented in the .cc file +int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + +namespace cripts::Cache +{ + +class Group +{ +private: + using self_type = Group; + + struct _Entry { + cripts::Time::Point timestamp; // Timestamp of when the entry was created + size_t length; // Length of the group ID + uint32_t prefix; // First 4 characters of the group ID + uint64_t hash; // Hash value of the group ID, needed when writing to disk + }; + + using _MapType = std::unordered_map<uint64_t, _Entry>; + + struct _MapSlot { + std::unique_ptr<_MapType> map; + std::string path; + cripts::Time::Point created; + cripts::Time::Point last_write; + cripts::Time::Point last_sync; + }; + +public: + static constexpr uint64_t VERSION = (static_cast<uint64_t>('C') << 56) | (static_cast<uint64_t>('G') << 48) | + (static_cast<uint64_t>('M') << 40) | (static_cast<uint64_t>('A') << 32) | + (static_cast<uint64_t>('P') << 24) | (static_cast<uint64_t>('S') << 16) | + (static_cast<uint64_t>('0') << 8) | 0x00; // Change this on version bump + + Group(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3) + { + Initialize(name, base_dir, num_maps, max_entries, std::chrono::seconds{63072000}); + } + + // Not used at the moment. 
+ Group() = default; + + ~Group() { WriteToDisk(); } + + Group(const self_type &) = delete; + self_type &operator=(const self_type &) = delete; + + void Initialize(const std::string &name, const std::string &base_dir, size_t num_maps = 3, size_t max_entries = 1024, + std::chrono::seconds max_age = std::chrono::seconds{63072000}); + + void + SetMaxEntries(size_t max_entries) + { + std::unique_lock lock(_mutex); + _max_entries = max_entries; + } + + void + SetMaxAge(std::chrono::seconds max_age) + { + std::unique_lock lock(_mutex); + _max_age = max_age; + } + + void Insert(cripts::string_view key); + void Insert(const std::vector<cripts::string_view> &keys); + bool Lookup(cripts::string_view key, cripts::Time::Point age) const; + bool Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const; + + bool + Lookup(cripts::string_view key, time_t age) const + { + return Lookup(key, cripts::Time::Clock::from_time_t(age)); + } + + bool + Lookup(const std::vector<cripts::string_view> &keys, time_t age) const + { + return Lookup(keys, cripts::Time::Clock::from_time_t(age)); + } + + cripts::Time::Point + LastSync() const + { + std::shared_lock lock(_mutex); + return _last_sync; + } + + void WriteToDisk(); + void LoadFromDisk(); + +private: + mutable std::shared_mutex _mutex; + std::string _name = "CacheGroup"; + size_t _num_maps = 3; + size_t _max_entries = 1024; + std::chrono::seconds _max_age = std::chrono::seconds(63072000); Review Comment: Magic number without documentation: The default max_age value of 63072000 seconds (approximately 2 years) is not documented. Consider adding a comment explaining why this specific duration was chosen, or define it as a named constant with a descriptive name. ########## src/cripts/CacheGroup.cc: ########## @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <algorithm> +#include <iostream> +#include <filesystem> + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min<size_t>(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast<cripts::Cache::Group::Manager *>(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? 
+ it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash<cripts::string_view> hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast<uint64_t>(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + 
_map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector<cripts::string_view> &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast<uint64_t>(std::hash<cripts::string_view>{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector<cripts::string_view> &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t count = 0; + time_t created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, 
std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast<char *>(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast<unsigned long long>(VERSION), static_cast<unsigned long long>(version_id)); + continue; + } + + file.read(reinterpret_cast<char *>(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast<char *>(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast<char *>(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast<char *>(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + + for (size_t i = 0; i < count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast<char *>(&entry), sizeof(entry)); + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast<char *>(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + 
bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already hodling an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast<const char *>(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array<std::byte, BUFFER_SIZE> buffer; + size_t buf_pos = 0; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + std::cerr << "Failed to open temp file for sync: " << tmp_path << "\n"; + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { + if (buf_pos + size > buffer.size()) { + o_file.write(reinterpret_cast<const char *>(buffer.data()), buf_pos); + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast<const std::byte *>(data), size); + buf_pos += size; + }; Review Comment: Missing error checking: The _AppendToBuffer lambda writes to the output stream on line 325 without checking if the write succeeded. If the write fails (e.g., disk full), this could lead to silent data loss. Consider checking the stream state after write operations. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
