From a7d4224e3a117b3acb4105efca591c56ba7719fd Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Thu, 29 May 2025 14:46:04 -0600 Subject: [PATCH 1/5] Adds Cache Groups concepts to Cripts - cleans up the notion around cached URLs and headers, and cache keys. - adds APIs to set the lookup status as well --- doc/developer-guide/cripts/cripts-misc.en.rst | 60 +++ example/cripts/cache_groups.cc | 106 +++++ include/cripts/CacheGroup.hpp | 195 ++++++++ include/cripts/Matcher.hpp | 1 + src/cripts/CMakeLists.txt | 1 + src/cripts/CacheGroup.cc | 419 ++++++++++++++++++ 6 files changed, 782 insertions(+) create mode 100644 example/cripts/cache_groups.cc create mode 100644 include/cripts/CacheGroup.hpp create mode 100644 src/cripts/CacheGroup.cc diff --git a/doc/developer-guide/cripts/cripts-misc.en.rst b/doc/developer-guide/cripts/cripts-misc.en.rst index dd582e6edac..32f3fb536a8 100644 --- a/doc/developer-guide/cripts/cripts-misc.en.rst +++ b/doc/developer-guide/cripts/cripts-misc.en.rst @@ -414,3 +414,63 @@ Debug logging uses the same format string syntax as ``fmt::format()`` in ``libfm debug tags in your ATS configuration to enable debug output for your Cripts. The default debug tag for Cripts is the name of the Cript itself, either the Cript source file, or the compiled plugin name. + +Cache Groups +============ + +As a way to manage assosication between cache entries, Cripts provides an infrastructure +for cache groups. A cache group is a set of cache entries that are logically +associated with each other via custom identifiers. + +Example implementation of the Cache Groups RFC + +.. code-block:: cpp + + do_create_instance() + { + // Create a cache-group for this site / remap rule(s). They can be shared. + instance.data[0] = cripts::Cache::Group::Manager::Factory("example_site"); + } + + do_delete_instance() + { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + delete static_cast *>(ptr); + instance.data[0] = nullptr; + } + } + + do_cache_lookup() + { + if (cached.response.lookupstatus != cripts::LookupStatus::MISS) { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto date = cached.response.AsDate("Date"); + if (date > 0) { + auto cache_groups = cached.response["Cache-Groups"]; + if (!cache_groups.empty()) { + borrow cg = *static_cast *>(ptr); + if (cg->Lookup(cache_groups.split(','), date)) { + cached.response.lookupstatus = cripts::LookupStatus::HIT_STALE; + } + } + } + } + } + } + + do_read_response() + { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto invalidation = client.request["Cache-Group-Invalidation"]; + if (!invalidation.empty()) { + borrow cg = *static_cast *>(ptr); + cg->Insert(invalidation.split(',')); + } + } + } diff --git a/example/cripts/cache_groups.cc b/example/cripts/cache_groups.cc new file mode 100644 index 00000000000..c7f2fc0c677 --- /dev/null +++ b/example/cripts/cache_groups.cc @@ -0,0 +1,106 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#define CRIPTS_CONVENIENCE_APIS 1 + +#include +#include + +do_create_instance() +{ + // Create a cache-group for this site / remap rule(s). They can be shared. + instance.data[0] = cripts::Cache::Group::Manager::Factory("example"); +} + +do_delete_instance() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + delete static_cast *>(ptr); + instance.data[0] = nullptr; + } +} + +do_cache_lookup() +{ + if (cached.response.lookupstatus != cripts::LookupStatus::MISS) { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto date = cached.response.AsDate("Date"); + + if (date > 0) { + auto cache_groups = cached.response["Cache-Groups"]; + + CDebug("Looking up {}", cache_groups); + if (!cache_groups.empty()) { + borrow cg = *static_cast *>(ptr); + + if (cg->Lookup(cache_groups.split(','), date)) { + CDebug("Cache Group hit, forcing revalidation for request"); + cached.response.lookupstatus = cripts::LookupStatus::HIT_STALE; + } + } + } + } + } +} + +do_read_response() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto invalidation = client.request["Cache-Group-Invalidation"]; + + if (!invalidation.empty()) { + borrow cg = *static_cast *>(ptr); + + cg->Insert(invalidation.split(',')); + } + } + +// This is just for simulating origin responses that would include cache-groups. +#if 0 + server.response["Cache-Groups"] = "\"foo\", \"bar\""; +#endif +} + +// The RFC draft does not support / provide definitions for this. It is useful, +// but should be protected with appropriate ACLs / authentication. +#if 0 +do_remap() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr && urls.pristine.path == ".well-known/Cache-Groups") { + auto invalidation = client.request["Cache-Group-Invalidation"]; + + if (!invalidation.empty()) { + borrow cg = *static_cast *>(ptr); + + cg->Insert(invalidation.split(',')); + CDebug("Forcing a cache miss for cache-groups: {}", invalidation); + StatusCode(202); + } + } +} +#endif + +#include diff --git a/include/cripts/CacheGroup.hpp b/include/cripts/CacheGroup.hpp new file mode 100644 index 00000000000..42c11b0fded --- /dev/null +++ b/include/cripts/CacheGroup.hpp @@ -0,0 +1,195 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cripts/Context.hpp" +#include "cripts/Time.hpp" + +// Implemented in the .cc file +int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + +namespace cripts::Cache +{ + +class Group +{ +private: + using self_type = Group; + + struct _Entry { + cripts::Time::Point timestamp; // Timestamp of when the entry was created + size_t length; // Length of the group ID + uint32_t prefix; // First 4 characters of the group ID + uint64_t hash; // Hash value of the group ID, needed when writing to disk + }; + + using _MapType = std::unordered_map; + + struct _MapSlot { + std::unique_ptr<_MapType> map; + std::string path; + cripts::Time::Point created; + cripts::Time::Point last_write; + cripts::Time::Point last_sync; + }; + +public: + static constexpr uint64_t VERSION = (static_cast('C') << 56) | (static_cast('G') << 48) | + (static_cast('M') << 40) | (static_cast('A') << 32) | + (static_cast('P') << 24) | (static_cast('S') << 16) | + (static_cast('0') << 8) | 0x00; // Change this on version bump + + Group(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3) + { + Initialize(name, base_dir, num_maps, max_entries, std::chrono::seconds{63072000}); + } + + // Not used at the moment. + Group() = default; + + ~Group() { WriteToDisk(); } + + Group(const self_type &) = delete; + self_type &operator=(const self_type &) = delete; + + void Initialize(const std::string &name, const std::string &base_dir, size_t num_maps = 3, size_t max_entries = 1024, + std::chrono::seconds max_age = std::chrono::seconds{63072000}); + + void + SetMaxEntries(size_t max_entries) + { + std::unique_lock lock(_mutex); + _max_entries = max_entries; + } + + void + SetMaxAge(std::chrono::seconds max_age) + { + std::unique_lock lock(_mutex); + _max_age = max_age; + } + + void Insert(cripts::string_view key); + void Insert(const std::vector &keys); + bool Lookup(cripts::string_view key, cripts::Time::Point age) const; + bool Lookup(const std::vector &keys, cripts::Time::Point age) const; + + bool + Lookup(cripts::string_view key, time_t age) const + { + return Lookup(key, cripts::Time::Clock::from_time_t(age)); + } + + bool + Lookup(const std::vector &keys, time_t age) const + { + return Lookup(keys, cripts::Time::Clock::from_time_t(age)); + } + + cripts::Time::Point + LastSync() const + { + std::shared_lock lock(_mutex); + return _last_sync; + } + + void WriteToDisk(); + void LoadFromDisk(); + +private: + mutable std::shared_mutex _mutex; + std::string _name = "CacheGroup"; + size_t _num_maps = 3; + size_t _max_entries = 1024; + std::chrono::seconds _max_age = std::chrono::seconds(63072000); + std::atomic _map_index = 0; + cripts::Time::Point _last_sync = cripts::Time::Point{}; + + std::vector<_MapSlot> _slots; + std::ofstream _txn_log; + std::string _log_path; + std::string _base_dir; + + void appendLog(const _Entry &entry); + void clearLog(); + void syncMap(size_t index); + +public: + class Manager + { + friend int ::_cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + using self_type = Manager; + + public: + static void *Factory(const std::string &name, size_t max_entries = 1024, size_t num_maps = 3); + + Manager(const self_type &) = delete; + self_type &operator=(const self_type &) = delete; + + protected: + void _scheduleCont(); + + std::unordered_map> _groups; + std::mutex _mutex; + + private: + Manager() + { + _base_dir = TSRuntimeDirGet(); + + if (std::filesystem::exists(_base_dir)) { + _base_dir += "/cache_groups"; + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + } + _scheduleCont(); // Kick it off + } + + ~Manager() + { + if (_action) { + TSActionCancel(_action); + _action = nullptr; + } + if (_cont) { + TSContDestroy(_cont); + _cont = nullptr; + } + } + + static self_type &_instance(); + + TSCont _cont = nullptr; + TSAction _action = nullptr; + std::string _base_dir; + }; +}; +} // namespace cripts::Cache diff --git a/include/cripts/Matcher.hpp b/include/cripts/Matcher.hpp index 6a749910a36..a97e53dc75f 100644 --- a/include/cripts/Matcher.hpp +++ b/include/cripts/Matcher.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "cripts/Headers.hpp" #include "cripts/Lulu.hpp" diff --git a/src/cripts/CMakeLists.txt b/src/cripts/CMakeLists.txt index 6e151f46f3d..62bc6164850 100644 --- a/src/cripts/CMakeLists.txt +++ b/src/cripts/CMakeLists.txt @@ -28,6 +28,7 @@ list(REMOVE_ITEM CPP_FILES ${TEST_CPP_FILES}) set(CRIPTS_PUBLIC_HEADERS ${PROJECT_SOURCE_DIR}/include/cripts/Bundle.hpp + ${PROJECT_SOURCE_DIR}/include/cripts/CacheGroup.hpp ${PROJECT_SOURCE_DIR}/include/cripts/Certs.hpp ${PROJECT_SOURCE_DIR}/include/cripts/Configs.hpp ${PROJECT_SOURCE_DIR}/include/cripts/ConfigsBase.hpp diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc new file mode 100644 index 00000000000..03208e6aec4 --- /dev/null +++ b/src/cripts/CacheGroup.cc @@ -0,0 +1,419 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include +#include + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min(4, key.size())); + return prefix; +} + +// Stuff around the disk sync contination +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + } else { + // The group has been deleted, remove it from the map ?? + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::filesystem::create_directories(_base_dir); + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast(std::hash{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + size_t count = 0; + time_t created_ts = 0; + time_t last_write_ts = 0; + time_t last_sync_ts = 0; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast(VERSION), static_cast(version_id)); + continue; + } + + file.read(reinterpret_cast(&created_ts), sizeof(created_ts)); + file.read(reinterpret_cast(&last_write_ts), sizeof(last_write_ts)); + file.read(reinterpret_cast(&last_sync_ts), sizeof(last_sync_ts)); + file.read(reinterpret_cast(&count), sizeof(count)); + + slot.created = cripts::Time::Clock::from_time_t(created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + + for (size_t i = 0; i < count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast(&entry), sizeof(entry)); + if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + slot.map->insert_or_assign(entry.hash, entry); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock unique_lock(_mutex); + + _last_sync = cripts::Time::Clock::now(); + for (size_t ix = 0; ix < _slots.size(); ++ix) { + bool need_sync = false; + + if (_slots[ix].last_write > _slots[ix].last_sync) { + _slots[ix].last_sync = _last_sync; + need_sync = true; + } + + if (need_sync) { + syncMap(ix); + } + } + + clearLog(); +} + +// +// Here comes the private member methods, these must never be called without +// already hodling an exclusive lock on the mutex. +// + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast(&entry), sizeof(entry)); + _txn_log.flush(); +} + +void +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array buffer; + size_t buf_pos = 0; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!o_file) { + std::cerr << "Failed to open temp file for sync: " << tmp_path << "\n"; + return; + } + + // Helper lambda to append data to the write buffer + auto _AppendToBuffer = [&](const void *data, size_t size) { + if (buf_pos + size > buffer.size()) { + o_file.write(reinterpret_cast(buffer.data()), buf_pos); + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast(data), size); + buf_pos += size; + }; + + time_t created_ts = cripts::Time::Clock::to_time_t(slot.created); + time_t last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write); + time_t last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync); + size_t count = slot.map->size(); + + _AppendToBuffer(&VERSION, sizeof(VERSION)); + _AppendToBuffer(&created_ts, sizeof(created_ts)); + _AppendToBuffer(&last_write_ts, sizeof(last_write_ts)); + _AppendToBuffer(&last_sync_ts, sizeof(last_sync_ts)); + _AppendToBuffer(&count, sizeof(count)); + + // Write entries + for (const auto &[_, entry] : *slot.map) { + _AppendToBuffer(&entry, sizeof(entry)); + } + + if (buf_pos > 0) { + o_file.write(reinterpret_cast(buffer.data()), buf_pos); + } + o_file.flush(); + o_file.close(); + + if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { + TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); + std::filesystem::remove(tmp_path); + } +} + +void +Cache::Group::clearLog() +{ + std::error_code ec; + + _txn_log.close(); + std::filesystem::remove(_log_path, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to clear transaction log `%s': %s", _log_path.c_str(), ec.message().c_str()); + } +} + +// Singleton instance for the Cache::Group::Manager +Cache::Group::Manager & +Cache::Group::Manager::_instance() +{ + static Cache::Group::Manager inst; + return inst; +} + +void * +Cache::Group::Manager::Factory(const std::string &name, size_t max_entries, size_t num_maps) +{ + std::lock_guard lock(_instance()._mutex); + auto &groups = _instance()._groups; + + if (auto it = groups.find(name); it != groups.end()) { + if (auto group = it->second.lock()) { + return new std::shared_ptr(std::move(group)); + } + } + + if (!_instance()._base_dir.empty()) { + auto group = std::make_shared(name, _instance()._base_dir, max_entries, num_maps); + + groups[name] = group; + return new std::shared_ptr(std::move(group)); + } else { + TSError("cripts::Cache::Group: Failed to get runtime directory for initialization."); + return nullptr; + } +} + +void +Cache::Group::Manager::_scheduleCont() +{ + if (!_cont) { + _cont = TSContCreate(_cripts_cache_group_sync, TSMutexCreate()); + TSContDataSet(_cont, this); + } + + if (_action) { + TSActionCancel(_action); // Can this even happen ? + _action = nullptr; + } + + _action = TSContScheduleEveryOnPool(_cont, _CONT_SYNC_INTERVAL * 1000, TS_THREAD_POOL_TASK); +} + +} // namespace cripts From 982fc6d0b54f6866b52672f5cc50d8b160c70e9f Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Mon, 22 Dec 2025 10:24:05 -0700 Subject: [PATCH 2/5] Changes from code review --- doc/developer-guide/cripts/cripts-misc.en.rst | 4 +- example/cripts/cache_groups.cc | 2 +- include/cripts/CacheGroup.hpp | 36 +++-- include/cripts/Matcher.hpp | 1 - src/cripts/CacheGroup.cc | 125 +++++++++++------- 5 files changed, 106 insertions(+), 62 deletions(-) diff --git a/doc/developer-guide/cripts/cripts-misc.en.rst b/doc/developer-guide/cripts/cripts-misc.en.rst index 32f3fb536a8..875e4aac1eb 100644 --- a/doc/developer-guide/cripts/cripts-misc.en.rst +++ b/doc/developer-guide/cripts/cripts-misc.en.rst @@ -418,7 +418,7 @@ Debug logging uses the same format string syntax as ``fmt::format()`` in ``libfm Cache Groups ============ -As a way to manage assosication between cache entries, Cripts provides an infrastructure +As a way to manage association between cache entries, Cripts provides an infrastructure for cache groups. A cache group is a set of cache entries that are logically associated with each other via custom identifiers. @@ -449,7 +449,7 @@ Example implementation of the Cache Groups RFC if (ptr) { auto date = cached.response.AsDate("Date"); - if (date > 0) { + if (date != 0) { auto cache_groups = cached.response["Cache-Groups"]; if (!cache_groups.empty()) { borrow cg = *static_cast *>(ptr); diff --git a/example/cripts/cache_groups.cc b/example/cripts/cache_groups.cc index c7f2fc0c677..b51c1ae27c1 100644 --- a/example/cripts/cache_groups.cc +++ b/example/cripts/cache_groups.cc @@ -45,7 +45,7 @@ do_cache_lookup() if (ptr) { auto date = cached.response.AsDate("Date"); - if (date > 0) { + if (date != 0) { auto cache_groups = cached.response["Cache-Groups"]; CDebug("Looking up {}", cache_groups); diff --git a/include/cripts/CacheGroup.hpp b/include/cripts/CacheGroup.hpp index 42c11b0fded..eab93670389 100644 --- a/include/cripts/CacheGroup.hpp +++ b/include/cripts/CacheGroup.hpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -49,6 +48,14 @@ class Group uint64_t hash; // Hash value of the group ID, needed when writing to disk }; + // Header structure for on-disk map files (after VERSION field) + struct _MapHeader { + time_t created_ts; + time_t last_write_ts; + time_t last_sync_ts; + size_t count; + }; + using _MapType = std::unordered_map; struct _MapSlot { @@ -65,9 +72,11 @@ class Group (static_cast('P') << 24) | (static_cast('S') << 16) | (static_cast('0') << 8) | 0x00; // Change this on version bump + static constexpr std::chrono::seconds DEFAULT_MAX_AGE{63072000}; // 2 Years, max cache lifetime in ATS as well + Group(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3) { - Initialize(name, base_dir, num_maps, max_entries, std::chrono::seconds{63072000}); + Initialize(name, base_dir, max_entries, num_maps, DEFAULT_MAX_AGE); } // Not used at the moment. @@ -78,8 +87,8 @@ class Group Group(const self_type &) = delete; self_type &operator=(const self_type &) = delete; - void Initialize(const std::string &name, const std::string &base_dir, size_t num_maps = 3, size_t max_entries = 1024, - std::chrono::seconds max_age = std::chrono::seconds{63072000}); + void Initialize(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3, + std::chrono::seconds max_age = DEFAULT_MAX_AGE); void SetMaxEntries(size_t max_entries) @@ -127,8 +136,8 @@ class Group std::string _name = "CacheGroup"; size_t _num_maps = 3; size_t _max_entries = 1024; - std::chrono::seconds _max_age = std::chrono::seconds(63072000); - std::atomic _map_index = 0; + std::chrono::seconds _max_age = DEFAULT_MAX_AGE; + size_t _map_index = 0; cripts::Time::Point _last_sync = cripts::Time::Point{}; std::vector<_MapSlot> _slots; @@ -166,8 +175,19 @@ class Group if (std::filesystem::exists(_base_dir)) { _base_dir += "/cache_groups"; if (!std::filesystem::exists(_base_dir)) { - std::filesystem::create_directories(_base_dir); - std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSError("cripts::Cache::Group::Manager: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + + if (ec) { + TSWarning("cripts::Cache::Group::Manager: Failed to set permissions on `%s': %s", _base_dir.c_str(), + ec.message().c_str()); + } + } } } _scheduleCont(); // Kick it off diff --git a/include/cripts/Matcher.hpp b/include/cripts/Matcher.hpp index a97e53dc75f..6a749910a36 100644 --- a/include/cripts/Matcher.hpp +++ b/include/cripts/Matcher.hpp @@ -23,7 +23,6 @@ #include #include #include -#include #include "cripts/Headers.hpp" #include "cripts/Lulu.hpp" diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc index 03208e6aec4..b476d629db9 100644 --- a/src/cripts/CacheGroup.cc +++ b/src/cripts/CacheGroup.cc @@ -17,8 +17,8 @@ */ #include -#include #include +#include #include "ts/ts.h" #include "cripts/CacheGroup.hpp" @@ -32,7 +32,7 @@ _make_prefix_int(cripts::string_view key) return prefix; } -// Stuff around the disk sync contination +// Stuff around the disk sync continuation constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s @@ -47,14 +47,15 @@ _cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) size_t processed = 0; auto now = cripts::Time::Clock::now(); - for (auto it = groups.begin(); it != groups.end() && processed < max_to_process; ++it) { + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { if (auto group = it->second.lock()) { if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { group->WriteToDisk(); ++processed; } + ++it; } else { - // The group has been deleted, remove it from the map ?? + // The group has been deleted, remove it from the map it = groups.erase(it); } } @@ -66,7 +67,7 @@ namespace cripts { void -Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t num_maps, size_t max_entries, +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t max_entries, size_t num_maps, std::chrono::seconds max_age) { cripts::Time::Point zero = cripts::Time::Point{}; @@ -81,8 +82,17 @@ Cache::Group::Initialize(const std::string &name, const std::string &base_dir, s _log_path = _base_dir + "/" + "txn.log"; if (!std::filesystem::exists(_base_dir)) { - std::filesystem::create_directories(_base_dir); - std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add); + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } } for (size_t ix = 0; ix < _num_maps; ++ix) { @@ -203,16 +213,14 @@ Cache::Group::Lookup(const std::vector &keys, cripts::Time: void Cache::Group::LoadFromDisk() { - std::unique_lock lock(_mutex); - std::ifstream log(_log_path, std::ios::binary); + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + std::unordered_set loaded_hashes; for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { - auto &slot = _slots[slot_ix]; - uint64_t version_id = 0; - size_t count = 0; - time_t created_ts = 0; - time_t last_write_ts = 0; - time_t last_sync_ts = 0; + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; std::ifstream file(slot.path, std::ios::binary); if (!file) { @@ -226,21 +234,27 @@ Cache::Group::LoadFromDisk() continue; } - file.read(reinterpret_cast(&created_ts), sizeof(created_ts)); - file.read(reinterpret_cast(&last_write_ts), sizeof(last_write_ts)); - file.read(reinterpret_cast(&last_sync_ts), sizeof(last_sync_ts)); - file.read(reinterpret_cast(&count), sizeof(count)); + file.read(reinterpret_cast(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } - slot.created = cripts::Time::Clock::from_time_t(created_ts); - slot.last_write = cripts::Time::Clock::from_time_t(last_write_ts); - slot.last_sync = cripts::Time::Clock::from_time_t(last_sync_ts); + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); - for (size_t i = 0; i < count; ++i) { + for (size_t i = 0; i < header.count; ++i) { Cache::Group::_Entry entry; file.read(reinterpret_cast(&entry), sizeof(entry)); - if (!std::ranges::any_of(_slots, [&](const auto &slot) { return slot.map->find(entry.hash) != slot.map->end(); })) { + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (loaded_hashes.find(entry.hash) == loaded_hashes.end()) { slot.map->insert_or_assign(entry.hash, entry); + loaded_hashes.insert(entry.hash); } } } @@ -286,14 +300,14 @@ Cache::Group::WriteToDisk() // // Here comes the private member methods, these must never be called without -// already hodling an exclusive lock on the mutex. +// already holding an exclusive lock on the mutex. // void Cache::Group::appendLog(const Cache::Group::_Entry &entry) { if (!_txn_log.is_open() || !_txn_log.good()) { - _txn_log.open(_log_path, std::ios::app | std::ios::out); + _txn_log.open(_log_path, std::ios::app | std::ios::out | std::ios::binary); if (!_txn_log) { TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); return; @@ -309,47 +323,58 @@ Cache::Group::syncMap(size_t index) { constexpr size_t BUFFER_SIZE = 64 * 1024; std::array buffer; - size_t buf_pos = 0; - const auto &slot = _slots[index]; - const std::string tmp_path = slot.path + ".tmp"; - std::ofstream o_file(tmp_path, std::ios::binary | std::ios::trunc); - - if (!o_file) { - std::cerr << "Failed to open temp file for sync: " << tmp_path << "\n"; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream tmp_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!tmp_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); return; } // Helper lambda to append data to the write buffer - auto _AppendToBuffer = [&](const void *data, size_t size) { + auto _appendToBuffer = [&](const void *data, size_t size) { + if (write_failed) { + return; + } if (buf_pos + size > buffer.size()) { - o_file.write(reinterpret_cast(buffer.data()), buf_pos); + tmp_file.write(reinterpret_cast(buffer.data()), buf_pos); + if (!tmp_file) { + write_failed = true; + return; + } buf_pos = 0; } std::memcpy(buffer.data() + buf_pos, static_cast(data), size); buf_pos += size; }; - time_t created_ts = cripts::Time::Clock::to_time_t(slot.created); - time_t last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write); - time_t last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync); - size_t count = slot.map->size(); + _MapHeader header{.created_ts = cripts::Time::Clock::to_time_t(slot.created), + .last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write), + .last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync), + .count = slot.map->size()}; - _AppendToBuffer(&VERSION, sizeof(VERSION)); - _AppendToBuffer(&created_ts, sizeof(created_ts)); - _AppendToBuffer(&last_write_ts, sizeof(last_write_ts)); - _AppendToBuffer(&last_sync_ts, sizeof(last_sync_ts)); - _AppendToBuffer(&count, sizeof(count)); + _appendToBuffer(&VERSION, sizeof(VERSION)); + _appendToBuffer(&header, sizeof(header)); // Write entries for (const auto &[_, entry] : *slot.map) { - _AppendToBuffer(&entry, sizeof(entry)); + _appendToBuffer(&entry, sizeof(entry)); } - if (buf_pos > 0) { - o_file.write(reinterpret_cast(buffer.data()), buf_pos); + if (buf_pos > 0 && !write_failed) { + tmp_file.write(reinterpret_cast(buffer.data()), buf_pos); + } + tmp_file.flush(); + tmp_file.close(); + + if (write_failed || !tmp_file) { + TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); + std::filesystem::remove(tmp_path); + return; } - o_file.flush(); - o_file.close(); if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); @@ -409,7 +434,7 @@ Cache::Group::Manager::_scheduleCont() } if (_action) { - TSActionCancel(_action); // Can this even happen ? + TSActionCancel(_action); _action = nullptr; } From cb279c4fd0a6f232c5ce2f1f11075320b0b99d00 Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Fri, 27 Feb 2026 09:17:08 -0700 Subject: [PATCH 3/5] Address Bryan's review issues WriteToDisk previously updated last_sync and cleared the transaction log before confirming syncMap succeeded. On rename failure the log would be lost even though the map was never written to disk. syncMap now returns bool; WriteToDisk reverts last_sync on failure and only calls clearLog when all dirty slots have been successfully synced. Adds Catch2 unit tests covering basic insert/lookup, persist-and-reload, transaction log replay across restarts, corrupt/truncated/wrong-version map file handling, sync failure log preservation, and map rotation. More unit tests will be done in a future PR. --- include/cripts/CacheGroup.hpp | 2 +- src/cripts/CMakeLists.txt | 9 +- src/cripts/CacheGroup.cc | 34 ++-- src/cripts/unit_tests/stub.cc | 78 ++++++++ src/cripts/unit_tests/test_CacheGroup.cc | 224 +++++++++++++++++++++++ 5 files changed, 332 insertions(+), 15 deletions(-) create mode 100644 src/cripts/unit_tests/stub.cc create mode 100644 src/cripts/unit_tests/test_CacheGroup.cc diff --git a/include/cripts/CacheGroup.hpp b/include/cripts/CacheGroup.hpp index eab93670389..802ac68a90c 100644 --- a/include/cripts/CacheGroup.hpp +++ b/include/cripts/CacheGroup.hpp @@ -147,7 +147,7 @@ class Group void appendLog(const _Entry &entry); void clearLog(); - void syncMap(size_t index); + bool syncMap(size_t index); public: class Manager diff --git a/src/cripts/CMakeLists.txt b/src/cripts/CMakeLists.txt index 62bc6164850..dbe17a3f181 100644 --- a/src/cripts/CMakeLists.txt +++ b/src/cripts/CMakeLists.txt @@ -23,7 +23,7 @@ pkg_check_modules(PCRE2 REQUIRED IMPORTED_TARGET libpcre2-8) # The source files, globbed so we can drop in local / custom Bundles and extensions. file(GLOB CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/*.cc ${PROJECT_SOURCE_DIR}/src/cripts/*/*.cc) -file(GLOB TEST_CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/tests/*.cc) +file(GLOB TEST_CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/tests/*.cc ${PROJECT_SOURCE_DIR}/src/cripts/unit_tests/*.cc) list(REMOVE_ITEM CPP_FILES ${TEST_CPP_FILES}) set(CRIPTS_PUBLIC_HEADERS @@ -93,3 +93,10 @@ add_custom_target( ) clang_tidy_check(cripts) + +if(BUILD_TESTING) + add_executable(test_cripts unit_tests/test_CacheGroup.cc CacheGroup.cc unit_tests/stub.cc) + target_include_directories(test_cripts PRIVATE ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src) + target_link_libraries(test_cripts PRIVATE libswoc::libswoc fmt::fmt Catch2::Catch2WithMain) + add_test(NAME test_cripts COMMAND test_cripts) +endif() diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc index b476d629db9..17d9222eab5 100644 --- a/src/cripts/CacheGroup.cc +++ b/src/cripts/CacheGroup.cc @@ -281,21 +281,26 @@ Cache::Group::WriteToDisk() { std::unique_lock unique_lock(_mutex); - _last_sync = cripts::Time::Clock::now(); - for (size_t ix = 0; ix < _slots.size(); ++ix) { - bool need_sync = false; + auto now = cripts::Time::Clock::now(); + bool any_dirty = false; + bool all_synced = true; + for (size_t ix = 0; ix < _slots.size(); ++ix) { if (_slots[ix].last_write > _slots[ix].last_sync) { - _slots[ix].last_sync = _last_sync; - need_sync = true; - } - - if (need_sync) { - syncMap(ix); + any_dirty = true; + _slots[ix].last_sync = now; + if (syncMap(ix)) { + _last_sync = now; + } else { + _slots[ix].last_sync = _slots[ix].last_write; // revert so next call retries + all_synced = false; + } } } - clearLog(); + if (any_dirty && all_synced) { + clearLog(); + } } // @@ -318,7 +323,7 @@ Cache::Group::appendLog(const Cache::Group::_Entry &entry) _txn_log.flush(); } -void +bool Cache::Group::syncMap(size_t index) { constexpr size_t BUFFER_SIZE = 64 * 1024; @@ -331,7 +336,7 @@ Cache::Group::syncMap(size_t index) if (!tmp_file) { TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); - return; + return false; } // Helper lambda to append data to the write buffer @@ -373,13 +378,16 @@ Cache::Group::syncMap(size_t index) if (write_failed || !tmp_file) { TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); std::filesystem::remove(tmp_path); - return; + return false; } if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); std::filesystem::remove(tmp_path); + return false; } + + return true; } void diff --git a/src/cripts/unit_tests/stub.cc b/src/cripts/unit_tests/stub.cc new file mode 100644 index 00000000000..65680db4a19 --- /dev/null +++ b/src/cripts/unit_tests/stub.cc @@ -0,0 +1,78 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Stubs for the ATS plugin API symbols referenced by CacheGroup.cc. +// Only the Group class (not Manager) is exercised in unit tests, so the +// TSCont/TSAction/TSMutex stubs never execute — they just satisfy the linker. + +#include "ts/ts.h" + +void +TSWarning(const char * /* fmt */, ...) +{ +} + +void +TSError(const char * /* fmt */, ...) +{ +} + +TSCont +TSContCreate(TSEventFunc, TSMutex) +{ + return nullptr; +} + +void +TSContDataSet(TSCont, void *) +{ +} + +void * +TSContDataGet(TSCont) +{ + return nullptr; +} + +TSMutex +TSMutexCreate() +{ + return nullptr; +} + +TSAction +TSContScheduleEveryOnPool(TSCont, TSHRTime, TSThreadPool) +{ + return nullptr; +} + +void +TSActionCancel(TSAction) +{ +} + +void +TSContDestroy(TSCont) +{ +} + +const char * +TSRuntimeDirGet() +{ + return "/tmp"; +} diff --git a/src/cripts/unit_tests/test_CacheGroup.cc b/src/cripts/unit_tests/test_CacheGroup.cc new file mode 100644 index 00000000000..aa46e01162f --- /dev/null +++ b/src/cripts/unit_tests/test_CacheGroup.cc @@ -0,0 +1,224 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "cripts/CacheGroup.hpp" + +#include +#include +#include + +// RAII temp directory that cleans up after each test +struct TempDir { + std::filesystem::path path; + + TempDir() + { + path = std::filesystem::temp_directory_path() / + ("cg_test_" + std::to_string(std::chrono::steady_clock::now().time_since_epoch().count())); + std::filesystem::create_directories(path); + } + + ~TempDir() { std::filesystem::remove_all(path); } + + std::string + str() const + { + return path.string(); + } +}; + +TEST_CASE("CacheGroup: basic insert and lookup", "[cripts][CacheGroup]") +{ + TempDir dir; + cripts::Cache::Group g("test", dir.str()); + + g.Insert("url1"); + g.Insert("url2"); + + auto epoch = cripts::Time::Clock::from_time_t(0); + CHECK(g.Lookup("url1", epoch)); + CHECK(g.Lookup("url2", epoch)); + CHECK_FALSE(g.Lookup("url3", epoch)); +} + +TEST_CASE("CacheGroup: persist and reload", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str()); + g.Insert("key_a"); + g.Insert("key_b"); + g.WriteToDisk(); + } + + cripts::Cache::Group g2("test", dir.str()); + CHECK(g2.Lookup("key_a", epoch)); + CHECK(g2.Lookup("key_b", epoch)); + CHECK_FALSE(g2.Lookup("key_c", epoch)); +} + +TEST_CASE("CacheGroup: transaction log replay on restart", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str()); + g.Insert("persisted"); + g.WriteToDisk(); + + // Insert more keys — these go to the txn log but maps are not re-synced + g.Insert("in_log_only"); + } + // The txn log should still exist since WriteToDisk was not called after the second Insert + + // Reload: log should be replayed + cripts::Cache::Group g2("test", dir.str()); + CHECK(g2.Lookup("persisted", epoch)); + CHECK(g2.Lookup("in_log_only", epoch)); +} + +TEST_CASE("CacheGroup: corrupt map file is skipped", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("good_key"); + g.WriteToDisk(); + } + + // Corrupt the first map file + auto map_path = dir.path / "test" / "map_0.bin"; + if (std::filesystem::exists(map_path)) { + std::ofstream corrupt(map_path, std::ios::binary | std::ios::trunc); + corrupt << "JUNK_DATA_GARBAGE"; + } + + // Reload — corrupt map should be skipped; good_key is lost (log was cleared by + // WriteToDisk), but the group must still accept new inserts without crashing. + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK_FALSE(g2.Lookup("good_key", epoch)); + g2.Insert("new_key"); + CHECK(g2.Lookup("new_key", epoch)); +} + +TEST_CASE("CacheGroup: truncated map file is handled gracefully", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("truncated_key"); + g.WriteToDisk(); + } + + // Truncate the map file to just the version field (incomplete header) + auto map_path = dir.path / "test" / "map_0.bin"; + if (std::filesystem::exists(map_path)) { + auto size = std::filesystem::file_size(map_path); + if (size > sizeof(uint64_t)) { + std::filesystem::resize_file(map_path, sizeof(uint64_t) + 1); // version + 1 byte of header + } + } + + // Reload — truncated header should be skipped; truncated_key is lost (log was + // cleared by WriteToDisk), but the group must recover and accept new inserts. + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK_FALSE(g2.Lookup("truncated_key", epoch)); + g2.Insert("after_truncation"); + CHECK(g2.Lookup("after_truncation", epoch)); +} + +TEST_CASE("CacheGroup: wrong version map file is skipped", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("versioned_key"); + g.WriteToDisk(); + } + + // Overwrite the version field with a bad value + auto map_path = dir.path / "test" / "map_0.bin"; + if (std::filesystem::exists(map_path)) { + std::fstream f(map_path, std::ios::in | std::ios::out | std::ios::binary); + uint64_t bad_version = 0xDEADBEEFCAFEBABEULL; + f.write(reinterpret_cast(&bad_version), sizeof(bad_version)); + } + + // Reload — version mismatch should be skipped; versioned_key is lost (log was + // cleared by WriteToDisk), but the group must recover and accept new inserts. + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK_FALSE(g2.Lookup("versioned_key", epoch)); + g2.Insert("post_version_check"); + CHECK(g2.Lookup("post_version_check", epoch)); +} + +TEST_CASE("CacheGroup: WriteToDisk does not clear log on sync failure", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("before_fail"); + g.WriteToDisk(); // initial successful sync + log clear + + g.Insert("after_initial_sync"); + + // Make the map directory read-only so syncMap will fail on rename + auto group_dir = dir.path / "test"; + std::filesystem::permissions(group_dir, std::filesystem::perms::owner_read | std::filesystem::perms::owner_exec); + + g.WriteToDisk(); // should fail to sync; log must NOT be cleared + + // Restore permissions so cleanup works + std::filesystem::permissions(group_dir, std::filesystem::perms::owner_all); + + // The txn log should still contain "after_initial_sync" — verify via reload + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK(g2.Lookup("before_fail", epoch)); + CHECK(g2.Lookup("after_initial_sync", epoch)); +} + +TEST_CASE("CacheGroup: map rotation writes empty map to disk", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + // max_entries=2 to trigger rotation after 2 inserts + { + cripts::Cache::Group g("test", dir.str(), 2, 3); + g.Insert("key1"); + g.Insert("key2"); + g.Insert("key3"); // triggers rotation + g.WriteToDisk(); + } + + cripts::Cache::Group g2("test", dir.str(), 2, 3); + // All three keys were in slot 0 at WriteToDisk time and survive the reload. + CHECK(g2.Lookup("key1", epoch)); + CHECK(g2.Lookup("key2", epoch)); + CHECK(g2.Lookup("key3", epoch)); +} From 94cbf019828edbbcca367ccf0714d308773fafb7 Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Fri, 27 Feb 2026 10:01:52 -0700 Subject: [PATCH 4/5] Address Copilot review comments Add missing standard headers in CacheGroup.cc. Use std::error_code overload for filesystem::remove in syncMap error paths. Add and to CacheGroup.hpp. Use fixed-width types in _MapHeader for a stable on-disk format. Fix max_to_process calculation in the sync continuation to spread groups evenly across the sync window. --- include/cripts/CacheGroup.hpp | 10 ++++++---- src/cripts/CacheGroup.cc | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/cripts/CacheGroup.hpp b/include/cripts/CacheGroup.hpp index 802ac68a90c..1c97e33777b 100644 --- a/include/cripts/CacheGroup.hpp +++ b/include/cripts/CacheGroup.hpp @@ -21,11 +21,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include "cripts/Context.hpp" #include "cripts/Time.hpp" @@ -50,10 +52,10 @@ class Group // Header structure for on-disk map files (after VERSION field) struct _MapHeader { - time_t created_ts; - time_t last_write_ts; - time_t last_sync_ts; - size_t count; + int64_t created_ts; + int64_t last_write_ts; + int64_t last_sync_ts; + uint64_t count; }; using _MapType = std::unordered_map; diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc index 17d9222eab5..3d606e602fa 100644 --- a/src/cripts/CacheGroup.cc +++ b/src/cripts/CacheGroup.cc @@ -17,6 +17,10 @@ */ #include +#include +#include +#include +#include #include #include @@ -43,9 +47,10 @@ _cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) std::lock_guard lock(mgr->_mutex); auto &groups = mgr->_groups; - const size_t max_to_process = (groups.size() + (_SYNC_GROUP_EVERY - 1)) / _SYNC_GROUP_EVERY; - size_t processed = 0; - auto now = cripts::Time::Clock::now(); + constexpr size_t runs_per_window = _SYNC_GROUP_EVERY / _CONT_SYNC_INTERVAL; + const size_t max_to_process = (groups.size() + (runs_per_window - 1)) / runs_per_window; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { if (auto group = it->second.lock()) { @@ -377,13 +382,15 @@ Cache::Group::syncMap(size_t index) if (write_failed || !tmp_file) { TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); - std::filesystem::remove(tmp_path); + std::error_code ec; + std::filesystem::remove(tmp_path, ec); return false; } if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); - std::filesystem::remove(tmp_path); + std::error_code ec; + std::filesystem::remove(tmp_path, ec); return false; } From 8e4703ca32722421eae5eb8deec790cf2b21e9b1 Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Thu, 5 Mar 2026 17:51:14 -0700 Subject: [PATCH 5/5] Address bryancall's review comments Fix sync retry logic: revert last_sync to its previous value on syncMap failure instead of setting it to last_write. The old code made the slot appear clean, preventing retries until a new Insert bumped last_write again. --- doc/developer-guide/cripts/cripts-misc.en.rst | 4 +++- src/cripts/CacheGroup.cc | 14 ++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/doc/developer-guide/cripts/cripts-misc.en.rst b/doc/developer-guide/cripts/cripts-misc.en.rst index 875e4aac1eb..67c19adb19c 100644 --- a/doc/developer-guide/cripts/cripts-misc.en.rst +++ b/doc/developer-guide/cripts/cripts-misc.en.rst @@ -415,6 +415,8 @@ Debug logging uses the same format string syntax as ``fmt::format()`` in ``libfm The default debug tag for Cripts is the name of the Cript itself, either the Cript source file, or the compiled plugin name. +.. _cripts-misc-cache-groups: + Cache Groups ============ @@ -422,7 +424,7 @@ As a way to manage association between cache entries, Cripts provides an infrast for cache groups. A cache group is a set of cache entries that are logically associated with each other via custom identifiers. -Example implementation of the Cache Groups RFC +Example implementation of the Cache Groups RFC: .. code-block:: cpp diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc index 3d606e602fa..a20343ab69a 100644 --- a/src/cripts/CacheGroup.cc +++ b/src/cripts/CacheGroup.cc @@ -257,7 +257,7 @@ Cache::Group::LoadFromDisk() TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); break; } - if (loaded_hashes.find(entry.hash) == loaded_hashes.end()) { + if (!loaded_hashes.contains(entry.hash)) { slot.map->insert_or_assign(entry.hash, entry); loaded_hashes.insert(entry.hash); } @@ -284,7 +284,7 @@ Cache::Group::LoadFromDisk() void Cache::Group::WriteToDisk() { - std::unique_lock unique_lock(_mutex); + std::unique_lock lock(_mutex); auto now = cripts::Time::Clock::now(); bool any_dirty = false; @@ -292,12 +292,13 @@ Cache::Group::WriteToDisk() for (size_t ix = 0; ix < _slots.size(); ++ix) { if (_slots[ix].last_write > _slots[ix].last_sync) { + auto old_sync = _slots[ix].last_sync; any_dirty = true; _slots[ix].last_sync = now; if (syncMap(ix)) { _last_sync = now; } else { - _slots[ix].last_sync = _slots[ix].last_write; // revert so next call retries + _slots[ix].last_sync = old_sync; // revert so next call retries all_synced = false; } } @@ -308,11 +309,6 @@ Cache::Group::WriteToDisk() } } -// -// Here comes the private member methods, these must never be called without -// already holding an exclusive lock on the mutex. -// - void Cache::Group::appendLog(const Cache::Group::_Entry &entry) { @@ -344,7 +340,6 @@ Cache::Group::syncMap(size_t index) return false; } - // Helper lambda to append data to the write buffer auto _appendToBuffer = [&](const void *data, size_t size) { if (write_failed) { return; @@ -369,7 +364,6 @@ Cache::Group::syncMap(size_t index) _appendToBuffer(&VERSION, sizeof(VERSION)); _appendToBuffer(&header, sizeof(header)); - // Write entries for (const auto &[_, entry] : *slot.map) { _appendToBuffer(&entry, sizeof(entry)); }