diff --git a/doc/developer-guide/cripts/cripts-misc.en.rst b/doc/developer-guide/cripts/cripts-misc.en.rst index dd582e6edac..67c19adb19c 100644 --- a/doc/developer-guide/cripts/cripts-misc.en.rst +++ b/doc/developer-guide/cripts/cripts-misc.en.rst @@ -414,3 +414,65 @@ Debug logging uses the same format string syntax as ``fmt::format()`` in ``libfm debug tags in your ATS configuration to enable debug output for your Cripts. The default debug tag for Cripts is the name of the Cript itself, either the Cript source file, or the compiled plugin name. + +.. _cripts-misc-cache-groups: + +Cache Groups +============ + +As a way to manage association between cache entries, Cripts provides an infrastructure +for cache groups. A cache group is a set of cache entries that are logically +associated with each other via custom identifiers. + +Example implementation of the Cache Groups RFC: + +.. code-block:: cpp + + do_create_instance() + { + // Create a cache-group for this site / remap rule(s). They can be shared. + instance.data[0] = cripts::Cache::Group::Manager::Factory("example_site"); + } + + do_delete_instance() + { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + delete static_cast *>(ptr); + instance.data[0] = nullptr; + } + } + + do_cache_lookup() + { + if (cached.response.lookupstatus != cripts::LookupStatus::MISS) { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto date = cached.response.AsDate("Date"); + if (date != 0) { + auto cache_groups = cached.response["Cache-Groups"]; + if (!cache_groups.empty()) { + borrow cg = *static_cast *>(ptr); + if (cg->Lookup(cache_groups.split(','), date)) { + cached.response.lookupstatus = cripts::LookupStatus::HIT_STALE; + } + } + } + } + } + } + + do_read_response() + { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto invalidation = client.request["Cache-Group-Invalidation"]; + if (!invalidation.empty()) { + borrow cg = *static_cast *>(ptr); + cg->Insert(invalidation.split(',')); + } + } + } diff --git a/example/cripts/cache_groups.cc b/example/cripts/cache_groups.cc new file mode 100644 index 00000000000..b51c1ae27c1 --- /dev/null +++ b/example/cripts/cache_groups.cc @@ -0,0 +1,106 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#define CRIPTS_CONVENIENCE_APIS 1 + +#include +#include + +do_create_instance() +{ + // Create a cache-group for this site / remap rule(s). They can be shared. + instance.data[0] = cripts::Cache::Group::Manager::Factory("example"); +} + +do_delete_instance() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + delete static_cast *>(ptr); + instance.data[0] = nullptr; + } +} + +do_cache_lookup() +{ + if (cached.response.lookupstatus != cripts::LookupStatus::MISS) { + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto date = cached.response.AsDate("Date"); + + if (date != 0) { + auto cache_groups = cached.response["Cache-Groups"]; + + CDebug("Looking up {}", cache_groups); + if (!cache_groups.empty()) { + borrow cg = *static_cast *>(ptr); + + if (cg->Lookup(cache_groups.split(','), date)) { + CDebug("Cache Group hit, forcing revalidation for request"); + cached.response.lookupstatus = cripts::LookupStatus::HIT_STALE; + } + } + } + } + } +} + +do_read_response() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr) { + auto invalidation = client.request["Cache-Group-Invalidation"]; + + if (!invalidation.empty()) { + borrow cg = *static_cast *>(ptr); + + cg->Insert(invalidation.split(',')); + } + } + +// This is just for simulating origin responses that would include cache-groups. +#if 0 + server.response["Cache-Groups"] = "\"foo\", \"bar\""; +#endif +} + +// The RFC draft does not support / provide definitions for this. It is useful, +// but should be protected with appropriate ACLs / authentication. +#if 0 +do_remap() +{ + void *ptr = AsPointer(instance.data[0]); + + if (ptr && urls.pristine.path == ".well-known/Cache-Groups") { + auto invalidation = client.request["Cache-Group-Invalidation"]; + + if (!invalidation.empty()) { + borrow cg = *static_cast *>(ptr); + + cg->Insert(invalidation.split(',')); + CDebug("Forcing a cache miss for cache-groups: {}", invalidation); + StatusCode(202); + } + } +} +#endif + +#include diff --git a/include/cripts/CacheGroup.hpp b/include/cripts/CacheGroup.hpp new file mode 100644 index 00000000000..1c97e33777b --- /dev/null +++ b/include/cripts/CacheGroup.hpp @@ -0,0 +1,217 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cripts/Context.hpp" +#include "cripts/Time.hpp" + +// Implemented in the .cc file +int _cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + +namespace cripts::Cache +{ + +class Group +{ +private: + using self_type = Group; + + struct _Entry { + cripts::Time::Point timestamp; // Timestamp of when the entry was created + size_t length; // Length of the group ID + uint32_t prefix; // First 4 characters of the group ID + uint64_t hash; // Hash value of the group ID, needed when writing to disk + }; + + // Header structure for on-disk map files (after VERSION field) + struct _MapHeader { + int64_t created_ts; + int64_t last_write_ts; + int64_t last_sync_ts; + uint64_t count; + }; + + using _MapType = std::unordered_map; + + struct _MapSlot { + std::unique_ptr<_MapType> map; + std::string path; + cripts::Time::Point created; + cripts::Time::Point last_write; + cripts::Time::Point last_sync; + }; + +public: + static constexpr uint64_t VERSION = (static_cast('C') << 56) | (static_cast('G') << 48) | + (static_cast('M') << 40) | (static_cast('A') << 32) | + (static_cast('P') << 24) | (static_cast('S') << 16) | + (static_cast('0') << 8) | 0x00; // Change this on version bump + + static constexpr std::chrono::seconds DEFAULT_MAX_AGE{63072000}; // 2 Years, max cache lifetime in ATS as well + + Group(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3) + { + Initialize(name, base_dir, max_entries, num_maps, DEFAULT_MAX_AGE); + } + + // Not used at the moment. + Group() = default; + + ~Group() { WriteToDisk(); } + + Group(const self_type &) = delete; + self_type &operator=(const self_type &) = delete; + + void Initialize(const std::string &name, const std::string &base_dir, size_t max_entries = 1024, size_t num_maps = 3, + std::chrono::seconds max_age = DEFAULT_MAX_AGE); + + void + SetMaxEntries(size_t max_entries) + { + std::unique_lock lock(_mutex); + _max_entries = max_entries; + } + + void + SetMaxAge(std::chrono::seconds max_age) + { + std::unique_lock lock(_mutex); + _max_age = max_age; + } + + void Insert(cripts::string_view key); + void Insert(const std::vector &keys); + bool Lookup(cripts::string_view key, cripts::Time::Point age) const; + bool Lookup(const std::vector &keys, cripts::Time::Point age) const; + + bool + Lookup(cripts::string_view key, time_t age) const + { + return Lookup(key, cripts::Time::Clock::from_time_t(age)); + } + + bool + Lookup(const std::vector &keys, time_t age) const + { + return Lookup(keys, cripts::Time::Clock::from_time_t(age)); + } + + cripts::Time::Point + LastSync() const + { + std::shared_lock lock(_mutex); + return _last_sync; + } + + void WriteToDisk(); + void LoadFromDisk(); + +private: + mutable std::shared_mutex _mutex; + std::string _name = "CacheGroup"; + size_t _num_maps = 3; + size_t _max_entries = 1024; + std::chrono::seconds _max_age = DEFAULT_MAX_AGE; + size_t _map_index = 0; + cripts::Time::Point _last_sync = cripts::Time::Point{}; + + std::vector<_MapSlot> _slots; + std::ofstream _txn_log; + std::string _log_path; + std::string _base_dir; + + void appendLog(const _Entry &entry); + void clearLog(); + bool syncMap(size_t index); + +public: + class Manager + { + friend int ::_cripts_cache_group_sync(TSCont cont, TSEvent event, void *edata); + using self_type = Manager; + + public: + static void *Factory(const std::string &name, size_t max_entries = 1024, size_t num_maps = 3); + + Manager(const self_type &) = delete; + self_type &operator=(const self_type &) = delete; + + protected: + void _scheduleCont(); + + std::unordered_map> _groups; + std::mutex _mutex; + + private: + Manager() + { + _base_dir = TSRuntimeDirGet(); + + if (std::filesystem::exists(_base_dir)) { + _base_dir += "/cache_groups"; + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSError("cripts::Cache::Group::Manager: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + + if (ec) { + TSWarning("cripts::Cache::Group::Manager: Failed to set permissions on `%s': %s", _base_dir.c_str(), + ec.message().c_str()); + } + } + } + } + _scheduleCont(); // Kick it off + } + + ~Manager() + { + if (_action) { + TSActionCancel(_action); + _action = nullptr; + } + if (_cont) { + TSContDestroy(_cont); + _cont = nullptr; + } + } + + static self_type &_instance(); + + TSCont _cont = nullptr; + TSAction _action = nullptr; + std::string _base_dir; + }; +}; +} // namespace cripts::Cache diff --git a/src/cripts/CMakeLists.txt b/src/cripts/CMakeLists.txt index 6e151f46f3d..dbe17a3f181 100644 --- a/src/cripts/CMakeLists.txt +++ b/src/cripts/CMakeLists.txt @@ -23,11 +23,12 @@ pkg_check_modules(PCRE2 REQUIRED IMPORTED_TARGET libpcre2-8) # The source files, globbed so we can drop in local / custom Bundles and extensions. file(GLOB CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/*.cc ${PROJECT_SOURCE_DIR}/src/cripts/*/*.cc) -file(GLOB TEST_CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/tests/*.cc) +file(GLOB TEST_CPP_FILES ${PROJECT_SOURCE_DIR}/src/cripts/tests/*.cc ${PROJECT_SOURCE_DIR}/src/cripts/unit_tests/*.cc) list(REMOVE_ITEM CPP_FILES ${TEST_CPP_FILES}) set(CRIPTS_PUBLIC_HEADERS ${PROJECT_SOURCE_DIR}/include/cripts/Bundle.hpp + ${PROJECT_SOURCE_DIR}/include/cripts/CacheGroup.hpp ${PROJECT_SOURCE_DIR}/include/cripts/Certs.hpp ${PROJECT_SOURCE_DIR}/include/cripts/Configs.hpp ${PROJECT_SOURCE_DIR}/include/cripts/ConfigsBase.hpp @@ -92,3 +93,10 @@ add_custom_target( ) clang_tidy_check(cripts) + +if(BUILD_TESTING) + add_executable(test_cripts unit_tests/test_CacheGroup.cc CacheGroup.cc unit_tests/stub.cc) + target_include_directories(test_cripts PRIVATE ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src) + target_link_libraries(test_cripts PRIVATE libswoc::libswoc fmt::fmt Catch2::Catch2WithMain) + add_test(NAME test_cripts COMMAND test_cripts) +endif() diff --git a/src/cripts/CacheGroup.cc b/src/cripts/CacheGroup.cc new file mode 100644 index 00000000000..a20343ab69a --- /dev/null +++ b/src/cripts/CacheGroup.cc @@ -0,0 +1,453 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "ts/ts.h" +#include "cripts/CacheGroup.hpp" + +inline static uint32_t +_make_prefix_int(cripts::string_view key) +{ + uint32_t prefix = 0; + + std::memcpy(&prefix, key.data(), std::min(4, key.size())); + return prefix; +} + +// Stuff around the disk sync continuation +constexpr auto _CONT_SYNC_INTERVAL = 10; // How often to run the continuation +constexpr int _SYNC_GROUP_EVERY = 60; // Sync each group every 60s + +int +_cripts_cache_group_sync(TSCont cont, TSEvent /* event */, void * /* edata */) +{ + auto *mgr = static_cast(TSContDataGet(cont)); + std::lock_guard lock(mgr->_mutex); + auto &groups = mgr->_groups; + + constexpr size_t runs_per_window = _SYNC_GROUP_EVERY / _CONT_SYNC_INTERVAL; + const size_t max_to_process = (groups.size() + (runs_per_window - 1)) / runs_per_window; + size_t processed = 0; + auto now = cripts::Time::Clock::now(); + + for (auto it = groups.begin(); it != groups.end() && processed < max_to_process;) { + if (auto group = it->second.lock()) { + if (group->LastSync() + std::chrono::seconds{_SYNC_GROUP_EVERY} < now) { + group->WriteToDisk(); + ++processed; + } + ++it; + } else { + // The group has been deleted, remove it from the map + it = groups.erase(it); + } + } + + return TS_SUCCESS; +} + +namespace cripts +{ + +void +Cache::Group::Initialize(const std::string &name, const std::string &base_dir, size_t max_entries, size_t num_maps, + std::chrono::seconds max_age) +{ + cripts::Time::Point zero = cripts::Time::Point{}; + cripts::Time::Point now = cripts::Time::Clock::now(); + + _name = name; + _num_maps = num_maps; + _max_entries = max_entries; + _max_age = max_age; + + _base_dir = base_dir + "/" + _name; + _log_path = _base_dir + "/" + "txn.log"; + + if (!std::filesystem::exists(_base_dir)) { + std::error_code ec; + + std::filesystem::create_directories(_base_dir, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to create directory `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } else { + std::filesystem::permissions(_base_dir, std::filesystem::perms::group_write, std::filesystem::perm_options::add, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to set permissions on `%s': %s", _base_dir.c_str(), ec.message().c_str()); + } + } + } + + for (size_t ix = 0; ix < _num_maps; ++ix) { + _slots.emplace_back(_MapSlot{.map = std::make_unique<_MapType>(), + .path = _base_dir + "/map_" + std::to_string(ix) + ".bin", + .created = zero, + .last_write = zero, + .last_sync = zero}); + } + + _slots[0].created = now; + _slots[0].last_write = now; + + LoadFromDisk(); + _last_sync = now; +} + +void +Cache::Group::Insert(cripts::string_view key) +{ + static const std::hash hasher; + + // Trim any "'s and white spaces from the key + key.trim_if(&isspace).trim('"'); + + auto now = cripts::Time::Clock::now(); + auto hash = static_cast(hasher(key)); + uint32_t prefix = _make_prefix_int(key); + + std::unique_lock lock(_mutex); + + auto &slot = _slots[_map_index]; + auto it = slot.map->find(hash); + + if (it != slot.map->end() && it->second.length == key.size() && it->second.prefix == prefix) { + it->second.timestamp = now; + slot.last_write = now; + appendLog(it->second); + + return; + } + + Cache::Group::_Entry entry{.timestamp = now, .length = key.size(), .prefix = prefix, .hash = hash}; + + slot.map->insert_or_assign(hash, entry); + slot.last_write = now; + appendLog(entry); + + if (slot.map->size() > _max_entries || (now - slot.created) > _max_age) { + _map_index = (_map_index + 1) % _slots.size(); + + auto &next_slot = _slots[_map_index]; + + if (next_slot.last_write > next_slot.last_sync) { + TSWarning("cripts::Cache::Group: Rotating unsynced map for `%s'! This may lead to data loss.", _name.c_str()); + } + next_slot.map->clear(); + next_slot.created = now; + next_slot.last_write = now; + syncMap(_map_index); // Sync to disk will make sure it's empty on disk + } +} + +void +Cache::Group::Insert(const std::vector &keys) +{ + for (auto &key : keys) { + Insert(key); + } +} + +bool +Cache::Group::Lookup(cripts::string_view key, cripts::Time::Point age) const +{ + // Trim any "'s and whitespaces from the key + key.trim_if(&isspace).trim('"'); + + uint64_t hash = static_cast(std::hash{}(key)); + std::shared_lock lock(_mutex); + + for (size_t i = 0; i < _slots.size(); ++i) { + size_t map_index = (_map_index + _slots.size() - i) % _slots.size(); + const auto &slot = _slots[map_index]; + + if (slot.last_write < age) { + continue; // Skip maps that haven't been written to since this time + } + + const auto &map = *slot.map; + auto it = map.find(hash); + + if (it != map.end()) { + const Cache::Group::_Entry &entry = it->second; + + if (entry.timestamp < age || entry.length != key.size() || entry.prefix != _make_prefix_int(key)) { + continue; + } + + return true; + } + } + + return false; +} + +bool +Cache::Group::Lookup(const std::vector &keys, cripts::Time::Point age) const +{ + for (auto &key : keys) { + if (Lookup(key, age)) { + return true; + } + } + + return false; +} + +void +Cache::Group::LoadFromDisk() +{ + std::unique_lock lock(_mutex); + std::ifstream log(_log_path, std::ios::binary); + std::unordered_set loaded_hashes; + + for (size_t slot_ix = 0; slot_ix < _slots.size(); ++slot_ix) { + auto &slot = _slots[slot_ix]; + uint64_t version_id = 0; + _MapHeader header{}; + std::ifstream file(slot.path, std::ios::binary); + + if (!file) { + continue; + } + + file.read(reinterpret_cast(&version_id), sizeof(version_id)); + if (version_id != VERSION) { + TSWarning("cripts::Cache::Group: Version mismatch for map file: %s, expected %llu, got %llu. Skipping this map.", + slot.path.c_str(), static_cast(VERSION), static_cast(version_id)); + continue; + } + + file.read(reinterpret_cast(&header), sizeof(header)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read header from map file: %s. Skipping this map.", slot.path.c_str()); + continue; + } + + slot.created = cripts::Time::Clock::from_time_t(header.created_ts); + slot.last_write = cripts::Time::Clock::from_time_t(header.last_write_ts); + slot.last_sync = cripts::Time::Clock::from_time_t(header.last_sync_ts); + + for (size_t i = 0; i < header.count; ++i) { + Cache::Group::_Entry entry; + + file.read(reinterpret_cast(&entry), sizeof(entry)); + if (!file) { + TSWarning("cripts::Cache::Group: Failed to read entry %zu from map file: %s. Stopping entry load.", i, slot.path.c_str()); + break; + } + if (!loaded_hashes.contains(entry.hash)) { + slot.map->insert_or_assign(entry.hash, entry); + loaded_hashes.insert(entry.hash); + } + } + } + + // Sort the slots by creation time, newest first, since we'll start with index 0 upon loading + std::ranges::sort(_slots, [](const _MapSlot &a, const _MapSlot &b) { return a.created > b.created; }); + + // Replay any entries from the transaction log, and then truncate it + if (log) { + Cache::Group::_Entry entry; + auto last_write = cripts::Time::Clock::from_time_t(0); + + while (log.read(reinterpret_cast(&entry), sizeof(entry))) { + _slots[0].map->insert_or_assign(entry.hash, entry); + last_write = std::max(last_write, entry.timestamp); + } + _slots[0].last_write = last_write; + clearLog(); + } +} + +void +Cache::Group::WriteToDisk() +{ + std::unique_lock lock(_mutex); + + auto now = cripts::Time::Clock::now(); + bool any_dirty = false; + bool all_synced = true; + + for (size_t ix = 0; ix < _slots.size(); ++ix) { + if (_slots[ix].last_write > _slots[ix].last_sync) { + auto old_sync = _slots[ix].last_sync; + any_dirty = true; + _slots[ix].last_sync = now; + if (syncMap(ix)) { + _last_sync = now; + } else { + _slots[ix].last_sync = old_sync; // revert so next call retries + all_synced = false; + } + } + } + + if (any_dirty && all_synced) { + clearLog(); + } +} + +void +Cache::Group::appendLog(const Cache::Group::_Entry &entry) +{ + if (!_txn_log.is_open() || !_txn_log.good()) { + _txn_log.open(_log_path, std::ios::app | std::ios::out | std::ios::binary); + if (!_txn_log) { + TSWarning("cripts::Cache::Group: Failed to open transaction log `%s'.", _log_path.c_str()); + return; + } + } + + _txn_log.write(reinterpret_cast(&entry), sizeof(entry)); + _txn_log.flush(); +} + +bool +Cache::Group::syncMap(size_t index) +{ + constexpr size_t BUFFER_SIZE = 64 * 1024; + std::array buffer; + size_t buf_pos = 0; + bool write_failed = false; + const auto &slot = _slots[index]; + const std::string tmp_path = slot.path + ".tmp"; + std::ofstream tmp_file(tmp_path, std::ios::binary | std::ios::trunc); + + if (!tmp_file) { + TSWarning("cripts::Cache::Group: Failed to open temp file for sync: %s.", tmp_path.c_str()); + return false; + } + + auto _appendToBuffer = [&](const void *data, size_t size) { + if (write_failed) { + return; + } + if (buf_pos + size > buffer.size()) { + tmp_file.write(reinterpret_cast(buffer.data()), buf_pos); + if (!tmp_file) { + write_failed = true; + return; + } + buf_pos = 0; + } + std::memcpy(buffer.data() + buf_pos, static_cast(data), size); + buf_pos += size; + }; + + _MapHeader header{.created_ts = cripts::Time::Clock::to_time_t(slot.created), + .last_write_ts = cripts::Time::Clock::to_time_t(slot.last_write), + .last_sync_ts = cripts::Time::Clock::to_time_t(slot.last_sync), + .count = slot.map->size()}; + + _appendToBuffer(&VERSION, sizeof(VERSION)); + _appendToBuffer(&header, sizeof(header)); + + for (const auto &[_, entry] : *slot.map) { + _appendToBuffer(&entry, sizeof(entry)); + } + + if (buf_pos > 0 && !write_failed) { + tmp_file.write(reinterpret_cast(buffer.data()), buf_pos); + } + tmp_file.flush(); + tmp_file.close(); + + if (write_failed || !tmp_file) { + TSWarning("cripts::Cache::Group: Failed to write to temp file `%s'.", tmp_path.c_str()); + std::error_code ec; + std::filesystem::remove(tmp_path, ec); + return false; + } + + if (std::rename(tmp_path.c_str(), slot.path.c_str()) != 0) { + TSWarning("cripts::Cache::Group: Failed to rename temp file `%s' to `%s'.", tmp_path.c_str(), slot.path.c_str()); + std::error_code ec; + std::filesystem::remove(tmp_path, ec); + return false; + } + + return true; +} + +void +Cache::Group::clearLog() +{ + std::error_code ec; + + _txn_log.close(); + std::filesystem::remove(_log_path, ec); + if (ec) { + TSWarning("cripts::Cache::Group: Failed to clear transaction log `%s': %s", _log_path.c_str(), ec.message().c_str()); + } +} + +// Singleton instance for the Cache::Group::Manager +Cache::Group::Manager & +Cache::Group::Manager::_instance() +{ + static Cache::Group::Manager inst; + return inst; +} + +void * +Cache::Group::Manager::Factory(const std::string &name, size_t max_entries, size_t num_maps) +{ + std::lock_guard lock(_instance()._mutex); + auto &groups = _instance()._groups; + + if (auto it = groups.find(name); it != groups.end()) { + if (auto group = it->second.lock()) { + return new std::shared_ptr(std::move(group)); + } + } + + if (!_instance()._base_dir.empty()) { + auto group = std::make_shared(name, _instance()._base_dir, max_entries, num_maps); + + groups[name] = group; + return new std::shared_ptr(std::move(group)); + } else { + TSError("cripts::Cache::Group: Failed to get runtime directory for initialization."); + return nullptr; + } +} + +void +Cache::Group::Manager::_scheduleCont() +{ + if (!_cont) { + _cont = TSContCreate(_cripts_cache_group_sync, TSMutexCreate()); + TSContDataSet(_cont, this); + } + + if (_action) { + TSActionCancel(_action); + _action = nullptr; + } + + _action = TSContScheduleEveryOnPool(_cont, _CONT_SYNC_INTERVAL * 1000, TS_THREAD_POOL_TASK); +} + +} // namespace cripts diff --git a/src/cripts/unit_tests/stub.cc b/src/cripts/unit_tests/stub.cc new file mode 100644 index 00000000000..65680db4a19 --- /dev/null +++ b/src/cripts/unit_tests/stub.cc @@ -0,0 +1,78 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Stubs for the ATS plugin API symbols referenced by CacheGroup.cc. +// Only the Group class (not Manager) is exercised in unit tests, so the +// TSCont/TSAction/TSMutex stubs never execute — they just satisfy the linker. + +#include "ts/ts.h" + +void +TSWarning(const char * /* fmt */, ...) +{ +} + +void +TSError(const char * /* fmt */, ...) +{ +} + +TSCont +TSContCreate(TSEventFunc, TSMutex) +{ + return nullptr; +} + +void +TSContDataSet(TSCont, void *) +{ +} + +void * +TSContDataGet(TSCont) +{ + return nullptr; +} + +TSMutex +TSMutexCreate() +{ + return nullptr; +} + +TSAction +TSContScheduleEveryOnPool(TSCont, TSHRTime, TSThreadPool) +{ + return nullptr; +} + +void +TSActionCancel(TSAction) +{ +} + +void +TSContDestroy(TSCont) +{ +} + +const char * +TSRuntimeDirGet() +{ + return "/tmp"; +} diff --git a/src/cripts/unit_tests/test_CacheGroup.cc b/src/cripts/unit_tests/test_CacheGroup.cc new file mode 100644 index 00000000000..aa46e01162f --- /dev/null +++ b/src/cripts/unit_tests/test_CacheGroup.cc @@ -0,0 +1,224 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "cripts/CacheGroup.hpp" + +#include +#include +#include + +// RAII temp directory that cleans up after each test +struct TempDir { + std::filesystem::path path; + + TempDir() + { + path = std::filesystem::temp_directory_path() / + ("cg_test_" + std::to_string(std::chrono::steady_clock::now().time_since_epoch().count())); + std::filesystem::create_directories(path); + } + + ~TempDir() { std::filesystem::remove_all(path); } + + std::string + str() const + { + return path.string(); + } +}; + +TEST_CASE("CacheGroup: basic insert and lookup", "[cripts][CacheGroup]") +{ + TempDir dir; + cripts::Cache::Group g("test", dir.str()); + + g.Insert("url1"); + g.Insert("url2"); + + auto epoch = cripts::Time::Clock::from_time_t(0); + CHECK(g.Lookup("url1", epoch)); + CHECK(g.Lookup("url2", epoch)); + CHECK_FALSE(g.Lookup("url3", epoch)); +} + +TEST_CASE("CacheGroup: persist and reload", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str()); + g.Insert("key_a"); + g.Insert("key_b"); + g.WriteToDisk(); + } + + cripts::Cache::Group g2("test", dir.str()); + CHECK(g2.Lookup("key_a", epoch)); + CHECK(g2.Lookup("key_b", epoch)); + CHECK_FALSE(g2.Lookup("key_c", epoch)); +} + +TEST_CASE("CacheGroup: transaction log replay on restart", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str()); + g.Insert("persisted"); + g.WriteToDisk(); + + // Insert more keys — these go to the txn log but maps are not re-synced + g.Insert("in_log_only"); + } + // The txn log should still exist since WriteToDisk was not called after the second Insert + + // Reload: log should be replayed + cripts::Cache::Group g2("test", dir.str()); + CHECK(g2.Lookup("persisted", epoch)); + CHECK(g2.Lookup("in_log_only", epoch)); +} + +TEST_CASE("CacheGroup: corrupt map file is skipped", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("good_key"); + g.WriteToDisk(); + } + + // Corrupt the first map file + auto map_path = dir.path / "test" / "map_0.bin"; + if (std::filesystem::exists(map_path)) { + std::ofstream corrupt(map_path, std::ios::binary | std::ios::trunc); + corrupt << "JUNK_DATA_GARBAGE"; + } + + // Reload — corrupt map should be skipped; good_key is lost (log was cleared by + // WriteToDisk), but the group must still accept new inserts without crashing. + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK_FALSE(g2.Lookup("good_key", epoch)); + g2.Insert("new_key"); + CHECK(g2.Lookup("new_key", epoch)); +} + +TEST_CASE("CacheGroup: truncated map file is handled gracefully", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("truncated_key"); + g.WriteToDisk(); + } + + // Truncate the map file to just the version field (incomplete header) + auto map_path = dir.path / "test" / "map_0.bin"; + if (std::filesystem::exists(map_path)) { + auto size = std::filesystem::file_size(map_path); + if (size > sizeof(uint64_t)) { + std::filesystem::resize_file(map_path, sizeof(uint64_t) + 1); // version + 1 byte of header + } + } + + // Reload — truncated header should be skipped; truncated_key is lost (log was + // cleared by WriteToDisk), but the group must recover and accept new inserts. + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK_FALSE(g2.Lookup("truncated_key", epoch)); + g2.Insert("after_truncation"); + CHECK(g2.Lookup("after_truncation", epoch)); +} + +TEST_CASE("CacheGroup: wrong version map file is skipped", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + { + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("versioned_key"); + g.WriteToDisk(); + } + + // Overwrite the version field with a bad value + auto map_path = dir.path / "test" / "map_0.bin"; + if (std::filesystem::exists(map_path)) { + std::fstream f(map_path, std::ios::in | std::ios::out | std::ios::binary); + uint64_t bad_version = 0xDEADBEEFCAFEBABEULL; + f.write(reinterpret_cast(&bad_version), sizeof(bad_version)); + } + + // Reload — version mismatch should be skipped; versioned_key is lost (log was + // cleared by WriteToDisk), but the group must recover and accept new inserts. + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK_FALSE(g2.Lookup("versioned_key", epoch)); + g2.Insert("post_version_check"); + CHECK(g2.Lookup("post_version_check", epoch)); +} + +TEST_CASE("CacheGroup: WriteToDisk does not clear log on sync failure", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + cripts::Cache::Group g("test", dir.str(), 1024, 2); + g.Insert("before_fail"); + g.WriteToDisk(); // initial successful sync + log clear + + g.Insert("after_initial_sync"); + + // Make the map directory read-only so syncMap will fail on rename + auto group_dir = dir.path / "test"; + std::filesystem::permissions(group_dir, std::filesystem::perms::owner_read | std::filesystem::perms::owner_exec); + + g.WriteToDisk(); // should fail to sync; log must NOT be cleared + + // Restore permissions so cleanup works + std::filesystem::permissions(group_dir, std::filesystem::perms::owner_all); + + // The txn log should still contain "after_initial_sync" — verify via reload + cripts::Cache::Group g2("test", dir.str(), 1024, 2); + CHECK(g2.Lookup("before_fail", epoch)); + CHECK(g2.Lookup("after_initial_sync", epoch)); +} + +TEST_CASE("CacheGroup: map rotation writes empty map to disk", "[cripts][CacheGroup]") +{ + TempDir dir; + auto epoch = cripts::Time::Clock::from_time_t(0); + + // max_entries=2 to trigger rotation after 2 inserts + { + cripts::Cache::Group g("test", dir.str(), 2, 3); + g.Insert("key1"); + g.Insert("key2"); + g.Insert("key3"); // triggers rotation + g.WriteToDisk(); + } + + cripts::Cache::Group g2("test", dir.str(), 2, 3); + // All three keys were in slot 0 at WriteToDisk time and survive the reload. + CHECK(g2.Lookup("key1", epoch)); + CHECK(g2.Lookup("key2", epoch)); + CHECK(g2.Lookup("key3", epoch)); +}