From 1e98a87b2f8daf09b36f98056992d4ca7aaf54cb Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 21 Jan 2026 12:04:40 +0000 Subject: [PATCH 01/34] feat: Implement CF.RESERVE command with bucket-based storage --- src/commands/cmd_cuckoo_filter.cc | 109 ++++++++++++++++ src/storage/redis_metadata.cc | 44 +++++++ src/storage/redis_metadata.h | 37 +++++- src/types/cuckoo_filter.h | 61 +++++++++ src/types/redis_cuckoo_chain.cc | 113 +++++++++++++++++ src/types/redis_cuckoo_chain.h | 53 ++++++++ tests/cppunit/types/cuckoo_filter_test.cc | 147 ++++++++++++++++++++++ 7 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 src/commands/cmd_cuckoo_filter.cc create mode 100644 src/types/cuckoo_filter.h create mode 100644 src/types/redis_cuckoo_chain.cc create mode 100644 src/types/redis_cuckoo_chain.h create mode 100644 tests/cppunit/types/cuckoo_filter_test.cc diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc new file mode 100644 index 00000000000..eb5e6ce3c94 --- /dev/null +++ b/src/commands/cmd_cuckoo_filter.cc @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "command_parser.h" +#include "commander.h" +#include "error_constants.h" +#include "server/server.h" +#include "types/redis_cuckoo_chain.h" + +namespace redis { + +class CommandCFReserve : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.RESERVE key capacity [BUCKETSIZE bs] [MAXITERATIONS mi] [EXPANSION ex] + if (args.size() < 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + + // Parse capacity (required) + auto parse_capacity = ParseInt(args[2], 10); + if (!parse_capacity) { + return {Status::RedisParseErr, "invalid capacity"}; + } + capacity_ = *parse_capacity; + if (capacity_ <= 0) { + return {Status::RedisParseErr, "capacity must be larger than 0"}; + } + + // Parse optional parameters + CommandParser parser(args, 3); + while (parser.Good()) { + if (parser.EatEqICase("BUCKETSIZE")) { + auto parse_bucket_size = parser.TakeInt(); + if (!parse_bucket_size.IsOK()) { + return {Status::RedisParseErr, "invalid bucket size"}; + } + bucket_size_ = parse_bucket_size.GetValue(); + if (bucket_size_ == 0 || bucket_size_ > 255) { + return {Status::RedisParseErr, "bucket size must be between 1 and 255"}; + } + } else if (parser.EatEqICase("MAXITERATIONS")) { + auto parse_max_iterations = parser.TakeInt(); + if (!parse_max_iterations.IsOK()) { + return {Status::RedisParseErr, "invalid max iterations"}; + } + max_iterations_ = parse_max_iterations.GetValue(); + if (max_iterations_ == 0) { + return {Status::RedisParseErr, "max iterations must be larger than 0"}; + } + } else if (parser.EatEqICase("EXPANSION")) { + auto parse_expansion = parser.TakeInt(); + if (!parse_expansion.IsOK()) { + return {Status::RedisParseErr, "invalid expansion factor"}; + } + expansion_ = parse_expansion.GetValue(); + } else { + return {Status::RedisParseErr, errInvalidSyntax}; + } + } + + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_); + + if (!s.ok()) { + if (s.IsInvalidArgument()) { + // Return error message to client + return {Status::RedisExecErr, s.ToString()}; + } + return {Status::RedisExecErr, "failed to create cuckoo filter"}; + } + + *output = redis::SimpleString("OK"); + return Status::OK(); + } + + private: + uint64_t capacity_ = kCFDefaultCapacity; + uint8_t bucket_size_ = kCFDefaultBucketSize; + uint16_t max_iterations_ = kCFDefaultMaxIterations; + uint8_t expansion_ = kCFDefaultExpansion; +}; + +// Register the CF.RESERVE command +REDIS_REGISTER_COMMANDS(CuckooFilter, + MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) + +} // namespace redis \ No newline at end of file diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 16fb190fe26..a004917f329 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -567,3 +567,47 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } + + +void CuckooChainMetadata::Encode(std::string *dst) const { + Metadata::Encode(dst); + + PutFixed16(dst, n_filters); + PutFixed16(dst, expansion); + PutFixed64(dst, base_capacity); + PutFixed8(dst, bucket_size); + PutFixed16(dst, max_iterations); + PutFixed64(dst, num_deleted_items); +} + +rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { + if (auto s = Metadata::Decode(input); !s.ok()) { + return s; + } + + if (input->size() < 21) { + return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); + } + + GetFixed16(input, &n_filters); + GetFixed16(input, &expansion); + GetFixed64(input, &base_capacity); + GetFixed8(input, &bucket_size); + GetFixed16(input, &max_iterations); + GetFixed64(input, &num_deleted_items); + + return rocksdb::Status::OK(); +} + +uint64_t CuckooChainMetadata::GetTotalCapacity() const { + if (expansion == 0 || n_filters == 1) { + return base_capacity; + } + + // Calculate total capacity across all filters + uint64_t total = 0; + for (uint16_t i = 0; i < n_filters; i++) { + total += base_capacity * std::pow(expansion, i); + } + return total; +} diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 2db3e9d0cec..4bdd66b500b 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -54,12 +54,13 @@ enum RedisType : uint8_t { kRedisHyperLogLog = 11, kRedisTDigest = 12, kRedisTimeSeries = 13, + kRedisCuckooFilter = 14, kRedisTypeMax }; inline constexpr const std::array RedisTypeNames = { "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries"}; + "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -306,6 +307,40 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; +class CuckooChainMetadata : public Metadata { + public: + /// The number of sub-filters in the chain + uint16_t n_filters; + + /// Expansion factor for new filters + /// When a filter is full, a new one is created with capacity = base_capacity * expansion^n + uint16_t expansion; + + /// The capacity of the first filter + uint64_t base_capacity; + + /// Number of fingerprints per bucket + uint8_t bucket_size; + + /// Maximum number of cuckoo kicks before considering filter full + uint16_t max_iterations; + + /// Track number of deleted items for maintenance + uint64_t num_deleted_items; + + explicit CuckooChainMetadata(bool generate_version = true) + : Metadata(kRedisCuckooFilter, generate_version), + n_filters(0), expansion(0), base_capacity(0), + bucket_size(0), max_iterations(0), num_deleted_items(0) {} + + void Encode(std::string *dst) const override; + using Metadata::Decode; + rocksdb::Status Decode(Slice *input) override; + + uint64_t GetTotalCapacity() const; + bool IsScaling() const { return expansion > 0; } +}; + enum class JsonStorageFormat : uint8_t { JSON = 0, CBOR = 1, diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h new file mode 100644 index 00000000000..f305f8c4480 --- /dev/null +++ b/src/types/cuckoo_filter.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include +#include +#include + +namespace redis { + +// Cuckoo filter implementation from the paper: +// "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. +// This is a bucket-based storage implementation where each bucket is stored +// as an independent key-value pair in RocksDB +class CuckooFilter { + public: + // Calculate the optimal number of buckets for the filter + static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { + // A load factor of 95.5% is chosen for the cuckoo filter + uint32_t num_buckets = static_cast(capacity / bucket_size / 0.955); + // Round up to next power of 2 for better hash distribution + if (num_buckets == 0) num_buckets = 1; + uint32_t power = 1; + while (power < num_buckets) power <<= 1; + return power; + } + + // Generate fingerprint from hash (8-bit fingerprint, non-zero) + static uint8_t GenerateFingerprint(uint64_t hash) { + uint8_t fp = hash & 0xFF; + return fp == 0 ? 1 : fp; // Ensure non-zero fingerprint + } + + // Calculate alternate bucket index using XOR + static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { + // Use a simple hash of the fingerprint for the XOR operation + uint32_t fp_hash = fingerprint * 0x5bd1e995; // MurmurHash2 constant + return (bucket_idx ^ fp_hash) % num_buckets; + } +}; + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc new file mode 100644 index 00000000000..5f75e57fdc8 --- /dev/null +++ b/src/types/redis_cuckoo_chain.cc @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "redis_cuckoo_chain.h" + +#include + +#include + +#include "cuckoo_filter.h" + +namespace redis { + +rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, + CuckooChainMetadata *metadata) { + return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); +} + +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t bucket_index) { + // Create a sub-key that includes both filter index and bucket index + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, bucket_index); + + // Create the internal key using the storage encoding + std::string bucket_key = InternalKey(ns_key, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); + return bucket_key; +} + +rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, + uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { + // Validate parameters + if (capacity == 0) { + return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); + } + + if (bucket_size == 0 || bucket_size > 255) { + return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); + } + + if (max_iterations == 0) { + return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); + } + + std::string ns_key = AppendNamespacePrefix(user_key); + + // Check if the key already exists + CuckooChainMetadata metadata; + rocksdb::Status s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (s.ok()) { + return rocksdb::Status::InvalidArgument("the key already exists"); + } + if (!s.IsNotFound()) { + return s; // Return other errors + } + + // Initialize metadata for the new cuckoo filter + metadata.size = 0; + metadata.base_capacity = capacity; + metadata.bucket_size = bucket_size; + metadata.max_iterations = max_iterations; + metadata.expansion = expansion; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + + // Calculate the number of buckets needed for this filter + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + LOG(INFO) << "Creating cuckoo filter with capacity=" << capacity + << ", bucket_size=" << bucket_size + << ", num_buckets=" << num_buckets + << ", max_iterations=" << max_iterations + << ", expansion=" << static_cast(expansion); + + // Create a write batch for atomic operation + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.RESERVE", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + // Store the metadata + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + // Note: With bucket-based storage, we don't pre-allocate all buckets + // Buckets will be created lazily on first write + // This saves memory for sparse filters + + // Optionally, we could create the first few buckets to ensure the filter is ready + // But for now, we'll keep it fully lazy for maximum memory efficiency + + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); +} + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h new file mode 100644 index 00000000000..f606f5f8c39 --- /dev/null +++ b/src/types/redis_cuckoo_chain.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include "cuckoo_filter.h" +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" + +namespace redis { + +// Default values for Cuckoo Filter +const uint32_t kCFDefaultCapacity = 1024; +const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket +const uint16_t kCFDefaultMaxIterations = 500; +const uint8_t kCFDefaultExpansion = 2; + +class CuckooChain : public Database { + public: + CuckooChain(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} + + // CF.RESERVE command - creates a new cuckoo filter with specified capacity + rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, + uint16_t max_iterations, uint8_t expansion); + + private: + // Get metadata for the cuckoo filter + rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + + // Generate key for a specific bucket in bucket-based storage + // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t bucket_index); +}; + +} // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc new file mode 100644 index 00000000000..8a4a43c0f8f --- /dev/null +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include +#include + +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" +#include "test_base.h" +#include "types/redis_cuckoo_chain.h" + +class RedisCuckooFilterTest : public TestBase { + protected: + explicit RedisCuckooFilterTest() : TestBase() { + cuckoo_ = std::make_unique(storage_.get(), namespace_); + } + ~RedisCuckooFilterTest() override = default; + + void SetUp() override { + key_ = "test_cuckoo_filter_key"; + } + + void TearDown() override { + [[maybe_unused]] auto s = cuckoo_->Del(*ctx_, key_); + } + + std::unique_ptr cuckoo_; + std::string key_; +}; + +TEST_F(RedisCuckooFilterTest, ReserveBasic) { + // Test basic reserve operation + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Failed to reserve cuckoo filter: " << s.ToString(); +} + +TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { + // First reserve should succeed + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Second reserve with same key should fail + s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + +TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { + // Test with zero capacity + auto s = cuckoo_->Reserve(*ctx_, key_, 0, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero bucket size + s = cuckoo_->Reserve(*ctx_, "key2", 1000, 0, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero max iterations + s = cuckoo_->Reserve(*ctx_, "key3", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(RedisCuckooFilterTest, ReserveVariousCapacities) { + // Test with different capacities + std::vector capacities = {100, 1000, 10000, 100000}; + + for (size_t i = 0; i < capacities.size(); ++i) { + std::string test_key = key_ + "_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for capacity " << capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveWithDifferentBucketSizes) { + // Test with different valid bucket sizes + std::vector bucket_sizes = {1, 2, 4, 8, 16}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, bucket_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for bucket size " << static_cast(bucket_sizes[i]); + } +} + +TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { + // Test the static helper function + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + // Should be a power of 2 + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + + // Should be able to hold the capacity with 95.5% load factor + uint32_t expected_min = static_cast(capacity / bucket_size / 0.955); + ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; +} + +TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { + // Test fingerprint generation ensures non-zero values + for (uint64_t hash = 0; hash < 256; ++hash) { + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); + ASSERT_NE(fp, 0) << "Fingerprint should never be zero"; + } +} + +TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { + uint32_t num_buckets = 1024; + + // Test that alternate bucket is different from original + for (uint32_t bucket = 0; bucket < 100; ++bucket) { + for (uint8_t fp = 1; fp < 10; ++fp) { + uint32_t alt_bucket = redis::CuckooFilter::GetAltBucketIndex(bucket, fp, num_buckets); + ASSERT_LT(alt_bucket, num_buckets) << "Alternate bucket out of range"; + + // Applying alternate twice should give back original + uint32_t double_alt = redis::CuckooFilter::GetAltBucketIndex(alt_bucket, fp, num_buckets); + ASSERT_EQ(double_alt, bucket) << "Double alternate should give original bucket"; + } + } +} \ No newline at end of file From 577ee71e710f18297b0affdcb8f22872bbba1d9b Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Thu, 22 Jan 2026 10:42:41 +0000 Subject: [PATCH 02/34] feat: Implement CF.RESERVE command with bucket-based storage --- src/commands/commander.h | 1 + src/types/redis_cuckoo_chain.cc | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/commands/commander.h b/src/commands/commander.h index 8b012ff27f1..d34129f58c4 100644 --- a/src/commands/commander.h +++ b/src/commands/commander.h @@ -92,6 +92,7 @@ enum class CommandCategory : uint8_t { Bit, BloomFilter, Cluster, + CuckooFilter, Function, Geo, Hash, diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 5f75e57fdc8..803fd5ffb7d 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,10 +20,10 @@ #include "redis_cuckoo_chain.h" -#include - #include +#include "logging.h" + #include "cuckoo_filter.h" namespace redis { @@ -84,11 +84,8 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Calculate the number of buckets needed for this filter uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); - LOG(INFO) << "Creating cuckoo filter with capacity=" << capacity - << ", bucket_size=" << bucket_size - << ", num_buckets=" << num_buckets - << ", max_iterations=" << max_iterations - << ", expansion=" << static_cast(expansion); + info("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); From 77b4c4ad612d907da268dfc8947a1edb7507bde6 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 23 Jan 2026 10:16:45 +0000 Subject: [PATCH 03/34] feat: Implement CF.RESERVE command with bucket-based storage --- src/types/cuckoo_filter.h | 41 +++++++++++--- src/types/redis_cuckoo_chain.cc | 9 ++- tests/cppunit/types/cuckoo_filter_test.cc | 68 +++++++++++++++++++---- 3 files changed, 98 insertions(+), 20 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index f305f8c4480..eb10eba4758 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -25,12 +25,20 @@ #include #include +#include "vendor/murmurhash2.h" + namespace redis { // Cuckoo filter implementation from the paper: // "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. // This is a bucket-based storage implementation where each bucket is stored // as an independent key-value pair in RocksDB +// +// Hash calculation follows RedisBloom's design: +// - fp = hash % 255 + 1 (fingerprint, non-zero, range: 1-255) +// - h1 = hash (primary hash) +// - h2 = h1 ^ (fp * 0x5bd1e995) (alternate hash via XOR) +// - bucket_index = hash % num_buckets (only apply modulo when indexing) class CuckooFilter { public: // Calculate the optimal number of buckets for the filter @@ -44,18 +52,37 @@ class CuckooFilter { return power; } - // Generate fingerprint from hash (8-bit fingerprint, non-zero) + // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) + // Following RedisBloom: fp = hash % 255 + 1 static uint8_t GenerateFingerprint(uint64_t hash) { - uint8_t fp = hash & 0xFF; - return fp == 0 ? 1 : fp; // Ensure non-zero fingerprint + return static_cast(hash % 255 + 1); + } + + // Calculate alternate hash using XOR (following RedisBloom) + // h2 = h1 ^ (fp * 0x5bd1e995) + // This preserves symmetry: GetAltHash(fp, GetAltHash(fp, h)) == h + static uint64_t GetAltHash(uint8_t fingerprint, uint64_t hash) { + return hash ^ (static_cast(fingerprint) * 0x5bd1e995); } - // Calculate alternate bucket index using XOR + // Legacy function for backward compatibility with tests + // Converts bucket index to hash, applies GetAltHash, then converts back to bucket index static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { - // Use a simple hash of the fingerprint for the XOR operation - uint32_t fp_hash = fingerprint * 0x5bd1e995; // MurmurHash2 constant - return (bucket_idx ^ fp_hash) % num_buckets; + // Treat bucket_idx as a hash value for the calculation + uint64_t hash = bucket_idx; + uint64_t alt_hash = GetAltHash(fingerprint, hash); + // Convert back to bucket index + return static_cast(alt_hash % num_buckets); } + + // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) + // This is the entry point for hashing items before they are inserted/checked in the filter + static uint64_t Hash(const char* data, size_t length) { + return HllMurMurHash64A(data, static_cast(length), 0); + } + + // Convenience overload for std::string + static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } }; } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 803fd5ffb7d..952af9f2a9a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -63,8 +63,10 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key std::string ns_key = AppendNamespacePrefix(user_key); // Check if the key already exists - CuckooChainMetadata metadata; - rocksdb::Status s = getCuckooChainMetadata(ctx, ns_key, &metadata); + // Read without snapshot to ensure we see any committed data + std::string raw_value; + rocksdb::ReadOptions read_options; // No snapshot + auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); if (s.ok()) { return rocksdb::Status::InvalidArgument("the key already exists"); } @@ -72,6 +74,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return s; // Return other errors } + // Initialize metadata for the new cuckoo filter + CuckooChainMetadata metadata; + // Initialize metadata for the new cuckoo filter metadata.size = 0; metadata.base_capacity = capacity; diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 8a4a43c0f8f..69183713cd8 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -29,7 +29,7 @@ class RedisCuckooFilterTest : public TestBase { protected: explicit RedisCuckooFilterTest() : TestBase() { - cuckoo_ = std::make_unique(storage_.get(), namespace_); + cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } ~RedisCuckooFilterTest() override = default; @@ -123,25 +123,71 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { } TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { - // Test fingerprint generation ensures non-zero values - for (uint64_t hash = 0; hash < 256; ++hash) { + // Test fingerprint generation ensures non-zero values in range [1, 255] + // Following RedisBloom: fp = hash % 255 + 1 + for (uint64_t hash = 0; hash < 1000; ++hash) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); - ASSERT_NE(fp, 0) << "Fingerprint should never be zero"; + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } + + // Verify the formula: fp = hash % 255 + 1 + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(0), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(254), 255); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(255), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(256), 2); } TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { uint32_t num_buckets = 1024; - // Test that alternate bucket is different from original - for (uint32_t bucket = 0; bucket < 100; ++bucket) { + // Test GetAltHash symmetry at hash level (following RedisBloom design) + // h2 = GetAltHash(fp, h1) + // h1 = GetAltHash(fp, h2) <- this is the symmetry property + for (uint64_t hash = 0; hash < 100; ++hash) { for (uint8_t fp = 1; fp < 10; ++fp) { - uint32_t alt_bucket = redis::CuckooFilter::GetAltBucketIndex(bucket, fp, num_buckets); - ASSERT_LT(alt_bucket, num_buckets) << "Alternate bucket out of range"; + uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fp, hash); + + // Applying GetAltHash twice should return original hash + uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fp, alt_hash); + ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; - // Applying alternate twice should give back original - uint32_t double_alt = redis::CuckooFilter::GetAltBucketIndex(alt_bucket, fp, num_buckets); - ASSERT_EQ(double_alt, bucket) << "Double alternate should give original bucket"; + // Both hashes should map to valid bucket indices + uint32_t bucket1 = hash % num_buckets; + uint32_t bucket2 = alt_hash % num_buckets; + ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; + ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; } } +} + +TEST_F(RedisCuckooFilterTest, HashFunction) { + // Test that Hash function produces consistent 64-bit values + std::string test_item = "hello"; + uint64_t hash1 = redis::CuckooFilter::Hash(test_item); + uint64_t hash2 = redis::CuckooFilter::Hash(test_item.data(), test_item.size()); + + // Both methods should produce the same result + ASSERT_EQ(hash1, hash2) << "Hash methods should be consistent"; + + // Hash should be deterministic + uint64_t hash3 = redis::CuckooFilter::Hash(test_item); + ASSERT_EQ(hash1, hash3) << "Hash should be deterministic"; + + // Different items should produce different hashes (with high probability) + uint64_t hash_world = redis::CuckooFilter::Hash("world"); + ASSERT_NE(hash1, hash_world) << "Different items should have different hashes"; + + // Empty string produces hash value 0 (this is expected with MurmurHash) + uint64_t hash_empty = redis::CuckooFilter::Hash(""); + ASSERT_EQ(hash_empty, 0) << "Empty string should produce hash value 0 with MurmurHash"; + + // Even with hash=0, fingerprint should be non-zero + uint8_t fp_empty = redis::CuckooFilter::GenerateFingerprint(hash_empty); + ASSERT_EQ(fp_empty, 1) << "Fingerprint of hash=0 should be 1 (0 % 255 + 1)"; + + // Test that hash can be used with fingerprint generation + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } \ No newline at end of file From 025ee864df24a0e3f7504e1f204b763a3d0e9683 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 23 Jan 2026 10:58:29 +0000 Subject: [PATCH 04/34] feat: Implement CF.RESERVE command with bucket-based storage --- src/types/redis_cuckoo_chain.cc | 6 + tests/cppunit/types/cuckoo_filter_test.cc | 172 +++++++++++++++++++++- 2 files changed, 172 insertions(+), 6 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 952af9f2a9a..8ab69e38555 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -52,6 +52,12 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } + // RedisBloom requires minimum capacity to ensure at least one bucket can be created + // With load factor 0.955, capacity=1 and bucket_size=4 results in 0 buckets + if (capacity < 2) { + return rocksdb::Status::InvalidArgument("capacity must be at least 2"); + } + if (bucket_size == 0 || bucket_size > 255) { return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); } diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 69183713cd8..56000f930d6 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -31,14 +31,17 @@ class RedisCuckooFilterTest : public TestBase { explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } - ~RedisCuckooFilterTest() override = default; - - void SetUp() override { - key_ = "test_cuckoo_filter_key"; + ~RedisCuckooFilterTest() override { + // Ensure cuckoo_ is destroyed before storage_ + cuckoo_.reset(); } - void TearDown() override { - [[maybe_unused]] auto s = cuckoo_->Del(*ctx_, key_); + void SetUp() override { + // Use a unique key for each test to avoid conflicts + // Include test name to make debugging easier + const ::testing::TestInfo* const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + key_ = std::string("cf_test_") + test_info->name(); } std::unique_ptr cuckoo_; @@ -190,4 +193,161 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; +} + + +TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { + // Test capacity = 1 (too small, following RedisBloom behavior) + // With load factor 0.955 and bucket_size=4, this would result in 0 buckets + auto s = cuckoo_->Reserve(*ctx_, key_, 1, 4, 500, 2); + ASSERT_FALSE(s.ok()) << "Should reject capacity = 1"; + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("at least 2"), std::string::npos) << "Error message should mention minimum capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveBucketSizeBoundary) { + // Test valid upper boundary (bucket_size is uint8_t, max = 255) + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 255, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=255 should be valid"; + + // Test bucket_size = 1 (minimum valid value) + s = cuckoo_->Reserve(*ctx_, "key_bs1", 1000, 1, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=1 should be valid"; + + // Test common power-of-2 bucket sizes + std::vector valid_sizes = {2, 4, 8, 16, 32, 64, 128}; + for (size_t i = 0; i < valid_sizes.size(); ++i) { + std::string test_key = "key_bs_" + std::to_string(valid_sizes[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, valid_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(valid_sizes[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + // Create the filter + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "First reserve should succeed"; + + // Verify metadata was stored by trying to reserve again with same key + // This should fail with "already exists" error + s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; + ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) + << "Error message should mention 'already exists'"; + + // Verify we can still create filters with different keys + s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; + + // Verify the original key still exists (can't create it again) + s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Original key should still exist"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + + +TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { + // expansion=0 means no auto-expansion when filter is full + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); + ASSERT_TRUE(s.ok()) << "expansion=0 should be valid (no auto-growth)"; + + // Verify different expansion values + std::vector expansions = {0, 1, 2, 4, 8}; + for (size_t i = 0; i < expansions.size(); ++i) { + std::string test_key = "key_exp_" + std::to_string(expansions[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, 500, expansions[i]); + ASSERT_TRUE(s.ok()) << "expansion=" << static_cast(expansions[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { + // Test with very large capacity + uint64_t large_capacity = 10000000; // 10 million + auto s = cuckoo_->Reserve(*ctx_, key_, large_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle large capacity"; + + // Verify num_buckets calculation doesn't overflow + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; + + // Test even larger capacity (100 million) + uint64_t huge_capacity = 100000000; + s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; + + num_buckets = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { + // Test different max_iterations values + std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; + + for (size_t i = 0; i < iterations.size(); ++i) { + std::string test_key = "key_iter_" + std::to_string(iterations[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, iterations[i], 2); + ASSERT_TRUE(s.ok()) << "max_iterations=" << iterations[i] << " should be valid"; + } + + // Test max_iterations = 0 (should fail) + auto s = cuckoo_->Reserve(*ctx_, "iter_zero", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()) << "max_iterations=0 should fail"; +} + +TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { + // Test minimum valid capacity + auto s = cuckoo_->Reserve(*ctx_, "min_cap", 2, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=2 should be valid (minimum)"; + + // Test small but valid capacities + std::vector small_capacities = {2, 3, 4, 5, 10, 50, 100}; + for (size_t i = 0; i < small_capacities.size(); ++i) { + std::string test_key = "small_" + std::to_string(small_capacities[i]); + s = cuckoo_->Reserve(*ctx_, test_key, small_capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; + + // Verify at least one bucket is created + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4); + ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { + // Test various parameter combinations to ensure robustness + struct TestCase { + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint8_t expansion; + bool should_succeed; + std::string description; + }; + + std::vector test_cases = { + {1000, 4, 500, 2, true, "Standard parameters"}, + {2, 1, 1, 0, true, "Minimum all parameters"}, + {100000, 255, 65535, 255, true, "Maximum all parameters"}, + {1000, 2, 100, 1, true, "Small bucket, moderate iterations"}, + {50000, 8, 1000, 4, true, "Large capacity, large bucket"}, + {10, 16, 50, 0, true, "Small capacity, large bucket, no expansion"}, + }; + + for (size_t i = 0; i < test_cases.size(); ++i) { + const auto& tc = test_cases[i]; + std::string test_key = "combo_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); + + if (tc.should_succeed) { + ASSERT_TRUE(s.ok()) << "Test case failed: " << tc.description; + } else { + ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; + } + } } \ No newline at end of file From 1fe2c6fb448106b53046b728dcf0276c35912429 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 2 Feb 2026 11:44:33 +0000 Subject: [PATCH 05/34] feat: Implement CF.ADD command with kick-out insertion --- src/commands/cmd_cuckoo_filter.cc | 37 ++- src/types/redis_cuckoo_chain.cc | 307 +++++++++++++++++++++- src/types/redis_cuckoo_chain.h | 12 + tests/cppunit/types/cuckoo_filter_test.cc | 212 +++++++++++++++ 4 files changed, 565 insertions(+), 3 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index eb5e6ce3c94..82364a60a37 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -102,8 +102,41 @@ class CommandCFReserve : public Commander { uint8_t expansion_ = kCFDefaultExpansion; }; -// Register the CF.RESERVE command +class CommandCFAdd : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.ADD key item + if (args.size() != 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + bool added = false; + auto s = cuckoo_db.Add(ctx, args_[1], args_[2], &added); + + if (!s.ok()) { + if (s.IsNotFound()) { + return {Status::RedisExecErr, "key not found"}; + } + if (s.IsAborted()) { + // Filter is full + return {Status::RedisExecErr, s.ToString()}; + } + return {Status::RedisExecErr, "failed to add item to cuckoo filter"}; + } + + // Return 1 if added, 0 if already exists (though we don't check for duplicates in this version) + *output = redis::Integer(added ? 1 : 0); + return Status::OK(); + } +}; + +// Register the CF.RESERVE and CF.ADD commands REDIS_REGISTER_COMMANDS(CuckooFilter, - MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) + MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), + MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 8ab69e38555..e5a311d78da 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -21,6 +21,7 @@ #include "redis_cuckoo_chain.h" #include +#include #include "logging.h" @@ -95,7 +96,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Calculate the number of buckets needed for this filter uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); - info("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); // Create a write batch for atomic operation @@ -118,4 +119,308 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } +// Helper function: calculate integer power (avoid std::pow for integers) +static uint64_t intPow(uint64_t base, uint16_t exp) { + uint64_t result = 1; + for (uint16_t i = 0; i < exp; ++i) { + result *= base; + } + return result; +} + +// Helper function: try to find empty slot in a bucket and insert fingerprint +static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { + for (size_t i = 0; i < bucket_size; ++i) { + if (static_cast(bucket_data[i]) == 0) { + bucket_data[i] = fingerprint; + *slot_idx = i; + return true; + } + } + return false; +} + +// Helper function: read bucket from storage and ensure correct size +static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, + const std::string &bucket_key, uint8_t bucket_size, + std::string *bucket_data) { + rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); + auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); + if (!s.ok() && !s.IsNotFound()) { + return s; + } + if (s.IsNotFound()) { + bucket_data->clear(); + } + if (bucket_data->size() < bucket_size) { + bucket_data->resize(bucket_size, 0); + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { + std::string ns_key = AppendNamespacePrefix(user_key); + + // Get metadata - use read options without snapshot to see latest data + CuckooChainMetadata metadata(false); + std::string raw_value; + rocksdb::ReadOptions read_options; // No snapshot + auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); + if (!s.ok()) { + if (s.IsNotFound()) { + return rocksdb::Status::NotFound("key not found"); + } + return s; + } + + // Decode metadata + Slice slice(raw_value); + s = metadata.Decode(&slice); + if (!s.ok()) { + return s; + } + + // Validate metadata + if (metadata.n_filters == 0) { + return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); + } + + // Calculate hash and fingerprint for the item + uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); + uint8_t fingerprint = CuckooFilter::GenerateFingerprint(hash); + + // Try to insert in each sub-filter (starting from the first/smallest one) + // This follows RedisBloom's behavior and is more efficient + for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { + // Calculate capacity for this filter using integer power + uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, filter_idx); + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + + // Calculate bucket indices + uint32_t bucket1_idx = hash % num_buckets; + uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprint, hash); + uint32_t bucket2_idx = alt_hash % num_buckets; + + // Read both buckets using helper function + std::string bucket1_key = getBucketKey(ns_key, metadata, filter_idx, bucket1_idx); + std::string bucket2_key = getBucketKey(ns_key, metadata, filter_idx, bucket2_idx); + + std::string bucket1_data, bucket2_data; + s = readBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); + if (!s.ok()) return s; + + s = readBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); + if (!s.ok()) return s; + + // Try simple insertion in bucket1 or bucket2 + size_t slot_idx; + std::string *target_bucket_data = nullptr; + std::string target_bucket_key; + + if (tryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { + target_bucket_data = &bucket1_data; + target_bucket_key = bucket1_key; + } else if (tryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { + target_bucket_data = &bucket2_data; + target_bucket_key = bucket2_key; + } + + if (target_bucket_data != nullptr) { + // Successfully inserted, write to storage atomically + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + batch->Put(target_bucket_key, *target_bucket_data); + + metadata.size++; + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + + *added = true; + return rocksdb::Status::OK(); + } + } + + // No space found in any filter, try kick-out on the last filter + uint16_t last_filter_idx = metadata.n_filters - 1; + uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, last_filter_idx); + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + + bool inserted = false; + s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); + if (s.ok() && inserted) { + // Update metadata after successful kick-out + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + metadata.size++; + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + + *added = true; + return rocksdb::Status::OK(); + } + + // Kick-out failed, try to expand if allowed + if (metadata.expansion > 0) { + s = expandFilter(ctx, ns_key, &metadata); + if (!s.ok()) { + return s; + } + + // Retry insertion in the new expanded filter + uint16_t new_filter_idx = metadata.n_filters - 1; + uint64_t new_filter_capacity = metadata.base_capacity * intPow(metadata.expansion, new_filter_idx); + uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); + + uint32_t bucket1_idx = hash % new_num_buckets; + std::string bucket1_key = getBucketKey(ns_key, metadata, new_filter_idx, bucket1_idx); + + // Insert into first slot of the new filter's first bucket + std::string bucket1_data(metadata.bucket_size, 0); + bucket1_data[0] = fingerprint; + + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + batch->Put(bucket1_key, bucket1_data); + + metadata.size++; + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + + *added = true; + return rocksdb::Status::OK(); + } + + // No expansion allowed and filter is full + *added = false; + return rocksdb::Status::Aborted("filter is full"); +} + +rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, + bool *inserted) { + *inserted = false; + + // Track modified buckets to write atomically at the end + std::unordered_map modified_buckets; + + // Start from bucket1 + uint32_t current_bucket_idx = hash % num_buckets; + uint8_t current_fp = fingerprint; + uint32_t victim_slot = 0; + + // Try to kick out existing fingerprints (all operations in memory) + for (uint16_t iteration = 0; iteration < metadata.max_iterations; ++iteration) { + // Read current bucket (check modified_buckets cache first) + std::string bucket_key = getBucketKey(ns_key, metadata, filter_index, current_bucket_idx); + std::string bucket_data; + + auto cached = modified_buckets.find(bucket_key); + if (cached != modified_buckets.end()) { + bucket_data = cached->second; + } else { + auto s = readBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); + if (!s.ok()) return s; + } + + // Swap fingerprint with victim slot + uint8_t old_fp = static_cast(bucket_data[victim_slot]); + bucket_data[victim_slot] = current_fp; + modified_buckets[bucket_key] = bucket_data; // Cache modification + current_fp = old_fp; + + // If kicked-out fingerprint is 0 (empty), we successfully inserted + if (current_fp == 0) { + *inserted = true; + break; + } + + // Calculate alternate bucket for the kicked-out fingerprint + // CRITICAL FIX: Use XOR hash calculation correctly + // We need to reconstruct the hash for the alternate bucket + // Since h2 = h1 ^ (fp * 0x5bd1e995), and we know bucket_idx = h1 % num_buckets + // We calculate alt_hash using the fingerprint and current bucket index + uint64_t current_hash = current_bucket_idx; // Approximate hash from bucket index + uint64_t alt_hash = CuckooFilter::GetAltHash(current_fp, current_hash); + uint32_t alt_bucket_idx = alt_hash % num_buckets; + + // Check if alternate bucket has empty slot + std::string alt_bucket_key = getBucketKey(ns_key, metadata, filter_index, alt_bucket_idx); + std::string alt_bucket_data; + + cached = modified_buckets.find(alt_bucket_key); + if (cached != modified_buckets.end()) { + alt_bucket_data = cached->second; + } else { + auto s = readBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); + if (!s.ok()) return s; + } + + size_t empty_slot; + if (tryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { + // Found empty slot! Cache this modification + modified_buckets[alt_bucket_key] = alt_bucket_data; + *inserted = true; + break; + } + + // Move to alternate bucket and try next victim slot + current_bucket_idx = alt_bucket_idx; + victim_slot = (victim_slot + 1) % metadata.bucket_size; + } + + // Write all modified buckets atomically + if (*inserted && !modified_buckets.empty()) { + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + for (const auto &entry : modified_buckets) { + batch->Put(entry.first, entry.second); + } + + auto s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + if (!s.ok()) return s; + } + + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, + CuckooChainMetadata *metadata) { + if (metadata->n_filters >= UINT16_MAX) { + return rocksdb::Status::Aborted("maximum number of filters reached"); + } + + metadata->n_filters++; + INFO("CF.ADD: Expanded to {} filters", metadata->n_filters); + + // Write updated metadata + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.EXPAND", ""}); + batch->PutLogData(log_data.Encode()); + + std::string metadata_bytes; + metadata->Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); +} + } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index f606f5f8c39..4e7ccdfd869 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -40,6 +40,10 @@ class CuckooChain : public Database { rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion); + // CF.ADD command - adds an item to the cuckoo filter + // Returns true if item was added, false if item already exists (probably) + rocksdb::Status Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added); + private: // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); @@ -48,6 +52,14 @@ class CuckooChain : public Database { // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t bucket_index); + + // Kick-out insertion: try to insert fingerprint by evicting existing ones + rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, + uint64_t hash, bool *inserted); + + // Create a new sub-filter for expansion + rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); }; } // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 56000f930d6..a78f54b2dab 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -350,4 +350,216 @@ TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; } } +} + +TEST_F(RedisCuckooFilterTest, AddBasic) { + // First reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Failed to reserve: " << s.ToString(); + + // Add an item + bool added = false; + s = cuckoo_->Add(*ctx_, key_, "item1", &added); + ASSERT_TRUE(s.ok()) << "Failed to add item: " << s.ToString(); + ASSERT_TRUE(added) << "Item should have been added"; +} + +TEST_F(RedisCuckooFilterTest, AddMultipleItems) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add multiple items + std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; + for (const auto& item : items) { + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()) << "Failed to add item: " << item; + ASSERT_TRUE(added) << "Item should have been added: " << item; + } +} + +TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { + // Try to add to a filter that doesn't exist + bool added = false; + auto s = cuckoo_->Add(*ctx_, "nonexistent_key", "item1", &added); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsNotFound()) << "Should return NotFound error"; +} + +TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { + // Test with different bucket sizes + std::vector bucket_sizes = {1, 2, 4, 8, 16}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(bucket_sizes[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, 100, bucket_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "Failed to reserve with bucket_size=" << static_cast(bucket_sizes[i]); + + // Add some items + for (int j = 0; j < 10; ++j) { + std::string item = "item_" + std::to_string(j); + bool added = false; + s = cuckoo_->Add(*ctx_, test_key, item, &added); + ASSERT_TRUE(s.ok()) << "Failed to add item with bucket_size=" << static_cast(bucket_sizes[i]); + ASSERT_TRUE(added); + } + } +} + +TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add the same item multiple times (Cuckoo Filter allows duplicates) + std::string item = "duplicate_item"; + for (int i = 0; i < 5; ++i) { + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()) << "Iteration " << i; + ASSERT_TRUE(added) << "Should allow duplicate items"; + } +} + +TEST_F(RedisCuckooFilterTest, AddManyItems) { + // Reserve a filter with moderate capacity + uint64_t capacity = 1000; + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add many items (should succeed without hitting limits) + int num_items = 100; + for (int i = 0; i < num_items; ++i) { + std::string item = "item_" + std::to_string(i); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()) << "Failed at item " << i; + ASSERT_TRUE(added); + } +} + +TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { + // Test with very small capacity to trigger potential full filter scenario + uint64_t small_capacity = 10; + auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); // expansion=0 means no auto-growth + ASSERT_TRUE(s.ok()); + + // Add items up to capacity + // With bucket_size=2 and capacity=10, we should have very limited space + bool full = false; + for (int i = 0; i < 100; ++i) { + std::string item = "item_" + std::to_string(i); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + + if (!s.ok()) { + // Filter is full + ASSERT_TRUE(s.IsAborted()) << "Should be Aborted status when full"; + full = true; + break; + } + } + + // We expect the filter to become full at some point + ASSERT_TRUE(full) << "Small filter should eventually become full"; +} + +TEST_F(RedisCuckooFilterTest, AddEmptyItem) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add empty string + bool added = false; + s = cuckoo_->Add(*ctx_, key_, "", &added); + ASSERT_TRUE(s.ok()) << "Should be able to add empty string"; + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddLongItem) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add a very long string + std::string long_item(10000, 'x'); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, long_item, &added); + ASSERT_TRUE(s.ok()) << "Should be able to add long string"; + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddBinaryData) { + // Reserve a filter + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add binary data (including null bytes) + std::string binary_item = std::string("\x00\x01\x02\xFF\xFE", 5); + bool added = false; + s = cuckoo_->Add(*ctx_, key_, binary_item, &added); + ASSERT_TRUE(s.ok()) << "Should be able to add binary data"; + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddWithVariousCapacities) { + // Test that Add works correctly with different filter capacities + std::vector capacities = {10, 100, 1000, 10000}; + + for (size_t i = 0; i < capacities.size(); ++i) { + std::string test_key = key_ + "_cap_" + std::to_string(capacities[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Add a reasonable number of items relative to capacity + int num_items = std::min(static_cast(capacities[i] / 10), 50); + for (int j = 0; j < num_items; ++j) { + std::string item = "item_" + std::to_string(j); + bool added = false; + s = cuckoo_->Add(*ctx_, test_key, item, &added); + ASSERT_TRUE(s.ok()) << "Failed for capacity=" << capacities[i] << ", item=" << j; + ASSERT_TRUE(added); + } + } +} + +TEST_F(RedisCuckooFilterTest, AddConsistentHashing) { + // Verify that the same item always hashes to the same buckets + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + std::string item = "test_item"; + + // Calculate hash for the item + uint64_t hash1 = redis::CuckooFilter::Hash(item); + uint64_t hash2 = redis::CuckooFilter::Hash(item); + + // Hashing should be deterministic + ASSERT_EQ(hash1, hash2) << "Hash should be consistent for the same item"; + + // Fingerprint should be consistent + uint8_t fp1 = redis::CuckooFilter::GenerateFingerprint(hash1); + uint8_t fp2 = redis::CuckooFilter::GenerateFingerprint(hash2); + ASSERT_EQ(fp1, fp2) << "Fingerprint should be consistent"; + + // Add the item + bool added = false; + s = cuckoo_->Add(*ctx_, key_, item, &added); + ASSERT_TRUE(s.ok()); + ASSERT_TRUE(added); +} + +TEST_F(RedisCuckooFilterTest, AddDifferentItemsProduceDifferentHashes) { + // Verify that different items produce different hashes + std::vector items = {"item1", "item2", "item3", "different", "another"}; + std::set hashes; + + for (const auto& item : items) { + uint64_t hash = redis::CuckooFilter::Hash(item); + hashes.insert(hash); + } + + // All items should have unique hashes (with very high probability) + ASSERT_EQ(hashes.size(), items.size()) << "Different items should produce different hashes"; } \ No newline at end of file From f50125c3085783df26ccd3b07cc5efd5b602e8cb Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 4 Feb 2026 20:25:56 +0800 Subject: [PATCH 06/34] trigger CI checks From 65664feeadfcfe082c97730dd157f7225ba09565 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 4 Feb 2026 12:36:45 +0000 Subject: [PATCH 07/34] style: Apply clang-format to cuckoo filter files --- src/commands/cmd_cuckoo_filter.cc | 3 +-- src/storage/redis_metadata.cc | 1 - src/storage/redis_metadata.h | 12 ++++++++---- src/types/cuckoo_filter.h | 8 ++------ src/types/redis_cuckoo_chain.cc | 20 ++++++++------------ src/types/redis_cuckoo_chain.h | 8 ++++---- tests/cppunit/types/cuckoo_filter_test.cc | 9 +++------ 7 files changed, 26 insertions(+), 35 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 82364a60a37..7bc128d878a 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -135,8 +135,7 @@ class CommandCFAdd : public Commander { }; // Register the CF.RESERVE and CF.ADD commands -REDIS_REGISTER_COMMANDS(CuckooFilter, - MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), +REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) } // namespace redis \ No newline at end of file diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 5f44b151ab6..f385aa6fe24 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -570,7 +570,6 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } - void CuckooChainMetadata::Encode(std::string *dst) const { Metadata::Encode(dst); diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 8d8b5b21666..7d6e1c61575 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -59,8 +59,8 @@ enum RedisType : uint8_t { }; inline constexpr const std::array RedisTypeNames = { - "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; + "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", + "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -330,8 +330,12 @@ class CuckooChainMetadata : public Metadata { explicit CuckooChainMetadata(bool generate_version = true) : Metadata(kRedisCuckooFilter, generate_version), - n_filters(0), expansion(0), base_capacity(0), - bucket_size(0), max_iterations(0), num_deleted_items(0) {} + n_filters(0), + expansion(0), + base_capacity(0), + bucket_size(0), + max_iterations(0), + num_deleted_items(0) {} void Encode(std::string *dst) const override; using Metadata::Decode; diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index eb10eba4758..5a3aa326aa1 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -54,9 +54,7 @@ class CuckooFilter { // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) // Following RedisBloom: fp = hash % 255 + 1 - static uint8_t GenerateFingerprint(uint64_t hash) { - return static_cast(hash % 255 + 1); - } + static uint8_t GenerateFingerprint(uint64_t hash) { return static_cast(hash % 255 + 1); } // Calculate alternate hash using XOR (following RedisBloom) // h2 = h1 ^ (fp * 0x5bd1e995) @@ -77,9 +75,7 @@ class CuckooFilter { // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) // This is the entry point for hashing items before they are inserted/checked in the filter - static uint64_t Hash(const char* data, size_t length) { - return HllMurMurHash64A(data, static_cast(length), 0); - } + static uint64_t Hash(const char* data, size_t length) { return HllMurMurHash64A(data, static_cast(length), 0); } // Convenience overload for std::string static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index e5a311d78da..ce0667e72cb 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -23,9 +23,8 @@ #include #include -#include "logging.h" - #include "cuckoo_filter.h" +#include "logging.h" namespace redis { @@ -34,8 +33,8 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } -std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t bucket_index) { +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index) { // Create a sub-key that includes both filter index and bucket index std::string sub_key; PutFixed16(&sub_key, filter_index); @@ -141,9 +140,8 @@ static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uin } // Helper function: read bucket from storage and ensure correct size -static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, - const std::string &bucket_key, uint8_t bucket_size, - std::string *bucket_data) { +static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, + uint8_t bucket_size, std::string *bucket_data) { rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); if (!s.ok() && !s.IsNotFound()) { @@ -312,9 +310,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co } rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted) { + const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted) { *inserted = false; // Track modified buckets to write atomically at the end @@ -402,8 +399,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns return rocksdb::Status::OK(); } -rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, - CuckooChainMetadata *metadata) { +rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata) { if (metadata->n_filters >= UINT16_MAX) { return rocksdb::Status::Aborted("maximum number of filters reached"); } diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 4e7ccdfd869..9c5fa4744cd 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -50,13 +50,13 @@ class CuckooChain : public Database { // Generate key for a specific bucket in bucket-based storage // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} - std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t bucket_index); + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index); // Kick-out insertion: try to insert fingerprint by evicting existing ones rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, - uint64_t hash, bool *inserted); + uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, + bool *inserted); // Create a new sub-filter for expansion rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index a78f54b2dab..8864deb3f63 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -19,6 +19,7 @@ */ #include + #include #include "storage/redis_db.h" @@ -39,8 +40,7 @@ class RedisCuckooFilterTest : public TestBase { void SetUp() override { // Use a unique key for each test to avoid conflicts // Include test name to make debugging easier - const ::testing::TestInfo* const test_info = - ::testing::UnitTest::GetInstance()->current_test_info(); + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } @@ -195,7 +195,6 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } - TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { // Test capacity = 1 (too small, following RedisBloom behavior) // With load factor 0.955 and bucket_size=4, this would result in 0 buckets @@ -238,8 +237,7 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; - ASSERT_NE(s.ToString().find("already exists"), std::string::npos) - << "Error message should mention 'already exists'"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); @@ -251,7 +249,6 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } - TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { // expansion=0 means no auto-expansion when filter is full auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); From 73ba1d406b3d13bcb656fd661f468d2b17c39fa3 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 6 Feb 2026 11:25:29 +0000 Subject: [PATCH 08/34] fix: Correct WriteBatchLogData constructor calls in CuckooChain --- src/types/redis_cuckoo_chain.cc | 16 ++++++++-------- src/types/redis_cuckoo_chain.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index ce0667e72cb..d1968ee7829 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -100,7 +100,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.RESERVE", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.RESERVE", user_key.ToString()}); batch->PutLogData(log_data.Encode()); // Store the metadata @@ -226,7 +226,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (target_bucket_data != nullptr) { // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); batch->Put(target_bucket_key, *target_bucket_data); @@ -249,11 +249,11 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); bool inserted = false; - s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); + s = kickOutInsert(ctx, user_key, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); if (s.ok() && inserted) { // Update metadata after successful kick-out auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); metadata.size++; @@ -288,7 +288,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co bucket1_data[0] = fingerprint; auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); batch->Put(bucket1_key, bucket1_data); @@ -309,7 +309,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co return rocksdb::Status::Aborted("filter is full"); } -rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, +rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted) { *inserted = false; @@ -385,7 +385,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns // Write all modified buckets atomically if (*inserted && !modified_buckets.empty()) { auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); batch->PutLogData(log_data.Encode()); for (const auto &entry : modified_buckets) { @@ -409,7 +409,7 @@ rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_ // Write updated metadata auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.EXPAND", ""}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.EXPAND", ""}); batch->PutLogData(log_data.Encode()); std::string metadata_bytes; diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 9c5fa4744cd..c8ad5550bba 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -54,9 +54,9 @@ class CuckooChain : public Database { uint32_t bucket_index); // Kick-out insertion: try to insert fingerprint by evicting existing ones - rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted); + rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, + const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, + uint8_t fingerprint, uint64_t hash, bool *inserted); // Create a new sub-filter for expansion rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); From 9ddfc8335b67c72488817a7ddda6e98424e46a11 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 4 May 2026 01:07:57 +0800 Subject: [PATCH 09/34] fix: ci --- src/storage/redis_metadata.cc | 4 ++- src/types/cuckoo_filter.h | 4 +-- src/types/redis_cuckoo_chain.cc | 41 +++++++++++------------ tests/cppunit/types/cuckoo_filter_test.cc | 16 +++++---- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index f385aa6fe24..e85d33e25fd 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -607,8 +607,10 @@ uint64_t CuckooChainMetadata::GetTotalCapacity() const { // Calculate total capacity across all filters uint64_t total = 0; + uint64_t filter_capacity = base_capacity; for (uint16_t i = 0; i < n_filters; i++) { - total += base_capacity * std::pow(expansion, i); + total += filter_capacity; + filter_capacity *= expansion; } return total; } diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 5a3aa326aa1..aa39f0936d7 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -44,7 +44,7 @@ class CuckooFilter { // Calculate the optimal number of buckets for the filter static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { // A load factor of 95.5% is chosen for the cuckoo filter - uint32_t num_buckets = static_cast(capacity / bucket_size / 0.955); + auto num_buckets = static_cast(static_cast(capacity) / bucket_size / 0.955L); // Round up to next power of 2 for better hash distribution if (num_buckets == 0) num_buckets = 1; uint32_t power = 1; @@ -81,4 +81,4 @@ class CuckooFilter { static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } }; -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index d1968ee7829..97652a58c9a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,7 +20,6 @@ #include "redis_cuckoo_chain.h" -#include #include #include "cuckoo_filter.h" @@ -119,7 +118,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key } // Helper function: calculate integer power (avoid std::pow for integers) -static uint64_t intPow(uint64_t base, uint16_t exp) { +static uint64_t IntPow(uint64_t base, uint16_t exp) { uint64_t result = 1; for (uint16_t i = 0; i < exp; ++i) { result *= base; @@ -128,10 +127,10 @@ static uint64_t intPow(uint64_t base, uint16_t exp) { } // Helper function: try to find empty slot in a bucket and insert fingerprint -static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { +static bool TryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { for (size_t i = 0; i < bucket_size; ++i) { if (static_cast(bucket_data[i]) == 0) { - bucket_data[i] = fingerprint; + bucket_data[i] = static_cast(fingerprint); *slot_idx = i; return true; } @@ -140,7 +139,7 @@ static bool tryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uin } // Helper function: read bucket from storage and ensure correct size -static rocksdb::Status readBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, +static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, uint8_t bucket_size, std::string *bucket_data) { rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); @@ -191,7 +190,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // This follows RedisBloom's behavior and is more efficient for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { // Calculate capacity for this filter using integer power - uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, filter_idx); + uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, filter_idx); uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); // Calculate bucket indices @@ -204,21 +203,21 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co std::string bucket2_key = getBucketKey(ns_key, metadata, filter_idx, bucket2_idx); std::string bucket1_data, bucket2_data; - s = readBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); + s = ReadBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); if (!s.ok()) return s; - s = readBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); + s = ReadBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); if (!s.ok()) return s; // Try simple insertion in bucket1 or bucket2 - size_t slot_idx; + size_t slot_idx = 0; std::string *target_bucket_data = nullptr; std::string target_bucket_key; - if (tryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { + if (TryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { target_bucket_data = &bucket1_data; target_bucket_key = bucket1_key; - } else if (tryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { + } else if (TryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { target_bucket_data = &bucket2_data; target_bucket_key = bucket2_key; } @@ -245,7 +244,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // No space found in any filter, try kick-out on the last filter uint16_t last_filter_idx = metadata.n_filters - 1; - uint64_t filter_capacity = metadata.base_capacity * intPow(metadata.expansion, last_filter_idx); + uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, last_filter_idx); uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); bool inserted = false; @@ -277,7 +276,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Retry insertion in the new expanded filter uint16_t new_filter_idx = metadata.n_filters - 1; - uint64_t new_filter_capacity = metadata.base_capacity * intPow(metadata.expansion, new_filter_idx); + uint64_t new_filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, new_filter_idx); uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); uint32_t bucket1_idx = hash % new_num_buckets; @@ -285,7 +284,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Insert into first slot of the new filter's first bucket std::string bucket1_data(metadata.bucket_size, 0); - bucket1_data[0] = fingerprint; + bucket1_data[0] = static_cast(fingerprint); auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); @@ -332,13 +331,13 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us if (cached != modified_buckets.end()) { bucket_data = cached->second; } else { - auto s = readBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); + auto s = ReadBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); if (!s.ok()) return s; } // Swap fingerprint with victim slot - uint8_t old_fp = static_cast(bucket_data[victim_slot]); - bucket_data[victim_slot] = current_fp; + auto old_fp = static_cast(bucket_data[victim_slot]); + bucket_data[victim_slot] = static_cast(current_fp); modified_buckets[bucket_key] = bucket_data; // Cache modification current_fp = old_fp; @@ -365,12 +364,12 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us if (cached != modified_buckets.end()) { alt_bucket_data = cached->second; } else { - auto s = readBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); + auto s = ReadBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); if (!s.ok()) return s; } - size_t empty_slot; - if (tryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { + size_t empty_slot = 0; + if (TryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { // Found empty slot! Cache this modification modified_buckets[alt_bucket_key] = alt_bucket_data; *inserted = true; @@ -419,4 +418,4 @@ rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_ return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 8864deb3f63..7a8244675a5 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -32,6 +32,10 @@ class RedisCuckooFilterTest : public TestBase { explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } + RedisCuckooFilterTest(const RedisCuckooFilterTest &) = delete; + RedisCuckooFilterTest &operator=(const RedisCuckooFilterTest &) = delete; + RedisCuckooFilterTest(RedisCuckooFilterTest &&) = delete; + RedisCuckooFilterTest &operator=(RedisCuckooFilterTest &&) = delete; ~RedisCuckooFilterTest() override { // Ensure cuckoo_ is destroyed before storage_ cuckoo_.reset(); @@ -40,7 +44,7 @@ class RedisCuckooFilterTest : public TestBase { void SetUp() override { // Use a unique key for each test to avoid conflicts // Include test name to make debugging easier - const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } @@ -121,7 +125,7 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; // Should be able to hold the capacity with 95.5% load factor - uint32_t expected_min = static_cast(capacity / bucket_size / 0.955); + auto expected_min = static_cast(static_cast(capacity) / bucket_size / 0.955L); ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; } @@ -337,7 +341,7 @@ TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { }; for (size_t i = 0; i < test_cases.size(); ++i) { - const auto& tc = test_cases[i]; + const auto &tc = test_cases[i]; std::string test_key = "combo_" + std::to_string(i); auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); @@ -368,7 +372,7 @@ TEST_F(RedisCuckooFilterTest, AddMultipleItems) { // Add multiple items std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; - for (const auto& item : items) { + for (const auto &item : items) { bool added = false; s = cuckoo_->Add(*ctx_, key_, item, &added); ASSERT_TRUE(s.ok()) << "Failed to add item: " << item; @@ -552,11 +556,11 @@ TEST_F(RedisCuckooFilterTest, AddDifferentItemsProduceDifferentHashes) { std::vector items = {"item1", "item2", "item3", "different", "another"}; std::set hashes; - for (const auto& item : items) { + for (const auto &item : items) { uint64_t hash = redis::CuckooFilter::Hash(item); hashes.insert(hash); } // All items should have unique hashes (with very high probability) ASSERT_EQ(hashes.size(), items.size()) << "Different items should produce different hashes"; -} \ No newline at end of file +} From c89f2de829f141f24d6d00bbf5d92f0f4209b86a Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 4 May 2026 23:00:12 +0800 Subject: [PATCH 10/34] fix: ci --- tests/cppunit/types/cuckoo_filter_test.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 7a8244675a5..b95caaed7e5 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -28,14 +28,16 @@ #include "types/redis_cuckoo_chain.h" class RedisCuckooFilterTest : public TestBase { - protected: - explicit RedisCuckooFilterTest() : TestBase() { - cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); - } + public: RedisCuckooFilterTest(const RedisCuckooFilterTest &) = delete; RedisCuckooFilterTest &operator=(const RedisCuckooFilterTest &) = delete; RedisCuckooFilterTest(RedisCuckooFilterTest &&) = delete; RedisCuckooFilterTest &operator=(RedisCuckooFilterTest &&) = delete; + + protected: + explicit RedisCuckooFilterTest() : TestBase() { + cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); + } ~RedisCuckooFilterTest() override { // Ensure cuckoo_ is destroyed before storage_ cuckoo_.reset(); From 00abd42061f84a2eac3968e2618c5bb0add8b5d7 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 7 May 2026 02:23:42 +0800 Subject: [PATCH 11/34] fix: loses atomicity --- src/commands/cmd_cuckoo_filter.cc | 8 +- src/storage/redis_metadata.cc | 4 +- src/types/cuckoo_filter.h | 7 + src/types/redis_cuckoo_chain.cc | 206 ++++++++++------------ src/types/redis_cuckoo_chain.h | 19 +- tests/cppunit/types/cuckoo_filter_test.cc | 8 + 6 files changed, 129 insertions(+), 123 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 7bc128d878a..830c43e7771 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -31,7 +31,7 @@ class CommandCFReserve : public Commander { Status Parse(const std::vector &args) override { // CF.RESERVE key capacity [BUCKETSIZE bs] [MAXITERATIONS mi] [EXPANSION ex] if (args.size() < 3) { - return {Status::RedisParseErr, "wrong number of arguments"}; + return {Status::RedisParseErr, errWrongNumOfArguments}; } // Parse capacity (required) @@ -107,7 +107,7 @@ class CommandCFAdd : public Commander { Status Parse(const std::vector &args) override { // CF.ADD key item if (args.size() != 3) { - return {Status::RedisParseErr, "wrong number of arguments"}; + return {Status::RedisParseErr, errWrongNumOfArguments}; } return Commander::Parse(args); } @@ -128,7 +128,7 @@ class CommandCFAdd : public Commander { return {Status::RedisExecErr, "failed to add item to cuckoo filter"}; } - // Return 1 if added, 0 if already exists (though we don't check for duplicates in this version) + // Duplicate items are allowed, so successful insertions return 1. *output = redis::Integer(added ? 1 : 0); return Status::OK(); } @@ -138,4 +138,4 @@ class CommandCFAdd : public Commander { REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1), MakeCmdAttr("cf.add", 3, "write", 1, 1, 1)) -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 7582887574f..f9c91034e86 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -335,7 +335,7 @@ bool Metadata::IsSingleKVType() const { return Type() == kRedisString || Type() bool Metadata::IsEmptyableType() const { return IsSingleKVType() || Type() == kRedisStream || Type() == kRedisBloomFilter || Type() == kRedisHyperLogLog || - Type() == kRedisTDigest || Type() == kRedisTimeSeries; + Type() == kRedisTDigest || Type() == kRedisTimeSeries || Type() == kRedisCuckooFilter; } bool Metadata::Expired() const { return ExpireAt(util::GetTimeStampMS()); } @@ -661,7 +661,7 @@ rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { return s; } - if (input->size() < 21) { + if (input->size() < sizeof(uint16_t) * 3 + sizeof(uint64_t) * 2 + sizeof(uint8_t)) { return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); } diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index aa39f0936d7..211d02cb9fb 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -21,6 +21,7 @@ #pragma once #include +#include #include #include #include @@ -41,6 +42,12 @@ namespace redis { // - bucket_index = hash % num_buckets (only apply modulo when indexing) class CuckooFilter { public: + static bool IsCapacitySupported(uint64_t capacity, uint8_t bucket_size) { + if (bucket_size == 0) return false; + auto num_buckets = static_cast(capacity) / bucket_size / 0.955L; + return num_buckets <= static_cast(std::numeric_limits::max() / 2 + 1ULL); + } + // Calculate the optimal number of buckets for the filter static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { // A load factor of 95.5% is chosen for the cuckoo filter diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 97652a58c9a..2d01ca4f410 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,6 +20,7 @@ #include "redis_cuckoo_chain.h" +#include #include #include "cuckoo_filter.h" @@ -32,6 +33,25 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } +rocksdb::Status CuckooChain::ValidateMetadata(const CuckooChainMetadata &metadata) { + if (metadata.n_filters == 0) { + return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); + } + if (metadata.base_capacity == 0) { + return rocksdb::Status::Corruption("invalid metadata: base_capacity is 0"); + } + if (metadata.bucket_size == 0) { + return rocksdb::Status::Corruption("invalid metadata: bucket_size is 0"); + } + if (metadata.max_iterations == 0) { + return rocksdb::Status::Corruption("invalid metadata: max_iterations is 0"); + } + if (!CuckooFilter::IsCapacitySupported(metadata.base_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: base_capacity is too large"); + } + return rocksdb::Status::OK(); +} + std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t bucket_index) { // Create a sub-key that includes both filter index and bucket index @@ -46,7 +66,6 @@ std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMeta rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { - // Validate parameters if (capacity == 0) { return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } @@ -64,25 +83,21 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (max_iterations == 0) { return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); } + if (!CuckooFilter::IsCapacitySupported(capacity, bucket_size)) { + return rocksdb::Status::InvalidArgument("capacity is too large"); + } std::string ns_key = AppendNamespacePrefix(user_key); - // Check if the key already exists - // Read without snapshot to ensure we see any committed data - std::string raw_value; - rocksdb::ReadOptions read_options; // No snapshot - auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); - if (s.ok()) { - return rocksdb::Status::InvalidArgument("the key already exists"); - } + CuckooChainMetadata existing_metadata; + auto s = getCuckooChainMetadata(ctx, ns_key, &existing_metadata); + if (!s.ok() && !s.IsNotFound()) return s; if (!s.IsNotFound()) { - return s; // Return other errors + return rocksdb::Status::InvalidArgument("the key already exists"); } - // Initialize metadata for the new cuckoo filter CuckooChainMetadata metadata; - // Initialize metadata for the new cuckoo filter metadata.size = 0; metadata.base_capacity = capacity; metadata.bucket_size = bucket_size; @@ -100,12 +115,13 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.RESERVE", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; - // Store the metadata std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; // Note: With bucket-based storage, we don't pre-allocate all buckets // Buckets will be created lazily on first write @@ -117,13 +133,15 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -// Helper function: calculate integer power (avoid std::pow for integers) -static uint64_t IntPow(uint64_t base, uint16_t exp) { - uint64_t result = 1; - for (uint16_t i = 0; i < exp; ++i) { - result *= base; +static bool CalculateFilterCapacity(uint64_t base_capacity, uint8_t expansion, uint16_t filter_index, + uint64_t *filter_capacity) { + uint64_t capacity = base_capacity; + for (uint16_t i = 0; i < filter_index; ++i) { + if (expansion != 0 && capacity > std::numeric_limits::max() / expansion) return false; + capacity *= expansion; } - return result; + *filter_capacity = capacity; + return true; } // Helper function: try to find empty slot in a bucket and insert fingerprint @@ -141,8 +159,7 @@ static bool TryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uin // Helper function: read bucket from storage and ensure correct size static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, uint8_t bucket_size, std::string *bucket_data) { - rocksdb::ReadOptions read_opts = ctx.DefaultScanOptions(); - auto s = storage->Get(ctx, read_opts, bucket_key, bucket_data); + auto s = storage->Get(ctx, ctx.GetReadOptions(), bucket_key, bucket_data); if (!s.ok() && !s.IsNotFound()) { return s; } @@ -158,29 +175,13 @@ static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { std::string ns_key = AppendNamespacePrefix(user_key); - // Get metadata - use read options without snapshot to see latest data CuckooChainMetadata metadata(false); - std::string raw_value; - rocksdb::ReadOptions read_options; // No snapshot - auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); - if (!s.ok()) { - if (s.IsNotFound()) { - return rocksdb::Status::NotFound("key not found"); - } - return s; - } - - // Decode metadata - Slice slice(raw_value); - s = metadata.Decode(&slice); - if (!s.ok()) { - return s; - } + auto s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (s.IsNotFound()) return rocksdb::Status::NotFound("key not found"); + if (!s.ok()) return s; - // Validate metadata - if (metadata.n_filters == 0) { - return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); - } + s = ValidateMetadata(metadata); + if (!s.ok()) return s; // Calculate hash and fingerprint for the item uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); @@ -189,8 +190,11 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Try to insert in each sub-filter (starting from the first/smallest one) // This follows RedisBloom's behavior and is more efficient for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { - // Calculate capacity for this filter using integer power - uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, filter_idx); + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_idx, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); // Calculate bucket indices @@ -226,13 +230,16 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); - batch->Put(target_bucket_key, *target_bucket_data); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; + s = batch->Put(target_bucket_key, *target_bucket_data); + if (!s.ok()) return s; metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); if (!s.ok()) return s; @@ -244,21 +251,33 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // No space found in any filter, try kick-out on the last filter uint16_t last_filter_idx = metadata.n_filters - 1; - uint64_t filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, last_filter_idx); + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, last_filter_idx, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); bool inserted = false; - s = kickOutInsert(ctx, user_key, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted); + std::unordered_map modified_buckets; + s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, + &modified_buckets); if (s.ok() && inserted) { - // Update metadata after successful kick-out auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; + + for (const auto &entry : modified_buckets) { + s = batch->Put(entry.first, entry.second); + if (!s.ok()) return s; + } metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); if (!s.ok()) return s; @@ -269,14 +288,18 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Kick-out failed, try to expand if allowed if (metadata.expansion > 0) { - s = expandFilter(ctx, ns_key, &metadata); - if (!s.ok()) { - return s; - } + if (metadata.n_filters >= UINT16_MAX) return rocksdb::Status::Aborted("maximum number of filters reached"); + + metadata.n_filters++; + INFO("CF.ADD: Expanded to {} filters", metadata.n_filters); // Retry insertion in the new expanded filter uint16_t new_filter_idx = metadata.n_filters - 1; - uint64_t new_filter_capacity = metadata.base_capacity * IntPow(metadata.expansion, new_filter_idx); + uint64_t new_filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, new_filter_idx, &new_filter_capacity) || + !CuckooFilter::IsCapacitySupported(new_filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Aborted("maximum filter capacity reached"); + } uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); uint32_t bucket1_idx = hash % new_num_buckets; @@ -288,13 +311,16 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); - batch->Put(bucket1_key, bucket1_data); + s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; + s = batch->Put(bucket1_key, bucket1_data); + if (!s.ok()) return s; metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); if (!s.ok()) return s; @@ -308,13 +334,12 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co return rocksdb::Status::Aborted("filter is full"); } -rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, +rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted) { + uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted, + std::unordered_map *modified_buckets) { *inserted = false; - - // Track modified buckets to write atomically at the end - std::unordered_map modified_buckets; + modified_buckets->clear(); // Start from bucket1 uint32_t current_bucket_idx = hash % num_buckets; @@ -327,8 +352,8 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us std::string bucket_key = getBucketKey(ns_key, metadata, filter_index, current_bucket_idx); std::string bucket_data; - auto cached = modified_buckets.find(bucket_key); - if (cached != modified_buckets.end()) { + auto cached = modified_buckets->find(bucket_key); + if (cached != modified_buckets->end()) { bucket_data = cached->second; } else { auto s = ReadBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); @@ -338,7 +363,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us // Swap fingerprint with victim slot auto old_fp = static_cast(bucket_data[victim_slot]); bucket_data[victim_slot] = static_cast(current_fp); - modified_buckets[bucket_key] = bucket_data; // Cache modification + (*modified_buckets)[bucket_key] = bucket_data; current_fp = old_fp; // If kicked-out fingerprint is 0 (empty), we successfully inserted @@ -360,8 +385,8 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us std::string alt_bucket_key = getBucketKey(ns_key, metadata, filter_index, alt_bucket_idx); std::string alt_bucket_data; - cached = modified_buckets.find(alt_bucket_key); - if (cached != modified_buckets.end()) { + cached = modified_buckets->find(alt_bucket_key); + if (cached != modified_buckets->end()) { alt_bucket_data = cached->second; } else { auto s = ReadBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); @@ -370,8 +395,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us size_t empty_slot = 0; if (TryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { - // Found empty slot! Cache this modification - modified_buckets[alt_bucket_key] = alt_bucket_data; + (*modified_buckets)[alt_bucket_key] = alt_bucket_data; *inserted = true; break; } @@ -381,41 +405,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &us victim_slot = (victim_slot + 1) % metadata.bucket_size; } - // Write all modified buckets atomically - if (*inserted && !modified_buckets.empty()) { - auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); - batch->PutLogData(log_data.Encode()); - - for (const auto &entry : modified_buckets) { - batch->Put(entry.first, entry.second); - } - - auto s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); - if (!s.ok()) return s; - } - return rocksdb::Status::OK(); } -rocksdb::Status CuckooChain::expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata) { - if (metadata->n_filters >= UINT16_MAX) { - return rocksdb::Status::Aborted("maximum number of filters reached"); - } - - metadata->n_filters++; - INFO("CF.ADD: Expanded to {} filters", metadata->n_filters); - - // Write updated metadata - auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.EXPAND", ""}); - batch->PutLogData(log_data.Encode()); - - std::string metadata_bytes; - metadata->Encode(&metadata_bytes); - batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); - - return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); -} - } // namespace redis diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index c8ad5550bba..8f3885af026 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -20,6 +20,8 @@ #pragma once +#include + #include "cuckoo_filter.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -40,26 +42,25 @@ class CuckooChain : public Database { rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion); - // CF.ADD command - adds an item to the cuckoo filter - // Returns true if item was added, false if item already exists (probably) + // CF.ADD command - adds an item to the cuckoo filter. + // Duplicate items are allowed, so added is true whenever insertion succeeds. rocksdb::Status Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added); private: // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + static rocksdb::Status ValidateMetadata(const CuckooChainMetadata &metadata); + // Generate key for a specific bucket in bucket-based storage // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t bucket_index); // Kick-out insertion: try to insert fingerprint by evicting existing ones - rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &user_key, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, - uint8_t fingerprint, uint64_t hash, bool *inserted); - - // Create a new sub-filter for expansion - rocksdb::Status expandFilter(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, + bool *inserted, std::unordered_map *modified_buckets); }; -} // namespace redis \ No newline at end of file +} // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index b95caaed7e5..93d8c693bee 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -20,6 +20,7 @@ #include +#include #include #include "storage/redis_db.h" @@ -289,6 +290,13 @@ TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; } +TEST_F(RedisCuckooFilterTest, ReserveRejectsCapacityOverflow) { + auto s = cuckoo_->Reserve(*ctx_, key_, std::numeric_limits::max(), 1, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("capacity is too large"), std::string::npos); +} + TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { // Test different max_iterations values std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; From 67f4c0d4c1693f7e66ee5fe104520b59cc105d1a Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 7 May 2026 11:08:53 +0800 Subject: [PATCH 12/34] fix: create new filter in CuckooChain::Add when key is not found --- src/commands/cmd_cuckoo_filter.cc | 9 +----- src/types/cuckoo_filter.h | 37 ++++++++++++++++------- src/types/redis_cuckoo_chain.cc | 31 +++++++++++++++---- tests/cppunit/types/cuckoo_filter_test.cc | 23 ++++++++++---- 4 files changed, 69 insertions(+), 31 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 830c43e7771..6213e2a33af 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -118,14 +118,7 @@ class CommandCFAdd : public Commander { auto s = cuckoo_db.Add(ctx, args_[1], args_[2], &added); if (!s.ok()) { - if (s.IsNotFound()) { - return {Status::RedisExecErr, "key not found"}; - } - if (s.IsAborted()) { - // Filter is full - return {Status::RedisExecErr, s.ToString()}; - } - return {Status::RedisExecErr, "failed to add item to cuckoo filter"}; + return {Status::RedisExecErr, s.ToString()}; } // Duplicate items are allowed, so successful insertions return 1. diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 211d02cb9fb..3d0688491ef 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -20,6 +20,8 @@ #pragma once +#include + #include #include #include @@ -43,20 +45,33 @@ namespace redis { class CuckooFilter { public: static bool IsCapacitySupported(uint64_t capacity, uint8_t bucket_size) { - if (bucket_size == 0) return false; - auto num_buckets = static_cast(capacity) / bucket_size / 0.955L; - return num_buckets <= static_cast(std::numeric_limits::max() / 2 + 1ULL); + uint32_t num_buckets = 0; + return OptimalNumBuckets(capacity, bucket_size, &num_buckets).ok(); } - // Calculate the optimal number of buckets for the filter - static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { - // A load factor of 95.5% is chosen for the cuckoo filter - auto num_buckets = static_cast(static_cast(capacity) / bucket_size / 0.955L); - // Round up to next power of 2 for better hash distribution - if (num_buckets == 0) num_buckets = 1; + // Calculate the optimal number of buckets for the filter. + static rocksdb::Status OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size, uint32_t* num_buckets) { + if (bucket_size == 0) { + return rocksdb::Status::InvalidArgument("bucket_size must be larger than 0"); + } + + constexpr long double kLoadFactor = 0.955L; + constexpr uint64_t kMaxSupportedBuckets = std::numeric_limits::max() / 2 + 1ULL; + auto max_supported_capacity = static_cast(kMaxSupportedBuckets * bucket_size * kLoadFactor); + if (capacity > max_supported_capacity) { + return rocksdb::Status::InvalidArgument("capacity is too large"); + } + + auto exact_buckets = static_cast(capacity) / bucket_size / kLoadFactor; + auto required_buckets = static_cast(exact_buckets); + if (static_cast(required_buckets) < exact_buckets) required_buckets++; + if (required_buckets == 0) required_buckets = 1; + + // Round up to next power of 2 for better hash distribution. uint32_t power = 1; - while (power < num_buckets) power <<= 1; - return power; + while (power < required_buckets) power <<= 1; + *num_buckets = power; + return rocksdb::Status::OK(); } // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 2d01ca4f410..4535a4394ea 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -107,7 +107,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key metadata.num_deleted_items = 0; // Calculate the number of buckets needed for this filter - uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + if (!s.ok()) return s; INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); @@ -177,8 +179,19 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co CuckooChainMetadata metadata(false); auto s = getCuckooChainMetadata(ctx, ns_key, &metadata); - if (s.IsNotFound()) return rocksdb::Status::NotFound("key not found"); - if (!s.ok()) return s; + if (s.IsNotFound()) { + // RedisBloom CF.ADD auto-creates the filter when the key does not exist: + // https://redis.io/docs/latest/commands/cf.add/ + metadata = CuckooChainMetadata(); + metadata.size = 0; + metadata.base_capacity = kCFDefaultCapacity; + metadata.bucket_size = kCFDefaultBucketSize; + metadata.max_iterations = kCFDefaultMaxIterations; + metadata.expansion = kCFDefaultExpansion; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + } + if (!s.ok() && !s.IsNotFound()) return s; s = ValidateMetadata(metadata); if (!s.ok()) return s; @@ -195,7 +208,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } - uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + if (!s.ok()) return s; // Calculate bucket indices uint32_t bucket1_idx = hash % num_buckets; @@ -256,7 +271,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } - uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size); + uint32_t num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + if (!s.ok()) return s; bool inserted = false; std::unordered_map modified_buckets; @@ -300,7 +317,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co !CuckooFilter::IsCapacitySupported(new_filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Aborted("maximum filter capacity reached"); } - uint32_t new_num_buckets = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size); + uint32_t new_num_buckets = 0; + s = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size, &new_num_buckets); + if (!s.ok()) return s; uint32_t bucket1_idx = hash % new_num_buckets; std::string bucket1_key = getBucketKey(ns_key, metadata, new_filter_idx, bucket1_idx); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 93d8c693bee..e40603c019d 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -122,7 +122,9 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { uint64_t capacity = 1000; uint8_t bucket_size = 4; - uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + uint32_t num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); // Should be a power of 2 ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; @@ -277,7 +279,9 @@ TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { ASSERT_TRUE(s.ok()) << "Should handle large capacity"; // Verify num_buckets calculation doesn't overflow - uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4); + uint32_t num_buckets = 0; + s = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; @@ -286,7 +290,8 @@ TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; - num_buckets = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4); + s = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; } @@ -325,7 +330,9 @@ TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; // Verify at least one bucket is created - uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4); + uint32_t num_buckets = 0; + s = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; } } @@ -391,11 +398,15 @@ TEST_F(RedisCuckooFilterTest, AddMultipleItems) { } TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { - // Try to add to a filter that doesn't exist bool added = false; auto s = cuckoo_->Add(*ctx_, "nonexistent_key", "item1", &added); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; + + s = cuckoo_->Reserve(*ctx_, "nonexistent_key", 1000, 4, 500, 2); ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsNotFound()) << "Should return NotFound error"; + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { From 23beb86c5e3a4658e0b759421877e45c9b67a0d7 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 7 May 2026 15:10:57 +0800 Subject: [PATCH 13/34] optimize test --- src/commands/cmd_cuckoo_filter.cc | 7 +- src/types/redis_cuckoo_chain.cc | 7 +- src/types/redis_cuckoo_chain.h | 5 +- tests/cppunit/types/cuckoo_filter_test.cc | 554 ++++++------------ .../unit/type/bloom/cuckoo_filter_test.go | 55 ++ 5 files changed, 244 insertions(+), 384 deletions(-) create mode 100644 tests/gocase/unit/type/bloom/cuckoo_filter_test.go diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 6213e2a33af..6bbe5cb2524 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -66,11 +66,14 @@ class CommandCFReserve : public Commander { return {Status::RedisParseErr, "max iterations must be larger than 0"}; } } else if (parser.EatEqICase("EXPANSION")) { - auto parse_expansion = parser.TakeInt(); + auto parse_expansion = parser.TakeInt(); if (!parse_expansion.IsOK()) { return {Status::RedisParseErr, "invalid expansion factor"}; } expansion_ = parse_expansion.GetValue(); + if (expansion_ > kCFMaxExpansion) { + return {Status::RedisParseErr, "expansion must be between 0 and 32768"}; + } } else { return {Status::RedisParseErr, errInvalidSyntax}; } @@ -99,7 +102,7 @@ class CommandCFReserve : public Commander { uint64_t capacity_ = kCFDefaultCapacity; uint8_t bucket_size_ = kCFDefaultBucketSize; uint16_t max_iterations_ = kCFDefaultMaxIterations; - uint8_t expansion_ = kCFDefaultExpansion; + uint16_t expansion_ = kCFDefaultExpansion; }; class CommandCFAdd : public Commander { diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 4535a4394ea..0e817bea5cd 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -65,7 +65,7 @@ std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMeta } rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, - uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { + uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion) { if (capacity == 0) { return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } @@ -83,6 +83,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (max_iterations == 0) { return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); } + if (expansion > kCFMaxExpansion) { + return rocksdb::Status::InvalidArgument("expansion must be between 0 and 32768"); + } if (!CuckooFilter::IsCapacitySupported(capacity, bucket_size)) { return rocksdb::Status::InvalidArgument("capacity is too large"); } @@ -135,7 +138,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -static bool CalculateFilterCapacity(uint64_t base_capacity, uint8_t expansion, uint16_t filter_index, +static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, uint16_t filter_index, uint64_t *filter_capacity) { uint64_t capacity = base_capacity; for (uint16_t i = 0; i < filter_index; ++i) { diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 8f3885af026..2e02c13fbb7 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -32,7 +32,8 @@ namespace redis { const uint32_t kCFDefaultCapacity = 1024; const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket const uint16_t kCFDefaultMaxIterations = 500; -const uint8_t kCFDefaultExpansion = 2; +const uint16_t kCFDefaultExpansion = 2; +const uint16_t kCFMaxExpansion = 32768; class CuckooChain : public Database { public: @@ -40,7 +41,7 @@ class CuckooChain : public Database { // CF.RESERVE command - creates a new cuckoo filter with specified capacity rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, - uint16_t max_iterations, uint8_t expansion); + uint16_t max_iterations, uint16_t expansion); // CF.ADD command - adds an item to the cuckoo filter. // Duplicate items are allowed, so added is true whenever insertion succeeds. diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index e40603c019d..b9f355bb7f1 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -22,6 +22,7 @@ #include #include +#include #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -38,32 +39,126 @@ class RedisCuckooFilterTest : public TestBase { protected: explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); + db_ = std::make_unique(storage_.get(), "cuckoo_ns"); } ~RedisCuckooFilterTest() override { // Ensure cuckoo_ is destroyed before storage_ cuckoo_.reset(); + db_.reset(); } void SetUp() override { - // Use a unique key for each test to avoid conflicts - // Include test name to make debugging easier const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } + void VerifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0) { + std::string ns_key = db_->AppendNamespacePrefix(key); + CuckooChainMetadata metadata(false); + auto s = db_->GetMetadata(*ctx_, {kRedisCuckooFilter}, ns_key, &metadata); + ASSERT_TRUE(s.ok()) << key << ": metadata not found"; + EXPECT_EQ(metadata.Type(), kRedisCuckooFilter) << key; + EXPECT_EQ(metadata.base_capacity, capacity) << key; + EXPECT_EQ(metadata.bucket_size, bucket_size) << key; + EXPECT_EQ(metadata.max_iterations, max_iterations) << key; + EXPECT_EQ(metadata.expansion, expansion) << key; + EXPECT_EQ(metadata.size, size) << key; + EXPECT_EQ(metadata.n_filters, n_filters) << key; + EXPECT_EQ(metadata.num_deleted_items, num_deleted_items) << key; + } + + void ReserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + uint16_t expansion) { + auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << key << ": " << s.ToString(); + VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); + } + + void AddAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, + uint16_t max_iterations, uint16_t expansion, uint64_t expected_size, uint16_t n_filters = 1) { + bool added = false; + auto s = cuckoo_->Add(*ctx_, key, item, &added); + ASSERT_TRUE(s.ok()) << key << ": add '" << item << "' failed: " << s.ToString(); + ASSERT_TRUE(added) << key << ": item '" << item << "' should have been added"; + VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); + } + std::unique_ptr cuckoo_; + std::unique_ptr db_; std::string key_; }; -TEST_F(RedisCuckooFilterTest, ReserveBasic) { - // Test basic reserve operation - uint64_t capacity = 1000; - uint8_t bucket_size = 4; - uint16_t max_iterations = 500; - uint8_t expansion = 2; +TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { + struct InvalidTestCase { + std::string key; + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint16_t expansion; + std::string err; + }; - auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); - ASSERT_TRUE(s.ok()) << "Failed to reserve cuckoo filter: " << s.ToString(); + std::vector invalid_test_cases = { + {"zero_capacity", 0, 4, 500, 2, "capacity must be larger than 0"}, + {"capacity_too_small", 1, 4, 500, 2, "capacity must be at least 2"}, + {"zero_bucket_size", 1000, 0, 500, 2, "bucket_size must be between 1 and 255"}, + {"zero_max_iterations", 1000, 4, 0, 2, "max_iterations must be larger than 0"}, + {"capacity_too_large", std::numeric_limits::max(), 4, 500, 2, "capacity is too large"}, + {"expansion_too_large", 1000, 4, 500, redis::kCFMaxExpansion + 1, "expansion must be between 0 and 32768"}, + }; + + for (const auto &test_case : invalid_test_cases) { + auto s = cuckoo_->Reserve(*ctx_, test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, + test_case.expansion); + ASSERT_FALSE(s.ok()) << test_case.key; + ASSERT_TRUE(s.IsInvalidArgument()) << test_case.key << ": " << s.ToString(); + ASSERT_NE(s.ToString().find(test_case.err), std::string::npos) << test_case.key << ": " << s.ToString(); + } +} + +TEST_F(RedisCuckooFilterTest, ReserveValidParams) { + struct TestCase { + std::string key; + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint16_t expansion; + }; + + std::vector test_cases = { + {"min_capacity", 2, 4, 500, 2}, + {"min_bucket_size", 1000, 1, 500, 2}, + {"max_bucket_size", 1000, 255, 500, 2}, + {"min_max_iterations", 1000, 4, 1, 2}, + {"max_max_iterations", 1000, 4, 65535, 2}, + // RedisBloom allows expansion=0; it disables creating additional sub-filters. + {"no_auto_expansion", 2, 1, 1, 0}, + {"capacity_3", 3, 4, 500, 2}, + {"capacity_10", 10, 4, 500, 2}, + {"capacity_100", 100, 4, 500, 2}, + {"capacity_10000", 10000, 4, 500, 2}, + {"capacity_100000", 100000, 4, 500, 2}, + {"bucket_size_2", 1000, 2, 500, 2}, + {"bucket_size_8", 1000, 8, 500, 2}, + {"bucket_size_16", 1000, 16, 500, 2}, + {"bucket_size_128", 1000, 128, 500, 2}, + {"no_auto_expansion_regular", 1000, 4, 500, 0}, + // expansion=1 is valid and creates additional sub-filters with the same capacity. + {"expansion_1", 1000, 4, 500, 1}, + {"expansion_8", 1000, 4, 500, 8}, + {"expansion_256", 1000, 4, 500, 256}, + {"max_expansion", 1000, 4, 500, redis::kCFMaxExpansion}, + {"capacity_10M", 10000000, 4, 500, 2}, + {"capacity_100M", 100000000, 4, 500, 2}, + {"max_params", 100000, 255, 65535, redis::kCFMaxExpansion}, + {"mixed_params", 50000, 8, 1000, 4}, + }; + + for (const auto &test_case : test_cases) { + ReserveAndVerify(test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, + test_case.expansion); + } } TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { @@ -78,60 +173,31 @@ TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } -TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { - // Test with zero capacity - auto s = cuckoo_->Reserve(*ctx_, key_, 0, 4, 500, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Test with zero bucket size - s = cuckoo_->Reserve(*ctx_, "key2", 1000, 0, 500, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Test with zero max iterations - s = cuckoo_->Reserve(*ctx_, "key3", 1000, 4, 0, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); -} - -TEST_F(RedisCuckooFilterTest, ReserveVariousCapacities) { - // Test with different capacities - std::vector capacities = {100, 1000, 10000, 100000}; - - for (size_t i = 0; i < capacities.size(); ++i) { - std::string test_key = key_ + "_" + std::to_string(i); - auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Failed for capacity " << capacities[i]; - } -} - -TEST_F(RedisCuckooFilterTest, ReserveWithDifferentBucketSizes) { - // Test with different valid bucket sizes - std::vector bucket_sizes = {1, 2, 4, 8, 16}; - - for (size_t i = 0; i < bucket_sizes.size(); ++i) { - std::string test_key = key_ + "_bucket_" + std::to_string(i); - auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, bucket_sizes[i], 500, 2); - ASSERT_TRUE(s.ok()) << "Failed for bucket size " << static_cast(bucket_sizes[i]); - } -} - TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { - // Test the static helper function - uint64_t capacity = 1000; - uint8_t bucket_size = 4; - - uint32_t num_buckets = 0; - auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); + struct TestCase { + uint64_t capacity; + uint8_t bucket_size; + uint32_t expected_num_buckets; + }; - // Should be a power of 2 - ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + std::vector test_cases = { + {0, 4, 1}, {1, 4, 1}, {2, 1, 4}, {4, 4, 2}, {488, 4, 128}, {489, 4, 256}, + {977, 4, 256}, {978, 4, 512}, {1000, 4, 512}, {1024, 4, 512}, {1000, 1, 2048}, {1000, 16, 128}, + }; - // Should be able to hold the capacity with 95.5% load factor - auto expected_min = static_cast(static_cast(capacity) / bucket_size / 0.955L); - ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; + for (const auto &test_case : test_cases) { + uint32_t num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(test_case.capacity, test_case.bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << "capacity=" << test_case.capacity + << ", bucket_size=" << static_cast(test_case.bucket_size) << ": " << s.ToString(); + ASSERT_EQ(num_buckets, test_case.expected_num_buckets) + << "capacity=" << test_case.capacity << ", bucket_size=" << static_cast(test_case.bucket_size); + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + + auto expected_min = + static_cast(static_cast(test_case.capacity) / test_case.bucket_size / 0.955L); + ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; + } } TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { @@ -151,24 +217,27 @@ TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { } TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { - uint32_t num_buckets = 1024; + std::vector num_buckets_cases = {1, 2, 128, 256, 512, 1024, 2048}; // Test GetAltHash symmetry at hash level (following RedisBloom design) // h2 = GetAltHash(fp, h1) // h1 = GetAltHash(fp, h2) <- this is the symmetry property - for (uint64_t hash = 0; hash < 100; ++hash) { - for (uint8_t fp = 1; fp < 10; ++fp) { - uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fp, hash); - - // Applying GetAltHash twice should return original hash - uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fp, alt_hash); - ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; - - // Both hashes should map to valid bucket indices - uint32_t bucket1 = hash % num_buckets; - uint32_t bucket2 = alt_hash % num_buckets; - ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; - ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; + for (auto num_buckets : num_buckets_cases) { + for (uint64_t hash = 0; hash < 100; ++hash) { + for (uint16_t fp = 1; fp <= 255; ++fp) { + auto fingerprint = static_cast(fp); + uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fingerprint, hash); + + // Applying GetAltHash twice should return original hash + uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fingerprint, alt_hash); + ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; + + // Both hashes should map to valid bucket indices + uint32_t bucket1 = hash % num_buckets; + uint32_t bucket2 = alt_hash % num_buckets; + ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; + ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; + } } } } @@ -204,38 +273,11 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } -TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { - // Test capacity = 1 (too small, following RedisBloom behavior) - // With load factor 0.955 and bucket_size=4, this would result in 0 buckets - auto s = cuckoo_->Reserve(*ctx_, key_, 1, 4, 500, 2); - ASSERT_FALSE(s.ok()) << "Should reject capacity = 1"; - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_NE(s.ToString().find("at least 2"), std::string::npos) << "Error message should mention minimum capacity"; -} - -TEST_F(RedisCuckooFilterTest, ReserveBucketSizeBoundary) { - // Test valid upper boundary (bucket_size is uint8_t, max = 255) - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 255, 500, 2); - ASSERT_TRUE(s.ok()) << "bucket_size=255 should be valid"; - - // Test bucket_size = 1 (minimum valid value) - s = cuckoo_->Reserve(*ctx_, "key_bs1", 1000, 1, 500, 2); - ASSERT_TRUE(s.ok()) << "bucket_size=1 should be valid"; - - // Test common power-of-2 bucket sizes - std::vector valid_sizes = {2, 4, 8, 16, 32, 64, 128}; - for (size_t i = 0; i < valid_sizes.size(); ++i) { - std::string test_key = "key_bs_" + std::to_string(valid_sizes[i]); - s = cuckoo_->Reserve(*ctx_, test_key, 1000, valid_sizes[i], 500, 2); - ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(valid_sizes[i]) << " should be valid"; - } -} - TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { uint64_t capacity = 1000; uint8_t bucket_size = 4; uint16_t max_iterations = 500; - uint8_t expansion = 2; + uint16_t expansion = 2; // Create the filter auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); @@ -258,217 +300,58 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } -TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { - // expansion=0 means no auto-expansion when filter is full - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); - ASSERT_TRUE(s.ok()) << "expansion=0 should be valid (no auto-growth)"; - - // Verify different expansion values - std::vector expansions = {0, 1, 2, 4, 8}; - for (size_t i = 0; i < expansions.size(); ++i) { - std::string test_key = "key_exp_" + std::to_string(expansions[i]); - s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, 500, expansions[i]); - ASSERT_TRUE(s.ok()) << "expansion=" << static_cast(expansions[i]) << " should be valid"; - } -} - -TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { - // Test with very large capacity - uint64_t large_capacity = 10000000; // 10 million - auto s = cuckoo_->Reserve(*ctx_, key_, large_capacity, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Should handle large capacity"; - - // Verify num_buckets calculation doesn't overflow - uint32_t num_buckets = 0; - s = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; - ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; - - // Test even larger capacity (100 million) - uint64_t huge_capacity = 100000000; - s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; - - s = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; -} - -TEST_F(RedisCuckooFilterTest, ReserveRejectsCapacityOverflow) { - auto s = cuckoo_->Reserve(*ctx_, key_, std::numeric_limits::max(), 1, 500, 2); - ASSERT_FALSE(s.ok()); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_NE(s.ToString().find("capacity is too large"), std::string::npos); -} - -TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { - // Test different max_iterations values - std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; - - for (size_t i = 0; i < iterations.size(); ++i) { - std::string test_key = "key_iter_" + std::to_string(iterations[i]); - auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, iterations[i], 2); - ASSERT_TRUE(s.ok()) << "max_iterations=" << iterations[i] << " should be valid"; - } - - // Test max_iterations = 0 (should fail) - auto s = cuckoo_->Reserve(*ctx_, "iter_zero", 1000, 4, 0, 2); - ASSERT_FALSE(s.ok()) << "max_iterations=0 should fail"; -} - -TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { - // Test minimum valid capacity - auto s = cuckoo_->Reserve(*ctx_, "min_cap", 2, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "capacity=2 should be valid (minimum)"; - - // Test small but valid capacities - std::vector small_capacities = {2, 3, 4, 5, 10, 50, 100}; - for (size_t i = 0; i < small_capacities.size(); ++i) { - std::string test_key = "small_" + std::to_string(small_capacities[i]); - s = cuckoo_->Reserve(*ctx_, test_key, small_capacities[i], 4, 500, 2); - ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; - - // Verify at least one bucket is created - uint32_t num_buckets = 0; - s = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4, &num_buckets); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; - } -} - -TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { - // Test various parameter combinations to ensure robustness - struct TestCase { - uint64_t capacity; - uint8_t bucket_size; - uint16_t max_iterations; - uint8_t expansion; - bool should_succeed; - std::string description; - }; - - std::vector test_cases = { - {1000, 4, 500, 2, true, "Standard parameters"}, - {2, 1, 1, 0, true, "Minimum all parameters"}, - {100000, 255, 65535, 255, true, "Maximum all parameters"}, - {1000, 2, 100, 1, true, "Small bucket, moderate iterations"}, - {50000, 8, 1000, 4, true, "Large capacity, large bucket"}, - {10, 16, 50, 0, true, "Small capacity, large bucket, no expansion"}, - }; - - for (size_t i = 0; i < test_cases.size(); ++i) { - const auto &tc = test_cases[i]; - std::string test_key = "combo_" + std::to_string(i); - auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); - - if (tc.should_succeed) { - ASSERT_TRUE(s.ok()) << "Test case failed: " << tc.description; - } else { - ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; - } - } -} - TEST_F(RedisCuckooFilterTest, AddBasic) { - // First reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()) << "Failed to reserve: " << s.ToString(); - - // Add an item - bool added = false; - s = cuckoo_->Add(*ctx_, key_, "item1", &added); - ASSERT_TRUE(s.ok()) << "Failed to add item: " << s.ToString(); - ASSERT_TRUE(added) << "Item should have been added"; -} - -TEST_F(RedisCuckooFilterTest, AddMultipleItems) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add multiple items - std::vector items = {"apple", "banana", "cherry", "date", "elderberry"}; - for (const auto &item : items) { - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()) << "Failed to add item: " << item; - ASSERT_TRUE(added) << "Item should have been added: " << item; - } + ReserveAndVerify(key_, 1000, 4, 500, 2); + AddAndVerify(key_, "item1", 1000, 4, 500, 2, 1); } TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { + std::string key = "nonexistent_key"; bool added = false; - auto s = cuckoo_->Add(*ctx_, "nonexistent_key", "item1", &added); + auto s = cuckoo_->Add(*ctx_, key, "item1", &added); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; + VerifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, + redis::kCFDefaultExpansion, 1, 1, 0); - s = cuckoo_->Reserve(*ctx_, "nonexistent_key", 1000, 4, 500, 2); + s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { - // Test with different bucket sizes std::vector bucket_sizes = {1, 2, 4, 8, 16}; - for (size_t i = 0; i < bucket_sizes.size(); ++i) { - std::string test_key = key_ + "_bucket_" + std::to_string(bucket_sizes[i]); - auto s = cuckoo_->Reserve(*ctx_, test_key, 100, bucket_sizes[i], 500, 2); - ASSERT_TRUE(s.ok()) << "Failed to reserve with bucket_size=" << static_cast(bucket_sizes[i]); - - // Add some items + for (auto bs : bucket_sizes) { + std::string test_key = key_ + "_bucket_" + std::to_string(bs); + ReserveAndVerify(test_key, 100, bs, 500, 2); for (int j = 0; j < 10; ++j) { - std::string item = "item_" + std::to_string(j); - bool added = false; - s = cuckoo_->Add(*ctx_, test_key, item, &added); - ASSERT_TRUE(s.ok()) << "Failed to add item with bucket_size=" << static_cast(bucket_sizes[i]); - ASSERT_TRUE(added); + AddAndVerify(test_key, "item_" + std::to_string(j), 100, bs, 500, 2, j + 1); } } } TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add the same item multiple times (Cuckoo Filter allows duplicates) - std::string item = "duplicate_item"; + ReserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 5; ++i) { - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()) << "Iteration " << i; - ASSERT_TRUE(added) << "Should allow duplicate items"; + AddAndVerify(key_, "duplicate_item", 1000, 4, 500, 2, i + 1); } } TEST_F(RedisCuckooFilterTest, AddManyItems) { - // Reserve a filter with moderate capacity - uint64_t capacity = 1000; - auto s = cuckoo_->Reserve(*ctx_, key_, capacity, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add many items (should succeed without hitting limits) - int num_items = 100; - for (int i = 0; i < num_items; ++i) { - std::string item = "item_" + std::to_string(i); - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()) << "Failed at item " << i; - ASSERT_TRUE(added); + ReserveAndVerify(key_, 1000, 4, 500, 2); + for (int i = 0; i < 100; ++i) { + AddAndVerify(key_, "item_" + std::to_string(i), 1000, 4, 500, 2, i + 1); } } TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { - // Test with very small capacity to trigger potential full filter scenario uint64_t small_capacity = 10; - auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); // expansion=0 means no auto-growth + auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); ASSERT_TRUE(s.ok()); - // Add items up to capacity - // With bucket_size=2 and capacity=10, we should have very limited space + uint64_t added_count = 0; bool full = false; for (int i = 0; i < 100; ++i) { std::string item = "item_" + std::to_string(i); @@ -476,112 +359,27 @@ TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { s = cuckoo_->Add(*ctx_, key_, item, &added); if (!s.ok()) { - // Filter is full ASSERT_TRUE(s.IsAborted()) << "Should be Aborted status when full"; full = true; break; } - } - - // We expect the filter to become full at some point - ASSERT_TRUE(full) << "Small filter should eventually become full"; -} - -TEST_F(RedisCuckooFilterTest, AddEmptyItem) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add empty string - bool added = false; - s = cuckoo_->Add(*ctx_, key_, "", &added); - ASSERT_TRUE(s.ok()) << "Should be able to add empty string"; - ASSERT_TRUE(added); -} - -TEST_F(RedisCuckooFilterTest, AddLongItem) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add a very long string - std::string long_item(10000, 'x'); - bool added = false; - s = cuckoo_->Add(*ctx_, key_, long_item, &added); - ASSERT_TRUE(s.ok()) << "Should be able to add long string"; - ASSERT_TRUE(added); -} -TEST_F(RedisCuckooFilterTest, AddBinaryData) { - // Reserve a filter - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add binary data (including null bytes) - std::string binary_item = std::string("\x00\x01\x02\xFF\xFE", 5); - bool added = false; - s = cuckoo_->Add(*ctx_, key_, binary_item, &added); - ASSERT_TRUE(s.ok()) << "Should be able to add binary data"; - ASSERT_TRUE(added); -} - -TEST_F(RedisCuckooFilterTest, AddWithVariousCapacities) { - // Test that Add works correctly with different filter capacities - std::vector capacities = {10, 100, 1000, 10000}; - - for (size_t i = 0; i < capacities.size(); ++i) { - std::string test_key = key_ + "_cap_" + std::to_string(capacities[i]); - auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); - ASSERT_TRUE(s.ok()); - - // Add a reasonable number of items relative to capacity - int num_items = std::min(static_cast(capacities[i] / 10), 50); - for (int j = 0; j < num_items; ++j) { - std::string item = "item_" + std::to_string(j); - bool added = false; - s = cuckoo_->Add(*ctx_, test_key, item, &added); - ASSERT_TRUE(s.ok()) << "Failed for capacity=" << capacities[i] << ", item=" << j; - ASSERT_TRUE(added); - } + ASSERT_TRUE(added) << "Item should have been added before the filter is full"; + ++added_count; } -} - -TEST_F(RedisCuckooFilterTest, AddConsistentHashing) { - // Verify that the same item always hashes to the same buckets - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); - ASSERT_TRUE(s.ok()); - std::string item = "test_item"; - - // Calculate hash for the item - uint64_t hash1 = redis::CuckooFilter::Hash(item); - uint64_t hash2 = redis::CuckooFilter::Hash(item); - - // Hashing should be deterministic - ASSERT_EQ(hash1, hash2) << "Hash should be consistent for the same item"; - - // Fingerprint should be consistent - uint8_t fp1 = redis::CuckooFilter::GenerateFingerprint(hash1); - uint8_t fp2 = redis::CuckooFilter::GenerateFingerprint(hash2); - ASSERT_EQ(fp1, fp2) << "Fingerprint should be consistent"; - - // Add the item - bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); - ASSERT_TRUE(s.ok()); - ASSERT_TRUE(added); + ASSERT_TRUE(full) << "Small filter should eventually become full"; + VerifyMetadata(key_, small_capacity, 2, 500, 0, added_count, 1, 0); } -TEST_F(RedisCuckooFilterTest, AddDifferentItemsProduceDifferentHashes) { - // Verify that different items produce different hashes - std::vector items = {"item1", "item2", "item3", "different", "another"}; - std::set hashes; - - for (const auto &item : items) { - uint64_t hash = redis::CuckooFilter::Hash(item); - hashes.insert(hash); +TEST_F(RedisCuckooFilterTest, AddEdgeCaseItems) { + ReserveAndVerify(key_, 1000, 4, 500, 2); + std::vector items = { + "", + std::string(10000, 'x'), + std::string("\x00\x01\x02\xFF\xFE", 5), + }; + for (size_t i = 0; i < items.size(); ++i) { + AddAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); } - - // All items should have unique hashes (with very high probability) - ASSERT_EQ(hashes.size(), items.size()) << "Different items should produce different hashes"; } diff --git a/tests/gocase/unit/type/bloom/cuckoo_filter_test.go b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go new file mode 100644 index 00000000000..1a87e3cc8ef --- /dev/null +++ b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package bloom + +import ( + "context" + "testing" + + "github.com/apache/kvrocks/tests/gocase/util" + "github.com/stretchr/testify/require" +) + +func TestCuckooFilter(t *testing.T) { + srv := util.StartServer(t, map[string]string{}) + defer srv.Close() + ctx := context.Background() + rdb := srv.NewClient() + defer func() { require.NoError(t, rdb.Close()) }() + + t.Run("Add creates filter", func(t *testing.T) { + key := "test_cuckoo_filter_add_create" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.Equal(t, int64(1), rdb.Do(ctx, "cf.add", key, "item").Val()) + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err(), "already exists") + }) + + t.Run("Wrong type", func(t *testing.T) { + key := "test_cuckoo_filter_wrong_type" + require.NoError(t, rdb.Set(ctx, key, "value", 0).Err()) + require.ErrorContains(t, rdb.Do(ctx, "cf.add", key, "item").Err(), "WRONGTYPE") + }) + + t.Run("Reserve expansion", func(t *testing.T) { + require.NoError(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_256", "1000", "EXPANSION", "256").Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_max", "1000", "EXPANSION", "32768").Err()) + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_too_large", "1000", "EXPANSION", "32769").Err(), "expansion must be between 0 and 32768") + }) +} From 1ade399ba84c690d16019510fd593d38399f4b19 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Fri, 8 May 2026 13:17:27 +0800 Subject: [PATCH 14/34] fix: lint --- tests/cppunit/types/cuckoo_filter_test.cc | 36 +++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index b9f355bb7f1..17dbe28e520 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -52,7 +52,7 @@ class RedisCuckooFilterTest : public TestBase { key_ = std::string("cf_test_") + test_info->name(); } - void VerifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + void verifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0) { std::string ns_key = db_->AppendNamespacePrefix(key); CuckooChainMetadata metadata(false); @@ -68,20 +68,20 @@ class RedisCuckooFilterTest : public TestBase { EXPECT_EQ(metadata.num_deleted_items, num_deleted_items) << key; } - void ReserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, + void reserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion) { auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion); ASSERT_TRUE(s.ok()) << key << ": " << s.ToString(); - VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); + verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); } - void AddAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, + void addAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, uint64_t expected_size, uint16_t n_filters = 1) { bool added = false; auto s = cuckoo_->Add(*ctx_, key, item, &added); ASSERT_TRUE(s.ok()) << key << ": add '" << item << "' failed: " << s.ToString(); ASSERT_TRUE(added) << key << ": item '" << item << "' should have been added"; - VerifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); + verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); } std::unique_ptr cuckoo_; @@ -156,7 +156,7 @@ TEST_F(RedisCuckooFilterTest, ReserveValidParams) { }; for (const auto &test_case : test_cases) { - ReserveAndVerify(test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, + reserveAndVerify(test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, test_case.expansion); } } @@ -301,8 +301,8 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { } TEST_F(RedisCuckooFilterTest, AddBasic) { - ReserveAndVerify(key_, 1000, 4, 500, 2); - AddAndVerify(key_, "item1", 1000, 4, 500, 2, 1); + reserveAndVerify(key_, 1000, 4, 500, 2); + addAndVerify(key_, "item1", 1000, 4, 500, 2, 1); } TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { @@ -311,7 +311,7 @@ TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { auto s = cuckoo_->Add(*ctx_, key, "item1", &added); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; - VerifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, + verifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, redis::kCFDefaultExpansion, 1, 1, 0); s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2); @@ -325,24 +325,24 @@ TEST_F(RedisCuckooFilterTest, AddWithDifferentBucketSizes) { for (auto bs : bucket_sizes) { std::string test_key = key_ + "_bucket_" + std::to_string(bs); - ReserveAndVerify(test_key, 100, bs, 500, 2); + reserveAndVerify(test_key, 100, bs, 500, 2); for (int j = 0; j < 10; ++j) { - AddAndVerify(test_key, "item_" + std::to_string(j), 100, bs, 500, 2, j + 1); + addAndVerify(test_key, "item_" + std::to_string(j), 100, bs, 500, 2, j + 1); } } } TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { - ReserveAndVerify(key_, 1000, 4, 500, 2); + reserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 5; ++i) { - AddAndVerify(key_, "duplicate_item", 1000, 4, 500, 2, i + 1); + addAndVerify(key_, "duplicate_item", 1000, 4, 500, 2, i + 1); } } TEST_F(RedisCuckooFilterTest, AddManyItems) { - ReserveAndVerify(key_, 1000, 4, 500, 2); + reserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 100; ++i) { - AddAndVerify(key_, "item_" + std::to_string(i), 1000, 4, 500, 2, i + 1); + addAndVerify(key_, "item_" + std::to_string(i), 1000, 4, 500, 2, i + 1); } } @@ -369,17 +369,17 @@ TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { } ASSERT_TRUE(full) << "Small filter should eventually become full"; - VerifyMetadata(key_, small_capacity, 2, 500, 0, added_count, 1, 0); + verifyMetadata(key_, small_capacity, 2, 500, 0, added_count, 1, 0); } TEST_F(RedisCuckooFilterTest, AddEdgeCaseItems) { - ReserveAndVerify(key_, 1000, 4, 500, 2); + reserveAndVerify(key_, 1000, 4, 500, 2); std::vector items = { "", std::string(10000, 'x'), std::string("\x00\x01\x02\xFF\xFE", 5), }; for (size_t i = 0; i < items.size(); ++i) { - AddAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); + addAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); } } From da03915b2c4ecc07b27df2858eb43262607b721b Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 11 May 2026 02:10:33 +0800 Subject: [PATCH 15/34] feat: CuckooPageSet --- src/commands/cmd_cuckoo_filter.cc | 3 +- src/storage/redis_metadata.cc | 4 +- src/storage/redis_metadata.h | 8 +- src/types/cuckoo_filter.h | 9 +- src/types/cuckoo_filter_page.cc | 259 ++++++++++++++ src/types/cuckoo_filter_page.h | 91 +++++ src/types/redis_cuckoo_chain.cc | 174 +++------ src/types/redis_cuckoo_chain.h | 11 +- .../cppunit/types/cuckoo_filter_page_test.cc | 337 ++++++++++++++++++ tests/cppunit/types/cuckoo_filter_test.cc | 220 +++++++++++- 10 files changed, 956 insertions(+), 160 deletions(-) create mode 100644 src/types/cuckoo_filter_page.cc create mode 100644 src/types/cuckoo_filter_page.h create mode 100644 tests/cppunit/types/cuckoo_filter_page_test.cc diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index 6bbe5cb2524..2002912f94d 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -84,7 +84,8 @@ class CommandCFReserve : public Commander { Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); - auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_); + auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_, + kCuckooFilterDefaultPageSize); if (!s.ok()) { if (s.IsInvalidArgument()) { diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index f9c91034e86..027ae06b456 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -654,6 +654,7 @@ void CuckooChainMetadata::Encode(std::string *dst) const { PutFixed8(dst, bucket_size); PutFixed16(dst, max_iterations); PutFixed64(dst, num_deleted_items); + PutFixed32(dst, page_size); } rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { @@ -661,7 +662,7 @@ rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { return s; } - if (input->size() < sizeof(uint16_t) * 3 + sizeof(uint64_t) * 2 + sizeof(uint8_t)) { + if (input->size() < sizeof(uint16_t) * 3 + sizeof(uint64_t) * 2 + sizeof(uint32_t) + sizeof(uint8_t)) { return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); } @@ -671,6 +672,7 @@ rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { GetFixed8(input, &bucket_size); GetFixed16(input, &max_iterations); GetFixed64(input, &num_deleted_items); + GetFixed32(input, &page_size); return rocksdb::Status::OK(); } diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 4503fbf444c..e4658336ebf 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -335,6 +335,8 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; +constexpr uint32_t kCuckooFilterDefaultPageSize = 2048; + class CuckooChainMetadata : public Metadata { public: /// The number of sub-filters in the chain @@ -356,6 +358,9 @@ class CuckooChainMetadata : public Metadata { /// Track number of deleted items for maintenance uint64_t num_deleted_items; + /// Target maximum payload size for each persisted Cuckoo Filter page + uint32_t page_size; + explicit CuckooChainMetadata(bool generate_version = true) : Metadata(kRedisCuckooFilter, generate_version), n_filters(0), @@ -363,7 +368,8 @@ class CuckooChainMetadata : public Metadata { base_capacity(0), bucket_size(0), max_iterations(0), - num_deleted_items(0) {} + num_deleted_items(0), + page_size(kCuckooFilterDefaultPageSize) {} void Encode(std::string *dst) const override; using Metadata::Decode; diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 3d0688491ef..7e5fbef40cd 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -34,8 +34,8 @@ namespace redis { // Cuckoo filter implementation from the paper: // "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. -// This is a bucket-based storage implementation where each bucket is stored -// as an independent key-value pair in RocksDB +// Buckets are grouped into page values in RocksDB. The Cuckoo algorithm still +// works with logical bucket indexes, while the storage layer maps buckets to pages. // // Hash calculation follows RedisBloom's design: // - fp = hash % 255 + 1 (fingerprint, non-zero, range: 1-255) @@ -85,13 +85,10 @@ class CuckooFilter { return hash ^ (static_cast(fingerprint) * 0x5bd1e995); } - // Legacy function for backward compatibility with tests - // Converts bucket index to hash, applies GetAltHash, then converts back to bucket index + // Calculate an alternate bucket from a bucket index and fingerprint. static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { - // Treat bucket_idx as a hash value for the calculation uint64_t hash = bucket_idx; uint64_t alt_hash = GetAltHash(fingerprint, hash); - // Convert back to bucket index return static_cast(alt_hash % num_buckets); } diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc new file mode 100644 index 00000000000..543506fa97e --- /dev/null +++ b/src/types/cuckoo_filter_page.cc @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "cuckoo_filter_page.h" + +#include + +#include "cuckoo_filter.h" +#include "storage/redis_db.h" + +namespace redis { + +namespace { + +uint32_t GetBucketsPerPage(uint32_t page_size, uint8_t bucket_size) { + return std::max(1, page_size / bucket_size); +} + +uint32_t GetPageIndex(uint32_t bucket_index, uint32_t buckets_per_page) { return bucket_index / buckets_per_page; } + +uint32_t GetBucketOffset(uint32_t bucket_index, uint32_t buckets_per_page, uint8_t bucket_size) { + return (bucket_index % buckets_per_page) * bucket_size; +} + +uint32_t GetPageValueSize(uint32_t page_index, uint32_t num_buckets, uint32_t buckets_per_page, uint8_t bucket_size) { + uint32_t first_bucket = page_index * buckets_per_page; + uint32_t page_bucket_count = std::min(buckets_per_page, num_buckets - first_bucket); + return page_bucket_count * bucket_size; +} + +std::string GetCuckooPageKey(const Slice &ns_key, const CuckooChainMetadata &metadata, bool slot_id_encoded, + uint16_t filter_index, uint32_t page_index) { + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, page_index); + return InternalKey(ns_key, sub_key, metadata.version, slot_id_encoded).Encode(); +} + +} // namespace + +CuckooPageSet::CuckooPageSet(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, bool slot_id_encoded) + : storage_(storage), + ctx_(ctx), + ns_key_(ns_key.ToString()), + metadata_(metadata), + slot_id_encoded_(slot_id_encoded) {} + +rocksdb::Status CuckooPageSet::TryInsertInCandidateBuckets(uint16_t filter_index, uint32_t num_buckets, + uint32_t bucket1_index, uint32_t bucket2_index, + uint8_t fingerprint, bool *inserted) { + *inserted = false; + BucketRef bucket1, bucket2; + auto s = EnsureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); + if (!s.ok()) return s; + + size_t slot_idx = 0; + if (TryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { + *inserted = true; + return rocksdb::Status::OK(); + } + if (bucket1_index != bucket2_index && TryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { + *inserted = true; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint8_t fingerprint, bool *inserted) { + *inserted = false; + BucketRef bucket; + auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + if (!s.ok()) return s; + + size_t slot_idx = 0; + *inserted = TryInsertInBucketRef(bucket, fingerprint, &slot_idx); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint32_t slot_idx, uint8_t *fingerprint) { + if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); + + BucketRef bucket; + auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + if (!s.ok()) return s; + *fingerprint = GetBucketRefSlot(bucket, slot_idx); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint32_t slot_idx, uint8_t fingerprint) { + if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); + + BucketRef bucket; + auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + if (!s.ok()) return s; + SetBucketRefSlot(bucket, slot_idx, fingerprint); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::WriteBackDirtyPages(rocksdb::WriteBatchBase *batch) { + for (const auto &entry : pages_) { + if (!entry.second.is_dirty) continue; + auto s = batch->Put(entry.first, entry.second.data); + if (!s.ok()) return s; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketLocation *location) const { + if (metadata_.bucket_size == 0 || num_buckets == 0 || bucket_index >= num_buckets) { + return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); + } + + uint32_t buckets_per_page = GetBucketsPerPage(metadata_.page_size, metadata_.bucket_size); + uint32_t page_index = GetPageIndex(bucket_index, buckets_per_page); + location->page_key = GetCuckooPageKey(ns_key_, metadata_, slot_id_encoded_, filter_index, page_index); + location->offset = GetBucketOffset(bucket_index, buckets_per_page, metadata_.bucket_size); + location->expected_page_size = GetPageValueSize(page_index, num_buckets, buckets_per_page, metadata_.bucket_size); + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketRef *bucket) { + BucketLocation location; + auto s = ResolveBucketLocation(filter_index, num_buckets, bucket_index, &location); + if (!s.ok()) return s; + + PageEntry *page = nullptr; + s = LoadPage(location, &page); + if (!s.ok()) return s; + + bucket->page = page; + bucket->offset = location.offset; + bucket->size = metadata_.bucket_size; + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, + uint32_t bucket1_index, uint32_t bucket2_index, + BucketRef *bucket1, BucketRef *bucket2) { + BucketLocation location1, location2; + auto s = ResolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); + if (!s.ok()) return s; + s = ResolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); + if (!s.ok()) return s; + + std::vector missing_locations; + if (pages_.find(location1.page_key) == pages_.end()) missing_locations.push_back(location1); + if (location2.page_key != location1.page_key && pages_.find(location2.page_key) == pages_.end()) { + missing_locations.push_back(location2); + } + s = LoadPages(missing_locations); + if (!s.ok()) return s; + + bucket1->page = &pages_.at(location1.page_key); + bucket1->offset = location1.offset; + bucket1->size = metadata_.bucket_size; + bucket2->page = &pages_.at(location2.page_key); + bucket2->offset = location2.offset; + bucket2->size = metadata_.bucket_size; + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntry **page) { + auto iter = pages_.find(location.page_key); + if (iter != pages_.end()) { + *page = &iter->second; + return rocksdb::Status::OK(); + } + + PageEntry page_entry; + auto s = storage_->Get(ctx_, ctx_.GetReadOptions(), location.page_key, &page_entry.data); + if (!s.ok() && !s.IsNotFound()) return s; + s = NormalizePage(s, location.expected_page_size, &page_entry); + if (!s.ok()) return s; + + auto result = pages_.emplace(location.page_key, std::move(page_entry)); + *page = &result.first->second; + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::LoadPages(const std::vector &locations) { + if (locations.empty()) return rocksdb::Status::OK(); + if (locations.size() == 1) { + PageEntry *page = nullptr; + return LoadPage(locations[0], &page); + } + + std::vector keys; + keys.reserve(locations.size()); + for (const auto &location : locations) keys.emplace_back(location.page_key); + + std::vector values(locations.size()); + std::vector statuses(locations.size()); + storage_->MultiGet(ctx_, ctx_.DefaultMultiGetOptions(), storage_->GetDB()->DefaultColumnFamily(), keys.size(), + keys.data(), values.data(), statuses.data()); + + for (size_t i = 0; i < locations.size(); ++i) { + PageEntry page_entry; + if (statuses[i].ok()) page_entry.data.assign(values[i].data(), values[i].size()); + auto s = NormalizePage(statuses[i], locations[i].expected_page_size, &page_entry); + if (!s.ok()) return s; + pages_.emplace(locations[i].page_key, std::move(page_entry)); + } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooPageSet::NormalizePage(const rocksdb::Status &status, uint32_t expected_size, + PageEntry *page) const { + if (!status.ok() && !status.IsNotFound()) return status; + if (status.IsNotFound()) page->data.clear(); + if (page->data.size() > expected_size) return rocksdb::Status::Corruption("invalid cuckoo filter page size"); + if (page->data.size() < expected_size) page->data.resize(expected_size, 0); + return rocksdb::Status::OK(); +} + +bool CuckooPageSet::TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { + for (size_t i = 0; i < bucket.size; ++i) { + size_t offset = bucket.offset + i; + if (static_cast(bucket.page->data[offset]) == 0) { + bucket.page->data[offset] = static_cast(fingerprint); + bucket.page->is_dirty = true; + *slot_idx = i; + return true; + } + } + return false; +} + +uint8_t CuckooPageSet::GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { + return static_cast(bucket.page->data[bucket.offset + slot_idx]); +} + +void CuckooPageSet::SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { + bucket.page->data[bucket.offset + slot_idx] = static_cast(fingerprint); + bucket.page->is_dirty = true; +} + +} // namespace redis diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h new file mode 100644 index 00000000000..17627eef0ed --- /dev/null +++ b/src/types/cuckoo_filter_page.h @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "storage/redis_metadata.h" +#include "storage/storage.h" + +namespace redis { + +class CuckooPageSet { + public: + CuckooPageSet(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, bool slot_id_encoded); + + rocksdb::Status TryInsertInCandidateBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + uint32_t bucket2_index, uint8_t fingerprint, bool *inserted); + rocksdb::Status TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint8_t fingerprint, bool *inserted); + rocksdb::Status GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint32_t slot_idx, + uint8_t *fingerprint); + rocksdb::Status SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint32_t slot_idx, + uint8_t fingerprint); + rocksdb::Status WriteBackDirtyPages(rocksdb::WriteBatchBase *batch); + + private: + struct PageEntry { + std::string data; + bool is_dirty = false; + }; + + struct BucketRef { + PageEntry *page = nullptr; + uint32_t offset = 0; + uint8_t size = 0; + }; + + struct BucketLocation { + std::string page_key; + uint32_t offset = 0; + uint32_t expected_page_size = 0; + }; + + rocksdb::Status ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketLocation *location) const; + rocksdb::Status EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketRef *bucket); + rocksdb::Status EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2); + rocksdb::Status LoadPage(const BucketLocation &location, PageEntry **page); + rocksdb::Status LoadPages(const std::vector &locations); + rocksdb::Status NormalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; + + bool TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); + uint8_t GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; + void SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); + + engine::Storage *storage_ = nullptr; + engine::Context &ctx_; + std::string ns_key_; + const CuckooChainMetadata &metadata_; + bool slot_id_encoded_ = false; + std::unordered_map pages_; +}; + +} // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 0e817bea5cd..0cfc76b7000 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -21,9 +21,9 @@ #include "redis_cuckoo_chain.h" #include -#include #include "cuckoo_filter.h" +#include "cuckoo_filter_page.h" #include "logging.h" namespace redis { @@ -46,26 +46,18 @@ rocksdb::Status CuckooChain::ValidateMetadata(const CuckooChainMetadata &metadat if (metadata.max_iterations == 0) { return rocksdb::Status::Corruption("invalid metadata: max_iterations is 0"); } + if (metadata.page_size < metadata.bucket_size) { + return rocksdb::Status::Corruption("invalid metadata: page_size is smaller than bucket_size"); + } if (!CuckooFilter::IsCapacitySupported(metadata.base_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: base_capacity is too large"); } return rocksdb::Status::OK(); } -std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t bucket_index) { - // Create a sub-key that includes both filter index and bucket index - std::string sub_key; - PutFixed16(&sub_key, filter_index); - PutFixed32(&sub_key, bucket_index); - - // Create the internal key using the storage encoding - std::string bucket_key = InternalKey(ns_key, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); - return bucket_key; -} - rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, - uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion) { + uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, + uint32_t page_size) { if (capacity == 0) { return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } @@ -83,6 +75,12 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (max_iterations == 0) { return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); } + if (page_size == 0) { + return rocksdb::Status::InvalidArgument("page_size must be larger than 0"); + } + if (page_size < bucket_size) { + return rocksdb::Status::InvalidArgument("page_size must be at least bucket_size"); + } if (expansion > kCFMaxExpansion) { return rocksdb::Status::InvalidArgument("expansion must be between 0 and 32768"); } @@ -108,14 +106,17 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key metadata.expansion = expansion; metadata.n_filters = 1; metadata.num_deleted_items = 0; + metadata.page_size = page_size; // Calculate the number of buckets needed for this filter uint32_t num_buckets = 0; s = CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); if (!s.ok()) return s; - INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", - capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); + INFO( + "Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}, " + "page_size={}", + capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion), page_size); // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); @@ -128,12 +129,8 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); if (!s.ok()) return s; - // Note: With bucket-based storage, we don't pre-allocate all buckets - // Buckets will be created lazily on first write - // This saves memory for sparse filters - - // Optionally, we could create the first few buckets to ensure the filter is ready - // But for now, we'll keep it fully lazy for maximum memory efficiency + // Pages are created lazily on first write. Reserve only persists metadata so sparse filters don't preallocate page + // values that may never be used. return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } @@ -149,34 +146,6 @@ static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, return true; } -// Helper function: try to find empty slot in a bucket and insert fingerprint -static bool TryInsertInBucket(std::string &bucket_data, uint8_t bucket_size, uint8_t fingerprint, size_t *slot_idx) { - for (size_t i = 0; i < bucket_size; ++i) { - if (static_cast(bucket_data[i]) == 0) { - bucket_data[i] = static_cast(fingerprint); - *slot_idx = i; - return true; - } - } - return false; -} - -// Helper function: read bucket from storage and ensure correct size -static rocksdb::Status ReadBucket(engine::Storage *storage, engine::Context &ctx, const std::string &bucket_key, - uint8_t bucket_size, std::string *bucket_data) { - auto s = storage->Get(ctx, ctx.GetReadOptions(), bucket_key, bucket_data); - if (!s.ok() && !s.IsNotFound()) { - return s; - } - if (s.IsNotFound()) { - bucket_data->clear(); - } - if (bucket_data->size() < bucket_size) { - bucket_data->resize(bucket_size, 0); - } - return rocksdb::Status::OK(); -} - rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { std::string ns_key = AppendNamespacePrefix(user_key); @@ -193,6 +162,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co metadata.expansion = kCFDefaultExpansion; metadata.n_filters = 1; metadata.num_deleted_items = 0; + metadata.page_size = kCuckooFilterDefaultPageSize; } if (!s.ok() && !s.IsNotFound()) return s; @@ -220,37 +190,18 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprint, hash); uint32_t bucket2_idx = alt_hash % num_buckets; - // Read both buckets using helper function - std::string bucket1_key = getBucketKey(ns_key, metadata, filter_idx, bucket1_idx); - std::string bucket2_key = getBucketKey(ns_key, metadata, filter_idx, bucket2_idx); - - std::string bucket1_data, bucket2_data; - s = ReadBucket(storage_, ctx, bucket1_key, metadata.bucket_size, &bucket1_data); + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + bool inserted = false; + s = pages.TryInsertInCandidateBuckets(filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, &inserted); if (!s.ok()) return s; - s = ReadBucket(storage_, ctx, bucket2_key, metadata.bucket_size, &bucket2_data); - if (!s.ok()) return s; - - // Try simple insertion in bucket1 or bucket2 - size_t slot_idx = 0; - std::string *target_bucket_data = nullptr; - std::string target_bucket_key; - - if (TryInsertInBucket(bucket1_data, metadata.bucket_size, fingerprint, &slot_idx)) { - target_bucket_data = &bucket1_data; - target_bucket_key = bucket1_key; - } else if (TryInsertInBucket(bucket2_data, metadata.bucket_size, fingerprint, &slot_idx)) { - target_bucket_data = &bucket2_data; - target_bucket_key = bucket2_key; - } - - if (target_bucket_data != nullptr) { + if (inserted) { // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - s = batch->Put(target_bucket_key, *target_bucket_data); + s = pages.WriteBackDirtyPages(batch.Get()); if (!s.ok()) return s; metadata.size++; @@ -279,20 +230,13 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (!s.ok()) return s; bool inserted = false; - std::unordered_map modified_buckets; - s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, - &modified_buckets); + auto batch = storage_->GetWriteBatchBase(); + s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, batch.Get()); if (s.ok() && inserted) { - auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - for (const auto &entry : modified_buckets) { - s = batch->Put(entry.first, entry.second); - if (!s.ok()) return s; - } - metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); @@ -325,17 +269,16 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (!s.ok()) return s; uint32_t bucket1_idx = hash % new_num_buckets; - std::string bucket1_key = getBucketKey(ns_key, metadata, new_filter_idx, bucket1_idx); - - // Insert into first slot of the new filter's first bucket - std::string bucket1_data(metadata.bucket_size, 0); - bucket1_data[0] = static_cast(fingerprint); + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + s = pages.TryInsertInBucket(new_filter_idx, new_num_buckets, bucket1_idx, fingerprint, &inserted); + if (!s.ok()) return s; + if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - s = batch->Put(bucket1_key, bucket1_data); + s = pages.WriteBackDirtyPages(batch.Get()); if (!s.ok()) return s; metadata.size++; @@ -359,9 +302,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted, - std::unordered_map *modified_buckets) { + rocksdb::WriteBatchBase *batch) { *inserted = false; - modified_buckets->clear(); + CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); // Start from bucket1 uint32_t current_bucket_idx = hash % num_buckets; @@ -370,22 +313,12 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns // Try to kick out existing fingerprints (all operations in memory) for (uint16_t iteration = 0; iteration < metadata.max_iterations; ++iteration) { - // Read current bucket (check modified_buckets cache first) - std::string bucket_key = getBucketKey(ns_key, metadata, filter_index, current_bucket_idx); - std::string bucket_data; - - auto cached = modified_buckets->find(bucket_key); - if (cached != modified_buckets->end()) { - bucket_data = cached->second; - } else { - auto s = ReadBucket(storage_, ctx, bucket_key, metadata.bucket_size, &bucket_data); - if (!s.ok()) return s; - } - // Swap fingerprint with victim slot - auto old_fp = static_cast(bucket_data[victim_slot]); - bucket_data[victim_slot] = static_cast(current_fp); - (*modified_buckets)[bucket_key] = bucket_data; + uint8_t old_fp = 0; + auto s = pages.GetBucketSlot(filter_index, num_buckets, current_bucket_idx, victim_slot, &old_fp); + if (!s.ok()) return s; + s = pages.SetBucketSlot(filter_index, num_buckets, current_bucket_idx, victim_slot, current_fp); + if (!s.ok()) return s; current_fp = old_fp; // If kicked-out fingerprint is 0 (empty), we successfully inserted @@ -394,30 +327,12 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns break; } - // Calculate alternate bucket for the kicked-out fingerprint - // CRITICAL FIX: Use XOR hash calculation correctly - // We need to reconstruct the hash for the alternate bucket - // Since h2 = h1 ^ (fp * 0x5bd1e995), and we know bucket_idx = h1 % num_buckets - // We calculate alt_hash using the fingerprint and current bucket index - uint64_t current_hash = current_bucket_idx; // Approximate hash from bucket index - uint64_t alt_hash = CuckooFilter::GetAltHash(current_fp, current_hash); - uint32_t alt_bucket_idx = alt_hash % num_buckets; - - // Check if alternate bucket has empty slot - std::string alt_bucket_key = getBucketKey(ns_key, metadata, filter_index, alt_bucket_idx); - std::string alt_bucket_data; - - cached = modified_buckets->find(alt_bucket_key); - if (cached != modified_buckets->end()) { - alt_bucket_data = cached->second; - } else { - auto s = ReadBucket(storage_, ctx, alt_bucket_key, metadata.bucket_size, &alt_bucket_data); - if (!s.ok()) return s; - } + uint32_t alt_bucket_idx = CuckooFilter::GetAltBucketIndex(current_bucket_idx, current_fp, num_buckets); - size_t empty_slot = 0; - if (TryInsertInBucket(alt_bucket_data, metadata.bucket_size, current_fp, &empty_slot)) { - (*modified_buckets)[alt_bucket_key] = alt_bucket_data; + bool inserted_in_alt_bucket = false; + s = pages.TryInsertInBucket(filter_index, num_buckets, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); + if (!s.ok()) return s; + if (inserted_in_alt_bucket) { *inserted = true; break; } @@ -427,6 +342,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns victim_slot = (victim_slot + 1) % metadata.bucket_size; } + if (*inserted) return pages.WriteBackDirtyPages(batch); return rocksdb::Status::OK(); } diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 2e02c13fbb7..2f469d2be9d 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -20,8 +20,6 @@ #pragma once -#include - #include "cuckoo_filter.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" @@ -41,7 +39,7 @@ class CuckooChain : public Database { // CF.RESERVE command - creates a new cuckoo filter with specified capacity rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, - uint16_t max_iterations, uint16_t expansion); + uint16_t max_iterations, uint16_t expansion, uint32_t page_size); // CF.ADD command - adds an item to the cuckoo filter. // Duplicate items are allowed, so added is true whenever insertion succeeds. @@ -53,15 +51,10 @@ class CuckooChain : public Database { static rocksdb::Status ValidateMetadata(const CuckooChainMetadata &metadata); - // Generate key for a specific bucket in bucket-based storage - // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} - std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t bucket_index); - // Kick-out insertion: try to insert fingerprint by evicting existing ones rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted, std::unordered_map *modified_buckets); + bool *inserted, rocksdb::WriteBatchBase *batch); }; } // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc new file mode 100644 index 00000000000..a09b0f6e35f --- /dev/null +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "types/cuckoo_filter_page.h" + +#include + +#include +#include + +#include "common/encoding.h" +#include "storage/redis_metadata.h" +#include "test_base.h" +#include "types/redis_cuckoo_chain.h" + +class RedisCuckooPageSetTest : public TestBase { + public: + RedisCuckooPageSetTest(const RedisCuckooPageSetTest &) = delete; + RedisCuckooPageSetTest &operator=(const RedisCuckooPageSetTest &) = delete; + RedisCuckooPageSetTest(RedisCuckooPageSetTest &&) = delete; + RedisCuckooPageSetTest &operator=(RedisCuckooPageSetTest &&) = delete; + + protected: + explicit RedisCuckooPageSetTest() : TestBase(), db_(std::make_unique(storage_.get(), "cuckoo_ns")) {} + + ~RedisCuckooPageSetTest() override { db_.reset(); } + + void SetUp() override { + const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + ns_key_ = db_->AppendNamespacePrefix(std::string("cf_page_test_") + test_info->name()); + } + + static CuckooChainMetadata MakeMetadata(uint8_t bucket_size, uint64_t version = 1, + uint32_t page_size = kCuckooFilterDefaultPageSize) { + CuckooChainMetadata metadata(false); + metadata.version = version; + metadata.size = 0; + metadata.base_capacity = 2; + metadata.bucket_size = bucket_size; + metadata.max_iterations = 500; + metadata.expansion = 0; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + metadata.page_size = page_size; + return metadata; + } + + std::string MakePageKey(const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t page_index) const { + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, page_index); + return InternalKey(ns_key_, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); + } + + rocksdb::Status ReadPage(const std::string &page_key, std::string *value) { + return storage_->Get(*ctx_, ctx_->GetReadOptions(), storage_->GetCFHandle(ColumnFamilyID::PrimarySubkey), page_key, + value); + } + + void WritePage(const std::string &page_key, const std::string &value) { + auto batch = storage_->GetWriteBatchBase(); + auto s = batch->Put(page_key, value); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + + void CommitBatch(rocksdb::WriteBatchBase *batch) { + auto s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + + std::unique_ptr db_; + std::string ns_key_; +}; + +TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 1, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(page.substr(0, 4), std::string(4, 0)); + EXPECT_EQ(static_cast(page[4]), 11); + EXPECT_EQ(page.substr(5, 3), std::string(3, 0)); +} + +TEST_F(RedisCuckooPageSetTest, TryInsertWritesLastPartialPage) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 513, 512, 33, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 1), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 4); + EXPECT_EQ(static_cast(page[0]), 33); + EXPECT_EQ(page.substr(1, 3), std::string(3, 0)); + + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnSamePagePreserveBothBuckets) { + auto metadata = MakeMetadata(2); + WritePage(MakePageKey(metadata, 0, 0), std::string{static_cast(11), static_cast(12), 0, 0, 0, 0, 0, 0}); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInCandidateBuckets(0, 4, 0, 1, 22, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(static_cast(page[0]), 11); + EXPECT_EQ(static_cast(page[1]), 12); + EXPECT_EQ(static_cast(page[2]), 22); + EXPECT_EQ(static_cast(page[3]), 0); + EXPECT_EQ(page.substr(4), std::string(4, 0)); +} + +TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnDifferentPagesWriteBothPages) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + auto s = pages.SetBucketSlot(0, 1025, 0, 0, 44); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = pages.SetBucketSlot(0, 1025, 512, 0, 55); + ASSERT_TRUE(s.ok()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), metadata.page_size); + EXPECT_EQ(static_cast(page[0]), 44); + + s = ReadPage(MakePageKey(metadata, 0, 1), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), metadata.page_size); + EXPECT_EQ(static_cast(page[0]), 55); +} + +TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { + auto metadata = MakeMetadata(4, 1, 8); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 3, 2, 66, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); + + s = ReadPage(MakePageKey(metadata, 0, 1), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 4); + EXPECT_EQ(static_cast(page[0]), 66); + EXPECT_EQ(page.substr(1, 3), std::string(3, 0)); +} + +TEST_F(RedisCuckooPageSetTest, SetBucketSlotWritesOnlyTargetSlot) { + auto metadata = MakeMetadata(4); + std::string expected(16, static_cast(7)); + WritePage(MakePageKey(metadata, 0, 0), expected); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + auto s = pages.SetBucketSlot(0, 4, 2, 1, 88); + ASSERT_TRUE(s.ok()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + expected[9] = static_cast(88); + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page, expected); +} + +TEST_F(RedisCuckooPageSetTest, InvalidBucketAndSlotArguments) { + auto metadata = MakeMetadata(4); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 4, &fingerprint); + EXPECT_TRUE(s.IsInvalidArgument()) << s.ToString(); + + s = pages.SetBucketSlot(0, 2, 0, 4, 11); + EXPECT_TRUE(s.IsInvalidArgument()) << s.ToString(); + + bool inserted = true; + s = pages.TryInsertInBucket(0, 2, 2, 11, &inserted); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + s = pages.TryInsertInBucket(0, 0, 0, 11, &inserted); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooPageSetTest, OversizedPageReturnsCorruption) { + auto metadata = MakeMetadata(4); + WritePage(MakePageKey(metadata, 0, 0), std::string(9, static_cast(1))); + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + std::string page; + s = ReadPage(MakePageKey(metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page, std::string(9, static_cast(1))); +} + +TEST_F(RedisCuckooPageSetTest, DirtyPagesAreDiscardedWithoutWriteBack) { + auto metadata = MakeMetadata(4); + { + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + } + + std::string page; + auto s = ReadPage(MakePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { + auto old_metadata = MakeMetadata(4, 100); + auto new_metadata = MakeMetadata(4, 101); + { + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, old_metadata, storage_->IsSlotIdEncoded()); + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + } + + redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, new_metadata, storage_->IsSlotIdEncoded()); + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(fingerprint, 0); + s = pages.SetBucketSlot(0, 2, 0, 0, 22); + ASSERT_TRUE(s.ok()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + CommitBatch(batch.Get()); + + std::string page; + s = ReadPage(MakePageKey(old_metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(static_cast(page[0]), 11); + + s = ReadPage(MakePageKey(new_metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), 8); + EXPECT_EQ(static_cast(page[0]), 22); +} diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 17dbe28e520..c00c15f41c9 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -20,13 +20,16 @@ #include +#include #include #include #include +#include "common/encoding.h" #include "storage/redis_db.h" #include "storage/redis_metadata.h" #include "test_base.h" +#include "types/cuckoo_filter_page.h" #include "types/redis_cuckoo_chain.h" class RedisCuckooFilterTest : public TestBase { @@ -53,7 +56,8 @@ class RedisCuckooFilterTest : public TestBase { } void verifyMetadata(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, - uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0) { + uint16_t expansion, uint64_t size, uint16_t n_filters, uint64_t num_deleted_items = 0, + uint32_t page_size = kCuckooFilterDefaultPageSize) { std::string ns_key = db_->AppendNamespacePrefix(key); CuckooChainMetadata metadata(false); auto s = db_->GetMetadata(*ctx_, {kRedisCuckooFilter}, ns_key, &metadata); @@ -66,13 +70,14 @@ class RedisCuckooFilterTest : public TestBase { EXPECT_EQ(metadata.size, size) << key; EXPECT_EQ(metadata.n_filters, n_filters) << key; EXPECT_EQ(metadata.num_deleted_items, num_deleted_items) << key; + EXPECT_EQ(metadata.page_size, page_size) << key; } void reserveAndVerify(const std::string &key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, - uint16_t expansion) { - auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion); + uint16_t expansion, uint32_t page_size = kCuckooFilterDefaultPageSize) { + auto s = cuckoo_->Reserve(*ctx_, key, capacity, bucket_size, max_iterations, expansion, page_size); ASSERT_TRUE(s.ok()) << key << ": " << s.ToString(); - verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0); + verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, 0, 1, 0, page_size); } void addAndVerify(const std::string &key, const std::string &item, uint64_t capacity, uint8_t bucket_size, @@ -84,6 +89,28 @@ class RedisCuckooFilterTest : public TestBase { verifyMetadata(key, capacity, bucket_size, max_iterations, expansion, expected_size, n_filters, 0); } + CuckooChainMetadata getMetadata(const std::string &key) { + std::string ns_key = db_->AppendNamespacePrefix(key); + CuckooChainMetadata metadata(false); + auto s = db_->GetMetadata(*ctx_, {kRedisCuckooFilter}, ns_key, &metadata); + EXPECT_TRUE(s.ok()) << key << ": metadata not found"; + return metadata; + } + + std::string makePageKey(const std::string &key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t page_index) { + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, page_index); + return InternalKey(db_->AppendNamespacePrefix(key), sub_key, metadata.version, storage_->IsSlotIdEncoded()) + .Encode(); + } + + rocksdb::Status readPage(const std::string &page_key, std::string *value) { + return storage_->Get(*ctx_, ctx_->GetReadOptions(), storage_->GetCFHandle(ColumnFamilyID::PrimarySubkey), page_key, + value); + } + std::unique_ptr cuckoo_; std::unique_ptr db_; std::string key_; @@ -106,11 +133,19 @@ TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { {"zero_max_iterations", 1000, 4, 0, 2, "max_iterations must be larger than 0"}, {"capacity_too_large", std::numeric_limits::max(), 4, 500, 2, "capacity is too large"}, {"expansion_too_large", 1000, 4, 500, redis::kCFMaxExpansion + 1, "expansion must be between 0 and 32768"}, + {"zero_page_size", 1000, 4, 500, 2, "page_size must be larger than 0"}, + {"page_size_smaller_than_bucket", 1000, 4, 500, 2, "page_size must be at least bucket_size"}, }; for (const auto &test_case : invalid_test_cases) { + uint32_t page_size = kCuckooFilterDefaultPageSize; + if (test_case.key == "zero_page_size") { + page_size = 0; + } else if (test_case.key == "page_size_smaller_than_bucket") { + page_size = test_case.bucket_size - 1; + } auto s = cuckoo_->Reserve(*ctx_, test_case.key, test_case.capacity, test_case.bucket_size, test_case.max_iterations, - test_case.expansion); + test_case.expansion, page_size); ASSERT_FALSE(s.ok()) << test_case.key; ASSERT_TRUE(s.IsInvalidArgument()) << test_case.key << ": " << s.ToString(); ASSERT_NE(s.ToString().find(test_case.err), std::string::npos) << test_case.key << ": " << s.ToString(); @@ -163,11 +198,11 @@ TEST_F(RedisCuckooFilterTest, ReserveValidParams) { TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { // First reserve should succeed - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()); // Second reserve with same key should fail - s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2); + s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -280,31 +315,84 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { uint16_t expansion = 2; // Create the filter - auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + auto s = + cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()) << "First reserve should succeed"; // Verify metadata was stored by trying to reserve again with same key // This should fail with "already exists" error - s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); + s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys - s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); + s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion, + kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; // Verify the original key still exists (can't create it again) - s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()) << "Original key should still exist"; ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } +TEST_F(RedisCuckooFilterTest, ReservePersistsPageSize) { + constexpr uint32_t page_size = 4096; + reserveAndVerify(key_, 1000, 4, 500, 2, page_size); +} + TEST_F(RedisCuckooFilterTest, AddBasic) { reserveAndVerify(key_, 1000, 4, 500, 2); addAndVerify(key_, "item1", 1000, 4, 500, 2, 1); } +TEST_F(RedisCuckooFilterTest, ReserveDoesNotPreallocatePages) { + reserveAndVerify(key_, 1000, 4, 500, 2); + + auto metadata = getMetadata(key_); + std::string page; + auto s = readPage(makePageKey(key_, metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + +TEST_F(RedisCuckooFilterTest, AddWritesPagedLayout) { + constexpr uint64_t capacity = 1000; + constexpr uint8_t bucket_size = 4; + reserveAndVerify(key_, capacity, bucket_size, 500, 2); + + const std::string item = "item1"; + addAndVerify(key_, item, capacity, bucket_size, 500, 2, 1); + + auto metadata = getMetadata(key_); + uint32_t num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto hash = redis::CuckooFilter::Hash(item); + auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); + auto bucket1_idx = static_cast(hash % num_buckets); + auto bucket2_idx = static_cast(redis::CuckooFilter::GetAltHash(fingerprint, hash) % num_buckets); + auto buckets_per_page = metadata.page_size / bucket_size; + auto page_index = bucket1_idx / buckets_per_page; + auto page_size = std::min(buckets_per_page, num_buckets - page_index * buckets_per_page) * bucket_size; + + std::string page; + s = readPage(makePageKey(key_, metadata, 0, page_index), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), page_size); + + auto bucket1_offset = (bucket1_idx % buckets_per_page) * bucket_size; + auto bucket2_offset = (bucket2_idx % buckets_per_page) * bucket_size; + bool found = false; + for (uint32_t i = 0; i < bucket_size; ++i) { + found = found || static_cast(page[bucket1_offset + i]) == fingerprint; + if (bucket2_idx / buckets_per_page == page_index) { + found = found || static_cast(page[bucket2_offset + i]) == fingerprint; + } + } + EXPECT_TRUE(found); +} + TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { std::string key = "nonexistent_key"; bool added = false; @@ -314,7 +402,7 @@ TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { verifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, redis::kCFDefaultExpansion, 1, 1, 0); - s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2); + s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -348,7 +436,7 @@ TEST_F(RedisCuckooFilterTest, AddManyItems) { TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { uint64_t small_capacity = 10; - auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0); + auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0, kCuckooFilterDefaultPageSize); ASSERT_TRUE(s.ok()); uint64_t added_count = 0; @@ -383,3 +471,109 @@ TEST_F(RedisCuckooFilterTest, AddEdgeCaseItems) { addAndVerify(key_, items[i], 1000, 4, 500, 2, i + 1); } } + +TEST_F(RedisCuckooFilterTest, KickOutSuccessWritesDirtyPages) { + constexpr uint64_t capacity = 2; + constexpr uint8_t bucket_size = 1; + constexpr uint32_t num_buckets = 4; + reserveAndVerify(key_, capacity, bucket_size, 1, 0); + + struct Candidate { + std::string item; + uint8_t fingerprint = 0; + uint32_t bucket1 = 0; + uint32_t bucket2 = 0; + }; + + std::vector candidates; + for (int i = 0; i < 10000; ++i) { + std::string item = "kick_item_" + std::to_string(i); + auto hash = redis::CuckooFilter::Hash(item); + auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); + candidates.push_back({item, fingerprint, static_cast(hash % num_buckets), + static_cast(redis::CuckooFilter::GetAltHash(fingerprint, hash) % num_buckets)}); + } + + Candidate first; + Candidate second; + Candidate kicked; + uint32_t evicted_bucket = 0; + bool found = false; + for (const auto &candidate : candidates) { + if (candidate.bucket1 == candidate.bucket2) continue; + for (const auto &first_candidate : candidates) { + if (first_candidate.item == candidate.item || first_candidate.bucket1 != candidate.bucket1) continue; + auto alt_for_victim = + redis::CuckooFilter::GetAltBucketIndex(candidate.bucket1, first_candidate.fingerprint, num_buckets); + if (alt_for_victim == candidate.bucket1 || alt_for_victim == candidate.bucket2) continue; + + for (const auto &second_candidate : candidates) { + if (second_candidate.item == candidate.item || second_candidate.item == first_candidate.item || + second_candidate.bucket1 != candidate.bucket2) { + continue; + } + first = first_candidate; + second = second_candidate; + kicked = candidate; + evicted_bucket = alt_for_victim; + found = true; + break; + } + if (found) break; + } + if (found) break; + } + ASSERT_TRUE(found); + + addAndVerify(key_, first.item, capacity, bucket_size, 1, 0, 1); + addAndVerify(key_, second.item, capacity, bucket_size, 1, 0, 2); + addAndVerify(key_, kicked.item, capacity, bucket_size, 1, 0, 3); + + auto metadata = getMetadata(key_); + ASSERT_EQ(metadata.n_filters, 1); + ASSERT_EQ(metadata.size, 3); + + std::string page; + auto s = readPage(makePageKey(key_, metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(page.size(), num_buckets * bucket_size); + EXPECT_EQ(static_cast(page[kicked.bucket1]), kicked.fingerprint); + EXPECT_EQ(static_cast(page[kicked.bucket2]), second.fingerprint); + EXPECT_EQ(static_cast(page[evicted_bucket]), first.fingerprint); +} + +TEST_F(RedisCuckooFilterTest, ExpansionWritesNewFilterIndexPage) { + constexpr uint64_t capacity = 2; + constexpr uint8_t bucket_size = 1; + reserveAndVerify(key_, capacity, bucket_size, 1, 2); + + CuckooChainMetadata metadata(false); + uint64_t added_count = 0; + for (int i = 0; i < 100; ++i) { + bool added = false; + auto s = cuckoo_->Add(*ctx_, key_, "item_" + std::to_string(i), &added); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(added); + ++added_count; + + metadata = getMetadata(key_); + if (metadata.n_filters > 1) break; + } + + ASSERT_GT(metadata.n_filters, 1); + EXPECT_EQ(metadata.size, added_count); + + uint32_t num_buckets = 0; + uint64_t new_filter_capacity = capacity; + for (uint16_t i = 0; i < metadata.n_filters - 1; ++i) { + new_filter_capacity *= metadata.expansion; + } + auto s = redis::CuckooFilter::OptimalNumBuckets(new_filter_capacity, bucket_size, &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto expected_page_size = std::min(metadata.page_size / bucket_size, num_buckets) * bucket_size; + + std::string page; + s = readPage(makePageKey(key_, metadata, metadata.n_filters - 1, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page.size(), expected_page_size); +} From adf0ff042448c8224be0d28215fe542b3b8c263e Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 11 May 2026 10:58:19 +0800 Subject: [PATCH 16/34] fix: lint --- src/types/cuckoo_filter_page.cc | 52 ++++++++++++++++----------------- src/types/cuckoo_filter_page.h | 18 ++++++------ src/types/redis_cuckoo_chain.cc | 4 +-- src/types/redis_cuckoo_chain.h | 2 +- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index 543506fa97e..8efc2a9622c 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -68,15 +68,15 @@ rocksdb::Status CuckooPageSet::TryInsertInCandidateBuckets(uint16_t filter_index uint8_t fingerprint, bool *inserted) { *inserted = false; BucketRef bucket1, bucket2; - auto s = EnsureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); + auto s = ensureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); if (!s.ok()) return s; size_t slot_idx = 0; - if (TryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { + if (tryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { *inserted = true; return rocksdb::Status::OK(); } - if (bucket1_index != bucket2_index && TryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { + if (bucket1_index != bucket2_index && tryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { *inserted = true; } return rocksdb::Status::OK(); @@ -86,11 +86,11 @@ rocksdb::Status CuckooPageSet::TryInsertInBucket(uint16_t filter_index, uint32_t uint8_t fingerprint, bool *inserted) { *inserted = false; BucketRef bucket; - auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); if (!s.ok()) return s; size_t slot_idx = 0; - *inserted = TryInsertInBucketRef(bucket, fingerprint, &slot_idx); + *inserted = tryInsertInBucketRef(bucket, fingerprint, &slot_idx); return rocksdb::Status::OK(); } @@ -99,9 +99,9 @@ rocksdb::Status CuckooPageSet::GetBucketSlot(uint16_t filter_index, uint32_t num if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; - auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); if (!s.ok()) return s; - *fingerprint = GetBucketRefSlot(bucket, slot_idx); + *fingerprint = getBucketRefSlot(bucket, slot_idx); return rocksdb::Status::OK(); } @@ -110,9 +110,9 @@ rocksdb::Status CuckooPageSet::SetBucketSlot(uint16_t filter_index, uint32_t num if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; - auto s = EnsureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); + auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); if (!s.ok()) return s; - SetBucketRefSlot(bucket, slot_idx, fingerprint); + setBucketRefSlot(bucket, slot_idx, fingerprint); return rocksdb::Status::OK(); } @@ -125,7 +125,7 @@ rocksdb::Status CuckooPageSet::WriteBackDirtyPages(rocksdb::WriteBatchBase *batc return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, +rocksdb::Status CuckooPageSet::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketLocation *location) const { if (metadata_.bucket_size == 0 || num_buckets == 0 || bucket_index >= num_buckets) { return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); @@ -139,14 +139,14 @@ rocksdb::Status CuckooPageSet::ResolveBucketLocation(uint16_t filter_index, uint return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, +rocksdb::Status CuckooPageSet::ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketRef *bucket) { BucketLocation location; - auto s = ResolveBucketLocation(filter_index, num_buckets, bucket_index, &location); + auto s = resolveBucketLocation(filter_index, num_buckets, bucket_index, &location); if (!s.ok()) return s; PageEntry *page = nullptr; - s = LoadPage(location, &page); + s = loadPage(location, &page); if (!s.ok()) return s; bucket->page = page; @@ -155,13 +155,13 @@ rocksdb::Status CuckooPageSet::EnsureBucketLoaded(uint16_t filter_index, uint32_ return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, +rocksdb::Status CuckooPageSet::ensureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2) { BucketLocation location1, location2; - auto s = ResolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); + auto s = resolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); if (!s.ok()) return s; - s = ResolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); + s = resolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); if (!s.ok()) return s; std::vector missing_locations; @@ -169,7 +169,7 @@ rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_inde if (location2.page_key != location1.page_key && pages_.find(location2.page_key) == pages_.end()) { missing_locations.push_back(location2); } - s = LoadPages(missing_locations); + s = loadPages(missing_locations); if (!s.ok()) return s; bucket1->page = &pages_.at(location1.page_key); @@ -181,7 +181,7 @@ rocksdb::Status CuckooPageSet::EnsureCandidateBucketsLoaded(uint16_t filter_inde return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntry **page) { +rocksdb::Status CuckooPageSet::loadPage(const BucketLocation &location, PageEntry **page) { auto iter = pages_.find(location.page_key); if (iter != pages_.end()) { *page = &iter->second; @@ -191,7 +191,7 @@ rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntr PageEntry page_entry; auto s = storage_->Get(ctx_, ctx_.GetReadOptions(), location.page_key, &page_entry.data); if (!s.ok() && !s.IsNotFound()) return s; - s = NormalizePage(s, location.expected_page_size, &page_entry); + s = normalizePage(s, location.expected_page_size, &page_entry); if (!s.ok()) return s; auto result = pages_.emplace(location.page_key, std::move(page_entry)); @@ -199,11 +199,11 @@ rocksdb::Status CuckooPageSet::LoadPage(const BucketLocation &location, PageEntr return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::LoadPages(const std::vector &locations) { +rocksdb::Status CuckooPageSet::loadPages(const std::vector &locations) { if (locations.empty()) return rocksdb::Status::OK(); if (locations.size() == 1) { PageEntry *page = nullptr; - return LoadPage(locations[0], &page); + return loadPage(locations[0], &page); } std::vector keys; @@ -218,14 +218,14 @@ rocksdb::Status CuckooPageSet::LoadPages(const std::vector &loca for (size_t i = 0; i < locations.size(); ++i) { PageEntry page_entry; if (statuses[i].ok()) page_entry.data.assign(values[i].data(), values[i].size()); - auto s = NormalizePage(statuses[i], locations[i].expected_page_size, &page_entry); + auto s = normalizePage(statuses[i], locations[i].expected_page_size, &page_entry); if (!s.ok()) return s; pages_.emplace(locations[i].page_key, std::move(page_entry)); } return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::NormalizePage(const rocksdb::Status &status, uint32_t expected_size, +rocksdb::Status CuckooPageSet::normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const { if (!status.ok() && !status.IsNotFound()) return status; if (status.IsNotFound()) page->data.clear(); @@ -234,7 +234,7 @@ rocksdb::Status CuckooPageSet::NormalizePage(const rocksdb::Status &status, uint return rocksdb::Status::OK(); } -bool CuckooPageSet::TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { +bool CuckooPageSet::tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { for (size_t i = 0; i < bucket.size; ++i) { size_t offset = bucket.offset + i; if (static_cast(bucket.page->data[offset]) == 0) { @@ -247,11 +247,11 @@ bool CuckooPageSet::TryInsertInBucketRef(const BucketRef &bucket, uint8_t finger return false; } -uint8_t CuckooPageSet::GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { +uint8_t CuckooPageSet::getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { return static_cast(bucket.page->data[bucket.offset + slot_idx]); } -void CuckooPageSet::SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { +void CuckooPageSet::setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { bucket.page->data[bucket.offset + slot_idx] = static_cast(fingerprint); bucket.page->is_dirty = true; } diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index 17627eef0ed..d8473033203 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -66,19 +66,19 @@ class CuckooPageSet { uint32_t expected_page_size = 0; }; - rocksdb::Status ResolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + rocksdb::Status resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketLocation *location) const; - rocksdb::Status EnsureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + rocksdb::Status ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketRef *bucket); - rocksdb::Status EnsureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + rocksdb::Status ensureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2); - rocksdb::Status LoadPage(const BucketLocation &location, PageEntry **page); - rocksdb::Status LoadPages(const std::vector &locations); - rocksdb::Status NormalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; + rocksdb::Status loadPage(const BucketLocation &location, PageEntry **page); + rocksdb::Status loadPages(const std::vector &locations); + rocksdb::Status normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; - bool TryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); - uint8_t GetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; - void SetBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); + bool tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); + uint8_t getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; + void setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); engine::Storage *storage_ = nullptr; engine::Context &ctx_; diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 0cfc76b7000..acd38a2b02e 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -33,7 +33,7 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } -rocksdb::Status CuckooChain::ValidateMetadata(const CuckooChainMetadata &metadata) { +rocksdb::Status CuckooChain::validateMetadata(const CuckooChainMetadata &metadata) { if (metadata.n_filters == 0) { return rocksdb::Status::Corruption("invalid metadata: n_filters is 0"); } @@ -166,7 +166,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co } if (!s.ok() && !s.IsNotFound()) return s; - s = ValidateMetadata(metadata); + s = validateMetadata(metadata); if (!s.ok()) return s; // Calculate hash and fingerprint for the item diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 2f469d2be9d..0289c8e160f 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -49,7 +49,7 @@ class CuckooChain : public Database { // Get metadata for the cuckoo filter rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); - static rocksdb::Status ValidateMetadata(const CuckooChainMetadata &metadata); + static rocksdb::Status validateMetadata(const CuckooChainMetadata &metadata); // Kick-out insertion: try to insert fingerprint by evicting existing ones rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, From cb91f43538fdf4af3cf115fdf80b7bb837956165 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Mon, 11 May 2026 11:50:00 +0800 Subject: [PATCH 17/34] optimize code --- src/types/redis_cuckoo_chain.cc | 11 +-- tests/cppunit/types/cuckoo_filter_test.cc | 85 +++++++++++++++++------ 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index acd38a2b02e..0e3c8570f79 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -173,11 +173,11 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); uint8_t fingerprint = CuckooFilter::GenerateFingerprint(hash); - // Try to insert in each sub-filter (starting from the first/smallest one) - // This follows RedisBloom's behavior and is more efficient - for (uint16_t filter_idx = 0; filter_idx < metadata.n_filters; ++filter_idx) { + // RedisBloom prioritizes the newest sub-filter to avoid repeatedly probing older, fuller filters. + for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { + uint16_t current_filter_idx = static_cast(filter_idx); uint64_t filter_capacity = 0; - if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_idx, &filter_capacity) || + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, current_filter_idx, &filter_capacity) || !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } @@ -192,7 +192,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; - s = pages.TryInsertInCandidateBuckets(filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, &inserted); + s = pages.TryInsertInCandidateBuckets(current_filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, + &inserted); if (!s.ok()) return s; if (inserted) { diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index c00c15f41c9..83d4d6de612 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -111,6 +111,17 @@ class RedisCuckooFilterTest : public TestBase { value); } + void writeMetadata(const std::string &key, const CuckooChainMetadata &metadata) { + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + auto batch = storage_->GetWriteBatchBase(); + auto s = + batch->Put(storage_->GetCFHandle(ColumnFamilyID::Metadata), db_->AppendNamespacePrefix(key), metadata_bytes); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + std::unique_ptr cuckoo_; std::unique_ptr db_; std::string key_; @@ -197,12 +208,10 @@ TEST_F(RedisCuckooFilterTest, ReserveValidParams) { } TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { - // First reserve should succeed - auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()); + reserveAndVerify(key_, 1000, 4, 500, 2); // Second reserve with same key should fail - s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2, kCuckooFilterDefaultPageSize); + auto s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -315,21 +324,18 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { uint16_t expansion = 2; // Create the filter - auto s = - cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()) << "First reserve should succeed"; + reserveAndVerify(key_, capacity, bucket_size, max_iterations, expansion); // Verify metadata was stored by trying to reserve again with same key // This should fail with "already exists" error - s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); + auto s = + cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys - s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion, - kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; + reserveAndVerify("different_key", capacity, bucket_size, max_iterations, expansion); // Verify the original key still exists (can't create it again) s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion, kCuckooFilterDefaultPageSize); @@ -395,14 +401,10 @@ TEST_F(RedisCuckooFilterTest, AddWritesPagedLayout) { TEST_F(RedisCuckooFilterTest, AddToNonExistentFilter) { std::string key = "nonexistent_key"; - bool added = false; - auto s = cuckoo_->Add(*ctx_, key, "item1", &added); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_TRUE(added) << "CF.ADD should create the filter when the key does not exist"; - verifyMetadata(key, redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, - redis::kCFDefaultExpansion, 1, 1, 0); + addAndVerify(key, "item1", redis::kCFDefaultCapacity, redis::kCFDefaultBucketSize, redis::kCFDefaultMaxIterations, + redis::kCFDefaultExpansion, 1); - s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); + auto s = cuckoo_->Reserve(*ctx_, key, 1000, 4, 500, 2, kCuckooFilterDefaultPageSize); ASSERT_FALSE(s.ok()); ASSERT_TRUE(s.IsInvalidArgument()); ASSERT_NE(s.ToString().find("already exists"), std::string::npos); @@ -427,6 +429,48 @@ TEST_F(RedisCuckooFilterTest, AddDuplicateItems) { } } +TEST_F(RedisCuckooFilterTest, AddPrioritizesNewestFilter) { + CuckooChainMetadata metadata; + metadata.size = 0; + metadata.base_capacity = 1000; + metadata.bucket_size = 4; + metadata.max_iterations = 500; + metadata.expansion = 2; + metadata.n_filters = 2; + metadata.num_deleted_items = 0; + metadata.page_size = kCuckooFilterDefaultPageSize; + writeMetadata(key_, metadata); + + const std::string item = "item"; + addAndVerify(key_, item, metadata.base_capacity, metadata.bucket_size, metadata.max_iterations, metadata.expansion, 1, + metadata.n_filters); + + auto stored_metadata = getMetadata(key_); + ASSERT_EQ(stored_metadata.n_filters, 2); + ASSERT_EQ(stored_metadata.size, 1); + + auto hash = redis::CuckooFilter::Hash(item); + uint32_t old_num_buckets = 0; + auto s = redis::CuckooFilter::OptimalNumBuckets(metadata.base_capacity, metadata.bucket_size, &old_num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto buckets_per_page = stored_metadata.page_size / stored_metadata.bucket_size; + auto old_page_idx = static_cast(hash % old_num_buckets) / buckets_per_page; + + std::string page; + s = readPage(makePageKey(key_, stored_metadata, 0, old_page_idx), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); + + uint32_t num_buckets = 0; + s = redis::CuckooFilter::OptimalNumBuckets(metadata.base_capacity * metadata.expansion, metadata.bucket_size, + &num_buckets); + ASSERT_TRUE(s.ok()) << s.ToString(); + auto bucket1_idx = static_cast(hash % num_buckets); + auto expected_page_idx = bucket1_idx / buckets_per_page; + + s = readPage(makePageKey(key_, stored_metadata, 1, expected_page_idx), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); +} + TEST_F(RedisCuckooFilterTest, AddManyItems) { reserveAndVerify(key_, 1000, 4, 500, 2); for (int i = 0; i < 100; ++i) { @@ -436,15 +480,14 @@ TEST_F(RedisCuckooFilterTest, AddManyItems) { TEST_F(RedisCuckooFilterTest, AddSmallFilterCapacity) { uint64_t small_capacity = 10; - auto s = cuckoo_->Reserve(*ctx_, key_, small_capacity, 2, 500, 0, kCuckooFilterDefaultPageSize); - ASSERT_TRUE(s.ok()); + reserveAndVerify(key_, small_capacity, 2, 500, 0); uint64_t added_count = 0; bool full = false; for (int i = 0; i < 100; ++i) { std::string item = "item_" + std::to_string(i); bool added = false; - s = cuckoo_->Add(*ctx_, key_, item, &added); + auto s = cuckoo_->Add(*ctx_, key_, item, &added); if (!s.ok()) { ASSERT_TRUE(s.IsAborted()) << "Should be Aborted status when full"; From d924d0a2bd30c959615e932b0d2d9d7b261eba43 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 12 May 2026 11:28:47 +0800 Subject: [PATCH 18/34] fix: ci --- src/types/cuckoo_filter_page.cc | 5 +- src/types/cuckoo_filter_page.h | 8 +- src/types/redis_cuckoo_chain.cc | 2 +- .../cppunit/types/cuckoo_filter_page_test.cc | 84 +++++++++---------- 4 files changed, 49 insertions(+), 50 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index 8efc2a9622c..7fef71f5a70 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -225,8 +225,7 @@ rocksdb::Status CuckooPageSet::loadPages(const std::vector &loca return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::normalizePage(const rocksdb::Status &status, uint32_t expected_size, - PageEntry *page) const { +rocksdb::Status CuckooPageSet::normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) { if (!status.ok() && !status.IsNotFound()) return status; if (status.IsNotFound()) page->data.clear(); if (page->data.size() > expected_size) return rocksdb::Status::Corruption("invalid cuckoo filter page size"); @@ -247,7 +246,7 @@ bool CuckooPageSet::tryInsertInBucketRef(const BucketRef &bucket, uint8_t finger return false; } -uint8_t CuckooPageSet::getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const { +uint8_t CuckooPageSet::getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) { return static_cast(bucket.page->data[bucket.offset + slot_idx]); } diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index d8473033203..9e06027685b 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -74,11 +74,11 @@ class CuckooPageSet { uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2); rocksdb::Status loadPage(const BucketLocation &location, PageEntry **page); rocksdb::Status loadPages(const std::vector &locations); - rocksdb::Status normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) const; + static rocksdb::Status normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page); - bool tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); - uint8_t getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) const; - void setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); + static bool tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx); + static uint8_t getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx); + static void setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint); engine::Storage *storage_ = nullptr; engine::Context &ctx_; diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 0e3c8570f79..4d3803c85db 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -175,7 +175,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // RedisBloom prioritizes the newest sub-filter to avoid repeatedly probing older, fuller filters. for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { - uint16_t current_filter_idx = static_cast(filter_idx); + auto current_filter_idx = static_cast(filter_idx); uint64_t filter_capacity = 0; if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, current_filter_idx, &filter_capacity) || !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc index a09b0f6e35f..d1415fac291 100644 --- a/tests/cppunit/types/cuckoo_filter_page_test.cc +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -47,7 +47,7 @@ class RedisCuckooPageSetTest : public TestBase { ns_key_ = db_->AppendNamespacePrefix(std::string("cf_page_test_") + test_info->name()); } - static CuckooChainMetadata MakeMetadata(uint8_t bucket_size, uint64_t version = 1, + static CuckooChainMetadata makeMetadata(uint8_t bucket_size, uint64_t version = 1, uint32_t page_size = kCuckooFilterDefaultPageSize) { CuckooChainMetadata metadata(false); metadata.version = version; @@ -62,19 +62,19 @@ class RedisCuckooPageSetTest : public TestBase { return metadata; } - std::string MakePageKey(const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t page_index) const { + std::string makePageKey(const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t page_index) const { std::string sub_key; PutFixed16(&sub_key, filter_index); PutFixed32(&sub_key, page_index); return InternalKey(ns_key_, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); } - rocksdb::Status ReadPage(const std::string &page_key, std::string *value) { + rocksdb::Status readPage(const std::string &page_key, std::string *value) { return storage_->Get(*ctx_, ctx_->GetReadOptions(), storage_->GetCFHandle(ColumnFamilyID::PrimarySubkey), page_key, value); } - void WritePage(const std::string &page_key, const std::string &value) { + void writePage(const std::string &page_key, const std::string &value) { auto batch = storage_->GetWriteBatchBase(); auto s = batch->Put(page_key, value); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -82,7 +82,7 @@ class RedisCuckooPageSetTest : public TestBase { ASSERT_TRUE(s.ok()) << s.ToString(); } - void CommitBatch(rocksdb::WriteBatchBase *batch) { + void commitBatch(rocksdb::WriteBatchBase *batch) { auto s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); ASSERT_TRUE(s.ok()) << s.ToString(); } @@ -92,7 +92,7 @@ class RedisCuckooPageSetTest : public TestBase { }; TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { - auto metadata = MakeMetadata(4); + auto metadata = makeMetadata(4); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; @@ -103,10 +103,10 @@ TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), 8); EXPECT_EQ(page.substr(0, 4), std::string(4, 0)); @@ -115,7 +115,7 @@ TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { } TEST_F(RedisCuckooPageSetTest, TryInsertWritesLastPartialPage) { - auto metadata = MakeMetadata(4); + auto metadata = makeMetadata(4); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; @@ -126,22 +126,22 @@ TEST_F(RedisCuckooPageSetTest, TryInsertWritesLastPartialPage) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 1), &page); + s = readPage(makePageKey(metadata, 0, 1), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), 4); EXPECT_EQ(static_cast(page[0]), 33); EXPECT_EQ(page.substr(1, 3), std::string(3, 0)); - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnSamePagePreserveBothBuckets) { - auto metadata = MakeMetadata(2); - WritePage(MakePageKey(metadata, 0, 0), std::string{static_cast(11), static_cast(12), 0, 0, 0, 0, 0, 0}); + auto metadata = makeMetadata(2); + writePage(makePageKey(metadata, 0, 0), std::string{static_cast(11), static_cast(12), 0, 0, 0, 0, 0, 0}); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; @@ -152,10 +152,10 @@ TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnSamePagePreserveBothBuckets) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), 8); EXPECT_EQ(static_cast(page[0]), 11); @@ -166,7 +166,7 @@ TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnSamePagePreserveBothBuckets) { } TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnDifferentPagesWriteBothPages) { - auto metadata = MakeMetadata(4); + auto metadata = makeMetadata(4); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); auto s = pages.SetBucketSlot(0, 1025, 0, 0, 44); @@ -177,22 +177,22 @@ TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnDifferentPagesWriteBothPages) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), metadata.page_size); EXPECT_EQ(static_cast(page[0]), 44); - s = ReadPage(MakePageKey(metadata, 0, 1), &page); + s = readPage(makePageKey(metadata, 0, 1), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), metadata.page_size); EXPECT_EQ(static_cast(page[0]), 55); } TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { - auto metadata = MakeMetadata(4, 1, 8); + auto metadata = makeMetadata(4, 1, 8); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; @@ -203,13 +203,13 @@ TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); EXPECT_TRUE(s.IsNotFound()) << s.ToString(); - s = ReadPage(MakePageKey(metadata, 0, 1), &page); + s = readPage(makePageKey(metadata, 0, 1), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), 4); EXPECT_EQ(static_cast(page[0]), 66); @@ -217,9 +217,9 @@ TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { } TEST_F(RedisCuckooPageSetTest, SetBucketSlotWritesOnlyTargetSlot) { - auto metadata = MakeMetadata(4); + auto metadata = makeMetadata(4); std::string expected(16, static_cast(7)); - WritePage(MakePageKey(metadata, 0, 0), expected); + writePage(makePageKey(metadata, 0, 0), expected); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); auto s = pages.SetBucketSlot(0, 4, 2, 1, 88); @@ -228,17 +228,17 @@ TEST_F(RedisCuckooPageSetTest, SetBucketSlotWritesOnlyTargetSlot) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); expected[9] = static_cast(88); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ(page, expected); } TEST_F(RedisCuckooPageSetTest, InvalidBucketAndSlotArguments) { - auto metadata = MakeMetadata(4); + auto metadata = makeMetadata(4); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); uint8_t fingerprint = 0; @@ -258,16 +258,16 @@ TEST_F(RedisCuckooPageSetTest, InvalidBucketAndSlotArguments) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } TEST_F(RedisCuckooPageSetTest, OversizedPageReturnsCorruption) { - auto metadata = MakeMetadata(4); - WritePage(MakePageKey(metadata, 0, 0), std::string(9, static_cast(1))); + auto metadata = makeMetadata(4); + writePage(makePageKey(metadata, 0, 0), std::string(9, static_cast(1))); redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); uint8_t fingerprint = 0; @@ -275,13 +275,13 @@ TEST_F(RedisCuckooPageSetTest, OversizedPageReturnsCorruption) { EXPECT_TRUE(s.IsCorruption()) << s.ToString(); std::string page; - s = ReadPage(MakePageKey(metadata, 0, 0), &page); + s = readPage(makePageKey(metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ(page, std::string(9, static_cast(1))); } TEST_F(RedisCuckooPageSetTest, DirtyPagesAreDiscardedWithoutWriteBack) { - auto metadata = MakeMetadata(4); + auto metadata = makeMetadata(4); { redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; @@ -291,13 +291,13 @@ TEST_F(RedisCuckooPageSetTest, DirtyPagesAreDiscardedWithoutWriteBack) { } std::string page; - auto s = ReadPage(MakePageKey(metadata, 0, 0), &page); + auto s = readPage(makePageKey(metadata, 0, 0), &page); EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { - auto old_metadata = MakeMetadata(4, 100); - auto new_metadata = MakeMetadata(4, 101); + auto old_metadata = makeMetadata(4, 100); + auto new_metadata = makeMetadata(4, 101); { redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, old_metadata, storage_->IsSlotIdEncoded()); bool inserted = false; @@ -308,7 +308,7 @@ TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); } redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, new_metadata, storage_->IsSlotIdEncoded()); @@ -322,15 +322,15 @@ TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); ASSERT_TRUE(s.ok()) << s.ToString(); - CommitBatch(batch.Get()); + commitBatch(batch.Get()); std::string page; - s = ReadPage(MakePageKey(old_metadata, 0, 0), &page); + s = readPage(makePageKey(old_metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), 8); EXPECT_EQ(static_cast(page[0]), 11); - s = ReadPage(MakePageKey(new_metadata, 0, 0), &page); + s = readPage(makePageKey(new_metadata, 0, 0), &page); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(page.size(), 8); EXPECT_EQ(static_cast(page[0]), 22); From 87b8bdd98b670958fca5554bd7490ee4175a4d4c Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 12 May 2026 20:06:42 +0800 Subject: [PATCH 19/34] fix: ci --- .../gocase/integration/replication/replication_test.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/gocase/integration/replication/replication_test.go b/tests/gocase/integration/replication/replication_test.go index 23ccabd1017..ab75a80e489 100644 --- a/tests/gocase/integration/replication/replication_test.go +++ b/tests/gocase/integration/replication/replication_test.go @@ -344,10 +344,10 @@ func TestReplicationWithLimitSpeed(t *testing.T) { util.Populate(t, slaveClient, "", 1026, 1) t.Run("resume broken transfer based files", func(t *testing.T) { - // Try to transfer some files, because max-replication-mb 1, - // so maybe more than 5 files are transferred for sleep 5s. util.SlaveOf(t, slaveClient, master) - time.Sleep(5 * time.Second) + require.Eventually(t, func() bool { + return slave.LogFileMatches(t, `.*\[fetch\] Fetched .*`) + }, 50*time.Second, time.Second) // Restart master server, let the slave try to full sync with master again, // because slave already received some SST files, so we will skip them. @@ -359,7 +359,9 @@ func TestReplicationWithLimitSpeed(t *testing.T) { require.Eventually(t, func() bool { return slave.LogFileMatches(t, ".*skip count: 1.*") }, 50*time.Second, 1000*time.Millisecond) - util.WaitForSync(t, slaveClient) + require.Eventually(t, func() bool { + return util.FindInfoEntry(slaveClient, "master_link_status") == "up" + }, 50*time.Second, 100*time.Millisecond) require.Equal(t, "b", slaveClient.Get(ctx, "a").Val()) }) } From f231a6079d4ef980b816fd9050a6c4cfae95d99b Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 19 May 2026 15:37:28 +0800 Subject: [PATCH 20/34] fix: part1 --- src/storage/redis_metadata.h | 10 +++-- src/types/cuckoo_filter.h | 36 ++++++++------- src/types/redis_cuckoo_chain.cc | 52 +++++++++------------- src/types/redis_cuckoo_chain.h | 8 ++-- tests/cppunit/types/cuckoo_filter_test.cc | 15 ++++--- tests/gocase/unit/scan/scan_test.go | 4 +- tests/gocase/unit/type/bloom/bloom_test.go | 2 +- 7 files changed, 61 insertions(+), 66 deletions(-) diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 4053dec4eb6..5a6738458cf 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -60,7 +60,7 @@ enum RedisType : uint8_t { inline constexpr const std::array RedisTypeNames = { "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", - "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; + "stream", "MBbloomCF", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -335,7 +335,7 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; -constexpr uint32_t kCuckooFilterDefaultPageSize = 2048; +constexpr uint32_t kCuckooFilterDefaultPageSize = 2048; // bytes class CuckooChainMetadata : public Metadata { public: @@ -346,7 +346,9 @@ class CuckooChainMetadata : public Metadata { /// When a filter is full, a new one is created with capacity = base_capacity * expansion^n uint16_t expansion; - /// The capacity of the first filter + /// The capacity of the first filter. + /// Later sub-filter capacities are derived as base_capacity * expansion^n after restart, + /// so we need to persist the base value in metadata. uint64_t base_capacity; /// Number of fingerprints per bucket @@ -358,7 +360,7 @@ class CuckooChainMetadata : public Metadata { /// Track number of deleted items for maintenance uint64_t num_deleted_items; - /// Target maximum payload size for each persisted Cuckoo Filter page + /// Target maximum payload size for each persisted Cuckoo Filter page, in bytes uint32_t page_size; explicit CuckooChainMetadata(bool generate_version = true) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 7e5fbef40cd..5f4a793911b 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -32,6 +32,11 @@ namespace redis { +constexpr long double kCuckooFilterLoadFactor = 0.955L; +constexpr uint64_t kCuckooFilterMaxSupportedBuckets = std::numeric_limits::max() / 2 + 1ULL; +constexpr uint64_t kCuckooFilterFingerprintModulus = 255; +constexpr uint64_t kCuckooFilterAltHashMultiplier = 0x5bd1e995ULL; + // Cuckoo filter implementation from the paper: // "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. // Buckets are grouped into page values in RocksDB. The Cuckoo algorithm still @@ -46,23 +51,22 @@ class CuckooFilter { public: static bool IsCapacitySupported(uint64_t capacity, uint8_t bucket_size) { uint32_t num_buckets = 0; - return OptimalNumBuckets(capacity, bucket_size, &num_buckets).ok(); + return CalculateRequiredBuckets(capacity, bucket_size, &num_buckets).ok(); } - // Calculate the optimal number of buckets for the filter. - static rocksdb::Status OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size, uint32_t* num_buckets) { + // Returns the power-of-two bucket count required for the requested capacity. + static rocksdb::Status CalculateRequiredBuckets(uint64_t capacity, uint8_t bucket_size, uint32_t *num_buckets) { if (bucket_size == 0) { return rocksdb::Status::InvalidArgument("bucket_size must be larger than 0"); } - constexpr long double kLoadFactor = 0.955L; - constexpr uint64_t kMaxSupportedBuckets = std::numeric_limits::max() / 2 + 1ULL; - auto max_supported_capacity = static_cast(kMaxSupportedBuckets * bucket_size * kLoadFactor); + auto max_supported_capacity = + static_cast(kCuckooFilterMaxSupportedBuckets * bucket_size * kCuckooFilterLoadFactor); if (capacity > max_supported_capacity) { return rocksdb::Status::InvalidArgument("capacity is too large"); } - auto exact_buckets = static_cast(capacity) / bucket_size / kLoadFactor; + auto exact_buckets = static_cast(capacity) / bucket_size / kCuckooFilterLoadFactor; auto required_buckets = static_cast(exact_buckets); if (static_cast(required_buckets) < exact_buckets) required_buckets++; if (required_buckets == 0) required_buckets = 1; @@ -74,15 +78,16 @@ class CuckooFilter { return rocksdb::Status::OK(); } - // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) - // Following RedisBloom: fp = hash % 255 + 1 - static uint8_t GenerateFingerprint(uint64_t hash) { return static_cast(hash % 255 + 1); } + // Following RedisBloom: fp = hash % 255 + 1. + static uint8_t GenerateFingerprint(uint64_t hash) { + return static_cast(hash % kCuckooFilterFingerprintModulus + 1); + } // Calculate alternate hash using XOR (following RedisBloom) - // h2 = h1 ^ (fp * 0x5bd1e995) + // h2 = h1 ^ (fp * kCuckooFilterAltHashMultiplier) // This preserves symmetry: GetAltHash(fp, GetAltHash(fp, h)) == h static uint64_t GetAltHash(uint8_t fingerprint, uint64_t hash) { - return hash ^ (static_cast(fingerprint) * 0x5bd1e995); + return hash ^ (static_cast(fingerprint) * kCuckooFilterAltHashMultiplier); } // Calculate an alternate bucket from a bucket index and fingerprint. @@ -92,12 +97,11 @@ class CuckooFilter { return static_cast(alt_hash % num_buckets); } - // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) - // This is the entry point for hashing items before they are inserted/checked in the filter - static uint64_t Hash(const char* data, size_t length) { return HllMurMurHash64A(data, static_cast(length), 0); } + // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom). + static uint64_t Hash(const char *data, size_t length) { return HllMurMurHash64A(data, static_cast(length), 0); } // Convenience overload for std::string - static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } + static uint64_t Hash(const std::string &item) { return Hash(item.data(), item.size()); } }; } // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 4d3803c85db..6d4760d646a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -108,19 +108,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key metadata.num_deleted_items = 0; metadata.page_size = page_size; - // Calculate the number of buckets needed for this filter - uint32_t num_buckets = 0; - s = CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); - if (!s.ok()) return s; - - INFO( - "Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}, " - "page_size={}", - capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion), page_size); - // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.RESERVE", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"reserve", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; @@ -146,6 +136,16 @@ static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, return true; } +static rocksdb::Status GetFilterNumBuckets(const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t *num_buckets) { + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_index, &filter_capacity) || + !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } + return CuckooFilter::CalculateRequiredBuckets(filter_capacity, metadata.bucket_size, num_buckets); +} + rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { std::string ns_key = AppendNamespacePrefix(user_key); @@ -176,13 +176,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // RedisBloom prioritizes the newest sub-filter to avoid repeatedly probing older, fuller filters. for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { auto current_filter_idx = static_cast(filter_idx); - uint64_t filter_capacity = 0; - if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, current_filter_idx, &filter_capacity) || - !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { - return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); - } uint32_t num_buckets = 0; - s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + s = GetFilterNumBuckets(metadata, current_filter_idx, &num_buckets); if (!s.ok()) return s; // Calculate bucket indices @@ -199,7 +194,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (inserted) { // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; s = pages.WriteBackDirtyPages(batch.Get()); @@ -221,20 +216,15 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // No space found in any filter, try kick-out on the last filter uint16_t last_filter_idx = metadata.n_filters - 1; - uint64_t filter_capacity = 0; - if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, last_filter_idx, &filter_capacity) || - !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { - return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); - } uint32_t num_buckets = 0; - s = CuckooFilter::OptimalNumBuckets(filter_capacity, metadata.bucket_size, &num_buckets); + s = GetFilterNumBuckets(metadata, last_filter_idx, &num_buckets); if (!s.ok()) return s; bool inserted = false; auto batch = storage_->GetWriteBatchBase(); s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, batch.Get()); if (s.ok() && inserted) { - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; @@ -256,17 +246,15 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (metadata.n_filters >= UINT16_MAX) return rocksdb::Status::Aborted("maximum number of filters reached"); metadata.n_filters++; - INFO("CF.ADD: Expanded to {} filters", metadata.n_filters); + INFO("add expanded to {} filters", metadata.n_filters); // Retry insertion in the new expanded filter uint16_t new_filter_idx = metadata.n_filters - 1; - uint64_t new_filter_capacity = 0; - if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, new_filter_idx, &new_filter_capacity) || - !CuckooFilter::IsCapacitySupported(new_filter_capacity, metadata.bucket_size)) { + uint32_t new_num_buckets = 0; + s = GetFilterNumBuckets(metadata, new_filter_idx, &new_num_buckets); + if (s.IsCorruption()) { return rocksdb::Status::Aborted("maximum filter capacity reached"); } - uint32_t new_num_buckets = 0; - s = CuckooFilter::OptimalNumBuckets(new_filter_capacity, metadata.bucket_size, &new_num_buckets); if (!s.ok()) return s; uint32_t bucket1_idx = hash % new_num_buckets; @@ -276,7 +264,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"CF.ADD", user_key.ToString()}); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; s = pages.WriteBackDirtyPages(batch.Get()); diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 0289c8e160f..c7bb9e24e95 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -26,7 +26,7 @@ namespace redis { -// Default values for Cuckoo Filter +// Default values for a newly created cuckoo filter. const uint32_t kCFDefaultCapacity = 1024; const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket const uint16_t kCFDefaultMaxIterations = 500; @@ -37,16 +37,16 @@ class CuckooChain : public Database { public: CuckooChain(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} - // CF.RESERVE command - creates a new cuckoo filter with specified capacity + // Creates a new cuckoo filter with the specified parameters. rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, uint16_t max_iterations, uint16_t expansion, uint32_t page_size); - // CF.ADD command - adds an item to the cuckoo filter. + // Adds one item to the cuckoo filter. // Duplicate items are allowed, so added is true whenever insertion succeeds. rocksdb::Status Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added); private: - // Get metadata for the cuckoo filter + // Loads metadata for a cuckoo filter key. rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); static rocksdb::Status validateMetadata(const CuckooChainMetadata &metadata); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 83d4d6de612..36102b01588 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -217,7 +217,7 @@ TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } -TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { +TEST_F(RedisCuckooFilterTest, CalculateRequiredBucketsCalculation) { struct TestCase { uint64_t capacity; uint8_t bucket_size; @@ -231,7 +231,7 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { for (const auto &test_case : test_cases) { uint32_t num_buckets = 0; - auto s = redis::CuckooFilter::OptimalNumBuckets(test_case.capacity, test_case.bucket_size, &num_buckets); + auto s = redis::CuckooFilter::CalculateRequiredBuckets(test_case.capacity, test_case.bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << "capacity=" << test_case.capacity << ", bucket_size=" << static_cast(test_case.bucket_size) << ": " << s.ToString(); ASSERT_EQ(num_buckets, test_case.expected_num_buckets) @@ -372,7 +372,7 @@ TEST_F(RedisCuckooFilterTest, AddWritesPagedLayout) { auto metadata = getMetadata(key_); uint32_t num_buckets = 0; - auto s = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size, &num_buckets); + auto s = redis::CuckooFilter::CalculateRequiredBuckets(capacity, bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto hash = redis::CuckooFilter::Hash(item); auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); @@ -451,7 +451,8 @@ TEST_F(RedisCuckooFilterTest, AddPrioritizesNewestFilter) { auto hash = redis::CuckooFilter::Hash(item); uint32_t old_num_buckets = 0; - auto s = redis::CuckooFilter::OptimalNumBuckets(metadata.base_capacity, metadata.bucket_size, &old_num_buckets); + auto s = + redis::CuckooFilter::CalculateRequiredBuckets(metadata.base_capacity, metadata.bucket_size, &old_num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto buckets_per_page = stored_metadata.page_size / stored_metadata.bucket_size; auto old_page_idx = static_cast(hash % old_num_buckets) / buckets_per_page; @@ -461,8 +462,8 @@ TEST_F(RedisCuckooFilterTest, AddPrioritizesNewestFilter) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); uint32_t num_buckets = 0; - s = redis::CuckooFilter::OptimalNumBuckets(metadata.base_capacity * metadata.expansion, metadata.bucket_size, - &num_buckets); + s = redis::CuckooFilter::CalculateRequiredBuckets(metadata.base_capacity * metadata.expansion, metadata.bucket_size, + &num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto bucket1_idx = static_cast(hash % num_buckets); auto expected_page_idx = bucket1_idx / buckets_per_page; @@ -611,7 +612,7 @@ TEST_F(RedisCuckooFilterTest, ExpansionWritesNewFilterIndexPage) { for (uint16_t i = 0; i < metadata.n_filters - 1; ++i) { new_filter_capacity *= metadata.expansion; } - auto s = redis::CuckooFilter::OptimalNumBuckets(new_filter_capacity, bucket_size, &num_buckets); + auto s = redis::CuckooFilter::CalculateRequiredBuckets(new_filter_capacity, bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto expected_page_size = std::min(metadata.page_size / bucket_size, num_buckets) * bucket_size; diff --git a/tests/gocase/unit/scan/scan_test.go b/tests/gocase/unit/scan/scan_test.go index e0eb8e4d751..c3e2ab557ed 100644 --- a/tests/gocase/unit/scan/scan_test.go +++ b/tests/gocase/unit/scan/scan_test.go @@ -463,8 +463,8 @@ func ScanTest(t *testing.T, rdb *redis.Client, ctx context.Context) { require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype1", "0.02", "1000").Err()) require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype2", "0.02", "1000").Err()) require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype3", "0.02", "1000").Err()) - require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "type", "MBbloom--")) - require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "count", "3", "type", "MBbloom--")) + require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "type", "MBbloomCF")) + require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "count", "3", "type", "MBbloomCF")) //ReJSON-RL type require.NoError(t, rdb.Do(ctx, "JSON.SET", "ReJSONtype1", "$", ` {"x":1, "y":2} `).Err()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "ReJSONtype2", "$", ` {"x":1, "y":2} `).Err()) diff --git a/tests/gocase/unit/type/bloom/bloom_test.go b/tests/gocase/unit/type/bloom/bloom_test.go index d6ac6af08bf..129526d0135 100644 --- a/tests/gocase/unit/type/bloom/bloom_test.go +++ b/tests/gocase/unit/type/bloom/bloom_test.go @@ -445,7 +445,7 @@ func testBloom(t *testing.T, configs util.KvrocksServerConfigs) { t.Run("Get type of bloom filter", func(t *testing.T) { require.NoError(t, rdb.Del(ctx, key).Err()) require.NoError(t, rdb.Do(ctx, "bf.reserve", key, "0.02", "1000").Err()) - require.Equal(t, "MBbloom--", rdb.Type(ctx, key).Val()) + require.Equal(t, "MBbloomCF", rdb.Type(ctx, key).Val()) }) t.Run("Get Card of bloom filter", func(t *testing.T) { From 2edeaa0b70898dc7037ba0c6dab4ebf31465f1bf Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 19 May 2026 15:39:00 +0800 Subject: [PATCH 21/34] rename: CuckooFilter->CuckooFilterHelper --- src/types/cuckoo_filter.h | 2 +- src/types/redis_cuckoo_chain.cc | 16 +++--- tests/cppunit/types/cuckoo_filter_test.cc | 62 ++++++++++++----------- 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 5f4a793911b..6e20d8b8004 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -47,7 +47,7 @@ constexpr uint64_t kCuckooFilterAltHashMultiplier = 0x5bd1e995ULL; // - h1 = hash (primary hash) // - h2 = h1 ^ (fp * 0x5bd1e995) (alternate hash via XOR) // - bucket_index = hash % num_buckets (only apply modulo when indexing) -class CuckooFilter { +class CuckooFilterHelper { public: static bool IsCapacitySupported(uint64_t capacity, uint8_t bucket_size) { uint32_t num_buckets = 0; diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 6d4760d646a..4af7747957a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -49,7 +49,7 @@ rocksdb::Status CuckooChain::validateMetadata(const CuckooChainMetadata &metadat if (metadata.page_size < metadata.bucket_size) { return rocksdb::Status::Corruption("invalid metadata: page_size is smaller than bucket_size"); } - if (!CuckooFilter::IsCapacitySupported(metadata.base_capacity, metadata.bucket_size)) { + if (!CuckooFilterHelper::IsCapacitySupported(metadata.base_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: base_capacity is too large"); } return rocksdb::Status::OK(); @@ -84,7 +84,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key if (expansion > kCFMaxExpansion) { return rocksdb::Status::InvalidArgument("expansion must be between 0 and 32768"); } - if (!CuckooFilter::IsCapacitySupported(capacity, bucket_size)) { + if (!CuckooFilterHelper::IsCapacitySupported(capacity, bucket_size)) { return rocksdb::Status::InvalidArgument("capacity is too large"); } @@ -140,10 +140,10 @@ static rocksdb::Status GetFilterNumBuckets(const CuckooChainMetadata &metadata, uint32_t *num_buckets) { uint64_t filter_capacity = 0; if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_index, &filter_capacity) || - !CuckooFilter::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { + !CuckooFilterHelper::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } - return CuckooFilter::CalculateRequiredBuckets(filter_capacity, metadata.bucket_size, num_buckets); + return CuckooFilterHelper::CalculateRequiredBuckets(filter_capacity, metadata.bucket_size, num_buckets); } rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { @@ -170,8 +170,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (!s.ok()) return s; // Calculate hash and fingerprint for the item - uint64_t hash = CuckooFilter::Hash(item.data(), item.size()); - uint8_t fingerprint = CuckooFilter::GenerateFingerprint(hash); + uint64_t hash = CuckooFilterHelper::Hash(item.data(), item.size()); + uint8_t fingerprint = CuckooFilterHelper::GenerateFingerprint(hash); // RedisBloom prioritizes the newest sub-filter to avoid repeatedly probing older, fuller filters. for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { @@ -182,7 +182,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Calculate bucket indices uint32_t bucket1_idx = hash % num_buckets; - uint64_t alt_hash = CuckooFilter::GetAltHash(fingerprint, hash); + uint64_t alt_hash = CuckooFilterHelper::GetAltHash(fingerprint, hash); uint32_t bucket2_idx = alt_hash % num_buckets; CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); @@ -316,7 +316,7 @@ rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns break; } - uint32_t alt_bucket_idx = CuckooFilter::GetAltBucketIndex(current_bucket_idx, current_fp, num_buckets); + uint32_t alt_bucket_idx = CuckooFilterHelper::GetAltBucketIndex(current_bucket_idx, current_fp, num_buckets); bool inserted_in_alt_bucket = false; s = pages.TryInsertInBucket(filter_index, num_buckets, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 36102b01588..60ef7ab9be8 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -231,7 +231,8 @@ TEST_F(RedisCuckooFilterTest, CalculateRequiredBucketsCalculation) { for (const auto &test_case : test_cases) { uint32_t num_buckets = 0; - auto s = redis::CuckooFilter::CalculateRequiredBuckets(test_case.capacity, test_case.bucket_size, &num_buckets); + auto s = + redis::CuckooFilterHelper::CalculateRequiredBuckets(test_case.capacity, test_case.bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << "capacity=" << test_case.capacity << ", bucket_size=" << static_cast(test_case.bucket_size) << ": " << s.ToString(); ASSERT_EQ(num_buckets, test_case.expected_num_buckets) @@ -248,16 +249,16 @@ TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { // Test fingerprint generation ensures non-zero values in range [1, 255] // Following RedisBloom: fp = hash % 255 + 1 for (uint64_t hash = 0; hash < 1000; ++hash) { - uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); + uint8_t fp = redis::CuckooFilterHelper::GenerateFingerprint(hash); ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } // Verify the formula: fp = hash % 255 + 1 - ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(0), 1); - ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(254), 255); - ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(255), 1); - ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(256), 2); + ASSERT_EQ(redis::CuckooFilterHelper::GenerateFingerprint(0), 1); + ASSERT_EQ(redis::CuckooFilterHelper::GenerateFingerprint(254), 255); + ASSERT_EQ(redis::CuckooFilterHelper::GenerateFingerprint(255), 1); + ASSERT_EQ(redis::CuckooFilterHelper::GenerateFingerprint(256), 2); } TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { @@ -270,10 +271,10 @@ TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { for (uint64_t hash = 0; hash < 100; ++hash) { for (uint16_t fp = 1; fp <= 255; ++fp) { auto fingerprint = static_cast(fp); - uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fingerprint, hash); + uint64_t alt_hash = redis::CuckooFilterHelper::GetAltHash(fingerprint, hash); // Applying GetAltHash twice should return original hash - uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fingerprint, alt_hash); + uint64_t double_alt_hash = redis::CuckooFilterHelper::GetAltHash(fingerprint, alt_hash); ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; // Both hashes should map to valid bucket indices @@ -289,30 +290,30 @@ TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { TEST_F(RedisCuckooFilterTest, HashFunction) { // Test that Hash function produces consistent 64-bit values std::string test_item = "hello"; - uint64_t hash1 = redis::CuckooFilter::Hash(test_item); - uint64_t hash2 = redis::CuckooFilter::Hash(test_item.data(), test_item.size()); + uint64_t hash1 = redis::CuckooFilterHelper::Hash(test_item); + uint64_t hash2 = redis::CuckooFilterHelper::Hash(test_item.data(), test_item.size()); // Both methods should produce the same result ASSERT_EQ(hash1, hash2) << "Hash methods should be consistent"; // Hash should be deterministic - uint64_t hash3 = redis::CuckooFilter::Hash(test_item); + uint64_t hash3 = redis::CuckooFilterHelper::Hash(test_item); ASSERT_EQ(hash1, hash3) << "Hash should be deterministic"; // Different items should produce different hashes (with high probability) - uint64_t hash_world = redis::CuckooFilter::Hash("world"); + uint64_t hash_world = redis::CuckooFilterHelper::Hash("world"); ASSERT_NE(hash1, hash_world) << "Different items should have different hashes"; // Empty string produces hash value 0 (this is expected with MurmurHash) - uint64_t hash_empty = redis::CuckooFilter::Hash(""); + uint64_t hash_empty = redis::CuckooFilterHelper::Hash(""); ASSERT_EQ(hash_empty, 0) << "Empty string should produce hash value 0 with MurmurHash"; // Even with hash=0, fingerprint should be non-zero - uint8_t fp_empty = redis::CuckooFilter::GenerateFingerprint(hash_empty); + uint8_t fp_empty = redis::CuckooFilterHelper::GenerateFingerprint(hash_empty); ASSERT_EQ(fp_empty, 1) << "Fingerprint of hash=0 should be 1 (0 % 255 + 1)"; // Test that hash can be used with fingerprint generation - uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); + uint8_t fp = redis::CuckooFilterHelper::GenerateFingerprint(hash1); ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } @@ -372,12 +373,12 @@ TEST_F(RedisCuckooFilterTest, AddWritesPagedLayout) { auto metadata = getMetadata(key_); uint32_t num_buckets = 0; - auto s = redis::CuckooFilter::CalculateRequiredBuckets(capacity, bucket_size, &num_buckets); + auto s = redis::CuckooFilterHelper::CalculateRequiredBuckets(capacity, bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); - auto hash = redis::CuckooFilter::Hash(item); - auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); + auto hash = redis::CuckooFilterHelper::Hash(item); + auto fingerprint = redis::CuckooFilterHelper::GenerateFingerprint(hash); auto bucket1_idx = static_cast(hash % num_buckets); - auto bucket2_idx = static_cast(redis::CuckooFilter::GetAltHash(fingerprint, hash) % num_buckets); + auto bucket2_idx = static_cast(redis::CuckooFilterHelper::GetAltHash(fingerprint, hash) % num_buckets); auto buckets_per_page = metadata.page_size / bucket_size; auto page_index = bucket1_idx / buckets_per_page; auto page_size = std::min(buckets_per_page, num_buckets - page_index * buckets_per_page) * bucket_size; @@ -449,10 +450,10 @@ TEST_F(RedisCuckooFilterTest, AddPrioritizesNewestFilter) { ASSERT_EQ(stored_metadata.n_filters, 2); ASSERT_EQ(stored_metadata.size, 1); - auto hash = redis::CuckooFilter::Hash(item); + auto hash = redis::CuckooFilterHelper::Hash(item); uint32_t old_num_buckets = 0; - auto s = - redis::CuckooFilter::CalculateRequiredBuckets(metadata.base_capacity, metadata.bucket_size, &old_num_buckets); + auto s = redis::CuckooFilterHelper::CalculateRequiredBuckets(metadata.base_capacity, metadata.bucket_size, + &old_num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto buckets_per_page = stored_metadata.page_size / stored_metadata.bucket_size; auto old_page_idx = static_cast(hash % old_num_buckets) / buckets_per_page; @@ -462,8 +463,8 @@ TEST_F(RedisCuckooFilterTest, AddPrioritizesNewestFilter) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); uint32_t num_buckets = 0; - s = redis::CuckooFilter::CalculateRequiredBuckets(metadata.base_capacity * metadata.expansion, metadata.bucket_size, - &num_buckets); + s = redis::CuckooFilterHelper::CalculateRequiredBuckets(metadata.base_capacity * metadata.expansion, + metadata.bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto bucket1_idx = static_cast(hash % num_buckets); auto expected_page_idx = bucket1_idx / buckets_per_page; @@ -532,10 +533,11 @@ TEST_F(RedisCuckooFilterTest, KickOutSuccessWritesDirtyPages) { std::vector candidates; for (int i = 0; i < 10000; ++i) { std::string item = "kick_item_" + std::to_string(i); - auto hash = redis::CuckooFilter::Hash(item); - auto fingerprint = redis::CuckooFilter::GenerateFingerprint(hash); - candidates.push_back({item, fingerprint, static_cast(hash % num_buckets), - static_cast(redis::CuckooFilter::GetAltHash(fingerprint, hash) % num_buckets)}); + auto hash = redis::CuckooFilterHelper::Hash(item); + auto fingerprint = redis::CuckooFilterHelper::GenerateFingerprint(hash); + candidates.push_back( + {item, fingerprint, static_cast(hash % num_buckets), + static_cast(redis::CuckooFilterHelper::GetAltHash(fingerprint, hash) % num_buckets)}); } Candidate first; @@ -548,7 +550,7 @@ TEST_F(RedisCuckooFilterTest, KickOutSuccessWritesDirtyPages) { for (const auto &first_candidate : candidates) { if (first_candidate.item == candidate.item || first_candidate.bucket1 != candidate.bucket1) continue; auto alt_for_victim = - redis::CuckooFilter::GetAltBucketIndex(candidate.bucket1, first_candidate.fingerprint, num_buckets); + redis::CuckooFilterHelper::GetAltBucketIndex(candidate.bucket1, first_candidate.fingerprint, num_buckets); if (alt_for_victim == candidate.bucket1 || alt_for_victim == candidate.bucket2) continue; for (const auto &second_candidate : candidates) { @@ -612,7 +614,7 @@ TEST_F(RedisCuckooFilterTest, ExpansionWritesNewFilterIndexPage) { for (uint16_t i = 0; i < metadata.n_filters - 1; ++i) { new_filter_capacity *= metadata.expansion; } - auto s = redis::CuckooFilter::CalculateRequiredBuckets(new_filter_capacity, bucket_size, &num_buckets); + auto s = redis::CuckooFilterHelper::CalculateRequiredBuckets(new_filter_capacity, bucket_size, &num_buckets); ASSERT_TRUE(s.ok()) << s.ToString(); auto expected_page_size = std::min(metadata.page_size / bucket_size, num_buckets) * bucket_size; From 6509d05d84789374d4fb213250f5cc4f6cc63992 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 19 May 2026 19:17:08 +0800 Subject: [PATCH 22/34] feat: CuckooSubFilter --- src/types/cuckoo_filter_sub_filter.cc | 94 +++++++++++++++++++++++++++ src/types/cuckoo_filter_sub_filter.h | 57 ++++++++++++++++ src/types/redis_cuckoo_chain.cc | 77 ++++------------------ src/types/redis_cuckoo_chain.h | 5 -- 4 files changed, 165 insertions(+), 68 deletions(-) create mode 100644 src/types/cuckoo_filter_sub_filter.cc create mode 100644 src/types/cuckoo_filter_sub_filter.h diff --git a/src/types/cuckoo_filter_sub_filter.cc b/src/types/cuckoo_filter_sub_filter.cc new file mode 100644 index 00000000000..9187f67fc4c --- /dev/null +++ b/src/types/cuckoo_filter_sub_filter.cc @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "cuckoo_filter_sub_filter.h" + +#include "cuckoo_filter.h" + +namespace redis { + +CuckooSubFilter::CuckooSubFilter(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, + bool slot_id_encoded) + : metadata_(metadata), + filter_index_(filter_index), + num_buckets_(num_buckets), + pages_(storage, ctx, ns_key, metadata, slot_id_encoded) {} + +rocksdb::Status CuckooSubFilter::TryInsert(uint64_t hash, uint8_t fingerprint, bool *inserted) { + uint32_t bucket1_idx = getPrimaryBucketIndex(hash); + uint32_t bucket2_idx = getSecondaryBucketIndex(hash, fingerprint); + return pages_.TryInsertInCandidateBuckets(filter_index_, num_buckets_, bucket1_idx, bucket2_idx, fingerprint, + inserted); +} + +rocksdb::Status CuckooSubFilter::TryInsertPrimaryBucket(uint64_t hash, uint8_t fingerprint, bool *inserted) { + return pages_.TryInsertInBucket(filter_index_, num_buckets_, getPrimaryBucketIndex(hash), fingerprint, inserted); +} + +rocksdb::Status CuckooSubFilter::KickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, + bool *inserted) { + *inserted = false; + + uint32_t current_bucket_idx = getPrimaryBucketIndex(hash); + uint8_t current_fp = fingerprint; + uint32_t victim_slot = 0; + + for (uint16_t iteration = 0; iteration < max_iterations; ++iteration) { + uint8_t old_fp = 0; + auto s = pages_.GetBucketSlot(filter_index_, num_buckets_, current_bucket_idx, victim_slot, &old_fp); + if (!s.ok()) return s; + s = pages_.SetBucketSlot(filter_index_, num_buckets_, current_bucket_idx, victim_slot, current_fp); + if (!s.ok()) return s; + current_fp = old_fp; + + if (current_fp == 0) { + *inserted = true; + return rocksdb::Status::OK(); + } + + uint32_t alt_bucket_idx = CuckooFilterHelper::GetAltBucketIndex(current_bucket_idx, current_fp, num_buckets_); + + bool inserted_in_alt_bucket = false; + s = pages_.TryInsertInBucket(filter_index_, num_buckets_, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); + if (!s.ok()) return s; + if (inserted_in_alt_bucket) { + *inserted = true; + return rocksdb::Status::OK(); + } + + current_bucket_idx = alt_bucket_idx; + victim_slot = (victim_slot + 1) % metadata_.bucket_size; + } + + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooSubFilter::WriteToBatch(rocksdb::WriteBatchBase *batch) { + return pages_.WriteBackDirtyPages(batch); +} + +uint32_t CuckooSubFilter::getPrimaryBucketIndex(uint64_t hash) const { return hash % num_buckets_; } + +uint32_t CuckooSubFilter::getSecondaryBucketIndex(uint64_t hash, uint8_t fingerprint) const { + return CuckooFilterHelper::GetAltHash(fingerprint, hash) % num_buckets_; +} + +} // namespace redis diff --git a/src/types/cuckoo_filter_sub_filter.h b/src/types/cuckoo_filter_sub_filter.h new file mode 100644 index 00000000000..6f38b966197 --- /dev/null +++ b/src/types/cuckoo_filter_sub_filter.h @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include + +#include + +#include "cuckoo_filter_page.h" +#include "storage/redis_metadata.h" + +namespace redis { + +class CuckooSubFilter { + public: + CuckooSubFilter(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, + bool slot_id_encoded); + + uint16_t Index() const { return filter_index_; } + uint32_t NumBuckets() const { return num_buckets_; } + + rocksdb::Status TryInsert(uint64_t hash, uint8_t fingerprint, bool *inserted); + rocksdb::Status TryInsertPrimaryBucket(uint64_t hash, uint8_t fingerprint, bool *inserted); + rocksdb::Status KickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, bool *inserted); + rocksdb::Status WriteToBatch(rocksdb::WriteBatchBase *batch); + + private: + uint32_t getPrimaryBucketIndex(uint64_t hash) const; + uint32_t getSecondaryBucketIndex(uint64_t hash, uint8_t fingerprint) const; + + const CuckooChainMetadata &metadata_; + uint16_t filter_index_ = 0; + uint32_t num_buckets_ = 0; + CuckooPageSet pages_; +}; + +} // namespace redis diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 4af7747957a..41064c2d89f 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -23,7 +23,7 @@ #include #include "cuckoo_filter.h" -#include "cuckoo_filter_page.h" +#include "cuckoo_filter_sub_filter.h" #include "logging.h" namespace redis { @@ -180,24 +180,18 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co s = GetFilterNumBuckets(metadata, current_filter_idx, &num_buckets); if (!s.ok()) return s; - // Calculate bucket indices - uint32_t bucket1_idx = hash % num_buckets; - uint64_t alt_hash = CuckooFilterHelper::GetAltHash(fingerprint, hash); - uint32_t bucket2_idx = alt_hash % num_buckets; - - CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); + CuckooSubFilter sub_filter(storage_, ctx, ns_key, metadata, current_filter_idx, num_buckets, + storage_->IsSlotIdEncoded()); bool inserted = false; - s = pages.TryInsertInCandidateBuckets(current_filter_idx, num_buckets, bucket1_idx, bucket2_idx, fingerprint, - &inserted); + s = sub_filter.TryInsert(hash, fingerprint, &inserted); if (!s.ok()) return s; if (inserted) { - // Successfully inserted, write to storage atomically auto batch = storage_->GetWriteBatchBase(); WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - s = pages.WriteBackDirtyPages(batch.Get()); + s = sub_filter.WriteToBatch(batch.Get()); if (!s.ok()) return s; metadata.size++; @@ -222,11 +216,15 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co bool inserted = false; auto batch = storage_->GetWriteBatchBase(); - s = kickOutInsert(ctx, ns_key, metadata, last_filter_idx, num_buckets, fingerprint, hash, &inserted, batch.Get()); + CuckooSubFilter last_filter(storage_, ctx, ns_key, metadata, last_filter_idx, num_buckets, + storage_->IsSlotIdEncoded()); + s = last_filter.KickOutInsert(hash, fingerprint, metadata.max_iterations, &inserted); if (s.ok() && inserted) { WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; + s = last_filter.WriteToBatch(batch.Get()); + if (!s.ok()) return s; metadata.size++; std::string metadata_bytes; @@ -257,9 +255,9 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co } if (!s.ok()) return s; - uint32_t bucket1_idx = hash % new_num_buckets; - CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); - s = pages.TryInsertInBucket(new_filter_idx, new_num_buckets, bucket1_idx, fingerprint, &inserted); + CuckooSubFilter new_filter(storage_, ctx, ns_key, metadata, new_filter_idx, new_num_buckets, + storage_->IsSlotIdEncoded()); + s = new_filter.TryInsertPrimaryBucket(hash, fingerprint, &inserted); if (!s.ok()) return s; if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); @@ -267,7 +265,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); if (!s.ok()) return s; - s = pages.WriteBackDirtyPages(batch.Get()); + s = new_filter.WriteToBatch(batch.Get()); if (!s.ok()) return s; metadata.size++; @@ -288,51 +286,4 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co return rocksdb::Status::Aborted("filter is full"); } -rocksdb::Status CuckooChain::kickOutInsert(engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, bool *inserted, - rocksdb::WriteBatchBase *batch) { - *inserted = false; - CuckooPageSet pages(storage_, ctx, ns_key, metadata, storage_->IsSlotIdEncoded()); - - // Start from bucket1 - uint32_t current_bucket_idx = hash % num_buckets; - uint8_t current_fp = fingerprint; - uint32_t victim_slot = 0; - - // Try to kick out existing fingerprints (all operations in memory) - for (uint16_t iteration = 0; iteration < metadata.max_iterations; ++iteration) { - // Swap fingerprint with victim slot - uint8_t old_fp = 0; - auto s = pages.GetBucketSlot(filter_index, num_buckets, current_bucket_idx, victim_slot, &old_fp); - if (!s.ok()) return s; - s = pages.SetBucketSlot(filter_index, num_buckets, current_bucket_idx, victim_slot, current_fp); - if (!s.ok()) return s; - current_fp = old_fp; - - // If kicked-out fingerprint is 0 (empty), we successfully inserted - if (current_fp == 0) { - *inserted = true; - break; - } - - uint32_t alt_bucket_idx = CuckooFilterHelper::GetAltBucketIndex(current_bucket_idx, current_fp, num_buckets); - - bool inserted_in_alt_bucket = false; - s = pages.TryInsertInBucket(filter_index, num_buckets, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); - if (!s.ok()) return s; - if (inserted_in_alt_bucket) { - *inserted = true; - break; - } - - // Move to alternate bucket and try next victim slot - current_bucket_idx = alt_bucket_idx; - victim_slot = (victim_slot + 1) % metadata.bucket_size; - } - - if (*inserted) return pages.WriteBackDirtyPages(batch); - return rocksdb::Status::OK(); -} - } // namespace redis diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index c7bb9e24e95..608798977fd 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -50,11 +50,6 @@ class CuckooChain : public Database { rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); static rocksdb::Status validateMetadata(const CuckooChainMetadata &metadata); - - // Kick-out insertion: try to insert fingerprint by evicting existing ones - rocksdb::Status kickOutInsert(engine::Context &ctx, const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t num_buckets, uint8_t fingerprint, uint64_t hash, - bool *inserted, rocksdb::WriteBatchBase *batch); }; } // namespace redis From 8a88aa94af6b19055e09dd741a471e31cacbdf12 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 19 May 2026 19:20:16 +0800 Subject: [PATCH 23/34] remove: comment --- src/storage/redis_metadata.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 5a6738458cf..2f14e76239b 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -347,8 +347,6 @@ class CuckooChainMetadata : public Metadata { uint16_t expansion; /// The capacity of the first filter. - /// Later sub-filter capacities are derived as base_capacity * expansion^n after restart, - /// so we need to persist the base value in metadata. uint64_t base_capacity; /// Number of fingerprints per bucket From ced9bc642554af4bf81f5fa9b2cc4b559e0755d3 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 19 May 2026 20:27:22 +0800 Subject: [PATCH 24/34] fix: MBbloomCF --- src/storage/redis_metadata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 2f14e76239b..937d5aae972 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -60,7 +60,7 @@ enum RedisType : uint8_t { inline constexpr const std::array RedisTypeNames = { "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", - "stream", "MBbloomCF", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; + "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "MBbloomCF"}; struct RedisTypes { RedisTypes(std::initializer_list list) { From 9d7f5e81564c8a9ce191d0df548edeb7641e7663 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 19 May 2026 21:19:44 +0800 Subject: [PATCH 25/34] fix: CuckooPageSet->CuckooPageCache --- src/types/cuckoo_filter_page.cc | 89 +++++-------- src/types/cuckoo_filter_page.h | 12 +- src/types/cuckoo_filter_sub_filter.cc | 10 +- src/types/cuckoo_filter_sub_filter.h | 2 +- .../cppunit/types/cuckoo_filter_page_test.cc | 117 +++++++++--------- 5 files changed, 100 insertions(+), 130 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index 7fef71f5a70..8786131f9f9 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -55,35 +55,32 @@ std::string GetCuckooPageKey(const Slice &ns_key, const CuckooChainMetadata &met } // namespace -CuckooPageSet::CuckooPageSet(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, bool slot_id_encoded) +CuckooPageCache::CuckooPageCache(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, bool slot_id_encoded) : storage_(storage), ctx_(ctx), ns_key_(ns_key.ToString()), metadata_(metadata), slot_id_encoded_(slot_id_encoded) {} -rocksdb::Status CuckooPageSet::TryInsertInCandidateBuckets(uint16_t filter_index, uint32_t num_buckets, - uint32_t bucket1_index, uint32_t bucket2_index, - uint8_t fingerprint, bool *inserted) { - *inserted = false; - BucketRef bucket1, bucket2; - auto s = ensureCandidateBucketsLoaded(filter_index, num_buckets, bucket1_index, bucket2_index, &bucket1, &bucket2); +rocksdb::Status CuckooPageCache::PrefetchBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + uint32_t bucket2_index) { + BucketLocation location1, location2; + auto s = resolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); + if (!s.ok()) return s; + s = resolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); if (!s.ok()) return s; - size_t slot_idx = 0; - if (tryInsertInBucketRef(bucket1, fingerprint, &slot_idx)) { - *inserted = true; - return rocksdb::Status::OK(); - } - if (bucket1_index != bucket2_index && tryInsertInBucketRef(bucket2, fingerprint, &slot_idx)) { - *inserted = true; + std::vector missing_locations; + if (pages_.find(location1.page_key) == pages_.end()) missing_locations.push_back(location1); + if (location2.page_key != location1.page_key && pages_.find(location2.page_key) == pages_.end()) { + missing_locations.push_back(location2); } - return rocksdb::Status::OK(); + return loadPages(missing_locations); } -rocksdb::Status CuckooPageSet::TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - uint8_t fingerprint, bool *inserted) { +rocksdb::Status CuckooPageCache::TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint8_t fingerprint, bool *inserted) { *inserted = false; BucketRef bucket; auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); @@ -94,8 +91,8 @@ rocksdb::Status CuckooPageSet::TryInsertInBucket(uint16_t filter_index, uint32_t return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - uint32_t slot_idx, uint8_t *fingerprint) { +rocksdb::Status CuckooPageCache::GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint32_t slot_idx, uint8_t *fingerprint) { if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; @@ -105,8 +102,8 @@ rocksdb::Status CuckooPageSet::GetBucketSlot(uint16_t filter_index, uint32_t num return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - uint32_t slot_idx, uint8_t fingerprint) { +rocksdb::Status CuckooPageCache::SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + uint32_t slot_idx, uint8_t fingerprint) { if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; @@ -116,7 +113,7 @@ rocksdb::Status CuckooPageSet::SetBucketSlot(uint16_t filter_index, uint32_t num return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::WriteBackDirtyPages(rocksdb::WriteBatchBase *batch) { +rocksdb::Status CuckooPageCache::WriteBackDirtyPages(rocksdb::WriteBatchBase *batch) { for (const auto &entry : pages_) { if (!entry.second.is_dirty) continue; auto s = batch->Put(entry.first, entry.second.data); @@ -125,8 +122,8 @@ rocksdb::Status CuckooPageSet::WriteBackDirtyPages(rocksdb::WriteBatchBase *batc return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - BucketLocation *location) const { +rocksdb::Status CuckooPageCache::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, + uint32_t bucket_index, BucketLocation *location) const { if (metadata_.bucket_size == 0 || num_buckets == 0 || bucket_index >= num_buckets) { return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); } @@ -139,8 +136,8 @@ rocksdb::Status CuckooPageSet::resolveBucketLocation(uint16_t filter_index, uint return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - BucketRef *bucket) { +rocksdb::Status CuckooPageCache::ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, + BucketRef *bucket) { BucketLocation location; auto s = resolveBucketLocation(filter_index, num_buckets, bucket_index, &location); if (!s.ok()) return s; @@ -155,33 +152,7 @@ rocksdb::Status CuckooPageSet::ensureBucketLoaded(uint16_t filter_index, uint32_ return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::ensureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, - uint32_t bucket1_index, uint32_t bucket2_index, - BucketRef *bucket1, BucketRef *bucket2) { - BucketLocation location1, location2; - auto s = resolveBucketLocation(filter_index, num_buckets, bucket1_index, &location1); - if (!s.ok()) return s; - s = resolveBucketLocation(filter_index, num_buckets, bucket2_index, &location2); - if (!s.ok()) return s; - - std::vector missing_locations; - if (pages_.find(location1.page_key) == pages_.end()) missing_locations.push_back(location1); - if (location2.page_key != location1.page_key && pages_.find(location2.page_key) == pages_.end()) { - missing_locations.push_back(location2); - } - s = loadPages(missing_locations); - if (!s.ok()) return s; - - bucket1->page = &pages_.at(location1.page_key); - bucket1->offset = location1.offset; - bucket1->size = metadata_.bucket_size; - bucket2->page = &pages_.at(location2.page_key); - bucket2->offset = location2.offset; - bucket2->size = metadata_.bucket_size; - return rocksdb::Status::OK(); -} - -rocksdb::Status CuckooPageSet::loadPage(const BucketLocation &location, PageEntry **page) { +rocksdb::Status CuckooPageCache::loadPage(const BucketLocation &location, PageEntry **page) { auto iter = pages_.find(location.page_key); if (iter != pages_.end()) { *page = &iter->second; @@ -199,7 +170,7 @@ rocksdb::Status CuckooPageSet::loadPage(const BucketLocation &location, PageEntr return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::loadPages(const std::vector &locations) { +rocksdb::Status CuckooPageCache::loadPages(const std::vector &locations) { if (locations.empty()) return rocksdb::Status::OK(); if (locations.size() == 1) { PageEntry *page = nullptr; @@ -225,7 +196,7 @@ rocksdb::Status CuckooPageSet::loadPages(const std::vector &loca return rocksdb::Status::OK(); } -rocksdb::Status CuckooPageSet::normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) { +rocksdb::Status CuckooPageCache::normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) { if (!status.ok() && !status.IsNotFound()) return status; if (status.IsNotFound()) page->data.clear(); if (page->data.size() > expected_size) return rocksdb::Status::Corruption("invalid cuckoo filter page size"); @@ -233,7 +204,7 @@ rocksdb::Status CuckooPageSet::normalizePage(const rocksdb::Status &status, uint return rocksdb::Status::OK(); } -bool CuckooPageSet::tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { +bool CuckooPageCache::tryInsertInBucketRef(const BucketRef &bucket, uint8_t fingerprint, size_t *slot_idx) { for (size_t i = 0; i < bucket.size; ++i) { size_t offset = bucket.offset + i; if (static_cast(bucket.page->data[offset]) == 0) { @@ -246,11 +217,11 @@ bool CuckooPageSet::tryInsertInBucketRef(const BucketRef &bucket, uint8_t finger return false; } -uint8_t CuckooPageSet::getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) { +uint8_t CuckooPageCache::getBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx) { return static_cast(bucket.page->data[bucket.offset + slot_idx]); } -void CuckooPageSet::setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { +void CuckooPageCache::setBucketRefSlot(const BucketRef &bucket, uint32_t slot_idx, uint8_t fingerprint) { bucket.page->data[bucket.offset + slot_idx] = static_cast(fingerprint); bucket.page->is_dirty = true; } diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index 9e06027685b..6ec67cbda3c 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -33,13 +33,13 @@ namespace redis { -class CuckooPageSet { +class CuckooPageCache { public: - CuckooPageSet(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, bool slot_id_encoded); + CuckooPageCache(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, + const CuckooChainMetadata &metadata, bool slot_id_encoded); - rocksdb::Status TryInsertInCandidateBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, - uint32_t bucket2_index, uint8_t fingerprint, bool *inserted); + rocksdb::Status PrefetchBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, + uint32_t bucket2_index); rocksdb::Status TryInsertInBucket(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint8_t fingerprint, bool *inserted); rocksdb::Status GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint32_t slot_idx, @@ -70,8 +70,6 @@ class CuckooPageSet { BucketLocation *location) const; rocksdb::Status ensureBucketLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketRef *bucket); - rocksdb::Status ensureCandidateBucketsLoaded(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, - uint32_t bucket2_index, BucketRef *bucket1, BucketRef *bucket2); rocksdb::Status loadPage(const BucketLocation &location, PageEntry **page); rocksdb::Status loadPages(const std::vector &locations); static rocksdb::Status normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page); diff --git a/src/types/cuckoo_filter_sub_filter.cc b/src/types/cuckoo_filter_sub_filter.cc index 9187f67fc4c..8a1f651b1fa 100644 --- a/src/types/cuckoo_filter_sub_filter.cc +++ b/src/types/cuckoo_filter_sub_filter.cc @@ -33,10 +33,16 @@ CuckooSubFilter::CuckooSubFilter(engine::Storage *storage, engine::Context &ctx, pages_(storage, ctx, ns_key, metadata, slot_id_encoded) {} rocksdb::Status CuckooSubFilter::TryInsert(uint64_t hash, uint8_t fingerprint, bool *inserted) { + *inserted = false; uint32_t bucket1_idx = getPrimaryBucketIndex(hash); uint32_t bucket2_idx = getSecondaryBucketIndex(hash, fingerprint); - return pages_.TryInsertInCandidateBuckets(filter_index_, num_buckets_, bucket1_idx, bucket2_idx, fingerprint, - inserted); + auto s = pages_.PrefetchBuckets(filter_index_, num_buckets_, bucket1_idx, bucket2_idx); + if (!s.ok()) return s; + + s = pages_.TryInsertInBucket(filter_index_, num_buckets_, bucket1_idx, fingerprint, inserted); + if (!s.ok() || *inserted || bucket1_idx == bucket2_idx) return s; + + return pages_.TryInsertInBucket(filter_index_, num_buckets_, bucket2_idx, fingerprint, inserted); } rocksdb::Status CuckooSubFilter::TryInsertPrimaryBucket(uint64_t hash, uint8_t fingerprint, bool *inserted) { diff --git a/src/types/cuckoo_filter_sub_filter.h b/src/types/cuckoo_filter_sub_filter.h index 6f38b966197..fa37c478f92 100644 --- a/src/types/cuckoo_filter_sub_filter.h +++ b/src/types/cuckoo_filter_sub_filter.h @@ -51,7 +51,7 @@ class CuckooSubFilter { const CuckooChainMetadata &metadata_; uint16_t filter_index_ = 0; uint32_t num_buckets_ = 0; - CuckooPageSet pages_; + CuckooPageCache pages_; }; } // namespace redis diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc index d1415fac291..3f9cb2891a9 100644 --- a/tests/cppunit/types/cuckoo_filter_page_test.cc +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -30,17 +30,18 @@ #include "test_base.h" #include "types/redis_cuckoo_chain.h" -class RedisCuckooPageSetTest : public TestBase { +class RedisCuckooPageCacheTest : public TestBase { public: - RedisCuckooPageSetTest(const RedisCuckooPageSetTest &) = delete; - RedisCuckooPageSetTest &operator=(const RedisCuckooPageSetTest &) = delete; - RedisCuckooPageSetTest(RedisCuckooPageSetTest &&) = delete; - RedisCuckooPageSetTest &operator=(RedisCuckooPageSetTest &&) = delete; + RedisCuckooPageCacheTest(const RedisCuckooPageCacheTest &) = delete; + RedisCuckooPageCacheTest &operator=(const RedisCuckooPageCacheTest &) = delete; + RedisCuckooPageCacheTest(RedisCuckooPageCacheTest &&) = delete; + RedisCuckooPageCacheTest &operator=(RedisCuckooPageCacheTest &&) = delete; protected: - explicit RedisCuckooPageSetTest() : TestBase(), db_(std::make_unique(storage_.get(), "cuckoo_ns")) {} + explicit RedisCuckooPageCacheTest() + : TestBase(), db_(std::make_unique(storage_.get(), "cuckoo_ns")) {} - ~RedisCuckooPageSetTest() override { db_.reset(); } + ~RedisCuckooPageCacheTest() override { db_.reset(); } void SetUp() override { const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); @@ -91,9 +92,9 @@ class RedisCuckooPageSetTest : public TestBase { std::string ns_key_; }; -TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { +TEST_F(RedisCuckooPageCacheTest, TryInsertWritesSmallPage) { auto metadata = makeMetadata(4); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 1, 11, &inserted); @@ -114,9 +115,9 @@ TEST_F(RedisCuckooPageSetTest, TryInsertWritesSmallPage) { EXPECT_EQ(page.substr(5, 3), std::string(3, 0)); } -TEST_F(RedisCuckooPageSetTest, TryInsertWritesLastPartialPage) { +TEST_F(RedisCuckooPageCacheTest, TryInsertWritesLastPartialPage) { auto metadata = makeMetadata(4); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; auto s = pages.TryInsertInBucket(0, 513, 512, 33, &inserted); @@ -139,61 +140,55 @@ TEST_F(RedisCuckooPageSetTest, TryInsertWritesLastPartialPage) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } -TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnSamePagePreserveBothBuckets) { +TEST_F(RedisCuckooPageCacheTest, PrefetchBucketsOnSamePageLoadsCachedPage) { auto metadata = makeMetadata(2); - writePage(makePageKey(metadata, 0, 0), std::string{static_cast(11), static_cast(12), 0, 0, 0, 0, 0, 0}); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + auto page_key = makePageKey(metadata, 0, 0); + writePage(page_key, std::string{static_cast(11), static_cast(12), static_cast(21), + static_cast(22), 0, 0, 0, 0}); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); - bool inserted = false; - auto s = pages.TryInsertInCandidateBuckets(0, 4, 0, 1, 22, &inserted); + auto s = pages.PrefetchBuckets(0, 4, 0, 1); ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_TRUE(inserted); + writePage(page_key, std::string(8, static_cast(99))); - auto batch = storage_->GetWriteBatchBase(); - s = pages.WriteBackDirtyPages(batch.Get()); + uint8_t fingerprint = 0; + s = pages.GetBucketSlot(0, 4, 0, 0, &fingerprint); ASSERT_TRUE(s.ok()) << s.ToString(); - commitBatch(batch.Get()); - - std::string page; - s = readPage(makePageKey(metadata, 0, 0), &page); + EXPECT_EQ(fingerprint, 11); + s = pages.GetBucketSlot(0, 4, 1, 1, &fingerprint); ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(page.size(), 8); - EXPECT_EQ(static_cast(page[0]), 11); - EXPECT_EQ(static_cast(page[1]), 12); - EXPECT_EQ(static_cast(page[2]), 22); - EXPECT_EQ(static_cast(page[3]), 0); - EXPECT_EQ(page.substr(4), std::string(4, 0)); + EXPECT_EQ(fingerprint, 22); } -TEST_F(RedisCuckooPageSetTest, CandidateBucketsOnDifferentPagesWriteBothPages) { +TEST_F(RedisCuckooPageCacheTest, PrefetchBucketsOnDifferentPagesLoadsBothPages) { auto metadata = makeMetadata(4); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); - - auto s = pages.SetBucketSlot(0, 1025, 0, 0, 44); + auto page0_key = makePageKey(metadata, 0, 0); + auto page1_key = makePageKey(metadata, 0, 1); + std::string page0(metadata.page_size, 0); + std::string page1(metadata.page_size, 0); + page0[0] = static_cast(44); + page1[0] = static_cast(55); + writePage(page0_key, page0); + writePage(page1_key, page1); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + + auto s = pages.PrefetchBuckets(0, 1025, 0, 512); ASSERT_TRUE(s.ok()) << s.ToString(); - s = pages.SetBucketSlot(0, 1025, 512, 0, 55); - ASSERT_TRUE(s.ok()) << s.ToString(); - - auto batch = storage_->GetWriteBatchBase(); - s = pages.WriteBackDirtyPages(batch.Get()); - ASSERT_TRUE(s.ok()) << s.ToString(); - commitBatch(batch.Get()); + writePage(page0_key, std::string(metadata.page_size, static_cast(99))); + writePage(page1_key, std::string(metadata.page_size, static_cast(88))); - std::string page; - s = readPage(makePageKey(metadata, 0, 0), &page); + uint8_t fingerprint = 0; + s = pages.GetBucketSlot(0, 1025, 0, 0, &fingerprint); ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(page.size(), metadata.page_size); - EXPECT_EQ(static_cast(page[0]), 44); - - s = readPage(makePageKey(metadata, 0, 1), &page); + EXPECT_EQ(fingerprint, 44); + s = pages.GetBucketSlot(0, 1025, 512, 0, &fingerprint); ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_EQ(page.size(), metadata.page_size); - EXPECT_EQ(static_cast(page[0]), 55); + EXPECT_EQ(fingerprint, 55); } -TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { +TEST_F(RedisCuckooPageCacheTest, NonDefaultPageSizeControlsBucketMapping) { auto metadata = makeMetadata(4, 1, 8); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; auto s = pages.TryInsertInBucket(0, 3, 2, 66, &inserted); @@ -216,11 +211,11 @@ TEST_F(RedisCuckooPageSetTest, NonDefaultPageSizeControlsBucketMapping) { EXPECT_EQ(page.substr(1, 3), std::string(3, 0)); } -TEST_F(RedisCuckooPageSetTest, SetBucketSlotWritesOnlyTargetSlot) { +TEST_F(RedisCuckooPageCacheTest, SetBucketSlotWritesOnlyTargetSlot) { auto metadata = makeMetadata(4); std::string expected(16, static_cast(7)); writePage(makePageKey(metadata, 0, 0), expected); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); auto s = pages.SetBucketSlot(0, 4, 2, 1, 88); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -237,9 +232,9 @@ TEST_F(RedisCuckooPageSetTest, SetBucketSlotWritesOnlyTargetSlot) { EXPECT_EQ(page, expected); } -TEST_F(RedisCuckooPageSetTest, InvalidBucketAndSlotArguments) { +TEST_F(RedisCuckooPageCacheTest, InvalidBucketAndSlotArguments) { auto metadata = makeMetadata(4); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); uint8_t fingerprint = 0; auto s = pages.GetBucketSlot(0, 2, 0, 4, &fingerprint); @@ -265,10 +260,10 @@ TEST_F(RedisCuckooPageSetTest, InvalidBucketAndSlotArguments) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } -TEST_F(RedisCuckooPageSetTest, OversizedPageReturnsCorruption) { +TEST_F(RedisCuckooPageCacheTest, OversizedPageReturnsCorruption) { auto metadata = makeMetadata(4); writePage(makePageKey(metadata, 0, 0), std::string(9, static_cast(1))); - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); uint8_t fingerprint = 0; auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); @@ -280,10 +275,10 @@ TEST_F(RedisCuckooPageSetTest, OversizedPageReturnsCorruption) { EXPECT_EQ(page, std::string(9, static_cast(1))); } -TEST_F(RedisCuckooPageSetTest, DirtyPagesAreDiscardedWithoutWriteBack) { +TEST_F(RedisCuckooPageCacheTest, DirtyPagesAreDiscardedWithoutWriteBack) { auto metadata = makeMetadata(4); { - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -295,11 +290,11 @@ TEST_F(RedisCuckooPageSetTest, DirtyPagesAreDiscardedWithoutWriteBack) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } -TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { +TEST_F(RedisCuckooPageCacheTest, PageKeyUsesMetadataVersion) { auto old_metadata = makeMetadata(4, 100); auto new_metadata = makeMetadata(4, 101); { - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, old_metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, old_metadata, storage_->IsSlotIdEncoded()); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -311,7 +306,7 @@ TEST_F(RedisCuckooPageSetTest, PageKeyUsesMetadataVersion) { commitBatch(batch.Get()); } - redis::CuckooPageSet pages(storage_.get(), *ctx_, ns_key_, new_metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, new_metadata, storage_->IsSlotIdEncoded()); uint8_t fingerprint = 0; auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); ASSERT_TRUE(s.ok()) << s.ToString(); From 3c5c3a7322dcf75131123eec83a346d549c42b5e Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Wed, 20 May 2026 13:48:23 +0800 Subject: [PATCH 26/34] fix: ci --- src/storage/redis_metadata.h | 2 +- tests/gocase/unit/type/bloom/bloom_test.go | 2 +- tests/gocase/unit/type/bloom/cuckoo_filter_test.go | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 937d5aae972..5ad5677a1db 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -59,7 +59,7 @@ enum RedisType : uint8_t { }; inline constexpr const std::array RedisTypeNames = { - "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", + "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "MBbloomCF"}; struct RedisTypes { diff --git a/tests/gocase/unit/type/bloom/bloom_test.go b/tests/gocase/unit/type/bloom/bloom_test.go index 129526d0135..d6ac6af08bf 100644 --- a/tests/gocase/unit/type/bloom/bloom_test.go +++ b/tests/gocase/unit/type/bloom/bloom_test.go @@ -445,7 +445,7 @@ func testBloom(t *testing.T, configs util.KvrocksServerConfigs) { t.Run("Get type of bloom filter", func(t *testing.T) { require.NoError(t, rdb.Del(ctx, key).Err()) require.NoError(t, rdb.Do(ctx, "bf.reserve", key, "0.02", "1000").Err()) - require.Equal(t, "MBbloomCF", rdb.Type(ctx, key).Val()) + require.Equal(t, "MBbloom--", rdb.Type(ctx, key).Val()) }) t.Run("Get Card of bloom filter", func(t *testing.T) { diff --git a/tests/gocase/unit/type/bloom/cuckoo_filter_test.go b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go index 1a87e3cc8ef..fd6517fd892 100644 --- a/tests/gocase/unit/type/bloom/cuckoo_filter_test.go +++ b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go @@ -38,6 +38,7 @@ func TestCuckooFilter(t *testing.T) { key := "test_cuckoo_filter_add_create" require.NoError(t, rdb.Del(ctx, key).Err()) require.Equal(t, int64(1), rdb.Do(ctx, "cf.add", key, "item").Val()) + require.Equal(t, "MBbloomCF", rdb.Type(ctx, key).Val()) require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err(), "already exists") }) @@ -52,4 +53,11 @@ func TestCuckooFilter(t *testing.T) { require.NoError(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_max", "1000", "EXPANSION", "32768").Err()) require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "test_cuckoo_filter_expansion_too_large", "1000", "EXPANSION", "32769").Err(), "expansion must be between 0 and 32768") }) + + t.Run("Reserve creates cuckoo filter type", func(t *testing.T) { + key := "test_cuckoo_filter_reserve_type" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err()) + require.Equal(t, "MBbloomCF", rdb.Type(ctx, key).Val()) + }) } From dc4fe95b996dee46a423ed03b5b1c839970530cb Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Fri, 22 May 2026 14:00:30 +0800 Subject: [PATCH 27/34] optimize CuckooPageCache --- src/types/cuckoo_filter.h | 24 ++++ src/types/cuckoo_filter_page.cc | 36 ++--- src/types/cuckoo_filter_page.h | 9 +- src/types/cuckoo_filter_sub_filter.cc | 10 +- src/types/cuckoo_filter_sub_filter.h | 9 +- src/types/redis_cuckoo_chain.cc | 44 ++---- .../cppunit/types/cuckoo_filter_page_test.cc | 33 +++-- .../unit/type/bloom/cuckoo_filter_test.go | 127 ++++++++++++++++++ 8 files changed, 218 insertions(+), 74 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 6e20d8b8004..9d1dd6ebdff 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -102,6 +102,30 @@ class CuckooFilterHelper { // Convenience overload for std::string static uint64_t Hash(const std::string &item) { return Hash(item.data(), item.size()); } + + // Calculate the capacity of a sub-filter at a given index in the chain. + // Returns false if overflow would occur. + static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, uint16_t filter_index, + uint64_t *filter_capacity) { + uint64_t capacity = base_capacity; + for (uint16_t i = 0; i < filter_index; ++i) { + if (expansion != 0 && capacity > std::numeric_limits::max() / expansion) return false; + capacity *= expansion; + } + *filter_capacity = capacity; + return true; + } + + // Calculate the number of buckets for a sub-filter at a given index. + static rocksdb::Status GetFilterNumBuckets(uint64_t base_capacity, uint16_t expansion, uint8_t bucket_size, + uint16_t filter_index, uint32_t *num_buckets) { + uint64_t filter_capacity = 0; + if (!CalculateFilterCapacity(base_capacity, expansion, filter_index, &filter_capacity) || + !IsCapacitySupported(filter_capacity, bucket_size)) { + return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + } + return CalculateRequiredBuckets(filter_capacity, bucket_size, num_buckets); + } }; } // namespace redis diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index 8786131f9f9..b053aae4e27 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -22,7 +22,7 @@ #include -#include "cuckoo_filter.h" +#include "common/encoding.h" #include "storage/redis_db.h" namespace redis { @@ -45,23 +45,25 @@ uint32_t GetPageValueSize(uint32_t page_index, uint32_t num_buckets, uint32_t bu return page_bucket_count * bucket_size; } -std::string GetCuckooPageKey(const Slice &ns_key, const CuckooChainMetadata &metadata, bool slot_id_encoded, - uint16_t filter_index, uint32_t page_index) { +std::string GetCuckooPageKey(const Slice &ns_key, uint64_t version, bool slot_id_encoded, uint16_t filter_index, + uint32_t page_index) { std::string sub_key; PutFixed16(&sub_key, filter_index); PutFixed32(&sub_key, page_index); - return InternalKey(ns_key, sub_key, metadata.version, slot_id_encoded).Encode(); + return InternalKey(ns_key, sub_key, version, slot_id_encoded).Encode(); } } // namespace CuckooPageCache::CuckooPageCache(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, bool slot_id_encoded) + bool slot_id_encoded, uint64_t version, uint8_t bucket_size, uint32_t page_size) : storage_(storage), ctx_(ctx), ns_key_(ns_key.ToString()), - metadata_(metadata), - slot_id_encoded_(slot_id_encoded) {} + slot_id_encoded_(slot_id_encoded), + version_(version), + bucket_size_(bucket_size), + page_size_(page_size) {} rocksdb::Status CuckooPageCache::PrefetchBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, uint32_t bucket2_index) { @@ -93,7 +95,7 @@ rocksdb::Status CuckooPageCache::TryInsertInBucket(uint16_t filter_index, uint32 rocksdb::Status CuckooPageCache::GetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, uint32_t slot_idx, uint8_t *fingerprint) { - if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); + if (slot_idx >= bucket_size_) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); @@ -103,8 +105,8 @@ rocksdb::Status CuckooPageCache::GetBucketSlot(uint16_t filter_index, uint32_t n } rocksdb::Status CuckooPageCache::SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - uint32_t slot_idx, uint8_t fingerprint) { - if (slot_idx >= metadata_.bucket_size) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); + uint32_t slot_idx, uint8_t fingerprint) { + if (slot_idx >= bucket_size_) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; auto s = ensureBucketLoaded(filter_index, num_buckets, bucket_index, &bucket); @@ -123,16 +125,16 @@ rocksdb::Status CuckooPageCache::WriteBackDirtyPages(rocksdb::WriteBatchBase *ba } rocksdb::Status CuckooPageCache::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, - uint32_t bucket_index, BucketLocation *location) const { - if (metadata_.bucket_size == 0 || num_buckets == 0 || bucket_index >= num_buckets) { + uint32_t bucket_index, BucketLocation *location) const { + if (bucket_size_ == 0 || num_buckets == 0 || bucket_index >= num_buckets) { return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); } - uint32_t buckets_per_page = GetBucketsPerPage(metadata_.page_size, metadata_.bucket_size); + uint32_t buckets_per_page = GetBucketsPerPage(page_size_, bucket_size_); uint32_t page_index = GetPageIndex(bucket_index, buckets_per_page); - location->page_key = GetCuckooPageKey(ns_key_, metadata_, slot_id_encoded_, filter_index, page_index); - location->offset = GetBucketOffset(bucket_index, buckets_per_page, metadata_.bucket_size); - location->expected_page_size = GetPageValueSize(page_index, num_buckets, buckets_per_page, metadata_.bucket_size); + location->page_key = GetCuckooPageKey(ns_key_, version_, slot_id_encoded_, filter_index, page_index); + location->offset = GetBucketOffset(bucket_index, buckets_per_page, bucket_size_); + location->expected_page_size = GetPageValueSize(page_index, num_buckets, buckets_per_page, bucket_size_); return rocksdb::Status::OK(); } @@ -148,7 +150,7 @@ rocksdb::Status CuckooPageCache::ensureBucketLoaded(uint16_t filter_index, uint3 bucket->page = page; bucket->offset = location.offset; - bucket->size = metadata_.bucket_size; + bucket->size = bucket_size_; return rocksdb::Status::OK(); } diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index 6ec67cbda3c..10d876355d1 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -28,15 +28,14 @@ #include #include -#include "storage/redis_metadata.h" #include "storage/storage.h" namespace redis { class CuckooPageCache { public: - CuckooPageCache(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, bool slot_id_encoded); + CuckooPageCache(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, bool slot_id_encoded, + uint64_t version, uint8_t bucket_size, uint32_t page_size); rocksdb::Status PrefetchBuckets(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket1_index, uint32_t bucket2_index); @@ -81,8 +80,10 @@ class CuckooPageCache { engine::Storage *storage_ = nullptr; engine::Context &ctx_; std::string ns_key_; - const CuckooChainMetadata &metadata_; bool slot_id_encoded_ = false; + uint64_t version_ = 0; + uint8_t bucket_size_ = 0; + uint32_t page_size_ = 0; std::unordered_map pages_; }; diff --git a/src/types/cuckoo_filter_sub_filter.cc b/src/types/cuckoo_filter_sub_filter.cc index 8a1f651b1fa..6589522fb66 100644 --- a/src/types/cuckoo_filter_sub_filter.cc +++ b/src/types/cuckoo_filter_sub_filter.cc @@ -25,12 +25,12 @@ namespace redis { CuckooSubFilter::CuckooSubFilter(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, - bool slot_id_encoded) - : metadata_(metadata), + bool slot_id_encoded, uint64_t version, uint8_t bucket_size, uint32_t page_size, + uint16_t filter_index, uint32_t num_buckets) + : bucket_size_(bucket_size), filter_index_(filter_index), num_buckets_(num_buckets), - pages_(storage, ctx, ns_key, metadata, slot_id_encoded) {} + pages_(storage, ctx, ns_key, slot_id_encoded, version, bucket_size, page_size) {} rocksdb::Status CuckooSubFilter::TryInsert(uint64_t hash, uint8_t fingerprint, bool *inserted) { *inserted = false; @@ -81,7 +81,7 @@ rocksdb::Status CuckooSubFilter::KickOutInsert(uint64_t hash, uint8_t fingerprin } current_bucket_idx = alt_bucket_idx; - victim_slot = (victim_slot + 1) % metadata_.bucket_size; + victim_slot = (victim_slot + 1) % bucket_size_; } return rocksdb::Status::OK(); diff --git a/src/types/cuckoo_filter_sub_filter.h b/src/types/cuckoo_filter_sub_filter.h index fa37c478f92..8b589b12a42 100644 --- a/src/types/cuckoo_filter_sub_filter.h +++ b/src/types/cuckoo_filter_sub_filter.h @@ -26,15 +26,14 @@ #include #include "cuckoo_filter_page.h" -#include "storage/redis_metadata.h" namespace redis { class CuckooSubFilter { public: - CuckooSubFilter(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, - const CuckooChainMetadata &metadata, uint16_t filter_index, uint32_t num_buckets, - bool slot_id_encoded); + CuckooSubFilter(engine::Storage *storage, engine::Context &ctx, const Slice &ns_key, bool slot_id_encoded, + uint64_t version, uint8_t bucket_size, uint32_t page_size, uint16_t filter_index, + uint32_t num_buckets); uint16_t Index() const { return filter_index_; } uint32_t NumBuckets() const { return num_buckets_; } @@ -48,7 +47,7 @@ class CuckooSubFilter { uint32_t getPrimaryBucketIndex(uint64_t hash) const; uint32_t getSecondaryBucketIndex(uint64_t hash, uint8_t fingerprint) const; - const CuckooChainMetadata &metadata_; + uint8_t bucket_size_ = 0; uint16_t filter_index_ = 0; uint32_t num_buckets_ = 0; CuckooPageCache pages_; diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 41064c2d89f..803b7ed4e3b 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,8 +20,6 @@ #include "redis_cuckoo_chain.h" -#include - #include "cuckoo_filter.h" #include "cuckoo_filter_sub_filter.h" #include "logging.h" @@ -125,27 +123,6 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } -static bool CalculateFilterCapacity(uint64_t base_capacity, uint16_t expansion, uint16_t filter_index, - uint64_t *filter_capacity) { - uint64_t capacity = base_capacity; - for (uint16_t i = 0; i < filter_index; ++i) { - if (expansion != 0 && capacity > std::numeric_limits::max() / expansion) return false; - capacity *= expansion; - } - *filter_capacity = capacity; - return true; -} - -static rocksdb::Status GetFilterNumBuckets(const CuckooChainMetadata &metadata, uint16_t filter_index, - uint32_t *num_buckets) { - uint64_t filter_capacity = 0; - if (!CalculateFilterCapacity(metadata.base_capacity, metadata.expansion, filter_index, &filter_capacity) || - !CuckooFilterHelper::IsCapacitySupported(filter_capacity, metadata.bucket_size)) { - return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); - } - return CuckooFilterHelper::CalculateRequiredBuckets(filter_capacity, metadata.bucket_size, num_buckets); -} - rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, const Slice &item, bool *added) { std::string ns_key = AppendNamespacePrefix(user_key); @@ -177,11 +154,12 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { auto current_filter_idx = static_cast(filter_idx); uint32_t num_buckets = 0; - s = GetFilterNumBuckets(metadata, current_filter_idx, &num_buckets); + s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, + current_filter_idx, &num_buckets); if (!s.ok()) return s; - CuckooSubFilter sub_filter(storage_, ctx, ns_key, metadata, current_filter_idx, num_buckets, - storage_->IsSlotIdEncoded()); + CuckooSubFilter sub_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size, current_filter_idx, num_buckets); bool inserted = false; s = sub_filter.TryInsert(hash, fingerprint, &inserted); if (!s.ok()) return s; @@ -211,13 +189,14 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // No space found in any filter, try kick-out on the last filter uint16_t last_filter_idx = metadata.n_filters - 1; uint32_t num_buckets = 0; - s = GetFilterNumBuckets(metadata, last_filter_idx, &num_buckets); + s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, + last_filter_idx, &num_buckets); if (!s.ok()) return s; bool inserted = false; auto batch = storage_->GetWriteBatchBase(); - CuckooSubFilter last_filter(storage_, ctx, ns_key, metadata, last_filter_idx, num_buckets, - storage_->IsSlotIdEncoded()); + CuckooSubFilter last_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size, last_filter_idx, num_buckets); s = last_filter.KickOutInsert(hash, fingerprint, metadata.max_iterations, &inserted); if (s.ok() && inserted) { WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); @@ -249,14 +228,15 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co // Retry insertion in the new expanded filter uint16_t new_filter_idx = metadata.n_filters - 1; uint32_t new_num_buckets = 0; - s = GetFilterNumBuckets(metadata, new_filter_idx, &new_num_buckets); + s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, + new_filter_idx, &new_num_buckets); if (s.IsCorruption()) { return rocksdb::Status::Aborted("maximum filter capacity reached"); } if (!s.ok()) return s; - CuckooSubFilter new_filter(storage_, ctx, ns_key, metadata, new_filter_idx, new_num_buckets, - storage_->IsSlotIdEncoded()); + CuckooSubFilter new_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size, new_filter_idx, new_num_buckets); s = new_filter.TryInsertPrimaryBucket(hash, fingerprint, &inserted); if (!s.ok()) return s; if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc index 3f9cb2891a9..d8ec08cc44b 100644 --- a/tests/cppunit/types/cuckoo_filter_page_test.cc +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -94,7 +94,8 @@ class RedisCuckooPageCacheTest : public TestBase { TEST_F(RedisCuckooPageCacheTest, TryInsertWritesSmallPage) { auto metadata = makeMetadata(4); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 1, 11, &inserted); @@ -117,7 +118,8 @@ TEST_F(RedisCuckooPageCacheTest, TryInsertWritesSmallPage) { TEST_F(RedisCuckooPageCacheTest, TryInsertWritesLastPartialPage) { auto metadata = makeMetadata(4); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); bool inserted = false; auto s = pages.TryInsertInBucket(0, 513, 512, 33, &inserted); @@ -145,7 +147,8 @@ TEST_F(RedisCuckooPageCacheTest, PrefetchBucketsOnSamePageLoadsCachedPage) { auto page_key = makePageKey(metadata, 0, 0); writePage(page_key, std::string{static_cast(11), static_cast(12), static_cast(21), static_cast(22), 0, 0, 0, 0}); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); auto s = pages.PrefetchBuckets(0, 4, 0, 1); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -170,7 +173,8 @@ TEST_F(RedisCuckooPageCacheTest, PrefetchBucketsOnDifferentPagesLoadsBothPages) page1[0] = static_cast(55); writePage(page0_key, page0); writePage(page1_key, page1); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); auto s = pages.PrefetchBuckets(0, 1025, 0, 512); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -188,7 +192,8 @@ TEST_F(RedisCuckooPageCacheTest, PrefetchBucketsOnDifferentPagesLoadsBothPages) TEST_F(RedisCuckooPageCacheTest, NonDefaultPageSizeControlsBucketMapping) { auto metadata = makeMetadata(4, 1, 8); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); bool inserted = false; auto s = pages.TryInsertInBucket(0, 3, 2, 66, &inserted); @@ -215,7 +220,8 @@ TEST_F(RedisCuckooPageCacheTest, SetBucketSlotWritesOnlyTargetSlot) { auto metadata = makeMetadata(4); std::string expected(16, static_cast(7)); writePage(makePageKey(metadata, 0, 0), expected); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); auto s = pages.SetBucketSlot(0, 4, 2, 1, 88); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -234,7 +240,8 @@ TEST_F(RedisCuckooPageCacheTest, SetBucketSlotWritesOnlyTargetSlot) { TEST_F(RedisCuckooPageCacheTest, InvalidBucketAndSlotArguments) { auto metadata = makeMetadata(4); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); uint8_t fingerprint = 0; auto s = pages.GetBucketSlot(0, 2, 0, 4, &fingerprint); @@ -263,7 +270,8 @@ TEST_F(RedisCuckooPageCacheTest, InvalidBucketAndSlotArguments) { TEST_F(RedisCuckooPageCacheTest, OversizedPageReturnsCorruption) { auto metadata = makeMetadata(4); writePage(makePageKey(metadata, 0, 0), std::string(9, static_cast(1))); - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); uint8_t fingerprint = 0; auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); @@ -278,7 +286,8 @@ TEST_F(RedisCuckooPageCacheTest, OversizedPageReturnsCorruption) { TEST_F(RedisCuckooPageCacheTest, DirtyPagesAreDiscardedWithoutWriteBack) { auto metadata = makeMetadata(4); { - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -294,7 +303,8 @@ TEST_F(RedisCuckooPageCacheTest, PageKeyUsesMetadataVersion) { auto old_metadata = makeMetadata(4, 100); auto new_metadata = makeMetadata(4, 101); { - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, old_metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), old_metadata.version, + old_metadata.bucket_size, old_metadata.page_size); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -306,7 +316,8 @@ TEST_F(RedisCuckooPageCacheTest, PageKeyUsesMetadataVersion) { commitBatch(batch.Get()); } - redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, new_metadata, storage_->IsSlotIdEncoded()); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), new_metadata.version, + new_metadata.bucket_size, new_metadata.page_size); uint8_t fingerprint = 0; auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); ASSERT_TRUE(s.ok()) << s.ToString(); diff --git a/tests/gocase/unit/type/bloom/cuckoo_filter_test.go b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go index fd6517fd892..852dde75ed0 100644 --- a/tests/gocase/unit/type/bloom/cuckoo_filter_test.go +++ b/tests/gocase/unit/type/bloom/cuckoo_filter_test.go @@ -21,6 +21,7 @@ package bloom import ( "context" + "fmt" "testing" "github.com/apache/kvrocks/tests/gocase/util" @@ -60,4 +61,130 @@ func TestCuckooFilter(t *testing.T) { require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err()) require.Equal(t, "MBbloomCF", rdb.Type(ctx, key).Val()) }) + + t.Run("Reserve invalid capacity", func(t *testing.T) { + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_cap_str", "abc").Err(), "invalid capacity") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_cap_neg", "-1").Err(), "invalid capacity") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_cap_zero", "0").Err(), "capacity must be larger than 0") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_cap_one", "1").Err(), "capacity must be at least 2") + }) + + t.Run("Reserve invalid bucket size", func(t *testing.T) { + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_bs_str", "1000", "BUCKETSIZE", "abc").Err(), "invalid bucket size") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_bs_zero", "1000", "BUCKETSIZE", "0").Err(), "bucket size must be between 1 and 255") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_bs_neg", "1000", "BUCKETSIZE", "-1").Err(), "invalid bucket size") + }) + + t.Run("Reserve invalid max iterations", func(t *testing.T) { + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_mi_str", "1000", "MAXITERATIONS", "abc").Err(), "invalid max iterations") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_mi_zero", "1000", "MAXITERATIONS", "0").Err(), "max iterations must be larger than 0") + }) + + t.Run("Reserve invalid expansion", func(t *testing.T) { + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_exp_str", "1000", "EXPANSION", "abc").Err(), "invalid expansion factor") + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_exp_big", "1000", "EXPANSION", "32769").Err(), "expansion must be between 0 and 32768") + }) + + t.Run("Reserve invalid syntax", func(t *testing.T) { + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", "cf_bad_syntax", "1000", "UNKNOWNOPT").Err(), "syntax error") + }) + + t.Run("Reserve duplicate key", func(t *testing.T) { + key := "test_cuckoo_filter_reserve_dup" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err()) + require.ErrorContains(t, rdb.Do(ctx, "cf.reserve", key, "2000").Err(), "already exists") + }) + + t.Run("Reserve wrong number of arguments", func(t *testing.T) { + require.Error(t, rdb.Do(ctx, "cf.reserve").Err()) + require.Error(t, rdb.Do(ctx, "cf.reserve", "key_only").Err()) + }) + + t.Run("Add wrong number of arguments", func(t *testing.T) { + require.Error(t, rdb.Do(ctx, "cf.add").Err()) + require.Error(t, rdb.Do(ctx, "cf.add", "key_only").Err()) + require.Error(t, rdb.Do(ctx, "cf.add", "key", "item1", "item2").Err()) + }) + + t.Run("Add many items", func(t *testing.T) { + key := "test_cuckoo_filter_add_many" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err()) + for i := 0; i < 500; i++ { + result := rdb.Do(ctx, "cf.add", key, fmt.Sprintf("item_%d", i)) + require.NoError(t, result.Err()) + require.Equal(t, int64(1), result.Val()) + } + }) + + t.Run("Add duplicate items allowed", func(t *testing.T) { + key := "test_cuckoo_filter_add_dup" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "1000").Err()) + for i := 0; i < 10; i++ { + result := rdb.Do(ctx, "cf.add", key, "same_item") + require.NoError(t, result.Err()) + require.Equal(t, int64(1), result.Val()) + } + }) + + t.Run("Add triggers expansion", func(t *testing.T) { + key := "test_cuckoo_filter_add_expansion" + require.NoError(t, rdb.Del(ctx, key).Err()) + // Small capacity with expansion enabled to trigger expansion quickly + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "4", "BUCKETSIZE", "1", "MAXITERATIONS", "1", "EXPANSION", "2").Err()) + // Add enough items to trigger expansion + for i := 0; i < 20; i++ { + result := rdb.Do(ctx, "cf.add", key, fmt.Sprintf("expand_item_%d", i)) + require.NoError(t, result.Err()) + require.Equal(t, int64(1), result.Val()) + } + }) + + t.Run("Add to full non-scaling filter returns error", func(t *testing.T) { + key := "test_cuckoo_filter_full_nonscaling" + require.NoError(t, rdb.Del(ctx, key).Err()) + // expansion=0 disables scaling; small capacity fills quickly + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "4", "BUCKETSIZE", "1", "MAXITERATIONS", "1", "EXPANSION", "0").Err()) + full := false + for i := 0; i < 100; i++ { + result := rdb.Do(ctx, "cf.add", key, fmt.Sprintf("full_item_%d", i)) + if result.Err() != nil { + require.ErrorContains(t, result.Err(), "filter is full") + full = true + break + } + } + require.True(t, full, "Non-scaling filter should eventually become full") + }) + + t.Run("Reserve with all optional params", func(t *testing.T) { + key := "test_cuckoo_filter_all_opts" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "5000", "BUCKETSIZE", "8", "MAXITERATIONS", "100", "EXPANSION", "4").Err()) + require.Equal(t, "MBbloomCF", rdb.Type(ctx, key).Val()) + }) + + t.Run("Add empty string item", func(t *testing.T) { + key := "test_cuckoo_filter_empty_item" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "100").Err()) + result := rdb.Do(ctx, "cf.add", key, "") + require.NoError(t, result.Err()) + require.Equal(t, int64(1), result.Val()) + }) + + t.Run("Add large item", func(t *testing.T) { + key := "test_cuckoo_filter_large_item" + require.NoError(t, rdb.Del(ctx, key).Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", key, "100").Err()) + largeItem := make([]byte, 10000) + for i := range largeItem { + largeItem[i] = byte('a' + i%26) + } + result := rdb.Do(ctx, "cf.add", key, string(largeItem)) + require.NoError(t, result.Err()) + require.Equal(t, int64(1), result.Val()) + }) } From 2fd2b3b1a87301d437bcbee75d2a3d1f08942e4d Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 26 May 2026 03:04:16 +0800 Subject: [PATCH 28/34] fix: rename GetPageValueSize->GetExpectedPageSize; remove TryInsertPrimaryBucket; rename KickOutInsert->TryKickOutInsert move metadata.n_filters++ after insert --- src/types/cuckoo_filter_page.cc | 16 ++++++++----- src/types/cuckoo_filter_page.h | 1 + src/types/cuckoo_filter_sub_filter.cc | 8 ++----- src/types/cuckoo_filter_sub_filter.h | 3 +-- src/types/redis_cuckoo_chain.cc | 10 ++++---- .../cppunit/types/cuckoo_filter_page_test.cc | 24 ++++++++++++++++++- 6 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index b053aae4e27..cbfa2d52ba0 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -39,7 +39,8 @@ uint32_t GetBucketOffset(uint32_t bucket_index, uint32_t buckets_per_page, uint8 return (bucket_index % buckets_per_page) * bucket_size; } -uint32_t GetPageValueSize(uint32_t page_index, uint32_t num_buckets, uint32_t buckets_per_page, uint8_t bucket_size) { +uint32_t GetExpectedPageSize(uint32_t page_index, uint32_t num_buckets, uint32_t buckets_per_page, + uint8_t bucket_size) { uint32_t first_bucket = page_index * buckets_per_page; uint32_t page_bucket_count = std::min(buckets_per_page, num_buckets - first_bucket); return page_bucket_count * bucket_size; @@ -105,7 +106,7 @@ rocksdb::Status CuckooPageCache::GetBucketSlot(uint16_t filter_index, uint32_t n } rocksdb::Status CuckooPageCache::SetBucketSlot(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, - uint32_t slot_idx, uint8_t fingerprint) { + uint32_t slot_idx, uint8_t fingerprint) { if (slot_idx >= bucket_size_) return rocksdb::Status::InvalidArgument("invalid cuckoo filter bucket slot"); BucketRef bucket; @@ -125,7 +126,7 @@ rocksdb::Status CuckooPageCache::WriteBackDirtyPages(rocksdb::WriteBatchBase *ba } rocksdb::Status CuckooPageCache::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, - uint32_t bucket_index, BucketLocation *location) const { + uint32_t bucket_index, BucketLocation *location) const { if (bucket_size_ == 0 || num_buckets == 0 || bucket_index >= num_buckets) { return rocksdb::Status::Corruption("invalid cuckoo filter bucket location"); } @@ -134,7 +135,7 @@ rocksdb::Status CuckooPageCache::resolveBucketLocation(uint16_t filter_index, ui uint32_t page_index = GetPageIndex(bucket_index, buckets_per_page); location->page_key = GetCuckooPageKey(ns_key_, version_, slot_id_encoded_, filter_index, page_index); location->offset = GetBucketOffset(bucket_index, buckets_per_page, bucket_size_); - location->expected_page_size = GetPageValueSize(page_index, num_buckets, buckets_per_page, bucket_size_); + location->expected_page_size = GetExpectedPageSize(page_index, num_buckets, buckets_per_page, bucket_size_); return rocksdb::Status::OK(); } @@ -200,9 +201,12 @@ rocksdb::Status CuckooPageCache::loadPages(const std::vector &lo rocksdb::Status CuckooPageCache::normalizePage(const rocksdb::Status &status, uint32_t expected_size, PageEntry *page) { if (!status.ok() && !status.IsNotFound()) return status; - if (status.IsNotFound()) page->data.clear(); + if (status.IsNotFound()) { + page->data.assign(expected_size, 0); + return rocksdb::Status::OK(); + } if (page->data.size() > expected_size) return rocksdb::Status::Corruption("invalid cuckoo filter page size"); - if (page->data.size() < expected_size) page->data.resize(expected_size, 0); + if (page->data.size() < expected_size) return rocksdb::Status::Corruption("invalid cuckoo filter page size"); return rocksdb::Status::OK(); } diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index 10d876355d1..d389d4dd08e 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -84,6 +84,7 @@ class CuckooPageCache { uint64_t version_ = 0; uint8_t bucket_size_ = 0; uint32_t page_size_ = 0; + // Maps encoded cuckoo page keys to cached page entries. std::unordered_map pages_; }; diff --git a/src/types/cuckoo_filter_sub_filter.cc b/src/types/cuckoo_filter_sub_filter.cc index 6589522fb66..3779d464546 100644 --- a/src/types/cuckoo_filter_sub_filter.cc +++ b/src/types/cuckoo_filter_sub_filter.cc @@ -45,12 +45,8 @@ rocksdb::Status CuckooSubFilter::TryInsert(uint64_t hash, uint8_t fingerprint, b return pages_.TryInsertInBucket(filter_index_, num_buckets_, bucket2_idx, fingerprint, inserted); } -rocksdb::Status CuckooSubFilter::TryInsertPrimaryBucket(uint64_t hash, uint8_t fingerprint, bool *inserted) { - return pages_.TryInsertInBucket(filter_index_, num_buckets_, getPrimaryBucketIndex(hash), fingerprint, inserted); -} - -rocksdb::Status CuckooSubFilter::KickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, - bool *inserted) { +rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, + bool *inserted) { *inserted = false; uint32_t current_bucket_idx = getPrimaryBucketIndex(hash); diff --git a/src/types/cuckoo_filter_sub_filter.h b/src/types/cuckoo_filter_sub_filter.h index 8b589b12a42..bedea82dbaf 100644 --- a/src/types/cuckoo_filter_sub_filter.h +++ b/src/types/cuckoo_filter_sub_filter.h @@ -39,8 +39,7 @@ class CuckooSubFilter { uint32_t NumBuckets() const { return num_buckets_; } rocksdb::Status TryInsert(uint64_t hash, uint8_t fingerprint, bool *inserted); - rocksdb::Status TryInsertPrimaryBucket(uint64_t hash, uint8_t fingerprint, bool *inserted); - rocksdb::Status KickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, bool *inserted); + rocksdb::Status TryKickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, bool *inserted); rocksdb::Status WriteToBatch(rocksdb::WriteBatchBase *batch); private: diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 803b7ed4e3b..dc95ba8f3be 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -197,7 +197,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co auto batch = storage_->GetWriteBatchBase(); CuckooSubFilter last_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, metadata.bucket_size, metadata.page_size, last_filter_idx, num_buckets); - s = last_filter.KickOutInsert(hash, fingerprint, metadata.max_iterations, &inserted); + s = last_filter.TryKickOutInsert(hash, fingerprint, metadata.max_iterations, &inserted); if (s.ok() && inserted) { WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); s = batch->PutLogData(log_data.Encode()); @@ -222,11 +222,8 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co if (metadata.expansion > 0) { if (metadata.n_filters >= UINT16_MAX) return rocksdb::Status::Aborted("maximum number of filters reached"); - metadata.n_filters++; - INFO("add expanded to {} filters", metadata.n_filters); - // Retry insertion in the new expanded filter - uint16_t new_filter_idx = metadata.n_filters - 1; + uint16_t new_filter_idx = metadata.n_filters; uint32_t new_num_buckets = 0; s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, new_filter_idx, &new_num_buckets); @@ -237,7 +234,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co CuckooSubFilter new_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, metadata.bucket_size, metadata.page_size, new_filter_idx, new_num_buckets); - s = new_filter.TryInsertPrimaryBucket(hash, fingerprint, &inserted); + s = new_filter.TryInsert(hash, fingerprint, &inserted); if (!s.ok()) return s; if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); @@ -248,6 +245,7 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co s = new_filter.WriteToBatch(batch.Get()); if (!s.ok()) return s; + metadata.n_filters++; metadata.size++; std::string metadata_bytes; metadata.Encode(&metadata_bytes); diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc index d8ec08cc44b..edce4fa6c3a 100644 --- a/tests/cppunit/types/cuckoo_filter_page_test.cc +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -283,11 +283,33 @@ TEST_F(RedisCuckooPageCacheTest, OversizedPageReturnsCorruption) { EXPECT_EQ(page, std::string(9, static_cast(1))); } +TEST_F(RedisCuckooPageCacheTest, UndersizedPageReturnsCorruption) { + auto metadata = makeMetadata(4); + auto page_key = makePageKey(metadata, 0, 0); + writePage(page_key, std::string{static_cast(1), static_cast(2), static_cast(3)}); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); + + uint8_t fingerprint = 0; + auto s = pages.GetBucketSlot(0, 2, 0, 0, &fingerprint); + EXPECT_TRUE(s.IsCorruption()) << s.ToString(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + commitBatch(batch.Get()); + + std::string page; + s = readPage(page_key, &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page, std::string({static_cast(1), static_cast(2), static_cast(3)})); +} + TEST_F(RedisCuckooPageCacheTest, DirtyPagesAreDiscardedWithoutWriteBack) { auto metadata = makeMetadata(4); { redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, - metadata.bucket_size, metadata.page_size); + metadata.bucket_size, metadata.page_size); bool inserted = false; auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); ASSERT_TRUE(s.ok()) << s.ToString(); From 7fd5abba4dda9079e4d3ea6b7f5aaa5647c24c0f Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 26 May 2026 16:51:34 +0800 Subject: [PATCH 29/34] optimize code --- src/types/cuckoo_filter.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 9d1dd6ebdff..8d35af64f06 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -120,11 +120,13 @@ class CuckooFilterHelper { static rocksdb::Status GetFilterNumBuckets(uint64_t base_capacity, uint16_t expansion, uint8_t bucket_size, uint16_t filter_index, uint32_t *num_buckets) { uint64_t filter_capacity = 0; - if (!CalculateFilterCapacity(base_capacity, expansion, filter_index, &filter_capacity) || - !IsCapacitySupported(filter_capacity, bucket_size)) { + if (!CalculateFilterCapacity(base_capacity, expansion, filter_index, &filter_capacity)) { return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); } - return CalculateRequiredBuckets(filter_capacity, bucket_size, num_buckets); + + auto s = CalculateRequiredBuckets(filter_capacity, bucket_size, num_buckets); + if (!s.ok()) return rocksdb::Status::Corruption("invalid metadata: filter capacity is too large"); + return rocksdb::Status::OK(); } }; From 58abab9dcd1b618ea6da3ebc7a83097b55abf537 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Tue, 26 May 2026 16:52:32 +0800 Subject: [PATCH 30/34] fix: split add into three phase --- src/types/redis_cuckoo_chain.cc | 193 ++++++++++++++++++-------------- src/types/redis_cuckoo_chain.h | 12 ++ 2 files changed, 121 insertions(+), 84 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index dc95ba8f3be..2cadbc550e4 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -150,118 +150,143 @@ rocksdb::Status CuckooChain::Add(engine::Context &ctx, const Slice &user_key, co uint64_t hash = CuckooFilterHelper::Hash(item.data(), item.size()); uint8_t fingerprint = CuckooFilterHelper::GenerateFingerprint(hash); + bool inserted = false; + s = tryCuckooInsert(ctx, user_key, ns_key, &metadata, hash, fingerprint, &inserted); + if (!s.ok()) return s; + if (inserted) { + *added = true; + return rocksdb::Status::OK(); + } + + s = tryCuckooKickOut(ctx, user_key, ns_key, &metadata, hash, fingerprint, &inserted); + if (!s.ok()) return s; + if (inserted) { + *added = true; + return rocksdb::Status::OK(); + } + + s = expandAndInsertCuckooChain(ctx, user_key, ns_key, &metadata, hash, fingerprint, &inserted); + if (!s.ok()) return s; + if (inserted) { + *added = true; + return rocksdb::Status::OK(); + } + + // No expansion allowed and filter is full + *added = false; + return rocksdb::Status::Aborted("filter is full"); +} + +rocksdb::Status CuckooChain::tryCuckooInsert(engine::Context &ctx, const Slice &user_key, const std::string &ns_key, + CuckooChainMetadata *metadata, uint64_t hash, uint8_t fingerprint, + bool *inserted) { + *inserted = false; + // RedisBloom prioritizes the newest sub-filter to avoid repeatedly probing older, fuller filters. - for (int filter_idx = static_cast(metadata.n_filters) - 1; filter_idx >= 0; --filter_idx) { + for (int filter_idx = static_cast(metadata->n_filters) - 1; filter_idx >= 0; --filter_idx) { auto current_filter_idx = static_cast(filter_idx); uint32_t num_buckets = 0; - s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, - current_filter_idx, &num_buckets); + auto s = CuckooFilterHelper::GetFilterNumBuckets(metadata->base_capacity, metadata->expansion, + metadata->bucket_size, current_filter_idx, &num_buckets); if (!s.ok()) return s; - CuckooSubFilter sub_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, - metadata.bucket_size, metadata.page_size, current_filter_idx, num_buckets); - bool inserted = false; - s = sub_filter.TryInsert(hash, fingerprint, &inserted); + CuckooSubFilter sub_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata->version, + metadata->bucket_size, metadata->page_size, current_filter_idx, num_buckets); + bool current_inserted = false; + s = sub_filter.TryInsert(hash, fingerprint, ¤t_inserted); if (!s.ok()) return s; - if (inserted) { - auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); - s = batch->PutLogData(log_data.Encode()); + if (current_inserted) { + s = commitSubFilterAndMetadata(ctx, user_key, ns_key, metadata, &sub_filter); if (!s.ok()) return s; - s = sub_filter.WriteToBatch(batch.Get()); - if (!s.ok()) return s; - - metadata.size++; - std::string metadata_bytes; - metadata.Encode(&metadata_bytes); - s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); - if (!s.ok()) return s; - - s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); - if (!s.ok()) return s; - - *added = true; + *inserted = true; return rocksdb::Status::OK(); } } + return rocksdb::Status::OK(); +} + +rocksdb::Status CuckooChain::tryCuckooKickOut(engine::Context &ctx, const Slice &user_key, const std::string &ns_key, + CuckooChainMetadata *metadata, uint64_t hash, uint8_t fingerprint, + bool *inserted) { + *inserted = false; + // No space found in any filter, try kick-out on the last filter - uint16_t last_filter_idx = metadata.n_filters - 1; + uint16_t last_filter_idx = metadata->n_filters - 1; uint32_t num_buckets = 0; - s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, - last_filter_idx, &num_buckets); + auto s = CuckooFilterHelper::GetFilterNumBuckets(metadata->base_capacity, metadata->expansion, metadata->bucket_size, + last_filter_idx, &num_buckets); if (!s.ok()) return s; - bool inserted = false; - auto batch = storage_->GetWriteBatchBase(); - CuckooSubFilter last_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, - metadata.bucket_size, metadata.page_size, last_filter_idx, num_buckets); - s = last_filter.TryKickOutInsert(hash, fingerprint, metadata.max_iterations, &inserted); - if (s.ok() && inserted) { - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); - s = batch->PutLogData(log_data.Encode()); - if (!s.ok()) return s; - s = last_filter.WriteToBatch(batch.Get()); + CuckooSubFilter last_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata->version, + metadata->bucket_size, metadata->page_size, last_filter_idx, num_buckets); + bool kickout_inserted = false; + s = last_filter.TryKickOutInsert(hash, fingerprint, metadata->max_iterations, &kickout_inserted); + if (!s.ok()) return s; + if (kickout_inserted) { + s = commitSubFilterAndMetadata(ctx, user_key, ns_key, metadata, &last_filter); if (!s.ok()) return s; + *inserted = true; + return rocksdb::Status::OK(); + } - metadata.size++; - std::string metadata_bytes; - metadata.Encode(&metadata_bytes); - s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); - if (!s.ok()) return s; + return rocksdb::Status::OK(); +} - s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); - if (!s.ok()) return s; +rocksdb::Status CuckooChain::expandAndInsertCuckooChain(engine::Context &ctx, const Slice &user_key, + const std::string &ns_key, CuckooChainMetadata *metadata, + uint64_t hash, uint8_t fingerprint, bool *inserted) { + *inserted = false; - *added = true; - return rocksdb::Status::OK(); + // Kick-out failed, try to expand if allowed + if (metadata->expansion == 0) return rocksdb::Status::OK(); + + if (metadata->n_filters >= UINT16_MAX) return rocksdb::Status::Aborted("maximum number of filters reached"); + + // Retry insertion in the new expanded filter + uint16_t new_filter_idx = metadata->n_filters; + uint32_t new_num_buckets = 0; + auto s = CuckooFilterHelper::GetFilterNumBuckets(metadata->base_capacity, metadata->expansion, metadata->bucket_size, + new_filter_idx, &new_num_buckets); + if (s.IsCorruption()) { + return rocksdb::Status::Aborted("maximum filter capacity reached"); } + if (!s.ok()) return s; - // Kick-out failed, try to expand if allowed - if (metadata.expansion > 0) { - if (metadata.n_filters >= UINT16_MAX) return rocksdb::Status::Aborted("maximum number of filters reached"); - - // Retry insertion in the new expanded filter - uint16_t new_filter_idx = metadata.n_filters; - uint32_t new_num_buckets = 0; - s = CuckooFilterHelper::GetFilterNumBuckets(metadata.base_capacity, metadata.expansion, metadata.bucket_size, - new_filter_idx, &new_num_buckets); - if (s.IsCorruption()) { - return rocksdb::Status::Aborted("maximum filter capacity reached"); - } - if (!s.ok()) return s; + CuckooSubFilter new_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata->version, + metadata->bucket_size, metadata->page_size, new_filter_idx, new_num_buckets); + bool new_filter_inserted = false; + s = new_filter.TryInsert(hash, fingerprint, &new_filter_inserted); + if (!s.ok()) return s; + if (!new_filter_inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); - CuckooSubFilter new_filter(storage_, ctx, ns_key, storage_->IsSlotIdEncoded(), metadata.version, - metadata.bucket_size, metadata.page_size, new_filter_idx, new_num_buckets); - s = new_filter.TryInsert(hash, fingerprint, &inserted); - if (!s.ok()) return s; - if (!inserted) return rocksdb::Status::Corruption("failed to insert into new cuckoo filter"); + metadata->n_filters++; + s = commitSubFilterAndMetadata(ctx, user_key, ns_key, metadata, &new_filter); + if (!s.ok()) return s; - auto batch = storage_->GetWriteBatchBase(); - WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); - s = batch->PutLogData(log_data.Encode()); - if (!s.ok()) return s; - s = new_filter.WriteToBatch(batch.Get()); - if (!s.ok()) return s; + *inserted = true; + return rocksdb::Status::OK(); +} - metadata.n_filters++; - metadata.size++; - std::string metadata_bytes; - metadata.Encode(&metadata_bytes); - s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); - if (!s.ok()) return s; +rocksdb::Status CuckooChain::commitSubFilterAndMetadata(engine::Context &ctx, const Slice &user_key, + const std::string &ns_key, CuckooChainMetadata *metadata, + CuckooSubFilter *sub_filter) { + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, std::vector{"add", user_key.ToString()}); + auto s = batch->PutLogData(log_data.Encode()); + if (!s.ok()) return s; - s = storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); - if (!s.ok()) return s; + s = sub_filter->WriteToBatch(batch.Get()); + if (!s.ok()) return s; - *added = true; - return rocksdb::Status::OK(); - } + metadata->size++; + std::string metadata_bytes; + metadata->Encode(&metadata_bytes); + s = batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + if (!s.ok()) return s; - // No expansion allowed and filter is full - *added = false; - return rocksdb::Status::Aborted("filter is full"); + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); } } // namespace redis diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index 608798977fd..d66f4549a3b 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -26,6 +26,8 @@ namespace redis { +class CuckooSubFilter; + // Default values for a newly created cuckoo filter. const uint32_t kCFDefaultCapacity = 1024; const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket @@ -50,6 +52,16 @@ class CuckooChain : public Database { rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); static rocksdb::Status validateMetadata(const CuckooChainMetadata &metadata); + + rocksdb::Status tryCuckooInsert(engine::Context &ctx, const Slice &user_key, const std::string &ns_key, + CuckooChainMetadata *metadata, uint64_t hash, uint8_t fingerprint, bool *inserted); + rocksdb::Status tryCuckooKickOut(engine::Context &ctx, const Slice &user_key, const std::string &ns_key, + CuckooChainMetadata *metadata, uint64_t hash, uint8_t fingerprint, bool *inserted); + rocksdb::Status expandAndInsertCuckooChain(engine::Context &ctx, const Slice &user_key, const std::string &ns_key, + CuckooChainMetadata *metadata, uint64_t hash, uint8_t fingerprint, + bool *inserted); + rocksdb::Status commitSubFilterAndMetadata(engine::Context &ctx, const Slice &user_key, const std::string &ns_key, + CuckooChainMetadata *metadata, CuckooSubFilter *sub_filter); }; } // namespace redis From cc2cc1b99990a7f9734c8423a955104eeeef38ef Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Thu, 28 May 2026 03:34:52 +0800 Subject: [PATCH 31/34] fix: sematic for the TryKickOutInsert --- src/types/cuckoo_filter_page.cc | 2 + src/types/cuckoo_filter_page.h | 2 + src/types/cuckoo_filter_sub_filter.cc | 16 ++++-- src/types/cuckoo_filter_sub_filter.h | 2 + .../cppunit/types/cuckoo_filter_page_test.cc | 22 ++++++++ tests/cppunit/types/cuckoo_filter_test.cc | 53 +++++++++++++++++++ 6 files changed, 94 insertions(+), 3 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index cbfa2d52ba0..4b82c09a8b3 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -125,6 +125,8 @@ rocksdb::Status CuckooPageCache::WriteBackDirtyPages(rocksdb::WriteBatchBase *ba return rocksdb::Status::OK(); } +void CuckooPageCache::Discard() { pages_.clear(); } + rocksdb::Status CuckooPageCache::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketLocation *location) const { if (bucket_size_ == 0 || num_buckets == 0 || bucket_index >= num_buckets) { diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index d389d4dd08e..1a2e5aa9a8f 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -47,6 +47,8 @@ class CuckooPageCache { uint8_t fingerprint); rocksdb::Status WriteBackDirtyPages(rocksdb::WriteBatchBase *batch); + void Discard(); + private: struct PageEntry { std::string data; diff --git a/src/types/cuckoo_filter_sub_filter.cc b/src/types/cuckoo_filter_sub_filter.cc index 3779d464546..56005f6fe90 100644 --- a/src/types/cuckoo_filter_sub_filter.cc +++ b/src/types/cuckoo_filter_sub_filter.cc @@ -56,9 +56,15 @@ rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerp for (uint16_t iteration = 0; iteration < max_iterations; ++iteration) { uint8_t old_fp = 0; auto s = pages_.GetBucketSlot(filter_index_, num_buckets_, current_bucket_idx, victim_slot, &old_fp); - if (!s.ok()) return s; + if (!s.ok()) { + pages_.Discard(); + return s; + } s = pages_.SetBucketSlot(filter_index_, num_buckets_, current_bucket_idx, victim_slot, current_fp); - if (!s.ok()) return s; + if (!s.ok()) { + pages_.Discard(); + return s; + } current_fp = old_fp; if (current_fp == 0) { @@ -70,7 +76,10 @@ rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerp bool inserted_in_alt_bucket = false; s = pages_.TryInsertInBucket(filter_index_, num_buckets_, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); - if (!s.ok()) return s; + if (!s.ok()) { + pages_.Discard(); + return s; + } if (inserted_in_alt_bucket) { *inserted = true; return rocksdb::Status::OK(); @@ -80,6 +89,7 @@ rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerp victim_slot = (victim_slot + 1) % bucket_size_; } + pages_.Discard(); return rocksdb::Status::OK(); } diff --git a/src/types/cuckoo_filter_sub_filter.h b/src/types/cuckoo_filter_sub_filter.h index bedea82dbaf..2bd26df8541 100644 --- a/src/types/cuckoo_filter_sub_filter.h +++ b/src/types/cuckoo_filter_sub_filter.h @@ -39,6 +39,8 @@ class CuckooSubFilter { uint32_t NumBuckets() const { return num_buckets_; } rocksdb::Status TryInsert(uint64_t hash, uint8_t fingerprint, bool *inserted); + // Performs speculative kick-out mutations in the page cache. On success, dirty pages remain staged for + // WriteToBatch(); on inserted=false or non-OK status, cached pages are discarded before returning. rocksdb::Status TryKickOutInsert(uint64_t hash, uint8_t fingerprint, uint16_t max_iterations, bool *inserted); rocksdb::Status WriteToBatch(rocksdb::WriteBatchBase *batch); diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc index edce4fa6c3a..634fa84a6fd 100644 --- a/tests/cppunit/types/cuckoo_filter_page_test.cc +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -321,6 +321,28 @@ TEST_F(RedisCuckooPageCacheTest, DirtyPagesAreDiscardedWithoutWriteBack) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } +TEST_F(RedisCuckooPageCacheTest, DiscardDropsDirtyPages) { + auto metadata = makeMetadata(4); + redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, + metadata.bucket_size, metadata.page_size); + + bool inserted = false; + auto s = pages.TryInsertInBucket(0, 2, 0, 11, &inserted); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_TRUE(inserted); + + pages.Discard(); + + auto batch = storage_->GetWriteBatchBase(); + s = pages.WriteBackDirtyPages(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + commitBatch(batch.Get()); + + std::string page; + s = readPage(makePageKey(metadata, 0, 0), &page); + EXPECT_TRUE(s.IsNotFound()) << s.ToString(); +} + TEST_F(RedisCuckooPageCacheTest, PageKeyUsesMetadataVersion) { auto old_metadata = makeMetadata(4, 100); auto new_metadata = makeMetadata(4, 101); diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 60ef7ab9be8..c665038b189 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -30,6 +30,7 @@ #include "storage/redis_metadata.h" #include "test_base.h" #include "types/cuckoo_filter_page.h" +#include "types/cuckoo_filter_sub_filter.h" #include "types/redis_cuckoo_chain.h" class RedisCuckooFilterTest : public TestBase { @@ -111,6 +112,14 @@ class RedisCuckooFilterTest : public TestBase { value); } + void writePage(const std::string &page_key, const std::string &value) { + auto batch = storage_->GetWriteBatchBase(); + auto s = batch->Put(page_key, value); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + void writeMetadata(const std::string &key, const CuckooChainMetadata &metadata) { std::string metadata_bytes; metadata.Encode(&metadata_bytes); @@ -588,6 +597,50 @@ TEST_F(RedisCuckooFilterTest, KickOutSuccessWritesDirtyPages) { EXPECT_EQ(static_cast(page[evicted_bucket]), first.fingerprint); } +TEST_F(RedisCuckooFilterTest, KickOutErrorDiscardsDirtyPages) { + constexpr uint8_t bucket_size = 1; + constexpr uint32_t page_size = 1; + constexpr uint32_t num_buckets = 2; + CuckooChainMetadata metadata(false); + metadata.version = 1; + metadata.size = 0; + metadata.base_capacity = 1; + metadata.bucket_size = bucket_size; + metadata.max_iterations = 2; + metadata.expansion = 0; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + metadata.page_size = page_size; + writeMetadata(key_, metadata); + + constexpr uint64_t hash = 0; + constexpr uint8_t fingerprint = 2; + constexpr uint8_t existing_fingerprint = 1; + ASSERT_EQ(redis::CuckooFilterHelper::GetAltBucketIndex(0, existing_fingerprint, num_buckets), 1); + std::string original_page{static_cast(existing_fingerprint)}; + writePage(makePageKey(key_, metadata, 0, 0), original_page); + writePage(makePageKey(key_, metadata, 0, 1), std::string(2, static_cast(9))); + + redis::CuckooSubFilter sub_filter(storage_.get(), *ctx_, db_->AppendNamespacePrefix(key_), + storage_->IsSlotIdEncoded(), metadata.version, metadata.bucket_size, + metadata.page_size, 0, num_buckets); + bool inserted = true; + auto s = sub_filter.TryKickOutInsert(hash, fingerprint, metadata.max_iterations, &inserted); + ASSERT_TRUE(s.IsCorruption()) << s.ToString(); + ASSERT_FALSE(inserted); + + auto batch = storage_->GetWriteBatchBase(); + s = sub_filter.WriteToBatch(batch.Get()); + ASSERT_TRUE(s.ok()) << s.ToString(); + s = storage_->Write(*ctx_, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); + ASSERT_TRUE(s.ok()) << s.ToString(); + + std::string page; + s = readPage(makePageKey(key_, metadata, 0, 0), &page); + ASSERT_TRUE(s.ok()) << s.ToString(); + EXPECT_EQ(page, original_page); +} + TEST_F(RedisCuckooFilterTest, ExpansionWritesNewFilterIndexPage) { constexpr uint64_t capacity = 2; constexpr uint8_t bucket_size = 1; From 98cbb54e78387cd24ff4f3e851e8169fbf45446f Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Fri, 29 May 2026 15:04:07 +0800 Subject: [PATCH 32/34] rename: Discard->DiscardCachedPages --- src/types/cuckoo_filter_page.cc | 2 +- src/types/cuckoo_filter_page.h | 2 +- src/types/cuckoo_filter_sub_filter.cc | 8 ++++---- tests/cppunit/types/cuckoo_filter_page_test.cc | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/types/cuckoo_filter_page.cc b/src/types/cuckoo_filter_page.cc index 4b82c09a8b3..1ef4a4c803d 100644 --- a/src/types/cuckoo_filter_page.cc +++ b/src/types/cuckoo_filter_page.cc @@ -125,7 +125,7 @@ rocksdb::Status CuckooPageCache::WriteBackDirtyPages(rocksdb::WriteBatchBase *ba return rocksdb::Status::OK(); } -void CuckooPageCache::Discard() { pages_.clear(); } +void CuckooPageCache::DiscardCachedPages() { pages_.clear(); } rocksdb::Status CuckooPageCache::resolveBucketLocation(uint16_t filter_index, uint32_t num_buckets, uint32_t bucket_index, BucketLocation *location) const { diff --git a/src/types/cuckoo_filter_page.h b/src/types/cuckoo_filter_page.h index 1a2e5aa9a8f..ff2efeb50c7 100644 --- a/src/types/cuckoo_filter_page.h +++ b/src/types/cuckoo_filter_page.h @@ -47,7 +47,7 @@ class CuckooPageCache { uint8_t fingerprint); rocksdb::Status WriteBackDirtyPages(rocksdb::WriteBatchBase *batch); - void Discard(); + void DiscardCachedPages(); private: struct PageEntry { diff --git a/src/types/cuckoo_filter_sub_filter.cc b/src/types/cuckoo_filter_sub_filter.cc index 56005f6fe90..0a0748cc354 100644 --- a/src/types/cuckoo_filter_sub_filter.cc +++ b/src/types/cuckoo_filter_sub_filter.cc @@ -57,12 +57,12 @@ rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerp uint8_t old_fp = 0; auto s = pages_.GetBucketSlot(filter_index_, num_buckets_, current_bucket_idx, victim_slot, &old_fp); if (!s.ok()) { - pages_.Discard(); + pages_.DiscardCachedPages(); return s; } s = pages_.SetBucketSlot(filter_index_, num_buckets_, current_bucket_idx, victim_slot, current_fp); if (!s.ok()) { - pages_.Discard(); + pages_.DiscardCachedPages(); return s; } current_fp = old_fp; @@ -77,7 +77,7 @@ rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerp bool inserted_in_alt_bucket = false; s = pages_.TryInsertInBucket(filter_index_, num_buckets_, alt_bucket_idx, current_fp, &inserted_in_alt_bucket); if (!s.ok()) { - pages_.Discard(); + pages_.DiscardCachedPages(); return s; } if (inserted_in_alt_bucket) { @@ -89,7 +89,7 @@ rocksdb::Status CuckooSubFilter::TryKickOutInsert(uint64_t hash, uint8_t fingerp victim_slot = (victim_slot + 1) % bucket_size_; } - pages_.Discard(); + pages_.DiscardCachedPages(); return rocksdb::Status::OK(); } diff --git a/tests/cppunit/types/cuckoo_filter_page_test.cc b/tests/cppunit/types/cuckoo_filter_page_test.cc index 634fa84a6fd..8be87aefada 100644 --- a/tests/cppunit/types/cuckoo_filter_page_test.cc +++ b/tests/cppunit/types/cuckoo_filter_page_test.cc @@ -321,7 +321,7 @@ TEST_F(RedisCuckooPageCacheTest, DirtyPagesAreDiscardedWithoutWriteBack) { EXPECT_TRUE(s.IsNotFound()) << s.ToString(); } -TEST_F(RedisCuckooPageCacheTest, DiscardDropsDirtyPages) { +TEST_F(RedisCuckooPageCacheTest, DiscardCachedPagesDropsDirtyPages) { auto metadata = makeMetadata(4); redis::CuckooPageCache pages(storage_.get(), *ctx_, ns_key_, storage_->IsSlotIdEncoded(), metadata.version, metadata.bucket_size, metadata.page_size); @@ -331,7 +331,7 @@ TEST_F(RedisCuckooPageCacheTest, DiscardDropsDirtyPages) { ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(inserted); - pages.Discard(); + pages.DiscardCachedPages(); auto batch = storage_->GetWriteBatchBase(); s = pages.WriteBackDirtyPages(batch.Get()); From a9364c42c65eb09119312d9c77ff41b38162d694 Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Fri, 29 May 2026 17:36:31 +0800 Subject: [PATCH 33/34] restore: scan test --- tests/gocase/unit/scan/scan_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/gocase/unit/scan/scan_test.go b/tests/gocase/unit/scan/scan_test.go index c3e2ab557ed..e0eb8e4d751 100644 --- a/tests/gocase/unit/scan/scan_test.go +++ b/tests/gocase/unit/scan/scan_test.go @@ -463,8 +463,8 @@ func ScanTest(t *testing.T, rdb *redis.Client, ctx context.Context) { require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype1", "0.02", "1000").Err()) require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype2", "0.02", "1000").Err()) require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype3", "0.02", "1000").Err()) - require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "type", "MBbloomCF")) - require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "count", "3", "type", "MBbloomCF")) + require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "type", "MBbloom--")) + require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "count", "3", "type", "MBbloom--")) //ReJSON-RL type require.NoError(t, rdb.Do(ctx, "JSON.SET", "ReJSONtype1", "$", ` {"x":1, "y":2} `).Err()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "ReJSONtype2", "$", ` {"x":1, "y":2} `).Err()) From e0435dd8f027be60d93ab05303ba383b9144044b Mon Sep 17 00:00:00 2001 From: nagisa-kun <1434936049@qq.com> Date: Fri, 29 May 2026 19:46:43 +0800 Subject: [PATCH 34/34] add: cf scan test --- tests/gocase/unit/scan/scan_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/gocase/unit/scan/scan_test.go b/tests/gocase/unit/scan/scan_test.go index e0eb8e4d751..c20267f33fa 100644 --- a/tests/gocase/unit/scan/scan_test.go +++ b/tests/gocase/unit/scan/scan_test.go @@ -465,6 +465,12 @@ func ScanTest(t *testing.T, rdb *redis.Client, ctx context.Context) { require.NoError(t, rdb.Do(ctx, "bf.reserve", "MBbloomtype3", "0.02", "1000").Err()) require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "type", "MBbloom--")) require.Equal(t, []string{"MBbloomtype1", "MBbloomtype2", "MBbloomtype3"}, scanAll(t, rdb, "match", "MBbloomtype*", "count", "3", "type", "MBbloom--")) + //MBbloomCF type + require.NoError(t, rdb.Do(ctx, "cf.reserve", "MBbloomCFtype1", "1000").Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", "MBbloomCFtype2", "1000").Err()) + require.NoError(t, rdb.Do(ctx, "cf.reserve", "MBbloomCFtype3", "1000").Err()) + require.Equal(t, []string{"MBbloomCFtype1", "MBbloomCFtype2", "MBbloomCFtype3"}, scanAll(t, rdb, "match", "MBbloomCFtype*", "type", "MBbloomCF")) + require.Equal(t, []string{"MBbloomCFtype1", "MBbloomCFtype2", "MBbloomCFtype3"}, scanAll(t, rdb, "match", "MBbloomCFtype*", "count", "3", "type", "MBbloomCF")) //ReJSON-RL type require.NoError(t, rdb.Do(ctx, "JSON.SET", "ReJSONtype1", "$", ` {"x":1, "y":2} `).Err()) require.NoError(t, rdb.Do(ctx, "JSON.SET", "ReJSONtype2", "$", ` {"x":1, "y":2} `).Err())