diff --git a/src/core/algorithm/hnsw/hnsw_context.cc b/src/core/algorithm/hnsw/hnsw_context.cc index b930e4189..3d64d2ff1 100644 --- a/src/core/algorithm/hnsw/hnsw_context.cc +++ b/src/core/algorithm/hnsw/hnsw_context.cc @@ -18,15 +18,26 @@ namespace zvec { namespace core { -HnswContext::HnswContext(size_t dimension, const IndexMetric::Pointer &metric, +HnswContext::HnswContext(size_t dimension, + zvec::turbo::Quantizer::Pointer quantizer, + IndexMeta::DataType qmeta_data_type, + const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity) : IndexContext(metric), entity_(entity), - dc_(entity_.get(), metric, dimension) {} + dc_(entity_.get(), std::move(quantizer), metric, dimension, + qmeta_data_type) { + metric_ = metric; +} -HnswContext::HnswContext(const IndexMetric::Pointer &metric, +HnswContext::HnswContext(zvec::turbo::Quantizer::Pointer quantizer, + const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity) - : IndexContext(metric), entity_(entity), dc_(entity_.get(), metric) {} + : IndexContext(metric), + entity_(entity), + dc_(entity_.get(), std::move(quantizer), metric) { + metric_ = metric; +} HnswContext::~HnswContext() { visit_filter_.destroy(); @@ -200,6 +211,7 @@ int HnswContext::update(const ailego::Params &params) { } int HnswContext::update_context(ContextType type, const IndexMeta &meta, + zvec::turbo::Quantizer::Pointer quantizer, const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity, uint32_t magic_num) { @@ -251,7 +263,9 @@ int HnswContext::update_context(ContextType type, const IndexMeta &meta, } entity_ = entity; - dc_.update(entity_.get(), metric, meta.dimension()); + dc_.update(entity_.get(), std::move(quantizer), metric, meta.dimension(), + meta.data_type()); + metric_ = metric; magic_ = magic_num; level_topks_.clear(); diff --git a/src/core/algorithm/hnsw/hnsw_context.h b/src/core/algorithm/hnsw/hnsw_context.h index e776b81a7..e67c42f56 100644 --- a/src/core/algorithm/hnsw/hnsw_context.h +++ 
b/src/core/algorithm/hnsw/hnsw_context.h @@ -34,12 +34,16 @@ class HnswContext : public IndexContext { kStreamerContext = 3 }; - //! Construct - HnswContext(size_t dimension, const IndexMetric::Pointer &metric, + //! Construct with an explicit turbo quantizer (used for building the + //! internal HnswDistCalculator). + HnswContext(size_t dimension, zvec::turbo::Quantizer::Pointer quantizer, + IndexMeta::DataType qmeta_data_type, + const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity); - //! Construct - HnswContext(const IndexMetric::Pointer &metric, + //! Construct without dimension (lazy init via update_context). + HnswContext(zvec::turbo::Quantizer::Pointer quantizer, + const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity); //! Destructor @@ -113,6 +117,7 @@ class HnswContext : public IndexContext { //! Update context, the context may be shared by different searcher/streamer int update_context(ContextType type, const IndexMeta &meta, + zvec::turbo::Quantizer::Pointer quantizer, const IndexMetric::Pointer &metric, const HnswEntity::Pointer &entity, uint32_t magic_num); @@ -444,10 +449,20 @@ class HnswContext : public IndexContext { return debug_mode_; } - inline void update_dist_caculator_distance( - const IndexMetric::MatrixDistance &distance, - const IndexMetric::MatrixBatchDistance &batch_distance) { - dc_.update_distance(distance, batch_distance); + //! Swap the turbo quantizer used by the dist calculator (e.g. when + //! switching between add/search metrics). Caller must then invoke + //! reset_query before using the calculator. + inline void update_dist_caculator_quantizer( + zvec::turbo::Quantizer::Pointer quantizer) { + dc_.update_quantizer(std::move(quantizer)); + } + + //! Swap the IndexMetric fallback used by the dist calculator (e.g. when + //! switching between add/search metrics for MipsSquaredEuclidean, whose + //! query-time metric is InnerProduct). Caller must then invoke + //! 
reset_query before using the calculator. + inline void update_dist_caculator_metric(IndexMetric::Pointer metric) { + dc_.update_metric(std::move(metric)); } //! Get topk diff --git a/src/core/algorithm/hnsw/hnsw_dist_calculator.h b/src/core/algorithm/hnsw/hnsw_dist_calculator.h index 2e4b22d1f..803a3a822 100644 --- a/src/core/algorithm/hnsw/hnsw_dist_calculator.h +++ b/src/core/algorithm/hnsw/hnsw_dist_calculator.h @@ -13,12 +13,20 @@ // limitations under the License. #pragma once +#include #include +#include #include "hnsw_entity.h" namespace zvec { namespace core { +//! Dist calculator used by HNSW. Prefers the turbo Quantizer's +//! DistanceImpl when it is available for the current metric/dtype; +//! otherwise falls back to IndexMetric's distance / batch_distance +//! handles. This keeps HNSW functional for metric/dtype combos that +//! turbo does not yet implement (e.g. MipsSquaredEuclidean, Cosine +//! with cached norm, non-FP32 converter pipelines). class HnswDistCalculator { public: typedef std::shared_ptr Pointer; @@ -32,65 +40,113 @@ class HnswDistCalculator { }; public: - //! Constructor + //! Constructor with a turbo quantizer and an IndexMetric fallback. + //! `dim` is the dimension of the stored vectors. `qmeta_data_type` + //! is the data type of the raw query accepted by `reset_query`. HnswDistCalculator(const HnswEntity *entity, - const IndexMetric::Pointer &metric, uint32_t dim) + zvec::turbo::Quantizer::Pointer quantizer, + IndexMetric::Pointer metric, uint32_t dim, + IndexMeta::DataType qmeta_data_type) : entity_(entity), - distance_(metric->distance()), - batch_distance_(metric->batch_distance()), + quantizer_(std::move(quantizer)), + metric_(std::move(metric)), query_(nullptr), dim_(dim), - compare_cnt_(0) {} - - //! 
Constructor - HnswDistCalculator(const HnswEntity *entity, - const IndexMetric::Pointer &metric, uint32_t dim, - const void *query) - : entity_(entity), - distance_(metric->distance()), - batch_distance_(metric->batch_distance()), - query_(query), - dim_(dim), - compare_cnt_(0) {} + compare_cnt_(0) { + qmeta_.set_meta(qmeta_data_type, dim); + if (metric_) { + distance_ = metric_->distance(); + batch_distance_ = metric_->batch_distance(); + } + } - //! Constructor + //! Constructor without dimension (for lazy init via update()). HnswDistCalculator(const HnswEntity *entity, - const IndexMetric::Pointer &metric) + zvec::turbo::Quantizer::Pointer quantizer, + IndexMetric::Pointer metric) : entity_(entity), - distance_(metric->distance()), - batch_distance_(metric->batch_distance()), + quantizer_(std::move(quantizer)), + metric_(std::move(metric)), query_(nullptr), dim_(0), - compare_cnt_(0) {} + compare_cnt_(0) { + if (metric_) { + distance_ = metric_->distance(); + batch_distance_ = metric_->batch_distance(); + } + } - void update(const HnswEntity *entity, const IndexMetric::Pointer &metric) { + void update(const HnswEntity *entity, + zvec::turbo::Quantizer::Pointer quantizer, + IndexMetric::Pointer metric) { entity_ = entity; - distance_ = metric->distance(); - batch_distance_ = metric->batch_distance(); + quantizer_ = std::move(quantizer); + metric_ = std::move(metric); + dist_impl_ = zvec::turbo::DistanceImpl{}; + if (metric_) { + distance_ = metric_->distance(); + batch_distance_ = metric_->batch_distance(); + } else { + distance_ = nullptr; + batch_distance_ = nullptr; + } } - void update(const HnswEntity *entity, const IndexMetric::Pointer &metric, - uint32_t dim) { + void update(const HnswEntity *entity, + zvec::turbo::Quantizer::Pointer quantizer, + IndexMetric::Pointer metric, uint32_t dim, + IndexMeta::DataType qmeta_data_type) { entity_ = entity; - distance_ = metric->distance(); - batch_distance_ = metric->batch_distance(); + quantizer_ = 
std::move(quantizer); + metric_ = std::move(metric); dim_ = dim; + qmeta_.set_meta(qmeta_data_type, dim); + dist_impl_ = zvec::turbo::DistanceImpl{}; + if (metric_) { + distance_ = metric_->distance(); + batch_distance_ = metric_->batch_distance(); + } else { + distance_ = nullptr; + batch_distance_ = nullptr; + } + } + + //! Replace the quantizer used by this calculator. Invalidates the + //! cached DistanceImpl; caller should follow up with reset_query. + inline void update_quantizer(zvec::turbo::Quantizer::Pointer quantizer) { + quantizer_ = std::move(quantizer); + dist_impl_ = zvec::turbo::DistanceImpl{}; } - inline void update_distance( - const IndexMetric::MatrixDistance &distance, - const IndexMetric::MatrixBatchDistance &batch_distance) { - distance_ = distance; - batch_distance_ = batch_distance; + //! Replace the IndexMetric fallback. + inline void update_metric(IndexMetric::Pointer metric) { + metric_ = std::move(metric); + if (metric_) { + distance_ = metric_->distance(); + batch_distance_ = metric_->batch_distance(); + } else { + distance_ = nullptr; + batch_distance_ = nullptr; + } } - //! Reset query vector data + //! Reset query vector data. Quantizes the query via the turbo + //! quantizer and caches a DistanceImpl for subsequent `dist(...)` + //! calls. Falls back to IndexMetric's raw query when turbo does not + //! support this metric/dtype combination. inline void reset_query(const void *query) { error_ = false; query_ = query; + if (quantizer_) { + dist_impl_ = quantizer_->distance(query, qmeta_); + } else { + dist_impl_ = zvec::turbo::DistanceImpl{}; + } } - //! Returns distance + //! Returns distance between two already-quantized vectors (pairwise). + //! Uses the scalar DistanceFunc bound by the last reset_query when + //! available; otherwise falls back to IndexMetric. 
inline dist_t dist(const void *vec_lhs, const void *vec_rhs) { if (ailego_unlikely(vec_lhs == nullptr || vec_rhs == nullptr)) { LOG_ERROR("Nullptr of dense vector"); @@ -98,18 +154,43 @@ class HnswDistCalculator { return 0.0f; } - float score{0.0f}; - + float score = 0.0f; + const auto &func = dist_impl_.func(); + if (func) { + // dist_impl_ holds the RAW dim expected by the turbo distance + // function. The metric-side dim_ is the inflated storage dim and + // would point past the data into the per-record extras. + func(vec_lhs, vec_rhs, dist_impl_.dim(), &score); + return score; + } + if (ailego_unlikely(!distance_)) { + LOG_ERROR("No distance handle available"); + error_ = true; + return 0.0f; + } distance_(vec_lhs, vec_rhs, dim_, &score); - return score; } //! Returns distance between query and vec. inline dist_t dist(const void *vec) { compare_cnt_++; - - return dist(vec, query_); + if (ailego_unlikely(vec == nullptr)) { + LOG_ERROR("Nullptr of dense vector"); + error_ = true; + return 0.0f; + } + if (dist_impl_.valid()) { + return dist_impl_(vec); + } + if (ailego_unlikely(!distance_ || query_ == nullptr)) { + LOG_ERROR("No distance handle or query available"); + error_ = true; + return 0.0f; + } + float score = 0.0f; + distance_(vec, query_, dim_, &score); + return score; } //! Return distance between query and node id. @@ -128,15 +209,23 @@ class HnswDistCalculator { error_ = true; return 0.0f; } - - return dist(feat, query_); + if (dist_impl_.valid()) { + return dist_impl_(feat); + } + if (ailego_unlikely(!distance_ || query_ == nullptr)) { + LOG_ERROR("No distance handle or query available"); + error_ = true; + return 0.0f; + } + float score = 0.0f; + distance_(feat, query_, dim_, &score); + return score; } //! 
Return dist node lhs between node rhs inline dist_t dist(node_id_t lhs, node_id_t rhs) { compare_cnt_++; - IndexStorage::MemoryBlock vec_block_feat; int ret = entity_->get_vector(lhs, vec_block_feat); if (ailego_unlikely(ret != 0)) { @@ -177,8 +266,19 @@ class HnswDistCalculator { void batch_dist(const void **vecs, size_t num, dist_t *distances) { compare_cnt_++; - - batch_distance_(vecs, query_, num, dim_, distances); + if (dist_impl_.batch_valid()) { + dist_impl_.batch(vecs, num, distances); + return; + } + if (batch_distance_ && query_ != nullptr) { + batch_distance_(vecs, query_, num, dim_, distances); + return; + } + // Last-resort scalar fallback using whatever single-distance path + // is available. + for (size_t i = 0; i < num; ++i) { + distances[i] = dist(vecs[i]); + } } inline dist_t batch_dist(node_id_t id) { @@ -197,10 +297,19 @@ class HnswDistCalculator { error_ = true; return 0.0f; } - dist_t score = 0; - batch_distance_(&feat, query_, 1, dim_, &score); - - return score; + if (dist_impl_.batch_valid()) { + dist_t score = 0; + const void *feats[1] = {feat}; + dist_impl_.batch(feats, 1, &score); + return score; + } + if (batch_distance_ && query_ != nullptr) { + dist_t score = 0; + const void *feats[1] = {feat}; + batch_distance_(feats, query_, 1, dim_, &score); + return score; + } + return dist(feat); } inline void clear() { @@ -225,6 +334,12 @@ class HnswDistCalculator { return dim_; } + //! Expose the underlying turbo quantizer (for clients that need to + //! reach lower-level turbo APIs). 
+ inline const zvec::turbo::Quantizer::Pointer &quantizer() const { + return quantizer_; + } + private: HnswDistCalculator(const HnswDistCalculator &) = delete; HnswDistCalculator &operator=(const HnswDistCalculator &) = delete; @@ -232,14 +347,18 @@ class HnswDistCalculator { private: const HnswEntity *entity_; - IndexMetric::MatrixDistance distance_; - IndexMetric::MatrixBatchDistance batch_distance_; + zvec::turbo::Quantizer::Pointer quantizer_{}; + IndexMetric::Pointer metric_{}; + zvec::turbo::DistanceImpl dist_impl_{}; + IndexQueryMeta qmeta_{}; + + IndexMetric::MatrixDistance distance_{nullptr}; + IndexMetric::MatrixBatchDistance batch_distance_{nullptr}; const void *query_; uint32_t dim_; uint32_t compare_cnt_; // record distance compute times - // uint32_t compare_cnt_batch_; // record batch distance compute time bool error_{false}; }; diff --git a/src/core/algorithm/hnsw/hnsw_params.h b/src/core/algorithm/hnsw/hnsw_params.h index 4caa148d5..4d1309a0f 100644 --- a/src/core/algorithm/hnsw/hnsw_params.h +++ b/src/core/algorithm/hnsw/hnsw_params.h @@ -111,5 +111,10 @@ static const std::string PARAM_HNSW_REDUCER_EFCONSTRUCTION( static const std::string PARAM_HNSW_STREAMER_USE_CONTIGUOUS_MEMORY( "proxima.hnsw.streamer.use_contiguous_memory"); +//! Turbo quantizer class name used by HnswStreamer. Defaults to +//! "Fp32Quantizer" to preserve the legacy FP32 distance path. 
+static const std::string PARAM_HNSW_STREAMER_TURBO_QUANTIZER_CLASS( + "proxima.hnsw.streamer.turbo_quantizer_class"); + } // namespace core } // namespace zvec diff --git a/src/core/algorithm/hnsw/hnsw_streamer.cc b/src/core/algorithm/hnsw/hnsw_streamer.cc index 935cae5d4..1830f52e7 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.cc +++ b/src/core/algorithm/hnsw/hnsw_streamer.cc @@ -21,6 +21,7 @@ #include "hnsw_context.h" #include "hnsw_dist_calculator.h" #include "hnsw_index_provider.h" +#include "hnsw_params.h" namespace zvec { namespace core { @@ -71,6 +72,13 @@ int HnswStreamer::init(const IndexMeta &imeta, const ailego::Params &params) { params.get(PARAM_HNSW_STREAMER_USE_CONTIGUOUS_MEMORY, &use_contiguous_memory_); + turbo_quantizer_class_ = "Fp32Quantizer"; + params.get(PARAM_HNSW_STREAMER_TURBO_QUANTIZER_CLASS, + &turbo_quantizer_class_); + if (turbo_quantizer_class_.empty()) { + turbo_quantizer_class_ = "Fp32Quantizer"; + } + params.get(PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_); if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) { LOG_ERROR("[%s] must be >= [%s]", @@ -183,6 +191,8 @@ int HnswStreamer::cleanup(void) { meta_.clear(); metric_.reset(); + add_quantizer_.reset(); + search_quantizer_.reset(); stats_.clear(); if (entity_) { entity_->cleanup(); @@ -314,16 +324,36 @@ int HnswStreamer::open(IndexStorage::Pointer stg) { return IndexError_InvalidArgument; } - add_distance_ = metric_->distance(); - add_batch_distance_ = metric_->batch_distance(); - - search_distance_ = add_distance_; - search_batch_distance_ = add_batch_distance_; - + // Create and initialize the turbo quantizer used by HnswDistCalculator. 
+ add_quantizer_ = IndexFactory::CreateQuantizer(turbo_quantizer_class_); + if (!add_quantizer_) { + LOG_ERROR("Failed to create turbo quantizer '%s'", + turbo_quantizer_class_.c_str()); + return IndexError_NoExist; + } + ret = add_quantizer_->init(meta_, meta_.streamer_params()); + if (ret != 0) { + LOG_ERROR("Failed to init turbo quantizer '%s', ret=%d", + turbo_quantizer_class_.c_str(), ret); + return ret; + } + // Default: use the same quantizer for search. When the underlying + // metric exposes a query-side variant (e.g. MipsSquaredEuclidean) we + // still keep the add_quantizer_ as a conservative choice here. Any + // specialized handling can be layered on top later. + search_quantizer_ = add_quantizer_; + + // Resolve the search-side metric. For metrics like MipsSquaredEuclidean + // the index distance (used while building the graph) is not the same as + // the user-facing query distance: the metric exposes a `query_metric` + // (e.g. InnerProduct) which should be used at search time so that the + // top-k results reflect the intended ranking. Fall back to `metric_` + // when no usable query metric is provided. 
if (metric_->query_metric() && metric_->query_metric()->distance() && metric_->query_metric()->batch_distance()) { - search_distance_ = metric_->query_metric()->distance(); - search_batch_distance_ = metric_->query_metric()->batch_distance(); + search_metric_ = metric_->query_metric(); + } else { + search_metric_ = metric_; } // Create algorithm based on entity storage mode @@ -410,8 +440,8 @@ IndexStreamer::Context::Pointer HnswStreamer::create_context(void) const { LOG_ERROR("CreateContext clone init failed"); return Context::Pointer(); } - HnswContext *ctx = - new (std::nothrow) HnswContext(meta_.dimension(), metric_, entity); + HnswContext *ctx = new (std::nothrow) HnswContext( + meta_.dimension(), add_quantizer_, meta_.data_type(), metric_, entity); if (ailego_unlikely(ctx == nullptr)) { LOG_ERROR("Failed to new HnswContext"); return Context::Pointer(); @@ -465,8 +495,8 @@ int HnswStreamer::update_context(HnswContext *ctx) const { ctx->set_min_scan_limit(min_scan_limit_); ctx->set_max_scan_ratio(max_scan_ratio_); ctx->set_bruteforce_threshold(bruteforce_threshold_); - return ctx->update_context(HnswContext::kStreamerContext, meta_, metric_, - entity, magic_); + return ctx->update_context(HnswContext::kStreamerContext, meta_, + add_quantizer_, metric_, entity, magic_); } //! 
Add a vector with id into index @@ -511,7 +541,8 @@ int HnswStreamer::add_with_id_impl(uint32_t id, const void *query, AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); }); ctx->clear(); - ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_); + ctx->update_dist_caculator_quantizer(add_quantizer_); + ctx->update_dist_caculator_metric(metric_); ctx->reset_query(query); ctx->check_need_adjuct_ctx(entity_->doc_cnt()); @@ -591,7 +622,8 @@ int HnswStreamer::add_impl(uint64_t pkey, const void *query, AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); }); ctx->clear(); - ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_); + ctx->update_dist_caculator_quantizer(add_quantizer_); + ctx->update_dist_caculator_metric(metric_); ctx->reset_query(query); ctx->check_need_adjuct_ctx(entity_->doc_cnt()); @@ -663,7 +695,8 @@ int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta, } ctx->clear(); - ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_); + ctx->update_dist_caculator_quantizer(search_quantizer_); + ctx->update_dist_caculator_metric(search_metric_); ctx->resize_results(count); ctx->check_need_adjuct_ctx(entity_->doc_cnt()); for (size_t q = 0; q < count; ++q) { @@ -733,7 +766,8 @@ int HnswStreamer::search_bf_impl( } ctx->clear(); - ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_); + ctx->update_dist_caculator_quantizer(search_quantizer_); + ctx->update_dist_caculator_metric(search_metric_); ctx->resize_results(count); if (ctx->group_by_search()) { @@ -827,7 +861,8 @@ int HnswStreamer::search_bf_by_p_keys_impl( } ctx->clear(); - ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_); + ctx->update_dist_caculator_quantizer(search_quantizer_); + ctx->update_dist_caculator_metric(search_metric_); ctx->resize_results(count); if (ctx->group_by_search()) { diff --git a/src/core/algorithm/hnsw/hnsw_streamer.h 
b/src/core/algorithm/hnsw/hnsw_streamer.h index 3f4511ab1..8aaea0cd2 100644 --- a/src/core/algorithm/hnsw/hnsw_streamer.h +++ b/src/core/algorithm/hnsw/hnsw_streamer.h @@ -14,6 +14,7 @@ #pragma once #include +#include #include #include "hnsw_algorithm.h" #include "hnsw_streamer_entity.h" @@ -199,12 +200,19 @@ class HnswStreamer : public IndexStreamer { HnswAlgorithmBase::UPointer alg_; IndexMeta meta_{}; IndexMetric::Pointer metric_{}; - - IndexMetric::MatrixDistance add_distance_{}; - IndexMetric::MatrixDistance search_distance_{}; - - IndexMetric::MatrixBatchDistance add_batch_distance_{}; - IndexMetric::MatrixBatchDistance search_batch_distance_{}; + //! Search-side metric, used as fallback when the search-side turbo + //! quantizer does not implement a distance for the current metric/dtype + //! (e.g. MipsSquaredEuclidean's query_metric is InnerProduct). + IndexMetric::Pointer search_metric_{}; + + //! Turbo quantizers bound to this streamer. `add_quantizer_` is used + //! when inserting vectors (mirrors the old `metric_->distance()`). + //! `search_quantizer_` is used for queries; open() currently aliases it + //! to `add_quantizer_` for every metric, even one that exposes a + //! query-side variant. 
+ zvec::turbo::Quantizer::Pointer add_quantizer_{}; + zvec::turbo::Quantizer::Pointer search_quantizer_{}; + std::string turbo_quantizer_class_{}; Stats stats_{}; std::mutex mutex_{}; diff --git a/src/core/framework/index_factory.cc b/src/core/framework/index_factory.cc index 69fe0e98d..e93f57bc7 100644 --- a/src/core/framework/index_factory.cc +++ b/src/core/framework/index_factory.cc @@ -257,5 +257,18 @@ std::vector IndexFactory::AllRefiners(void) { return ailego::Factory::Classes(); } +std::shared_ptr IndexFactory::CreateQuantizer( + const std::string &name) { + return ailego::Factory::MakeShared(name.c_str()); +} + +bool IndexFactory::HasQuantizer(const std::string &name) { + return ailego::Factory::Has(name.c_str()); +} + +std::vector IndexFactory::AllQuantizers(void) { + return ailego::Factory::Classes(); +} + } // namespace core } // namespace zvec diff --git a/src/core/framework/index_meta.cc b/src/core/framework/index_meta.cc index 11d54cb63..d0eadb02d 100644 --- a/src/core/framework/index_meta.cc +++ b/src/core/framework/index_meta.cc @@ -30,7 +30,8 @@ struct IndexMetaFormatHeader { uint32_t space_id; uint32_t attachment_offset; uint32_t attachment_size; - uint8_t reserved_[4092]; + uint32_t extra_meta_size; + uint8_t reserved_[4088]; }; static_assert(sizeof(IndexMetaFormatHeader) % 32 == 0, @@ -47,6 +48,7 @@ void IndexMeta::serialize(std::string *out) const { format.dimension = dimension_; format.unit_size = unit_size_; format.space_id = space_id_; + format.extra_meta_size = extra_meta_size_; if (!metric_name_.empty()) { ailego::Params item; diff --git a/src/core/metric/quantized_integer_metric.cc b/src/core/metric/quantized_integer_metric.cc index e4db83146..6fa492d75 100644 --- a/src/core/metric/quantized_integer_metric.cc +++ b/src/core/metric/quantized_integer_metric.cc @@ -98,22 +98,41 @@ class QuantizedIntegerMetric : public IndexMetric { if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { auto turbo_ret = turbo::get_distance_func( 
turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8, - turbo::QuantizeType::kDefault); + static_cast(quantize_type_)); if (turbo_ret && m == 1 && n == 1) { - return turbo_ret; + return wrap_turbo_distance(std::move(turbo_ret)); } + return DistanceMatrixCompute(m, n); } if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + auto turbo_ret = turbo::get_distance_func( + turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt4, + static_cast(quantize_type_)); + if (turbo_ret && m == 1 && n == 1) { + return wrap_turbo_distance(std::move(turbo_ret)); + } return DistanceMatrixCompute(m, n); } break; case MetricType::kInnerProduct: if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { + auto turbo_ret = turbo::get_distance_func( + turbo::MetricType::kInnerProduct, turbo::DataType::kInt8, + static_cast(quantize_type_)); + if (turbo_ret && m == 1 && n == 1) { + return wrap_turbo_distance(std::move(turbo_ret)); + } return DistanceMatrixCompute(m, n); } if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + auto turbo_ret = turbo::get_distance_func( + turbo::MetricType::kInnerProduct, turbo::DataType::kInt4, + static_cast(quantize_type_)); + if (turbo_ret && m == 1 && n == 1) { + return wrap_turbo_distance(std::move(turbo_ret)); + } return DistanceMatrixCompute(m, n); } break; @@ -139,13 +158,19 @@ class QuantizedIntegerMetric : public IndexMetric { if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { auto turbo_ret = turbo::get_distance_func( turbo::MetricType::kCosine, turbo::DataType::kInt8, - turbo::QuantizeType::kDefault); + static_cast(quantize_type_)); if (turbo_ret) { - return turbo_ret; + return wrap_turbo_distance(std::move(turbo_ret)); } return DistanceMatrixCompute(m, n); } if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + auto turbo_ret = turbo::get_distance_func( + turbo::MetricType::kCosine, turbo::DataType::kInt4, + static_cast(quantize_type_)); + if (turbo_ret) { + return wrap_turbo_distance(std::move(turbo_ret)); + } 
return DistanceMatrixCompute(m, n); } break; @@ -160,15 +185,21 @@ class QuantizedIntegerMetric : public IndexMetric { if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { auto turbo_ret = turbo::get_batch_distance_func( turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8, - turbo::QuantizeType::kDefault); + static_cast(quantize_type_)); if (turbo_ret) { - return turbo_ret; + return wrap_turbo_batch_distance(std::move(turbo_ret)); } return reinterpret_cast( BaseDistanceBatchWithScoreUnquantized::ComputeBatch); } if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + auto turbo_ret = turbo::get_batch_distance_func( + turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt4, + static_cast(quantize_type_)); + if (turbo_ret) { + return wrap_turbo_batch_distance(std::move(turbo_ret)); + } return reinterpret_cast( BaseDistanceBatchWithScoreUnquantized::ComputeBatch); @@ -177,11 +208,23 @@ class QuantizedIntegerMetric : public IndexMetric { case MetricType::kInnerProduct: if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { + auto turbo_ret = turbo::get_batch_distance_func( + turbo::MetricType::kInnerProduct, turbo::DataType::kInt8, + static_cast(quantize_type_)); + if (turbo_ret) { + return wrap_turbo_batch_distance(std::move(turbo_ret)); + } return reinterpret_cast( BaseDistanceBatchWithScoreUnquantized::ComputeBatch); } if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + auto turbo_ret = turbo::get_batch_distance_func( + turbo::MetricType::kInnerProduct, turbo::DataType::kInt4, + static_cast(quantize_type_)); + if (turbo_ret) { + return wrap_turbo_batch_distance(std::move(turbo_ret)); + } return reinterpret_cast( BaseDistanceBatchWithScoreUnquantized::ComputeBatch); @@ -215,15 +258,21 @@ class QuantizedIntegerMetric : public IndexMetric { if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { auto turbo_ret = turbo::get_batch_distance_func( turbo::MetricType::kCosine, turbo::DataType::kInt8, - turbo::QuantizeType::kDefault); + 
static_cast(quantize_type_)); if (turbo_ret) { - return turbo_ret; + return wrap_turbo_batch_distance(std::move(turbo_ret)); } return reinterpret_cast( BaseDistanceBatchWithScoreUnquantized< CosineMinusInnerProduct, int8_t, 12, 2>::ComputeBatch); } if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + auto turbo_ret = turbo::get_batch_distance_func( + turbo::MetricType::kCosine, turbo::DataType::kInt4, + static_cast(quantize_type_)); + if (turbo_ret) { + return wrap_turbo_batch_distance(std::move(turbo_ret)); + } return reinterpret_cast( BaseDistanceBatchWithScoreUnquantized< CosineMinusInnerProduct, uint8_t, 12, 2>::ComputeBatch); @@ -290,7 +339,12 @@ class QuantizedIntegerMetric : public IndexMetric { turbo::MetricType::kCosine, turbo::DataType::kInt8, turbo::QuantizeType::kDefault); if (turbo_ret) { - return turbo_ret; + // Turbo's batch distance function preprocesses the query internally + // (per-call, into a thread-local buffer) so the single-distance path + // can keep receiving raw int8 queries. Return nullptr here to avoid + // a global shift that would corrupt the symmetric single-distance + // contract used by node-vs-node calls. + return nullptr; } return CosineMinusInnerProductDistanceBatchWithScoreUnquantized< int8_t, 1, 1>::GetQueryPreprocessFunc(); @@ -300,7 +354,8 @@ class QuantizedIntegerMetric : public IndexMetric { turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8, turbo::QuantizeType::kDefault); if (turbo_ret) { - return turbo_ret; + // See comment above: turbo handles query preprocessing internally. + return nullptr; } return SquaredEuclideanDistanceBatchWithScoreUnquantized< int8_t, 1, 1>::GetQueryPreprocessFunc(); @@ -310,6 +365,48 @@ class QuantizedIntegerMetric : public IndexMetric { private: + //! Extras embedded in each quantized record by the converter/reformer. + //! The HnswStreamer (and friends) inflate the meta dimension by these + //! "extra" units so element_size() reflects per-vector storage. Turbo + //! 
distance funcs expect the *raw* original dim, so we need to subtract. + //! + //! Layouts: + //! - IntegerStreamingReformer (IP/L2): + //! INT8: data + 20 bytes extras (extra_units = 20) + //! INT4: data + 32 nibbles extras (extra_units = 32 == 16 bytes) + //! - CosineConverter (Cosine): + //! INT8: data + 20 bytes extras + 4 bytes norm (extra_units = 24) + //! INT4: data + 32 nibbles extras + 8 nibbles norm (extra_units = 40) + size_t extra_dim() const { + bool is_cosine = (origin_metric_type_ == MetricType::kCosine || + origin_metric_type_ == MetricType::kNormalizedCosine); + if (meta_.data_type() == IndexMeta::DataType::DT_INT8) { + return is_cosine ? 24 : 20; + } + if (meta_.data_type() == IndexMeta::DataType::DT_INT4) { + return is_cosine ? 40 : 32; + } + return 0; + } + + //! Wrap a turbo distance function so callers can keep passing the inflated + //! dim from IndexMeta::dimension(); turbo expects the raw original dim. + MatrixDistance wrap_turbo_distance(turbo::DistanceFunc f) const { + size_t extra = extra_dim(); + return [f = std::move(f), extra](const void *m, const void *q, size_t dim, + float *out) { f(m, q, dim - extra, out); }; + } + + //! Wrap a turbo batch distance function with the same dim adjustment. + MatrixBatchDistance wrap_turbo_batch_distance( + turbo::BatchDistanceFunc f) const { + size_t extra = extra_dim(); + return [f = std::move(f), extra](const void **m, const void *q, size_t num, + size_t dim, float *out) { + f(m, q, num, dim - extra, out); + }; + } + //! Returns m x n distance matrix compute function. template