diff --git a/src/db/index/segment/segment_helper.cc b/src/db/index/segment/segment_helper.cc index 2c1038f2c..86852d6bb 100644 --- a/src/db/index/segment/segment_helper.cc +++ b/src/db/index/segment/segment_helper.cc @@ -655,7 +655,7 @@ Status SegmentHelper::ReduceVectorIndex( output_block_metas->push_back(new_block_meta); } else { - auto vector_index_path = FileHelper::MakeQuantizeVectorIndexPath( + auto vector_index_path = FileHelper::MakeVectorIndexPath( output_segment_path, field->name(), vector_block_id); auto field_without_quantize = std::make_shared(*field); @@ -697,6 +697,9 @@ Status SegmentHelper::ReduceVectorIndex( new_block_meta.set_id(vector_block_id); new_block_meta.set_type(BlockType::VECTOR_INDEX); new_block_meta.set_columns({field->name()}); + new_block_meta.set_min_doc_id(min_doc_id); + new_block_meta.set_max_doc_id(max_doc_id); + new_block_meta.set_doc_count(doc_count); output_block_metas->push_back(new_block_meta); // create quantize index @@ -731,6 +734,9 @@ Status SegmentHelper::ReduceVectorIndex( new_block_meta.set_id(vector_quan_block_id); new_block_meta.set_type(BlockType::VECTOR_INDEX_QUANTIZE); new_block_meta.set_columns({field->name()}); + new_block_meta.set_min_doc_id(min_doc_id); + new_block_meta.set_max_doc_id(max_doc_id); + new_block_meta.set_doc_count(doc_count); output_block_metas->push_back(new_block_meta); } } diff --git a/tests/db/collection_test.cc b/tests/db/collection_test.cc index 8738895e3..974d2fed2 100644 --- a/tests/db/collection_test.cc +++ b/tests/db/collection_test.cc @@ -2588,82 +2588,99 @@ TEST_F(CollectionTest, Feature_Optimize_General) { } TEST_F(CollectionTest, Feature_Optimize_Repeated) { - int doc_count = 1000; + auto func = [&](QuantizeType quantize_type = QuantizeType::UNDEFINED) { + FileHelper::RemoveDirectory(col_path); - // create empty collection - auto schema = TestHelper::CreateSchemaWithVectorIndex(); - auto options = CollectionOptions{false, true, 64 * 1024 * 1024}; - auto collection = TestHelper::CreateCollectionWithDoc( - col_path, *schema, options, 0, doc_count, false); + int doc_count = 1000; - auto check_doc = [&]() { - for (int i = 0; i < doc_count; i++) { - auto expect_doc = TestHelper::CreateDoc(i, *schema); - auto result = collection->Fetch({expect_doc.pk()}); - ASSERT_TRUE(result.has_value()); - ASSERT_EQ(result.value().size(), 1); - ASSERT_EQ(result.value().count(expect_doc.pk()), 1); - auto doc = result.value()[expect_doc.pk()]; - if (doc == nullptr) { - std::cout << "doc is null, pk: " << expect_doc.pk() << std::endl; - } - ASSERT_NE(doc, nullptr); - if (*doc != expect_doc) { - std::cout << " doc:" << doc->to_detail_string() << std::endl; - std::cout << "expect_doc:" << expect_doc.to_detail_string() - << std::endl; - } - ASSERT_EQ(*doc, expect_doc); + // create empty collection + CollectionSchema::Ptr schema; + if (quantize_type == QuantizeType::UNDEFINED) { + schema = TestHelper::CreateSchemaWithVectorIndex(); + } else { + schema = TestHelper::CreateSchemaWithVectorIndex( + false, "demo", + std::make_shared(MetricType::IP, 16, 200, + quantize_type)); } - }; - - check_doc(); - std::cout << "check success 1" << std::endl; + auto options = CollectionOptions{false, true, 64 * 1024 * 1024}; + auto collection = TestHelper::CreateCollectionWithDoc( + col_path, *schema, options, 0, doc_count, false); - ASSERT_TRUE(collection->Flush().ok()); - auto stats = collection->Stats().value(); - ASSERT_EQ(stats.doc_count, doc_count); - ASSERT_EQ(stats.index_completeness["dense_fp32"], 0); + auto check_doc = [&]() { + for (int i = 0; i < doc_count; i++) { + auto expect_doc = TestHelper::CreateDoc(i, *schema); + auto result = collection->Fetch({expect_doc.pk()}); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value().size(), 1); + ASSERT_EQ(result.value().count(expect_doc.pk()), 1); + auto doc = result.value()[expect_doc.pk()]; + if (doc == nullptr) { + std::cout << "doc is null, pk: " << expect_doc.pk() << std::endl; + } + ASSERT_NE(doc, nullptr); + if (*doc != expect_doc) { + std::cout << " doc:" << doc->to_detail_string() << std::endl; + std::cout << "expect_doc:" << expect_doc.to_detail_string() + << std::endl; + } + ASSERT_EQ(*doc, expect_doc); + } + }; - auto s = collection->Optimize(); - ASSERT_TRUE(s.ok()); - stats = collection->Stats().value(); - ASSERT_EQ(stats.doc_count, doc_count); - ASSERT_EQ(stats.index_completeness["dense_fp32"], 1); + check_doc(); + std::cout << "check success 1" << std::endl; - int loop_count = 10; - uint64_t start_doc_id = doc_count; - for (int i = 0; i < loop_count; i++) { - std::cout << "loop: " << i << " begin" << std::endl; + ASSERT_TRUE(collection->Flush().ok()); + auto stats = collection->Stats().value(); + ASSERT_EQ(stats.doc_count, doc_count); + ASSERT_EQ(stats.index_completeness["dense_fp32"], 0); - s = TestHelper::CollectionInsertDoc(collection, start_doc_id, - start_doc_id + 1); + auto s = collection->Optimize(); ASSERT_TRUE(s.ok()); - stats = collection->Stats().value(); - ASSERT_EQ(stats.doc_count, doc_count + i + 1); - ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], - 1.0 * (doc_count + i) / (doc_count + i + 1)); + ASSERT_EQ(stats.doc_count, doc_count); + ASSERT_EQ(stats.index_completeness["dense_fp32"], 1); + int loop_count = 10; + uint64_t start_doc_id = doc_count; + for (int i = 0; i < loop_count; i++) { + std::cout << "loop: " << i << " begin" << std::endl; - s = collection->Optimize(); - if (!s.ok()) { - std::cout << "optimize failed: " << s.message() << std::endl; - } - ASSERT_TRUE(s.ok()); + s = TestHelper::CollectionInsertDoc(collection, start_doc_id, + start_doc_id + 1); + ASSERT_TRUE(s.ok()); - start_doc_id += 1; + stats = collection->Stats().value(); + ASSERT_EQ(stats.doc_count, doc_count + i + 1); + ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], + 1.0 * (doc_count + i) / (doc_count + i + 1)); - std::cout << "loop: " << i << " end" << std::endl; - } - stats = collection->Stats().value(); - ASSERT_EQ(stats.doc_count, doc_count + loop_count); - ASSERT_EQ(stats.index_completeness["dense_fp32"], 1); + s = collection->Optimize(); + if (!s.ok()) { + std::cout << "optimize failed: " << s.message() << std::endl; + } + ASSERT_TRUE(s.ok()); - doc_count += loop_count; - check_doc(); - std::cout << "check success 2" << std::endl; + start_doc_id += 1; + + std::cout << "loop: " << i << " end" << std::endl; + } + + stats = collection->Stats().value(); + ASSERT_EQ(stats.doc_count, doc_count + loop_count); + ASSERT_EQ(stats.index_completeness["dense_fp32"], 1); + + doc_count += loop_count; + check_doc(); + std::cout << "check success 2" << std::endl; + }; + + // unquantized + func(); + // quantized + func(QuantizeType::FP16); } TEST_F(CollectionTest, Feature_Optimize_MetricType) { @@ -4346,7 +4363,8 @@ TEST_F(CollectionTest, Feature_Query_NullableFilter_WithoutIndex) { auto run_test = [&](bool with_scalar_index) { FileHelper::RemoveDirectory(col_path); IndexParams::Ptr scalar_idx = - with_scalar_index ? std::make_shared(false) : nullptr; + with_scalar_index ? std::make_shared(false) + : nullptr; auto schema = TestHelper::CreateNormalSchema(/*nullable=*/true, "demo", scalar_idx); CollectionOptions options{false, true, 100 * 1024 * 1024};