Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/db/index/segment/segment_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ Status SegmentHelper::ReduceVectorIndex(

output_block_metas->push_back(new_block_meta);
} else {
auto vector_index_path = FileHelper::MakeQuantizeVectorIndexPath(
auto vector_index_path = FileHelper::MakeVectorIndexPath(
output_segment_path, field->name(), vector_block_id);

auto field_without_quantize = std::make_shared<FieldSchema>(*field);
Expand Down Expand Up @@ -697,6 +697,9 @@ Status SegmentHelper::ReduceVectorIndex(
new_block_meta.set_id(vector_block_id);
new_block_meta.set_type(BlockType::VECTOR_INDEX);
new_block_meta.set_columns({field->name()});
new_block_meta.set_min_doc_id(min_doc_id);
new_block_meta.set_max_doc_id(max_doc_id);
new_block_meta.set_doc_count(doc_count);
output_block_metas->push_back(new_block_meta);

// create quantize index
Expand Down Expand Up @@ -731,6 +734,9 @@ Status SegmentHelper::ReduceVectorIndex(
new_block_meta.set_id(vector_quan_block_id);
new_block_meta.set_type(BlockType::VECTOR_INDEX_QUANTIZE);
new_block_meta.set_columns({field->name()});
new_block_meta.set_min_doc_id(min_doc_id);
new_block_meta.set_max_doc_id(max_doc_id);
new_block_meta.set_doc_count(doc_count);
output_block_metas->push_back(new_block_meta);
}
}
Expand Down
142 changes: 80 additions & 62 deletions tests/db/collection_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2588,82 +2588,99 @@ TEST_F(CollectionTest, Feature_Optimize_General) {
}

TEST_F(CollectionTest, Feature_Optimize_Repeated) {
int doc_count = 1000;
auto func = [&](QuantizeType quantize_type = QuantizeType::UNDEFINED) {
FileHelper::RemoveDirectory(col_path);

// create empty collection
auto schema = TestHelper::CreateSchemaWithVectorIndex();
auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
auto collection = TestHelper::CreateCollectionWithDoc(
col_path, *schema, options, 0, doc_count, false);
int doc_count = 1000;

auto check_doc = [&]() {
for (int i = 0; i < doc_count; i++) {
auto expect_doc = TestHelper::CreateDoc(i, *schema);
auto result = collection->Fetch({expect_doc.pk()});
ASSERT_TRUE(result.has_value());
ASSERT_EQ(result.value().size(), 1);
ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
auto doc = result.value()[expect_doc.pk()];
if (doc == nullptr) {
std::cout << "doc is null, pk: " << expect_doc.pk() << std::endl;
}
ASSERT_NE(doc, nullptr);
if (*doc != expect_doc) {
std::cout << " doc:" << doc->to_detail_string() << std::endl;
std::cout << "expect_doc:" << expect_doc.to_detail_string()
<< std::endl;
}
ASSERT_EQ(*doc, expect_doc);
// create empty collection
CollectionSchema::Ptr schema;
if (quantize_type == QuantizeType::UNDEFINED) {
schema = TestHelper::CreateSchemaWithVectorIndex();
} else {
schema = TestHelper::CreateSchemaWithVectorIndex(
false, "demo",
std::make_shared<HnswIndexParams>(MetricType::IP, 16, 200,
quantize_type));
}
};

check_doc();
std::cout << "check success 1" << std::endl;
auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
auto collection = TestHelper::CreateCollectionWithDoc(
col_path, *schema, options, 0, doc_count, false);

ASSERT_TRUE(collection->Flush().ok());
auto stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count);
ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);
auto check_doc = [&]() {
for (int i = 0; i < doc_count; i++) {
auto expect_doc = TestHelper::CreateDoc(i, *schema);
auto result = collection->Fetch({expect_doc.pk()});
ASSERT_TRUE(result.has_value());
ASSERT_EQ(result.value().size(), 1);
ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
auto doc = result.value()[expect_doc.pk()];
if (doc == nullptr) {
std::cout << "doc is null, pk: " << expect_doc.pk() << std::endl;
}
ASSERT_NE(doc, nullptr);
if (*doc != expect_doc) {
std::cout << " doc:" << doc->to_detail_string() << std::endl;
std::cout << "expect_doc:" << expect_doc.to_detail_string()
<< std::endl;
}
ASSERT_EQ(*doc, expect_doc);
}
};

auto s = collection->Optimize();
ASSERT_TRUE(s.ok());
stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count);
ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
check_doc();
std::cout << "check success 1" << std::endl;

int loop_count = 10;
uint64_t start_doc_id = doc_count;
for (int i = 0; i < loop_count; i++) {
std::cout << "loop: " << i << " begin" << std::endl;
ASSERT_TRUE(collection->Flush().ok());
auto stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count);
ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

s = TestHelper::CollectionInsertDoc(collection, start_doc_id,
start_doc_id + 1);
auto s = collection->Optimize();
ASSERT_TRUE(s.ok());

stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count + i + 1);
ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"],
1.0 * (doc_count + i) / (doc_count + i + 1));
ASSERT_EQ(stats.doc_count, doc_count);
ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

int loop_count = 10;
uint64_t start_doc_id = doc_count;
for (int i = 0; i < loop_count; i++) {
std::cout << "loop: " << i << " begin" << std::endl;

s = collection->Optimize();
if (!s.ok()) {
std::cout << "optimize failed: " << s.message() << std::endl;
}
ASSERT_TRUE(s.ok());
s = TestHelper::CollectionInsertDoc(collection, start_doc_id,
start_doc_id + 1);
ASSERT_TRUE(s.ok());

start_doc_id += 1;
stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count + i + 1);
ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"],
1.0 * (doc_count + i) / (doc_count + i + 1));

std::cout << "loop: " << i << " end" << std::endl;
}

stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count + loop_count);
ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
s = collection->Optimize();
if (!s.ok()) {
std::cout << "optimize failed: " << s.message() << std::endl;
}
ASSERT_TRUE(s.ok());

doc_count += loop_count;
check_doc();
std::cout << "check success 2" << std::endl;
start_doc_id += 1;

std::cout << "loop: " << i << " end" << std::endl;
}

stats = collection->Stats().value();
ASSERT_EQ(stats.doc_count, doc_count + loop_count);
ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

doc_count += loop_count;
check_doc();
std::cout << "check success 2" << std::endl;
};

// unquantized
func();
// quantized
func(QuantizeType::FP16);
}

TEST_F(CollectionTest, Feature_Optimize_MetricType) {
Expand Down Expand Up @@ -4346,7 +4363,8 @@ TEST_F(CollectionTest, Feature_Query_NullableFilter_WithoutIndex) {
auto run_test = [&](bool with_scalar_index) {
FileHelper::RemoveDirectory(col_path);
IndexParams::Ptr scalar_idx =
with_scalar_index ? std::make_shared<InvertIndexParams>(false) : nullptr;
with_scalar_index ? std::make_shared<InvertIndexParams>(false)
: nullptr;
auto schema =
TestHelper::CreateNormalSchema(/*nullable=*/true, "demo", scalar_idx);
CollectionOptions options{false, true, 100 * 1024 * 1024};
Expand Down
Loading