-
Notifications
You must be signed in to change notification settings - Fork 554
feat: integrate OMEGA adaptive early termination into zvec #301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ebd71b0
d0e6be3
7890bcd
ccbb36b
4fbdbd4
65ee973
4a19996
3d6cb13
87cc148
607b17c
9f38ea2
4f982d0
7e0042b
8640773
f770de3
347bcf4
bb8c5a2
55875cd
5a195f0
53e0b55
420b4c4
27fda47
543d16d
9d750ff
308d8ba
e548017
5300d35
4871eae
5df2d5a
4078546
acf2a83
c03a460
66f6533
16e2fc4
4f66406
c5ce7cd
0f53cf6
97cd0f6
bbd3259
ca7e2e3
6d1da95
255fd53
9027bb9
6f3d2b4
33cea2f
6a2ca32
deed4ad
3eaadd2
af7dbac
22d2308
7ae1d76
7fc1894
5c453ac
08bde0c
2a4ed6a
209cdae
567ea41
97c9722
cf0a70a
fecd57e
16be71e
16b2605
e6e5113
f1caada
cfe6756
0578983
8f73f01
52e657c
d87adfc
b279fa2
f2e1dc8
4f67fb1
0013fb4
dbba6dc
1c53fd5
9d4db08
7a9d8e2
c2a82c9
1d3d89e
b51bde8
22bbd40
6dd8289
e898606
9bae453
546a838
43ca6ec
61dc836
f086f4f
c7e4449
5a30ab4
d98b153
758895f
a0c59c1
ec74517
7d1c191
0968728
1976fc9
2c30539
6a53e63
07980c7
90ffba5
0b7309c
0c6f8c4
43b3afd
be84830
6607743
0c728dd
77f56b8
527dc70
013795d
9258982
fceebba
5f36f3c
7dfd196
63bc93b
c045298
02246e3
346d8b6
aec406c
c173e8f
2464112
04dedbd
5fafa0e
c61bf58
1bcae21
0c0d767
bd4697c
88a1aa7
e373003
66ffe2b
3c4c43b
14df8ab
ec4371c
80cc953
34f9e2d
a9303c8
d0ce404
5729ebe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -85,6 +85,10 @@ jobs: | |
| run: | | ||
| if [ ! -f "build_host/bin/protoc" ]; then | ||
| git submodule foreach --recursive 'git stash --include-untracked' 2>/dev/null || true | ||
| export CCACHE_BASEDIR="$GITHUB_WORKSPACE" | ||
| export CCACHE_NOHASHDIR=1 | ||
| export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这个作用是啥? |
||
| cmake -S . -B build_host \ | ||
| -DCMAKE_BUILD_TYPE=Release \ | ||
| -DCMAKE_TOOLCHAIN_FILE="" \ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,13 @@ endif() | |
|
|
||
| set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) | ||
| set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) | ||
| set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) | ||
|
|
||
| if(ANDROID) | ||
| option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" OFF) | ||
| else() | ||
| option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ON) | ||
| endif() | ||
|
|
||
| # Add include and library search paths | ||
| include_directories(${ZVEC_INCLUDE_DIR}) | ||
|
|
@@ -28,7 +35,39 @@ if(WIN32) | |
| set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") | ||
| endif() | ||
|
|
||
| if(ZVEC_ENABLE_OMEGA) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这个检查有必要吗?如果omega确实没有被编译进来,即使链接不报错 程序运行也会报错吧?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 如果zvec以ZVEC_ENABLE_OMEGA=OFF编译,example运行的时候会报错omega对应的资源不存在,core内部是通过register机制注册的 |
||
| # OMEGA support is compiled into libzvec.so; no extra libraries needed | ||
| # at link time for the examples. We only verify the host build produced | ||
| # the OMEGA static libs so we know the feature was actually compiled in. | ||
| set(_zvec_omega_search_paths | ||
| ${ZVEC_DEPENDENCY_LIB_DIR} | ||
| ${ZVEC_DEPENDENCY_LIB_DIR}/Debug | ||
| ${ZVEC_DEPENDENCY_LIB_DIR}/Release | ||
| ${ZVEC_DEPENDENCY_LIB_DIR}/RelWithDebInfo | ||
| ${ZVEC_DEPENDENCY_LIB_DIR}/MinSizeRel | ||
| ) | ||
| find_library(ZVEC_OMEGA_LIB | ||
| NAMES omega | ||
| PATHS ${_zvec_omega_search_paths} | ||
| NO_DEFAULT_PATH | ||
| ) | ||
| find_library(ZVEC_LIGHTGBM_LIB | ||
| NAMES lib_lightgbm _lightgbm | ||
| PATHS ${_zvec_omega_search_paths} | ||
| NO_DEFAULT_PATH | ||
| ) | ||
| if(NOT ZVEC_OMEGA_LIB OR NOT ZVEC_LIGHTGBM_LIB) | ||
| message(WARNING | ||
| "ZVEC_ENABLE_OMEGA=ON but could not locate OMEGA host libraries under " | ||
| "${ZVEC_DEPENDENCY_LIB_DIR}. " | ||
| "omega-example will still link against libzvec (which should bundle OMEGA)." | ||
| ) | ||
| endif() | ||
| endif() | ||
|
|
||
| # --- Dependency groups --- | ||
| find_package(Threads REQUIRED) | ||
| find_package(OpenMP QUIET) | ||
|
|
||
| # --- Create INTERFACE target for libzvec (all-in-one C++ shared library) --- | ||
| # libzvec.so/.dylib/.dll already bundles all zvec internal components | ||
|
|
@@ -56,6 +95,11 @@ target_link_libraries(core-example PRIVATE zvec-lib) | |
| add_executable(ailego-example ailego/main.cc) | ||
| target_link_libraries(ailego-example PRIVATE zvec-lib) | ||
|
|
||
| if(ZVEC_ENABLE_OMEGA) | ||
| add_executable(omega-example omega/main.cc) | ||
| target_link_libraries(omega-example PRIVATE zvec-lib) | ||
| endif() | ||
|
|
||
| # Strip symbols to reduce executable size | ||
| if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) | ||
| add_custom_command(TARGET db-example POST_BUILD | ||
|
|
@@ -67,6 +111,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) | |
| add_custom_command(TARGET ailego-example POST_BUILD | ||
| COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:ailego-example>" | ||
| COMMENT "Stripping symbols from ailego-example") | ||
| if(ZVEC_ENABLE_OMEGA) | ||
| add_custom_command(TARGET omega-example POST_BUILD | ||
| COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:omega-example>" | ||
| COMMENT "Stripping symbols from omega-example") | ||
| endif() | ||
| endif() | ||
|
|
||
| # Optimize for size | ||
|
|
@@ -75,4 +124,10 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) | |
| PROPERTY COMPILE_FLAGS "-Os") | ||
| set_property(TARGET db-example core-example ailego-example | ||
| PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) | ||
| if(ZVEC_ENABLE_OMEGA) | ||
| set_property(TARGET omega-example | ||
| PROPERTY COMPILE_FLAGS "-Os") | ||
| set_property(TARGET omega-example | ||
| PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) | ||
| endif() | ||
| endif() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| #include <cmath> | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 加上Copyright吧 |
||
| #include <filesystem> | ||
| #include <iostream> | ||
| #include <random> | ||
| #include <vector> | ||
| #include <zvec/core/interface/index.h> | ||
| #include <zvec/core/interface/index_factory.h> | ||
| #include <zvec/core/interface/index_param_builders.h> | ||
|
|
||
| using namespace zvec::core_interface; | ||
|
|
||
| namespace { | ||
|
|
||
| constexpr uint32_t kDimension = 32; | ||
| constexpr uint32_t kNumDocuments = 10000; | ||
| constexpr uint32_t kQueryDocId = 7777; | ||
| const std::string kIndexPath = "omega_example.index"; | ||
|
|
||
| std::vector<float> MakeRandomUnitVector(std::mt19937 &rng) { | ||
| std::uniform_real_distribution<float> dist(-1.0f, 1.0f); | ||
|
|
||
| std::vector<float> values(kDimension, 0.0f); | ||
| float norm_sq = 0.0f; | ||
| for (auto &value : values) { | ||
| value = dist(rng); | ||
| norm_sq += value * value; | ||
| } | ||
|
|
||
| const float norm = std::sqrt(norm_sq); | ||
| if (norm > 0.0f) { | ||
| for (auto &value : values) { | ||
| value /= norm; | ||
| } | ||
| } | ||
| return values; | ||
| } | ||
|
|
||
| BaseIndexParam::Pointer CreateOmegaParam() { | ||
| return OmegaIndexParamBuilder() | ||
| .WithMetricType(MetricType::kInnerProduct) | ||
| .WithDataType(DataType::DT_FP32) | ||
| .WithDimension(kDimension) | ||
| .WithIsSparse(false) | ||
| .WithM(32) | ||
| .WithEFConstruction(500) | ||
| .WithMinVectorThreshold(10000) | ||
| .WithNumTrainingQueries(1000) | ||
| .WithEFTraining(500) | ||
| .WithEFGroundTruth(1000) | ||
| .Build(); | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| int main() { | ||
| std::filesystem::remove_all(kIndexPath); | ||
| std::filesystem::remove_all("omega_model"); | ||
|
|
||
| auto index = IndexFactory::CreateAndInitIndex(*CreateOmegaParam()); | ||
| if (!index) { | ||
| std::cerr << "failed to create omega index" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| if (index->Open(kIndexPath, StorageOptions{StorageOptions::StorageType::kMMAP, | ||
| true}) != 0) { | ||
| std::cerr << "failed to open omega index" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| std::mt19937 rng(42); | ||
| std::vector<std::vector<float>> dataset; | ||
| dataset.reserve(kNumDocuments); | ||
| for (uint32_t doc_id = 0; doc_id < kNumDocuments; ++doc_id) { | ||
| dataset.push_back(MakeRandomUnitVector(rng)); | ||
| VectorData vector_data; | ||
| vector_data.vector = DenseVector{dataset.back().data()}; | ||
| if (index->Add(vector_data, doc_id) != 0) { | ||
| std::cerr << "failed to add document " << doc_id << std::endl; | ||
| return 1; | ||
| } | ||
| } | ||
|
|
||
| if (index->Train() != 0) { | ||
| std::cerr << "failed to train omega index" << std::endl; | ||
| return 1; | ||
| } | ||
| if (!std::filesystem::exists("omega_model/model.txt")) { | ||
| std::cerr << "omega model was not generated" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| VectorData query{DenseVector{dataset[kQueryDocId].data()}}; | ||
|
|
||
| auto query_param = OmegaQueryParamBuilder() | ||
| .with_topk(3) | ||
| .with_fetch_vector(true) | ||
| .with_ef_search(32) | ||
| .with_target_recall(0.95f) | ||
| .build(); | ||
|
|
||
| SearchResult result; | ||
| if (index->Search(query, query_param, &result) != 0) { | ||
| std::cerr << "failed to search omega index" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| std::cout << "omega results: " << result.doc_list_.size() << std::endl; | ||
| if (result.doc_list_.empty()) { | ||
| std::cerr << "omega example returned no results" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| std::cout << "top result key=" << result.doc_list_[0].key() | ||
| << " score=" << result.doc_list_[0].score() << std::endl; | ||
| if (result.doc_list_[0].key() != kQueryDocId) { | ||
| std::cerr << "unexpected top result key" << std::endl; | ||
| return 1; | ||
| } | ||
| if (index->Close() != 0) { | ||
| std::cerr << "failed to close omega index" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,13 @@ endif() | |
| set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include) | ||
| set(ZVEC_GENERATED_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/src/generated) | ||
| set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib) | ||
| set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 没必要加ZVEC_DEPENDENCY_LIB_DIR吧 |
||
|
|
||
| if(ANDROID) | ||
| option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" OFF) | ||
| else() | ||
| option(ZVEC_ENABLE_OMEGA "Link examples against OMEGA support from the host build" ON) | ||
| endif() | ||
|
|
||
| # Add include and library search paths | ||
| include_directories(${ZVEC_INCLUDE_DIR} ${ZVEC_GENERATED_INCLUDE_DIR}) | ||
|
|
@@ -42,6 +49,12 @@ endif() | |
|
|
||
| # Find required packages | ||
| find_package(Threads REQUIRED) | ||
| find_package(OpenMP QUIET) | ||
|
|
||
| set(zvec_openmp_deps) | ||
| if(OpenMP_FOUND AND NOT ANDROID) | ||
| list(APPEND zvec_openmp_deps OpenMP::OpenMP_CXX) | ||
| endif() | ||
|
|
||
| # Create INTERFACE target for zvec_c_api (fat shared library) | ||
| # No whole-archive flags needed — all symbols are already resolved in the .so/.dylib | ||
|
|
@@ -93,6 +106,13 @@ target_link_libraries(c_api_optimized_example PRIVATE | |
| zvec-c-api | ||
| ) | ||
|
|
||
| if(ZVEC_ENABLE_OMEGA) | ||
| add_executable(c_api_omega_example omega_example.c) | ||
| target_link_libraries(c_api_omega_example PRIVATE | ||
| zvec-c-api | ||
| ) | ||
| endif() | ||
|
|
||
| # Strip symbols to reduce executable size | ||
| if(CMAKE_BUILD_TYPE STREQUAL "Release" AND (ANDROID OR (CMAKE_SYSTEM_NAME STREQUAL "Linux"))) | ||
| add_custom_command(TARGET c_api_basic_example POST_BUILD | ||
|
|
@@ -113,6 +133,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND (ANDROID OR (CMAKE_SYSTEM_NAME STREQU | |
| add_custom_command(TARGET c_api_optimized_example POST_BUILD | ||
| COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:c_api_optimized_example>" | ||
| COMMENT "Stripping symbols from c_api_optimized_example") | ||
| if(ZVEC_ENABLE_OMEGA) | ||
| add_custom_command(TARGET c_api_omega_example POST_BUILD | ||
| COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:c_api_omega_example>" | ||
| COMMENT "Stripping symbols from c_api_omega_example") | ||
| endif() | ||
| endif() | ||
|
|
||
| # Optimize for size | ||
|
|
@@ -123,4 +148,10 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) | |
| set_property(TARGET c_api_basic_example c_api_collection_schema_example c_api_doc_example | ||
| c_api_index_example c_api_field_schema_example c_api_optimized_example | ||
| PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) | ||
| endif() | ||
| if(ZVEC_ENABLE_OMEGA) | ||
| set_property(TARGET c_api_omega_example | ||
| PROPERTY COMPILE_FLAGS "-Os") | ||
| set_property(TARGET c_api_omega_example | ||
| PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) | ||
| endif() | ||
| endif() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这里作用是什么?