Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/ut.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ jobs:
timeout-minutes: 180
strategy:
fail-fast: true
# Service containers for this job: a Redis instance published on port 6379.
services:
redis:
# NOTE(review): the image has no tag, so whatever the registry serves by default is used — consider pinning a version.
image: redis
# Health check passed to the container runtime: "redis-cli ping" every 10s, 5s timeout, 5 retries.
options: >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
# Map container port 6379 to the same port on the host side.
ports:
- 6379:6379
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
53 changes: 53 additions & 0 deletions Dockerfile.builder
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
FROM ubuntu:22.04

# Builder image for knowhere: Ubuntu 22.04 with gcc-12/g++-12 and Python 3.11 registered as
# the preferred alternatives, plus CMake, ccache, the bfloat16 wheel and Conan 1.61.0.

ENV DEBIAN_FRONTEND=noninteractive

# Download coordinates are build arguments rather than ENV entries: they are only needed while
# the image is being built. In particular, the ccache package directory must not be exported as
# CCACHE_DIR, because ccache itself reads CCACHE_DIR at run time to locate its cache directory.
ARG CMAKE_VERSION="v3.28.5"
ARG CMAKE_TAR="cmake-3.28.5-linux-x86_64.tar.gz"
ARG CCACHE_VERSION="v4.9.1"
ARG CCACHE_PKG_DIR="ccache-4.9.1-linux-x86_64"
ARG CCACHE_TAR="ccache-4.9.1-linux-x86_64.tar.xz"
ARG BFLOAT16_WHL="bfloat16-1.4.0-cp311-cp311-linux_x86_64.whl"

# Toolchain, Python 3.11 and pip. apt-get is used instead of apt because apt does not promise a
# stable command-line interface for scripts. The package lists are removed at the end of the
# same RUN so they do not end up in the layer.
RUN apt-get update \
    && apt-get install -y ca-certificates apt-transport-https software-properties-common lsb-release \
    && apt-get install -y --no-install-recommends wget curl git make gfortran gcc g++ swig \
    && apt-get install -y gcc-12 g++-12 \
    && apt-get install -y python3.11 python3.11-dev python3.11-distutils \
    && apt-get install -y python3-setuptools \
    && apt-get install -y openssh-client \
    && apt-get install -y --no-install-recommends libpci3 libpci-dev redis-server \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 120 \
    && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 110 \
    && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 120 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 310 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 311 \
    && curl -fsS https://bootstrap.pypa.io/get-pip.py | python3 \
    && export PATH=$PATH:$HOME/.local/bin \
    && pip3 install wheel \
    && rm -rf /var/lib/apt/lists/*

# install cmake, ccache and bfloat16
# Every downloaded archive and the unpacked ccache directory are deleted again in this RUN.
RUN cd /tmp \
    && wget https://github.com/Kitware/CMake/releases/download/${CMAKE_VERSION}/${CMAKE_TAR} \
    && tar --strip-components=1 -xz -C /usr/local -f ${CMAKE_TAR} \
    && rm -f ${CMAKE_TAR} \
    && wget https://github.com/ccache/ccache/releases/download/${CCACHE_VERSION}/${CCACHE_TAR} \
    && tar -xf ${CCACHE_TAR} \
    && cp ${CCACHE_PKG_DIR}/ccache /usr/local/bin \
    && rm -rf ${CCACHE_TAR} ${CCACHE_PKG_DIR} \
    && wget https://github.com/zilliztech/knowhere/releases/download/v2.3.1/${BFLOAT16_WHL} \
    && pip3 install ${BFLOAT16_WHL} \
    && rm -f ${BFLOAT16_WHL}

# install knowhere dependencies
RUN apt-get update \
    && apt-get install -y libopenblas-openmp-dev libcurl4-openssl-dev libaio-dev libevent-dev lcov \
    && rm -rf /var/lib/apt/lists/* \
    && pip3 install conan==1.61.0 \
    && conan remote add default-conan-local https://milvus01.jfrog.io/artifactory/api/conan/default-conan-local

WORKDIR /workspace

# Default entrypoint is an interactive shell so you can run build steps manually.
ENTRYPOINT ["/bin/bash"]
37 changes: 37 additions & 0 deletions builder.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Build the knowhere-builder image from Dockerfile.builder and start a container with the
# current directory mounted at /workspace and ~/.conan mounted at /root/.conan.
#
# Usage: builder.sh [--full <Debug|Release>]
#   (no flag)        start the container with /bin/bash as entrypoint
#   --full <type>    start the container with /workspace/builder_entrypoint.sh as entrypoint
#                    and pass <type> to it as its only argument
#
# Must be run from the directory that contains Dockerfile.builder.

# Stop at the first failing command so that a failed "docker build" never falls through to
# "docker run" against a stale or missing image.
set -euo pipefail

# Print the usage line to stderr and abort.
usage() {
    echo "Usage: $0 [--full <Debug|Release>]" >&2
    exit 1
}

FULL_MODE=""

# Parse arguments; --full is the only supported flag.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --full)
            # ${2:-} keeps "set -u" from aborting when --full is the last argument.
            requested_mode="${2:-}"
            if [[ "$requested_mode" != "Debug" && "$requested_mode" != "Release" ]]; then
                usage
            fi
            FULL_MODE="$requested_mode"
            shift 2
            ;;
        *)
            usage
            ;;
    esac
done

docker build -f Dockerfile.builder -t knowhere-builder:latest .

# Set entrypoint based on mode
ENTRYPOINT="/bin/bash"
if [[ -n "$FULL_MODE" ]]; then
    ENTRYPOINT="/workspace/builder_entrypoint.sh"
fi

# Create the Conan cache directory up front: a bind-mount source that does not exist is
# created by Docker itself, which leaves a root-owned directory in the user's home.
mkdir -p "${HOME}/.conan"

# The build type is appended only in --full mode; otherwise the container gets no arguments.
docker run --rm -it \
    -v "$(pwd)":/workspace \
    -v "${HOME}/.conan":/root/.conan \
    -w /workspace \
    --entrypoint "$ENTRYPOINT" \
    knowhere-builder:latest ${FULL_MODE:+"$FULL_MODE"}
34 changes: 34 additions & 0 deletions builder_entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Container entrypoint for a full build: wipes ./build, runs "conan install" and
# "conan build" for the requested build type, starts a Redis server, then hands over to an
# interactive shell. If a build step fails, an interactive shell is opened instead.
#
# Usage: builder_entrypoint.sh <build_type>
#   <build_type> is passed to conan as "-s build_type=<build_type>" (e.g. Debug or Release).

BUILD_TYPE="${1:-}"

# Without a build type conan would only fail later with a less obvious message.
if [[ -z "$BUILD_TYPE" ]]; then
    echo "Usage: $0 <build_type>" >&2
    exit 1
fi

# Trap to ensure we always drop into shell, even on error
trap 'echo "==> Error occurred! Entering interactive shell for debugging..."; exec /bin/bash' ERR

set -e

echo "==> Starting full build with mode: $BUILD_TYPE"

echo "==> Step 1: Cleaning build directory"
rm -rf build

echo "==> Step 2: Creating build directory"
# Two separate commands on purpose: in "mkdir build && cd build" a failing mkdir is exempt
# from "set -e" and the ERR trap, so the script would carry on in the wrong directory.
mkdir build
cd build

echo "==> Step 3: Running conan install"
conan install .. --build=missing \
    -o with_ut=True \
    -o with_diskann=True \
    -o with_asan=True \
    -s compiler.libcxx=libstdc++11 \
    -s build_type="$BUILD_TYPE"

echo "==> Step 4: Running conan build"
conan build ..
echo "==> Build complete!"

echo "==> Step 5: Starting Redis server for NCS tests"
redis-server --daemonize yes

echo "Entering interactive shell..."
exec /bin/bash
2 changes: 2 additions & 0 deletions cmake/libs/libdiskann.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ set(DISKANN_SOURCES
thirdparty/DiskANN/src/pq_flash_aisaq_index.cpp
thirdparty/DiskANN/src/aisaq_utils.cpp
thirdparty/DiskANN/src/aisaq_pq_reader.cpp
thirdparty/DiskANN/src/file_index_reader.cpp
thirdparty/DiskANN/src/ncs_reader.cpp
thirdparty/DiskANN/src/logger.cpp
thirdparty/DiskANN/src/utils.cpp)

Expand Down
4 changes: 2 additions & 2 deletions cmake/libs/libmilvus-common.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

# Reset the INCLUDE_DIRECTORIES property of the current source directory to an empty value.
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
# Previous pin (the two lines this change removes): zilliztech repository at b6629f7.
set( MILVUS-COMMON-VERSION b6629f7 )
set( GIT_REPOSITORY "https://github.com/zilliztech/milvus-common.git" )
# New pin (the two lines this change adds). As written here, these later set() calls
# overwrite the two values above.
# NOTE(review): the URL is under a different GitHub account (ronmarcus) than the previous
# zilliztech URL — confirm which repository should be used before merging.
set( MILVUS-COMMON-VERSION 55e16501 )
set( GIT_REPOSITORY "https://github.com/ronmarcus/milvus-common.git" )
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please point this back to https://github.com/zilliztech/milvus-common.git, or let us know what change you are proposing in that repository.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have three related PRs across three repositories: milvus-common, knowhere, and milvus. The high-level idea behind them is described in the linked GitHub issue.

From the point of view of DiskANN, NCS (Near Compute Storage) is an alternative to a locally attached SSD for storing index data. Specifically, NCS can replace local storage when querying the index; the build process remains unchanged.

The proposed change in milvus-common provides the NCS infrastructure, including:

  • Ncs class for NCS bucket management
  • NcsConnector class for writing and reading to/from the NCS backend

NCS is used by both knowhere (to access the data) and milvus (the coordinator manages NCS). Thus, we added the NCS infrastructure to milvus-common.


message(STATUS "milvus-common repo: ${GIT_REPOSITORY}")
message(STATUS "milvus-common version: ${MILVUS-COMMON-VERSION}")
Expand Down
1 change: 1 addition & 0 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def requirements(self):
self.requires("libcurl/8.2.1")
self.requires("simde/0.8.2")
self.requires("xxhash/0.8.3")
self.requires("hiredis/1.2.0")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this used exactly?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hiredis is a client library for the Redis database. It is used by diskann when the Redis implementation of NCS is enabled. Specifically, Hiredis is used in the RedisNcsConnector and RedisNcs classes in milvus-common, which are used by diskann:

  • diskann uses the IndexReader abstract class, which has two implementations, one of which is NCSReader. This uses the NcsConnector abstract class, with RedisNcsConnector as one implementation.
  • In the unit tests under test_diskann.cc, the Ncs abstract class is used for NCS bucket management, with RedisNcs as one of its implementations.

if self.settings.os == "Android":
self.requires("openblas/0.3.27")
if not self.options.with_light:
Expand Down
67 changes: 66 additions & 1 deletion include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
#include "knowhere/log.h"
#include "nlohmann/json.hpp"

#include "ncs/ncs.h"


namespace knowhere {

typedef nlohmann::json Json;
Expand Down Expand Up @@ -59,6 +62,10 @@ typedef nlohmann::json Json;
#define CFG_MATERIALIZED_VIEW_SEARCH_INFO_TYPE std::optional<knowhere::MaterializedViewSearchInfo>
#endif

#ifndef CFG_NCS_DESCRIPTOR
#define CFG_NCS_DESCRIPTOR std::optional<milvus::NcsDescriptor>
#endif

template <typename T>
struct Range {
T left;
Expand Down Expand Up @@ -242,6 +249,29 @@ struct Entry<CFG_MATERIALIZED_VIEW_SEARCH_INFO_TYPE> {
bool allow_empty_without_default = false;
};

template <>
struct Entry<CFG_NCS_DESCRIPTOR> {
explicit Entry(CFG_NCS_DESCRIPTOR* v) {
val = v;
default_val = std::nullopt;
type = 0x0;
desc = std::nullopt;
}

Entry() {
val = nullptr;
default_val = std::nullopt;
type = 0x0;
desc = std::nullopt;
}

CFG_NCS_DESCRIPTOR* val;
std::optional<CFG_NCS_DESCRIPTOR::value_type> default_val;
uint32_t type;
std::optional<std::string> desc;
bool allow_empty_without_default = false;
};

template <typename T>
class EntryAccess {
public:
Expand Down Expand Up @@ -541,6 +571,26 @@ class Config {
}
*ptr->val = json[it.first];
}

// Handle map entries that hold an Entry<CFG_NCS_DESCRIPTOR>.
if (const Entry<CFG_NCS_DESCRIPTOR>* ptr = std::get_if<Entry<CFG_NCS_DESCRIPTOR>>(&var)) {
// Skip entries whose type mask shares no bit with the requested `type`.
if (!(type & ptr->type)) {
continue;
}
// Key absent from the JSON: use the default if there is one; otherwise either skip the
// entry (allow_empty_without_default) or report invalid_param_in_json.
if (json.find(it.first) == json.end()) {
if (!ptr->default_val.has_value()) {
if (ptr->allow_empty_without_default) {
continue;
}
std::string msg = "param '" + it.first + "' not exist in json";
return HandleError(err_msg, msg, Status::invalid_param_in_json);
} else {
*ptr->val = ptr->default_val;
continue;
}
}
// Accept JSON object for NcsDescriptor (use get<> to invoke from_json)
// NOTE(review): no check is visible here that the value is actually a JSON object before
// get<>() is called; what happens on a malformed value depends on from_json — confirm.
*ptr->val = json[it.first].get<milvus::NcsDescriptor>();
}
}

if (!err_msg) {
Expand All @@ -554,7 +604,7 @@ class Config {
}

using VarEntry = std::variant<Entry<CFG_STRING>, Entry<CFG_FLOAT>, Entry<CFG_INT>, Entry<CFG_INT64>,
Entry<CFG_BOOL>, Entry<CFG_MATERIALIZED_VIEW_SEARCH_INFO_TYPE>>;
Entry<CFG_BOOL>, Entry<CFG_MATERIALIZED_VIEW_SEARCH_INFO_TYPE>, Entry<CFG_NCS_DESCRIPTOR>>;
std::unordered_map<std::string, VarEntry> __DICT__;

protected:
Expand Down Expand Up @@ -646,6 +696,10 @@ class BaseConfig : public Config {
CFG_FLOAT retrieval_ann_ratio;
CFG_STRING emb_list_meta_file_path; // for mmap
CFG_STRING emb_list_offset_file_path; // for build
// NCS descriptor, convertible to milvus::NcsDescriptor. Allows DiskANN to open an NcsConnector for NCS IO operations.
CFG_NCS_DESCRIPTOR ncs_descriptor;
// Enable or disable NCS usage for DiskANN. Defaults to false.
CFG_BOOL ncs_enable;
KNOHWERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(dim).allow_empty_without_default().description("vector dim").for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type)
Expand Down Expand Up @@ -822,6 +876,17 @@ class BaseConfig : public Config {
.description("file name of emb_list offsets for build")
.allow_empty_without_default()
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(ncs_descriptor)
.description("NCS descriptor. Allows DiskAnn to open NcsConnector for NCS IO operations.")
.allow_empty_without_default()
.for_deserialize()
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(ncs_enable)
.description("enable NCS integration for DiskANN")
.set_default(false)
.for_deserialize()
.for_train()
.for_static();
}
};
} // namespace knowhere
Expand Down
9 changes: 9 additions & 0 deletions include/knowhere/index/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "knowhere/expected.h"
#include "knowhere/index/index_node.h"
#include "knowhere/index/interrupt.h"
#include "ncs/ncs.h"

namespace knowhere {

template <typename T1>
Expand Down Expand Up @@ -205,6 +207,13 @@ class Index {
[[nodiscard]] bool
LoadIndexWithStream() const;

// Declaration only; the definition is not part of this header excerpt.
// NOTE(review): presumably forwards to IndexNode::ListFilesForNcsUpload() on the wrapped
// node — confirm against the definition.
std::vector<std::string>
ListFilesForNcsUpload() const;

// Declaration only; takes a JSON config and returns a knowhere Status.
// NOTE(review): IndexNode::NcsUpload takes a Config and returns milvus::NcsStatus, so the
// definition presumably converts both the argument and the result — confirm.
Status
NcsUpload(const Json& json);


~Index() {
if (node == nullptr)
return;
Expand Down
18 changes: 18 additions & 0 deletions include/knowhere/index/index_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "knowhere/operands.h"
#include "knowhere/utils.h"
#include "knowhere/version.h"
#include "ncs/ncs.h"

#if defined(NOT_COMPILE_FOR_SWIG)
#include "common/OpContext.h"
Expand Down Expand Up @@ -543,6 +544,23 @@ class IndexNode : public Object {
return false;
}

/**
 * @brief Report which files are required for NCS upload.
 *
 * Each returned string is a pattern rather than an exact name: any file matching
 * "*<pattern>*" is required.
 *
 * @return A vector of file name patterns.
 * @note The default implementation returns an empty vector, meaning no files are required.
 */
virtual std::vector<std::string>
ListFilesForNcsUpload() const {
    return std::vector<std::string>{};
}

/**
 * @brief NCS upload hook; meant to be overridden by index types that support it.
 *
 * @param cfg Configuration for the upload; not used by the default implementation.
 * @return The default implementation always returns milvus::NcsStatus::ERROR.
 */
virtual milvus::NcsStatus
NcsUpload(std::shared_ptr<Config> cfg) {
    (void)cfg;  // intentionally unused in the base class
    return milvus::NcsStatus::ERROR;
}


virtual ~IndexNode() {
}

Expand Down
1 change: 1 addition & 0 deletions python/knowhere/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .swigknowhere import BruteForceSearchBF16, BruteForceRangeSearchBF16
from .swigknowhere import BruteForceSearchInt8, BruteForceRangeSearchInt8
from .swigknowhere import BruteForceSearchBin, BruteForceRangeSearchBin
from .swigknowhere import InitNcs, CreateNcsBucket, DeleteNcsBucket, IsNcsBucketExist

import numpy as np
from bfloat16 import bfloat16
Expand Down
Loading
Loading