diff --git a/cpp/cmake/patches/faiss-1.14-cuvs-25.12.diff b/cpp/cmake/patches/faiss-1.14-cuvs-25.12.diff deleted file mode 100644 index 7fabbc675f..0000000000 --- a/cpp/cmake/patches/faiss-1.14-cuvs-25.12.diff +++ /dev/null @@ -1,182 +0,0 @@ -diff --git a/faiss/gpu/GpuDistance.cu b/faiss/gpu/GpuDistance.cu -index c82c73e7d..b9100c272 100644 ---- a/faiss/gpu/GpuDistance.cu -+++ b/faiss/gpu/GpuDistance.cu -@@ -239,7 +239,7 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { - if (should_use_cuvs(args) && args.queriesRowMajor == args.vectorsRowMajor && - args.outIndicesType == IndicesDataType::I64 && - args.vectorType == DistanceDataType::F32 && args.k > 0) { -- cuvsDistanceType distance = metricFaissToCuvs(args.metric, false); -+ auto distance = metricFaissToCuvs(args.metric, false); - - auto resImpl = prov->getResources(); - auto res = resImpl.get(); -diff --git a/faiss/gpu/GpuResources.h b/faiss/gpu/GpuResources.h -index c0c851a89..61d9d4dbe 100644 ---- a/faiss/gpu/GpuResources.h -+++ b/faiss/gpu/GpuResources.h -@@ -33,7 +33,7 @@ - - #if defined USE_NVIDIA_CUVS - #include --#include -+#include - #endif - - namespace faiss { -diff --git a/faiss/gpu/StandardGpuResources.cpp b/faiss/gpu/StandardGpuResources.cpp -index 649b7cb5c..622443044 100644 ---- a/faiss/gpu/StandardGpuResources.cpp -+++ b/faiss/gpu/StandardGpuResources.cpp -@@ -23,9 +23,9 @@ - - #if defined USE_NVIDIA_CUVS - #include --#include --#include --#include -+#include -+#include -+#include - #include - #endif - -@@ -93,7 +93,7 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl() - : - #if defined USE_NVIDIA_CUVS - mmr_(new rmm::mr::managed_memory_resource), -- pmr_(new rmm::mr::pinned_memory_resource), -+ pmr_(new rmm::mr::pinned_host_memory_resource), - #endif - pinnedMemAlloc_(nullptr), - pinnedMemAllocSize_(0), -@@ -164,7 +164,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() { - - if (pinnedMemAlloc_) { - #if defined USE_NVIDIA_CUVS -- pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_); -+ pmr_->deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_); - #else - auto err = cudaFreeHost(pinnedMemAlloc_); - FAISS_ASSERT_FMT( -@@ -350,7 +350,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) { - // pinned memory allocation - if (defaultStreams_.empty() && pinnedMemSize_ > 0) { - try { -- pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_); -+ pinnedMemAlloc_ = pmr_->allocate_sync(pinnedMemSize_); - } catch (const std::bad_alloc& rmm_ex) { - FAISS_THROW_MSG("CUDA memory allocation error"); - } -@@ -549,7 +549,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) { - rmm::mr::device_memory_resource* current_mr = - rmm::mr::get_per_device_resource( - rmm::cuda_device_id{adjReq.device}); -- p = current_mr->allocate_async(adjReq.size, adjReq.stream); -+ p = current_mr->allocate(adjReq.stream, adjReq.size); - adjReq.mr = current_mr; - } catch (const std::bad_alloc& rmm_ex) { - FAISS_THROW_MSG("CUDA memory allocation error"); -@@ -584,7 +584,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) { - // TODO: change this to use the current device resource once RMM has - // a way to retrieve a "guaranteed" managed memory resource for a - // device. -- p = mmr_->allocate_async(adjReq.size, adjReq.stream); -+ p = mmr_->allocate(adjReq.stream, adjReq.size); - adjReq.mr = mmr_.get(); - } catch (const std::bad_alloc& rmm_ex) { - FAISS_THROW_MSG("CUDA memory allocation error"); -@@ -648,7 +648,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) { - req.space == MemorySpace::Device || - req.space == MemorySpace::Unified) { - #if defined USE_NVIDIA_CUVS -- req.mr->deallocate_async(p, req.size, req.stream); -+ req.mr->deallocate(req.stream, p, req.size); - #else - auto err = cudaFree(p); - FAISS_ASSERT_FMT( -diff --git a/faiss/gpu/StandardGpuResources.h b/faiss/gpu/StandardGpuResources.h -index f23ca19d8..3ba606606 100644 ---- a/faiss/gpu/StandardGpuResources.h -+++ b/faiss/gpu/StandardGpuResources.h -@@ -25,7 +25,8 @@ - - #if defined USE_NVIDIA_CUVS - #include --#include -+#include -+#include - #endif - - #include -@@ -172,8 +173,8 @@ class StandardGpuResourcesImpl : public GpuResources { - // managed_memory_resource - std::unique_ptr mmr_; - -- // pinned_memory_resource -- std::unique_ptr pmr_; -+ // pinned_host_memory_resource -+ std::unique_ptr pmr_; - #endif - - /// Pinned memory allocation for use with this GPU -diff --git a/faiss/gpu/impl/CuvsFlatIndex.cu b/faiss/gpu/impl/CuvsFlatIndex.cu -index 15cf427cf..d877e766d 100644 ---- a/faiss/gpu/impl/CuvsFlatIndex.cu -+++ b/faiss/gpu/impl/CuvsFlatIndex.cu -@@ -92,7 +92,7 @@ void CuvsFlatIndex::query( - outDistances.getSize(0), - outDistances.getSize(1)); - -- cuvsDistanceType distance = metricFaissToCuvs(metric, exactDistance); -+ auto distance = metricFaissToCuvs(metric, exactDistance); - - std::optional> - norms_view = raft::make_device_vector_view( -diff --git a/faiss/gpu/utils/CuvsUtils.h b/faiss/gpu/utils/CuvsUtils.h -index e44e5f12d..42fe8ca48 100644 ---- a/faiss/gpu/utils/CuvsUtils.h -+++ b/faiss/gpu/utils/CuvsUtils.h -@@ -27,32 +27,32 @@ - #include - #include - --#include -+#include - - #pragma GCC visibility push(default) - namespace faiss { - namespace gpu { - --inline cuvsDistanceType metricFaissToCuvs( -+inline cuvs::distance::DistanceType metricFaissToCuvs( - MetricType metric, - bool exactDistance) { - switch (metric) { - case MetricType::METRIC_INNER_PRODUCT: -- return cuvsDistanceType::InnerProduct; -+ return cuvs::distance::DistanceType::InnerProduct; - case MetricType::METRIC_L2: -- return cuvsDistanceType::L2Expanded; -+ return cuvs::distance::DistanceType::L2Expanded; - case MetricType::METRIC_L1: -- return cuvsDistanceType::L1; -+ return cuvs::distance::DistanceType::L1; - case MetricType::METRIC_Linf: -- return cuvsDistanceType::Linf; -+ return cuvs::distance::DistanceType::Linf; - case MetricType::METRIC_Lp: -- return cuvsDistanceType::LpUnexpanded; -+ return cuvs::distance::DistanceType::LpUnexpanded; - case MetricType::METRIC_Canberra: -- return cuvsDistanceType::Canberra; -+ return cuvs::distance::DistanceType::Canberra; - case MetricType::METRIC_BrayCurtis: -- return cuvsDistanceType::BrayCurtis; -+ return cuvs::distance::DistanceType::BrayCurtis; - case MetricType::METRIC_JensenShannon: -- return cuvsDistanceType::JensenShannon; -+ return cuvs::distance::DistanceType::JensenShannon; - default: - RAFT_FAIL("Distance type not supported"); - } diff --git a/cpp/cmake/patches/faiss-1.14-cuvs-26.02.diff b/cpp/cmake/patches/faiss-1.14-cuvs-26.02.diff deleted file mode 100644 index d41bee79f4..0000000000 --- a/cpp/cmake/patches/faiss-1.14-cuvs-26.02.diff +++ /dev/null @@ -1,175 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 8e398785..0789e59d 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -53,11 +53,7 @@ project(faiss - LANGUAGES ${FAISS_LANGUAGES}) - include(GNUInstallDirs) - --if(FAISS_ENABLE_CUVS) -- set(CMAKE_CXX_STANDARD 17) --else() -- set(CMAKE_CXX_STANDARD 20) --endif() -+set(CMAKE_CXX_STANDARD 20) - - list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") - -diff --git a/faiss/gpu/impl/CuvsIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu -index 1e2fef22..f2f29302 100644 ---- a/faiss/gpu/impl/CuvsIVFPQ.cu -+++ b/faiss/gpu/impl/CuvsIVFPQ.cu -@@ -129,8 +129,14 @@ void CuvsIVFPQ::updateQuantizer(Index* quantizer) { - - cuvs::neighbors::ivf_pq::helpers::reset_index( - raft_handle, cuvs_index.get()); -+ auto mutable_rotation_matrix_view = -+ raft::make_device_matrix_view( -+ const_cast( -+ cuvs_index->rotation_matrix().data_handle()), -+ cuvs_index->rotation_matrix().extent(0), -+ cuvs_index->rotation_matrix().extent(1)); - cuvs::neighbors::ivf_pq::helpers::make_rotation_matrix( -- raft_handle, cuvs_index.get(), false); -+ raft_handle, mutable_rotation_matrix_view, false); - - // If the index instance is a GpuIndexFlat, then we can use direct access to - // the centroids within. -@@ -149,22 +155,60 @@ void CuvsIVFPQ::updateQuantizer(Index* quantizer) { - // as float32 and store locally - gpuData->reconstruct(0, gpuData->getSize(), centroids); - -- cuvs::neighbors::ivf_pq::helpers::set_centers( -- raft_handle, -- cuvs_index.get(), -+ auto mutable_centers_view = -+ raft::make_device_matrix_view( -+ const_cast( -+ cuvs_index->centers().data_handle()), -+ numLists_, -+ cuvs_index->centers().extent(1)); -+ auto mutable_centers_rot_view = - raft::make_device_matrix_view( -- centroids.data(), numLists_, dim_)); -+ const_cast( -+ cuvs_index->centers_rot().data_handle()), -+ cuvs_index->centers_rot().extent(0), -+ cuvs_index->centers_rot().extent(1)); -+ -+ cuvs::neighbors::ivf_pq::helpers::pad_centers_with_norms( -+ raft_handle, -+ raft::make_const_mdspan( -+ raft::make_device_matrix_view( -+ centroids.data(), numLists_, dim_)), -+ mutable_centers_view); -+ cuvs::neighbors::ivf_pq::helpers::rotate_padded_centers( -+ raft_handle, -+ cuvs_index->centers(), -+ cuvs_index->rotation_matrix(), -+ mutable_centers_rot_view); - } else { - /// No reconstruct needed since the centers are already in float32 - // The FlatIndex keeps its data in float32, so we can merely - // reference it - auto centroids = gpuData->getVectorsFloat32Ref(); - -- cuvs::neighbors::ivf_pq::helpers::set_centers( -- raft_handle, -- cuvs_index.get(), -+ auto mutable_centers_view = -+ raft::make_device_matrix_view( -+ const_cast( -+ cuvs_index->centers().data_handle()), -+ numLists_, -+ cuvs_index->centers().extent(1)); -+ auto mutable_centers_rot_view = - raft::make_device_matrix_view( -- centroids.data(), numLists_, dim_)); -+ const_cast( -+ cuvs_index->centers_rot().data_handle()), -+ cuvs_index->centers_rot().extent(0), -+ cuvs_index->centers_rot().extent(1)); -+ -+ cuvs::neighbors::ivf_pq::helpers::pad_centers_with_norms( -+ raft_handle, -+ raft::make_const_mdspan( -+ raft::make_device_matrix_view( -+ centroids.data(), numLists_, dim_)), -+ mutable_centers_view); -+ cuvs::neighbors::ivf_pq::helpers::rotate_padded_centers( -+ raft_handle, -+ cuvs_index->centers(), -+ cuvs_index->rotation_matrix(), -+ mutable_centers_rot_view); - } - } else { - DeviceTensor centroids( -@@ -180,11 +224,30 @@ void CuvsIVFPQ::updateQuantizer(Index* quantizer) { - - centroids.copyFrom(vecs, stream); - -- cuvs::neighbors::ivf_pq::helpers::set_centers( -- raft_handle, -- cuvs_index.get(), -+ // Create mutable views for output parameters -+ auto mutable_centers_view = -+ raft::make_device_matrix_view( -+ const_cast(cuvs_index->centers().data_handle()), -+ numLists_, -+ cuvs_index->centers().extent(1)); -+ auto mutable_centers_rot_view = - raft::make_device_matrix_view( -- centroids.data(), numLists_, dim_)); -+ const_cast( -+ cuvs_index->centers_rot().data_handle()), -+ cuvs_index->centers_rot().extent(0), -+ cuvs_index->centers_rot().extent(1)); -+ -+ cuvs::neighbors::ivf_pq::helpers::pad_centers_with_norms( -+ raft_handle, -+ raft::make_const_mdspan( -+ raft::make_device_matrix_view( -+ centroids.data(), numLists_, dim_)), -+ mutable_centers_view); -+ cuvs::neighbors::ivf_pq::helpers::rotate_padded_centers( -+ raft_handle, -+ cuvs_index->centers(), -+ cuvs_index->rotation_matrix(), -+ mutable_centers_rot_view); - } - - setPQCentroids_(); -@@ -404,10 +467,11 @@ void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { - auto& cuvs_index_lists = cuvs_index->lists(); - - // conservative memory alloc for cloning cpu inverted lists -- cuvs::neighbors::ivf_pq::list_spec ivf_list_spec{ -- static_cast(bitsPerSubQuantizer_), -- static_cast(numSubQuantizers_), -- true}; -+ cuvs::neighbors::ivf_pq::list_spec_interleaved -+ ivf_list_spec{ -+ static_cast(bitsPerSubQuantizer_), -+ static_cast(numSubQuantizers_), -+ true}; - - for (size_t i = 0; i < nlist; ++i) { - size_t listSize = ivf->list_size(i); -@@ -426,7 +490,7 @@ void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { - // This cuVS list must currently be empty - FAISS_ASSERT(getListLength(i) == 0); - -- cuvs::neighbors::ivf::resize_list( -+ cuvs::neighbors::ivf_pq::helpers::resize_list( - raft_handle, - cuvs_index_lists[i], - ivf_list_spec, -@@ -520,7 +584,7 @@ void CuvsIVFPQ::setPQCentroids_() { - auto stream = resources_->getDefaultStreamCurrentDevice(); - - raft::copy( -- cuvs_index->pq_centers().data_handle(), -+ const_cast(cuvs_index->pq_centers().data_handle()), - pqCentroidsInnermostCode_.data(), - pqCentroidsInnermostCode_.numElements(), - stream); diff --git a/cpp/cmake/patches/faiss-1.14-cuvs-26.04.diff b/cpp/cmake/patches/faiss-1.14-cuvs-26.04.diff index f7774bb139..fae9e4aa85 100644 --- a/cpp/cmake/patches/faiss-1.14-cuvs-26.04.diff +++ b/cpp/cmake/patches/faiss-1.14-cuvs-26.04.diff @@ -1,12 +1,25 @@ -diff --git a/faiss/gpu/utils/CuvsUtils.cu b/faiss/gpu/utils/CuvsUtils.cu -index 1ec32179c..3751dfa62 100644 ---- a/faiss/gpu/utils/CuvsUtils.cu -+++ b/faiss/gpu/utils/CuvsUtils.cu -@@ -31,6 +31,7 @@ +diff --git a/faiss/gpu/impl/CuvsFlatIndex.cu b/faiss/gpu/impl/CuvsFlatIndex.cu +index d4a2d99fe..3c7e6abfa 100644 +--- a/faiss/gpu/impl/CuvsFlatIndex.cu ++++ b/faiss/gpu/impl/CuvsFlatIndex.cu +@@ -30,7 +30,7 @@ + #include - #include - #include -+#include - #include - - namespace faiss { + #include +-#include ++#include + #include + #include + #include +diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h +--- a/faiss/impl/platform_macros.h ++++ b/faiss/impl/platform_macros.h +@@ -185,7 +185,7 @@ + #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END + #endif + #endif +-#elif defined(__GNUC__) ++#elif defined(__GNUC__) && !defined(__NVCC__) + // Unfortunately, GCC does not provide a pragma for detecting it. + // So, we have to stick to GNUC, which is defined by MANY compilers. + // This is why clang/icc needs to be checked first. diff --git a/cpp/cmake/patches/faiss_override.json b/cpp/cmake/patches/faiss_override.json index fa106bdf89..dcc81d0c0e 100644 --- a/cpp/cmake/patches/faiss_override.json +++ b/cpp/cmake/patches/faiss_override.json @@ -1,20 +1,10 @@ { "packages" : { "faiss" : { - "version": "1.14.0", + "version": "1.14.2", "git_url": "https://github.com/facebookresearch/faiss.git", "git_tag": "v${version}", "patches" : [ - { - "file" : "${current_json_dir}/faiss-1.14-cuvs-25.12.diff", - "issue" : "Multiple fixes for cuVS and RMM compatibility", - "fixed_in" : "" - }, - { - "file" : "${current_json_dir}/faiss-1.14-cuvs-26.02.diff", - "issue" : "Multiple fixes for cuVS and RMM compatibility", - "fixed_in" : "" - }, { "file" : "${current_json_dir}/faiss-1.14-cuvs-26.04.diff", "issue" : "Multiple fixes for cuVS compatibility",