Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions diskann/src/ivf/glue.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright (c) Microsoft Corporation.
* Licensed under the MIT license.
*/

use std::fmt::Debug;

use diskann_utils::future::SendFuture;

use crate::{
error::{StandardError, ToRanked},
graph::SearchOutputBuffer,
provider::{DataProvider, HasId},
};

/////////////////
// Search path //
/////////////////

/// Selects candidate inverted lists for a bound query or insert vector.
///
/// "Bound" here means the query (or vector to insert) is not an argument of
/// [`Self::select_lists`]; it is supplied when the accessor is constructed by
/// `SearchStrategy::list_accessor` or `InsertStrategy::list_accessor`.
pub trait ListAccessor: Send + Sync {
/// Opaque handle identifying an inverted list within the index.
type Id: Copy + Send + Sync;

/// The error type for [`Self::select_lists`].
type Error: ToRanked + Debug + Send + Sync + 'static;

/// Push the selected lists into `output` in distance order.
///
/// * `nprobe` - presumably the maximum number of lists to select; the trait
///   itself does not state whether fewer (or zero) lists may be produced --
///   TODO confirm with implementors.
/// * `output` - buffer that receives the selected list handles.
///
/// The returned future resolves to `Ok(())` on success or to
/// [`Self::Error`] on failure.
fn select_lists<B>(
&mut self,
nprobe: usize,
output: &mut B,
) -> impl SendFuture<Result<(), Self::Error>>
where
B: SearchOutputBuffer<Self::Id> + Send + ?Sized;
}

/// Scans a set of lists for search.
///
/// `Self::Id` (the point identifier reported to the scan callback) is not
/// declared in this trait; it is presumably the associated type of the
/// [`HasId`] supertrait.
pub trait SearchAccessor: HasId + Send + Sync {
/// Opaque handle identifying an inverted list within the index.
type ListId: Copy + Send + Sync;

/// The error type for [`Self::scan_lists`].
type Error: ToRanked + Debug + Send + Sync + 'static;

/// Score members of `lists` and invoke `f` for each `(id, distance)` pair.
///
/// * `lists` - iterator over the handles of the lists to scan.
/// * `f` - callback receiving each scored member's id and its `f32` distance.
///
/// The returned future resolves to `Ok(())` on success or to
/// [`Self::Error`] on failure.
fn scan_lists<Itr, F>(&mut self, lists: Itr, f: F) -> impl SendFuture<Result<(), Self::Error>>
where
Itr: Iterator<Item = Self::ListId> + Send,
F: Send + FnMut(Self::Id, f32);
}

/// Per-call factory for IVF search.
///
/// A strategy builds the two accessors used by one search: a coarse
/// [`ListAccessor`] that picks lists and a fine [`SearchAccessor`] that scans
/// them. Both share the same [`Self::ListId`] handle type.
///
/// * `'a` - lifetime of the borrows of the strategy, provider, and context
///   passed to the constructor methods.
/// * `Provider` - the [`DataProvider`] the accessors are built against.
/// * `T` - the query type; it is passed by value to each constructor.
pub trait SearchStrategy<'a, Provider, T>: Send + Sync
where
Provider: DataProvider,
{
/// The inverted-list handle type, shared by both accessors.
type ListId: Copy + Send + Sync;

/// The fine accessor, keyed to the provider's internal id and the shared list handle.
type SearchAccessor: SearchAccessor<Id = Provider::InternalId, ListId = Self::ListId>;

/// The coarse accessor, keyed to the shared list handle.
type ListAccessor: ListAccessor<Id = Self::ListId>;

/// An error that can occur when constructing either accessor.
type Error: StandardError;

/// Construct the fine scan accessor.
///
/// Takes `query` by value; returns [`Self::Error`] if construction fails.
fn search_accessor(
&'a self,
provider: &'a Provider,
context: &'a Provider::Context,
query: T,
) -> Result<Self::SearchAccessor, Self::Error>;

/// Construct the coarse list-selection accessor.
///
/// Takes `query` by value; returns [`Self::Error`] if construction fails.
fn list_accessor(
&'a self,
provider: &'a Provider,
context: &'a Provider::Context,
query: T,
) -> Result<Self::ListAccessor, Self::Error>;
}

/////////////////
// Insert path //
/////////////////

/// Appends a vector to a chosen list during insert.
///
/// `T` is the vector type accepted by [`Self::append`]. `Self::Id` is not
/// declared in this trait; it is presumably the associated type of the
/// [`HasId`] supertrait.
pub trait InsertAccessor<T>: HasId + Send + Sync {
/// Opaque handle identifying an inverted list within the index.
type ListId: Copy + Send + Sync;

/// The error type for [`Self::append`].
type Error: ToRanked + Debug + Send + Sync + 'static;

/// Append `vector` to `list` under `id`.
///
/// * `list` - handle of the destination list.
/// * `id` - identifier to store the vector under.
/// * `vector` - the vector to append, taken by value.
///
/// The returned future resolves to `Ok(())` on success or to
/// [`Self::Error`] on failure.
fn append(
&mut self,
list: Self::ListId,
id: Self::Id,
vector: T,
) -> impl SendFuture<Result<(), Self::Error>>;
}

/// Per-call factory for IVF insert.
///
/// A strategy builds the two accessors used by one insert: a coarse
/// [`ListAccessor`] that picks the destination list and an [`InsertAccessor`]
/// that appends to it. Both share the same [`Self::ListId`] handle type.
///
/// * `'a` - lifetime of the borrows of the strategy, provider, and context
///   passed to the constructor methods.
/// * `Provider` - the [`DataProvider`] the accessors are built against.
/// * `T` - the type of the vector being inserted.
pub trait InsertStrategy<'a, Provider, T>: Send + Sync
where
Provider: DataProvider,
{
/// The inverted-list handle type, shared by both accessors.
type ListId: Copy + Send + Sync;

/// The append accessor, keyed to the provider's internal id and the shared list handle.
type InsertAccessor: InsertAccessor<T, Id = Provider::InternalId, ListId = Self::ListId>;

/// The coarse accessor, keyed to the shared list handle.
type ListAccessor: ListAccessor<Id = Self::ListId>;

/// An error that can occur when constructing either accessor.
type Error: StandardError;

/// Construct the append accessor.
///
/// Unlike [`Self::list_accessor`], this constructor does not receive the
/// vector; the vector is handed to `InsertAccessor::append` instead.
fn insert_accessor(
&'a self,
provider: &'a Provider,
context: &'a Provider::Context,
) -> Result<Self::InsertAccessor, Self::Error>;

/// Construct the coarse list-selection accessor.
///
/// Takes `vector` by value; returns [`Self::Error`] if construction fails.
fn list_accessor(
&'a self,
provider: &'a Provider,
context: &'a Provider::Context,
vector: T,
) -> Result<Self::ListAccessor, Self::Error>;
}
154 changes: 154 additions & 0 deletions diskann/src/ivf/index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
* Copyright (c) Microsoft Corporation.
* Licensed under the MIT license.
*/

//! IVF index wrapper.

use std::num::NonZeroUsize;

use diskann_utils::future::SendFuture;

use crate::{
ANNResult,
error::{ANNError, ANNErrorKind, ErrorExt, IntoANNResult},
graph::SearchOutputBuffer,
ivf::{InsertAccessor, InsertStrategy, ListAccessor, SearchAccessor, SearchStrategy},
neighbor::{Neighbor, NeighborPriorityQueue, NeighborPriorityQueueIdType},
provider::{DataProvider, Guard, SetElement},
};

/// Counters reported by a single IVF search call.
///
/// Both counters start at zero when the value is built through [`Default`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SearchStats {
    /// Number of distance computations made while the selected lists were scanned.
    pub cmps: u32,

    /// Number of results that were written to the caller's output buffer.
    pub result_count: u32,
}

/// IVF index wrapper over a [`DataProvider`].
///
/// The wrapper owns the provider and holds no other state of its own.
#[derive(Debug)]
pub struct IvfIndex<P: DataProvider> {
// The wrapped provider; exposed read-only through `IvfIndex::provider`.
provider: P,
}

impl<P: DataProvider> IvfIndex<P> {
/// Construct a new index around `provider`.
pub fn new(provider: P) -> Self {
Self { provider }
}

/// Borrow the underlying provider.
pub fn provider(&self) -> &P {
&self.provider
}

/// Run IVF k-nearest-neighbor search.
pub fn knn_search<'a, S, T, OB>(
&'a self,
k: NonZeroUsize,
nprobe: usize,
strategy: &'a S,
context: &'a P::Context,
query: T,
output: &mut OB,
) -> impl SendFuture<ANNResult<SearchStats>>
where
S: SearchStrategy<'a, P, T>,
S::ListId: Eq,
P::InternalId: NeighborPriorityQueueIdType,
T: Copy + Send,
OB: SearchOutputBuffer<P::InternalId> + Send + ?Sized,
{
async move {
let mut list_accessor = strategy
.list_accessor(&self.provider, context, query)
.into_ann_result()?;

let mut lists: Vec<Neighbor<S::ListId>> = Vec::with_capacity(nprobe);
list_accessor
.select_lists(nprobe, &mut lists)
.await
.escalate("IVF coarse list selection must complete")?;

let mut search_accessor = strategy
.search_accessor(&self.provider, context, query)
.into_ann_result()?;

let k = k.get();
let mut queue = NeighborPriorityQueue::new(k);
let mut cmps: u32 = 0;

search_accessor
.scan_lists(lists.iter().map(|n| n.id), |id, dist| {
cmps += 1;
queue.insert(Neighbor::new(id, dist));
})
.await
.escalate("IVF list scan must complete to produce correct k-NN results")?;

let result_count =
output.extend(queue.iter().take(k).map(|n| (n.id, n.distance))) as u32;

Ok(SearchStats { cmps, result_count })
}
}

/// Insert a vector under external id `id`.
pub fn insert<'a, S, T>(
&'a self,
strategy: &'a S,
context: &'a P::Context,
id: &P::ExternalId,
vector: T,
) -> impl SendFuture<ANNResult<()>>
where
S: InsertStrategy<'a, P, T>,
S::ListId: Eq,
P: SetElement<T>,
T: Copy + Send,
{
async move {
let guard = self
.provider
.set_element(context, id, vector)
.await
.escalate("IVF insert requires a successful `set_element`")?;

let internal_id = guard.id();

let mut list_accessor = strategy
.list_accessor(&self.provider, context, vector)
.into_ann_result()?;

let mut lists: Vec<Neighbor<S::ListId>> = Vec::with_capacity(1);

list_accessor
.select_lists(1, &mut lists)
.await
.escalate("IVF insert must select a target list")?;

let list = lists.first().map(|n| n.id).ok_or_else(|| {
ANNError::message(
ANNErrorKind::IndexError,
"IVF insert: list selection returned no candidate list",
)
})?;

let mut insert_accessor = strategy
.insert_accessor(&self.provider, context)
.into_ann_result()?;

insert_accessor
.append(list, internal_id, vector)
.await
.escalate("IVF insert must append the vector to its assigned list")?;

guard.complete().await;

Ok(())
}
}
}
18 changes: 18 additions & 0 deletions diskann/src/ivf/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright (c) Microsoft Corporation.
* Licensed under the MIT license.
*/

//! IVF index traits and wrapper.
//!
//! Search selects candidate lists, scans them, and returns the best `k` results.
//! Insert selects one list and appends the new point to it.
//!
//! The accessor and strategy traits live in [`glue`]; the index wrapper and its
//! search statistics live in [`index`]. Both sets of items are re-exported from
//! this module.

pub mod glue;
pub mod index;

// Re-exports so the traits and wrapper can be named directly through this module.
pub use glue::{InsertAccessor, InsertStrategy, ListAccessor, SearchAccessor, SearchStrategy};
pub use index::{IvfIndex, SearchStats};

// Test module; compiled only when building tests.
#[cfg(test)]
mod test;
Loading
Loading