Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions diskann-benchmark-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ default = []
# BigANN Runbook support requires parsing a YAML file.
bigann = ["dep:serde_yaml"]

# Diversity-aware search wrappers (experimental).
experimental_diversity_search = ["diskann/experimental_diversity_search"]

[lints]
workspace = true

Expand Down
150 changes: 150 additions & 0 deletions diskann-benchmark-core/src/search/graph/diverse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Copyright (c) Microsoft Corporation.
* Licensed under the MIT license.
*/

//! A built-in helper for benchmarking diversity-aware K-nearest neighbors search.

use std::sync::Arc;

use diskann::{
ANNResult,
graph::{self, glue},
neighbor::AttributeValueProvider,
provider,
};
use diskann_utils::{future::AsyncFriendly, views::Matrix};

use crate::search::{self, graph::Strategy, graph::knn::Metrics};

/// A built-in helper for benchmarking diversity-aware search via
/// [`graph::search::Diverse`].
///
/// This mirrors [`super::KNN`] but runs each query through a
/// [`graph::search::Diverse`] wrapper constructed from the shared
/// [`AttributeValueProvider`] and diversity parameters stored on this struct. The
/// [`search::Search::Parameters`] remain the base [`graph::search::Knn`] parameters so that
/// the same benchmark driving code (search list sweeps, recall computation, aggregation) can
/// be reused unchanged.
///
/// # Type Parameters
///
/// - `DP`: The data provider type.
/// - `T`: The query element type.
/// - `S`: The search strategy type.
/// - `P`: The attribute value provider used to derive diversity attributes.
#[derive(Debug)]
pub struct DiverseKNN<DP, T, S, P>
where
DP: provider::DataProvider,
P: AttributeValueProvider,
{
/// The index every query is searched against.
index: Arc<graph::DiskANNIndex<DP>>,
/// Query vectors, one query per row; the row count is the number of queries.
queries: Arc<Matrix<T>>,
/// Source of the per-query search strategy; its length is checked against the number of
/// query rows when the searcher is constructed.
strategy: Strategy<S>,
/// Shared attribute provider; the `Arc` is cloned into the diversity parameters built for
/// each query.
attribute_provider: Arc<P>,
/// Attribute identifier forwarded to `graph::DiverseSearchParams::new` for every query.
diverse_attribute_id: usize,
/// Diversity `k` forwarded to `graph::DiverseSearchParams::new` for every query.
/// NOTE(review): presumably the cap on results sharing one attribute value; confirm
/// against the `diskann` crate.
diverse_results_k: usize,
}

impl<DP, T, S, P> DiverseKNN<DP, T, S, P>
where
    DP: provider::DataProvider,
    P: AttributeValueProvider,
{
    /// Build a [`DiverseKNN`] searcher and return it behind an [`Arc`].
    ///
    /// The arguments are stored as-is; `diverse_attribute_id` and `diverse_results_k` are
    /// later forwarded to the diversity parameters built for each query.
    ///
    /// # Errors
    ///
    /// Propagates the error reported by `strategy.length_compatible` when the number of
    /// elements in `strategy` is not compatible with the number of rows in `queries`.
    pub fn new(
        index: Arc<graph::DiskANNIndex<DP>>,
        queries: Arc<Matrix<T>>,
        strategy: Strategy<S>,
        attribute_provider: Arc<P>,
        diverse_attribute_id: usize,
        diverse_results_k: usize,
    ) -> anyhow::Result<Arc<Self>> {
        // Reject mismatched strategy/query counts before anything is stored.
        let query_count = queries.nrows();
        strategy.length_compatible(query_count)?;

        let searcher = Self {
            index,
            queries,
            strategy,
            attribute_provider,
            diverse_attribute_id,
            diverse_results_k,
        };
        Ok(Arc::new(searcher))
    }

    /// Borrow the shared handle to the underlying index.
    pub fn index(&self) -> &Arc<graph::DiskANNIndex<DP>> {
        let Self { index, .. } = self;
        index
    }
}

impl<DP, T, S, P> search::Search for DiverseKNN<DP, T, S, P>
where
DP: provider::DataProvider<Context: Default, ExternalId: search::Id>,
S: for<'a> glue::DefaultSearchStrategy<
'a,
DP,
&'a [T],
DP::ExternalId,
SearchAccessor: glue::SearchAccessor,
> + Clone
+ AsyncFriendly,
P: AttributeValueProvider<Id = DP::InternalId> + AsyncFriendly,
graph::search::Diverse<P>:
for<'a> graph::Search<'a, DP, S, &'a [T], Output = graph::index::SearchStats>,
T: AsyncFriendly + Clone,
{
type Id = DP::ExternalId;
type Parameters = graph::search::Knn;
type Output = Metrics;

fn num_queries(&self) -> usize {
self.queries.nrows()
}

fn id_count(&self, parameters: &Self::Parameters) -> search::IdCount {
search::IdCount::Fixed(parameters.k_value())
}

async fn search<O>(
&self,
parameters: &Self::Parameters,
buffer: &mut O,
index: usize,
) -> ANNResult<Self::Output>
where
O: graph::SearchOutputBuffer<DP::ExternalId> + Send,
{
let context = DP::Context::default();
let strategy = self.strategy.get(index)?;

let diverse_params = graph::DiverseSearchParams::new(
self.diverse_attribute_id,
self.diverse_results_k,
self.attribute_provider.clone(),
);
let diverse_search = graph::search::Diverse::new(*parameters, diverse_params);

let stats = self
.index
.search(
diverse_search,
strategy,
&context,
self.queries.row(index),
buffer,
)
.await?;

Ok(Metrics {
comparisons: stats.cmps,
hops: stats.hops,
})
}
}
6 changes: 6 additions & 0 deletions diskann-benchmark-core/src/search/graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@ pub mod range;

pub mod strategy;

#[cfg(feature = "experimental_diversity_search")]
pub mod diverse;

pub use inline::InlineFilterSearch;
pub use knn::KNN;
pub use multihop::MultiHop;
pub use range::Range;

pub use strategy::Strategy;

#[cfg(feature = "experimental_diversity_search")]
pub use diverse::DiverseKNN;

////////////////
// Test Utils //
////////////////
Expand Down
7 changes: 7 additions & 0 deletions diskann-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ minmax-quantization = []
# Enable multi-vector MaxSim distance benchmarks
multi-vector = []

# Enable experimental diversity-aware search benchmarks
experimental_diversity_search = [
"diskann/experimental_diversity_search",
"diskann-providers/experimental_diversity_search",
"diskann-benchmark-core/experimental_diversity_search",
]

# Enable bftree backend
bftree = ["dep:diskann-bftree"]

Expand Down
49 changes: 49 additions & 0 deletions diskann-benchmark/example/async-diverse-search.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"search_directories": [
"test_data/disk_index_search"
],
"jobs": [
{
"type": "graph-index-build",
"content": {
"source": {
"index-source": "Build",
"data_type": "float32",
"data": "disk_index_siftsmall_learn_256pts_data.fbin",
"distance": "squared_l2",
"max_degree": 32,
"l_build": 50,
"alpha": 1.2,
"backedge_ratio": 1.0,
"num_threads": 1,
"start_point_strategy": "medoid",
"num_insert_attempts": 1,
"saturate_inserts": false
},
"search_phase": {
"search-type": "topk-diverse-search",
"queries": "disk_index_sample_query_10pts.fbin",
"groundtruth": "disk_index_10pts_idx_uint32_truth_search_res.bin",
"attributes": "disk_index_siftsmall_learn_256pts_attributes.txt",
"reps": 5,
"num_threads": [
1
],
"diverse_attribute_id": 0,
"diverse_results_k": 1,
"runs": [
{
"search_n": 20,
"search_l": [
20,
30,
40
],
"recall_k": 10
}
]
}
}
}
]
}
68 changes: 58 additions & 10 deletions diskann-benchmark/src/index/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,16 @@ pub(crate) fn register_benchmarks(registry: &mut Registry) -> anyhow::Result<()>
// care.

// Full Precision
registry.register(
"graph-index-full-precision-f32",
FullPrecision::<f32>::new()
.search(plugins::Topk)
.search(plugins::Range)
.search(plugins::TopkBetaFilter)
.search(plugins::TopkMultihopFilter)
.search(plugins::TopkInlineFilter)
.search(plugins::DeterminantDiversity),
)?;
let full_precision_f32 = FullPrecision::<f32>::new()
.search(plugins::Topk)
.search(plugins::Range)
.search(plugins::TopkBetaFilter)
.search(plugins::TopkMultihopFilter)
.search(plugins::TopkInlineFilter)
.search(plugins::DeterminantDiversity);
#[cfg(feature = "experimental_diversity_search")]
let full_precision_f32 = full_precision_f32.search(plugins::DiverseSearch);
registry.register("graph-index-full-precision-f32", full_precision_f32)?;

registry.register(
"graph-index-full-precision-f16",
Expand Down Expand Up @@ -484,6 +484,54 @@ impl search::Plugin<FullPrecisionProvider<f32>, SearchPhase, Strategy<common::Fu
}
}

#[cfg(feature = "experimental_diversity_search")]
impl search::Plugin<FullPrecisionProvider<f32>, SearchPhase, Strategy<common::FullPrecision>>
    for plugins::DiverseSearch
{
    /// Delegate phase matching to the inherent [`plugins::DiverseSearch::is_match`].
    fn is_match(&self, phase: &SearchPhase) -> bool {
        plugins::DiverseSearch::is_match(phase)
    }

    /// Delegate the plugin name to the inherent [`plugins::DiverseSearch::as_str`].
    fn kind(&self) -> &'static str {
        plugins::DiverseSearch::as_str()
    }

    /// Run the diversity-aware top-k search phase against a full-precision `f32` index.
    ///
    /// Loads the queries, groundtruth and attribute file named by the phase, builds a
    /// `DiverseKNN` searcher with a broadcast full-precision strategy, and runs the KNN
    /// search steps. The incoming `_strategy` argument is not used.
    ///
    /// # Errors
    ///
    /// Propagates failures from extracting the phase, loading any of the input files,
    /// constructing the searcher, or running the search.
    fn run(
        &self,
        index: Arc<DiskANNIndex<FullPrecisionProvider<f32>>>,
        phase: &SearchPhase,
        _strategy: &Strategy<common::FullPrecision>,
    ) -> anyhow::Result<AggregatedSearchResults> {
        let diverse_phase = plugins::DiverseSearch::get(phase)?;

        let query_file = datafiles::BinFile(&diverse_phase.queries);
        let queries = Arc::new(datafiles::load_dataset::<f32>(query_file)?);

        let groundtruth_file = datafiles::BinFile(&diverse_phase.groundtruth);
        let groundtruth =
            datafiles::load_groundtruth(groundtruth_file, Some(diverse_phase.max_k()))?;

        let attributes =
            crate::utils::attributes::FileAttributeProvider::load(&diverse_phase.attributes)?;
        let attribute_provider = Arc::new(attributes);

        // A fresh broadcast strategy is built here; every query shares it.
        let broadcast = benchmark_core::search::graph::Strategy::broadcast(common::FullPrecision);
        let searcher = benchmark_core::search::graph::DiverseKNN::new(
            index,
            queries,
            broadcast,
            attribute_provider,
            diverse_phase.diverse_attribute_id,
            diverse_phase.diverse_results_k,
        )?;

        let steps = search::knn::SearchSteps::new(
            diverse_phase.reps,
            &diverse_phase.num_threads,
            &diverse_phase.runs,
        );
        let topk = search::knn::run(&searcher, &groundtruth, steps)?;

        Ok(AggregatedSearchResults::Topk(topk))
    }
}

impl<DP, S> search::Plugin<DP, SearchPhase, Strategy<S>> for plugins::Topk
where
DP: DataProvider<Context: Default, InternalId = u32, ExternalId = u32> + QueryType,
Expand Down
33 changes: 33 additions & 0 deletions diskann-benchmark/src/index/search/knn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,36 @@ where
Ok(results.into_iter().map(SearchResults::new).collect())
}
}

// NOTE(review): this impl requires `Search::Id = DP::InternalId`, while the `Search` impl for
// `DiverseKNN` declares `type Id = DP::ExternalId`; it therefore only applies to providers
// whose internal and external ID types are the same. Confirm this is intended.
#[cfg(feature = "experimental_diversity_search")]
impl<DP, T, S, P> Knn<DP::InternalId> for Arc<core_search::graph::DiverseKNN<DP, T, S, P>>
where
    DP: diskann::provider::DataProvider,
    P: diskann::neighbor::AttributeValueProvider,
    core_search::graph::DiverseKNN<DP, T, S, P>: core_search::Search<
            Id = DP::InternalId,
            Parameters = diskann::graph::search::Knn,
            Output = core_search::graph::knn::Metrics,
        >,
{
    /// Run every entry of `parameters` through the diverse searcher and aggregate the
    /// outcome against `groundtruth` using the KNN aggregator in fixed groundtruth mode.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `core_search::search_all`.
    fn search_all(
        &self,
        parameters: Vec<core_search::Run<diskann::graph::search::Knn>>,
        groundtruth: &dyn benchmark_core::recall::Rows<DP::InternalId>,
        recall_k: usize,
        recall_n: usize,
    ) -> anyhow::Result<Vec<SearchResults>> {
        // Only the `Arc` handle is cloned; the searcher itself is shared.
        let searcher = Arc::clone(self);
        let runs = parameters.into_iter();
        let aggregator = core_search::graph::knn::Aggregator::new(
            groundtruth,
            recall_k,
            recall_n,
            GroundTruthMode::Fixed,
        );

        let aggregated = core_search::search_all(searcher, runs, aggregator)?;

        let wrapped: Vec<SearchResults> =
            aggregated.into_iter().map(SearchResults::new).collect();
        Ok(wrapped)
    }
}
22 changes: 22 additions & 0 deletions diskann-benchmark/src/index/search/plugins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,28 @@ impl DeterminantDiversity {
}
}

/// Marker type for the diversity-aware top-k search plugin.
#[cfg(feature = "experimental_diversity_search")]
#[derive(Debug, Clone, Copy)]
pub(crate) struct DiverseSearch;

#[cfg(feature = "experimental_diversity_search")]
impl DiverseSearch {
    /// Return `true` when `phase` can be viewed as a top-k diverse search phase.
    pub(crate) fn is_match(phase: &SearchPhase) -> bool {
        matches!(phase.as_topk_diverse_search(), Ok(_))
    }

    /// The name of this search kind.
    pub(crate) const fn as_str() -> &'static str {
        const KIND: &str = "topk-diverse-search";
        KIND
    }

    /// Borrow the top-k diverse search phase held by `phase`.
    ///
    /// # Errors
    ///
    /// Returns the error from `as_topk_diverse_search`, converted into [`anyhow::Error`].
    pub(crate) fn get(
        phase: &SearchPhase,
    ) -> anyhow::Result<&crate::inputs::graph_index::TopkDiverseSearchPhase> {
        let diverse_phase = phase.as_topk_diverse_search()?;
        Ok(diverse_phase)
    }
}

/// A search plugin for range search.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Range;
Expand Down
Loading
Loading