Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ members = [
"diskann-benchmark-simd",
"diskann-benchmark",
"diskann-tools",
# Experimental
"experimental-multi-vector-bench",
]

default-members = [
Expand Down Expand Up @@ -62,6 +64,8 @@ diskann-label-filter = { path = "diskann-label-filter", version = "0.45.0" }
diskann-benchmark-runner = { path = "diskann-benchmark-runner", version = "0.45.0" }
diskann-benchmark-core = { path = "diskann-benchmark-core", version = "0.45.0" }
diskann-tools = { path = "diskann-tools", version = "0.45.0" }
# Experimental
experimental-multi-vector-bench = { path = "experimental-multi-vector-bench", version = "0.45.0" }

# External dependencies (shared versions)
anyhow = "1.0.98"
Expand Down
34 changes: 31 additions & 3 deletions diskann-quantization/src/multi_vector/matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,13 @@ impl<T: ReprOwned> Mat<T> {
}
}

pub(crate) unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
/// Returns the i-th row without bounds checking.
///
/// # Safety
///
/// `i` must be less than `self.num_vectors()`.
#[inline]
pub unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
// SAFETY: Caller must ensure i < self.num_vectors(). The constructors for this type
// ensure that `ptr` is compatible with `T`.
unsafe { self.repr.get_row(self.ptr, i) }
Expand Down Expand Up @@ -581,6 +587,17 @@ impl<T: Copy> Mat<Standard<T>> {
pub fn vector_dim(&self) -> usize {
// Each row of the matrix is one vector, so the vector dimension is simply the
// column count reported by the underlying representation.
self.repr.ncols()
}

/// Returns the underlying data as a contiguous slice.
///
/// The data is stored in row-major order: `[row0_col0, row0_col1, ..., row0_colN, row1_col0, ...]`.
#[inline]
pub fn as_slice(&self) -> &[T] {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something to think about: the creep of Standard-specific methods is not something I think we should lean into - especially if we want this to replace diskann_utils::views::Matrix and friends.

I've found myself needing something like this for some other multi-vector related work and I think it makes sense to have something like

trait Dense: Repr {
    type Element;
    unsafe fn as_slice(&self, ptr: NonNull<u8>) -> &[Self::Element];

}

trait DenseMut: Dense + ReprMut {
    unsafe fn as_slice_mut(&mut self, ptr: NonNull<u8>) -> &mut [Self::Element];
}

This way, MinMax, transposed, blocked etc. can all opt in to this as well. That said, I find the inability to add inherent methods a little unfortunate.

let len = self.repr.nrows() * self.repr.ncols();
// SAFETY: Standard representation guarantees contiguous row-major layout.
// The Mat was constructed with valid data of the correct length.
unsafe { std::slice::from_raw_parts(self.ptr.as_ptr().cast::<T>(), len) }
}
}

////////////
Expand Down Expand Up @@ -651,7 +668,7 @@ impl<'a, T: Repr> MatRef<'a, T> {
///
/// `i` must be less than `self.num_vectors()`.
#[inline]
pub(crate) unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
pub unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
    // This accessor is part of the public API, so catch contract violations early in
    // debug builds. Release builds compile the check out and keep the unchecked fast
    // path; callers must still uphold the precondition themselves.
    debug_assert!(
        i < self.num_vectors(),
        "row index {} is out of bounds for a matrix with {} vectors",
        i,
        self.num_vectors()
    );
    // SAFETY: Caller must ensure i < self.num_vectors().
    unsafe { self.repr.get_row(self.ptr, i) }
}
Expand Down Expand Up @@ -683,6 +700,17 @@ impl<'a, T: Copy> MatRef<'a, Standard<T>> {
pub fn vector_dim(&self) -> usize {
// Each row of the matrix is one vector, so the vector dimension is simply the
// column count reported by the underlying representation.
self.repr.ncols()
}

/// Views the entire matrix as one flat, contiguous slice of elements.
///
/// Elements appear in row-major order: every column of row 0 first, then every column of
/// row 1, and so on. The returned slice holds `nrows * ncols` elements.
#[inline]
pub fn as_slice(&self) -> &[T] {
    let base = self.ptr.as_ptr().cast::<T>();
    let total = self.repr.nrows() * self.repr.ncols();
    // SAFETY: The `Standard` representation guarantees a contiguous row-major layout,
    // and the `MatRef` was constructed over valid data of the correct length.
    unsafe { std::slice::from_raw_parts(base, total) }
}
}

// Reborrow: Mat -> MatRef
Expand Down Expand Up @@ -784,7 +812,7 @@ impl<'a, T: ReprMut> MatMut<'a, T> {
///
/// `i` must be less than `self.num_vectors()`.
#[inline]
pub(crate) unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
pub unsafe fn get_row_unchecked(&self, i: usize) -> T::Row<'_> {
    // This accessor is part of the public API, so catch contract violations early in
    // debug builds. Release builds compile the check out and keep the unchecked fast
    // path; callers must still uphold the precondition themselves.
    debug_assert!(
        i < self.num_vectors(),
        "row index {} is out of bounds for a matrix with {} vectors",
        i,
        self.num_vectors()
    );
    // SAFETY: Caller must ensure i < self.num_vectors().
    unsafe { self.repr.get_row(self.ptr, i) }
}
Expand Down
35 changes: 35 additions & 0 deletions experimental-multi-vector-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
[package]
name = "experimental-multi-vector-bench"
edition.workspace = true
version.workspace = true
authors.workspace = true
description = "Experimental multi-vector benchmarking support for DiskANN"
documentation.workspace = true
license.workspace = true

[[bin]]
name = "multivec-bench"
path = "src/bin/multivec_bench.rs"

[dependencies]
diskann-linalg.workspace = true
diskann-utils.workspace = true
diskann-quantization.workspace = true
diskann-vector.workspace = true
diskann-wide.workspace = true

# Benchmark dependencies
anyhow.workspace = true
diskann-benchmark-runner.workspace = true
rand.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
thiserror.workspace = true

[dev-dependencies]
tempfile.workspace = true

[lints]
workspace = true
135 changes: 135 additions & 0 deletions experimental-multi-vector-bench/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# experimental-multi-vector-bench

Experimental multi-vector benchmarking support for DiskANN, enabling late interaction retrieval with token-level embeddings.

## Scope & Goals

This crate is an **experimental workspace** focused on:

1. **Fast Chamfer distance implementation for `f32`** - Develop and benchmark high-performance implementations of the Chamfer distance function for multi-vector representations using 32-bit floating point values.

2. **Multiple computation approaches** - Compare naive scalar, SIMD-accelerated, transposed, tiling, and SGEMM implementations to quantify performance gains.

3. **Benchmarking infrastructure** - Provide tooling to measure and compare different implementation strategies.

## Current Status

- ✅ `MultiVector` type alias for `Mat<Standard<f32>>` (row-major storage from diskann-quantization)
- ✅ `TransposedMultiVector` type for block-transposed storage (SIMD-optimized)
- ✅ `Chamfer<Approach>` - Generic distance calculator using Inner Product similarity
- ✅ `Chamfer<NaiveApproach>` - Scalar baseline implementation
- ✅ `Chamfer<SimdApproach>` - SIMD-accelerated implementation
- ✅ `Chamfer<TransposedApproach>` - Block-transposed SIMD with transposed documents
- ✅ `Chamfer<TransposedWithTilingApproach>` - Block-transposed SIMD with query pair tiling
- ✅ `Chamfer<QueryTransposedWithTilingApproach>` - Transposed query with doc pair tiling
- ✅ `Chamfer<SgemmApproach>` - BLAS SGEMM + SIMD row-max
- ✅ Implements `diskann_vector::DistanceFunction` trait for ecosystem compatibility
- ✅ Benchmark utility integrated with diskann-benchmark-runner

## Usage

```rust
use experimental_multi_vector_bench::{
Chamfer, SimdApproach, TransposedWithTilingApproach, QueryTransposedWithTilingApproach,
MultiVector, TransposedMultiVector, Standard,
};
use diskann_vector::DistanceFunction;

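// Create the query and document multi-vectors
// (here: 3 query vectors and 5 document vectors, all of dimension 4)
let query = MultiVector::new(Standard::new(3, 4), 0.0f32).unwrap();
let document = MultiVector::new(Standard::new(5, 4), 0.0f32).unwrap();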

// Basic usage with row-major vectors (NaiveApproach or SimdApproach)
let chamfer = Chamfer::<SimdApproach>::new();
let distance = chamfer.evaluate_similarity(&query, &document);

// Optimized for few query tokens (≤8): transpose documents
let chamfer = Chamfer::<TransposedWithTilingApproach>::new();
let transposed_doc = TransposedMultiVector::from(&document);
let distance = chamfer.evaluate_similarity(&query, &transposed_doc);

// Optimized for many query tokens (≥16): transpose query instead
let chamfer = Chamfer::<QueryTransposedWithTilingApproach>::new();
let transposed_query = TransposedMultiVector::from(&query);
let distance = chamfer.evaluate_similarity(&transposed_query, &document);

// For large Q×D: use SGEMM
use experimental_multi_vector_bench::{SgemmApproach, SgemmScratch};
let chamfer = Chamfer::<SgemmApproach>::new();
let mut scratch = SgemmScratch::new();
let distance = chamfer.evaluate_similarity_with_scratch(&query, &document, &mut scratch);
```

## Type Aliases

This crate uses shared types from `diskann-quantization` for multi-vector representation:

```rust
// Row-major owning matrix
pub type MultiVector = Mat<Standard<f32>>;

// Immutable view
pub type MultiVectorRef<'a> = MatRef<'a, Standard<f32>>;
```

The `Standard<f32>` representation provides:

- Contiguous row-major storage
- Direct `as_slice()` access for BLAS operations
- Zero-copy views via `MatRef`

## Future Work

- [ ] Add RFC based on findings for DiskANN integration
- [ ] Additional similarity measures (Cosine, SquaredL2)
- [ ] Support for additional element types (`f16`, `u8` quantized, etc.)

## Running Benchmarks

```bash
# Run benchmarks with example configuration
cargo run --release -p experimental-multi-vector-bench --bin multivec-bench -- run \
--input-file experimental-multi-vector-bench/examples/bench.json \
--output-file results.json

# Verify correctness (all approaches should produce the same checksum)
cargo run --release -p experimental-multi-vector-bench --bin multivec-bench -- run \
--input-file experimental-multi-vector-bench/examples/verify.json \
--output-file verify_results.json
```

See [examples/bench.json](examples/bench.json) for benchmark configuration format.

### Benchmark Configuration

The benchmark supports six approaches via the `approach` field:

- `"naive"` - Scalar baseline
- `"simd"` - SIMD-accelerated
- `"transposed_simd"` - Block-transposed SIMD
- `"transposed_with_tiling"` - Block-transposed SIMD with query pair tiling
- `"query_transposed_with_tiling"` - Transposed query with doc pair tiling
- `"sgemm"` - BLAS SGEMM + SIMD row-max

## Module Structure

```text
src/
├── lib.rs # Crate root with re-exports and type aliases
├── multi_vector.rs # TransposedMultiVector type (block-transposed storage)
├── distance/
│ ├── mod.rs # Chamfer<Approach> generic struct
│ ├── naive.rs # Scalar implementation (NaiveApproach)
│ ├── simd.rs # SIMD-accelerated (SimdApproach)
│ ├── transposed.rs # Transposed docs (TransposedApproach)
│ ├── transposed_tiling.rs # Transposed docs + query tiling (TransposedWithTilingApproach)
│ ├── query_transposed_tiling.rs # Transposed query + doc tiling (QueryTransposedWithTilingApproach)
│ └── sgemm.rs # BLAS SGEMM + row-max (SgemmApproach)
└── bench/
├── mod.rs # Benchmark registration and dispatch
├── input.rs # Benchmark input types
└── runner.rs # Benchmark execution logic
```

## Contributing

This work is experimental and will be submitted as separate PRs.
95 changes: 95 additions & 0 deletions experimental-multi-vector-bench/examples/bench.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{
"search_directories": [],
"jobs": [
{
"type": "multivec-op",
"content": {
"approach": "simd",
"runs": [
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 16 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 }
]
}
},
{
"type": "multivec-op",
"content": {
"approach": "transposed_simd",
"runs": [
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 16 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 }
]
}
},
{
"type": "multivec-op",
"content": {
"approach": "transposed_with_tiling",
"runs": [
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 16 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 }
]
}
},
{
"type": "multivec-op",
"content": {
"approach": "query_transposed_with_tiling",
"runs": [
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 16 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 }
]
}
},
{
"type": "multivec-op",
"content": {
"approach": "sgemm",
"runs": [
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 128, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 },
{ "dim": 256, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 16 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 8, "num_doc_token": 32 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 16, "num_doc_token": 64 },
{ "dim": 384, "num_points": 100, "loops_per_measurement": 10, "num_measurements": 50, "num_query_token": 32, "num_doc_token": 128 }
]
}
}
]
}
Loading
Loading