Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions diskann-providers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ serde_json.workspace = true
[dev-dependencies]
approx.workspace = true
criterion.workspace = true
diskann-utils = { workspace = true, features = ["testing"] }
iai-callgrind.workspace = true
itertools.workspace = true
tempfile.workspace = true
Expand Down
22 changes: 5 additions & 17 deletions diskann-providers/src/index/diskann_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ pub(crate) mod tests {
utils::{IntoUsize, ONE, async_tools::VectorIdBoxSlice},
};
use diskann_quantization::scalar::train::ScalarQuantizationParameters;
use diskann_utils::views::Matrix;
use diskann_utils::{test_data_root, views::Matrix};
use diskann_vector::{
DistanceFunction, PureDistanceFunction,
distance::{Metric, SquaredL2},
Expand Down Expand Up @@ -2130,7 +2130,7 @@ pub(crate) mod tests {
}
}

const SIFTSMALL: &str = "/test_data/sift/siftsmall_learn_256pts.fbin";
const SIFTSMALL: &str = "/sift/siftsmall_learn_256pts.fbin";

#[rstest]
#[tokio::test]
Expand Down Expand Up @@ -2363,11 +2363,7 @@ pub(crate) mod tests {
DP: DataProvider<Context = DefaultContext, ExternalId = u32>
+ diskann::provider::SetElement<[f32]>,
{
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage = VirtualStorageProvider::new_overlay(workspace_root);
let storage = VirtualStorageProvider::new_overlay(test_data_root());
let (data_vec, npoints, dim) = file_util::load_bin(&storage, file, 0).unwrap();
let data =
Arc::new(Matrix::<f32>::try_from(data_vec.into_boxed_slice(), npoints, dim).unwrap());
Expand Down Expand Up @@ -2989,11 +2985,7 @@ pub(crate) mod tests {
S::PruneStrategy: Clone,
{
let ctx = &DefaultContext;
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage = VirtualStorageProvider::new_overlay(workspace_root);
let storage = VirtualStorageProvider::new_overlay(test_data_root());

let mut iter = VectorDataIterator::<_, crate::model::graph::traits::AdHoc<f32>>::new(
file, None, &storage,
Expand Down Expand Up @@ -3047,11 +3039,7 @@ pub(crate) mod tests {
for<'a> aliases::InsertPruneAccessor<'a, S, TestProvider, [f32]>: AsElement<&'a [f32]>,
S::PruneStrategy: Clone,
{
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage = VirtualStorageProvider::new_overlay(workspace_root);
let storage = VirtualStorageProvider::new_overlay(test_data_root());
let (train_data, npoints, dim) = file_util::load_bin(&storage, file, 0).unwrap();

let train_data_view =
Expand Down
25 changes: 7 additions & 18 deletions diskann-providers/src/model/pq/fixed_chunk_pq_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,7 @@ mod fixed_chunk_pq_table_test {

use crate::storage::{StorageReadProvider, VirtualStorageProvider};
use approx::assert_relative_eq;
use diskann_utils::test_data_root;
use diskann_vector::{
PureDistanceFunction,
distance::{InnerProduct, SquaredL2},
Expand Down Expand Up @@ -940,12 +941,8 @@ mod fixed_chunk_pq_table_test {

#[test]
fn load_pivot_test() {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin";
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let pq_pivots_path: &str = "/sift/siftsmall_learn_pq_pivots.bin";
let (dim, pq_table, centroids, chunk_offsets) =
load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap();
let fixed_chunk_pq_table = FixedChunkPQTable::new(
Expand Down Expand Up @@ -1015,13 +1012,9 @@ mod fixed_chunk_pq_table_test {

#[test]
fn preprocess_query_test() {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());

let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin";
let pq_pivots_path: &str = "/sift/siftsmall_learn_pq_pivots.bin";
let (dim, pq_table, centroids, chunk_offsets) =
load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap();
let fixed_chunk_pq_table = FixedChunkPQTable::new(
Expand Down Expand Up @@ -1114,13 +1107,9 @@ mod fixed_chunk_pq_table_test {

#[test]
fn calculate_distances_tests() {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());

let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin";
let pq_pivots_path: &str = "/sift/siftsmall_learn_pq_pivots.bin";

let (dim, pq_table, centroids, chunk_offsets) =
load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap();
Expand Down
37 changes: 11 additions & 26 deletions diskann-providers/src/model/pq/pq_construction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,7 @@ mod pq_test {
use crate::storage::VirtualStorageProvider;
use approx::assert_relative_eq;
use diskann::utils::IntoUsize;
use diskann_utils::test_data_root;
use rand_distr::{Distribution, Uniform};
use rstest::rstest;
use vfs::{MemoryFS, OverlayFS};
Expand Down Expand Up @@ -1235,7 +1236,7 @@ mod pq_test {
fn read_pivot_metadata_existing_test() {
// no real data except pivot data.
const DATA_FILE: &str = "/test/test/fake.bin";
const PQ_PIVOT_PATH: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin";
const PQ_PIVOT_PATH: &str = "/sift/siftsmall_learn_pq_pivots.bin";
const PQ_COMPRESSED_PATH: &str = "/test/test/fake.bin";

let mut train_data = vec![0.0; 10 * 5];
Expand All @@ -1244,11 +1245,7 @@ mod pq_test {
let num_centers = 256;
let num_pq_chunks = dim - 1;
let max_k_means_reps = 10;
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let pq_storage = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE));
let pool = create_thread_pool_for_test();
let result = generate_pq_pivots(
Expand Down Expand Up @@ -1428,13 +1425,9 @@ mod pq_test {
#[case] num_pq_chunks: usize,
) {
// Creates a new filesystem using a read/write MemoryFS with PhysicalFS as a fall-back read-only filesystem.
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());

let data_file = "/test_data/sift/siftsmall_learn.bin";
let data_file = "/sift/siftsmall_learn.bin";
let pq_pivots_path = "/pq_pivots_validation.bin";
let pq_compressed_vectors_path = "/pq_validation.bin";
let mut pq_storage: PQStorage =
Expand Down Expand Up @@ -1679,16 +1672,12 @@ mod pq_test {
#[test]
fn pq_end_to_end_validation_with_codebook_test() {
// Creates a new filesystem using a read/write MemoryFS with PhysicalFS as a fall-back read-only filesystem.
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
type ReaderType = <VirtualStorageProvider<OverlayFS> as StorageReadProvider>::Reader;

let data_file = "/test_data/sift/siftsmall_learn.bin";
let pq_pivots_path = "/test_data/sift/siftsmall_learn_pq_pivots.bin";
let ground_truth_path = "/test_data/sift/siftsmall_learn_pq_compressed.bin";
let data_file = "/sift/siftsmall_learn.bin";
let pq_pivots_path = "/sift/siftsmall_learn_pq_pivots.bin";
let ground_truth_path = "/sift/siftsmall_learn_pq_compressed.bin";
let pq_compressed_vectors_path = "/validation.bin";
let mut pq_storage =
PQStorage::new(pq_pivots_path, pq_compressed_vectors_path, Some(data_file));
Expand Down Expand Up @@ -1785,13 +1774,9 @@ mod pq_test {
#[case] num_pq_chunks: usize,
) {
// Creates a new filesystem using a read/write MemoryFS with PhysicalFS as a fall-back read-only filesystem.
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());

let data_file = "/test_data/sift/siftsmall_learn.bin";
let data_file = "/sift/siftsmall_learn.bin";
let pq_pivots_path = "/pq_pivots_validation.bin";
let pq_compressed_vectors_path = "/pq_validation.bin";
let pq_storage: PQStorage =
Expand Down
10 changes: 3 additions & 7 deletions diskann-providers/src/storage/index_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ mod tests {
utils::{IntoUsize, ONE},
};
use diskann_utils::{
Reborrow,
Reborrow, test_data_root,
views::{Matrix, MatrixView},
};
use diskann_vector::distance::Metric;
Expand Down Expand Up @@ -269,13 +269,9 @@ mod tests {
#[tokio::test]
async fn test_save_and_load() {
let save_path = "/index";
let file_path = "/test_data/sift/siftsmall_learn_256pts.fbin";
let file_path = "/sift/siftsmall_learn_256pts.fbin";
let train_data = {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage = VirtualStorageProvider::new_overlay(workspace_root);
let storage = VirtualStorageProvider::new_overlay(test_data_root());
let (train_data, npoints, dim) = file_util::load_bin(&storage, file_path, 0).unwrap();
Matrix::<f32>::try_from(train_data.into(), npoints, dim).unwrap()
};
Expand Down
31 changes: 8 additions & 23 deletions diskann-providers/src/storage/pq_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,14 +436,15 @@ impl PQStorage {
mod pq_storage_tests {

use crate::storage::VirtualStorageProvider;
use diskann_utils::test_data_root;
use vfs::MemoryFS;

use super::*;
use crate::utils::{gen_random_slice, read_metadata};

const DATA_FILE: &str = "/test_data/sift/siftsmall_learn.bin";
const PQ_PIVOT_PATH: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin";
const PQ_COMPRESSED_PATH: &str = "/test_data/sift/empty_pq_compressed.bin";
const DATA_FILE: &str = "/sift/siftsmall_learn.bin";
const PQ_PIVOT_PATH: &str = "/sift/siftsmall_learn_pq_pivots.bin";
const PQ_COMPRESSED_PATH: &str = "/sift/empty_pq_compressed.bin";

#[test]
fn new_test() {
Expand Down Expand Up @@ -481,11 +482,7 @@ mod pq_storage_tests {

#[test]
fn pivot_data_exist_test() {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE));
assert!(result.pivot_data_exist(&storage_provider));

Expand All @@ -496,11 +493,7 @@ mod pq_storage_tests {

#[test]
fn read_pivot_metadata_test() {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE));
let (npt, dim) = result
.read_existing_pivot_metadata(&storage_provider)
Expand All @@ -512,11 +505,7 @@ mod pq_storage_tests {

#[test]
fn load_pivot_data_test() {
let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE));
let (pq_pivot_data, centroids, chunk_offsets, _) = result
.load_existing_pivot_data(&1, &256, &128, &storage_provider, false)
Expand All @@ -535,11 +524,7 @@ mod pq_storage_tests {
// Create dummy OPQ matrix with test data
let dummy_opq_matrix = vec![1.345; OPQ_MATRIX_SIZE];

let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let pq_storage = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE));

// Write OPQ test data
Expand Down
11 changes: 3 additions & 8 deletions diskann-providers/src/utils/kmeans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,11 +424,10 @@ pub fn k_means_clustering<Pool: AsThreadPool>(

#[cfg(test)]
mod kmeans_test {
use std::path::PathBuf;

use crate::storage::{StorageReadProvider, VirtualStorageProvider};
use approx::assert_relative_eq;
use diskann::ANNErrorKind;
use diskann_utils::test_data_root;
use rstest::rstest;

use super::*;
Expand Down Expand Up @@ -713,17 +712,13 @@ mod kmeans_test {

#[test]
fn k_meanspp_selecting_pivots_should_not_hang() {
let test_data_path: &str = "/test_data/kmeans_test_data_file.fbin";
let test_data_path: &str = "/kmeans_test_data_file.fbin";
let dim = 1;
let num_points = 256;
let num_centers = 75; // Number of unique points in this dataset
let mut data: Vec<f32> = Vec::with_capacity(256);

let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let mut reader =
std::io::BufReader::new(storage_provider.open_reader(test_data_path).unwrap());
for _ in 0..256 {
Expand Down
11 changes: 4 additions & 7 deletions diskann-providers/src/utils/normalizing_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,21 +139,18 @@ pub fn normalize_data_internal<Pool: AsThreadPool>(
#[cfg(test)]
mod normalizing_utils_test {
use crate::storage::{StorageReadProvider, VirtualStorageProvider};
use diskann_utils::test_data_root;

use super::*;
use crate::utils::{create_thread_pool_for_test, storage_utils::*};

#[test]
fn test_normalize_data_file() {
let in_file_name = "/test_data/sift/siftsmall_learn_256pts.fbin";
let norm_file_name = "/test_data/sift/siftsmall_learn_256pts_normalized.fbin";
let in_file_name = "/sift/siftsmall_learn_256pts.fbin";
let norm_file_name = "/sift/siftsmall_learn_256pts_normalized.fbin";
let out_file_name = "/siftsmall_learn_256pts_normalized.fbin";

let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.to_path_buf();
let storage_provider = VirtualStorageProvider::new_overlay(workspace_root);
let storage_provider = VirtualStorageProvider::new_overlay(test_data_root());
let pool = create_thread_pool_for_test();
normalize_data_file(in_file_name, out_file_name, &storage_provider, &pool).unwrap();

Expand Down
Loading