diff --git a/diskann-providers/Cargo.toml b/diskann-providers/Cargo.toml index c0deaeca6..70acabca2 100644 --- a/diskann-providers/Cargo.toml +++ b/diskann-providers/Cargo.toml @@ -48,6 +48,7 @@ serde_json.workspace = true [dev-dependencies] approx.workspace = true criterion.workspace = true +diskann-utils = { workspace = true, features = ["testing"] } iai-callgrind.workspace = true itertools.workspace = true tempfile.workspace = true diff --git a/diskann-providers/src/index/diskann_async.rs b/diskann-providers/src/index/diskann_async.rs index d7ad8f9ee..5451af3ad 100644 --- a/diskann-providers/src/index/diskann_async.rs +++ b/diskann-providers/src/index/diskann_async.rs @@ -188,7 +188,7 @@ pub(crate) mod tests { utils::{IntoUsize, ONE, async_tools::VectorIdBoxSlice}, }; use diskann_quantization::scalar::train::ScalarQuantizationParameters; - use diskann_utils::views::Matrix; + use diskann_utils::{test_data_root, views::Matrix}; use diskann_vector::{ DistanceFunction, PureDistanceFunction, distance::{Metric, SquaredL2}, @@ -2130,7 +2130,7 @@ pub(crate) mod tests { } } - const SIFTSMALL: &str = "/test_data/sift/siftsmall_learn_256pts.fbin"; + const SIFTSMALL: &str = "/sift/siftsmall_learn_256pts.fbin"; #[rstest] #[tokio::test] @@ -2363,11 +2363,7 @@ pub(crate) mod tests { DP: DataProvider + diskann::provider::SetElement<[f32]>, { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage = VirtualStorageProvider::new_overlay(workspace_root); + let storage = VirtualStorageProvider::new_overlay(test_data_root()); let (data_vec, npoints, dim) = file_util::load_bin(&storage, file, 0).unwrap(); let data = Arc::new(Matrix::::try_from(data_vec.into_boxed_slice(), npoints, dim).unwrap()); @@ -2989,11 +2985,7 @@ pub(crate) mod tests { S::PruneStrategy: Clone, { let ctx = &DefaultContext; - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage = VirtualStorageProvider::new_overlay(workspace_root); + let storage = VirtualStorageProvider::new_overlay(test_data_root()); let mut iter = VectorDataIterator::<_, crate::model::graph::traits::AdHoc>::new( file, None, &storage, @@ -3047,11 +3039,7 @@ pub(crate) mod tests { for<'a> aliases::InsertPruneAccessor<'a, S, TestProvider, [f32]>: AsElement<&'a [f32]>, S::PruneStrategy: Clone, { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage = VirtualStorageProvider::new_overlay(workspace_root); + let storage = VirtualStorageProvider::new_overlay(test_data_root()); let (train_data, npoints, dim) = file_util::load_bin(&storage, file, 0).unwrap(); let train_data_view = diff --git a/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs b/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs index 7939bb93f..ed903d8e7 100644 --- a/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs +++ b/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs @@ -763,6 +763,7 @@ mod fixed_chunk_pq_table_test { use crate::storage::{StorageReadProvider, VirtualStorageProvider}; use approx::assert_relative_eq; + use diskann_utils::test_data_root; use diskann_vector::{ PureDistanceFunction, distance::{InnerProduct, SquaredL2}, @@ -940,12 +941,8 @@ mod fixed_chunk_pq_table_test { #[test] fn load_pivot_test() { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); - let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); + let pq_pivots_path: &str = "/sift/siftsmall_learn_pq_pivots.bin"; let (dim, pq_table, centroids, chunk_offsets) = load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap(); let fixed_chunk_pq_table = FixedChunkPQTable::new( @@ -1015,13 +1012,9 @@ mod fixed_chunk_pq_table_test { #[test] fn preprocess_query_test() { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); - let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; + let pq_pivots_path: &str = "/sift/siftsmall_learn_pq_pivots.bin"; let (dim, pq_table, centroids, chunk_offsets) = load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap(); let fixed_chunk_pq_table = FixedChunkPQTable::new( @@ -1114,13 +1107,9 @@ mod fixed_chunk_pq_table_test { #[test] fn calculate_distances_tests() { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); - let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; + let pq_pivots_path: &str = "/sift/siftsmall_learn_pq_pivots.bin"; let (dim, pq_table, centroids, chunk_offsets) = load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap(); diff --git a/diskann-providers/src/model/pq/pq_construction.rs b/diskann-providers/src/model/pq/pq_construction.rs index cbaf478e6..f146ebd9b 100644 --- a/diskann-providers/src/model/pq/pq_construction.rs +++ b/diskann-providers/src/model/pq/pq_construction.rs @@ -1007,6 +1007,7 @@ mod pq_test { use crate::storage::VirtualStorageProvider; use approx::assert_relative_eq; use diskann::utils::IntoUsize; + use diskann_utils::test_data_root; use rand_distr::{Distribution, Uniform}; use rstest::rstest; use vfs::{MemoryFS, OverlayFS}; @@ -1235,7 +1236,7 @@ mod pq_test { fn read_pivot_metadata_existing_test() { // no real data except pivot data. const DATA_FILE: &str = "/test/test/fake.bin"; - const PQ_PIVOT_PATH: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; + const PQ_PIVOT_PATH: &str = "/sift/siftsmall_learn_pq_pivots.bin"; const PQ_COMPRESSED_PATH: &str = "/test/test/fake.bin"; let mut train_data = vec![0.0; 10 * 5]; @@ -1244,11 +1245,7 @@ mod pq_test { let num_centers = 256; let num_pq_chunks = dim - 1; let max_k_means_reps = 10; - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let pq_storage = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); let pool = create_thread_pool_for_test(); let result = generate_pq_pivots( @@ -1428,13 +1425,9 @@ mod pq_test { #[case] num_pq_chunks: usize, ) { // Creates a new filesystem using a read/write MemoryFS with PhysicalFS as a fall-back read-only filesystem. - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); - let data_file = "/test_data/sift/siftsmall_learn.bin"; + let data_file = "/sift/siftsmall_learn.bin"; let pq_pivots_path = "/pq_pivots_validation.bin"; let pq_compressed_vectors_path = "/pq_validation.bin"; let mut pq_storage: PQStorage = @@ -1679,16 +1672,12 @@ mod pq_test { #[test] fn pq_end_to_end_validation_with_codebook_test() { // Creates a new filesystem using a read/write MemoryFS with PhysicalFS as a fall-back read-only filesystem. - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); type ReaderType = as StorageReadProvider>::Reader; - let data_file = "/test_data/sift/siftsmall_learn.bin"; - let pq_pivots_path = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; - let ground_truth_path = "/test_data/sift/siftsmall_learn_pq_compressed.bin"; + let data_file = "/sift/siftsmall_learn.bin"; + let pq_pivots_path = "/sift/siftsmall_learn_pq_pivots.bin"; + let ground_truth_path = "/sift/siftsmall_learn_pq_compressed.bin"; let pq_compressed_vectors_path = "/validation.bin"; let mut pq_storage = PQStorage::new(pq_pivots_path, pq_compressed_vectors_path, Some(data_file)); @@ -1785,13 +1774,9 @@ mod pq_test { #[case] num_pq_chunks: usize, ) { // Creates a new filesystem using a read/write MemoryFS with PhysicalFS as a fall-back read-only filesystem. - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); - let data_file = "/test_data/sift/siftsmall_learn.bin"; + let data_file = "/sift/siftsmall_learn.bin"; let pq_pivots_path = "/pq_pivots_validation.bin"; let pq_compressed_vectors_path = "/pq_validation.bin"; let pq_storage: PQStorage = diff --git a/diskann-providers/src/storage/index_storage.rs b/diskann-providers/src/storage/index_storage.rs index 674726e5f..7a5d87bfe 100644 --- a/diskann-providers/src/storage/index_storage.rs +++ b/diskann-providers/src/storage/index_storage.rs @@ -223,7 +223,7 @@ mod tests { utils::{IntoUsize, ONE}, }; use diskann_utils::{ - Reborrow, + Reborrow, test_data_root, views::{Matrix, MatrixView}, }; use diskann_vector::distance::Metric; @@ -269,13 +269,9 @@ mod tests { #[tokio::test] async fn test_save_and_load() { let save_path = "/index"; - let file_path = "/test_data/sift/siftsmall_learn_256pts.fbin"; + let file_path = "/sift/siftsmall_learn_256pts.fbin"; let train_data = { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage = VirtualStorageProvider::new_overlay(workspace_root); + let storage = VirtualStorageProvider::new_overlay(test_data_root()); let (train_data, npoints, dim) = file_util::load_bin(&storage, file_path, 0).unwrap(); Matrix::::try_from(train_data.into(), npoints, dim).unwrap() }; diff --git a/diskann-providers/src/storage/pq_storage.rs b/diskann-providers/src/storage/pq_storage.rs index ddf12593b..99eb78c0a 100644 --- a/diskann-providers/src/storage/pq_storage.rs +++ b/diskann-providers/src/storage/pq_storage.rs @@ -436,14 +436,15 @@ impl PQStorage { mod pq_storage_tests { use crate::storage::VirtualStorageProvider; + use diskann_utils::test_data_root; use vfs::MemoryFS; use super::*; use crate::utils::{gen_random_slice, read_metadata}; - const DATA_FILE: &str = "/test_data/sift/siftsmall_learn.bin"; - const PQ_PIVOT_PATH: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; - const PQ_COMPRESSED_PATH: &str = "/test_data/sift/empty_pq_compressed.bin"; + const DATA_FILE: &str = "/sift/siftsmall_learn.bin"; + const PQ_PIVOT_PATH: &str = "/sift/siftsmall_learn_pq_pivots.bin"; + const PQ_COMPRESSED_PATH: &str = "/sift/empty_pq_compressed.bin"; #[test] fn new_test() { @@ -481,11 +482,7 @@ mod pq_storage_tests { #[test] fn pivot_data_exist_test() { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); assert!(result.pivot_data_exist(&storage_provider)); @@ -496,11 +493,7 @@ mod pq_storage_tests { #[test] fn read_pivot_metadata_test() { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); let (npt, dim) = result .read_existing_pivot_metadata(&storage_provider) @@ -512,11 +505,7 @@ mod pq_storage_tests { #[test] fn load_pivot_data_test() { - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); let (pq_pivot_data, centroids, chunk_offsets, _) = result .load_existing_pivot_data(&1, &256, &128, &storage_provider, false) @@ -535,11 +524,7 @@ mod pq_storage_tests { // Create dummy OPQ matrix with test data let dummy_opq_matrix = vec![1.345; OPQ_MATRIX_SIZE]; - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let pq_storage = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); // Write OPQ test data diff --git a/diskann-providers/src/utils/kmeans.rs b/diskann-providers/src/utils/kmeans.rs index 034210f0b..1653bd4da 100644 --- a/diskann-providers/src/utils/kmeans.rs +++ b/diskann-providers/src/utils/kmeans.rs @@ -424,11 +424,10 @@ pub fn k_means_clustering( #[cfg(test)] mod kmeans_test { - use std::path::PathBuf; - use crate::storage::{StorageReadProvider, VirtualStorageProvider}; use approx::assert_relative_eq; use diskann::ANNErrorKind; + use diskann_utils::test_data_root; use rstest::rstest; use super::*; @@ -713,17 +712,13 @@ mod kmeans_test { #[test] fn k_meanspp_selecting_pivots_should_not_hang() { - let test_data_path: &str = "/test_data/kmeans_test_data_file.fbin"; + let test_data_path: &str = "/kmeans_test_data_file.fbin"; let dim = 1; let num_points = 256; let num_centers = 75; // Number of unique points in this dataset let mut data: Vec = Vec::with_capacity(256); - let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let mut reader = std::io::BufReader::new(storage_provider.open_reader(test_data_path).unwrap()); for _ in 0..256 { diff --git a/diskann-providers/src/utils/normalizing_util.rs b/diskann-providers/src/utils/normalizing_util.rs index 517b18afc..8cbad1103 100644 --- a/diskann-providers/src/utils/normalizing_util.rs +++ b/diskann-providers/src/utils/normalizing_util.rs @@ -139,21 +139,18 @@ pub fn normalize_data_internal( #[cfg(test)] mod normalizing_utils_test { use crate::storage::{StorageReadProvider, VirtualStorageProvider}; + use diskann_utils::test_data_root; use super::*; use crate::utils::{create_thread_pool_for_test, storage_utils::*}; #[test] fn test_normalize_data_file() { - let in_file_name = "/test_data/sift/siftsmall_learn_256pts.fbin"; - let norm_file_name = "/test_data/sift/siftsmall_learn_256pts_normalized.fbin"; + let in_file_name = "/sift/siftsmall_learn_256pts.fbin"; + let norm_file_name = "/sift/siftsmall_learn_256pts_normalized.fbin"; let out_file_name = "/siftsmall_learn_256pts_normalized.fbin"; - let workspace_root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let pool = create_thread_pool_for_test(); normalize_data_file(in_file_name, out_file_name, &storage_provider, &pool).unwrap();