Rename the clap long options of the diskann-tools binaries from snake_case
to kebab-case (for example --data_type becomes --data-type). The Rust field
names are unchanged.

Renames that go beyond the separator change:
  * compute_specificities: --base_label_file / --query_label_file become
    --base-file-labels / --query-file-labels.
  * gen_associated_data_from_range: --associated_data_path becomes
    --associated-data-file.
  * generate_minmax: --input / --output become --input-file / --output-file.
  * generate_pq: --data_path becomes --data-file.
  * subsample_bin: all five fields gain an explicit long option; previously
    they carried no `long`/`short`, i.e. they were positional.

Review changes folded into this revision:
  * Dropped the redundant `default_value = None` from every renamed
    Option-typed argument, not just some of them.
  * generate_pq: removed the bare `short` on pq_bytes. It derives -p, which
    p_val already claims via `short = 'p'`.

NOTE(review): the `Option<...>` type parameters on context lines were missing
from the copy of the patch under review; they are restored here as
Option<String> (file arguments), Option<usize> (num_threads) and Option<u64>
(random_seed) by assumption. Confirm against the tree before applying.
NOTE(review): `index` lines are carried over unchanged, so the post-image
hashes no longer match the four files touched by the review changes.

diff --git a/diskann-tools/src/bin/compute_groundtruth.rs b/diskann-tools/src/bin/compute_groundtruth.rs
index 6fc3a77c2..c6f1481a8 100644
--- a/diskann-tools/src/bin/compute_groundtruth.rs
+++ b/diskann-tools/src/bin/compute_groundtruth.rs
@@ -108,40 +108,40 @@ fn main() -> CMDResult<()> {
 #[derive(Debug, Parser)]
 struct ComputeGroundTruthArgs {
     /// data type (required)
-    #[arg(long = "data_type", default_value = "float")]
+    #[arg(long = "data-type", default_value = "float")]
     pub data_type: DataType,
 
     /// Distance function to use.
-    #[arg(long = "dist_fn", default_value = "l2")]
+    #[arg(long = "dist-fn", default_value = "l2")]
     pub distance_function: Metric,
 
     /// File containing the base vectors in binary format
-    #[arg(long = "base_file", short, required = true)]
+    #[arg(long = "base-file", short, required = true)]
     pub base_file: String,
 
-    #[arg(long = "base_file_labels", default_value = None)]
+    #[arg(long = "base-file-labels")]
     pub base_file_labels: Option<String>,
 
     /// File containing the query vectors in binary format
-    #[arg(long = "query_file", short, required = true)]
+    #[arg(long = "query-file", short, required = true)]
     pub query_file: String,
 
-    #[arg(long = "query_file_labels", default_value = None)]
+    #[arg(long = "query-file-labels")]
     pub query_file_labels: Option<String>,
 
     /// Path of the file to write the ground truth to in binary format. Please don't append .bin at the end if no filter_label or filter_label_file is provided. It will save the file with '.bin' at the end. Otherwise it will save the file as filename_label.bin.
-    #[arg(long = "gt_file", short, required = true)]
+    #[arg(long = "gt-file", short, required = true)]
     pub ground_truth_file: String,
 
     /// Filter bitmap file in the range ground truth format
-    #[arg(long = "filter_bitmap_file", short, default_value = None)]
+    #[arg(long = "filter-bitmap-file", short)]
     pub filter_bitmap_file: Option<String>,
 
     /// Number of ground truth nearest neigbhors to compute
-    #[arg(long = "recall_at", short = 'K', default_value = "10")]
+    #[arg(long = "recall-at", short = 'K', default_value = "10")]
     pub recall_at: u32,
 
     /// File containing the associated data in binary format
-    #[arg(long = "associated_data_file", required = false, default_value = None)]
+    #[arg(long = "associated-data-file")]
     pub associated_data_file: Option<String>,
 }
diff --git a/diskann-tools/src/bin/compute_multivec_groundtruth.rs b/diskann-tools/src/bin/compute_multivec_groundtruth.rs
index 452187375..220aff622 100644
--- a/diskann-tools/src/bin/compute_multivec_groundtruth.rs
+++ b/diskann-tools/src/bin/compute_multivec_groundtruth.rs
@@ -94,11 +94,11 @@ fn main() -> CMDResult<()> {
 #[derive(Debug, Parser)]
 struct ComputeMultivecGroundTruthArgs {
     /// data type (required)
-    #[arg(long = "data_type", default_value = "float")]
+    #[arg(long = "data-type", default_value = "float")]
     pub data_type: DataType,
 
     /// Distance function to use.
-    #[arg(long = "dist_fn", default_value = "l2")]
+    #[arg(long = "dist-fn", default_value = "l2")]
     pub distance_function: Metric,
 
     /// Whether to use average or min aggregation
@@ -106,24 +106,24 @@ struct ComputeMultivecGroundTruthArgs {
     pub aggregation: MultivecAggregationMethod,
 
     /// File containing the base vectors in binary format
-    #[arg(long = "base_file", short, required = true)]
+    #[arg(long = "base-file", short, required = true)]
     pub base_file: String,
 
-    #[arg(long = "base_file_labels", default_value = None)]
+    #[arg(long = "base-file-labels")]
     pub base_file_labels: Option<String>,
 
     /// File containing the query vectors in binary format
-    #[arg(long = "query_file", short, required = true)]
+    #[arg(long = "query-file", short, required = true)]
     pub query_file: String,
 
-    #[arg(long = "query_file_labels", default_value = None)]
+    #[arg(long = "query-file-labels")]
     pub query_file_labels: Option<String>,
 
     /// Path of the file to write the ground truth to in binary format. Please don't append .bin at the end if no filter_label or filter_label_file is provided. It will save the file with '.bin' at the end. Otherwise it will save the file as filename_label.bin.
-    #[arg(long = "gt_file", short, required = true)]
+    #[arg(long = "gt-file", short, required = true)]
     pub ground_truth_file: String,
 
     /// Number of ground truth nearest neighbors to compute
-    #[arg(long = "recall_at", short = 'K', default_value = "10")]
+    #[arg(long = "recall-at", short = 'K', default_value = "10")]
     pub recall_at: u32,
 }
diff --git a/diskann-tools/src/bin/compute_range_groundtruth.rs b/diskann-tools/src/bin/compute_range_groundtruth.rs
index 057c49035..a8e8c88f6 100644
--- a/diskann-tools/src/bin/compute_range_groundtruth.rs
+++ b/diskann-tools/src/bin/compute_range_groundtruth.rs
@@ -93,35 +93,35 @@ fn main() -> CMDResult<()> {
 #[derive(Debug, Parser)]
 struct ComputeRangeGroundTruthArgs {
     /// data type
-    #[arg(long = "data_type", default_value = "float")]
+    #[arg(long = "data-type", default_value = "float")]
     pub data_type: DataType,
 
     /// Distance function to use.
-    #[arg(long = "dist_fn", default_value = "l2")]
+    #[arg(long = "dist-fn", default_value = "l2")]
     pub distance_function: Metric,
 
     /// File containing the base vectors in binary format
-    #[arg(long = "base_file", short, required = true)]
+    #[arg(long = "base-file", short, required = true)]
     pub base_file: String,
 
     /// Optional labels file for base vectors
-    #[arg(long = "base_file_labels", default_value = None)]
+    #[arg(long = "base-file-labels")]
     pub base_file_labels: Option<String>,
 
     /// File containing the query vectors in binary format
-    #[arg(long = "query_file", short, required = true)]
+    #[arg(long = "query-file", short, required = true)]
     pub query_file: String,
 
     /// Optional labels file for query vectors
-    #[arg(long = "query_file_labels", default_value = None)]
+    #[arg(long = "query-file-labels")]
     pub query_file_labels: Option<String>,
 
     /// Path of the file to write range ground truth to in binary format
-    #[arg(long = "gt_file", short, required = true)]
+    #[arg(long = "gt-file", short, required = true)]
     pub ground_truth_file: String,
 
     /// Filter bitmap file in range ground truth format
-    #[arg(long = "filter_bitmap_file", short, default_value = None)]
+    #[arg(long = "filter-bitmap-file", short)]
     pub filter_bitmap_file: Option<String>,
 
     /// Radius threshold used to include neighbors in range-groundtruth
diff --git a/diskann-tools/src/bin/compute_specificities.rs b/diskann-tools/src/bin/compute_specificities.rs
index 3cb2bf174..b01d3389d 100644
--- a/diskann-tools/src/bin/compute_specificities.rs
+++ b/diskann-tools/src/bin/compute_specificities.rs
@@ -18,15 +18,15 @@ use std::process;
 )]
 struct Args {
     /// File containing the base labels
-    #[arg(long = "base_label_file", short = 'b')]
+    #[arg(long = "base-file-labels", short = 'b')]
     pub base_label_file: String,
 
     /// File containing the query labels
-    #[arg(long = "query_label_file", short = 'q')]
+    #[arg(long = "query-file-labels", short = 'q')]
     pub query_label_file: String,
 
     /// Output file for specificities (optional)
-    #[arg(long = "specificity_output_file", short = 'o')]
+    #[arg(long = "specificity-output-file", short = 'o')]
     pub specificity_output_file: Option<String>,
 }
 
diff --git a/diskann-tools/src/bin/gen_associated_data_from_range.rs b/diskann-tools/src/bin/gen_associated_data_from_range.rs
index 708dc5004..1661406c6 100644
--- a/diskann-tools/src/bin/gen_associated_data_from_range.rs
+++ b/diskann-tools/src/bin/gen_associated_data_from_range.rs
@@ -20,7 +20,7 @@ fn main() -> CMDResult<()> {
 
 #[derive(Debug, Parser)]
 struct GenAssociatedDataFromRangeArgs {
-    #[arg(long = "associated_data_path")]
+    #[arg(long = "associated-data-file")]
     pub associated_data_path: String,
 
     #[arg(long = "start")]
diff --git a/diskann-tools/src/bin/generate_minmax.rs b/diskann-tools/src/bin/generate_minmax.rs
index 7ced9e2ef..43739a743 100644
--- a/diskann-tools/src/bin/generate_minmax.rs
+++ b/diskann-tools/src/bin/generate_minmax.rs
@@ -27,11 +27,11 @@ use rand::{rngs::StdRng, SeedableRng};
 #[command(author, version, about, long_about = None)]
 struct Args {
     /// Input binary file path containing vector data
-    #[arg(short, long)]
+    #[arg(short, long = "input-file")]
     input: String,
 
     /// Output binary file path for quantized vectors
-    #[arg(short, long)]
+    #[arg(short, long = "output-file")]
     output: String,
 
     /// Number of bits for quantization (1, 2, 4, or 8)
diff --git a/diskann-tools/src/bin/generate_pq.rs b/diskann-tools/src/bin/generate_pq.rs
index 740ab9aa1..ce641ac05 100644
--- a/diskann-tools/src/bin/generate_pq.rs
+++ b/diskann-tools/src/bin/generate_pq.rs
@@ -52,30 +52,30 @@ fn main() -> Result<(), CMDToolError> {
 #[derive(Debug, Parser)]
 struct BuildPQArgs {
     /// data type (required)
-    #[arg(long = "data_type", default_value = "float")]
+    #[arg(long = "data-type", default_value = "float")]
     pub data_type: DataType,
 
     /// Distance function to use.
-    #[arg(long = "dist_fn", default_value = "l2")]
+    #[arg(long = "dist-fn", default_value = "l2")]
     pub dist_fn: Metric,
 
     /// Path to the data file. The file should be in the format specified by the `data_type` argument.
-    #[arg(long = "data_path", short, required = true)]
+    #[arg(long = "data-file", short, required = true)]
     pub data_path: String,
 
     /// Path to the index file. The index will be saved to this prefixed name.
-    #[arg(long = "index_path_prefix", short, required = true)]
+    #[arg(long = "index-path-prefix", short, required = true)]
     pub index_path_prefix: String,
 
     /// Number of threads to use.
-    #[arg(long = "num_threads", short = 'T')]
+    #[arg(long = "num-threads", short = 'T')]
     pub num_threads: Option<usize>,
 
     /// Ratio of PQ training set size to data size
-    #[arg(long = "p_val", short = 'p', default_value = "0.1")]
+    #[arg(long = "p-val", short = 'p', default_value = "0.1")]
     pub p_val: f64,
 
     /// Number of PQ bytee
-    #[arg(long = "pq_bytes", short, default_value = "10")]
+    #[arg(long = "pq-bytes", default_value = "10")]
     pub pq_bytes: usize,
 }
diff --git a/diskann-tools/src/bin/generate_synthetic_labels.rs b/diskann-tools/src/bin/generate_synthetic_labels.rs
index 1c3479819..d615cc2dd 100644
--- a/diskann-tools/src/bin/generate_synthetic_labels.rs
+++ b/diskann-tools/src/bin/generate_synthetic_labels.rs
@@ -11,7 +11,7 @@ use tracing::{error, info};
 #[derive(Debug, Parser)]
 struct GenerateSyntheticLabelsArgs {
     /// Filename for saving the label file
-    #[arg(long = "output_file", required = true)]
+    #[arg(long = "output-file", required = true)]
     pub output_file: String,
 
     /// Number of vectors
diff --git a/diskann-tools/src/bin/random_data_generator.rs b/diskann-tools/src/bin/random_data_generator.rs
index 3fc258dc9..f6e708aed 100644
--- a/diskann-tools/src/bin/random_data_generator.rs
+++ b/diskann-tools/src/bin/random_data_generator.rs
@@ -10,11 +10,11 @@ use diskann_tools::utils::{write_random_data, CMDResult, CMDToolError, DataType}
 #[derive(Debug, Parser)]
 struct RandomDataGeneratorArgs {
     /// data type (required)
-    #[arg(long = "data_type", required = true)]
+    #[arg(long = "data-type", required = true)]
     pub data_type: DataType,
 
     /// File name for saving the random vectors
-    #[arg(long = "output_file", required = true)]
+    #[arg(long = "output-file", required = true)]
     pub output_file: String,
 
     /// Dimensionality of the vector
diff --git a/diskann-tools/src/bin/relative_contrast.rs b/diskann-tools/src/bin/relative_contrast.rs
index 14dce99cc..c3216511e 100644
--- a/diskann-tools/src/bin/relative_contrast.rs
+++ b/diskann-tools/src/bin/relative_contrast.rs
@@ -93,26 +93,26 @@ fn main() -> CMDResult<()> {
 #[derive(Debug, Parser)]
 struct RelativeContrastArgs {
     /// Data type
-    #[arg(long = "data_type", default_value = "fp16")]
+    #[arg(long = "data-type", default_value = "fp16")]
     pub data_type: DataType,
 
     /// Vector data file path
-    #[arg(long = "data_file", short, required = true)]
+    #[arg(long = "data-file", short, required = true)]
     pub data_file: String,
 
     /// Query file in binary format
-    #[arg(long = "query_file", short, required = true)]
+    #[arg(long = "query-file", short, required = true)]
     pub query_file: String,
 
     /// Ground truth file for the queryset
-    #[arg(long = "gt_file", required = true)]
+    #[arg(long = "gt-file", required = true)]
     pub gt_file: String,
 
     /// Number of neighbors to use from ground truth
-    #[arg(long = "recall_at", short = 'K', default_value = "10")]
+    #[arg(long = "recall-at", short = 'K', default_value = "10")]
     pub recall_at: usize,
 
     /// Number of random distances to average per query
-    #[arg(long = "search_list", short = 'L', default_value = "10")]
+    #[arg(long = "search-list", short = 'L', default_value = "10")]
     pub search_list: usize,
 }
diff --git a/diskann-tools/src/bin/subsample_bin.rs b/diskann-tools/src/bin/subsample_bin.rs
index 6612ea91b..a7927adce 100644
--- a/diskann-tools/src/bin/subsample_bin.rs
+++ b/diskann-tools/src/bin/subsample_bin.rs
@@ -24,19 +24,23 @@ use diskann_utils::io::Metadata;
 #[command(name = "subsample_bin", about = "Subsample vectors from a binary file")]
 struct Args {
     /// Data type of the vectors, one of: float, int8, uint8, fp16
-    #[arg(value_enum)]
+    #[arg(long = "data-type", value_enum)]
     data_type: DataType,
 
     /// Input base binary file
+    #[arg(long = "base-bin-file")]
     base_bin_file: PathBuf,
 
     /// Output file for sampled vectors
+    #[arg(long = "sampled-output-file")]
     sampled_output_file: PathBuf,
 
     /// Sampling probability between 0 and 1, for example 0.1
+    #[arg(long = "sampling-probability")]
     sampling_probability: f64,
 
     /// Optional random seed for reproducible sampling
+    #[arg(long = "random-seed")]
     random_seed: Option<u64>,
 }
 