Skip to content

Commit a47a367

Browse files
committed
attribute diversity
1 parent e8a4d92 commit a47a367

5 files changed

Lines changed: 173 additions & 42 deletions

File tree

AnyBuildLogs/latest.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
20260116-140907-2cca51ce
1+
20260127-171934-6eb71ae2

include/defaults.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ const bool NUM_DIVERSE_BUILD = 1;
3737

3838
const bool REORDER_INDEX = false;
3939
const uint32_t REORDER_DIM = 0;
40+
const bool ATTRIBUTE_DIVERSITY = false;
41+
const float ATTR_DIST_THRESHOLD = 0.2f;
4042

4143
} // namespace defaults
4244
} // namespace diskann

include/index.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,12 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
267267
// determines navigating node of the graph by calculating medoid of the data
268268
uint32_t calculate_entry_point();
269269

270-
void parse_label_file(const std::string &label_file, size_t &num_pts_labels, size_t& total_labels);
271-
void parse_seller_file(const std::string& label_file, size_t& num_pts_labels);
270+
template <typename ValueT>
271+
void parse_integer_string_file(const std::string &file_path, size_t &num_points, size_t& total_values,
272+
std::vector<std::vector<ValueT>>& location_to_values,
273+
tsl::robin_set<ValueT>* unique_values = nullptr);
274+
void parse_seller_file(const std::string& label_file, size_t& num_pts_labels,
275+
std::vector<uint32_t>& location_to_seller, uint32_t& num_unique_sellers);
272276

273277
void convert_pts_label_to_bitmask(std::vector<std::vector<LabelT>>& pts_to_labels, simple_bitmask_buf& bitmask_buf, size_t num_labels);
274278

@@ -357,6 +361,8 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
357361
void initialize_query_scratch(uint32_t num_threads, uint32_t search_l, uint32_t indexing_l, uint32_t r,
358362
uint32_t maxc, size_t dim, size_t bitmask_size = 0);
359363

364+
double attribute_distance(const std::vector<uint32_t> &a, const std::vector<uint32_t> &b);
365+
360366
// Do not call without acquiring appropriate locks
361367
// call public member functions save and load to invoke these.
362368
DISKANN_DLLEXPORT size_t save_graph(std::string filename);
@@ -426,6 +432,10 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
426432
std::vector<uint32_t> _location_to_seller;
427433
uint32_t _num_unique_sellers = 0;
428434
std::string _seller_file;
435+
bool _attribute_diversity = false;
436+
float _attr_dist_threshold = 0.2f;
437+
std::string _attribute_file;
438+
std::vector<std::vector<std::uint32_t>> _location_to_attributes;
429439

430440
bool _use_universal_label = false;
431441
LabelT _universal_label = 0;

include/parameters.h

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,18 @@ class IndexWriteParameters
3232
const bool diverse_index;
3333
const std::string seller_file;
3434
const uint32_t num_diverse_build;
35+
const bool attribute_diversity;
36+
const std::string attribute_file;
37+
const float attr_dist_threshold;
3538

3639
IndexWriteParameters(const uint32_t search_list_size, const uint32_t max_degree, const bool saturate_graph,
3740
const uint32_t max_occlusion_size, const float alpha, const uint32_t num_threads,
38-
const uint32_t filter_list_size, bool diverse_index, const std::string& seller_file, uint32_t num_diverse_build)
41+
const uint32_t filter_list_size, bool diverse_index, const std::string& seller_file, uint32_t num_diverse_build,
42+
bool attribute_diversity, const std::string& attribute_file, float attr_dist_threshold)
3943
: search_list_size(search_list_size), max_degree(max_degree), saturate_graph(saturate_graph),
4044
max_occlusion_size(max_occlusion_size), alpha(alpha), num_threads(num_threads),
41-
filter_list_size(filter_list_size), diverse_index(diverse_index), seller_file(seller_file), num_diverse_build(num_diverse_build)
45+
filter_list_size(filter_list_size), diverse_index(diverse_index), seller_file(seller_file), num_diverse_build(num_diverse_build),
46+
attribute_diversity(attribute_diversity), attribute_file(attribute_file), attr_dist_threshold(attr_dist_threshold)
4247
{
4348
}
4449

@@ -100,6 +105,24 @@ class IndexWriteParametersBuilder
100105
return *this;
101106
}
102107

108+
IndexWriteParametersBuilder& with_attribute_diversity(const bool attribute_diversity)
109+
{
110+
_attribute_diversity = attribute_diversity;
111+
return *this;
112+
}
113+
114+
IndexWriteParametersBuilder& with_attr_dist_threshold(const float attr_dist_threshold)
115+
{
116+
_attr_dist_threshold = attr_dist_threshold;
117+
return *this;
118+
}
119+
120+
IndexWriteParametersBuilder& with_attribute_file(const std::string attribute_file)
121+
{
122+
_attribute_file = attribute_file;
123+
return *this;
124+
}
125+
103126
IndexWriteParametersBuilder &with_alpha(const float alpha)
104127
{
105128
_alpha = alpha;
@@ -121,13 +144,16 @@ class IndexWriteParametersBuilder
121144
IndexWriteParameters build() const
122145
{
123146
return IndexWriteParameters(_search_list_size, _max_degree, _saturate_graph, _max_occlusion_size, _alpha,
124-
_num_threads, _filter_list_size, _diverse_index, _seller_file, _num_diverse_build);
147+
_num_threads, _filter_list_size, _diverse_index, _seller_file, _num_diverse_build,
148+
_attribute_diversity, _attribute_file, _attr_dist_threshold);
125149
}
126150

127151
IndexWriteParametersBuilder(const IndexWriteParameters &wp)
128152
: _search_list_size(wp.search_list_size), _max_degree(wp.max_degree),
129153
_max_occlusion_size(wp.max_occlusion_size), _saturate_graph(wp.saturate_graph), _alpha(wp.alpha),
130-
_filter_list_size(wp.filter_list_size)
154+
_num_threads(wp.num_threads), _filter_list_size(wp.filter_list_size), _diverse_index(wp.diverse_index),
155+
_seller_file(wp.seller_file), _num_diverse_build(wp.num_diverse_build), _attribute_diversity(wp.attribute_diversity),
156+
_attribute_file(wp.attribute_file), _attr_dist_threshold(wp.attr_dist_threshold)
131157
{
132158
}
133159
IndexWriteParametersBuilder(const IndexWriteParametersBuilder &) = delete;
@@ -143,7 +169,10 @@ class IndexWriteParametersBuilder
143169
uint32_t _filter_list_size{defaults::FILTER_LIST_SIZE};
144170
bool _diverse_index{ defaults::DIVERSE_INDEX };
145171
std::string _seller_file{ defaults::EMPTY_STRING };
146-
uint32_t _num_diverse_build{ defaults::NUM_DIVERSE_BUILD };
172+
uint32_t _num_diverse_build{ defaults::NUM_DIVERSE_BUILD };
173+
bool _attribute_diversity{ defaults::ATTRIBUTE_DIVERSITY };
174+
std::string _attribute_file{ defaults::EMPTY_STRING };
175+
float _attr_dist_threshold{ defaults::ATTR_DIST_THRESHOLD };
147176
};
148177

149178
struct IndexLoadParams

0 commit comments

Comments
 (0)