diff --git a/.gitignore b/.gitignore index f80e5c682..46b23fe11 100644 --- a/.gitignore +++ b/.gitignore @@ -378,3 +378,6 @@ gperftools # Rust rust/target + +# DiskANN unit-test scratch artifacts (generated by tests/unified_index_tests.cpp) +unified_index_test_* diff --git a/docs/unified_index_format.md b/docs/unified_index_format.md new file mode 100644 index 000000000..428267561 --- /dev/null +++ b/docs/unified_index_format.md @@ -0,0 +1,348 @@ +# Unified Index Format + +**Status:** Draft v1 +**Scope:** static (non-streaming) indices; no tags; no disk-side secondary PQ +**Audience:** DiskANN maintainers, third-party loader implementers (e.g. `rust/` crate, Python tools) + +--- + +## 1. Motivation + +Today, building an index for SSD-served (`PQFlashIndex`) versus in-memory (`Index`) serving requires two distinct build pipelines that produce different on-disk artifacts: + +- **In-memory build** (`Index::save` in `src/index.cpp`) writes a variable-width graph file, a `.data` file (full-precision vectors), `.tags`, `_labels.txt`, `_labels_to_medoids.txt`, `_labels_map.txt`, `_bitmask_labels.bin`, `_integer_labels.bin`, etc. +- **Disk build** (`build_disk_index` in `src/disk_utils.cpp`) writes `_disk.index` (4 KiB sector-packed graph + full-precision coords), `_pq_pivots.bin`, `_pq_compressed.bin`, `_medoids.bin`, `_centroids.bin`, `_max_base_norm.bin`, plus the same family of label files. + +Both pipelines build the same underlying Vamana graph (shared `build_merged_vamana_index` call), but the serialized artifacts diverge. As a result an index built for one serving mode cannot be loaded in the other, and users must commit to a serving mode at build time. + +**Goal:** define a single self-describing container file that can be: + +- produced once by a unified build pipeline, and +- loaded as either an in-memory index (`Index::load_unified`) or an SSD-served index (`PQFlashIndex::load_unified`), + +with PQ data simply ignored on the in-memory path. 
+ +### 1.1 Non-goals (v1) + +- **No tags.** `Index` template instantiations stay (existing code untouched), but the unified writer/reader does not emit or consume tags. +- **No frozen points / streaming.** No dynamic-index support in this version. No `num_frozen_pts` field. +- **No disk-side secondary PQ.** The optional `_disk.index_pq_pivots.bin` / `_use_disk_index_pq` path at `src/pq_flash_index.cpp:828-835, 1534/1542` is not supported. Users needing very-high-dim disk PQ keep using the legacy format. +- **No centroids region.** `_centroids.bin` today is a load-time optimization that pre-populates `_centroid_data` (used as query-expansion seeds at `src/pq_flash_index.cpp:1327`). It is always derivable from the medoid node records via `use_medoids_data_as_centroids()` (`src/pq_flash_index.cpp:401-438`). The unified loader calls that fallback at startup, paying `_num_medoids` extra disk reads. +- **No legacy→unified conversion tool.** New format lives alongside legacy. Existing indices keep loading via their legacy code paths. + +### 1.2 Design principles + +1. **One file, self-describing.** All sidecar files merge into one container with a fixed-layout header that declares which optional sections are present. +2. **Disk and memory share one graph encoding.** The graph + embeddings region is byte-for-byte identical regardless of which loader consumes it. +3. **Region-level 4 KiB alignment, intra-region packing.** Major regions begin at 4 KiB-aligned file offsets to preserve the `AlignedFileReader` invariants (`include/aligned_file_reader.h:73-90`, `src/windows_aligned_file_reader.cpp:125-127`). Inside a region, payload is packed without per-record padding. +4. **No redundant per-node metadata.** Embedding size is constant (`dim * sizeof(T)`, from header) and neighbor IDs are fixed-width `uint32_t`. Per-node degree is *derived* from the offset table, not stored. 
+ +### 1.3 Supporting facts from existing code + +- In-memory graph load is sequential per-node `[degree:u32][nbrs:u32*degree]` (`src/in_mem_graph_store.cpp:138-202`). The in-memory search loop is degree-oblivious, so a fixed-stride or offset-indexed layout works equally well. +- Disk search keeps full-precision coords *inside each sector record* (`src/pq_flash_index.cpp:1651`) alongside the adjacency list. PQ codes live in memory (`_pq_compressed.bin`) and approximate distances during traversal. +- In-memory `Index::search` never references PQ (`grep -n _pq_table src/index.cpp` returns zero hits). +- `load_bin_impl` already accepts a `file_offset` parameter (`include/utils.h:412-426`), so embedded sub-files can be read from a byte range with no API change. + +--- + +## 2. Format Specification (normative) + +All multi-byte integers are little-endian. All offsets and lengths are in bytes from the start of the file. + +### 2.1 File layout + +``` ++--------------------------------------------------+ +| Header (4 KiB) | offset 0 ++--------------------------------------------------+ +| Node Offset Table: uint64[npts + 1] | offset = header.offset_table_off +| (padded to next 4 KiB boundary) | ++--------------------------------------------------+ +| Graph + Embeddings Region | offset = header.graph_region_off +| Per node N: [coords:T*dim][nbrs:u32*degree] | +| No per-node degree field. | +| Variable-width packing, no sector padding. | +| (region padded to next 4 KiB boundary) | ++--------------------------------------------------+ +| Medoids Region (always present) | +| uint32[num_medoids] of node IDs. | +| num_medoids = medoids_len / sizeof(uint32_t). | +| (padded to 4 KiB) | ++--------------------------------------------------+ +| PQ Pivots Region [optional] | present iff HAS_PQ +| Mirrors current _pq_pivots.bin payload byte | +| for byte. 
(padded to 4 KiB) | ++--------------------------------------------------+ +| PQ Compressed Codes Region [optional] | present iff HAS_PQ +| Mirrors current _pq_compressed.bin payload | +| byte for byte. (padded to 4 KiB) | ++--------------------------------------------------+ +| Max Base Norm Region [optional] | present iff HAS_MAX_BASE_NORM +| float[1]. MIPS preprocessing only. | +| (padded to 4 KiB) | ++--------------------------------------------------+ +| Labels Region [optional] | present iff HAS_LABELS +| Three sub-sections — see §2.4. | ++--------------------------------------------------+ +``` + +Optional regions whose flag is unset have both their `off` and `len` header fields set to `0`. Regions appear in the order above; readers MUST locate each region via its header offset, not by position. + +### 2.2 Header (fixed 4 KiB) + +```cpp +// include/unified_index_format.h +namespace diskann { + +constexpr uint32_t UNIFIED_FORMAT_MAGIC = 0x444E4E55; // "UNND" in little-endian ASCII +constexpr uint32_t UNIFIED_FORMAT_VERSION = 1; + +enum class DataTypeTag : uint32_t { Float = 1, Uint8 = 2, Int8 = 3 }; +enum class MetricTag : uint32_t { L2 = 1, InnerProduct = 2, Cosine = 3 }; +enum class LabelEncoding : uint32_t { None = 0, Bitmask = 1, Integer = 2 }; + +enum UnifiedFormatFlags : uint32_t { + HAS_PQ = 1u << 0, + HAS_LABELS = 1u << 1, + HAS_MAX_BASE_NORM = 1u << 2, +}; + +struct UnifiedIndexHeader { // total reserved 4096 bytes (one sector) + uint32_t magic; + uint32_t version; + DataTypeTag data_type; + MetricTag metric; + uint64_t npts; + uint64_t dim; + uint64_t aligned_dim; + uint32_t max_degree; + uint32_t flags; + uint64_t start_node; + + // Section pointers. (off = 0, len = 0) means the optional region is absent. 
+ uint64_t offset_table_off, offset_table_len; + uint64_t graph_region_off, graph_region_len; + uint64_t medoids_off, medoids_len; // always present + uint64_t pq_pivots_off, pq_pivots_len; // optional + uint64_t pq_codes_off, pq_codes_len; // optional + uint64_t max_base_norm_off, max_base_norm_len;// MIPS only + + // Labels (when HAS_LABELS) + LabelEncoding label_encoding; // Bitmask or Integer + uint64_t universal_label; // 0 if none; else the integer label value + uint64_t total_labels; // distinct label count; derives bitmask row width + uint64_t label_dictionary_off, label_dictionary_len; + uint64_t per_point_labels_off, per_point_labels_len; + uint64_t per_point_label_offsets_off, per_point_label_offsets_len; // Integer encoding only + + uint64_t file_size_bytes; // total file size in bytes, set by writer in finalize(); 0 in v1 files + + // Implementation must pad with reserved zero bytes to reach exactly 4096 bytes. +}; +static_assert(sizeof(UnifiedIndexHeader) <= 4096, "header must fit in one sector"); + +} // namespace diskann +``` + +Readers MUST: + +- Reject files whose `magic != UNIFIED_FORMAT_MAGIC`. +- Reject files whose `version` is greater than the highest `UNIFIED_FORMAT_VERSION` the reader understands (no silent partial parsing). +- Treat reserved trailing bytes within the header as opaque (do not assume zero). +- When `file_size_bytes != 0`, reject files whose on-disk size does not match the recorded value (truncation / partial write / corruption check). The `!= 0` guard allows v1 files (which did not carry this field) to load through a v2 reader without spurious rejection. + +### 2.3 Node Offset Table and Graph Region + +The offset table is `uint64[npts + 1]` values, packed contiguously.
For node `N` (0 ≤ N < npts): + +- record start (in file): `header.graph_region_off + offset_table[N]` +- record end (in file): `header.graph_region_off + offset_table[N + 1]` +- record size: `offset_table[N + 1] - offset_table[N]` + +The trailing sentinel `offset_table[npts]` equals `header.graph_region_len` (the size of the graph region payload, not counting trailing 4 KiB padding). + +Each node record contains, in order: + +1. `coords`: exactly `dim * sizeof(T)` bytes of vector data, where `T` corresponds to `header.data_type`. +2. `neighbors`: zero or more `uint32_t` neighbor node IDs. + +There is no per-node degree field. The degree is derived: + +``` +degree = (record_size - dim * sizeof(T)) / sizeof(uint32_t) +``` + +The graph region is otherwise unstructured. Implementations MUST pad with zero bytes from `header.graph_region_off + header.graph_region_len` to the next 4 KiB-aligned file offset, so that subsequent regions begin sector-aligned. Padding bytes are not part of `graph_region_len`. + +### 2.4 Labels Region + +Present iff `flags & HAS_LABELS`. Three sub-sections: + +#### 2.4.1 Label dictionary + +Replaces today's `_labels_map.txt` + `_labels_to_medoids.txt`. One row per distinct label, packed contiguously: + +``` +[label_string_len:u32][label_string bytes (label_string_len bytes, no nul terminator)] +[label_integer:u32][medoid_node_id:u32] +``` + +`label_integer` is always written as a 4-byte little-endian unsigned integer, independent of the build-time `LabelT` template parameter (`uint16_t` values are zero-extended). This makes the on-disk dictionary self-describing and uniform across writer instantiations. Row count is implicit: read rows until `label_dictionary_len` bytes are consumed. + +If `header.universal_label != 0`, the dictionary MAY contain a row whose `label_integer` matches it; otherwise the universal label has no explicit dictionary entry. 
+ +#### 2.4.2 Per-point labels + +The payload format depends on `header.label_encoding`: + +- **`Bitmask`**: row width is fixed at `simple_bitmask::get_bitmask_size(total_labels) * sizeof(uint64_t)` bytes (see `include/label_bitmask.h:57`). Random access: point N starts at offset `N * row_width` within the region. Each row's payload is the equivalent of today's `_bitmask_labels.bin` row. +- **`Integer`**: payload bytes are raw `uint32_t` label integers packed in point order, equivalent to `integer_label_vector::_data` (`include/integer_label_vector.h:38`). To locate point N's labels, use the per-point label offsets sub-section (§2.4.3). + +#### 2.4.3 Per-point label offsets + +Present iff `header.label_encoding == Integer`. Format: `uint64[npts + 1]` offsets into the per-point labels region. Point N's labels span the range `_data[offsets[N] : offsets[N+1]]` (each element a `uint32_t`). Mirrors `integer_label_vector::_offset` (`include/integer_label_vector.h:37`). + +For `header.label_encoding == Bitmask`, `per_point_label_offsets_off` and `per_point_label_offsets_len` MUST both be `0`. + +**On-disk ordering (Integer encoding):** for symmetry with the graph region's `[offset_table, graph_data]` layout, the writer emits the per-point-label *offsets* first, then the per-point-label *payload*. Since both regions are addressed by absolute file offsets from the header, readers are unaffected by the ordering. + +### 2.5 Medoids Region (always present) + +A packed `uint32_t` array of node IDs. Length: `medoids_len / sizeof(uint32_t)`. Unfiltered indices write exactly one entry; filtered indices write one entry per label-bound medoid (semantics identical to today's `_medoids.bin`). + +### 2.6 PQ Regions (optional) + +When `HAS_PQ` is set, both `pq_pivots_off` and `pq_codes_off` MUST be non-zero. 
Each region's payload is byte-identical to today's `_pq_pivots.bin` / `_pq_compressed.bin`, including the in-bin metadata header that `load_bin_impl` expects (`include/utils.h:412-426`). Loaders read these via `load_bin_impl(path, pq_pivots_off)` and `load_bin_impl(path, pq_codes_off)`. + +When `HAS_PQ` is unset, both fields MUST be zero, and an SSD loader MUST reject the file with a clear error (SSD serving requires PQ). + +### 2.7 Max Base Norm Region (optional) + +Present iff `HAS_MAX_BASE_NORM` (MIPS preprocessing only). Payload: byte-identical to today's `_max_base_norm.bin`. + +--- + +## 3. Load Paths (informative) + +### 3.1 In-memory load — `Index::load_unified(path)` + +1. Open file, read first 4 KiB → parse `UnifiedIndexHeader`. Validate magic and version. +2. Read the offset table (`npts + 1` `uint64`s starting at `header.offset_table_off`). +3. Read the graph region into a buffer (or stream it in chunks). +4. For each node N in `[0, npts)`: + - `record = region_buf[offset_table[N] : offset_table[N+1]]` + - `coords = record[0 : dim * sizeof(T)]` → copy into `_data_store` + - `degree = (len(record) - dim * sizeof(T)) / sizeof(uint32_t)` + - `neighbors = record[dim * sizeof(T) :]` interpreted as `uint32_t[degree]` → copy into `InMemGraphStore::_graph[N]` +5. If `flags & HAS_LABELS`: + - Read the dictionary; reconstruct in-memory `label_map` and `labels_to_medoids`. + - Read `per_point_labels`; dispatch on `header.label_encoding`: + - `Bitmask`: feed bytes into `simple_bitmask_buf` with row width derived from `total_labels`. + - `Integer`: also read `per_point_label_offsets`; feed both into `integer_label_vector`. + - If `header.universal_label != 0`, apply it to the label holder. +6. Read the medoids region (always present) into the in-memory medoid list (used by filtered search). +7. **PQ regions are skipped entirely.** + +### 3.2 SSD load — `PQFlashIndex::load_unified(num_threads, path)` + +1. 
Open the file via `AlignedFileReader` plus a sync `ifstream` for the small bits. +2. Read header and offset table synchronously. Keep the offset table in memory as `_node_offsets` (`8 * (npts + 1)` bytes — same order of magnitude as the existing `_medoids` / cache overhead). +3. Set `_disk_index_file = path` and `_graph_region_base = header.graph_region_off`. +4. Load PQ pivots and PQ codes via `load_bin_impl(path, header.pq_pivots_off)` and `load_bin_impl(path, header.pq_codes_off)`. SSD load fails fast if `HAS_PQ` is unset. +5. Load medoids (always present) and `max_base_norm` (if `HAS_MAX_BASE_NORM`) from their `(off, len)`. Centroids are populated by calling `use_medoids_data_as_centroids()` (`src/pq_flash_index.cpp:401`) after the medoid list is known — this reads each medoid's full-precision vector from the graph region. +6. Load labels (when `HAS_LABELS`) by the same dispatch as §3.1 step 5. +7. At search time, replace the implicit per-node sector arithmetic (`get_node_sector(N) * SECTOR_LEN`, currently at `src/pq_flash_index.cpp:1430-1431`) with an offset-table lookup: + ``` + start_byte = graph_region_base + node_offsets[N] + end_byte = graph_region_base + node_offsets[N + 1] + aligned_start = start_byte & ~(SECTOR_LEN - 1) + aligned_end = (end_byte + SECTOR_LEN - 1) & ~(SECTOR_LEN - 1) + ``` + Issue the aligned read; advance the in-buffer pointer by `(start_byte - aligned_start)` to land on the node record. Degree is `(end_byte - start_byte - dim * sizeof(T)) / 4`. + +This change is encapsulated in a single helper (`node_read_window(N)`) so the bulk of `cached_beam_search` is unchanged. + +--- + +## 4. Build Path (informative) + +`build_unified_index` reuses the existing pipeline (preprocess → optional PQ training → `build_merged_vamana_index`) up to the point where the legacy code would write separate files or call `create_disk_layout`. From there: + +1. Train PQ if requested (same as today; skip entirely for in-memory-only builds). +2.
Stream each node from the in-memory Vamana graph + base vector file into `UnifiedIndexWriter`. The writer: + - Reserves the 4 KiB header. + - Reserves space for the offset table (`8 * (npts + 1)` bytes, rounded up to 4 KiB). + - Streams node records into the graph region, recording each record's offset in the offset-table buffer. + - Pads to 4 KiB, writes the medoids region. + - If PQ trained, pads and writes pivots + codes. + - If MIPS, pads and writes `max_base_norm`. + - If labels present, pads and writes the dictionary, per-point label offsets (Integer encoding only), and per-point labels, in that order. + - Seeks back to the start of the offset table and writes it. + - Seeks back to byte 0 and writes the final populated `UnifiedIndexHeader`. + +PQ-less builds simply leave `HAS_PQ = 0` and omit the PQ regions. + +--- + +## 5. Implementation Roadmap + +### 5.1 New files + +| Path | Purpose | +|------|---------| +| `include/unified_index_format.h` | `UnifiedIndexHeader`, magic/version/flag constants, `DataTypeTag`/`MetricTag`/`LabelEncoding` enums, alignment helpers (`align_up_4k`). | +| `include/unified_index_io.h` + `src/unified_index_io.cpp` | `UnifiedIndexWriter` (assembles container with correct alignment, accumulates offset table as it streams nodes) and `UnifiedIndexReader` (parses header, exposes region `(off, len)` pairs, plus a `read_node(N)` helper for in-memory loaders). | + +### 5.2 Modified files (additive only) + +| File | Change | +|------|--------| +| `src/disk_utils.cpp` | Add `build_unified_index(...)` next to `build_disk_index`. Same pipeline, but the post-Vamana repack step calls `UnifiedIndexWriter` instead of `create_disk_layout`, and label/medoid emission writes into the container instead of sidecar files. `build_disk_index` is untouched. | +| `include/index.h`, `src/index.cpp` | Add `Index::save_unified(path)` and `Index::load_unified(path)`. 
`save_unified` walks `_data_store` + `InMemGraphStore::_graph` + label holders into `UnifiedIndexWriter`. `load_unified` parses the header and populates `_data_store` + `InMemGraphStore::_graph` from the graph region. Existing `save`/`load` paths are untouched. | +| `include/pq_flash_index.h`, `src/pq_flash_index.cpp` | Add `PQFlashIndex::load_unified(num_threads, path)`. Replaces the load path; search path adds `node_read_window(N)` helper and routes the existing async read through it. Existing `load` / `load_from_separate_paths` are untouched. | +| `src/in_mem_graph_store.cpp` | Add `set_graph_from_unified(npts, max_degree, start, per_node_adjacency_view)` so `Index::load_unified` can populate the graph without going through the file-based `load_impl`. No change to `load`/`save`/`get_neighbours`. | +| `src/abstract_index.cpp` | (Optional, follow-up.) Expose `save_unified` / `load_unified` through the virtual dispatch (`_save_unified`, `_load_unified`), mirroring the recently added `_debug_search` pattern. | + +### 5.3 Phasing + +The implementation is broken into phases so that each lands as a reviewable unit and can be reverted without affecting legacy paths. + +1. **Phase 1 — Format primitives.** Add `include/unified_index_format.h` and the `UnifiedIndexWriter`/`UnifiedIndexReader` library. Unit tests: round-trip header, round-trip a few graph regions, round-trip both label encodings. +2. **Phase 2 — In-memory save/load.** Add `Index::save_unified` and `Index::load_unified`. Test: build a small in-memory index the legacy way, `save_unified`, `load_unified` into a fresh `Index`, run search, compare top-K against the original. +3. **Phase 3 — Disk build (unified).** Add `build_unified_index` reusing the existing PQ training and Vamana code. Test: build dataset twice (legacy vs unified) with the same parameters; compare PQ pivots/codes/medoids/labels byte-for-byte where the legacy bins are payload-identical to the corresponding unified regions. +4. 
**Phase 4 — SSD load (unified).** Add `PQFlashIndex::load_unified` and the `node_read_window` helper. Test: cross-load — `build_unified_index` → `PQFlashIndex::load_unified` → search → compare recall and latency against legacy disk-build + legacy disk-load. +5. **Phase 5 — Optional virtual dispatch.** Expose `save_unified` / `load_unified` on `AbstractIndex`. + +Each phase keeps legacy paths fully working and adds no caller-side migration burden. + +--- + +## 6. Verification + +1. **Build symmetry.** Build a small dataset (~10 K vectors) the legacy way and the unified way with identical parameters. The unified file's PQ pivots, PQ codes, medoids, max-norm, and label payload bytes should match the corresponding legacy bin payloads byte-for-byte (modulo any in-bin headers that `load_bin_impl` handles). +2. **Cross-load (memory).** Build unified → load with `Index::load_unified` → run search; compare recall@10 against legacy in-memory build + legacy load over the same dataset. The graph is identical so recall should match within a tight margin. +3. **Cross-load (disk).** Build unified → load with `PQFlashIndex::load_unified` → run search; compare recall@10 *and* latency against legacy disk build + legacy disk load. Flag if the unaligned-slice read amplification regresses by more than ~10 % (this is a known "test later" item). +4. **PQ-less unified.** Build unified without PQ (in-memory-only). Confirm: file is smaller; `PQFlashIndex::load_unified` rejects it with a clear "missing PQ" error; `Index::load_unified` succeeds. +5. **Legacy regression.** Run the existing test suite (`tests/`, `tests/utils/`). All legacy load/build paths must continue to pass unchanged. +6. **Forward-compat.** Hand-craft a unified file with `version = UNIFIED_FORMAT_VERSION + 1` and confirm both loaders fail fast with an "unsupported version" error rather than silently misinterpreting. + +--- + +## 7. 
Open Questions and Follow-ups + +- **Read amplification.** Dropping per-node sector padding means SSD reads slice from a 4 KiB-aligned window that may be up to 2 × `(node_record_size + SECTOR_LEN)` bytes. This is the regression the user has flagged for measurement. If unacceptable, a follow-up can add an opt-in `pad_nodes_to_sector` build flag whose payload format is a strict subset of v1 (same header, same offset table, just larger `offset_table[N+1] - offset_table[N]` deltas). +- **`AbstractIndex` virtual dispatch.** Whether `save_unified`/`load_unified` need to be exposed through the type-erased base depends on caller demand; deferred to Phase 5. +- **Conversion tool.** Not in v1. If needed later, a small `legacy_to_unified` utility can be added that calls `UnifiedIndexReader`/`UnifiedIndexWriter` and reads legacy bins via existing helpers; no format change required. + +--- + +## 8. Glossary + +| Term | Definition | +|------|------------| +| `SECTOR_LEN` | 4096 bytes. Sector size required by `AlignedFileReader` on Windows (`FILE_FLAG_NO_BUFFERING`) and by libaio at 512-byte minimum on Linux. The unified format uses 4096 throughout for cross-platform compatibility. | +| `T` | The vector element type, one of `float`, `uint8_t`, `int8_t`, encoded as `DataTypeTag`. | +| `LabelT` | Label integer type, `uint16_t` or `uint32_t`, fixed at build time by the template instantiation. | +| `medoid` | Graph entry node for search. Unfiltered indices have one; filtered indices have one per label. | +| `universal_label` | A label value that matches every point unconditionally. Sentinel `0` means none. | diff --git a/include/filter_match_proxy.h b/include/filter_match_proxy.h index 51ec52e9e..1224dedcc 100644 --- a/include/filter_match_proxy.h +++ b/include/filter_match_proxy.h @@ -20,11 +20,18 @@ namespace diskann const std::vector& filter_labels, LabelT unv_label); + // Ctor variant that owns its per-query scratch buffer internally. 
+ // Used by the unified-index path (see unified_label_data_bitmask::make_match_proxy). + bitmask_filter_match(simple_bitmask_buf& bitmask_filters, + const std::vector& filter_labels, + LabelT unv_label); + virtual bool contain_filtered_label(uint32_t id) override; private: simple_bitmask_buf& _bitmask_filters; - std::vector& _query_bitmask_buf; + std::vector _owned_query_bitmask_buf; // populated only by the 3-arg ctor + std::vector& _query_bitmask_buf; // refs either external or _owned simple_bitmask_full_val _bitmask_full_val; }; diff --git a/include/index.h b/include/index.h index 79f0cef1c..97bd2420a 100644 --- a/include/index.h +++ b/include/index.h @@ -85,9 +85,17 @@ template clas DISKANN_DLLEXPORT void load(const IndexLoadParams& load_params); DISKANN_DLLEXPORT void load(const char *index_file, uint32_t num_threads, uint32_t search_l, LabelFormatType label_format_type = LabelFormatType::String); - + #endif + // Unified single-file format. See docs/unified_index_format.md. + DISKANN_DLLEXPORT void save_unified(const char *filename); + // Variant of save_unified that also emits a PQ region. Pass empty + // buffers to skip PQ (equivalent to the filename-only overload). Used by + // unified_index_builder.
+ DISKANN_DLLEXPORT void save_unified(const char *filename, const std::vector &pq_pivots_bytes, + const std::vector &pq_codes_bytes); + // get some private variables DISKANN_DLLEXPORT size_t get_num_points(); DISKANN_DLLEXPORT size_t get_max_points(); diff --git a/include/integer_label_vector.h b/include/integer_label_vector.h index 68688419f..4351c76c0 100644 --- a/include/integer_label_vector.h +++ b/include/integer_label_vector.h @@ -12,6 +12,17 @@ class integer_label_vector bool initialize_from_file(const std::string &label_file, size_t &numpoints); + bool initialize_from_buffers(const size_t *offsets, size_t num_points, + const uint32_t *labels, size_t total_labels); + + // Zero-copy load path: caller pre-sizes both buffers, writes into the raw + // pointers, and the integer_label_vector is ready to use. The two-step + // form lets the caller skip the intermediate vector + assign() + // copies that initialize_from_buffers incurs. + void resize_for_load(size_t num_points, size_t total_labels); + size_t *mutable_offset_data(); // size: num_points + 1 entries (size_t each) + uint32_t *mutable_label_data(); // size: total_labels entries (uint32_t each) + bool write_to_file(const std::string &label_file) const; template diff --git a/include/label_bitmask.h b/include/label_bitmask.h index e0917bec0..ac0e669dd 100644 --- a/include/label_bitmask.h +++ b/include/label_bitmask.h @@ -2,6 +2,8 @@ #include #include +#include "windows_customizations.h" + namespace diskann { @@ -45,7 +47,15 @@ struct simple_bitmask_buf }; -class simple_bitmask +// NOTE: simple_bitmask stays DISKANN_DLLEXPORT even though the unit tests now +// link the static diskann_s lib (where DISKANN_DLLEXPORT is a no-op) and no +// longer need it exported. 
It is kept because ColorInfoVector's inline +// constructor (include/color_info.h, pulled in widely via neighbor.h) odr-uses +// simple_bitmask's out-of-line methods (ctor, get_bitmask_size), so any DLL +// consumer that instantiates it must import them. TODO: once that inline +// dependency is removed or proven unused by every DLL consumer, drop this +// export too -- simple_bitmask is otherwise an internal helper. +class DISKANN_DLLEXPORT simple_bitmask { public: simple_bitmask(std::uint64_t* bitsets, std::uint64_t bitmask_size); diff --git a/include/pq.h b/include/pq.h index 3e6119f22..1055467ed 100644 --- a/include/pq.h +++ b/include/pq.h @@ -30,6 +30,14 @@ class FixedChunkPQTable void load_pq_centroid_bin(const char *pq_table_file, size_t num_chunks); #endif + // In-memory variant of load_pq_centroid_bin. Parses the same on-disk + // pq_pivots blob format (outer bin -> 4 or 5 sub-bins for offsets, + // pivot table, centroid, [old per-chunk dim], chunk offsets), but reads + // straight from a caller-supplied buffer -- no temp file, no disk IO. + // Does NOT support OPQ rotation matrix (unified-format PQ is always + // standard PQ). + void load_pq_centroid_bin_from_memory(const uint8_t *blob, size_t blob_len, size_t num_chunks); + uint32_t get_num_chunks(); void preprocess_query(float *query_vec); diff --git a/include/pq_flash_index.h b/include/pq_flash_index.h index ec024c4a2..45f90025e 100644 --- a/include/pq_flash_index.h +++ b/include/pq_flash_index.h @@ -52,6 +52,8 @@ template class PQFlashIndex LabelFormatType label_format_type = LabelFormatType::String); #endif + // (load_unified removed; use diskann::make_unified_index_ssd(reader, ctx) — see include/unified_index.h.) 
+ DISKANN_DLLEXPORT void load_cache_list(std::vector &node_list); DISKANN_DLLEXPORT void cache_bfs_levels(uint64_t num_nodes_to_cache, std::vector &node_list, diff --git a/include/unified_index.h b/include/unified_index.h new file mode 100644 index 000000000..5e2d80334 --- /dev/null +++ b/include/unified_index.h @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "aligned_file_reader.h" +#include "distance.h" +#include "percentile_stats.h" +#include "unified_index_format.h" +#include "windows_customizations.h" + +namespace diskann +{ + +struct QueryStats; +struct DebugTraversalInfo; + +// Knobs passed to unified_index::load. Path identifies the unified container +// file. `num_threads` and `search_l` size per-thread scratch on the memory +// implementation. `num_nodes_to_cache` triggers SSD static-cache priming +// (no-op for the memory implementation). +struct UnifiedLoadContext +{ + std::string path; + uint32_t num_threads = 1; + uint32_t search_l = 100; + uint64_t num_nodes_to_cache = 0; +}; + +// Single in/out container for a search call. The caller fills inputs and +// allocates the output buffers; search() writes outputs (and optional +// telemetry) directly. No allocation happens inside search(). +struct UnifiedSearchContext +{ + // ---- Inputs ---- + const void *query = nullptr; // typed by caller as const T* + size_t K = 10; + uint32_t L = 100; + // Filter labels as user-facing strings. Required non-empty if the loaded + // index has labels; required empty otherwise. The index converts strings + // to internal label ints per its encoding. 
+ std::vector filter_labels; + std::optional beam_width; // SSD-only + std::optional io_limit; // SSD-only + std::function rerank_fn; // SSD-only + + // ---- Outputs (caller-allocated, length >= K) ---- + uint64_t *indices = nullptr; + float *distances = nullptr; + + // ---- Optional telemetry sinks (nullptr = no telemetry) ---- + QueryStats *stats = nullptr; + DebugTraversalInfo *debug_info = nullptr; +}; + +// Non-templated public interface returned by the factory. Users program +// against this; the templated `unified_index_base` implements it. +class unified_index +{ + public: + virtual ~unified_index() = default; + + virtual void load(const UnifiedLoadContext &ctx) = 0; + virtual void search(UnifiedSearchContext &ctx) = 0; + + virtual const UnifiedIndexHeader &header() const = 0; + virtual uint64_t num_points() const = 0; + virtual uint64_t dim() const = 0; + virtual uint64_t aligned_dim() const = 0; + virtual diskann::Metric metric() const = 0; + virtual DataTypeTag data_type() const = 0; + virtual bool has_labels() const = 0; + + // Resident memory / cardinality accounting for the loaded index, mirroring + // Index::get_table_stats() and PQFlashIndex::get_table_stats(). + virtual TableStats get_table_stats() const = 0; +}; + +// Factory: open a unified file fully in memory. Peeks the 4 KiB header, +// dispatches on `data_type`, instantiates the right templated implementation, +// calls load(ctx), returns the owning pointer as the non-templated interface. +std::unique_ptr make_unified_index_memory(const UnifiedLoadContext &ctx); + +// Factory: open a unified file in disk-resident (SSD) mode. The supplied +// AlignedFileReader is handed to the constructed unified_index_ssd. 
+std::unique_ptr make_unified_index_ssd( + std::shared_ptr reader, const UnifiedLoadContext &ctx); + +} // namespace diskann diff --git a/include/unified_index_base.h b/include/unified_index_base.h new file mode 100644 index 000000000..708e15589 --- /dev/null +++ b/include/unified_index_base.h @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "distance.h" +#include "unified_index.h" +#include "unified_index_format.h" +#include "unified_label_data.h" +#include "unified_node_store.h" +#include "windows_customizations.h" + +namespace diskann +{ + +class UnifiedIndexReader; + +// Templated implementation of the non-templated `unified_index` interface. +// Holds the parsed header, the metric, the label data (built by +// make_unified_label_data), and the node store (a unified_node_store_memory +// or unified_node_store_ssd, plugged in by the derived class's +// `load_storage`). 
+template +class unified_index_base : public unified_index +{ + public: + explicit unified_index_base(diskann::Metric metric); + ~unified_index_base() override; + + void load(const UnifiedLoadContext &ctx) override; + void search(UnifiedSearchContext &ctx) override; + + const UnifiedIndexHeader &header() const override + { + return _header; + } + uint64_t num_points() const override + { + return _header.npts; + } + uint64_t dim() const override + { + return _header.dim; + } + uint64_t aligned_dim() const override + { + return _header.aligned_dim; + } + diskann::Metric metric() const override + { + return _metric; + } + DataTypeTag data_type() const override + { + return data_type_tag_of(); + } + bool has_labels() const override + { + return _labels && _labels->has_labels(); + } + TableStats get_table_stats() const override + { + return _table_stats; + } + + // Templated read-only accessors for in-process callers that *do* know T + // (unit tests, the index's own search loop). Not on the public interface. + const unified_label_data_base *labels() const + { + return _labels.get(); + } + const unified_node_store_base *nodes() const + { + return _store.get(); + } + unified_node_store_base *nodes() + { + return _store.get(); + } + + protected: + // Derived class is responsible for instantiating the right _store subclass + // and calling its load(). It may inspect ctx for SSD-only knobs like + // ctx.num_nodes_to_cache. + virtual void load_storage(UnifiedIndexReader &r, const UnifiedLoadContext &ctx) = 0; + virtual void search_impl(UnifiedSearchContext &ctx) = 0; + + // Fill the storage-specific resident-memory fields (node_mem_usage, + // graph_mem_usage) of `stats`. Memory reports resident coords/graph; SSD + // reports the resident PQ codes (graph lives on disk). Called by load() + // after load_storage() so the store is populated. 
+ virtual void fill_storage_stats(TableStats &stats) const = 0; + + void validate_header(const UnifiedIndexHeader &h) const; + void validate_search_context(const UnifiedSearchContext &ctx) const; + + UnifiedIndexHeader _header{}; + diskann::Metric _metric; + std::unique_ptr _labels; // nullptr when header has no labels + std::unique_ptr> _store; // built by derived load_storage() + std::string _index_path; + TableStats _table_stats; +}; + +} // namespace diskann diff --git a/include/unified_index_builder.h b/include/unified_index_builder.h new file mode 100644 index 000000000..f7e3267b7 --- /dev/null +++ b/include/unified_index_builder.h @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "distance.h" +#include "unified_index_format.h" +#include "windows_customizations.h" + +namespace diskann +{ + +// All parameters required to build a unified-format index file. +// +// One struct, runtime-typed (no template). The data_type field selects which +// concrete `Index` is instantiated internally; coords are read from +// `data_file_path` in `.bin` format (the legacy DiskANN file layout). +struct UnifiedBuildContext +{ + // --- Input data --- + std::string data_file_path; // .bin file holding N points x dim coords of `data_type` + DataTypeTag data_type = DataTypeTag::Float; + diskann::Metric metric = diskann::Metric::L2; + + // --- Graph build parameters (Vamana) --- + uint32_t R = 64; // max degree + uint32_t L = 100; // search list size during build + float alpha = 1.2f; // pruning alpha + uint32_t num_threads = 0; // 0 = use omp_get_num_procs() + + // --- PQ parameters --- + // pq_dim == 0 => no PQ (memory-only unified file; SSD load will reject). + // 0 < pq_dim < dim => train PQ with `pq_dim` chunks on a sampled subset and + // emit pivots + codes into the unified file. 
+ // pq_dim >= dim => train PQ with `dim` chunks (chunk size 1, full-precision + // per dimension). Clamped so the SSD load path -- which + // requires HAS_PQ -- can always load the produced file. + uint32_t pq_dim = 0; + double pq_sampling_rate = 0.1; // fraction of points to sample for pivot training (clamped server-side) + + // --- Optional filtered-index inputs --- + std::string label_file; // per-point labels (.txt), empty = unfiltered + std::string universal_label; // string to treat as "any label" + bool use_integer_labels = false; + + // --- Output --- + std::string output_path; // destination unified container file +}; + +// Builds a unified-format index file end-to-end: trains the Vamana graph from +// the input data file, optionally trains PQ on a sampled subset, then writes +// graph + medoids + (optional) PQ + (optional) labels into the unified +// container at `ctx.output_path`. +// +// Class shape (instead of free function) leaves room for future stateful build +// modes (incremental build, multi-pass, etc.). For now `build()` is the only +// method. +class unified_index_builder +{ + public: + unified_index_builder(); + ~unified_index_builder(); + + // Throws ANNException on failure (file open, mismatched dims, build crash, + // PQ training error, etc.). Returns successfully when the unified file is + // fully written and closed. + void build(const UnifiedBuildContext &ctx); +}; + +} // namespace diskann diff --git a/include/unified_index_format.h b/include/unified_index_format.h new file mode 100644 index 000000000..31b802cbb --- /dev/null +++ b/include/unified_index_format.h @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +#pragma once + +#include +#include + +namespace diskann +{ + +constexpr uint32_t UNIFIED_FORMAT_MAGIC = 0x444E4E55; // "UNND" little-endian +constexpr uint32_t UNIFIED_FORMAT_VERSION = 2; +constexpr uint64_t UNIFIED_FORMAT_ALIGN = 4096; + +enum class DataTypeTag : uint32_t +{ + Float = 1, + Uint8 = 2, + Int8 = 3, +}; + +enum class MetricTag : uint32_t +{ + L2 = 1, + InnerProduct = 2, + Cosine = 3, +}; + +enum class LabelEncoding : uint32_t +{ + None = 0, + Bitmask = 1, + Integer = 2, +}; + +enum UnifiedFormatFlags : uint32_t +{ + HAS_PQ = 1u << 0, + HAS_LABELS = 1u << 1, + HAS_MAX_BASE_NORM = 1u << 2, +}; + +#pragma pack(push, 1) +struct UnifiedIndexHeader +{ + uint32_t magic; + uint32_t version; + DataTypeTag data_type; + MetricTag metric; + uint64_t npts; + uint64_t dim; + uint64_t aligned_dim; + uint32_t max_degree; + uint32_t flags; + uint64_t start_node; + + uint64_t offset_table_off, offset_table_len; + uint64_t graph_region_off, graph_region_len; + uint64_t medoids_off, medoids_len; + uint64_t pq_pivots_off, pq_pivots_len; + uint64_t pq_codes_off, pq_codes_len; + uint64_t max_base_norm_off, max_base_norm_len; + + LabelEncoding label_encoding; + uint64_t universal_label; + uint64_t total_labels; + uint64_t label_dictionary_off, label_dictionary_len; + uint64_t per_point_labels_off, per_point_labels_len; + uint64_t per_point_label_offsets_off, per_point_label_offsets_len; + + // Total size of the file in bytes. Populated by finalize() and validated + // by readers on load (truncated / over-sized files are rejected). + // Also useful for disk-quota / capacity-planning logs. 
+ uint64_t file_size_bytes; + + uint8_t _reserved[4096 - (sizeof(uint32_t) * 7 + sizeof(uint64_t) * 25)]; +}; +#pragma pack(pop) + +static_assert(sizeof(UnifiedIndexHeader) == 4096, "header must occupy exactly one sector"); + +inline uint64_t align_up_4k(uint64_t v) +{ + return (v + UNIFIED_FORMAT_ALIGN - 1) & ~(UNIFIED_FORMAT_ALIGN - 1); +} + +template constexpr DataTypeTag data_type_tag_of() +{ + if constexpr (std::is_same_v) + return DataTypeTag::Float; + else if constexpr (std::is_same_v) + return DataTypeTag::Uint8; + else if constexpr (std::is_same_v) + return DataTypeTag::Int8; + else + static_assert(!sizeof(T), "unsupported data type"); +} + +} // namespace diskann diff --git a/include/unified_index_io.h b/include/unified_index_io.h new file mode 100644 index 000000000..3eb88286e --- /dev/null +++ b/include/unified_index_io.h @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include + +#include "unified_index_format.h" +#include "windows_customizations.h" + +namespace diskann +{ + +// Streaming writer for the unified index container. +// +// Caller drives the writer in this order: +// 1) begin(npts, dim, aligned_dim, max_degree, data_type, metric, start_node) +// 2) begin_graph_region() +// for each node N in [0, npts): write_node(coords_ptr, neighbors_ptr, degree) +// end_graph_region() +// 3) write_medoids(medoid_ids, num_medoids) // always called +// 4) (optional) write_pq(pivots_bytes, ..., codes_bytes, ...) +// 5) (optional) write_max_base_norm(value) +// 6) (optional) write_labels_bitmask(...) or write_labels_integer(...) +// 7) finalize() — seeks back, writes offset table and header +// +// Writer assumes nodes are appended in strict id order 0..npts-1. 
+class UnifiedIndexWriter +{ + public: + explicit UnifiedIndexWriter(const std::string &path); + ~UnifiedIndexWriter(); + + void begin(uint64_t npts, uint64_t dim, uint64_t aligned_dim, uint32_t max_degree, + DataTypeTag data_type, MetricTag metric, uint64_t start_node); + + void begin_graph_region(); + void write_node(const void *coords, const uint32_t *neighbors, uint32_t degree); + void end_graph_region(); + + void write_medoids(const uint32_t *medoid_ids, uint64_t num_medoids); + void write_pq(const void *pivots_bytes, uint64_t pivots_len, const void *codes_bytes, + uint64_t codes_len); + void write_max_base_norm(float value); + + // Bitmask encoding: bitmask_bytes = packed rows of `bitmask_size_words * 8` bytes each, npts rows. + void write_labels_bitmask(uint64_t total_labels, uint64_t universal_label, + const void *dictionary_bytes, uint64_t dictionary_len, + const void *bitmask_bytes, uint64_t bitmask_bytes_len); + + // Integer encoding: per_point_offsets is uint64[npts+1] into per_point_data. + void write_labels_integer(uint64_t total_labels, uint64_t universal_label, + const void *dictionary_bytes, uint64_t dictionary_len, + const void *per_point_data, uint64_t per_point_data_len, + const uint64_t *per_point_offsets); + + void finalize(); + + private: + void pad_to_4k(); + void write_raw(const void *bytes, uint64_t len); + uint64_t cur_offset(); + + std::string _path; + std::ofstream _out; + UnifiedIndexHeader _header{}; + std::vector _node_offsets; // size npts+1, byte offsets within graph region + uint64_t _graph_region_start = 0; + uint64_t _written_nodes = 0; + bool _graph_open = false; + bool _finalized = false; +}; + +// Read-only view over a unified container file. +// +// Holds the parsed header and provides byte ranges for each region. Does not +// own the file — callers re-open as needed (e.g. AlignedFileReader for SSD path). 
+class UnifiedIndexReader +{ + public: + explicit UnifiedIndexReader(const std::string &path); + + const UnifiedIndexHeader &header() const + { + return _header; + } + const std::string &path() const + { + return _path; + } + + // Load and return the uint64[npts+1] offset table. + std::vector load_offset_table(); + + // Load a region's bytes into a freshly-allocated buffer. + std::vector load_region(uint64_t off, uint64_t len); + + // Load a region's bytes directly into a caller-owned buffer. Caller is + // responsible for sizing the buffer to at least `len` bytes. Avoids the + // intermediate allocation+copy that the vector-returning overload incurs; + // intended for hot load paths that already own (or can size) the final + // destination storage. + void load_region(uint64_t off, uint64_t len, uint8_t *dst); + + private: + void parse_header(); + + std::string _path; + UnifiedIndexHeader _header{}; +}; + +} // namespace diskann diff --git a/include/unified_index_memory.h b/include/unified_index_memory.h new file mode 100644 index 000000000..25e8876a7 --- /dev/null +++ b/include/unified_index_memory.h @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +#include "concurrent_queue.h" +#include "distance.h" +#include "filter_match_proxy.h" +#include "scratch.h" +#include "unified_index_base.h" + +namespace diskann +{ + +// Fully in-memory implementation of the unified-format index. +// +// load_storage() constructs a unified_node_store_memory and calls its +// load(), then sizes the per-thread InMemQueryScratch pool. search_impl() +// runs a Vamana-style greedy traversal, reading coords/neighbors via the +// inherited _store (downcast to unified_node_store_memory* in the hot path +// for non-virtual access). 
+template +class unified_index_memory final : public unified_index_base +{ + public: + explicit unified_index_memory(diskann::Metric metric); + ~unified_index_memory() override; + + protected: + void load_storage(UnifiedIndexReader &r, const UnifiedLoadContext &ctx) override; + void search_impl(UnifiedSearchContext &ctx) override; + void fill_storage_stats(TableStats &stats) const override; + + private: + void init_scratch_pool(uint32_t num_threads, uint32_t search_l); + std::pair iterate_to_fixed_point(InMemQueryScratch *scratch, uint32_t L, const T *query, + const std::vector &init_ids, + filter_match_proxy *match_proxy); + + ConcurrentQueue *> _query_scratch; + std::shared_ptr> _dist_cmp; + uint32_t _start = 0; + uint32_t _max_observed_degree = 0; + std::vector _medoids; // mirrors unified_index_ssd::_medoids +}; + +} // namespace diskann diff --git a/include/unified_index_ssd.h b/include/unified_index_ssd.h new file mode 100644 index 000000000..bf41639e1 --- /dev/null +++ b/include/unified_index_ssd.h @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "aligned_file_reader.h" +#include "concurrent_queue.h" +#include "distance.h" +#include "filter_match_proxy.h" +#include "pq.h" +#include "scratch.h" +#include "unified_index_base.h" + +namespace diskann +{ + +// Disk-resident (SSD) implementation of the unified-format index. +// +// load_storage() constructs a unified_node_store_ssd wrapping the supplied +// AlignedFileReader, calls its load(), and -- when ctx.num_nodes_to_cache > 0 +// -- primes the static cache via _store->cache_bfs_levels(). Then loads PQ +// pivots/codes (currently via temp-file extraction; direct-from-region read +// is a follow-up). search_impl() runs the beam-search loop, pulling beam-wide +// neighborhoods via _store->get_nodes() once per hop. 
+template +class unified_index_ssd final : public unified_index_base +{ + public: + unified_index_ssd(std::shared_ptr reader, diskann::Metric metric); + ~unified_index_ssd() override; + + protected: + void load_storage(UnifiedIndexReader &r, const UnifiedLoadContext &ctx) override; + void search_impl(UnifiedSearchContext &ctx) override; + void fill_storage_stats(TableStats &stats) const override; + + private: + void load_pq_from_unified(UnifiedIndexReader &r); + void load_medoids_from_unified(UnifiedIndexReader &r); + void setup_thread_data(uint64_t nthreads, uint64_t visited_reserve = 4096); + void use_medoids_data_as_centroids(); + + void cached_beam_search(const T *query, uint64_t K, uint64_t L, uint64_t *indices, float *distances, + uint32_t beam_width, const std::vector &filter_label_strings, + uint32_t io_limit, QueryStats *stats, DebugTraversalInfo *debug_info); + + std::shared_ptr _reader; + ConcurrentQueue *> _thread_data; + uint64_t _max_nthreads = 0; + float _max_base_norm = 0.0f; + + FixedChunkPQTable _pq_table; + std::vector _pq_codes; + uint64_t _n_chunks = 0; + + std::vector _medoids; + float *_centroid_data = nullptr; + std::shared_ptr> _dist_cmp; + std::shared_ptr> _dist_cmp_float; +}; + +} // namespace diskann diff --git a/include/unified_label_data.h b/include/unified_label_data.h new file mode 100644 index 000000000..99fd141b2 --- /dev/null +++ b/include/unified_label_data.h @@ -0,0 +1,198 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include + +#include "filter_match_proxy.h" +#include "integer_label_vector.h" +#include "label_bitmask.h" +#include "unified_index_format.h" +#include "windows_customizations.h" + +namespace diskann +{ + +class UnifiedIndexReader; + +// --------------------------------------------------------------------------- +// Abstract base for the label-data trio. 
+// Owns the shared, encoding-independent state (label dictionary, universal +// label, per-label medoids) and exposes the read-only query API. +// Derived classes own encoding-specific storage and produce encoding-specific +// match proxies via `make_match_proxy`. +// +// All label ints are uint32 on the API surface; the on-disk dictionary entry +// stores them as uint32 unconditionally (see docs/unified_index_format.md). +// --------------------------------------------------------------------------- +class unified_label_data_base +{ + public: + virtual ~unified_label_data_base() = default; + + // Template method: parse shared dictionary, then dispatch to derived + // load_encoding(). Caller has the reader open and validated. + void load(UnifiedIndexReader &r, const UnifiedIndexHeader &h, uint64_t npts); + + // --- Shared query API --- + bool has_labels() const + { + return _has_labels; + } + bool has_universal_label() const + { + return _use_universal_label; + } + uint32_t universal_label() const + { + return _universal_label; + } + size_t num_labels() const + { + return _label_map.size(); + } + virtual LabelEncoding encoding() const = 0; + + // Resident bytes of the encoding-specific per-point label storage. + virtual uint64_t memory_usage() const + { + return 0; + } + + bool is_valid_label(const std::string &s) const; + bool get_converted_label(const std::string &s, uint32_t &out) const; + + // Resolve filter label strings to their internal label ints AND per-label + // medoids in a single dictionary probe per string. out_label_ints[i] and + // out_medoids[i] both correspond to filter_label_strings[i]; both vectors + // are caller-owned, cleared, then filled in lockstep. Throws ANNException + // on an unknown label string. The unified format stores exactly one medoid + // per label, packed in the same dictionary row as the label int, so the + // search path gets the proxy input (label int) and the init-id seed + // (medoid) from one map lookup instead of two. 
+ void resolve_filters(const std::vector &filter_label_strings, + std::vector &out_label_ints, + std::vector &out_medoids) const; + + // Append every per-label entry-point medoid (the unified format stores + // exactly one per label) to `out`. Used to seed SSD cache priming so that + // filtered-search entry points -- and their BFS neighborhoods -- get + // cached, mirroring the legacy PQFlashIndex::cache_bfs_levels seeding from + // _filter_to_medoid_ids. `out` is appended to (not cleared); the caller + // typically pre-fills it with the global medoids first. + void collect_label_medoids(std::vector &out) const; + + // Build a search-loop-ready matcher from pre-resolved internal label ints + // (see resolve_filters -- the string -> int conversion happens once there + // and is shared with init-id seeding). The returned proxy borrows internal + // storage of `this` -- lifetime must not exceed `this`. No external scratch + // is needed; the concrete proxy owns any per-query scratch it requires. + virtual std::unique_ptr make_match_proxy( + const std::vector &filter_label_ints) = 0; + + protected: + // Derived classes load their encoding-specific region(s) after the base + // has parsed the shared dictionary. + virtual void load_encoding(UnifiedIndexReader &r, const UnifiedIndexHeader &h, uint64_t npts) = 0; + + // Helper: parse the raw label-dictionary region bytes into _label_map entries. + void parse_dictionary(const std::vector &dict_bytes); + + bool _has_labels = false; + bool _use_universal_label = false; + uint32_t _universal_label = 0; + + // Dictionary row: label string -> {internal label int, per-label medoid}. + // Both fields come from the same on-disk dictionary entry (see + // parse_dictionary / docs/unified_index_format.md), so packing them lets a + // single lookup serve both the match proxy (label int) and init-id seeding + // (medoid) at search time -- avoiding a second probe of a separate map. 
+ struct label_dict_entry + { + uint32_t label_int = 0; + uint32_t medoid = 0; + }; + std::unordered_map _label_map; +}; + +// Bitmask-encoded label storage. One bitmask row of +// `_bitmask_buf._bitmask_size` uint64 words per point. +class unified_label_data_bitmask final : public unified_label_data_base +{ + public: + LabelEncoding encoding() const override + { + return LabelEncoding::Bitmask; + } + + uint64_t memory_usage() const override + { + return _bitmask_buf._buf.size() * sizeof(std::uint64_t); + } + + std::unique_ptr make_match_proxy( + const std::vector &filter_label_ints) override; + + simple_bitmask_buf &bitmask_buf() + { + return _bitmask_buf; + } + const simple_bitmask_buf &bitmask_buf() const + { + return _bitmask_buf; + } + + protected: + void load_encoding(UnifiedIndexReader &r, const UnifiedIndexHeader &h, uint64_t npts) override; + + private: + simple_bitmask_buf _bitmask_buf; +}; + +// Integer-encoded label storage. Variable-length label list per point with an +// offset table of size npts+1 into a flat uint32 label array. +class unified_label_data_integer final : public unified_label_data_base +{ + public: + LabelEncoding encoding() const override + { + return LabelEncoding::Integer; + } + + uint64_t memory_usage() const override + { + return _label_vector.get_memory_usage(); + } + + std::unique_ptr make_match_proxy( + const std::vector &filter_label_ints) override; + + integer_label_vector &label_vector() + { + return _label_vector; + } + const integer_label_vector &label_vector() const + { + return _label_vector; + } + + protected: + void load_encoding(UnifiedIndexReader &r, const UnifiedIndexHeader &h, uint64_t npts) override; + + private: + integer_label_vector _label_vector; +}; + +// Factory: peeks at `h.label_encoding`, constructs the correct derived class, +// runs load(), and returns it. Returns nullptr when the header carries no +// labels (HAS_LABELS flag unset or encoding == None). 
+std::unique_ptr make_unified_label_data(UnifiedIndexReader &r, + const UnifiedIndexHeader &h, + uint64_t npts); + +} // namespace diskann diff --git a/include/unified_node_store.h b/include/unified_node_store.h new file mode 100644 index 000000000..1c08ae026 --- /dev/null +++ b/include/unified_node_store.h @@ -0,0 +1,259 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "tsl/robin_map.h" + +#include "aligned_file_reader.h" +#include "defaults.h" +#include "unified_index_format.h" +#include "windows_customizations.h" + +namespace diskann +{ + +class UnifiedIndexReader; + +// Per-thread scratch passed into get_nodes(). Holds: +// - `ctx`: the AlignedFileReader's per-thread IOContext (registered once at +// load time -- get_nodes does NOT touch the reader's thread-registration +// map, so no mutex on the search hot path). +// - A sector slab: either *owned* (allocated via reserve(), used by tests +// and load-time helpers) or *borrowed* (set via attach_borrowed(), used +// by the index's beam-search to reuse SSDQueryScratch::sector_scratch). +// +// Memory store ignores the entire scratch. +struct NodeFetchScratch +{ + NodeFetchScratch(); + NodeFetchScratch(const NodeFetchScratch &) = delete; + NodeFetchScratch &operator=(const NodeFetchScratch &) = delete; + NodeFetchScratch(NodeFetchScratch &&other) noexcept; + NodeFetchScratch &operator=(NodeFetchScratch &&other) noexcept; + ~NodeFetchScratch(); + + // Self-owned slab: (re)allocate to hold `max_batch * sectors_per_node` + // sectors. No-op if the existing slab is already large enough. Allocates + // -- use only at load time or in tests, not in the search hot path. + void reserve(uint64_t max_batch, uint32_t sectors_per_node); + + // Borrowed slab: set pointers without allocating. The slab buffer must + // outlive the scratch and must be at least `slab_capacity_bytes` big. 
+ // This is what the index uses on the search hot path: ctx and slab both + // come from SSDThreadData allocated at load time. + void attach_borrowed(IOContext &ctx, char *external_slab, uint64_t slab_capacity_bytes); + + // Attach an IOContext to a scratch whose slab is already owned via + // reserve(). Lets the same scratch flip from "no ctx" to "ready" without + // disturbing the slab. Use in load-time helpers that allocate their own + // slab but borrow the ctx from a registered thread. + void set_ctx(IOContext &ctx); + + char *slab() const + { + return _sector_slab; + } + uint64_t slab_capacity() const + { + return _capacity_bytes; + } + IOContext *io_ctx() const + { + return _ctx; + } + + std::vector requests; + + private: + char *_sector_slab = nullptr; + uint64_t _capacity_bytes = 0; + bool _owns_slab = false; // true => destructor aligned_free's _sector_slab + IOContext *_ctx = nullptr; // not owned; lifetime tied to the reader's per-thread map +}; + +// View into one node. Lifetime depends on the store: +// - memory store returns pointers into its resident `_packed` blob; +// - SSD store returns pointers into the supplied scratch's sector slab +// (or into the static cache buffers on a cache hit). +template +struct NodeView +{ + const T *coords = nullptr; + const uint32_t *neighbors = nullptr; + uint32_t degree = 0; +}; + +// --------------------------------------------------------------------------- +// unified_node_store_base +// Abstract base. Owns header copy, offset table, cached max_node_len. +// Per-node wire layout is [coords (aligned_dim*sizeof(T) bytes), +// neighbors (degree*sizeof(uint32_t) bytes)]. +// Degree is recovered from the offset delta -- there is no per-node degree +// field in the wire format. 
+// --------------------------------------------------------------------------- +template +class unified_node_store_base +{ + public: + virtual ~unified_node_store_base() = default; + + // --- Geometry --- + uint64_t num_points() const + { + return _header.npts; + } + uint64_t dim() const + { + return _header.dim; + } + uint64_t aligned_dim() const + { + return _header.aligned_dim; + } + uint32_t max_degree() const + { + return _header.max_degree; + } + uint64_t graph_region_base() const + { + return _header.graph_region_off; + } + + // --- Offset math (valid after init_geometry) --- + uint64_t node_byte_offset(uint64_t id) const + { + return _offsets[id]; + } + uint64_t node_byte_length(uint64_t id) const + { + return _offsets[id + 1] - _offsets[id]; + } + // Absolute byte offset of node `id`'s payload in the unified file. + // Convenience: same as `graph_region_base() + node_byte_offset(id)`. + uint64_t node_disk_offset(uint64_t id) const + { + return graph_region_base() + _offsets[id]; + } + uint32_t degree(uint64_t id) const; + uint32_t num_sectors_per_node() const; + uint64_t max_node_len() const + { + return _max_node_len; + } + // aligned_dim * sizeof(T) -- cached in init_geometry(). + uint64_t coord_bytes() const + { + return _coord_bytes; + } + + // --- Single virtual API for node access --- + // Resolve `ids` into `out` (one NodeView per id, same order). + virtual void get_nodes(const std::vector &ids, NodeFetchScratch &scratch, + std::vector> &out) = 0; + + protected: + // Subclasses call this from their `load` after parsing header + offset table. + void init_geometry(const UnifiedIndexHeader &h, std::vector offset_table); + + UnifiedIndexHeader _header{}; // own copy + std::vector _offsets; + uint64_t _max_node_len = 0; + uint64_t _coord_bytes = 0; +}; + +// --------------------------------------------------------------------------- +// unified_node_store_memory +// Fully-resident. Loads the graph region into _packed during load(). 
+// --------------------------------------------------------------------------- +template +class unified_node_store_memory final : public unified_node_store_base +{ + public: + void load(UnifiedIndexReader &r, const UnifiedIndexHeader &h); + + void get_nodes(const std::vector &ids, NodeFetchScratch &scratch, + std::vector> &out) override; + + // Non-virtual fast path for unified_index_memory::iterate_to_fixed_point. + const T *get_coords(uint64_t id) const; + const uint32_t *get_neighbors(uint64_t id, uint32_t &out_degree) const; + + // Total resident bytes of the graph region ([coords, neighbors] for all + // nodes), pulled fully into memory by load(). + uint64_t resident_bytes() const + { + return _packed.size(); + } + + private: + std::vector _packed; +}; + +// --------------------------------------------------------------------------- +// unified_node_store_ssd +// AlignedFileReader-backed. Owns the static _nhood_cache / _coord_cache. +// --------------------------------------------------------------------------- +template +class unified_node_store_ssd final : public unified_node_store_base +{ + public: + explicit unified_node_store_ssd(std::shared_ptr reader) : _reader(std::move(reader)) + { + } + ~unified_node_store_ssd() override; + + void load(UnifiedIndexReader &r, const UnifiedIndexHeader &h); + + void get_nodes(const std::vector &ids, NodeFetchScratch &scratch, + std::vector> &out) override; + + // Internal helpers (used by unified_index_ssd::load_storage when the user + // requests cache priming via UnifiedLoadContext::num_nodes_to_cache). + // Pin `node_list` (read once, kept resident). Caller supplies a + // pre-attached NodeFetchScratch (slab + IOContext) -- typically borrowed + // from an SSDThreadData via attach_borrowed(), or from a self-owned + // build via make_fetch_scratch(). + void load_cache_list(const std::vector &node_list, NodeFetchScratch &scratch); + + // BFS-based cache primer. 
Caller supplies the seed nodes (typically the + // unified file's medoids; the store doesn't own medoid data). Walks the + // graph from each seed in breadth-first order, collects up to + // num_nodes_to_cache unique ids into `out_node_list`, then calls + // load_cache_list(out_node_list, scratch). + void cache_bfs_levels(const std::vector &seed_nodes, uint64_t num_nodes_to_cache, + std::vector &out_node_list, NodeFetchScratch &scratch); + + // Convenience: build a NodeFetchScratch sized for `max_batch` nodes, + // register the calling thread with the AlignedFileReader (idempotent; + // safe to call from already-registered threads), and attach the resulting + // IOContext. Used by tests and any standalone caller. Allocates an owned + // slab -- not for the hot path. The hot path attaches an existing + // SSDThreadData via NodeFetchScratch::attach_borrowed(). + NodeFetchScratch make_fetch_scratch(uint64_t max_batch); + + // Test/observability counter: number of AlignedRead requests this store + // has issued. Cheap (uint64 increment per get_nodes call), always compiled. + uint64_t io_count() const + { + return _io_count; + } + + private: + std::shared_ptr _reader; + + // Static caches. + tsl::robin_map> _nhood_cache; + uint32_t *_nhood_cache_buf = nullptr; + tsl::robin_map _coord_cache; + T *_coord_cache_buf = nullptr; + + // Always-compiled IO counter (cheap; one uint64 per get_nodes batch). + uint64_t _io_count = 0; +}; + +} // namespace diskann diff --git a/include/windows_customizations.h b/include/windows_customizations.h index e6c58466a..c2dacc497 100644 --- a/include/windows_customizations.h +++ b/include/windows_customizations.h @@ -5,7 +5,12 @@ #ifdef _WINDOWS -#ifdef _WINDLL +#if defined(DISKANN_STATIC_LIB) +// Static-library build/consumer (e.g. the unit tests): the internal symbols +// are compiled straight into the linking target, so no dllimport/dllexport +// decoration is needed. Checked first so it wins over _WINDLL. 
+#define DISKANN_DLLEXPORT +#elif defined(_WINDLL) #define DISKANN_DLLEXPORT __declspec(dllexport) #else #define DISKANN_DLLEXPORT __declspec(dllimport) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2f70194d4..3d09fbeaa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,37 @@ set(CMAKE_COMPILE_WARNING_AS_ERROR ON) if(MSVC) add_subdirectory(dll) + + # Static-library variant of the DiskANN core, built for the unit tests so + # they can link the internal symbols directly instead of going through the + # DLL's export table. Compiled with DISKANN_STATIC_LIB (PUBLIC, so consumers + # see it too) which makes DISKANN_DLLEXPORT a no-op -- see + # include/windows_customizations.h. This lets internal-only types drop their + # DISKANN_DLLEXPORT annotations without breaking the tests. + set(DISKANN_STATIC_SOURCES + abstract_data_store.cpp abstract_index.cpp ann_exception.cpp + color_helper.cpp color_info.cpp disk_utils.cpp distance.cpp + filter_match_proxy.cpp filter_utils.cpp + in_mem_data_store.cpp in_mem_graph_reformat_store.cpp in_mem_graph_store.cpp + in_mem_reorder_data_store.cpp in_mem_static_graph_reformat.cpp in_mem_static_graph_store.cpp + index.cpp index_factory.cpp integer_label_vector.cpp + label_bitmask.cpp label_helper.cpp logger.cpp + math_utils.cpp memory_mapper.cpp natural_number_map.cpp natural_number_set.cpp + neighbor_list.cpp partition.cpp pq.cpp pq_data_store.cpp pq_flash_index.cpp + pq_l2_distance.cpp scratch.cpp unified_index.cpp unified_index_builder.cpp unified_index_io.cpp + unified_label_data.cpp unified_node_store.cpp + unified_index_base.cpp unified_index_memory.cpp unified_index_ssd.cpp + utils.cpp windows_aligned_file_reader.cpp) + + add_library(${PROJECT_NAME}_s STATIC ${DISKANN_STATIC_SOURCES}) + target_compile_definitions(${PROJECT_NAME}_s PUBLIC DISKANN_STATIC_LIB) + # index.cpp exceeds the COFF section limit without /GL (which the DLL uses); + # /bigobj lifts it for the static build. 
+ target_compile_options(${PROJECT_NAME}_s PRIVATE /bigobj) + target_include_directories(${PROJECT_NAME}_s PRIVATE ${DISKANN_MKL_INCLUDE_DIRECTORIES}) + # MKL + synchronization.lib are PRIVATE to the DLL; the static lib exposes + # them PUBLICly so anything linking diskann_s (the tests) inherits them. + target_link_libraries(${PROJECT_NAME}_s PUBLIC ${DISKANN_MKL_LINK_LIBRARIES} synchronization.lib) else() #file(GLOB CPP_SOURCES *.cpp) set(CPP_SOURCES abstract_data_store.cpp abstract_index.cpp ann_exception.cpp @@ -17,7 +48,9 @@ else() label_bitmask.cpp label_helper.cpp linux_aligned_file_reader.cpp logger.cpp math_utils.cpp memory_mapper.cpp natural_number_map.cpp natural_number_set.cpp neighbor_list.cpp partition.cpp pq.cpp pq_data_store.cpp pq_flash_index.cpp - pq_l2_distance.cpp scratch.cpp utils.cpp) + pq_l2_distance.cpp scratch.cpp unified_index.cpp unified_index_builder.cpp unified_index_io.cpp + unified_label_data.cpp unified_node_store.cpp + unified_index_base.cpp unified_index_memory.cpp unified_index_ssd.cpp utils.cpp) if (RESTAPI) list(APPEND CPP_SOURCES restapi/search_wrapper.cpp restapi/server.cpp) endif() diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index fa7d90568..40b0a7c13 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -18,6 +18,10 @@ #include "pq_flash_index.h" #include "timer.h" #include "tsl/robin_set.h" +#include "unified_index_io.h" +#include "label_helper.h" +#include "label_bitmask.h" +#include "integer_label_vector.h" namespace diskann { @@ -1447,6 +1451,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const return 0; } + template DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, const std::string mem_index_file, const std::string output_file, @@ -1455,7 +1460,8 @@ template DISKANN_DLLEXPORT void create_disk_layout(const std::string ba const std::string mem_index_file, const std::string output_file, const std::string reorder_data_file); -template DISKANN_DLLEXPORT void 
create_disk_layout(const std::string base_file, const std::string mem_index_file, +template DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, + const std::string mem_index_file, const std::string output_file, const std::string reorder_data_file); diff --git a/src/dll/CMakeLists.txt b/src/dll/CMakeLists.txt index 4b23b41d4..92d643fae 100644 --- a/src/dll/CMakeLists.txt +++ b/src/dll/CMakeLists.txt @@ -11,7 +11,10 @@ add_library(${PROJECT_NAME} SHARED dllmain.cpp ../label_bitmask.cpp ../label_helper.cpp ../logger.cpp ../math_utils.cpp ../memory_mapper.cpp ../natural_number_map.cpp ../natural_number_set.cpp ../neighbor_list.cpp ../partition.cpp ../pq.cpp ../pq_data_store.cpp ../pq_flash_index.cpp - ../pq_l2_distance.cpp ../scratch.cpp ../utils.cpp ../windows_aligned_file_reader.cpp) + ../pq_l2_distance.cpp ../scratch.cpp ../unified_index.cpp ../unified_index_builder.cpp ../unified_index_io.cpp + ../unified_label_data.cpp ../unified_node_store.cpp + ../unified_index_base.cpp ../unified_index_memory.cpp ../unified_index_ssd.cpp + ../utils.cpp ../windows_aligned_file_reader.cpp) set(TARGET_DIR "$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}>$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}>") diff --git a/src/filter_match_proxy.cpp b/src/filter_match_proxy.cpp index 4ba606a50..2b613e260 100644 --- a/src/filter_match_proxy.cpp +++ b/src/filter_match_proxy.cpp @@ -15,8 +15,8 @@ bitmask_filter_match::bitmask_filter_match( // _bitmask_size == 0 means no filter is set if (_bitmask_filters._bitmask_size > 0) { - query_bitmask_buf.resize(_bitmask_filters._bitmask_size, 0); - _bitmask_full_val._mask = query_bitmask_buf.data(); + _query_bitmask_buf.resize(_bitmask_filters._bitmask_size, 0); + _bitmask_full_val._mask = _query_bitmask_buf.data(); for (const auto& filter_label : filter_labels) { @@ -30,6 +30,30 @@ bitmask_filter_match::bitmask_filter_match( } } +template +bitmask_filter_match::bitmask_filter_match( + simple_bitmask_buf& bitmask_filters, + const 
std::vector& filter_labels, + LabelT unv_label) + : _bitmask_filters(bitmask_filters), + _query_bitmask_buf(_owned_query_bitmask_buf) +{ + if (_bitmask_filters._bitmask_size > 0) + { + _query_bitmask_buf.resize(_bitmask_filters._bitmask_size, 0); + _bitmask_full_val._mask = _query_bitmask_buf.data(); + + for (const auto& filter_label : filter_labels) + { + auto bitmask_val = simple_bitmask::get_bitmask_val(filter_label); + _bitmask_full_val.merge_bitmask_val(bitmask_val); + } + + auto bitmask_val = simple_bitmask::get_bitmask_val(unv_label); + _bitmask_full_val.merge_bitmask_val(bitmask_val); + } +} + template bool bitmask_filter_match::contain_filtered_label(uint32_t id) { diff --git a/src/index.cpp b/src/index.cpp index 5261adb5e..6e6811aa3 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -18,6 +18,7 @@ #include "color_helper.h" #include "filter_match_proxy.h" #include "in_mem_reorder_data_store.h" +#include "unified_index_io.h" #if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) #include "gperftools/malloc_extension.h" @@ -438,6 +439,134 @@ void Index::save(const char *filename, bool compact_before_save diskann::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." 
<< std::endl; } +template +void Index::save_unified(const char *filename) +{ + static const std::vector kEmpty; + save_unified(filename, kEmpty, kEmpty); +} + +template +void Index::save_unified(const char *filename, const std::vector &pq_pivots_bytes, + const std::vector &pq_codes_bytes) +{ + diskann::Timer timer; + + std::unique_lock ul(_update_lock); + std::unique_lock cl(_consolidate_lock); + std::unique_lock tl(_tag_lock); + std::unique_lock dl(_delete_lock); + + if (!_data_compacted) + { + compact_data(); + } + + if (_dynamic_index || _delete_set->size() > 0 || _enable_tags) + { + throw ANNException("save_unified does not support dynamic/tagged/deletion indices in v1", -1, + __FUNCSIG__, __FILE__, __LINE__); + } + + const uint64_t npts = static_cast(_nd); + const uint64_t dim = static_cast(_dim); + const uint64_t aligned_dim = static_cast(_data_store->get_aligned_dim()); + const uint32_t max_degree = _graph_store->get_max_observed_degree(); + + UnifiedIndexWriter writer(filename); + MetricTag metric_tag = MetricTag::L2; + if (_dist_metric == diskann::Metric::INNER_PRODUCT) + metric_tag = MetricTag::InnerProduct; + else if (_dist_metric == diskann::Metric::COSINE) + metric_tag = MetricTag::Cosine; + + writer.begin(npts, dim, aligned_dim, max_degree, data_type_tag_of(), metric_tag, + static_cast(_start)); + + writer.begin_graph_region(); + std::vector vec(dim); + for (uint32_t i = 0; i < npts; ++i) + { + _data_store->get_vector(i, vec.data()); + const NeighborList nbrs = _graph_store->get_neighbours(i); + writer.write_node(vec.data(), nbrs.data(), static_cast(nbrs.size())); + } + writer.end_graph_region(); + + if (_filtered_index && !_label_to_start_id.empty()) + { + std::vector medoid_list; + medoid_list.reserve(_label_to_start_id.size()); + for (const auto &kv : _label_to_start_id) + medoid_list.push_back(kv.second); + writer.write_medoids(medoid_list.data(), medoid_list.size()); + } + else + { + const uint32_t single_medoid = _start; + 
writer.write_medoids(&single_medoid, 1); + } + + // Optional PQ region. Caller supplies both pivots and codes; empty buffers + // skip the PQ write entirely (no HAS_PQ flag). + if (!pq_pivots_bytes.empty() && !pq_codes_bytes.empty()) + { + writer.write_pq(pq_pivots_bytes.data(), pq_pivots_bytes.size(), pq_codes_bytes.data(), + pq_codes_bytes.size()); + } + + if (_filtered_index) + { + std::vector dict_bytes; + { + std::vector> label_to_medoid_list(_label_to_start_id.begin(), + _label_to_start_id.end()); + std::unordered_map int_to_str; + for (const auto &kv : _label_map) + int_to_str.emplace(kv.second, kv.first); + for (const auto &lm : label_to_medoid_list) + { + const auto it = int_to_str.find(lm.first); + const std::string &s = (it != int_to_str.end()) ? it->second : std::string(); + const uint32_t label_int = static_cast(lm.first); + const uint32_t slen = static_cast(s.size()); + const size_t old = dict_bytes.size(); + dict_bytes.resize(old + sizeof(uint32_t) + slen + sizeof(uint32_t) + sizeof(uint32_t)); + uint8_t *p = dict_bytes.data() + old; + std::memcpy(p, &slen, sizeof(uint32_t)); + p += sizeof(uint32_t); + std::memcpy(p, s.data(), slen); + p += slen; + std::memcpy(p, &label_int, sizeof(uint32_t)); + p += sizeof(uint32_t); + std::memcpy(p, &lm.second, sizeof(uint32_t)); + } + } + + const uint64_t universal = _use_universal_label ? 
static_cast(_universal_label) : 0; + const uint64_t total_labels = static_cast(_label_map.size()); + + if (_use_integer_labels) + { + const auto &offsets = _label_vector.get_offset_vector(); + const auto &data = _label_vector.get_data_vector(); + std::vector off_u64(offsets.begin(), offsets.end()); + writer.write_labels_integer(total_labels, universal, dict_bytes.data(), dict_bytes.size(), + data.data(), data.size() * sizeof(uint32_t), off_u64.data()); + } + else if (_bitmask_buf._buf.size() > 0) + { + const uint64_t bitmap_bytes = _bitmask_buf._buf.size() * sizeof(uint64_t); + writer.write_labels_bitmask(total_labels, universal, dict_bytes.data(), dict_bytes.size(), + _bitmask_buf._buf.data(), bitmap_bytes); + } + } + + writer.finalize(); + + diskann::cout << "Time taken for save_unified: " << timer.elapsed() / 1000000.0 << "s." << std::endl; +} + #ifdef EXEC_ENV_OLS template size_t Index::load_tags(AlignedFileReader &reader) @@ -2133,6 +2262,12 @@ void Index::build(const std::string &data_file, const size_t nu std::string mem_labels_int_map_file = filter_params.save_path_prefix + "_labels_map.txt"; convert_labels_string_to_int(filter_params.label_file, labels_file_to_use, mem_labels_int_map_file, filter_params.universal_label, unv_label_as_num); + // Populate the in-memory string->int label map. convert_labels_string_to_int + // only writes it to disk; without this _label_map stays empty until a + // load(), so a save_unified() called after build() (e.g. from + // unified_index_builder) would emit an empty label dictionary / + // total_labels == 0 and produce an unloadable filtered unified file. 
+ _label_map = load_label_map(mem_labels_int_map_file); if (filter_params.universal_label != "") { if (unv_label_as_num != 0) diff --git a/src/integer_label_vector.cpp b/src/integer_label_vector.cpp index c467050f1..ad8b5f23a 100644 --- a/src/integer_label_vector.cpp +++ b/src/integer_label_vector.cpp @@ -45,6 +45,30 @@ bool integer_label_vector::initialize_from_file(const std::string& label_file, s return true; } +bool integer_label_vector::initialize_from_buffers(const size_t *offsets, size_t num_points, + const uint32_t *labels, size_t total_labels) +{ + _offset.assign(offsets, offsets + num_points + 1); + _data.assign(labels, labels + total_labels); + return true; +} + +void integer_label_vector::resize_for_load(size_t num_points, size_t total_labels) +{ + _offset.resize(num_points + 1); + _data.resize(total_labels); +} + +size_t *integer_label_vector::mutable_offset_data() +{ + return _offset.data(); +} + +uint32_t *integer_label_vector::mutable_label_data() +{ + return _data.data(); +} + template bool integer_label_vector::add_labels(uint32_t point_id, std::vector &labels) { if (point_id >= _offset.size() - 1) diff --git a/src/pq.cpp b/src/pq.cpp index d2b545c79..ffe0c6d84 100644 --- a/src/pq.cpp +++ b/src/pq.cpp @@ -168,6 +168,115 @@ uint32_t FixedChunkPQTable::get_num_chunks() return static_cast(n_chunks); } +void FixedChunkPQTable::load_pq_centroid_bin_from_memory(const uint8_t *blob, size_t blob_len, size_t num_chunks) +{ + // The pq_pivots.bin format is a "bin-with-offsets" container: + // Outer bin at offset 0: [int32 nr][int32 nc][size_t offsets[nr]] (nr = 4 or 5) + // Sub-bin at offsets[0] (pivots): [int32 256][int32 dim][float[256*dim]] + // Sub-bin at offsets[1] (centroid): [int32 dim][int32 1 ][float[dim]] + // (nr==5 only) offsets[2] is an old-format per-chunk dims sub-bin (ignored). + // Sub-bin at offsets[chunk_offsets_index]: [int32 n_chunks+1][int32 1][uint32_t[n_chunks+1]] + // chunk_offsets_index = 2 (new) or 3 (old, when nr==5). 
+ // + // This mirrors the disk loader's parsing (FixedChunkPQTable::load_pq_centroid_bin + // above) but reads straight from `blob` with no IO. OPQ rotation matrix is + // NOT supported -- unified-format PQ is always standard PQ. + + auto read_sub_bin_header = [&](size_t off, size_t &out_nr, size_t &out_nc, size_t &out_payload_off) { + if (off + 2 * sizeof(int32_t) > blob_len) + throw diskann::ANNException("PQ blob: truncated sub-bin header", -1, __FUNCSIG__, __FILE__, __LINE__); + int32_t nr_i32 = 0, nc_i32 = 0; + std::memcpy(&nr_i32, blob + off, sizeof(int32_t)); + std::memcpy(&nc_i32, blob + off + sizeof(int32_t), sizeof(int32_t)); + out_nr = static_cast(nr_i32); + out_nc = static_cast(nc_i32); + out_payload_off = off + 2 * sizeof(int32_t); + }; + + // --- Outer bin: size_t offset table. --- + size_t nr = 0, nc = 0, payload_off = 0; + read_sub_bin_header(/*off=*/0, nr, nc, payload_off); + + if (nr != 4 && nr != 5) + { + throw diskann::ANNException("PQ blob: outer offsets have unexpected count " + std::to_string(nr) + + " (expecting 4 or 5)", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + const size_t outer_bytes = nr * nc * sizeof(size_t); + if (payload_off + outer_bytes > blob_len) + throw diskann::ANNException("PQ blob: truncated outer offsets", -1, __FUNCSIG__, __FILE__, __LINE__); + + std::vector file_offset_data(nr); + std::memcpy(file_offset_data.data(), blob + payload_off, outer_bytes); + + const bool use_old_filetype = (nr == 5); + + // --- Pivot table at offsets[0]. 
--- + read_sub_bin_header(file_offset_data[0], nr, nc, payload_off); + if (nr != NUM_PQ_CENTROIDS) + { + throw diskann::ANNException("PQ blob: pivots row count = " + std::to_string(nr) + " (expecting " + + std::to_string(NUM_PQ_CENTROIDS) + ")", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + this->ndims = nc; + const size_t pivots_bytes = nr * nc * sizeof(float); + if (payload_off + pivots_bytes > blob_len) + throw diskann::ANNException("PQ blob: truncated pivot table", -1, __FUNCSIG__, __FILE__, __LINE__); + if (tables != nullptr) + delete[] tables; + tables = new float[nr * nc]; + std::memcpy(tables, blob + payload_off, pivots_bytes); + + // --- Centroid at offsets[1]. --- + read_sub_bin_header(file_offset_data[1], nr, nc, payload_off); + if (nr != this->ndims || nc != 1) + { + throw diskann::ANNException("PQ blob: centroid shape mismatch", -1, __FUNCSIG__, __FILE__, __LINE__); + } + const size_t centroid_bytes = nr * nc * sizeof(float); + if (payload_off + centroid_bytes > blob_len) + throw diskann::ANNException("PQ blob: truncated centroid", -1, __FUNCSIG__, __FILE__, __LINE__); + if (centroid != nullptr) + delete[] centroid; + centroid = new float[nr * nc]; + std::memcpy(centroid, blob + payload_off, centroid_bytes); + + // --- Chunk offsets at offsets[2] (new) or [3] (old-filetype). --- + const int chunk_offsets_index = use_old_filetype ? 
3 : 2; + read_sub_bin_header(file_offset_data[chunk_offsets_index], nr, nc, payload_off); + if (nc != 1 || (nr != num_chunks + 1 && num_chunks != 0)) + { + throw diskann::ANNException("PQ blob: chunk-offsets shape mismatch (nr=" + std::to_string(nr) + + ", nc=" + std::to_string(nc) + ")", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + const size_t chunk_bytes = nr * nc * sizeof(uint32_t); + if (payload_off + chunk_bytes > blob_len) + throw diskann::ANNException("PQ blob: truncated chunk offsets", -1, __FUNCSIG__, __FILE__, __LINE__); + if (chunk_offsets != nullptr) + delete[] chunk_offsets; + chunk_offsets = new uint32_t[nr * nc]; + std::memcpy(chunk_offsets, blob + payload_off, chunk_bytes); + + this->n_chunks = nr - 1; + diskann::cout << "Loaded PQ Pivots from memory: #ctrs: " << NUM_PQ_CENTROIDS << ", #dims: " << this->ndims + << ", #chunks: " << this->n_chunks << std::endl; + + // Compute the transpose used by the distance hot path. + if (tables_tr != nullptr) + delete[] tables_tr; + tables_tr = new float[256 * this->ndims]; + for (size_t i = 0; i < 256; i++) + { + for (size_t j = 0; j < this->ndims; j++) + { + tables_tr[j * 256 + i] = tables[i * this->ndims + j]; + } + } +} + void FixedChunkPQTable::preprocess_query(float *query_vec) { for (uint32_t d = 0; d < ndims; d++) diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index 8d8870e57..848f47368 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -11,6 +11,7 @@ #include "cosine_similarity.h" #include "color_helper.h" #include "filter_match_proxy.h" +#include "unified_index_io.h" #include #include @@ -142,7 +143,9 @@ std::vector PQFlashIndex::read_nodes(const std::vector retval(node_ids.size(), true); char *buf = nullptr; - auto num_sectors = _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); + auto num_sectors = _nnodes_per_sector > 0 + ? 
1 + : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); alloc_aligned((void **)&buf, node_ids.size() * num_sectors * defaults::SECTOR_LEN, defaults::SECTOR_LEN); // create read requests @@ -186,9 +189,12 @@ std::vector PQFlashIndex::read_nodes(const std::vector::cached_beam_search(const T *query1, const uint64_t // sector scratch char *sector_scratch = query_scratch->sector_scratch; uint64_t §or_scratch_idx = query_scratch->sector_idx; + // In unified mode nodes are not sector-padded, so an unaligned node can + // straddle one extra sector beyond DIV_ROUND_UP(node_len, SECTOR_LEN). const uint64_t num_sectors_per_node = - _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); + _nnodes_per_sector > 0 ? 1 + : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); // query <-> PQ chunk centers distances _pq_table.preprocess_query(query_rotated); // center the query and rotate if @@ -1427,8 +1436,9 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t fnhood.second = sector_scratch + num_sectors_per_node * sector_scratch_idx * defaults::SECTOR_LEN; sector_scratch_idx++; frontier_nhoods.push_back(fnhood); - frontier_read_reqs.emplace_back(get_node_sector((size_t)id) * defaults::SECTOR_LEN, - num_sectors_per_node * defaults::SECTOR_LEN, fnhood.second); + uint64_t read_offset = get_node_sector((size_t)id) * defaults::SECTOR_LEN; + uint64_t read_length = num_sectors_per_node * defaults::SECTOR_LEN; + frontier_read_reqs.emplace_back(read_offset, read_length, fnhood.second); if (stats != nullptr) { stats->n_4k++; @@ -1526,7 +1536,10 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t #endif char *node_disk_buf = offset_to_node(frontier_nhood.second, frontier_nhood.first); uint32_t *node_buf = offset_to_node_nhood(node_disk_buf); - uint64_t nnbrs = (uint64_t)(*node_buf); + uint64_t nnbrs; + uint32_t *node_nbrs; + nnbrs = (uint64_t)(*node_buf); + node_nbrs = (node_buf + 1); T *node_fp_coords = 
offset_to_node_coords(node_disk_buf); memcpy(data_buf, node_fp_coords, _disk_bytes_per_point); float cur_expanded_dist; @@ -1542,7 +1555,6 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t cur_expanded_dist = _disk_pq_table.l2_distance(query_float, (uint8_t *)data_buf); } full_retset.push_back(Neighbor(frontier_nhood.first, cur_expanded_dist)); - uint32_t *node_nbrs = (node_buf + 1); // compute node_nbrs <-> query dist in PQ space cpu_timer.reset(); compute_dists(node_nbrs, nnbrs, dist_scratch); diff --git a/src/unified_index.cpp b/src/unified_index.cpp new file mode 100644 index 000000000..d39eca9ae --- /dev/null +++ b/src/unified_index.cpp @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "unified_index.h" + +#include "ann_exception.h" +#include "unified_index_io.h" +#include "unified_index_memory.h" +#include "unified_index_ssd.h" + +namespace diskann +{ + +namespace +{ + +// Map MetricTag from the header to the runtime Metric enum. +diskann::Metric metric_from_tag(MetricTag tag) +{ + switch (tag) + { + case MetricTag::L2: + return diskann::Metric::L2; + case MetricTag::InnerProduct: + return diskann::Metric::INNER_PRODUCT; + case MetricTag::Cosine: + return diskann::Metric::COSINE; + default: + throw ANNException("unified_index factory: unknown metric tag in header", -1, __FUNCSIG__, __FILE__, + __LINE__); + } +} + +// Peek the 4 KiB header to decide which T to instantiate. Reader is closed +// before the index's own load(ctx) reopens the file. +UnifiedIndexHeader peek_header(const std::string &path) +{ + UnifiedIndexReader peek(path); + return peek.header(); +} + +template