Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
- name: install cbindgen
env:
RUSTFLAGS: ""
run: cargo install cbindgen --version 0.29.2 --locked
run: cargo install cbindgen --version 0.29.3 --locked
- name: cbindgen --verify
run: cbindgen ordvec-ffi --config ordvec-ffi/cbindgen.toml --output ordvec-ffi/include/ordvec.h --verify
- name: cargo build -p ordvec-ffi
Expand Down
11 changes: 11 additions & 0 deletions docs/c-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,17 @@ Search is synchronous. Caller pointers are borrowed only for the duration of
`ordvec_index_search`; no query, candidate, hit, stats, or path pointer is
retained after the function returns.

`ordvec_index_load` takes a non-null, NUL-terminated, valid UTF-8 path string.
Invalid UTF-8 paths return `ORDVEC_STATUS_BAD_ARGUMENT` in ABI v1.

`ordvec_index_probe` is the metadata-only inspection path for C and Go callers.
It follows the same UTF-8 path contract as `ordvec_index_load` and fills
`ordvec_index_info_t` without returning an index handle or allocating payload
rows. The probe validates the fixed header, declared dimensions, payload byte
count, and exact file length. It does not validate row payload invariants;
call `ordvec_index_load` when the caller needs a searchable handle and full
loader validation.

Rows are internal row ordinals. ABI v1 has no external ID map:
`ordvec_hit_t.id` is always equal to `ordvec_hit_t.row_id` widened to
`uint64_t`.
Expand Down
22 changes: 19 additions & 3 deletions ordvec-ffi/include/ordvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#pragma once

/* Generated with cbindgen:0.29.2 */
/* Generated with cbindgen:0.29.3 */

/* Generated by cbindgen. Do not edit by hand. */

Expand Down Expand Up @@ -184,11 +184,27 @@ void ordvec_search_stats_init(ordvec_search_stats_t *stats);
*
* # Safety
*
* `path` must be a non-null, NUL-terminated C string. `out` must be non-null
* and point to writable memory for one `ordvec_index_t *`.
* `path` must be a non-null, NUL-terminated, valid UTF-8 C string. `out`
* must be non-null and point to writable memory for one `ordvec_index_t *`.
*/
ordvec_status_t ordvec_index_load(const char *path, uint64_t flags, ordvec_index_t **out);

Comment thread
Fieldnote-Echo marked this conversation as resolved.
/**
* Probe on-disk metadata for a `.tvrq` RankQuant or `.tvbm` Bitmap index
* without loading payload rows into an index handle.
*
* This validates the fixed header, declared dimensions, payload byte count,
* and exact file length. Full row-invariant validation remains the job of
* `ordvec_index_load`.
*
* # Safety
*
* `path` must be a non-null, NUL-terminated, valid UTF-8 C string. `info_out`
* must be non-null, initialized with `ordvec_index_info_init`, and point to
* writable memory for `ordvec_index_info_t`.
*/
ordvec_status_t ordvec_index_probe(const char *path, uint64_t flags, ordvec_index_info_t *info_out);

/**
* Copy metadata from a loaded index into `info_out`.
*
Expand Down
128 changes: 125 additions & 3 deletions ordvec-ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::path::Path;
use std::ptr;
use std::time::Instant;

use ordvec::{Bitmap, RankQuant};
use ordvec::{probe_index_metadata, Bitmap, IndexKind, IndexMetadata, IndexParams, RankQuant};

pub type ordvec_status_t = u32;
pub type ordvec_index_kind_t = u32;
Expand Down Expand Up @@ -362,6 +362,38 @@ fn info_for_handle(handle: &IndexHandle) -> ordvec_index_info_t {
info
}

/// Build an `ordvec_index_info_t` from metadata produced by a header-only probe.
///
/// Starts from `default_info()` and fills in the kind, format version,
/// dimension, vector count, bytes per vector, source file size, the
/// kind-specific parameter (`bit_width` or `n_top`), and the capability bits.
///
/// # Errors
///
/// Returns `ORDVEC_STATUS_UNSUPPORTED_FORMAT` when:
/// - the metadata describes a `Rank` or `SignBitmap` index, which ABI v1 does
///   not expose through the probe, or
/// - a Bitmap `n_top` does not fit in the `u32` `n_top` field (a plain `as`
///   cast would silently truncate it and report a wrong value to the caller).
fn info_for_metadata(meta: &IndexMetadata) -> Result<ordvec_index_info_t, FfiError> {
    let mut info = default_info();
    info.kind = match meta.kind {
        IndexKind::RankQuant => ORDVEC_INDEX_KIND_RANK_QUANT,
        IndexKind::Bitmap => ORDVEC_INDEX_KIND_BITMAP,
        IndexKind::Rank | IndexKind::SignBitmap => {
            return Err(FfiError::new(
                ORDVEC_STATUS_UNSUPPORTED_FORMAT,
                "ABI v1 supports metadata probes only for TVRQ RankQuant and TVBM Bitmap indexes",
            ))
        }
    };
    info.format_version = u32::from(meta.format_version);
    // NOTE(review): these three casts are presumably usize -> u64 widenings;
    // confirm against the `IndexMetadata` field types.
    info.dim = meta.dim as u64;
    info.vector_count = meta.vector_count as u64;
    info.bytes_per_vec = meta.bytes_per_vec as u64;
    info.source_file_size_bytes = meta.file_size_bytes;
    match meta.params {
        IndexParams::RankQuant { bits } => {
            info.bit_width = u32::from(bits);
        }
        IndexParams::Bitmap { n_top } => {
            // Checked narrowing instead of `as u32`: an out-of-range value is
            // reported as an error rather than wrapped. The fully qualified
            // path keeps this independent of the crate edition's prelude.
            info.n_top = <u32 as std::convert::TryFrom<_>>::try_from(n_top).map_err(|_| {
                FfiError::new(
                    ORDVEC_STATUS_UNSUPPORTED_FORMAT,
                    format!("Bitmap n_top {n_top} does not fit in the u32 n_top field of ABI v1"),
                )
            })?;
        }
        // Unreachable in practice: these kinds were rejected above.
        IndexParams::Rank | IndexParams::SignBitmap => {}
    }
    info.capabilities = ORDVEC_CAP_FULL_SEARCH
        | ORDVEC_CAP_SUBSET_SEARCH
        | ORDVEC_CAP_STATS
        | ORDVEC_CAP_ID_EQUALS_ROW_ID;
    Ok(info)
}

fn copy_hits(scores: &[f32], indices: &[i64], hits_out: *mut ordvec_hit_t) {
debug_assert_eq!(scores.len(), indices.len());
for (slot, (&score, &row)) in scores.iter().zip(indices).enumerate() {
Expand Down Expand Up @@ -643,8 +675,8 @@ pub unsafe extern "C" fn ordvec_search_stats_init(stats: *mut ordvec_search_stat
///
/// # Safety
///
/// `path` must be a non-null, NUL-terminated C string. `out` must be non-null
/// and point to writable memory for one `ordvec_index_t *`.
/// `path` must be a non-null, NUL-terminated, valid UTF-8 C string. `out`
/// must be non-null and point to writable memory for one `ordvec_index_t *`.
pub unsafe extern "C" fn ordvec_index_load(
path: *const c_char,
flags: u64,
Expand Down Expand Up @@ -720,6 +752,70 @@ pub unsafe extern "C" fn ordvec_index_load(
})
}

#[no_mangle]
/// Probe on-disk metadata for a `.tvrq` RankQuant or `.tvbm` Bitmap index
/// without loading payload rows into an index handle.
///
/// This validates the fixed header, declared dimensions, payload byte count,
/// and exact file length. Full row-invariant validation remains the job of
/// `ordvec_index_load`.
///
/// # Safety
///
/// `path` must be a non-null, NUL-terminated, valid UTF-8 C string. `info_out`
/// must be non-null, initialized with `ordvec_index_info_init`, and point to
/// writable memory for `ordvec_index_info_t`.
pub unsafe extern "C" fn ordvec_index_probe(
    path: *const c_char,
    flags: u64,
    info_out: *mut ordvec_index_info_t,
) -> ordvec_status_t {
    ffi_boundary(|| {
        // Argument checks run in a fixed order (path, info_out, flags, struct
        // size, UTF-8) so a given bad input always maps to the same status.
        if path.is_null() {
            return Err(FfiError::new(
                ORDVEC_STATUS_NULL_POINTER,
                "path pointer is NULL",
            ));
        }
        if info_out.is_null() {
            return Err(FfiError::new(
                ORDVEC_STATUS_NULL_POINTER,
                "info_out pointer is NULL",
            ));
        }
        if flags != 0 {
            return Err(FfiError::new(
                ORDVEC_STATUS_BAD_ARGUMENT,
                format!("unknown probe flags: {flags}"),
            ));
        }

        // SAFETY: info_out was checked to be non-null above; only the
        // struct_size field is read, and the whole struct is overwritten on
        // success.
        let declared_size = unsafe { ptr::addr_of!((*info_out).struct_size).read() };
        check_exact_size(
            declared_size,
            std::mem::size_of::<ordvec_index_info_t>(),
            "ordvec_index_info_t",
        )?;

        // SAFETY: path is non-null (checked above) and NUL-terminated by the
        // caller contract.
        let raw_path = unsafe { CStr::from_ptr(path) };
        let utf8_path = match raw_path.to_str() {
            Ok(text) => text,
            Err(_) => {
                return Err(FfiError::new(
                    ORDVEC_STATUS_BAD_ARGUMENT,
                    "path must be valid UTF-8 in ABI v1",
                ))
            }
        };

        let metadata = probe_index_metadata(utf8_path)
            .map_err(|err| io_to_ffi(err, "probe index metadata"))?;
        let probed = info_for_metadata(&metadata)?;

        // SAFETY: info_out is non-null and the caller guarantees it points to
        // writable storage for one ordvec_index_info_t.
        unsafe { info_out.write(probed) };
        Ok(())
    })
}

#[no_mangle]
/// Copy metadata from a loaded index into `info_out`.
///
Expand Down Expand Up @@ -958,6 +1054,32 @@ mod tests {
std::fs::remove_file(path).ok();
}

#[test]
fn probe_rankquant_metadata_without_loading() {
    // Probing the RankQuant fixture must fill every metadata field without
    // an index handle ever being created.
    let path = make_rankquant_fixture();
    let cpath = CString::new(path.to_str().unwrap()).unwrap();
    let mut info = default_info();

    // SAFETY: cpath is a live NUL-terminated UTF-8 string and info is an
    // initialized, writable ordvec_index_info_t for the duration of the call.
    let status = unsafe { ordvec_index_probe(cpath.as_ptr(), 0, &mut info) };
    assert_eq!(status, ORDVEC_STATUS_OK);

    assert_eq!(info.kind, ORDVEC_INDEX_KIND_RANK_QUANT);
    assert_eq!(info.format_version, 1);
    assert_eq!(info.dim, 16);
    assert_eq!(info.bit_width, 2);
    assert_eq!(info.n_top, 0);
    assert_eq!(info.vector_count, 4);
    assert_eq!(info.bytes_per_vec, 4);
    assert_ne!(info.source_file_size_bytes, 0);
    assert_eq!(
        info.capabilities & ORDVEC_CAP_SUBSET_SEARCH,
        ORDVEC_CAP_SUBSET_SEARCH
    );

    std::fs::remove_file(path).ok();
}

#[test]
fn full_and_subset_search_rankquant() {
let path = make_rankquant_fixture();
Expand Down
22 changes: 15 additions & 7 deletions ordvec-ffi/tests/c_link_smoke.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,27 @@ fn c_program_links_and_runs_against_static_library() {
#include "ordvec.h"

int main(void) {{
ordvec_index_t *idx = 0;
ordvec_status_t st = ordvec_index_load({fixture}, 0, &idx);
ordvec_index_info_t probed;
ordvec_index_info_init(&probed);
ordvec_status_t st = ordvec_index_probe({fixture}, 0, &probed);
if (st != ORDVEC_STATUS_OK) return 1;
if (probed.kind != ORDVEC_INDEX_KIND_RANK_QUANT || probed.dim != 16 || probed.vector_count != 4) {{
return 2;
}}

ordvec_index_t *idx = 0;
st = ordvec_index_load({fixture}, 0, &idx);
if (st != ORDVEC_STATUS_OK) return 3;

ordvec_index_info_t info;
ordvec_index_info_init(&info);
if (ordvec_index_info(idx, &info) != ORDVEC_STATUS_OK) {{
ordvec_index_free(idx);
return 2;
return 4;
}}
if (info.kind != ORDVEC_INDEX_KIND_RANK_QUANT || info.dim != 16 || info.vector_count != 4) {{
ordvec_index_free(idx);
return 3;
return 5;
}}

float q[16] = {{0}};
Expand All @@ -137,9 +145,9 @@ int main(void) {{

st = ordvec_index_search(idx, &p, hits, 2, &returned, &stats);
ordvec_index_free(idx);
if (st != ORDVEC_STATUS_OK) return 4;
if (returned > 2) return 5;
if (stats.returned_count != returned) return 6;
if (st != ORDVEC_STATUS_OK) return 6;
if (returned > 2) return 7;
if (stats.returned_count != returned) return 8;
return 0;
}}
"#,
Expand Down
31 changes: 30 additions & 1 deletion ordvec-go/ordvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ const (
KindBitmap Kind = C.ORDVEC_INDEX_KIND_BITMAP
)

// Capability bits for testing against Info.Capabilities. Each constant
// mirrors the ORDVEC_CAP_* value of the same name from the C header, so a
// caller can check support with a mask, e.g.
// info.Capabilities&CapSubsetSearch != 0.
const (
// CapFullSearch mirrors ORDVEC_CAP_FULL_SEARCH.
CapFullSearch uint64 = C.ORDVEC_CAP_FULL_SEARCH
// CapSubsetSearch mirrors ORDVEC_CAP_SUBSET_SEARCH.
CapSubsetSearch uint64 = C.ORDVEC_CAP_SUBSET_SEARCH
// CapStats mirrors ORDVEC_CAP_STATS.
CapStats uint64 = C.ORDVEC_CAP_STATS
// CapIDEqualsRowID mirrors ORDVEC_CAP_ID_EQUALS_ROW_ID.
CapIDEqualsRowID uint64 = C.ORDVEC_CAP_ID_EQUALS_ROW_ID
)

var ErrClosed = errors.New("ordvec: index closed")

type StatusError struct {
Expand Down Expand Up @@ -172,6 +179,24 @@ func callStatus(fn func() C.ordvec_status_t) error {
return statusError(st)
}

// Probe reads index metadata for the file at path through the C
// ordvec_index_probe entry point and returns it as an Info, without creating
// an Index.
//
// It fails when path contains a NUL byte (which cannot be represented in a C
// string) or when callStatus reports an error for the probe call.
func Probe(path string) (Info, error) {
	if strings.IndexByte(path, 0) != -1 {
		return Info{}, errors.New("ordvec: path contains null byte")
	}

	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	// The C side requires the output struct to be initialized before the call.
	var raw C.ordvec_index_info_t
	C.ordvec_index_info_init(&raw)

	probe := func() C.ordvec_status_t {
		return C.ordvec_index_probe(cPath, 0, &raw)
	}
	if err := callStatus(probe); err != nil {
		return Info{}, err
	}
	return infoFromC(raw), nil
}

func Load(path string) (*Index, error) {
if strings.IndexByte(path, 0) >= 0 {
return nil, errors.New("ordvec: path contains null byte")
Expand Down Expand Up @@ -232,6 +257,10 @@ func (idx *Index) infoLocked() (Info, error) {
if err != nil {
return Info{}, err
}
return infoFromC(ci), nil
}

func infoFromC(ci C.ordvec_index_info_t) Info {
return Info{
Kind: Kind(ci.kind),
FormatVersion: uint32(ci.format_version),
Expand All @@ -242,7 +271,7 @@ func (idx *Index) infoLocked() (Info, error) {
BytesPerVec: uint64(ci.bytes_per_vec),
SourceFileSizeBytes: uint64(ci.source_file_size_bytes),
Capabilities: uint64(ci.capabilities),
}, nil
}
}

func (idx *Index) Search(query []float32, k uint64, opts *SearchOptions) ([]Hit, Stats, error) {
Expand Down
32 changes: 32 additions & 0 deletions ordvec-go/ordvec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ func TestLoadInfoSearchRankQuant(t *testing.T) {
if info.Kind != KindRankQuant || info.Dim != 16 || info.BitWidth != 2 || info.VectorCount != 4 {
t.Fatalf("unexpected info: %+v", info)
}
wantCaps := CapFullSearch | CapSubsetSearch | CapStats | CapIDEqualsRowID
if info.Capabilities&wantCaps != wantCaps {
t.Fatalf("missing capabilities: got %#x want all %#x", info.Capabilities, wantCaps)
}

hits, stats, err := idx.Search(query16(), 2, &SearchOptions{UserTag: 99})
if err != nil {
Expand All @@ -117,6 +121,34 @@ func TestLoadInfoSearchRankQuant(t *testing.T) {
}
}

// TestProbeRankQuantInfo checks that Probe reports the expected metadata for
// the RankQuant fixture and that the result equals Info() from a loaded index
// of the same file.
func TestProbeRankQuantInfo(t *testing.T) {
	fixture := writeRankQuantFixture(t)

	probed, err := Probe(fixture)
	if err != nil {
		t.Fatal(err)
	}

	shapeOK := probed.Kind == KindRankQuant &&
		probed.Dim == 16 &&
		probed.BitWidth == 2 &&
		probed.VectorCount == 4
	if !shapeOK {
		t.Fatalf("unexpected probed info: %+v", probed)
	}

	bytesOK := probed.BytesPerVec == 4 && probed.SourceFileSizeBytes != 0
	if !bytesOK {
		t.Fatalf("unexpected probed byte metadata: %+v", probed)
	}

	// Load the same file and compare the full Info structs field by field.
	idx, err := Load(fixture)
	if err != nil {
		t.Fatal(err)
	}
	defer idx.Close()

	loaded, err := idx.Info()
	if err != nil {
		t.Fatal(err)
	}
	if loaded != probed {
		t.Fatalf("probe/load metadata mismatch: probe=%+v load=%+v", probed, loaded)
	}
}

func TestRankQuantSubsetSearchOrdersByRowID(t *testing.T) {
idx, err := Load(writeRankQuantFixture(t))
if err != nil {
Expand Down
Loading
Loading