Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ members = [
"examples/real-eeg-multi-seizure",
# ruvllm sparse attention kernel for Hailo-10H cluster (ADR-183 – ADR-190)
"crates/ruvllm_sparse_attention",
# Locally-Adaptive Vector Quantization (ADR-193, nightly research 2026-05-08)
"crates/ruvector-lvq",
]
resolver = "2"

Expand Down
31 changes: 31 additions & 0 deletions crates/ruvector-lvq/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Manifest for the `ruvector-lvq` crate. Version, edition, MSRV, license,
# authors and repository are all inherited from the workspace root.
[package]
name = "ruvector-lvq"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
repository.workspace = true
description = "Locally-Adaptive Vector Quantization (LVQ) primary + two-level residual quantizer for fast asymmetric ANN reranking"

# Dependency versions are pinned once at the workspace level.
[dependencies]
serde = { workspace = true }
thiserror = { workspace = true }
rand = { workspace = true }
rand_distr = { workspace = true }
rayon = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }

# Standalone executable built from src/main.rs.
[[bin]]
name = "ruvector-lvq-bench"
path = "src/main.rs"

# `harness = false` disables the default libtest harness: the bench file
# provides its own entry point through `criterion_main!`.
[[bench]]
name = "lvq_bench"
harness = false

# NOTE(review): `parallel` enables no optional dependency and `rayon` above is
# pulled in unconditionally, so this feature can only act as a `cfg` switch.
# Confirm that is intended, or make `rayon` optional and gate it here.
[features]
default = ["parallel"]
parallel = []
77 changes: 77 additions & 0 deletions crates/ruvector-lvq/benches/lvq_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};

use ruvector_lvq::{FlatF32, FlatLvqIndex};

/// Builds a reproducible flat buffer of `n * dim` values, each drawn
/// uniformly from `[-1.0, 1.0)` by a `StdRng` seeded with `seed`.
///
/// The buffer is row-major in the sense the callers use it: consecutive
/// runs of `dim` values form one vector.
fn random_dataset(n: usize, dim: usize, seed: u64) -> Vec<f32> {
    let total = n * dim;
    let mut rng = StdRng::seed_from_u64(seed);
    let mut values = Vec::with_capacity(total);
    for _ in 0..total {
        values.push(rng.gen_range(-1.0_f32..1.0));
    }
    values
}

/// Criterion benchmark group for top-10 L2 search over 20 000 random
/// 128-dimensional vectors.
///
/// Four variants are timed against the same corpus and the same single
/// query (the first `DIM` values of the query buffer):
///
/// * `flat_f32_l2_n20k_d128_k10` — `FlatF32::search_l2`
/// * `lvq8_l2_n20k_d128_k10` — `FlatLvqIndex::new_lvq8` + `search_l2`
/// * `lvq8x8_full_l2_n20k_d128_k10` — `FlatLvqIndex::new_lvq8x8` + `search_l2`
/// * `lvq8x8_rerank10x_l2_n20k_d128_k10` — same index, `search_l2_reranked`
///   called with `(10, 100)`; presumably a 100-candidate shortlist reranked
///   down to 10, as the benchmark id suggests — confirm against the index API.
fn bench_search(c: &mut Criterion) {
    const DIM: usize = 128;
    const N: usize = 20_000;

    let corpus = random_dataset(N, DIM, 7);
    let query_pool = random_dataset(64, DIM, 9);

    // Uncompressed fp32 index, filled one vector at a time.
    let mut exact = FlatF32::new(DIM);
    corpus.chunks_exact(DIM).for_each(|row| {
        exact.push(row).unwrap();
    });

    let mut one_level = FlatLvqIndex::new_lvq8(DIM);
    one_level.extend_from_flat(&corpus).unwrap();

    let mut two_level = FlatLvqIndex::new_lvq8x8(DIM);
    two_level.extend_from_flat(&corpus).unwrap();

    // Only the first query of the pool is benchmarked; each iteration gets
    // its own fresh clone from the setup closure.
    let probe: Vec<f32> = query_pool[..DIM].to_vec();

    c.bench_function("flat_f32_l2_n20k_d128_k10", |b| {
        b.iter_batched(
            || probe.clone(),
            |q| {
                black_box(exact.search_l2(black_box(&q), 10).unwrap());
            },
            BatchSize::SmallInput,
        )
    });

    c.bench_function("lvq8_l2_n20k_d128_k10", |b| {
        b.iter_batched(
            || probe.clone(),
            |q| {
                black_box(one_level.search_l2(black_box(&q), 10).unwrap());
            },
            BatchSize::SmallInput,
        )
    });

    c.bench_function("lvq8x8_full_l2_n20k_d128_k10", |b| {
        b.iter_batched(
            || probe.clone(),
            |q| {
                black_box(two_level.search_l2(black_box(&q), 10).unwrap());
            },
            BatchSize::SmallInput,
        )
    });

    c.bench_function("lvq8x8_rerank10x_l2_n20k_d128_k10", |b| {
        b.iter_batched(
            || probe.clone(),
            |q| {
                black_box(
                    two_level
                        .search_l2_reranked(black_box(&q), 10, 100)
                        .unwrap(),
                );
            },
            BatchSize::SmallInput,
        )
    });
}

criterion_group!(benches, bench_search);
criterion_main!(benches);
99 changes: 99 additions & 0 deletions crates/ruvector-lvq/src/distance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
//! Asymmetric distance kernels for LVQ.
//!
//! Queries are kept in fp32. Database vectors are decoded on the fly while
//! computing the inner product or squared L2 — this keeps memory traffic
//! at one byte per dimension while preserving fp32 query precision.
//!
//! All kernels are written in straight-line scalar code. The compiler
//! auto-vectorises them on x86_64 (`-C target-cpu=native` produces AVX2
//! tight loops) and arm64 (NEON). We intentionally avoid platform-specific
//! intrinsics so the crate stays portable and reproducible.

use crate::quantize::Lvq8Stats;
use crate::two_level::Lvq8x8;

/// Asymmetric squared L2 distance `||q - decode(code, stats)||²` between an
/// fp32 query and one 8-bit encoded vector.
///
/// Every code byte `c` is reconstructed on the fly as
/// `(stats.mean + stats.bias) + stats.scale * c`; the squared differences
/// against the matching query lane are accumulated in lane order.
///
/// `q` and `code` are expected to be the same length. That is asserted in
/// debug builds only; without the assertion a `code` shorter than `q` panics
/// and any bytes beyond `q.len()` are ignored.
#[inline]
pub fn lvq8_l2sq(q: &[f32], code: &[u8], stats: Lvq8Stats) -> f32 {
    debug_assert_eq!(q.len(), code.len());
    let offset = stats.mean + stats.bias;
    let step = stats.scale;
    // A single up-front slice replaces the per-lane bounds check.
    let code = &code[..q.len()];
    q.iter().zip(code).fold(0.0_f32, |acc, (&x, &c)| {
        let recon = offset + step * f32::from(c);
        let diff = x - recon;
        acc + diff * diff
    })
}

/// Asymmetric inner product `<q, decode(code, stats)>` between an fp32 query
/// and one 8-bit encoded vector.
///
/// With `offset = stats.mean + stats.bias`, each decoded lane is
/// `offset + stats.scale * c`, so the product factors into
/// `offset * Σ q[j] + stats.scale * Σ q[j] * c[j]`. Both sums are gathered in
/// one pass and the affine terms are applied once at the end.
///
/// `q` and `code` are expected to be the same length. That is asserted in
/// debug builds only; without the assertion a `code` shorter than `q` panics
/// and any bytes beyond `q.len()` are ignored.
#[inline]
pub fn lvq8_dot(q: &[f32], code: &[u8], stats: Lvq8Stats) -> f32 {
    debug_assert_eq!(q.len(), code.len());
    let offset = stats.mean + stats.bias;
    let step = stats.scale;
    // A single up-front slice replaces the per-lane bounds check.
    let code = &code[..q.len()];
    let (sum_q, sum_q_code) = q.iter().zip(code).fold(
        (0.0_f32, 0.0_f32),
        |(sum_q, sum_q_code), (&x, &c)| (sum_q + x, sum_q_code + x * f32::from(c)),
    );
    offset * sum_q + step * sum_q_code
}

/// Asymmetric squared L2 distance between an fp32 query and entry `idx` of a
/// two-level database: `||q - (decode_primary + decode_residual)||²`.
///
/// Each lane is reconstructed as the sum of the primary term
/// `(mean + bias) + scale * code` and the residual term of the same form,
/// using the per-entry statistics and code rows that `db` returns for `idx`.
///
/// `q.len()` is expected to equal `db.dim()`; this is asserted in debug
/// builds only.
#[inline]
pub fn lvq8x8_l2sq(q: &[f32], idx: usize, db: &Lvq8x8) -> f32 {
    let dim = db.dim();
    debug_assert_eq!(q.len(), dim);

    let primary = db.primary_stats(idx);
    let residual = db.residual_stats_at(idx);
    let primary_row = db.primary_row(idx);
    let residual_row = db.residual_row(idx);

    let p_offset = primary.mean + primary.bias;
    let p_step = primary.scale;
    let r_offset = residual.mean + residual.bias;
    let r_step = residual.scale;

    // Slice all three inputs to `dim` once so the loop body carries no
    // per-lane bounds checks.
    let (q, p_codes, r_codes) = (&q[..dim], &primary_row[..dim], &residual_row[..dim]);

    let mut total = 0.0_f32;
    for ((&x, &p), &r) in q.iter().zip(p_codes).zip(r_codes) {
        let recon = p_offset + p_step * (p as f32) + r_offset + r_step * (r as f32);
        let diff = x - recon;
        total += diff * diff;
    }
    total
}

/// Squared L2 distance between `q` and the *primary level only* of entry
/// `idx`, ignoring the residual level — used for fast prefiltering.
///
/// Fetches the entry's primary statistics and primary code row from `db` and
/// delegates to [`lvq8_l2sq`].
#[inline]
pub fn lvq8x8_l2sq_primary(q: &[f32], idx: usize, db: &Lvq8x8) -> f32 {
    let primary_stats = db.primary_stats(idx);
    let primary_codes = db.primary_row(idx);
    lvq8_l2sq(q, primary_codes, primary_stats)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::quantize::encode_one;

    /// The fused kernel must agree (within 1e-3) with the two-step reference:
    /// decode every lane via `decode_lane`, then take the plain fp32 squared
    /// L2 distance.
    #[test]
    fn lvq8_l2sq_matches_decoded_reference() {
        // 64-lane cosine query against a 64-lane sine vector.
        let query: Vec<f32> = (0..64).map(|i| ((i as f32) * 0.1).cos()).collect();
        let vector: Vec<f32> = (0..64).map(|i| ((i as f32) * 0.1).sin()).collect();
        let (stats, code) = encode_one(&vector).unwrap();

        let approx = lvq8_l2sq(&query, &code, stats);

        let mut reference = 0.0_f32;
        for (&x, &c) in query.iter().zip(code.iter()) {
            reference += (x - stats.decode_lane(c)).powi(2);
        }

        let gap = (approx - reference).abs();
        assert!(gap < 1e-3, "{approx} vs {reference}");
    }
}
19 changes: 19 additions & 0 deletions crates/ruvector-lvq/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use thiserror::Error;

/// Errors defined for the `ruvector-lvq` crate.
///
/// `Display` text for each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum LvqError {
/// A dimension did not match: `expected` is the required value and `actual`
/// is the one that was supplied.
#[error("dimension mismatch: expected {expected}, got {actual}")]
DimMismatch { expected: usize, actual: usize },

/// The input was empty.
#[error("empty input")]
Empty,

/// A vector held a non-finite component; the payload is the index of that
/// component within the vector.
#[error("vector contains non-finite component at index {0}")]
NonFinite(usize),

/// A mutation was attempted on an index that has already been finalized.
#[error("index already finalized; cannot mutate after build")]
AlreadyBuilt,

/// The requested `k` (first field) exceeds the dataset size (second field).
#[error("k = {0} is larger than the dataset size {1}")]
KTooLarge(usize, usize),
}
Loading