Skip to content

Commit 18103b4

Browse files
authored
Merge pull request #172 from ruvnet/fix/hnsw-agent-sparql-lru-issues
fix: HNSW index bugs, agent/SPARQL crashes, lru security
2 parents 32c27bd + 91e7aac commit 18103b4

19 files changed

Lines changed: 979 additions & 48 deletions

File tree

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ members = [
9999
resolver = "2"
100100

101101
[workspace.package]
102-
version = "2.0.2"
102+
version = "2.0.3"
103103
edition = "2021"
104104
rust-version = "1.77"
105105
license = "MIT"
@@ -171,7 +171,7 @@ opt-level = 3
171171
lto = "fat"
172172
codegen-units = 1
173173
strip = true
174-
panic = "abort"
174+
panic = "unwind"
175175

176176
[profile.bench]
177177
inherits = "release"

crates/ruvector-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ tokio-postgres = { version = "0.7", optional = true }
3131
deadpool-postgres = { version = "0.14", optional = true }
3232

3333
# LRU cache for performance optimization
34-
lru = "0.12"
34+
lru = "0.16"
3535

3636
# Compression for storage
3737
flate2 = "1.0"

crates/ruvector-graph/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ pest_derive = { version = "2.7", optional = true }
6969
lalrpop-util = { version = "0.21", optional = true }
7070

7171
# Cache
72-
lru = "0.12"
72+
lru = "0.16"
7373
moka = { version = "0.12", features = ["future"], optional = true }
7474

7575
# Compression (for storage optimization, optional for WASM)

crates/ruvector-postgres/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ruvector-postgres"
3-
version = "2.0.1"
3+
version = "2.0.2"
44
edition = "2021"
55
license = "MIT"
66
description = "High-performance PostgreSQL vector database extension v2 - pgvector drop-in replacement with 230+ SQL functions, SIMD acceleration, Flash Attention, GNN layers, hybrid search, multi-tenancy, self-healing, and self-learning capabilities"

crates/ruvector-postgres/sql/ruvector--2.0.0.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ LANGUAGE C VOLATILE PARALLEL SAFE;
525525

526526
-- List all agents
527527
CREATE OR REPLACE FUNCTION ruvector_list_agents()
528-
RETURNS SETOF jsonb
528+
RETURNS TABLE(name text, agent_type text, capabilities text[], cost_per_request real, avg_latency_ms real, quality_score real, success_rate real, total_requests bigint, is_active boolean)
529529
AS 'MODULE_PATHNAME', 'ruvector_list_agents_wrapper'
530530
LANGUAGE C VOLATILE PARALLEL SAFE;
531531

@@ -537,7 +537,7 @@ LANGUAGE C VOLATILE PARALLEL SAFE;
537537

538538
-- Find agents by capability
539539
CREATE OR REPLACE FUNCTION ruvector_find_agents_by_capability(capability text, max_results int DEFAULT 10)
540-
RETURNS SETOF jsonb
540+
RETURNS TABLE(name text, quality_score real, avg_latency_ms real, cost_per_request real)
541541
AS 'MODULE_PATHNAME', 'ruvector_find_agents_by_capability_wrapper'
542542
LANGUAGE C VOLATILE PARALLEL SAFE;
543543

crates/ruvector-postgres/src/graph/operators.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,11 @@ fn ruvector_create_rdf_store(name: &str) -> bool {
324324
/// ```
325325
#[pg_extern]
326326
fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result<String, String> {
327+
// Validate input to prevent panics
328+
if query.trim().is_empty() {
329+
return Err("SPARQL query cannot be empty".to_string());
330+
}
331+
327332
let store = get_store(store_name)
328333
.ok_or_else(|| format!("Triple store '{}' does not exist", store_name))?;
329334

@@ -350,6 +355,11 @@ fn ruvector_sparql(store_name: &str, query: &str, format: &str) -> Result<String
350355
/// ```
351356
#[pg_extern]
352357
fn ruvector_sparql_json(store_name: &str, query: &str) -> Result<JsonB, String> {
358+
// Validate input to prevent panics that would abort PostgreSQL
359+
if query.trim().is_empty() {
360+
return Err("SPARQL query cannot be empty".to_string());
361+
}
362+
353363
let result = ruvector_sparql(store_name, query, "json")?;
354364

355365
let json_value: JsonValue =

crates/ruvector-postgres/src/index/hnsw_am.rs

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,21 @@ unsafe fn read_vector(
505505

506506
let header = page as *const PageHeaderData;
507507
let data_ptr = (header as *const u8).add(size_of::<PageHeaderData>());
508+
509+
// Bounds check: prevent reading past page boundary. Fixes #164 segfault.
510+
let page_size = pg_sys::BLCKSZ as usize;
511+
let total_read_end = size_of::<PageHeaderData>()
512+
+ size_of::<HnswNodePageHeader>()
513+
+ dimensions * size_of::<f32>();
514+
if total_read_end > page_size {
515+
pgrx::warning!(
516+
"HNSW: Vector read would exceed page boundary ({} > {}), skipping block {}",
517+
total_read_end, page_size, block
518+
);
519+
pg_sys::UnlockReleaseBuffer(buffer);
520+
return None;
521+
}
522+
508523
let vector_ptr = data_ptr.add(size_of::<HnswNodePageHeader>()) as *const f32;
509524

510525
let mut vector = Vec::with_capacity(dimensions);
@@ -550,6 +565,23 @@ unsafe fn read_neighbors(
550565
offset += count * size_of::<HnswNeighbor>();
551566
}
552567

568+
// Bounds check: prevent reading past page boundary. Fixes #164 segfault.
569+
let page_size = pg_sys::BLCKSZ as usize;
570+
let header_size = size_of::<PageHeaderData>();
571+
let total_read_end = header_size
572+
+ size_of::<HnswNodePageHeader>()
573+
+ vector_size
574+
+ offset
575+
+ neighbor_count * size_of::<HnswNeighbor>();
576+
if total_read_end > page_size {
577+
pgrx::warning!(
578+
"HNSW: Neighbor read would exceed page boundary ({} > {}), skipping block {}",
579+
total_read_end, page_size, block
580+
);
581+
pg_sys::UnlockReleaseBuffer(buffer);
582+
return Vec::new();
583+
}
584+
553585
let neighbors_ptr = neighbors_base.add(offset) as *const HnswNeighbor;
554586
let mut neighbors = Vec::with_capacity(neighbor_count);
555587
for i in 0..neighbor_count {
@@ -712,16 +744,16 @@ unsafe fn hnsw_search(
712744
}
713745
}
714746

715-
// Convert to sorted result vector
747+
// Convert to sorted result vector.
748+
// Use into_sorted_vec() for deterministic ordering instead of into_iter()
749+
// which yields arbitrary order from BinaryHeap. Fixes #171.
716750
let mut result_vec: Vec<_> = results
751+
.into_sorted_vec()
717752
.into_iter()
718753
.take(k)
719754
.map(|r| (r.block, r.tid, r.distance))
720755
.collect();
721756

722-
result_vec.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(Ordering::Equal));
723-
result_vec.truncate(k);
724-
725757
result_vec
726758
}
727759

@@ -738,8 +770,32 @@ unsafe extern "C" fn hnsw_build(
738770
) -> *mut IndexBuildResult {
739771
pgrx::log!("HNSW v2: Starting index build");
740772

741-
// Get dimensions from first tuple or index definition
742-
let dimensions = 128; // TODO: Extract from index column definition
773+
// Extract dimensions from the indexed column's type modifier (atttypmod).
774+
// For ruvector(384), atttypmod == 384. Fixes #171 and #164.
775+
let dimensions = {
776+
let tupdesc = (*heap).rd_att;
777+
let natts = (*index_info).ii_NumIndexAttrs as isize;
778+
let mut dims: u32 = 0;
779+
if natts > 0 && !tupdesc.is_null() {
780+
let attnum = *(*index_info).ii_IndexAttrNumbers.offset(0);
781+
if attnum > 0 && (attnum as isize) <= (*tupdesc).natts as isize {
782+
let attr = (*tupdesc).attrs.as_ptr().offset((attnum - 1) as isize);
783+
let typmod = (*attr).atttypmod;
784+
if typmod > 0 {
785+
dims = typmod as u32;
786+
}
787+
}
788+
}
789+
if dims == 0 {
790+
pgrx::warning!(
791+
"HNSW: Could not determine vector dimensions from column type modifier, \
792+
defaulting to 384. Ensure column is defined as ruvector(N)."
793+
);
794+
dims = 384;
795+
}
796+
pgrx::log!("HNSW v2: Building index with {} dimensions", dims);
797+
dims as usize
798+
};
743799
let config = HnswConfig::default();
744800

745801
// Parse options from WITH clause
@@ -1399,6 +1455,14 @@ unsafe extern "C" fn hnsw_rescan(
13991455
state.search_done = false;
14001456
state.query_valid = false; // Reset validity flag
14011457

1458+
// Non-kNN scan (e.g., COUNT(*), WHERE embedding IS NOT NULL)
1459+
// When there are no ORDER BY operators, we cannot perform a vector search.
1460+
// Return early and let hnsw_gettuple return false, so the scan yields no tuples
1461+
// (PostgreSQL does not re-plan to a sequential scan at this point). Fixes #152.
1462+
if norderbys <= 0 || orderbys.is_null() {
1463+
return;
1464+
}
1465+
14021466
// Extract query vector from ORDER BY
14031467
if norderbys > 0 && !orderbys.is_null() {
14041468
let orderby = &*orderbys;
@@ -1483,6 +1547,9 @@ unsafe extern "C" fn hnsw_rescan(
14831547
}
14841548

14851549
// Validate query vector - CRITICAL: Prevent crashes from invalid queries
1550+
// Note: if query_valid is false due to norderbys==0 (non-kNN scan),
1551+
// we already returned early above. This check only fires for kNN scans
1552+
// where vector extraction genuinely failed.
14861553
if !state.query_valid || state.query_vector.is_empty() {
14871554
// Instead of using zeros which crash, raise a proper error
14881555
pgrx::error!(
@@ -1577,6 +1644,13 @@ unsafe extern "C" fn hnsw_gettuple(scan: IndexScanDesc, direction: ScanDirection
15771644
let state = &mut *((*scan).opaque as *mut HnswScanState);
15781645
let index = (*scan).indexRelation;
15791646

1647+
// Non-kNN scan: no query vector was provided (e.g., COUNT(*), WHERE IS NOT NULL).
1648+
// Return false to report that no tuples remain, ending the scan immediately; this
1649+
// does not by itself make PostgreSQL fall back to a sequential scan. Fixes #152.
1650+
if !state.query_valid && !state.search_done {
1651+
return false;
1652+
}
1653+
15801654
// Execute search on first call
15811655
if !state.search_done {
15821656
let (meta_page, meta_buffer) = get_meta_page(index);

0 commit comments

Comments
 (0)