@@ -505,6 +505,21 @@ unsafe fn read_vector(
505505
506506 let header = page as * const PageHeaderData ;
507507 let data_ptr = ( header as * const u8 ) . add ( size_of :: < PageHeaderData > ( ) ) ;
508+
509+ // Bounds check: prevent reading past page boundary. Fixes #164 segfault.
510+ let page_size = pg_sys:: BLCKSZ as usize ;
511+ let total_read_end = size_of :: < PageHeaderData > ( )
512+ + size_of :: < HnswNodePageHeader > ( )
513+ + dimensions * size_of :: < f32 > ( ) ;
514+ if total_read_end > page_size {
515+ pgrx:: warning!(
516+ "HNSW: Vector read would exceed page boundary ({} > {}), skipping block {}" ,
517+ total_read_end, page_size, block
518+ ) ;
519+ pg_sys:: UnlockReleaseBuffer ( buffer) ;
520+ return None ;
521+ }
522+
508523 let vector_ptr = data_ptr. add ( size_of :: < HnswNodePageHeader > ( ) ) as * const f32 ;
509524
510525 let mut vector = Vec :: with_capacity ( dimensions) ;
@@ -550,6 +565,23 @@ unsafe fn read_neighbors(
550565 offset += count * size_of :: < HnswNeighbor > ( ) ;
551566 }
552567
568+ // Bounds check: prevent reading past page boundary. Fixes #164 segfault.
569+ let page_size = pg_sys:: BLCKSZ as usize ;
570+ let header_size = size_of :: < PageHeaderData > ( ) ;
571+ let total_read_end = header_size
572+ + size_of :: < HnswNodePageHeader > ( )
573+ + vector_size
574+ + offset
575+ + neighbor_count * size_of :: < HnswNeighbor > ( ) ;
576+ if total_read_end > page_size {
577+ pgrx:: warning!(
578+ "HNSW: Neighbor read would exceed page boundary ({} > {}), skipping block {}" ,
579+ total_read_end, page_size, block
580+ ) ;
581+ pg_sys:: UnlockReleaseBuffer ( buffer) ;
582+ return Vec :: new ( ) ;
583+ }
584+
553585 let neighbors_ptr = neighbors_base. add ( offset) as * const HnswNeighbor ;
554586 let mut neighbors = Vec :: with_capacity ( neighbor_count) ;
555587 for i in 0 ..neighbor_count {
@@ -712,16 +744,16 @@ unsafe fn hnsw_search(
712744 }
713745 }
714746
715- // Convert to sorted result vector
747+ // Convert to sorted result vector.
748+ // Use into_sorted_vec() for deterministic ordering instead of into_iter()
749+ // which yields arbitrary order from BinaryHeap. Fixes #171.
716750 let mut result_vec: Vec < _ > = results
751+ . into_sorted_vec ( )
717752 . into_iter ( )
718753 . take ( k)
719754 . map ( |r| ( r. block , r. tid , r. distance ) )
720755 . collect ( ) ;
721756
722- result_vec. sort_by ( |a, b| a. 2 . partial_cmp ( & b. 2 ) . unwrap_or ( Ordering :: Equal ) ) ;
723- result_vec. truncate ( k) ;
724-
725757 result_vec
726758}
727759
@@ -738,8 +770,32 @@ unsafe extern "C" fn hnsw_build(
738770) -> * mut IndexBuildResult {
739771 pgrx:: log!( "HNSW v2: Starting index build" ) ;
740772
741- // Get dimensions from first tuple or index definition
742- let dimensions = 128 ; // TODO: Extract from index column definition
773+ // Extract dimensions from the indexed column's type modifier (atttypmod).
774+ // For ruvector(384), atttypmod == 384. Fixes #171 and #164.
775+ let dimensions = {
776+ let tupdesc = ( * heap) . rd_att ;
777+ let natts = ( * index_info) . ii_NumIndexAttrs as isize ;
778+ let mut dims: u32 = 0 ;
779+ if natts > 0 && !tupdesc. is_null ( ) {
780+ let attnum = * ( * index_info) . ii_IndexAttrNumbers . offset ( 0 ) ;
781+ if attnum > 0 && ( attnum as isize ) <= ( * tupdesc) . natts as isize {
782+ let attr = ( * tupdesc) . attrs . as_ptr ( ) . offset ( ( attnum - 1 ) as isize ) ;
783+ let typmod = ( * attr) . atttypmod ;
784+ if typmod > 0 {
785+ dims = typmod as u32 ;
786+ }
787+ }
788+ }
789+ if dims == 0 {
790+ pgrx:: warning!(
791+ "HNSW: Could not determine vector dimensions from column type modifier, \
792+ defaulting to 384. Ensure column is defined as ruvector(N)."
793+ ) ;
794+ dims = 384 ;
795+ }
796+ pgrx:: log!( "HNSW v2: Building index with {} dimensions" , dims) ;
797+ dims as usize
798+ } ;
743799 let config = HnswConfig :: default ( ) ;
744800
745801 // Parse options from WITH clause
@@ -1399,6 +1455,14 @@ unsafe extern "C" fn hnsw_rescan(
13991455 state. search_done = false ;
14001456 state. query_valid = false ; // Reset validity flag
14011457
1458+ // Non-kNN scan (e.g., COUNT(*), WHERE embedding IS NOT NULL)
1459+ // When there are no ORDER BY operators, we cannot perform a vector search.
1460+ // Return early and let hnsw_gettuple return false, so this scan yields no tuples.
1461+ // NOTE(review): the executor does not fall back to a seq scan at run time. Fixes #152.
1462+ if norderbys <= 0 || orderbys. is_null ( ) {
1463+ return ;
1464+ }
1465+
14021466 // Extract query vector from ORDER BY
14031467 if norderbys > 0 && !orderbys. is_null ( ) {
14041468 let orderby = & * orderbys;
@@ -1483,6 +1547,9 @@ unsafe extern "C" fn hnsw_rescan(
14831547 }
14841548
14851549 // Validate query vector - CRITICAL: Prevent crashes from invalid queries
1550+ // Note: if query_valid is false due to norderbys==0 (non-kNN scan),
1551+ // we already returned early above. This check only fires for kNN scans
1552+ // where vector extraction genuinely failed.
14861553 if !state. query_valid || state. query_vector . is_empty ( ) {
14871554 // Instead of using zeros which crash, raise a proper error
14881555 pgrx:: error!(
@@ -1577,6 +1644,13 @@ unsafe extern "C" fn hnsw_gettuple(scan: IndexScanDesc, direction: ScanDirection
15771644 let state = & mut * ( ( * scan) . opaque as * mut HnswScanState ) ;
15781645 let index = ( * scan) . indexRelation ;
15791646
1647+ // Non-kNN scan: no query vector was provided (e.g., COUNT(*), WHERE IS NOT NULL).
1648+ // Return false to tell PostgreSQL that no matching tuples remain for this scan.
1649+ // NOTE(review): this ends the scan with zero rows; it is not a run-time seq-scan fallback. Fixes #152.
1650+ if !state. query_valid && !state. search_done {
1651+ return false ;
1652+ }
1653+
15801654 // Execute search on first call
15811655 if !state. search_done {
15821656 let ( meta_page, meta_buffer) = get_meta_page ( index) ;
0 commit comments