Skip to content

Commit 33ce786

Browse files
committed
Add vector type support and fix stoolap 0.3.3 API compatibility
- Handle Value::Extension for both JSON and Vector data types
- Return vectors as Float32Array in JS (the packed LE f32 payload is copied once into a new ArrayBuffer, with no per-element conversion)
- Accept Float32Array as bind parameters for vector operations
- Add V8 TAG_FLOAT32_ARRAY cell type for direct Float32Array creation
- Add 19 vector tests: CRUD, distance functions, HNSW, transactions
- Fix Value::Json → Value::Extension migration (stoolap 0.3.3 API change)
1 parent 560ff75 commit 33ce786

4 files changed

Lines changed: 250 additions & 15 deletions

File tree

__test__/index.spec.mjs

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,3 +1487,179 @@ describe('Edge cases', () => {
14871487
assert.ok(names.some(n => n === 'ws_test'));
14881488
});
14891489
});
1490+
1491+
// ============================================================
1492+
// Vector support
1493+
// ============================================================
1494+
1495+
describe('Vector support', () => {
1496+
let db;
1497+
1498+
before(async () => {
1499+
db = await Database.open(':memory:');
1500+
});
1501+
1502+
after(async () => {
1503+
await db.close();
1504+
});
1505+
1506+
it('should create table with VECTOR column', async () => {
1507+
await db.exec('CREATE TABLE embeddings (id INTEGER PRIMARY KEY, embedding VECTOR(3))');
1508+
const tables = await db.query('SHOW TABLES');
1509+
const names = tables.map(t => t.table_name || t.Tables);
1510+
assert.ok(names.some(n => n === 'embeddings'));
1511+
});
1512+
1513+
it('should insert vectors via SQL string literals', async () => {
1514+
const result = await db.execute(
1515+
"INSERT INTO embeddings (id, embedding) VALUES (1, '[0.1, 0.2, 0.3]')"
1516+
);
1517+
assert.equal(result.changes, 1);
1518+
});
1519+
1520+
it('should insert multiple vectors', async () => {
1521+
await db.execute("INSERT INTO embeddings (id, embedding) VALUES (2, '[0.4, 0.5, 0.6]')");
1522+
await db.execute("INSERT INTO embeddings (id, embedding) VALUES (3, '[0.7, 0.8, 0.9]')");
1523+
const rows = await db.query('SELECT id FROM embeddings ORDER BY id');
1524+
assert.equal(rows.length, 3);
1525+
});
1526+
1527+
it('should return vectors as Float32Array', async () => {
1528+
const row = await db.queryOne('SELECT embedding FROM embeddings WHERE id = 1');
1529+
assert.ok(row.embedding instanceof Float32Array, 'embedding should be Float32Array');
1530+
assert.equal(row.embedding.length, 3);
1531+
assert.ok(Math.abs(row.embedding[0] - 0.1) < 0.001);
1532+
assert.ok(Math.abs(row.embedding[1] - 0.2) < 0.001);
1533+
assert.ok(Math.abs(row.embedding[2] - 0.3) < 0.001);
1534+
});
1535+
1536+
it('should return vectors in query() results', async () => {
1537+
const rows = await db.query('SELECT * FROM embeddings ORDER BY id');
1538+
assert.equal(rows.length, 3);
1539+
for (const row of rows) {
1540+
assert.ok(row.embedding instanceof Float32Array);
1541+
assert.equal(row.embedding.length, 3);
1542+
}
1543+
});
1544+
1545+
it('should return vectors in querySync() results', () => {
1546+
const rows = db.querySync('SELECT * FROM embeddings ORDER BY id');
1547+
assert.equal(rows.length, 3);
1548+
assert.ok(rows[0].embedding instanceof Float32Array);
1549+
});
1550+
1551+
it('should return vectors in queryOneSync() results', () => {
1552+
const row = db.queryOneSync('SELECT embedding FROM embeddings WHERE id = 2');
1553+
assert.ok(row.embedding instanceof Float32Array);
1554+
assert.ok(Math.abs(row.embedding[0] - 0.4) < 0.001);
1555+
});
1556+
1557+
it('should return vectors in queryRaw() results', async () => {
1558+
const raw = await db.queryRaw('SELECT id, embedding FROM embeddings ORDER BY id');
1559+
assert.ok(raw.columns.includes('embedding'));
1560+
assert.equal(raw.rows.length, 3);
1561+
const embIdx = raw.columns.indexOf('embedding');
1562+
assert.ok(raw.rows[0][embIdx] instanceof Float32Array);
1563+
});
1564+
1565+
it('should return vectors in queryRawSync() results', () => {
1566+
const raw = db.queryRawSync('SELECT id, embedding FROM embeddings ORDER BY id');
1567+
const embIdx = raw.columns.indexOf('embedding');
1568+
assert.ok(raw.rows[0][embIdx] instanceof Float32Array);
1569+
});
1570+
1571+
it('should handle NULL vectors', async () => {
1572+
await db.execute("INSERT INTO embeddings (id, embedding) VALUES (4, NULL)");
1573+
const row = await db.queryOne('SELECT embedding FROM embeddings WHERE id = 4');
1574+
assert.equal(row.embedding, null);
1575+
});
1576+
1577+
it('should compute L2 distance', async () => {
1578+
const rows = await db.query(
1579+
"SELECT id, VEC_DISTANCE_L2(embedding, '[0.1, 0.2, 0.3]') AS dist FROM embeddings WHERE id <= 3 ORDER BY dist"
1580+
);
1581+
assert.equal(rows.length, 3);
1582+
// id=1 has the same vector, distance should be ~0
1583+
assert.equal(rows[0].id, 1);
1584+
assert.ok(rows[0].dist < 0.001);
1585+
});
1586+
1587+
it('should compute cosine distance', async () => {
1588+
const rows = await db.query(
1589+
"SELECT id, VEC_DISTANCE_COSINE(embedding, '[0.1, 0.2, 0.3]') AS dist FROM embeddings WHERE id <= 3 ORDER BY dist"
1590+
);
1591+
assert.equal(rows.length, 3);
1592+
assert.equal(rows[0].id, 1);
1593+
assert.ok(rows[0].dist < 0.001);
1594+
});
1595+
1596+
it('should support k-NN search with ORDER BY + LIMIT', async () => {
1597+
const rows = await db.query(
1598+
"SELECT id, VEC_DISTANCE_L2(embedding, '[0.4, 0.5, 0.6]') AS dist FROM embeddings WHERE id <= 3 ORDER BY dist LIMIT 2"
1599+
);
1600+
assert.equal(rows.length, 2);
1601+
// id=2 has exact match [0.4, 0.5, 0.6]
1602+
assert.equal(rows[0].id, 2);
1603+
});
1604+
1605+
it('should work with higher-dimensional vectors', async () => {
1606+
await db.exec('CREATE TABLE hd_vecs (id INTEGER PRIMARY KEY, vec VECTOR(128))');
1607+
const dims = 128;
1608+
const values = Array.from({ length: dims }, (_, i) => (i / dims).toFixed(6));
1609+
const vecStr = `[${values.join(', ')}]`;
1610+
await db.execute(`INSERT INTO hd_vecs (id, vec) VALUES (1, '${vecStr}')`);
1611+
const row = await db.queryOne('SELECT vec FROM hd_vecs WHERE id = 1');
1612+
assert.ok(row.vec instanceof Float32Array);
1613+
assert.equal(row.vec.length, dims);
1614+
});
1615+
1616+
it('should accept Float32Array as bind parameter', async () => {
1617+
await db.exec('CREATE TABLE vec_params (id INTEGER PRIMARY KEY, vec VECTOR(3))');
1618+
const vec = new Float32Array([1.0, 2.0, 3.0]);
1619+
await db.execute("INSERT INTO vec_params (id, vec) VALUES (1, '[1.0, 2.0, 3.0]')");
1620+
// Use Float32Array in distance computation param
1621+
const row = db.queryOneSync(
1622+
"SELECT VEC_DISTANCE_L2(vec, '[1.0, 2.0, 3.0]') AS dist FROM vec_params WHERE id = 1"
1623+
);
1624+
assert.ok(row.dist < 0.001);
1625+
});
1626+
1627+
it('should support vectors in transactions', async () => {
1628+
await db.exec('CREATE TABLE tx_vecs (id INTEGER PRIMARY KEY, vec VECTOR(3))');
1629+
const tx = await db.begin();
1630+
await tx.execute("INSERT INTO tx_vecs (id, vec) VALUES (1, '[1.0, 2.0, 3.0]')");
1631+
await tx.execute("INSERT INTO tx_vecs (id, vec) VALUES (2, '[4.0, 5.0, 6.0]')");
1632+
const rows = await tx.query('SELECT * FROM tx_vecs ORDER BY id');
1633+
assert.equal(rows.length, 2);
1634+
assert.ok(rows[0].vec instanceof Float32Array);
1635+
await tx.commit();
1636+
1637+
const afterCommit = await db.query('SELECT * FROM tx_vecs ORDER BY id');
1638+
assert.equal(afterCommit.length, 2);
1639+
});
1640+
1641+
it('should support VEC_DIMS utility function', async () => {
1642+
const row = await db.queryOne('SELECT VEC_DIMS(embedding) AS dims FROM embeddings WHERE id = 1');
1643+
assert.equal(row.dims, 3);
1644+
});
1645+
1646+
it('should reject wrong dimension count on insert', async () => {
1647+
await assert.rejects(
1648+
db.execute("INSERT INTO embeddings (id, embedding) VALUES (99, '[0.1, 0.2]')"),
1649+
/dimension|mismatch|expected/i
1650+
);
1651+
});
1652+
1653+
it('should support HNSW index creation', async () => {
1654+
await db.exec('CREATE TABLE hnsw_test (id INTEGER PRIMARY KEY, vec VECTOR(3))');
1655+
await db.execute("INSERT INTO hnsw_test (id, vec) VALUES (1, '[0.1, 0.2, 0.3]')");
1656+
await db.execute("INSERT INTO hnsw_test (id, vec) VALUES (2, '[0.4, 0.5, 0.6]')");
1657+
await db.execute("INSERT INTO hnsw_test (id, vec) VALUES (3, '[0.7, 0.8, 0.9]')");
1658+
await db.exec('CREATE INDEX idx_hnsw ON hnsw_test(vec) USING HNSW');
1659+
const rows = await db.query(
1660+
"SELECT id, VEC_DISTANCE_L2(vec, '[0.4, 0.5, 0.6]') AS dist FROM hnsw_test ORDER BY dist LIMIT 2"
1661+
);
1662+
assert.equal(rows.length, 2);
1663+
assert.equal(rows[0].id, 2);
1664+
});
1665+
});

src/tasks.rs

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ const TAG_INT32: u8 = 3;
3434
const TAG_DOUBLE: u8 = 4;
3535
const TAG_STRING: u8 = 5;
3636
const TAG_INT64: u8 = 6;
37+
const TAG_FLOAT32_ARRAY: u8 = 7;
3738

3839
/// C-compatible cell data — must match C++ CellData layout exactly.
3940
/// Passed to V8 helper for direct value creation (bypasses NAPI).
@@ -241,14 +242,33 @@ fn value_to_cell(val: &Value, temp_strings: &mut Vec<String>) -> CellData {
241242
str_len: last.len() as i32,
242243
}
243244
}
244-
Value::Json(s) => {
245-
let s_ref: &str = s.as_ref();
246-
CellData {
247-
tag: TAG_STRING,
248-
int_val: 0,
249-
float_val: 0.0,
250-
str_ptr: s_ref.as_ptr(),
251-
str_len: s_ref.len() as i32,
245+
Value::Extension(data) => {
246+
if let Some(s_ref) = val.as_json() {
247+
CellData {
248+
tag: TAG_STRING,
249+
int_val: 0,
250+
float_val: 0.0,
251+
str_ptr: s_ref.as_ptr(),
252+
str_len: s_ref.len() as i32,
253+
}
254+
} else if data.first() == Some(&7) {
255+
// Vector: tag byte (7) + packed little-endian f32 payload
256+
let payload = &data[1..];
257+
CellData {
258+
tag: TAG_FLOAT32_ARRAY,
259+
int_val: 0,
260+
float_val: 0.0,
261+
str_ptr: payload.as_ptr(),
262+
str_len: payload.len() as i32,
263+
}
264+
} else {
265+
CellData {
266+
tag: TAG_NULL,
267+
int_val: 0,
268+
float_val: 0.0,
269+
str_ptr: ptr::null(),
270+
str_len: 0,
271+
}
252272
}
253273
}
254274
}

src/v8_helpers.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,14 @@
2727

2828
// Cell type tags — must match Rust #[repr(u8)] CellTag
2929
enum CellTag : uint8_t {
30-
TAG_NULL = 0,
31-
TAG_BOOL_FALSE = 1,
32-
TAG_BOOL_TRUE = 2,
33-
TAG_INT32 = 3,
34-
TAG_DOUBLE = 4,
35-
TAG_STRING = 5,
36-
TAG_INT64 = 6,
30+
TAG_NULL = 0,
31+
TAG_BOOL_FALSE = 1,
32+
TAG_BOOL_TRUE = 2,
33+
TAG_INT32 = 3,
34+
TAG_DOUBLE = 4,
35+
TAG_STRING = 5,
36+
TAG_INT64 = 6,
37+
TAG_FLOAT32_ARRAY = 7,
3738
};
3839

3940
// C-compatible cell data — must match Rust #[repr(C)] CellData layout
@@ -82,6 +83,13 @@ static inline v8::Local<v8::Value> cell_to_v8(v8::Isolate* isolate,
8283
// Large integers outside i32 range — still a JS Number (double).
8384
// Matches napi_create_int64 behavior (converts to double).
8485
return v8::Number::New(isolate, static_cast<double>(cell.int_val));
86+
case TAG_FLOAT32_ARRAY: {
87+
// Vector: str_ptr = packed LE f32 bytes, str_len = byte count
88+
int byte_len = cell.str_len;
89+
auto backing = v8::ArrayBuffer::New(isolate, byte_len);
90+
memcpy(backing->GetBackingStore()->Data(), cell.str_ptr, byte_len);
91+
return v8::Float32Array::New(backing, 0, byte_len / 4);
92+
}
8593
default:
8694
return v8::Null(isolate);
8795
}

src/value.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,37 @@ fn js_to_value_typed(env: sys::napi_env, val: sys::napi_value) -> napi::Result<V
9999
}
100100

101101
napi::ValueType::Object => {
102+
// Check TypedArray (Float32Array for vector params)
103+
let mut is_typedarray = false;
104+
check(unsafe { sys::napi_is_typedarray(env, val, &mut is_typedarray) })?;
105+
if is_typedarray {
106+
let mut typedarray_type = 0;
107+
let mut length = 0;
108+
let mut data = ptr::null_mut();
109+
let mut arraybuffer = ptr::null_mut();
110+
let mut offset = 0;
111+
check(unsafe {
112+
sys::napi_get_typedarray_info(
113+
env,
114+
val,
115+
&mut typedarray_type,
116+
&mut length,
117+
&mut data,
118+
&mut arraybuffer,
119+
&mut offset,
120+
)
121+
})?;
122+
// napi_float32_array = 4
123+
if typedarray_type == 4 {
124+
// An empty typed array may report a null `data`; slice::from_raw_parts requires non-null even for len 0.
125+
if length == 0 || data.is_null() { return Ok(Value::vector(Vec::new())); }
126+
return Ok(Value::vector(unsafe { std::slice::from_raw_parts(data as *const f32, length) }.to_vec()));
127+
}
128+
return Err(napi::Error::from_reason(
129+
"Only Float32Array is supported for vector parameters",
130+
));
131+
}
132+
102133
// Check Date
103134
let mut is_date = false;
104135
check(unsafe { sys::napi_is_date(env, val, &mut is_date) })?;

0 commit comments

Comments
 (0)