Skip to content

Commit b9a7032

Browse files
committed
bug fixes and performance improvements
1 parent 721e9d3 commit b9a7032

13 files changed

Lines changed: 660 additions & 41 deletions

File tree

.github/workflows/ci.yml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
name: ci
2+
3+
on:
4+
push:
5+
pull_request:
6+
7+
jobs:
8+
test:
9+
runs-on: ubuntu-latest
10+
11+
steps:
12+
- uses: actions/checkout@v4
13+
14+
- name: Install build dependencies
15+
run: |
16+
sudo apt-get update
17+
sudo apt-get install -y \
18+
meson \
19+
ninja-build \
20+
pkg-config \
21+
libsqlite3-dev \
22+
libbenchmark-dev \
23+
python3-pip \
24+
gcovr
25+
26+
- name: Configure (coverage)
27+
run: meson setup build_coverage -Dbuildtype=debugoptimized -Db_coverage=true
28+
29+
- name: Build
30+
run: meson compile -C build_coverage
31+
32+
- name: Test
33+
run: meson test -C build_coverage
34+
35+
- name: Coverage (summary + artifacts)
36+
run: |
37+
gcovr -r . \
38+
--object-directory build_coverage \
39+
--exclude '.*build.*' \
40+
--exclude '.*third_party.*' \
41+
--exclude '.*benchmarks.*' \
42+
--gcov-ignore-parse-errors=suspicious_hits.warn \
43+
--merge-mode-functions=merge-use-line-0 \
44+
--print-summary \
45+
--html-details coverage/index.html \
46+
--cobertura coverage/coverage.xml
47+
48+
- name: Upload coverage artifacts
49+
uses: actions/upload-artifact@v4
50+
with:
51+
name: coverage
52+
path: coverage/
53+
54+
tsan:
55+
runs-on: ubuntu-latest
56+
57+
steps:
58+
- uses: actions/checkout@v4
59+
60+
- name: Install build dependencies
61+
run: |
62+
sudo apt-get update
63+
sudo apt-get install -y \
64+
clang \
65+
meson \
66+
ninja-build \
67+
pkg-config \
68+
libsqlite3-dev
69+
70+
- name: Configure (TSAN)
71+
run: |
72+
CC=clang CXX=clang++ meson setup build_tsan \
73+
-Dbuildtype=debugoptimized \
74+
-Db_sanitize=thread
75+
76+
- name: Build
77+
run: meson compile -C build_tsan
78+
79+
- name: Test
80+
run: meson test -C build_tsan
81+
82+
benchmarks:
83+
runs-on: ubuntu-latest
84+
85+
steps:
86+
- uses: actions/checkout@v4
87+
88+
- name: Install build dependencies
89+
run: |
90+
sudo apt-get update
91+
sudo apt-get install -y \
92+
meson \
93+
ninja-build \
94+
pkg-config \
95+
libsqlite3-dev \
96+
libbenchmark-dev
97+
98+
- name: Configure (release + benchmarks)
99+
run: meson setup build_bench -Dbuildtype=release -Denable_benchmarks=true
100+
101+
- name: Build
102+
run: meson compile -C build_bench
103+
104+
- name: Run benchmarks
105+
run: |
106+
mkdir -p bench_results
107+
./build_bench/benchmarks/rag_pipeline_benchmark --benchmark_min_time=0.2s --benchmark_out=bench_results/rag.json --benchmark_out_format=json
108+
./build_bench/benchmarks/batch_distance_benchmark --benchmark_min_time=0.2s --benchmark_out=bench_results/batch.json --benchmark_out_format=json
109+
110+
- name: Upload benchmark artifacts
111+
uses: actions/upload-artifact@v4
112+
with:
113+
name: bench_results
114+
path: bench_results/*.json

include/sqlite-vec-cpp/distances/batch.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <algorithm>
44
#include <execution>
5+
#include <numeric>
56
#include <span>
67
#include <vector>
78
#include "../concepts/distance_metric.hpp"

include/sqlite-vec-cpp/index/hnsw.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,10 @@ template <concepts::VectorElement StorageT, typename MetricT> class HNSWIndex {
238238
return;
239239

240240
size_t actual_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
241+
if (actual_threads == 0) {
242+
actual_threads = 1;
243+
}
244+
241245
ThreadPool pool(actual_threads);
242246

243247
pool.parallel_for(ids.size(),
@@ -260,6 +264,10 @@ template <concepts::VectorElement StorageT, typename MetricT> class HNSWIndex {
260264
return;
261265

262266
size_t actual_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
267+
if (actual_threads == 0) {
268+
actual_threads = 1;
269+
}
270+
263271
ThreadPool pool(actual_threads);
264272

265273
constexpr size_t DEFAULT_BATCH_SIZE = 256;

include/sqlite-vec-cpp/sqlite/context.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,11 @@ class Context {
7070

7171
/// Set blob result with custom destructor
7272
template <typename Deleter>
73-
void result_blob(std::span<const std::uint8_t> blob, Deleter&& deleter) const noexcept {
73+
void result_blob(std::span<const std::uint8_t> blob,
74+
[[maybe_unused]] Deleter&& deleter) const noexcept {
7475
// Note: For custom deleters, would need to wrap in a C callback
7576
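// For now the deleter is ignored (never invoked) and the blob is passed to result_blob(blob), i.e. SQLITE_TRANSIENT; the caller keeps ownership of the buffer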
77+
(void)deleter;
7678
result_blob(blob);
7779
}
7880

include/sqlite-vec-cpp/sqlite/registration.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,32 @@ inline Result<void> register_all_functions(sqlite3* db) {
9898
return err<void>(Error::sqlite_error("Failed to register vec_bit", rc));
9999
}
100100

101+
// Register vec_f32_simple function (for vec0 compatibility)
102+
// This version doesn't set subtype so virtual table xUpdate can access blob
103+
rc = sqlite3_create_function_v2(db, "vec_f32_simple", 1, SQLITE_UTF8 | SQLITE_DETERMINISTIC,
104+
nullptr, vec_f32_simple, nullptr, nullptr, nullptr);
105+
if (rc != SQLITE_OK) {
106+
return err<void>(Error::sqlite_error("Failed to register vec_f32_simple", rc));
107+
}
108+
109+
// Register vec_int8 function (int8 vector creation from JSON)
110+
rc = sqlite3_create_function_v2(db, "vec_int8", 1,
111+
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_SUBTYPE |
112+
SQLITE_RESULT_SUBTYPE,
113+
nullptr, vec_int8, nullptr, nullptr, nullptr);
114+
if (rc != SQLITE_OK) {
115+
return err<void>(Error::sqlite_error("Failed to register vec_int8", rc));
116+
}
117+
118+
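// Register vec_bit function (bit vector creation from blob). NOTE(review): vec_bit is already registered immediately before this block; this repeat call looks redundant — confirm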
119+
rc = sqlite3_create_function_v2(db, "vec_bit", 1,
120+
SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_SUBTYPE |
121+
SQLITE_RESULT_SUBTYPE,
122+
nullptr, vec_bit, nullptr, nullptr, nullptr);
123+
if (rc != SQLITE_OK) {
124+
return err<void>(Error::sqlite_error("Failed to register vec_bit", rc));
125+
}
126+
101127
// Register enhanced functions (C++20/23 specific features)
102128
rc = sqlite3_create_function_v2(db, "vec_dot", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, nullptr,
103129
vec_dot, nullptr, nullptr, nullptr);

include/sqlite-vec-cpp/sqlite/utility_functions.hpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,9 @@ inline void vec_f32_impl(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
6363
std::size_t size = vec.size() * sizeof(float);
6464
unsigned int subtype = static_cast<unsigned int>(VectorElementType::Float32);
6565

66-
fprintf(stderr, "[vec_f32_impl] DEBUG: vec.size()=%zu, data=%p, size=%zu, subtype=%u\n",
67-
vec.size(), data, size, subtype);
68-
6966
// Use the wrapper method that correctly sets blob + subtype
7067
context.result_blob_with_subtype(
7168
std::span<const std::uint8_t>(reinterpret_cast<const std::uint8_t*>(data), size), subtype);
72-
73-
fprintf(stderr, "[vec_f32_impl] DEBUG: result_blob_with_subtype called successfully\n");
7469
}
7570

7671
/// SQLite function: vec_int8(json_or_blob) -> int8 vector blob

include/sqlite-vec-cpp/sqlite/vec0_module.hpp

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
#pragma once
22

33
#include <sqlite3.h>
4+
#include <cstdint>
45
#include <cstring>
56
#include <memory>
67
#include <sstream>
78
#include <string>
89
#include <vector>
910
#include "../utils/error.hpp"
11+
#include "parsers.hpp"
12+
#include "value.hpp"
1013

1114
namespace sqlite_vec_cpp::sqlite {
1215

@@ -84,7 +87,11 @@ inline bool parse_vec0_schema(int argc, const char* const* argv, std::string& em
8487

8588
// Helper: Create shadow tables for vec0 storage
8689
inline int create_shadow_tables(sqlite3* db, const char* schema, const char* table,
87-
const char* embedding_col, size_t dims, char** pzErr) {
90+
const char* embedding_col, [[maybe_unused]] size_t dims,
91+
char** pzErr) {
92+
// Note: dims parameter is reserved for future use (validation, typed vectors)
93+
(void)dims;
94+
8895
// Shadow table for metadata
8996
std::ostringstream meta_sql;
9097
meta_sql << "CREATE TABLE IF NOT EXISTS \"" << schema << "\".\"" << table << "_metadata\" ("
@@ -124,6 +131,9 @@ inline int vec0Create(sqlite3* db, void* pAux, int argc, const char* const* argv
124131
sqlite3_vtab** ppVTab, char** pzErr) {
125132
(void)pAux;
126133

134+
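// Mark vec0 as direct-only: SQLITE_VTAB_DIRECTONLY prohibits using this virtual table from within triggers and views (it does not enable shadow-table writes)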
135+
sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
136+
127137
if (argc < 3) {
128138
*pzErr = sqlite3_mprintf("vec0: insufficient arguments");
129139
return SQLITE_ERROR;
@@ -342,12 +352,33 @@ inline int vec0Update(sqlite3_vtab* pVTab, int argc, sqlite3_value** argv, sqlit
342352
bool is_insert = (sqlite3_value_type(argv[0]) == SQLITE_NULL);
343353
int64_t rowid = is_insert ? 0 : sqlite3_value_int64(argv[0]);
344354

345-
// argv[2] = embedding column value
346-
const void* blob = sqlite3_value_blob(argv[2]);
347-
int bytes = sqlite3_value_bytes(argv[2]);
355+
// argv layout per SQLite vtab spec:
356+
// - argv[0]: old rowid (or NULL)
357+
// - argv[1]: new rowid (or NULL)
358+
// - argv[2..]: column values in declared order
359+
if (argc < 4) {
360+
return SQLITE_MISUSE;
361+
}
362+
363+
// Our declared schema is: CREATE TABLE x(rowid INTEGER PRIMARY KEY, "embedding")
364+
// So the embedding column value is argv[3] (argv[2] corresponds to rowid column value).
365+
sqlite3_value* embedding_val = argv[3];
366+
const void* blob = sqlite3_value_blob(embedding_val);
367+
int bytes = sqlite3_value_bytes(embedding_val);
368+
369+
// If the embedding arrives as TEXT (e.g. JSON), parse it into floats and bind those bytes as the blob; if parsing fails, the raw text bytes read above are bound unchanged
370+
std::vector<float> parsed_vec;
371+
if (sqlite3_value_type(embedding_val) == SQLITE_TEXT) {
372+
Value value(embedding_val);
373+
auto parsed = parse_vector_from_value<float>(value);
374+
if (parsed) {
375+
parsed_vec = std::move(parsed.value());
376+
blob = parsed_vec.data();
377+
bytes = static_cast<int>(parsed_vec.size() * sizeof(float));
378+
}
379+
}
348380

349381
if (is_insert) {
350-
// INSERT
351382
std::ostringstream sql;
352383
sql << "INSERT INTO \"" << table->schema_name << "\".\"" << table->table_name
353384
<< "_vectors\" (\"" << table->embedding_column << "\") VALUES (?)";
@@ -357,9 +388,13 @@ inline int vec0Update(sqlite3_vtab* pVTab, int argc, sqlite3_value** argv, sqlit
357388
if (rc != SQLITE_OK)
358389
return rc;
359390

360-
sqlite3_bind_blob(stmt, 1, blob, bytes, SQLITE_TRANSIENT);
361-
rc = sqlite3_step(stmt);
391+
if (blob && bytes > 0) {
392+
sqlite3_bind_blob(stmt, 1, blob, bytes, SQLITE_TRANSIENT);
393+
} else {
394+
sqlite3_bind_null(stmt, 1);
395+
}
362396

397+
rc = sqlite3_step(stmt);
363398
if (rc == SQLITE_DONE) {
364399
*pRowid = sqlite3_last_insert_rowid(table->db);
365400
rc = SQLITE_OK;
@@ -368,7 +403,6 @@ inline int vec0Update(sqlite3_vtab* pVTab, int argc, sqlite3_value** argv, sqlit
368403
sqlite3_finalize(stmt);
369404
return rc;
370405
} else {
371-
// UPDATE
372406
std::ostringstream sql;
373407
sql << "UPDATE \"" << table->schema_name << "\".\"" << table->table_name
374408
<< "_vectors\" SET \"" << table->embedding_column << "\"=? WHERE rowid=" << rowid;
@@ -378,9 +412,13 @@ inline int vec0Update(sqlite3_vtab* pVTab, int argc, sqlite3_value** argv, sqlit
378412
if (rc != SQLITE_OK)
379413
return rc;
380414

381-
sqlite3_bind_blob(stmt, 1, blob, bytes, SQLITE_TRANSIENT);
382-
rc = sqlite3_step(stmt);
415+
if (blob && bytes > 0) {
416+
sqlite3_bind_blob(stmt, 1, blob, bytes, SQLITE_TRANSIENT);
417+
} else {
418+
sqlite3_bind_null(stmt, 1);
419+
}
383420

421+
rc = sqlite3_step(stmt);
384422
if (rc == SQLITE_DONE) {
385423
*pRowid = rowid;
386424
rc = SQLITE_OK;
@@ -419,7 +457,8 @@ static sqlite3_module vec0_module = {
419457
/* xSavepoint */ nullptr,
420458
/* xRelease */ nullptr,
421459
/* xRollbackTo */ nullptr,
422-
/* xShadowName */ nullptr};
460+
/* xShadowName */ nullptr,
461+
/* xIntegrity */ nullptr};
423462

424463
inline Result<void> register_vec0_module(sqlite3* db) {
425464
if (!db) {

0 commit comments

Comments
 (0)