Skip to content

Commit 399e7bf

Browse files
committed
fix: use SET instead of CONTENT in UPSERT to avoid overwriting record IDs
Critical fix for SurrealDB 2.x: changed the UPSERT queries to use a SET clause instead of CONTENT, so they no longer attempt to overwrite the built-in record ID field with a string value.

Root cause: CONTENT $doc includes the id field from the Rust struct (which is a String) and tries to overwrite the record's id field (which is a built-in record type in SurrealDB). This causes errors or data corruption.

Fixed queries:
1. UPSERT_NODES_QUERY: changed to SET with an explicit field list (excludes id)
2. UPSERT_EDGES_QUERY: changed to SET (excludes id, converts from/to to records)
3. UPSERT_SYMBOL_EMBEDDINGS_QUERY: changed to SET with an explicit field list

Pattern:
Before: UPSERT type::thing('nodes', $doc.id) CONTENT $doc;
After: UPSERT type::thing('nodes', $doc.id) SET field1 = $doc.field1, ...;

Benefits:
- Record ID stays as a proper record type
- No attempt to overwrite id with a string
- Explicit field mapping shows what's being stored
- Compatible with the SurrealDB 2.x record ID system

This should eliminate the "Failed to read SurrealDB node count" warnings during indexing.
1 parent 0a79d3e commit 399e7bf

File tree

1 file changed

+43
-11
lines changed

1 file changed

+43
-11
lines changed

crates/codegraph-graph/src/surrealdb_storage.rs

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,29 +1367,61 @@ pub struct NodeEmbeddingRecord {
13671367
/// SurrealQL for batch-upserting node records.
///
/// Binds the `$data` parameter to `$batch` and, for each `$doc` in it, upserts the
/// record addressed by `type::thing('nodes', $doc.id)`.
///
/// Each stored field is assigned individually with `SET` (there is no assignment to
/// `id`), and `updated_at` is stamped with `time::now()`. This replaces the earlier
/// `CONTENT $doc` form shown on the removed (`-`) line.
const UPSERT_NODES_QUERY: &str = r#"
13681368
LET $batch = $data;
13691369
FOR $doc IN $batch {
1370-
UPSERT type::thing('nodes', $doc.id) CONTENT $doc;
1370+
UPSERT type::thing('nodes', $doc.id) SET
1371+
name = $doc.name,
1372+
node_type = $doc.node_type,
1373+
language = $doc.language,
1374+
content = $doc.content,
1375+
file_path = $doc.file_path,
1376+
start_line = $doc.start_line,
1377+
end_line = $doc.end_line,
1378+
embedding_384 = $doc.embedding_384,
1379+
embedding_768 = $doc.embedding_768,
1380+
embedding_1024 = $doc.embedding_1024,
1381+
embedding_2048 = $doc.embedding_2048,
1382+
embedding_4096 = $doc.embedding_4096,
1383+
embedding_model = $doc.embedding_model,
1384+
complexity = $doc.complexity,
1385+
metadata = $doc.metadata,
1386+
project_id = $doc.project_id,
1387+
organization_id = $doc.organization_id,
1388+
repository_url = $doc.repository_url,
1389+
domain = $doc.domain,
1390+
updated_at = time::now();
13711391
}
13721392
"#;
13731393

13741394
/// SurrealQL for batch-upserting edge records.
///
/// Binds the `$data` parameter to `$batch` and, for each `$doc` in it, upserts the
/// record addressed by `type::thing('edges', $doc.id)`.
///
/// `from` and `to` are turned into record references on the `nodes` table via
/// `type::thing('nodes', ...)`; `edge_type`, `weight` and `metadata` are copied from
/// the document. The `SET` form replaces the earlier `CONTENT { ... }` object (the
/// removed `-` lines), which also wrote `id: $doc.id`.
///
/// NOTE(review): `created_at` is assigned `time::now()` on every upsert, so an
/// existing edge would have its creation time re-stamped — confirm this is intended.
const UPSERT_EDGES_QUERY: &str = r#"
13751395
LET $batch = $data;
13761396
FOR $doc IN $batch {
1377-
UPSERT type::thing('edges', $doc.id) CONTENT {
1378-
id: $doc.id,
1379-
from: type::thing('nodes', $doc.from),
1380-
to: type::thing('nodes', $doc.to),
1381-
edge_type: $doc.edge_type,
1382-
weight: $doc.weight,
1383-
metadata: $doc.metadata,
1384-
created_at: time::now()
1385-
};
1397+
UPSERT type::thing('edges', $doc.id) SET
1398+
from = type::thing('nodes', $doc.from),
1399+
to = type::thing('nodes', $doc.to),
1400+
edge_type = $doc.edge_type,
1401+
weight = $doc.weight,
1402+
metadata = $doc.metadata,
1403+
created_at = time::now();
13861404
}
13871405
"#;
13881406

13891407
/// SurrealQL for batch-upserting symbol embedding records.
///
/// Binds the `$data` parameter to `$batch` and, for each `$doc` in it, upserts the
/// record addressed by `type::thing('symbol_embeddings', $doc.id)`.
///
/// Each stored field is assigned individually with `SET` (there is no assignment to
/// `id`); no timestamp field is written by this query. This replaces the earlier
/// `CONTENT $doc` form shown on the removed (`-`) line.
const UPSERT_SYMBOL_EMBEDDINGS_QUERY: &str = r#"
13901408
LET $batch = $data;
13911409
FOR $doc IN $batch {
1392-
UPSERT type::thing('symbol_embeddings', $doc.id) CONTENT $doc;
1410+
UPSERT type::thing('symbol_embeddings', $doc.id) SET
1411+
symbol = $doc.symbol,
1412+
normalized_symbol = $doc.normalized_symbol,
1413+
project_id = $doc.project_id,
1414+
organization_id = $doc.organization_id,
1415+
embedding_384 = $doc.embedding_384,
1416+
embedding_768 = $doc.embedding_768,
1417+
embedding_1024 = $doc.embedding_1024,
1418+
embedding_2048 = $doc.embedding_2048,
1419+
embedding_4096 = $doc.embedding_4096,
1420+
embedding_model = $doc.embedding_model,
1421+
node_id = $doc.node_id,
1422+
source_edge_id = $doc.source_edge_id,
1423+
metadata = $doc.metadata,
1424+
access_count = $doc.access_count;
13931425
}
13941426
"#;
13951427

0 commit comments

Comments
 (0)