Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
484 changes: 484 additions & 0 deletions internal/graph/store_sqlite/meta_json.go

Large diffs are not rendered by default.

175 changes: 175 additions & 0 deletions internal/graph/store_sqlite/meta_json_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package store_sqlite

import (
"bytes"
"encoding/gob"
"reflect"
"testing"

"github.com/zzet/gortex/internal/contracts"
)

// roundTrip pushes in through encodeMeta and straight back through
// decodeMeta, mirroring the persist->reload path every reader sees after
// a daemon restart / store hydration. An encode or decode error, or an
// encoding that is not a JSON object, fails the calling test immediately.
func roundTrip(t *testing.T, in map[string]any) map[string]any {
	t.Helper()

	encoded, encErr := encodeMeta(in)
	switch {
	case encErr != nil:
		t.Fatalf("encodeMeta: %v", encErr)
	case !isJSONObject(encoded):
		t.Fatalf("encodeMeta did not produce a JSON object: %q", encoded)
	}

	decoded, decErr := decodeMeta(encoded)
	if decErr != nil {
		t.Fatalf("decodeMeta: %v", decErr)
	}
	return decoded
}

// TestMetaRoundTripExactTypes is the fidelity canary: every key the audit
// found read with a raw type-assertion must survive a JSON round-trip with
// its exact Go type, or the corresponding reader silently breaks.
//
// Nested maps are fetched through a checked assertion so that a wrong
// decoded type is reported as an ordinary test failure instead of a panic
// that would abort the whole test binary.
func TestMetaRoundTripExactTypes(t *testing.T) {
	shape := &contracts.Shape{
		Kind:   "struct",
		Fields: []contracts.ShapeField{{Name: "id", Type: "int64", Required: true}},
		Notes:  []string{"partial"},
	}
	node := map[string]any{
		"signature":         "func F(x int) error",
		"visibility":        "public",
		"doc":               "F does a thing.",
		"external":          true,
		"complexity":        7,
		"loop_depth":        2,
		"parse_errors":      0,
		"position":          3,
		"line":              42,
		"confidence":        1.0, // integral float — must stay float64
		"coverage_pct":      83.5,
		"shape":             shape,
		"response_envelope": []map[string]any{{"name": "data", "type": "User"}},
		"path_param_names":  []string{"id", "org"},
		"query_params":      []string{"limit"},
		"status_codes":      []string{"200", "404"},
		"churn":             map[string]any{"commit_count": 12, "age_days": 365, "churn_rate": 2.0, "last_author": "a@b.c"},
		"coverage":          map[string]any{"num_stmt": 40, "hit": 33},
		"last_authored":     map[string]any{"timestamp": int64(1700000000), "email": "x@y.z"},
		"some_plugin_flag":  "go_linkname", // Extra tail (string)
		"is_generated":      false,         // Extra tail (bool)
	}
	got := roundTrip(t, node)

	// nested returns got[key] as a map, failing the test (not panicking)
	// when the decoded value has any other type.
	nested := func(key string) map[string]any {
		t.Helper()
		m, ok := got[key].(map[string]any)
		if !ok {
			t.Fatalf("%s: want map[string]any, got %T", key, got[key])
		}
		return m
	}

	assertType[int](t, got, "complexity", 7)
	assertType[int](t, got, "loop_depth", 2)
	assertType[int](t, got, "parse_errors", 0)
	assertType[int](t, got, "position", 3)
	assertType[int](t, got, "line", 42)
	assertType[float64](t, got, "confidence", 1.0)
	assertType[float64](t, got, "coverage_pct", 83.5)
	assertType[string](t, got, "signature", "func F(x int) error")
	assertType[string](t, got, "visibility", "public")
	assertType[string](t, got, "doc", "F does a thing.")
	assertType[bool](t, got, "external", true)
	assertType[string](t, got, "some_plugin_flag", "go_linkname")
	assertType[bool](t, got, "is_generated", false)

	// Shape must rebuild as *contracts.Shape, not map[string]any.
	gotShape, ok := got["shape"].(*contracts.Shape)
	if !ok {
		t.Fatalf("shape: want *contracts.Shape, got %T", got["shape"])
	}
	if !reflect.DeepEqual(gotShape, shape) {
		t.Errorf("shape mismatch: %+v vs %+v", gotShape, shape)
	}

	// response_envelope must be []map[string]any, not []any.
	if _, ok := got["response_envelope"].([]map[string]any); !ok {
		t.Errorf("response_envelope: want []map[string]any, got %T", got["response_envelope"])
	}
	// []string keys.
	for _, k := range []string{"path_param_names", "query_params", "status_codes"} {
		if _, ok := got[k].([]string); !ok {
			t.Errorf("%s: want []string, got %T", k, got[k])
		}
	}

	// Nested map children keep exact types.
	churn := nested("churn")
	assertType[int](t, churn, "commit_count", 12)
	assertType[int](t, churn, "age_days", 365)
	assertType[float64](t, churn, "churn_rate", 2.0) // integral float, nested
	assertType[string](t, churn, "last_author", "a@b.c")
	cov := nested("coverage")
	assertType[int](t, cov, "num_stmt", 40)
	assertType[int](t, cov, "hit", 33)
	la := nested("last_authored")
	assertType[int64](t, la, "timestamp", int64(1700000000))
}

// TestEdgeMetaRoundTripExactTypes round-trips an edge-style Meta map and
// checks that each key comes back with its exact Go type and value:
// ints stay int, floats (including integral ones) stay float64, and an
// extra string key stays string.
func TestEdgeMetaRoundTripExactTypes(t *testing.T) {
	decoded := roundTrip(t, map[string]any{
		"candidate_count": 2,
		"similarity":      0.875,
		"score":           1.0, // integral float — must stay float64
		"count":           5,
		"clone_tokens":    128,
		"synthesized_by":  "grpc", // Extra tail
	})

	// Integer-valued keys.
	assertType[int](t, decoded, "candidate_count", 2)
	assertType[int](t, decoded, "count", 5)
	assertType[int](t, decoded, "clone_tokens", 128)

	// Float-valued keys.
	assertType[float64](t, decoded, "similarity", 0.875)
	assertType[float64](t, decoded, "score", 1.0)

	// Extra (non-schema) string key.
	assertType[string](t, decoded, "synthesized_by", "grpc")
}

// TestDecodeLegacyGob feeds decodeMeta a gob-encoded map, the format of
// existing on-disk blobs, and checks that it still decodes with the
// original value types intact.
func TestDecodeLegacyGob(t *testing.T) {
	var legacy bytes.Buffer
	enc := gob.NewEncoder(&legacy)
	src := map[string]any{
		"visibility": "private",
		"complexity": 9,
		"confidence": 1.0,
	}
	if encErr := enc.Encode(src); encErr != nil {
		t.Fatalf("gob encode: %v", encErr)
	}

	decoded, decErr := decodeMeta(legacy.Bytes())
	if decErr != nil {
		t.Fatalf("decodeMeta(gob): %v", decErr)
	}

	// gob preserves exact types natively.
	assertType[string](t, decoded, "visibility", "private")
	assertType[int](t, decoded, "complexity", 9)
	assertType[float64](t, decoded, "confidence", 1.0)
}

// TestEncodeMetaEmpty pins the empty-input contract: encodeMeta returns
// (nil, nil) for both a nil and an empty map, and decodeMeta returns
// (nil, nil) for nil bytes.
func TestEncodeMetaEmpty(t *testing.T) {
	if enc, err := encodeMeta(nil); !(err == nil && enc == nil) {
		t.Fatalf("encodeMeta(nil) = %q, %v; want nil, nil", enc, err)
	}

	if enc, err := encodeMeta(map[string]any{}); !(err == nil && enc == nil) {
		t.Fatalf("encodeMeta(empty) = %q, %v; want nil, nil", enc, err)
	}

	if dec, err := decodeMeta(nil); !(err == nil && dec == nil) {
		t.Fatalf("decodeMeta(nil) = %v, %v; want nil, nil", dec, err)
	}
}

// assertType checks that m[key] exists, holds a value whose dynamic type
// is exactly T, and equals want. Each violation is reported with t.Errorf
// (the test keeps running); only the first violation found is reported.
func assertType[T comparable](t *testing.T, m map[string]any, key string, want T) {
	t.Helper()

	raw, present := m[key]
	if !present {
		t.Errorf("%s: missing from decoded map", key)
		return
	}

	switch typed := raw.(type) {
	case T:
		if typed != want {
			t.Errorf("%s: want %v, got %v", key, want, typed)
		}
	default:
		t.Errorf("%s: want type %T, got %T (value %v)", key, want, raw, raw)
	}
}
125 changes: 125 additions & 0 deletions internal/graph/store_sqlite/meta_promoted_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package store_sqlite

import (
"database/sql"
"path/filepath"
"strings"
"testing"

"github.com/zzet/gortex/internal/graph"
)

// TestPromotedColumns_RoundTrip verifies the promoted keys land in their
// columns, are stripped from the JSON blob, and restore into Meta with
// exact types — while non-promoted keys stay in the blob.
//
// All four promoted columns (signature, visibility, doc, external) are
// read back directly from the nodes table and checked against the values
// written, so a promotion that silently drops one of them is caught.
func TestPromotedColumns_RoundTrip(t *testing.T) {
	s, err := Open(filepath.Join(t.TempDir(), "p.sqlite"))
	if err != nil {
		t.Fatal(err)
	}
	defer func() { _ = s.Close() }()

	s.AddNode(&graph.Node{
		ID: "f.go::F", Kind: graph.KindFunction, Name: "F", FilePath: "f.go",
		Meta: map[string]any{
			"signature":  "func F()",
			"visibility": "public",
			"doc":        "F docs",
			"external":   true,
			"complexity": 5, // non-promoted — must stay in the blob
		},
	})

	// The in-memory view must be unchanged by the column promotion.
	n := s.GetNode("f.go::F")
	if n == nil {
		t.Fatal("GetNode returned nil")
	}
	assertType[string](t, n.Meta, "signature", "func F()")
	assertType[string](t, n.Meta, "visibility", "public")
	assertType[string](t, n.Meta, "doc", "F docs")
	assertType[bool](t, n.Meta, "external", true)
	assertType[int](t, n.Meta, "complexity", 5)

	// Read the raw row to check where each value was physically stored.
	var sig, vis, doc sql.NullString
	var ext sql.NullBool
	var blob []byte
	row := s.db.QueryRow(`SELECT signature, visibility, doc, external, meta FROM nodes WHERE id=?`, "f.go::F")
	if err := row.Scan(&sig, &vis, &doc, &ext, &blob); err != nil {
		t.Fatal(err)
	}
	if !sig.Valid || sig.String != "func F()" {
		t.Errorf("signature column = %+v", sig)
	}
	if !vis.Valid || vis.String != "public" {
		t.Errorf("visibility column = %+v", vis)
	}
	if !doc.Valid || doc.String != "F docs" {
		t.Errorf("doc column = %+v", doc)
	}
	if !ext.Valid || !ext.Bool {
		t.Errorf("external column = %+v", ext)
	}

	// Promoted keys must not be duplicated inside the blob.
	// NOTE(review): "doc" is not substring-checked here; confirm whether
	// that omission is intentional before adding it to the list.
	blobStr := string(blob)
	for _, k := range []string{"signature", "visibility", "external"} {
		if strings.Contains(blobStr, k) {
			t.Errorf("blob still contains promoted key %q: %s", k, blobStr)
		}
	}
	if !strings.Contains(blobStr, "complexity") {
		t.Errorf("blob missing non-promoted key complexity: %s", blobStr)
	}
}

// TestPromotedColumns_ExternalFalse stores a node whose Meta carries
// external=false and checks that reading the node back yields a bool
// false under that key — i.e. a stored false is distinguishable from
// "not set" and does not vanish.
func TestPromotedColumns_ExternalFalse(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "p.sqlite")
	s, err := Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer func() { _ = s.Close() }()

	s.AddNode(&graph.Node{
		ID:       "x",
		Kind:     graph.KindFunction,
		Name:     "x",
		FilePath: "x.go",
		Meta:     map[string]any{"external": false},
	})

	n := s.GetNode("x")
	if n == nil {
		t.Fatal("nil")
	}

	raw := n.Meta["external"]
	if flag, isBool := raw.(bool); !isBool || flag {
		t.Errorf("external false: got %v (%T)", raw, raw)
	}
}

// TestPromotedColumns_Migration verifies ensureNodeColumns adds the
// promoted columns to a database created with the pre-promotion schema.
//
// The old-schema database is built through a raw database/sql handle,
// which is always closed (and its Close error checked) before the store
// reopens the file, so no handle is leaked on the failure path. After
// reopening, the test checks both the Meta round-trip and the promoted
// columns themselves.
func TestPromotedColumns_Migration(t *testing.T) {
	path := filepath.Join(t.TempDir(), "old.sqlite")
	raw, err := sql.Open("sqlite", path)
	if err != nil {
		t.Fatal(err)
	}
	_, execErr := raw.Exec(`CREATE TABLE nodes (
		id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL,
		qual_name TEXT NOT NULL DEFAULT '', file_path TEXT NOT NULL,
		start_line INTEGER NOT NULL DEFAULT 0, end_line INTEGER NOT NULL DEFAULT 0,
		language TEXT NOT NULL DEFAULT '', repo_prefix TEXT NOT NULL DEFAULT '',
		workspace_id TEXT NOT NULL DEFAULT '', project_id TEXT NOT NULL DEFAULT '',
		meta BLOB
	) WITHOUT ROWID`)
	// Close before inspecting execErr so the handle is released even when
	// table creation failed.
	closeErr := raw.Close()
	if execErr != nil {
		t.Fatalf("create pre-promotion schema: %v", execErr)
	}
	if closeErr != nil {
		t.Fatalf("close pre-promotion db: %v", closeErr)
	}

	s, err := Open(path)
	if err != nil {
		t.Fatalf("Open old-schema db: %v", err)
	}
	defer func() { _ = s.Close() }()
	s.AddNode(&graph.Node{
		ID: "m", Kind: graph.KindFunction, Name: "m", FilePath: "m.go",
		Meta: map[string]any{"signature": "sig", "external": true},
	})
	n := s.GetNode("m")
	if n == nil {
		t.Fatal("nil after migration")
	}
	assertType[string](t, n.Meta, "signature", "sig")
	assertType[bool](t, n.Meta, "external", true)

	// The columns must exist on the migrated table and hold the promoted
	// values; the SELECT itself fails if the migration did not add them.
	var sig sql.NullString
	var ext sql.NullBool
	row := s.db.QueryRow(`SELECT signature, external FROM nodes WHERE id=?`, "m")
	if err := row.Scan(&sig, &ext); err != nil {
		t.Fatalf("read promoted columns after migration: %v", err)
	}
	if !sig.Valid || sig.String != "sig" {
		t.Errorf("signature column = %+v", sig)
	}
	if !ext.Valid || !ext.Bool {
		t.Errorf("external column = %+v", ext)
	}
}
24 changes: 18 additions & 6 deletions internal/graph/store_sqlite/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ package store_sqlite
// IGNORE on that constraint matches the in-memory "second AddEdge
// for the same key is a no-op" semantics.
//
// - meta is a gob-encoded blob. nil / empty Meta is stored as NULL.
// - meta is a JSON document (see meta_json.go). nil / empty Meta is
// stored as NULL. Four universal, hot-read node keys are promoted to
// their own nullable columns (signature / visibility / doc /
// external): they are stripped from the JSON blob on write and
// restored into Meta on read, so the in-memory map is unchanged. A
// NULL column means "not set" (legacy gob rows predate the columns
// and keep their values in the blob). Existing databases gain the
// columns via ALTER on the next Open (ensureNodeColumns).
//
// - Secondary indexes mirror the in-memory store's hot lookup paths:
// nodes_by_name -- FindNodesByName / FindNodesByNameInRepo
Expand Down Expand Up @@ -45,6 +52,10 @@ CREATE TABLE IF NOT EXISTS nodes (
repo_prefix TEXT NOT NULL DEFAULT '',
workspace_id TEXT NOT NULL DEFAULT '',
project_id TEXT NOT NULL DEFAULT '',
signature TEXT,
visibility TEXT,
doc TEXT,
external INTEGER,
meta BLOB
) WITHOUT ROWID;

Expand Down Expand Up @@ -121,7 +132,7 @@ CREATE TABLE IF NOT EXISTS clone_shingles (

-- constant_values is the per-KindConstant literal-value sidecar: one row
-- per constant whose RHS is a string / numeric literal, keyed by node_id
-- (the join key back to nodes.id). Lifting the value out of the gob Meta
-- (the join key back to nodes.id). Lifting the value out of the JSON Meta
-- blob keeps it queryable (and out of the every-node-load decode path) so
-- the resolver can dereference a const-identifier dispatch name to its
-- value across files. file_path scopes per-file eviction on reindex;
Expand Down Expand Up @@ -171,10 +182,11 @@ CREATE TABLE IF NOT EXISTS vectors (
) WITHOUT ROWID;

-- churn_enrichment is the per-node git-churn sidecar (change A: move
-- enrichment OUT of nodes.meta so the node hot path stops gob-encoding
-- rarely-read data and get_churn_rate does an indexed read instead of an
-- AllNodes+gob scan). One typed row per enriched file/function/method
-- node, keyed by node_id (join key back to nodes.id); repo_prefix scopes
-- enrichment OUT of nodes.meta so the node hot path stops encoding
-- rarely-read data into the blob and get_churn_rate does an indexed read
-- instead of an AllNodes+meta-decode scan). One typed row per enriched
-- file/function/method node, keyed by node_id (join key back to
-- nodes.id); repo_prefix scopes
-- per-repo reseeds/wipes. head_sha/branch/computed_at are file-level only
-- (empty for symbols). WITHOUT ROWID: the PK index IS the table.
CREATE TABLE IF NOT EXISTS churn_enrichment (
Expand Down
Loading
Loading