From b6e8e923c3be68a52e36d355a2f18d723388ee18 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 13:28:43 +0200 Subject: [PATCH 1/7] store/sqlite: encode node and edge meta as JSON instead of gob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Meta was gob-encoded with a fresh encoder/decoder constructed per blob, so gob recompiled its type-decode engine on every edge — that dominated cold-load CPU and allocation and could pin the daemon at multi-hundred percent CPU while a whole-graph resolve walked the edges. Encode meta as JSON and decode it through metaWire, a typed DTO whose fields parse each known key as its exact Go type (int / int64 / float64 / *contracts.Shape / []string / []map[string]any). The open tail and nested maps are normalised with a small key-type table, so the in-memory map a caller receives is type-identical to what gob produced and no reader changes. JSON needs no per-call engine compilation and carries no custom binary versioning. Existing on-disk stores hold gob blobs; decodeMeta sniffs the leading byte ('{' => JSON) and falls back to gob for legacy rows, which migrate to JSON on their next write. No schema migration required. 
--- internal/graph/store_sqlite/meta_json.go | 355 ++++++++++++++++++ internal/graph/store_sqlite/meta_json_test.go | 175 +++++++++ internal/graph/store_sqlite/store.go | 27 +- 3 files changed, 532 insertions(+), 25 deletions(-) create mode 100644 internal/graph/store_sqlite/meta_json.go create mode 100644 internal/graph/store_sqlite/meta_json_test.go diff --git a/internal/graph/store_sqlite/meta_json.go b/internal/graph/store_sqlite/meta_json.go new file mode 100644 index 00000000..9694793a --- /dev/null +++ b/internal/graph/store_sqlite/meta_json.go @@ -0,0 +1,355 @@ +package store_sqlite + +import ( + "bytes" + "encoding/gob" + "encoding/json" + + "github.com/zzet/gortex/internal/contracts" +) + +// Node / edge Meta is a map[string]any persisted in the `meta` column. +// It is stored as JSON, not gob: JSON needs no per-call type-engine +// compilation (the gob hot path recompiled its decoder on every edge, +// which dominated cold-load CPU and allocation), and a JSON document is +// human-readable and free of any custom binary versioning. +// +// JSON has one numeric type, so a naive json.Unmarshal into a +// map[string]any widens every number to float64 and every []T to []any, +// silently corrupting the readers that type-assert .(int) / .(float64) / +// .([]string) / .(*contracts.Shape). decodeMeta therefore routes the +// document through metaWire — a typed DTO whose fields parse each known +// key as its exact Go type — and normalises the open tail (Extra plus +// nested maps) with a small key-type table. The in-memory map a caller +// receives is byte-for-byte type-identical to what the old gob path +// produced, so no reader changes. +// +// Existing on-disk stores still hold gob blobs; decodeMeta sniffs the +// leading byte ('{' => JSON) and falls back to gob for legacy rows, which +// migrate to JSON on their next write. No schema migration is required. + +// metaWire is the decode-side DTO. 
// Scalar fields are pointers so an absent
// key (nil) is distinguished from a present zero value — comma-ok readers
// rely on that distinction. Slices, maps and Shape are already nil-able.
//
// Each json tag names the meta key the field carries; toMap writes every
// populated field back into the in-memory map under that same key.
type metaWire struct {
	// Symbol-shape keys stamped by language extractors (node).
	Signature  *string `json:"signature,omitempty"`
	Visibility *string `json:"visibility,omitempty"`
	Doc        *string `json:"doc,omitempty"`
	External   *bool   `json:"external,omitempty"`

	// Analyzer / contract scalar keys (node).
	Complexity  *int     `json:"complexity,omitempty"`
	LoopDepth   *int     `json:"loop_depth,omitempty"`
	ParseErrors *int     `json:"parse_errors,omitempty"`
	Position    *int     `json:"position,omitempty"`
	Line        *int     `json:"line,omitempty"`
	Confidence  *float64 `json:"confidence,omitempty"`
	CoveragePct *float64 `json:"coverage_pct,omitempty"`

	// Contract structural keys (node).
	Shape            *contracts.Shape `json:"shape,omitempty"`
	ResponseEnvelope []map[string]any `json:"response_envelope,omitempty"`
	PathParamNames   []string         `json:"path_param_names,omitempty"`
	QueryParams      []string         `json:"query_params,omitempty"`
	StatusCodes      []string         `json:"status_codes,omitempty"`

	// Edge scalar keys.
	CandidateCount *int     `json:"candidate_count,omitempty"`
	Similarity     *float64 `json:"similarity,omitempty"`
	Score          *float64 `json:"score,omitempty"`
	Count          *int     `json:"count,omitempty"`
	CloneTokens    *int     `json:"clone_tokens,omitempty"`

	// Nested enrichment maps (sidecar-primary; the meta map is the
	// un-migrated / in-memory fallback). Decoded as plain maps then
	// normalised via the key-type table so their integer children come
	// back as int / int64 rather than float64.
	Churn        map[string]any `json:"churn,omitempty"`
	Coverage     map[string]any `json:"coverage,omitempty"`
	LastAuthored map[string]any `json:"last_authored,omitempty"`
	ContractMeta map[string]any `json:"contract_meta,omitempty"`

	// Extra captures every key not named above (the open / plugin /
	// per-language tail, overwhelmingly strings and bools).
	Extra map[string]any `json:"-"`
}

// metaWireKnownKeys are the JSON keys consumed by metaWire's typed fields;
// everything else is captured into Extra. The list mirrors the json tags on
// metaWire one-for-one, so a field added there needs an entry here too.
var metaWireKnownKeys = []string{
	"signature", "visibility", "doc", "external",
	"complexity", "loop_depth", "parse_errors", "position", "line",
	"confidence", "coverage_pct",
	"shape", "response_envelope", "path_param_names", "query_params", "status_codes",
	"candidate_count", "similarity", "score", "count", "clone_tokens",
	"churn", "coverage", "last_authored", "contract_meta",
}

// metaFloatKeys are keys whose numeric value must stay float64 even when it
// happens to be integral (e.g. confidence 1.0 marshals as "1"); without
// this they would normalise to int and break a .(float64) reader.
// normalizeNumber consults this table before any other, so a key listed
// here is never coerced to an integer type.
var metaFloatKeys = map[string]bool{
	"confidence": true, "coverage_pct": true, "score": true,
	"similarity": true, "churn_rate": true, "rate": true,
}

// metaInt64Keys are keys whose numeric value must be int64 (unix
// timestamps), matching readers that assert .(int64). Every other integral
// number normalises to plain int.
var metaInt64Keys = map[string]bool{
	"timestamp": true, "ts": true,
}

// metaStringSliceKeys are keys whose array value must be []string (JSON
// arrays decode to []any); readers assert .([]string). normalizeSlice
// rebuilds the slice from the string elements only, so a non-string
// element under one of these keys is dropped.
var metaStringSliceKeys = map[string]bool{
	"path_param_names": true, "query_params": true, "status_codes": true,
	"notes": true, "methods": true, "arg_names": true, "repos": true,
}

// metaMapSliceKeys are keys whose array value must be []map[string]any.
+var metaMapSliceKeys = map[string]bool{ + "response_envelope": true, +} + +// UnmarshalJSON decodes the typed fields and captures every other key into +// Extra (with UseNumber so the tail keeps int/float fidelity). +func (w *metaWire) UnmarshalJSON(b []byte) error { + type alias metaWire + if err := json.Unmarshal(b, (*alias)(w)); err != nil { + return err + } + dec := json.NewDecoder(bytes.NewReader(b)) + dec.UseNumber() + var raw map[string]any + if err := dec.Decode(&raw); err != nil { + return err + } + for _, k := range metaWireKnownKeys { + delete(raw, k) + } + if len(raw) > 0 { + w.Extra = make(map[string]any, len(raw)) + for k, v := range raw { + w.Extra[k] = normalizeMetaValue(k, v) + } + } + return nil +} + +// toMap rebuilds the in-memory map[string]any with exact Go types. +func (w *metaWire) toMap() map[string]any { + m := make(map[string]any, len(metaWireKnownKeys)+len(w.Extra)) + putString(m, "signature", w.Signature) + putString(m, "visibility", w.Visibility) + putString(m, "doc", w.Doc) + putBool(m, "external", w.External) + putInt(m, "complexity", w.Complexity) + putInt(m, "loop_depth", w.LoopDepth) + putInt(m, "parse_errors", w.ParseErrors) + putInt(m, "position", w.Position) + putInt(m, "line", w.Line) + putFloat(m, "confidence", w.Confidence) + putFloat(m, "coverage_pct", w.CoveragePct) + if w.Shape != nil { + m["shape"] = w.Shape + } + if w.ResponseEnvelope != nil { + m["response_envelope"] = w.ResponseEnvelope + } + if w.PathParamNames != nil { + m["path_param_names"] = w.PathParamNames + } + if w.QueryParams != nil { + m["query_params"] = w.QueryParams + } + if w.StatusCodes != nil { + m["status_codes"] = w.StatusCodes + } + putInt(m, "candidate_count", w.CandidateCount) + putFloat(m, "similarity", w.Similarity) + putFloat(m, "score", w.Score) + putInt(m, "count", w.Count) + putInt(m, "clone_tokens", w.CloneTokens) + putNestedMap(m, "churn", w.Churn) + putNestedMap(m, "coverage", w.Coverage) + putNestedMap(m, "last_authored", 
w.LastAuthored) + putNestedMap(m, "contract_meta", w.ContractMeta) + for k, v := range w.Extra { + m[k] = v + } + if len(m) == 0 { + return nil + } + return m +} + +func putString(m map[string]any, k string, v *string) { + if v != nil { + m[k] = *v + } +} + +func putBool(m map[string]any, k string, v *bool) { + if v != nil { + m[k] = *v + } +} + +func putInt(m map[string]any, k string, v *int) { + if v != nil { + m[k] = *v + } +} + +func putFloat(m map[string]any, k string, v *float64) { + if v != nil { + m[k] = *v + } +} + +// putNestedMap normalises a nested enrichment map (decoded by the standard +// json path, so its numbers are float64) into exact Go types. +func putNestedMap(m map[string]any, k string, nested map[string]any) { + if nested == nil { + return + } + out := make(map[string]any, len(nested)) + for nk, nv := range nested { + out[nk] = normalizeMetaValue(nk, nv) + } + m[k] = out +} + +// normalizeMetaValue coerces a json-decoded value to the exact Go type the +// readers expect, recursing through nested maps and slices. It accepts both +// json.Number (the Extra path uses UseNumber) and float64 (the typed-field +// path decodes nested maps with standard json), so it is correct for both. +func normalizeMetaValue(key string, v any) any { + switch vv := v.(type) { + case json.Number: + return normalizeNumber(key, numberToFloat(vv), &vv) + case float64: + return normalizeNumber(key, vv, nil) + case []any: + return normalizeSlice(key, vv) + case map[string]any: + out := make(map[string]any, len(vv)) + for nk, nv := range vv { + out[nk] = normalizeMetaValue(nk, nv) + } + return out + default: + return v + } +} + +func numberToFloat(n json.Number) float64 { + f, _ := n.Float64() + return f +} + +// normalizeNumber picks the Go numeric type for key. num is the float view; +// jn (may be nil) is the original json.Number for exact integer recovery. 
+func normalizeNumber(key string, num float64, jn *json.Number) any { + if metaFloatKeys[key] { + return num + } + if metaInt64Keys[key] { + if jn != nil { + if i, err := jn.Int64(); err == nil { + return i + } + } + return int64(num) + } + if num == float64(int64(num)) { + if jn != nil { + if i, err := jn.Int64(); err == nil { + return int(i) + } + } + return int(num) + } + return num +} + +func normalizeSlice(key string, s []any) any { + if metaStringSliceKeys[key] { + out := make([]string, 0, len(s)) + for _, e := range s { + if str, ok := e.(string); ok { + out = append(out, str) + } + } + return out + } + if metaMapSliceKeys[key] { + out := make([]map[string]any, 0, len(s)) + for _, e := range s { + if mm, ok := e.(map[string]any); ok { + norm := make(map[string]any, len(mm)) + for nk, nv := range mm { + norm[nk] = normalizeMetaValue(nk, nv) + } + out = append(out, norm) + } + } + return out + } + out := make([]any, len(s)) + for i, e := range s { + out[i] = normalizeMetaValue(key, e) + } + return out +} + +// encodeMeta serialises Meta to JSON. nil / empty Meta stores as NULL. +func encodeMeta(m map[string]any) ([]byte, error) { + if len(m) == 0 { + return nil, nil + } + return json.Marshal(m) +} + +// decodeMeta reads a meta blob. New rows are JSON (routed through metaWire +// for exact types); legacy rows are gob and decode through the fallback. +func decodeMeta(b []byte) (map[string]any, error) { + if len(b) == 0 { + return nil, nil + } + if isJSONObject(b) { + var w metaWire + if err := json.Unmarshal(b, &w); err != nil { + // A gob blob whose first byte is '{' would land here; fall + // back rather than fail the row. + return decodeMetaGob(b) + } + return w.toMap(), nil + } + return decodeMetaGob(b) +} + +// isJSONObject reports whether b looks like a JSON object (the only shape +// encodeMeta ever produces). Leading whitespace is tolerated. 
+func isJSONObject(b []byte) bool { + for _, c := range b { + switch c { + case ' ', '\t', '\n', '\r': + continue + case '{': + return true + default: + return false + } + } + return false +} + +func decodeMetaGob(b []byte) (map[string]any, error) { + var m map[string]any + if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&m); err != nil { + return nil, err + } + return m, nil +} diff --git a/internal/graph/store_sqlite/meta_json_test.go b/internal/graph/store_sqlite/meta_json_test.go new file mode 100644 index 00000000..8b0f782e --- /dev/null +++ b/internal/graph/store_sqlite/meta_json_test.go @@ -0,0 +1,175 @@ +package store_sqlite + +import ( + "bytes" + "encoding/gob" + "reflect" + "testing" + + "github.com/zzet/gortex/internal/contracts" +) + +// roundTrip encodes Meta to JSON and decodes it back, the persist->reload +// path every reader sees after a daemon restart / store hydration. +func roundTrip(t *testing.T, in map[string]any) map[string]any { + t.Helper() + b, err := encodeMeta(in) + if err != nil { + t.Fatalf("encodeMeta: %v", err) + } + if !isJSONObject(b) { + t.Fatalf("encodeMeta did not produce a JSON object: %q", b) + } + out, err := decodeMeta(b) + if err != nil { + t.Fatalf("decodeMeta: %v", err) + } + return out +} + +// TestMetaRoundTripExactTypes is the fidelity canary: every key the audit +// found read with a raw type-assertion must survive a JSON round-trip with +// its exact Go type, or the corresponding reader silently breaks. 
+func TestMetaRoundTripExactTypes(t *testing.T) { + shape := &contracts.Shape{ + Kind: "struct", + Fields: []contracts.ShapeField{{Name: "id", Type: "int64", Required: true}}, + Notes: []string{"partial"}, + } + node := map[string]any{ + "signature": "func F(x int) error", + "visibility": "public", + "doc": "F does a thing.", + "external": true, + "complexity": 7, + "loop_depth": 2, + "parse_errors": 0, + "position": 3, + "line": 42, + "confidence": 1.0, // integral float — must stay float64 + "coverage_pct": 83.5, + "shape": shape, + "response_envelope": []map[string]any{{"name": "data", "type": "User"}}, + "path_param_names": []string{"id", "org"}, + "query_params": []string{"limit"}, + "status_codes": []string{"200", "404"}, + "churn": map[string]any{"commit_count": 12, "age_days": 365, "churn_rate": 2.0, "last_author": "a@b.c"}, + "coverage": map[string]any{"num_stmt": 40, "hit": 33}, + "last_authored": map[string]any{"timestamp": int64(1700000000), "email": "x@y.z"}, + "some_plugin_flag": "go_linkname", // Extra tail (string) + "is_generated": false, // Extra tail (bool) + } + got := roundTrip(t, node) + + assertType[int](t, got, "complexity", 7) + assertType[int](t, got, "loop_depth", 2) + assertType[int](t, got, "parse_errors", 0) + assertType[int](t, got, "position", 3) + assertType[int](t, got, "line", 42) + assertType[float64](t, got, "confidence", 1.0) + assertType[float64](t, got, "coverage_pct", 83.5) + assertType[string](t, got, "signature", "func F(x int) error") + assertType[string](t, got, "visibility", "public") + assertType[bool](t, got, "external", true) + assertType[string](t, got, "some_plugin_flag", "go_linkname") + assertType[bool](t, got, "is_generated", false) + + // Shape must rebuild as *contracts.Shape, not map[string]any. 
+ gotShape, ok := got["shape"].(*contracts.Shape) + if !ok { + t.Fatalf("shape: want *contracts.Shape, got %T", got["shape"]) + } + if !reflect.DeepEqual(gotShape, shape) { + t.Errorf("shape mismatch: %+v vs %+v", gotShape, shape) + } + + // response_envelope must be []map[string]any, not []any. + if _, ok := got["response_envelope"].([]map[string]any); !ok { + t.Errorf("response_envelope: want []map[string]any, got %T", got["response_envelope"]) + } + // []string keys. + for _, k := range []string{"path_param_names", "query_params", "status_codes"} { + if _, ok := got[k].([]string); !ok { + t.Errorf("%s: want []string, got %T", k, got[k]) + } + } + + // Nested map children keep exact types. + churn := got["churn"].(map[string]any) + assertType[int](t, churn, "commit_count", 12) + assertType[int](t, churn, "age_days", 365) + assertType[float64](t, churn, "churn_rate", 2.0) // integral float, nested + assertType[string](t, churn, "last_author", "a@b.c") + cov := got["coverage"].(map[string]any) + assertType[int](t, cov, "num_stmt", 40) + assertType[int](t, cov, "hit", 33) + la := got["last_authored"].(map[string]any) + assertType[int64](t, la, "timestamp", int64(1700000000)) +} + +func TestEdgeMetaRoundTripExactTypes(t *testing.T) { + edge := map[string]any{ + "candidate_count": 2, + "similarity": 0.875, + "score": 1.0, // integral float — must stay float64 + "count": 5, + "clone_tokens": 128, + "synthesized_by": "grpc", // Extra tail + } + got := roundTrip(t, edge) + assertType[int](t, got, "candidate_count", 2) + assertType[float64](t, got, "similarity", 0.875) + assertType[float64](t, got, "score", 1.0) + assertType[int](t, got, "count", 5) + assertType[int](t, got, "clone_tokens", 128) + assertType[string](t, got, "synthesized_by", "grpc") +} + +// TestDecodeLegacyGob proves existing on-disk gob blobs still decode. 
+func TestDecodeLegacyGob(t *testing.T) { + orig := map[string]any{"visibility": "private", "complexity": 9, "confidence": 1.0} + var buf bytes.Buffer + if err := gob.NewEncoder(&buf).Encode(orig); err != nil { + t.Fatalf("gob encode: %v", err) + } + got, err := decodeMeta(buf.Bytes()) + if err != nil { + t.Fatalf("decodeMeta(gob): %v", err) + } + // gob preserves exact types natively. + assertType[string](t, got, "visibility", "private") + assertType[int](t, got, "complexity", 9) + assertType[float64](t, got, "confidence", 1.0) +} + +func TestEncodeMetaEmpty(t *testing.T) { + b, err := encodeMeta(nil) + if err != nil || b != nil { + t.Fatalf("encodeMeta(nil) = %q, %v; want nil, nil", b, err) + } + b, err = encodeMeta(map[string]any{}) + if err != nil || b != nil { + t.Fatalf("encodeMeta(empty) = %q, %v; want nil, nil", b, err) + } + m, err := decodeMeta(nil) + if err != nil || m != nil { + t.Fatalf("decodeMeta(nil) = %v, %v; want nil, nil", m, err) + } +} + +func assertType[T comparable](t *testing.T, m map[string]any, key string, want T) { + t.Helper() + v, ok := m[key] + if !ok { + t.Errorf("%s: missing from decoded map", key) + return + } + got, ok := v.(T) + if !ok { + t.Errorf("%s: want type %T, got %T (value %v)", key, want, v, v) + return + } + if got != want { + t.Errorf("%s: want %v, got %v", key, want, got) + } +} diff --git a/internal/graph/store_sqlite/store.go b/internal/graph/store_sqlite/store.go index 01e483ec..3d31c90a 100644 --- a/internal/graph/store_sqlite/store.go +++ b/internal/graph/store_sqlite/store.go @@ -21,9 +21,7 @@ package store_sqlite import ( - "bytes" "database/sql" - "encoding/gob" "errors" "fmt" "iter" @@ -404,29 +402,8 @@ func (s *Store) prepare() error { return err } -// -- meta encode/decode ---------------------------------------------------- - -func encodeMeta(m map[string]any) ([]byte, error) { - if len(m) == 0 { - return nil, nil - } - var buf bytes.Buffer - if err := gob.NewEncoder(&buf).Encode(m); err != nil { - return 
nil, err - } - return buf.Bytes(), nil -} - -func decodeMeta(b []byte) (map[string]any, error) { - if len(b) == 0 { - return nil, nil - } - var m map[string]any - if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&m); err != nil { - return nil, err - } - return m, nil -} +// encodeMeta / decodeMeta live in meta_json.go (JSON codec + the +// metaWire typed DTO + the legacy-gob dual-read fallback). // -- row scanners --------------------------------------------------------- From 3281ea21b6a008af6babd5da7ca83d8414bcb882 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 13:31:33 +0200 Subject: [PATCH 2/7] mcp: fix stale_code inspection reading last_authored as a string runStaleCodeInspection asserted n.Meta["last_authored"].(string), but blame writes last_authored as a nested map (commit / email / timestamp), so the assertion always missed; it additionally gated on an is_stale flag that nothing ever writes. The inspection surfaced nothing. Read last_authored through the shared lastAuthoredFrom helper (blame sidecar with node-meta fallback) and apply the same 365-day age threshold analyze stale_code uses, so the inspection lists genuinely stale functions/methods with their age and author. 
--- internal/mcp/tools_inspections.go | 30 ++++++++++----- internal/mcp/tools_inspections_stale_test.go | 40 ++++++++++++++++++++ 2 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 internal/mcp/tools_inspections_stale_test.go diff --git a/internal/mcp/tools_inspections.go b/internal/mcp/tools_inspections.go index f24f7446..3611ee3d 100644 --- a/internal/mcp/tools_inspections.go +++ b/internal/mcp/tools_inspections.go @@ -2,8 +2,10 @@ package mcp import ( "context" + "fmt" "sort" "strings" + "time" "github.com/mark3labs/mcp-go/mcp" "github.com/zzet/gortex/internal/analysis" @@ -336,8 +338,16 @@ func runCoverageGapsInspection(s *Server, scope inspectionScope) []inspectionVio return out } +// staleInspectionDays mirrors analyze stale_code's default threshold: a +// function/method whose latest blame authorship is older than this is +// surfaced as a stale_code inspection. +const staleInspectionDays = 365 + func runStaleCodeInspection(s *Server, scope inspectionScope) []inspectionViolation { out := make([]inspectionViolation, 0) + now := time.Now().Unix() + cutoff := now - staleInspectionDays*24*3600 + blame := blameRowsByID(s.graph) for _, n := range s.graph.AllNodes() { if n.Kind != graph.KindFunction && n.Kind != graph.KindMethod { continue @@ -345,16 +355,18 @@ func runStaleCodeInspection(s *Server, scope inspectionScope) []inspectionViolat if !scope.keep(n.FilePath) { continue } - ts, ok := n.Meta["last_authored"].(string) - if !ok { + // last_authored is a nested map (commit / email / timestamp), + // primarily from the blame sidecar and falling back to the + // node's meta — lastAuthoredFrom normalises both. Reading it as + // a bare string (as this inspection once did) always missed. + la, ok := lastAuthoredFrom(blame, n) + if !ok || la.Timestamp == 0 || la.Timestamp > cutoff { continue } - // We only surface what blame already marked stale via meta. 
- // The full age computation lives in analyze stale_code; for - // the inspection surface, presence of a `stale` tag in meta - // is the cheapest filter. - if isStale, _ := n.Meta["is_stale"].(bool); !isStale { - continue + ageDays := (now - la.Timestamp) / (24 * 3600) + msg := fmt.Sprintf("stale: %s last authored %dd ago", n.Name, ageDays) + if la.Email != "" { + msg += " by " + la.Email } out = append(out, inspectionViolation{ Inspection: "stale_code", @@ -362,7 +374,7 @@ func runStaleCodeInspection(s *Server, scope inspectionScope) []inspectionViolat File: n.FilePath, Line: n.StartLine, SymbolID: n.ID, - Message: "stale: " + n.Name + " last authored " + ts, + Message: msg, }) } return out diff --git a/internal/mcp/tools_inspections_stale_test.go b/internal/mcp/tools_inspections_stale_test.go new file mode 100644 index 00000000..b5245602 --- /dev/null +++ b/internal/mcp/tools_inspections_stale_test.go @@ -0,0 +1,40 @@ +package mcp + +import ( + "strings" + "testing" +) + +// TestRunStaleCodeInspection guards the fix for the dead stale_code +// inspection: it once read last_authored as a bare string (always a miss, +// since blame writes a map) and gated on a never-written is_stale flag. 
+func TestRunStaleCodeInspection(t *testing.T) { + srv, _ := setupTestServer(t) + addBlameEnrichedNode(srv.graph, "f.go::Recent", "f.go", 1, "alice@x", "aaa", 30) + addBlameEnrichedNode(srv.graph, "f.go::Stale", "f.go", 5, "bob@x", "bbb", 400) + addBlameEnrichedNode(srv.graph, "f.go::Ancient", "f.go", 9, "carol@x", "ccc", 800) + + got := runStaleCodeInspection(srv, inspectionScope{}) + if len(got) != 2 { + t.Fatalf("want 2 stale violations (Stale+Ancient, 365d default), got %d: %+v", len(got), got) + } + ids := map[string]bool{} + for _, v := range got { + ids[v.SymbolID] = true + if v.Inspection != "stale_code" { + t.Errorf("inspection = %q, want stale_code", v.Inspection) + } + } + if !ids["f.go::Stale"] || !ids["f.go::Ancient"] || ids["f.go::Recent"] { + t.Errorf("wrong stale set: %v", ids) + } + // The message must carry the author email read from the nested + // last_authored map — proves the nested read, not a bare .(string). + joined := "" + for _, v := range got { + joined += v.Message + "\n" + } + if !strings.Contains(joined, "bob@x") || !strings.Contains(joined, "carol@x") { + t.Errorf("messages missing nested author email: %q", joined) + } +} From 18af3bc6f6a86ae2448e9c5782869acf7ec4ffd2 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 13:34:08 +0200 Subject: [PATCH 3/7] mcp: read contract route fields from nested contract_meta MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit routeMethodAndPath read method / path / service / topic / operation off a contract node's top-level Meta, but the contract-to-node build nests the contract's own Meta under Meta["contract_meta"] — the node top level only holds type / role / symbol_id / line / confidence. Every route lookup therefore returned empty. Read the route fields from the nested contract_meta map, falling back to the top level for any node that stamps them directly. 
--- internal/mcp/tools_analyze_framework.go | 11 +++++- .../mcp/tools_analyze_framework_route_test.go | 38 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 internal/mcp/tools_analyze_framework_route_test.go diff --git a/internal/mcp/tools_analyze_framework.go b/internal/mcp/tools_analyze_framework.go index 300e55e9..d9a34cd6 100644 --- a/internal/mcp/tools_analyze_framework.go +++ b/internal/mcp/tools_analyze_framework.go @@ -108,7 +108,16 @@ func routeMethodAndPath(n *graph.Node) (string, string) { if n == nil { return "", "" } - meta := n.Meta + // The route fields live in the nested contract_meta map — the + // contract's own Meta, copied in wholesale at node-build time. The + // node's top-level Meta only carries type/role/symbol_id/line/ + // confidence, so reading these keys off n.Meta directly always + // missed. Fall back to the top level for any node that does stamp + // them there. + meta, _ := n.Meta["contract_meta"].(map[string]any) + if meta == nil { + meta = n.Meta + } method, _ := meta["method"].(string) path, _ := meta["path"].(string) if path != "" || method != "" { diff --git a/internal/mcp/tools_analyze_framework_route_test.go b/internal/mcp/tools_analyze_framework_route_test.go new file mode 100644 index 00000000..32ffa455 --- /dev/null +++ b/internal/mcp/tools_analyze_framework_route_test.go @@ -0,0 +1,38 @@ +package mcp + +import ( + "testing" + + "github.com/zzet/gortex/internal/graph" +) + +// TestRouteMethodAndPath_NestedContractMeta guards the fix for the +// read/write key mismatch: contract nodes carry route fields under the +// nested contract_meta map, not at the top level where the reader looked. 
+func TestRouteMethodAndPath_NestedContractMeta(t *testing.T) { + http := &graph.Node{Kind: graph.KindContract, Meta: map[string]any{ + "type": "http", "role": "provider", + "contract_meta": map[string]any{"method": "GET", "path": "/v1/users"}, + }} + if m, p := routeMethodAndPath(http); m != "GET" || p != "/v1/users" { + t.Fatalf("nested http: got (%q,%q), want (GET,/v1/users)", m, p) + } + + // gRPC service falls through the method/path branch (the existing + // short-circuit returns method first); a service-only node exercises + // the service branch reading from the nested map. + grpc := &graph.Node{Kind: graph.KindContract, Meta: map[string]any{ + "contract_meta": map[string]any{"service": "UserSvc"}, + }} + if m, p := routeMethodAndPath(grpc); m != "" || p != "UserSvc" { + t.Fatalf("nested grpc service: got (%q,%q), want (\"\",UserSvc)", m, p) + } + + // A node that stamps the fields at the top level still resolves. + top := &graph.Node{Kind: graph.KindContract, Meta: map[string]any{ + "method": "POST", "path": "/x", + }} + if m, p := routeMethodAndPath(top); m != "POST" || p != "/x" { + t.Fatalf("top-level fallback: got (%q,%q), want (POST,/x)", m, p) + } +} From 5395b9e27c7cc94b812f174fd09ffbaf7d1f1a28 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 13:43:02 +0200 Subject: [PATCH 4/7] store/sqlite: promote signature/visibility/doc/external to columns These four node meta keys are universal and hot-read (signature is the single hottest meta read in the graph). Lift them into dedicated nullable columns: stripped from the JSON meta blob on write and restored into Meta on read, so the in-memory map is unchanged while the keys become queryable and the common blob shrinks. A NULL column means "not set", so a legacy row that still carries the keys in its (gob) blob is left untouched; databases created before the columns existed gain them via ALTER on the next Open. 
Every node-shaped SELECT now resolves to a single column-list constant so the projection and scanNode order can never drift apart again. --- internal/graph/store_sqlite/meta_json.go | 129 ++++++++++++++++++ .../graph/store_sqlite/meta_promoted_test.go | 125 +++++++++++++++++ internal/graph/store_sqlite/schema.go | 13 +- internal/graph/store_sqlite/store.go | 39 ++++-- internal/graph/store_sqlite/store_lookups.go | 6 +- 5 files changed, 297 insertions(+), 15 deletions(-) create mode 100644 internal/graph/store_sqlite/meta_promoted_test.go diff --git a/internal/graph/store_sqlite/meta_json.go b/internal/graph/store_sqlite/meta_json.go index 9694793a..4d9afde9 100644 --- a/internal/graph/store_sqlite/meta_json.go +++ b/internal/graph/store_sqlite/meta_json.go @@ -2,10 +2,12 @@ package store_sqlite import ( "bytes" + "database/sql" "encoding/gob" "encoding/json" "github.com/zzet/gortex/internal/contracts" + "github.com/zzet/gortex/internal/graph" ) // Node / edge Meta is a map[string]any persisted in the `meta` column. @@ -353,3 +355,130 @@ func decodeMetaGob(b []byte) (map[string]any, error) { } return m, nil } + +// -- promoted node columns ------------------------------------------------ +// +// signature / visibility / doc / external are universal, hot-read node +// keys. They are lifted into dedicated nullable columns: stripped from the +// JSON blob on write (extractPromotedMeta) and restored into Meta on read +// (restorePromotedMeta), so the in-memory map is unchanged while the keys +// become queryable and the common blob shrinks. + +var promotedMetaColumns = []struct { + name string + ddl string +}{ + {"signature", "signature TEXT"}, + {"visibility", "visibility TEXT"}, + {"doc", "doc TEXT"}, + {"external", "external INTEGER"}, +} + +// ensureNodeColumns adds the promoted columns to a nodes table created +// before they existed. A fresh DB already has them from the DDL, so this is +// a no-op; an older DB is altered in place (ADD COLUMN defaults to NULL). 
+func ensureNodeColumns(db *sql.DB) error { + rows, err := db.Query(`PRAGMA table_info(nodes)`) + if err != nil { + return err + } + existing := make(map[string]bool) + for rows.Next() { + var ( + cid, notnull, pk int + name, ctype string + dflt sql.NullString + ) + if err := rows.Scan(&cid, &name, &ctype, ¬null, &dflt, &pk); err != nil { + rows.Close() + return err + } + existing[name] = true + } + if err := rows.Err(); err != nil { + rows.Close() + return err + } + rows.Close() + for _, c := range promotedMetaColumns { + if existing[c.name] { + continue + } + if _, err := db.Exec(`ALTER TABLE nodes ADD COLUMN ` + c.ddl); err != nil { + return err + } + } + return nil +} + +// extractPromotedMeta splits the promoted keys out of m into typed column +// values and returns the remaining map destined for the JSON blob. m is +// not mutated; a copy is made only when a promoted key is present and has +// the expected type (otherwise the value stays in the blob). +func extractPromotedMeta(m map[string]any) (sig, vis, doc sql.NullString, ext sql.NullBool, rest map[string]any) { + rest = m + if len(m) == 0 { + return + } + has := false + for _, c := range promotedMetaColumns { + if _, ok := m[c.name]; ok { + has = true + break + } + } + if !has { + return + } + rest = make(map[string]any, len(m)) + for k, v := range m { + switch k { + case "signature": + if s, ok := v.(string); ok { + sig = sql.NullString{String: s, Valid: true} + continue + } + case "visibility": + if s, ok := v.(string); ok { + vis = sql.NullString{String: s, Valid: true} + continue + } + case "doc": + if s, ok := v.(string); ok { + doc = sql.NullString{String: s, Valid: true} + continue + } + case "external": + if b, ok := v.(bool); ok { + ext = sql.NullBool{Bool: b, Valid: true} + continue + } + } + rest[k] = v + } + return +} + +// restorePromotedMeta writes the non-NULL promoted columns back into the +// node's Meta. A NULL column is left alone so a legacy gob row's blob value +// survives. 
+func restorePromotedMeta(n *graph.Node, sig, vis, doc sql.NullString, ext sql.NullBool) { + if !sig.Valid && !vis.Valid && !doc.Valid && !ext.Valid { + return + } + if n.Meta == nil { + n.Meta = make(map[string]any, 4) + } + if sig.Valid { + n.Meta["signature"] = sig.String + } + if vis.Valid { + n.Meta["visibility"] = vis.String + } + if doc.Valid { + n.Meta["doc"] = doc.String + } + if ext.Valid { + n.Meta["external"] = ext.Bool + } +} diff --git a/internal/graph/store_sqlite/meta_promoted_test.go b/internal/graph/store_sqlite/meta_promoted_test.go new file mode 100644 index 00000000..6f152fe0 --- /dev/null +++ b/internal/graph/store_sqlite/meta_promoted_test.go @@ -0,0 +1,125 @@ +package store_sqlite + +import ( + "database/sql" + "path/filepath" + "strings" + "testing" + + "github.com/zzet/gortex/internal/graph" +) + +// TestPromotedColumns_RoundTrip verifies the promoted keys land in their +// columns, are stripped from the JSON blob, and restore into Meta with +// exact types — while non-promoted keys stay in the blob. 
+func TestPromotedColumns_RoundTrip(t *testing.T) { + s, err := Open(filepath.Join(t.TempDir(), "p.sqlite")) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + s.AddNode(&graph.Node{ + ID: "f.go::F", Kind: graph.KindFunction, Name: "F", FilePath: "f.go", + Meta: map[string]any{ + "signature": "func F()", + "visibility": "public", + "doc": "F docs", + "external": true, + "complexity": 5, // non-promoted — must stay in the blob + }, + }) + + n := s.GetNode("f.go::F") + if n == nil { + t.Fatal("GetNode returned nil") + } + assertType[string](t, n.Meta, "signature", "func F()") + assertType[string](t, n.Meta, "visibility", "public") + assertType[string](t, n.Meta, "doc", "F docs") + assertType[bool](t, n.Meta, "external", true) + assertType[int](t, n.Meta, "complexity", 5) + + var sig, vis, doc sql.NullString + var ext sql.NullBool + var blob []byte + row := s.db.QueryRow(`SELECT signature, visibility, doc, external, meta FROM nodes WHERE id=?`, "f.go::F") + if err := row.Scan(&sig, &vis, &doc, &ext, &blob); err != nil { + t.Fatal(err) + } + if !sig.Valid || sig.String != "func F()" { + t.Errorf("signature column = %+v", sig) + } + if !ext.Valid || !ext.Bool { + t.Errorf("external column = %+v", ext) + } + blobStr := string(blob) + for _, k := range []string{"signature", "visibility", "external"} { + if strings.Contains(blobStr, k) { + t.Errorf("blob still contains promoted key %q: %s", k, blobStr) + } + } + if !strings.Contains(blobStr, "complexity") { + t.Errorf("blob missing non-promoted key complexity: %s", blobStr) + } +} + +// TestPromotedColumns_ExternalFalse guards the NULL-vs-false distinction: +// a stored false must round-trip as false, not vanish. 
+func TestPromotedColumns_ExternalFalse(t *testing.T) { + s, err := Open(filepath.Join(t.TempDir(), "p.sqlite")) + if err != nil { + t.Fatal(err) + } + defer s.Close() + s.AddNode(&graph.Node{ + ID: "x", Kind: graph.KindFunction, Name: "x", FilePath: "x.go", + Meta: map[string]any{"external": false}, + }) + n := s.GetNode("x") + if n == nil { + t.Fatal("nil") + } + v, ok := n.Meta["external"].(bool) + if !ok || v != false { + t.Errorf("external false: got %v (%T)", n.Meta["external"], n.Meta["external"]) + } +} + +// TestPromotedColumns_Migration verifies ensureNodeColumns adds the +// promoted columns to a database created with the pre-promotion schema. +func TestPromotedColumns_Migration(t *testing.T) { + path := filepath.Join(t.TempDir(), "old.sqlite") + raw, err := sql.Open("sqlite", path) + if err != nil { + t.Fatal(err) + } + _, err = raw.Exec(`CREATE TABLE nodes ( + id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL, + qual_name TEXT NOT NULL DEFAULT '', file_path TEXT NOT NULL, + start_line INTEGER NOT NULL DEFAULT 0, end_line INTEGER NOT NULL DEFAULT 0, + language TEXT NOT NULL DEFAULT '', repo_prefix TEXT NOT NULL DEFAULT '', + workspace_id TEXT NOT NULL DEFAULT '', project_id TEXT NOT NULL DEFAULT '', + meta BLOB + ) WITHOUT ROWID`) + if err != nil { + t.Fatal(err) + } + _ = raw.Close() + + s, err := Open(path) + if err != nil { + t.Fatalf("Open old-schema db: %v", err) + } + defer s.Close() + s.AddNode(&graph.Node{ + ID: "m", Kind: graph.KindFunction, Name: "m", FilePath: "m.go", + Meta: map[string]any{"signature": "sig", "external": true}, + }) + n := s.GetNode("m") + if n == nil { + t.Fatal("nil after migration") + } + assertType[string](t, n.Meta, "signature", "sig") + assertType[bool](t, n.Meta, "external", true) +} diff --git a/internal/graph/store_sqlite/schema.go b/internal/graph/store_sqlite/schema.go index 1c8d7fc2..dec467e5 100644 --- a/internal/graph/store_sqlite/schema.go +++ b/internal/graph/store_sqlite/schema.go @@ -16,7 +16,14 
@@ package store_sqlite // IGNORE on that constraint matches the in-memory "second AddEdge // for the same key is a no-op" semantics. // -// - meta is a gob-encoded blob. nil / empty Meta is stored as NULL. +// - meta is a JSON document (see meta_json.go). nil / empty Meta is +// stored as NULL. Four universal, hot-read node keys are promoted to +// their own nullable columns (signature / visibility / doc / +// external): they are stripped from the JSON blob on write and +// restored into Meta on read, so the in-memory map is unchanged. A +// NULL column means "not set" (legacy gob rows predate the columns +// and keep their values in the blob). Existing databases gain the +// columns via ALTER on the next Open (ensureNodeColumns). // // - Secondary indexes mirror the in-memory store's hot lookup paths: // nodes_by_name -- FindNodesByName / FindNodesByNameInRepo @@ -45,6 +52,10 @@ CREATE TABLE IF NOT EXISTS nodes ( repo_prefix TEXT NOT NULL DEFAULT '', workspace_id TEXT NOT NULL DEFAULT '', project_id TEXT NOT NULL DEFAULT '', + signature TEXT, + visibility TEXT, + doc TEXT, + external INTEGER, meta BLOB ) WITHOUT ROWID; diff --git a/internal/graph/store_sqlite/store.go b/internal/graph/store_sqlite/store.go index 3d31c90a..3b47df38 100644 --- a/internal/graph/store_sqlite/store.go +++ b/internal/graph/store_sqlite/store.go @@ -184,6 +184,13 @@ func Open(path string) (*Store, error) { _ = db.Close() return nil, fmt.Errorf("sqlite edges_external index: %w", err) } + // Add the promoted node columns to databases created before they + // existed (CREATE TABLE IF NOT EXISTS won't alter an existing table). + // Must run before prepare(), whose node INSERT references them. 
+ if err := ensureNodeColumns(db); err != nil { + _ = db.Close() + return nil, fmt.Errorf("sqlite node columns: %w", err) + } s := &Store{db: db, dbPath: path} // Initialise the bundle cache at construction so its pointer is @@ -307,10 +314,10 @@ func (s *Store) prepare() error { *out = st } - const nodeCols = `id, kind, name, qual_name, file_path, start_line, end_line, language, repo_prefix, workspace_id, project_id, meta` + const nodeCols = lookupNodeCols prep(&s.stmtInsertNode, - `INSERT OR REPLACE INTO nodes (`+nodeCols+`) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)`) + `INSERT OR REPLACE INTO nodes (`+nodeCols+`) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`) prep(&s.stmtGetNode, `SELECT `+nodeCols+` FROM nodes WHERE id = ?`) prep(&s.stmtGetNodeByQual, @@ -411,13 +418,16 @@ func scanNode(scanner interface { Scan(...any) error }) (*graph.Node, error) { var ( - n graph.Node - metaBlob []byte + n graph.Node + metaBlob []byte + sig, vis, doc sql.NullString + ext sql.NullBool ) err := scanner.Scan( &n.ID, &n.Kind, &n.Name, &n.QualName, &n.FilePath, &n.StartLine, &n.EndLine, &n.Language, - &n.RepoPrefix, &n.WorkspaceID, &n.ProjectID, &metaBlob, + &n.RepoPrefix, &n.WorkspaceID, &n.ProjectID, + &sig, &vis, &doc, &ext, &metaBlob, ) if err != nil { return nil, err @@ -429,6 +439,10 @@ func scanNode(scanner interface { } n.Meta = m } + // Restore the promoted columns into Meta. They are authoritative for + // rows written after the promotion; a NULL column (legacy gob rows) + // is left alone so the blob-carried value survives. 
+ restorePromotedMeta(&n, sig, vis, doc, ext) return &n, nil } @@ -488,14 +502,16 @@ func (s *Store) AddNode(n *graph.Node) { } func (s *Store) insertNodeLocked(stmt *sql.Stmt, n *graph.Node) error { - metaBlob, err := encodeMeta(n.Meta) + sig, vis, doc, ext, blobMeta := extractPromotedMeta(n.Meta) + metaBlob, err := encodeMeta(blobMeta) if err != nil { return err } _, err = stmt.Exec( n.ID, string(n.Kind), n.Name, n.QualName, n.FilePath, n.StartLine, n.EndLine, n.Language, - n.RepoPrefix, n.WorkspaceID, n.ProjectID, metaBlob, + n.RepoPrefix, n.WorkspaceID, n.ProjectID, + sig, vis, doc, ext, metaBlob, ) return err } @@ -1306,10 +1322,7 @@ FROM edges WHERE kind = ?`, string(kind)) // NodesByKind: indexed SELECT on the (kind) column. func (s *Store) NodesByKind(kind graph.NodeKind) iter.Seq[*graph.Node] { return func(yield func(*graph.Node) bool) { - out := s.queryNodesSQL(` -SELECT id, kind, name, qual_name, file_path, start_line, end_line, language, - repo_prefix, workspace_id, project_id, meta -FROM nodes WHERE kind = ?`, string(kind)) + out := s.queryNodesSQL(`SELECT `+lookupNodeCols+` FROM nodes WHERE kind = ?`, string(kind)) for _, n := range out { if !yield(n) { return @@ -1407,7 +1420,7 @@ func (s *Store) GetNodesByIDs(ids []string) map[string]*graph.Node { uniq = append(uniq, id) } out := make(map[string]*graph.Node, len(uniq)) - const nodeCols = `id, kind, name, qual_name, file_path, start_line, end_line, language, repo_prefix, workspace_id, project_id, meta` + const nodeCols = lookupNodeCols for i := 0; i < len(uniq); i += lookupChunkSize { end := minInt(i+lookupChunkSize, len(uniq)) chunk := uniq[i:end] @@ -1448,7 +1461,7 @@ func (s *Store) FindNodesByNames(names []string) map[string][]*graph.Node { uniq = append(uniq, name) } out := make(map[string][]*graph.Node, len(uniq)) - const nodeCols = `id, kind, name, qual_name, file_path, start_line, end_line, language, repo_prefix, workspace_id, project_id, meta` + const nodeCols = lookupNodeCols for i := 0; i 
< len(uniq); i += lookupChunkSize { end := minInt(i+lookupChunkSize, len(uniq)) chunk := uniq[i:end] diff --git a/internal/graph/store_sqlite/store_lookups.go b/internal/graph/store_sqlite/store_lookups.go index 06122c3d..e5cdfd52 100644 --- a/internal/graph/store_sqlite/store_lookups.go +++ b/internal/graph/store_sqlite/store_lookups.go @@ -12,7 +12,11 @@ import ( // (queryNodesSQL / queryEdgesSQL / lookupChunkSize / minInt). SQLite's // planner drives every one through the existing secondary indexes. -const lookupNodeCols = `id, kind, name, qual_name, file_path, start_line, end_line, language, repo_prefix, workspace_id, project_id, meta` +// lookupNodeCols is the canonical node column list (and scan order) for +// every node-shaped SELECT in the package. It must stay in sync with +// scanNode. The promoted columns (signature/visibility/doc/external) +// precede meta. +const lookupNodeCols = `id, kind, name, qual_name, file_path, start_line, end_line, language, repo_prefix, workspace_id, project_id, signature, visibility, doc, external, meta` const lookupEdgeCols = `from_id, to_id, kind, file_path, line, confidence, confidence_label, origin, tier, cross_repo, meta` // FindNodesByNameContaining returns nodes whose Name contains substr, From ba61aa19abb2709345e1555f30c33dfee4eb01b2 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 13:48:33 +0200 Subject: [PATCH 5/7] indexer: fetch only the edge kinds dataflow and reconcile need MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit materializeDataflowParams and ReconcileContractEdges scanned the entire edge set via AllEdges and filtered down to two or three kinds — decoding every edge's meta along the way. On the sqlite backend that is a full-table read plus a meta decode per edge on every resolve, when the pass only ever touches arg_of/returns_to (dataflow) or matches/produces_topic/consumes_topic (reconcile). 
Fetch those kinds directly through the edges_by_kind index instead, so only the relevant rows are read and only their meta is decoded. Behaviour is unchanged — the same edges are processed. --- internal/indexer/dataflow.go | 17 +++++++++-------- internal/indexer/multi.go | 19 ++++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/internal/indexer/dataflow.go b/internal/indexer/dataflow.go index 69554432..b053a090 100644 --- a/internal/indexer/dataflow.go +++ b/internal/indexer/dataflow.go @@ -40,14 +40,15 @@ import ( // re-run of this pass becomes a no-op. func (idx *Indexer) materializeDataflowParams() { g := idx.graph - edges := g.AllEdges() - for _, e := range edges { - switch e.Kind { - case graph.EdgeArgOf: - rewriteArgOf(g, e) - case graph.EdgeReturnsTo: - rewriteReturnsTo(g, e) - } + // Only arg_of / returns_to edges are rewritten here. Fetch exactly + // those kinds — each an edges_by_kind index probe on the sqlite + // backend — instead of scanning (and meta-decoding) the whole edge + // set; every other edge in the graph is irrelevant to this pass. 
+ for e := range g.EdgesByKind(graph.EdgeArgOf) { + rewriteArgOf(g, e) + } + for e := range g.EdgesByKind(graph.EdgeReturnsTo) { + rewriteReturnsTo(g, e) } } diff --git a/internal/indexer/multi.go b/internal/indexer/multi.go index eb246868..61d3d041 100644 --- a/internal/indexer/multi.go +++ b/internal/indexer/multi.go @@ -2237,15 +2237,16 @@ func (mi *MultiIndexer) ReconcileContractEdges() int { type edgeKey struct{ from, to string } var stale []edgeKey var staleTopicProduces, staleTopicConsumes []edgeKey - for _, e := range g.AllEdges() { - switch e.Kind { - case graph.EdgeMatches: - stale = append(stale, edgeKey{e.From, e.To}) - case graph.EdgeProducesTopic: - staleTopicProduces = append(staleTopicProduces, edgeKey{e.From, e.To}) - case graph.EdgeConsumesTopic: - staleTopicConsumes = append(staleTopicConsumes, edgeKey{e.From, e.To}) - } + // Collect only the three reconciled edge kinds via the edges_by_kind + // index, rather than scanning (and meta-decoding) the whole edge set. + for e := range g.EdgesByKind(graph.EdgeMatches) { + stale = append(stale, edgeKey{e.From, e.To}) + } + for e := range g.EdgesByKind(graph.EdgeProducesTopic) { + staleTopicProduces = append(staleTopicProduces, edgeKey{e.From, e.To}) + } + for e := range g.EdgesByKind(graph.EdgeConsumesTopic) { + staleTopicConsumes = append(staleTopicConsumes, edgeKey{e.From, e.To}) } for _, k := range stale { g.RemoveEdge(k.from, k.to, graph.EdgeMatches) From 802a7f7b2e6d6b3c110ea3a67b3faf8b7c3b3a40 Mon Sep 17 00:00:00 2001 From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 13:56:06 +0200 Subject: [PATCH 6/7] store/sqlite: check Close error returns in promoted-column paths errcheck: route rows.Close()/s.Close() through the package's "_ = ...Close()" convention in ensureNodeColumns and its test. 
--- internal/graph/store_sqlite/meta_json.go | 6 +++--- internal/graph/store_sqlite/meta_promoted_test.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/graph/store_sqlite/meta_json.go b/internal/graph/store_sqlite/meta_json.go index 4d9afde9..4408616c 100644 --- a/internal/graph/store_sqlite/meta_json.go +++ b/internal/graph/store_sqlite/meta_json.go @@ -390,16 +390,16 @@ func ensureNodeColumns(db *sql.DB) error { dflt sql.NullString ) if err := rows.Scan(&cid, &name, &ctype, &notnull, &dflt, &pk); err != nil { - rows.Close() + _ = rows.Close() return err } existing[name] = true } if err := rows.Err(); err != nil { - rows.Close() + _ = rows.Close() return err } - rows.Close() + _ = rows.Close() for _, c := range promotedMetaColumns { if existing[c.name] { continue diff --git a/internal/graph/store_sqlite/meta_promoted_test.go b/internal/graph/store_sqlite/meta_promoted_test.go index 6f152fe0..a4deb4ce 100644 --- a/internal/graph/store_sqlite/meta_promoted_test.go +++ b/internal/graph/store_sqlite/meta_promoted_test.go @@ -17,7 +17,7 @@ func TestPromotedColumns_RoundTrip(t *testing.T) { if err != nil { t.Fatal(err) } - defer s.Close() + defer func() { _ = s.Close() }() s.AddNode(&graph.Node{ ID: "f.go::F", Kind: graph.KindFunction, Name: "F", FilePath: "f.go", @@ -71,7 +71,7 @@ func TestPromotedColumns_ExternalFalse(t *testing.T) { if err != nil { t.Fatal(err) } - defer s.Close() + defer func() { _ = s.Close() }() s.AddNode(&graph.Node{ ID: "x", Kind: graph.KindFunction, Name: "x", FilePath: "x.go", Meta: map[string]any{"external": false}, @@ -111,7 +111,7 @@ func TestPromotedColumns_Migration(t *testing.T) { if err != nil { t.Fatalf("Open old-schema db: %v", err) } - defer s.Close() + defer func() { _ = s.Close() }() s.AddNode(&graph.Node{ ID: "m", Kind: graph.KindFunction, Name: "m", FilePath: "m.go", Meta: map[string]any{"signature": "sig", "external": true}, From 6a340793ad5eab9155c4183871a2718e86a44ce1 Mon Sep 17 00:00:00 2001
From: Andrey Kumanyaev Date: Wed, 17 Jun 2026 14:01:42 +0200 Subject: [PATCH 7/7] store/sqlite: refresh meta-encoding comments from gob to JSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The meta column now stores JSON, so update the in-code docs that still called it gob-encoded (package doc, the constant_values / churn sidecar rationale, and the analysis read paths). References to the separate gob+gzip persistence snapshot and to legacy gob rows are intentionally left untouched — those are accurate. --- internal/graph/store_sqlite/schema.go | 11 ++++++----- internal/graph/store_sqlite/store.go | 5 +++-- internal/graph/store_sqlite/store_analysis.go | 6 +++--- internal/graph/store_sqlite/store_churn_enrichment.go | 6 +++--- internal/graph/store_sqlite/store_constvalues.go | 2 +- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/internal/graph/store_sqlite/schema.go b/internal/graph/store_sqlite/schema.go index dec467e5..d81352b2 100644 --- a/internal/graph/store_sqlite/schema.go +++ b/internal/graph/store_sqlite/schema.go @@ -132,7 +132,7 @@ CREATE TABLE IF NOT EXISTS clone_shingles ( -- constant_values is the per-KindConstant literal-value sidecar: one row -- per constant whose RHS is a string / numeric literal, keyed by node_id --- (the join key back to nodes.id). Lifting the value out of the gob Meta +-- (the join key back to nodes.id). Lifting the value out of the JSON Meta -- blob keeps it queryable (and out of the every-node-load decode path) so -- the resolver can dereference a const-identifier dispatch name to its -- value across files. file_path scopes per-file eviction on reindex; @@ -182,10 +182,11 @@ CREATE TABLE IF NOT EXISTS vectors ( ) WITHOUT ROWID; -- churn_enrichment is the per-node git-churn sidecar (change A: move --- enrichment OUT of nodes.meta so the node hot path stops gob-encoding --- rarely-read data and get_churn_rate does an indexed read instead of an --- AllNodes+gob scan). 
One typed row per enriched file/function/method --- node, keyed by node_id (join key back to nodes.id); repo_prefix scopes +-- enrichment OUT of nodes.meta so the node hot path stops encoding +-- rarely-read data into the blob and get_churn_rate does an indexed read +-- instead of an AllNodes+meta-decode scan). One typed row per enriched +-- file/function/method node, keyed by node_id (join key back to +-- nodes.id); repo_prefix scopes -- per-repo reseeds/wipes. head_sha/branch/computed_at are file-level only -- (empty for symbols). WITHOUT ROWID: the PK index IS the table. CREATE TABLE IF NOT EXISTS churn_enrichment ( diff --git a/internal/graph/store_sqlite/store.go b/internal/graph/store_sqlite/store.go index 3b47df38..50fcccba 100644 --- a/internal/graph/store_sqlite/store.go +++ b/internal/graph/store_sqlite/store.go @@ -11,8 +11,9 @@ // fans out 8 concurrent writers; reads still run concurrently under // WAL mode. // -// Meta maps are encoded with gob; an empty / nil Meta is stored as -// NULL so the common case adds no row weight beyond the column header. +// Meta maps are encoded as JSON (see meta_json.go); an empty / nil Meta +// is stored as NULL so the common case adds no row weight beyond the +// column header. // // EdgeIdentityRevisions is tracked in memory (atomic counter) -- it // mirrors the in-memory store's monotonic "provenance churn" signal diff --git a/internal/graph/store_sqlite/store_analysis.go b/internal/graph/store_sqlite/store_analysis.go index 38be53f7..d26c6d9d 100644 --- a/internal/graph/store_sqlite/store_analysis.go +++ b/internal/graph/store_sqlite/store_analysis.go @@ -7,7 +7,7 @@ package store_sqlite // // Shape: push the structural filter into one indexed SELECT via the raw- // SQL helpers (queryNodesSQL / s.db.Query), then do any Meta-dependent -// (gob-decoded) or distinct-counting filtering in Go. No new prepared +// (JSON-decoded) or distinct-counting filtering in Go. 
No new prepared // statements are added — every query rides the secondary indexes already // created in schema.go (edges_by_from / edges_by_to / nodes_by_kind). @@ -97,7 +97,7 @@ ORDER BY n.id` // IfaceImplementsRows returns one row per EdgeImplements edge whose // target is a KindInterface carrying Meta["methods"]. The interface's // decoded Meta rides on the row (callers pull the "methods" field, which -// gob round-trips as []string or []any). Interfaces with no Meta or no +// round-trips as []string or []any). Interfaces with no Meta or no // "methods" key are elided server-side. func (s *Store) IfaceImplementsRows() []graph.IfaceImplementsRow { q := `SELECT e.from_id, n.id, n.meta @@ -457,7 +457,7 @@ func (s *Store) ThrowerErrorSurface(pathPrefix string) []graph.ThrowerErrorRow { // Pass 2: attach the literal error messages each thrower emits. Join // each thrower's EdgeEmits out-edges to KindString targets and filter // Meta["context"] == "error_msg" Go-side (the context lives in the - // gob-encoded Meta blob). + // JSON Meta blob). for _, id := range order { acc := accums[id] mq := `SELECT n.name, n.meta diff --git a/internal/graph/store_sqlite/store_churn_enrichment.go b/internal/graph/store_sqlite/store_churn_enrichment.go index 72ccd0ea..e1d364ae 100644 --- a/internal/graph/store_sqlite/store_churn_enrichment.go +++ b/internal/graph/store_sqlite/store_churn_enrichment.go @@ -9,8 +9,8 @@ import ( // Compile-time assertions that the SQLite Store satisfies the optional // git-churn enrichment sidecar capabilities (change A: enrichment moved // out of nodes.meta into a typed table so the node hot path stops -// gob-encoding rarely-read data and get_churn_rate reads via an index -// instead of an AllNodes scan). +// encoding rarely-read data into the meta blob and get_churn_rate reads +// via an index instead of an AllNodes scan). 
var ( _ graph.ChurnEnrichmentWriter = (*Store)(nil) _ graph.ChurnEnrichmentReader = (*Store)(nil) @@ -123,7 +123,7 @@ func (s *Store) DeleteChurn(nodeIDs []string) error { // ChurnRows returns every churn row for repoPrefix; an EMPTY repoPrefix // returns ALL rows across repos. This is an index-only read over the // (small) enriched set — the whole point of the sidecar, replacing the -// AllNodes()+gob-decode scan get_churn_rate used to do. +// AllNodes()+meta-decode scan get_churn_rate used to do. func (s *Store) ChurnRows(repoPrefix string) []graph.ChurnEnrichment { var ( rows *sql.Rows diff --git a/internal/graph/store_sqlite/store_constvalues.go b/internal/graph/store_sqlite/store_constvalues.go index c1b33d20..23aefe47 100644 --- a/internal/graph/store_sqlite/store_constvalues.go +++ b/internal/graph/store_sqlite/store_constvalues.go @@ -6,7 +6,7 @@ import ( // Compile-time assertions that the SQLite Store satisfies the optional // constant-value persistence capability. A KindConstant node's literal -// value lives in this queryable sidecar (not the gob-encoded Meta blob) +// value lives in this queryable sidecar (not the JSON Meta blob) // so the resolver can dereference a const-identifier dispatch name across // files without an unindexable per-node blob decode. var (