diff --git a/MCP_SERVER.md b/MCP_SERVER.md index cd56b9c..893f04f 100644 --- a/MCP_SERVER.md +++ b/MCP_SERVER.md @@ -50,7 +50,7 @@ docker run -v ~/.agentmemory:/data -e BRAIN_DB=/data/brain.db brainctl The `CMD` defaults to `brainctl-mcp`, so the container runs the MCP server over stdio. -## Available Tools (201) +## Available Tools (209) | Tool | Description | |------|-------------| diff --git a/db/migrations/052_procedural_memory_layer.sql b/db/migrations/052_procedural_memory_layer.sql new file mode 100644 index 0000000..d96ef29 --- /dev/null +++ b/db/migrations/052_procedural_memory_layer.sql @@ -0,0 +1,510 @@ +PRAGMA foreign_keys = OFF; +BEGIN; + +DROP TRIGGER IF EXISTS memories_fts_insert; +DROP TRIGGER IF EXISTS memories_fts_update_delete; +DROP TRIGGER IF EXISTS memories_fts_update_insert; +DROP TRIGGER IF EXISTS memories_fts_delete; +DROP TRIGGER IF EXISTS memories_temporal_class_check; +DROP TRIGGER IF EXISTS memories_temporal_class_update_check; +DROP TRIGGER IF EXISTS memories_validate_ts_insert; +DROP TRIGGER IF EXISTS memories_validate_ts_update; +DROP TRIGGER IF EXISTS meb_after_memory_insert; +DROP TRIGGER IF EXISTS meb_after_memory_update; +DROP TRIGGER IF EXISTS trg_memory_ignition_insert; +DROP TRIGGER IF EXISTS trg_gw_broadcast_meb; +DROP TRIGGER IF EXISTS trg_gw_broadcast_workspace; +DROP TRIGGER IF EXISTS memories_visibility_check_insert; +DROP TRIGGER IF EXISTS memories_visibility_check_update; +DROP TRIGGER IF EXISTS trg_memory_delete_cascade_edges; +DROP TRIGGER IF EXISTS trg_agent_delete_nullify_validation; +DROP VIEW IF EXISTS decoherent_memories; +DROP TABLE IF EXISTS memories_fts; + +CREATE TEMP TABLE memories_backup AS +SELECT + id, agent_id, category, scope, content, confidence, source_event_id, + supersedes_id, tags, expires_at, recalled_count, last_recalled_at, + created_at, updated_at, retired_at, epoch_id, temporal_class, + validation_agent_id, validated_at, trust_score, derived_from_ids, + retracted_at, retraction_reason, 
version, memory_type, protected, + salience_score, gw_broadcast, visibility, read_acl, ewc_importance, + alpha, beta, confidence_phase, hilbert_projection, coherence_syndrome, + decoherence_rate, gated_from_memory_id, file_path, file_line, write_tier, + indexed, promoted_at, replay_priority, ripple_tags, labile_until, + labile_agent_id, retrieval_prediction_error, encoding_affect_id, + tag_cycles_remaining, stability, encoding_task_context, + encoding_context_hash, temporal_level, next_review_at, q_value +FROM memories; + +DROP TABLE memories; + +CREATE TABLE memories ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + agent_id TEXT NOT NULL REFERENCES agents(id), -- who wrote this + category TEXT NOT NULL, -- 'identity', 'user', 'environment', 'convention', + -- 'project', 'decision', 'lesson', 'preference' + scope TEXT NOT NULL DEFAULT 'global', -- 'global', 'project:', 'agent:' + content TEXT NOT NULL, -- the actual memory + confidence REAL NOT NULL DEFAULT 1.0, -- 0.0-1.0, decays or gets boosted + source_event_id INTEGER, -- event that spawned this memory + supersedes_id INTEGER REFERENCES memories(id), -- if this replaces an older memory + tags TEXT, -- JSON array of tags + expires_at TEXT, -- optional TTL + recalled_count INTEGER NOT NULL DEFAULT 0, -- how often this memory was retrieved + last_recalled_at TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')), + retired_at TEXT, -- soft delete + epoch_id INTEGER REFERENCES epochs(id), + temporal_class TEXT NOT NULL DEFAULT 'medium', + validation_agent_id TEXT REFERENCES agents(id), + validated_at TEXT, + trust_score REAL DEFAULT 1.0, + derived_from_ids TEXT, + retracted_at TEXT, + retraction_reason TEXT, + version INTEGER NOT NULL DEFAULT 1, + memory_type TEXT NOT NULL DEFAULT 'episodic' CHECK(memory_type IN ('episodic','semantic','procedural')), + protected INTEGER NOT NULL DEFAULT 0, + salience_score REAL NOT NULL DEFAULT 0.0, + gw_broadcast INTEGER NOT 
NULL DEFAULT 0, + visibility TEXT NOT NULL DEFAULT 'public', + read_acl TEXT, + ewc_importance REAL NOT NULL DEFAULT 0.0, + alpha REAL DEFAULT 1.0, + beta REAL DEFAULT 1.0, + confidence_alpha REAL GENERATED ALWAYS AS (alpha) VIRTUAL, + confidence_beta REAL GENERATED ALWAYS AS (beta) VIRTUAL, + confidence_phase REAL NOT NULL DEFAULT 0.0, + hilbert_projection BLOB DEFAULT NULL, + coherence_syndrome TEXT DEFAULT NULL, + decoherence_rate REAL DEFAULT NULL, + gated_from_memory_id INTEGER REFERENCES memories(id), + file_path TEXT, + file_line INTEGER, + write_tier TEXT NOT NULL DEFAULT 'full' CHECK(write_tier IN ('skip', 'construct', 'full')), + indexed INTEGER NOT NULL DEFAULT 1, + promoted_at TEXT DEFAULT NULL, + replay_priority REAL NOT NULL DEFAULT 0.0, + ripple_tags INTEGER NOT NULL DEFAULT 0, + labile_until TEXT DEFAULT NULL, + labile_agent_id TEXT DEFAULT NULL, + retrieval_prediction_error REAL DEFAULT NULL, + encoding_affect_id INTEGER REFERENCES affect_log(id) DEFAULT NULL, + tag_cycles_remaining INTEGER DEFAULT 0, + stability REAL DEFAULT 1.0, + encoding_task_context TEXT DEFAULT NULL, + encoding_context_hash TEXT DEFAULT NULL, + temporal_level TEXT NOT NULL DEFAULT 'moment' + CHECK(temporal_level IN ('moment','session','day','week','month','quarter')), + next_review_at TEXT DEFAULT NULL, + q_value REAL DEFAULT 0.5 +); + +INSERT INTO memories ( + id, agent_id, category, scope, content, confidence, source_event_id, + supersedes_id, tags, expires_at, recalled_count, last_recalled_at, + created_at, updated_at, retired_at, epoch_id, temporal_class, + validation_agent_id, validated_at, trust_score, derived_from_ids, + retracted_at, retraction_reason, version, memory_type, protected, + salience_score, gw_broadcast, visibility, read_acl, ewc_importance, + alpha, beta, confidence_phase, hilbert_projection, coherence_syndrome, + decoherence_rate, gated_from_memory_id, file_path, file_line, write_tier, + indexed, promoted_at, replay_priority, ripple_tags, labile_until, + 
labile_agent_id, retrieval_prediction_error, encoding_affect_id, + tag_cycles_remaining, stability, encoding_task_context, + encoding_context_hash, temporal_level, next_review_at, q_value +) +SELECT + id, agent_id, category, scope, content, confidence, source_event_id, + supersedes_id, tags, expires_at, recalled_count, last_recalled_at, + created_at, updated_at, retired_at, epoch_id, temporal_class, + validation_agent_id, validated_at, trust_score, derived_from_ids, + retracted_at, retraction_reason, version, memory_type, protected, + salience_score, gw_broadcast, visibility, read_acl, ewc_importance, + alpha, beta, confidence_phase, hilbert_projection, coherence_syndrome, + decoherence_rate, gated_from_memory_id, file_path, file_line, write_tier, + indexed, promoted_at, replay_priority, ripple_tags, labile_until, + labile_agent_id, retrieval_prediction_error, encoding_affect_id, + tag_cycles_remaining, stability, encoding_task_context, + encoding_context_hash, temporal_level, next_review_at, q_value +FROM memories_backup; + +DROP TABLE memories_backup; + +CREATE INDEX idx_memories_agent ON memories(agent_id); +CREATE INDEX idx_memories_category ON memories(category); +CREATE INDEX idx_memories_scope ON memories(scope); +CREATE INDEX idx_memories_active ON memories(retired_at) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_confidence ON memories(confidence DESC); +CREATE INDEX idx_memories_agent_active_cat ON memories(agent_id, category) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_agent_time ON memories(agent_id, created_at DESC) WHERE retired_at IS NULL; +CREATE INDEX IF NOT EXISTS idx_memories_encoding_affect + ON memories(encoding_affect_id) WHERE encoding_affect_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_memories_context_hash + ON memories(encoding_context_hash) WHERE encoding_context_hash IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_memories_next_review + ON memories(next_review_at) WHERE next_review_at IS NOT NULL AND retired_at IS NULL; 
+CREATE INDEX idx_memories_epoch ON memories(epoch_id); +CREATE INDEX idx_memories_temporal_class ON memories(temporal_class); +CREATE INDEX idx_memories_trust_score ON memories(trust_score); +CREATE INDEX idx_memories_retracted ON memories(retracted_at) WHERE retracted_at IS NOT NULL; +CREATE INDEX idx_memories_validation ON memories(validation_agent_id); +CREATE INDEX idx_memories_id_version ON memories(id, version) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_type ON memories(memory_type); +CREATE INDEX idx_memories_protected ON memories(protected) WHERE protected = 1; +CREATE INDEX idx_memories_gw_broadcast ON memories(gw_broadcast) WHERE gw_broadcast = 1; +CREATE INDEX idx_memories_salience ON memories(salience_score DESC) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_visibility ON memories(visibility); +CREATE INDEX idx_memories_ewc_importance ON memories(ewc_importance DESC) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_alpha ON memories(alpha) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_beta ON memories(beta) WHERE retired_at IS NULL; +CREATE INDEX idx_memories_confidence_phase ON memories(agent_id, confidence_phase) WHERE confidence_phase != 0.0; +CREATE INDEX idx_memories_decoherence_rate ON memories(decoherence_rate DESC) WHERE decoherence_rate IS NOT NULL; +CREATE INDEX idx_memories_coherence_syndrome ON memories(agent_id) WHERE coherence_syndrome IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_memories_replay ON memories(replay_priority DESC) WHERE retired_at IS NULL; +CREATE INDEX IF NOT EXISTS idx_memories_labile ON memories(labile_until) WHERE labile_until IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_memories_temporal_level ON memories(temporal_level, agent_id); + +CREATE VIEW decoherent_memories AS + SELECT id, content, confidence, coherence_syndrome, decoherence_rate, + temporal_class, created_at, updated_at + FROM memories + WHERE coherence_syndrome IS NOT NULL OR decoherence_rate IS NOT NULL + ORDER BY 
decoherence_rate DESC; + +CREATE VIRTUAL TABLE memories_fts USING fts5( + content, + category, + tags, + content=memories, + content_rowid=id, + tokenize='porter unicode61' +); + +CREATE TRIGGER memories_fts_insert AFTER INSERT ON memories WHEN new.indexed = 1 BEGIN + INSERT INTO memories_fts(rowid, content, category, tags) VALUES (new.id, new.content, new.category, new.tags); +END; + +CREATE TRIGGER memories_fts_update_delete AFTER UPDATE ON memories WHEN old.indexed = 1 BEGIN + INSERT INTO memories_fts(memories_fts, rowid, content, category, tags) + VALUES ('delete', old.id, old.content, old.category, old.tags); +END; + +CREATE TRIGGER memories_fts_update_insert AFTER UPDATE ON memories WHEN new.indexed = 1 AND new.retired_at IS NULL BEGIN + INSERT INTO memories_fts(rowid, content, category, tags) + VALUES (new.id, new.content, new.category, new.tags); +END; + +CREATE TRIGGER memories_fts_delete AFTER DELETE ON memories BEGIN + INSERT INTO memories_fts(memories_fts, rowid, content, category, tags) VALUES('delete', old.id, old.content, old.category, old.tags); +END; + +CREATE TRIGGER memories_temporal_class_check +BEFORE INSERT ON memories +WHEN NEW.temporal_class NOT IN ('permanent', 'long', 'medium', 'short', 'ephemeral') +BEGIN + SELECT RAISE(ABORT, 'temporal_class must be one of: permanent, long, medium, short, ephemeral'); +END; + +CREATE TRIGGER memories_temporal_class_update_check +BEFORE UPDATE OF temporal_class ON memories +WHEN NEW.temporal_class NOT IN ('permanent', 'long', 'medium', 'short', 'ephemeral') +BEGIN + SELECT RAISE(ABORT, 'temporal_class must be one of: permanent, long, medium, short, ephemeral'); +END; + +CREATE TRIGGER memories_validate_ts_insert +BEFORE INSERT ON memories +WHEN NEW.created_at NOT LIKE '____-__-__T%' +BEGIN + SELECT RAISE(ABORT, 'memories.created_at must be ISO 8601 (YYYY-MM-DDTHH:MM:SS)'); +END; + +CREATE TRIGGER memories_validate_ts_update +BEFORE UPDATE OF created_at ON memories +WHEN NEW.created_at NOT LIKE 
'____-__-__T%' +BEGIN + SELECT RAISE(ABORT, 'memories.created_at must be ISO 8601 (YYYY-MM-DDTHH:MM:SS)'); +END; + +CREATE TRIGGER IF NOT EXISTS trg_agent_delete_nullify_validation +AFTER DELETE ON agents +BEGIN + UPDATE memories + SET validation_agent_id = NULL + WHERE validation_agent_id = OLD.id; +END; + +CREATE TRIGGER meb_after_memory_insert +AFTER INSERT ON memories +BEGIN + INSERT INTO memory_events (memory_id, agent_id, operation, category, scope, memory_type, created_at) + VALUES ( + new.id, + new.agent_id, + 'insert', + new.category, + new.scope, + COALESCE(new.memory_type, 'episodic'), + strftime('%Y-%m-%dT%H:%M:%S', 'now') + ); +END; + +CREATE TRIGGER meb_after_memory_update +AFTER UPDATE OF content, category, scope, confidence, trust_score, memory_type ON memories +WHEN new.retired_at IS NULL +BEGIN + INSERT INTO memory_events (memory_id, agent_id, operation, category, scope, memory_type, created_at) + VALUES ( + new.id, + new.agent_id, + 'update', + new.category, + new.scope, + COALESCE(new.memory_type, 'episodic'), + strftime('%Y-%m-%dT%H:%M:%S', 'now') + ); +END; + +CREATE TRIGGER trg_memory_ignition_insert +AFTER INSERT ON memories +WHEN NEW.retired_at IS NULL +BEGIN + -- Compute salience: priority signal (via category) + confidence + recency boost + -- Categories map to implicit priority: decision/identity/convention = high + -- We approximate salience from confidence since we don't have event priority here. + -- Full salience scoring is done in Python; trigger handles high-confidence fast path. 
+ INSERT INTO workspace_broadcasts (memory_id, agent_id, salience, summary, target_scope, triggered_by) + SELECT + NEW.id, + NEW.agent_id, + NEW.confidence, + substr(NEW.content, 1, 200), + COALESCE(NEW.scope, 'global'), + 'auto' + WHERE NEW.confidence >= COALESCE( + -- Use urgent threshold if neuromod org_state = 'incident', else normal + CASE + WHEN EXISTS ( + SELECT 1 FROM neuromodulation_state WHERE id = 1 AND org_state = 'incident' + ) THEN (SELECT CAST(value AS REAL) FROM workspace_config WHERE key = 'urgent_threshold') + ELSE (SELECT CAST(value AS REAL) FROM workspace_config WHERE key = 'ignition_threshold') + END, + 0.85 + ) + AND (SELECT value FROM workspace_config WHERE key = 'enabled') = '1' + -- Governor: don't fire if we've already broadcast governor_max_per_hour in last hour + AND ( + SELECT COUNT(*) FROM workspace_broadcasts + WHERE broadcast_at >= strftime('%Y-%m-%dT%H:%M:%S', datetime('now', '-1 hour')) + ) < CAST((SELECT value FROM workspace_config WHERE key = 'governor_max_per_hour') AS INTEGER); +END; + +CREATE TRIGGER trg_gw_broadcast_meb +AFTER UPDATE OF gw_broadcast ON memories +WHEN NEW.gw_broadcast = 1 AND OLD.gw_broadcast = 0 AND NEW.retired_at IS NULL +BEGIN + INSERT INTO memory_events (memory_id, agent_id, operation, category, scope, memory_type, created_at) + VALUES ( + NEW.id, + NEW.agent_id, + 'broadcast', + NEW.category, + COALESCE(NEW.scope, 'global'), + COALESCE(NEW.memory_type, 'episodic'), + strftime('%Y-%m-%dT%H:%M:%S', 'now') + ); +END; + +CREATE TRIGGER trg_gw_broadcast_workspace +AFTER UPDATE OF gw_broadcast ON memories +WHEN NEW.gw_broadcast = 1 AND OLD.gw_broadcast = 0 AND NEW.retired_at IS NULL +BEGIN + INSERT OR IGNORE INTO workspace_broadcasts (memory_id, agent_id, salience, summary, target_scope, triggered_by) + SELECT + NEW.id, + NEW.agent_id, + NEW.salience_score, + substr(NEW.content, 1, 200), + COALESCE(NEW.scope, 'global'), + 'gw_score' + WHERE NOT EXISTS ( + SELECT 1 FROM workspace_broadcasts wb WHERE wb.memory_id 
= NEW.id + AND wb.broadcast_at >= strftime('%Y-%m-%dT%H:%M:%S', datetime('now', '-48 hours')) + ); +END; + +CREATE TRIGGER memories_visibility_check_insert +BEFORE INSERT ON memories +WHEN NEW.visibility NOT IN ('public', 'project', 'agent', 'restricted') +BEGIN + SELECT RAISE(ABORT, 'memories.visibility must be one of: public, project, agent, restricted'); +END; + +CREATE TRIGGER memories_visibility_check_update +BEFORE UPDATE OF visibility ON memories +WHEN NEW.visibility NOT IN ('public', 'project', 'agent', 'restricted') +BEGIN + SELECT RAISE(ABORT, 'memories.visibility must be one of: public, project, agent, restricted'); +END; + +CREATE TRIGGER IF NOT EXISTS trg_memory_delete_cascade_edges +AFTER DELETE ON memories +BEGIN + DELETE FROM knowledge_edges + WHERE (source_table = 'memories' AND source_id = OLD.id) + OR (target_table = 'memories' AND target_id = OLD.id); +END; + +INSERT INTO memories_fts(memories_fts) VALUES ('rebuild'); + +CREATE TABLE IF NOT EXISTS procedures ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + memory_id INTEGER NOT NULL UNIQUE REFERENCES memories(id) ON DELETE CASCADE, + procedure_key TEXT UNIQUE, + title TEXT, + goal TEXT NOT NULL, + description TEXT, + task_family TEXT, + procedure_kind TEXT NOT NULL DEFAULT 'workflow', + trigger_conditions TEXT, + preconditions TEXT, + constraints_json TEXT, + steps_json TEXT NOT NULL, + tools_json TEXT, + failure_modes_json TEXT, + rollback_steps_json TEXT, + success_criteria_json TEXT, + repair_strategies_json TEXT, + tool_policy_json TEXT, + expected_outcomes TEXT, + applicability_scope TEXT NOT NULL DEFAULT 'global', + temporal_class TEXT DEFAULT 'durable', + status TEXT NOT NULL DEFAULT 'active' + CHECK(status IN ('active','candidate','stale','needs_review','superseded','retired')), + automation_ready INTEGER NOT NULL DEFAULT 0, + determinism REAL NOT NULL DEFAULT 0.5, + confidence REAL NOT NULL DEFAULT 0.5, + utility_score REAL NOT NULL DEFAULT 0.5, + generality_score REAL NOT NULL DEFAULT 0.5, 
+ support_count INTEGER NOT NULL DEFAULT 0, + execution_count INTEGER NOT NULL DEFAULT 0, + success_count INTEGER NOT NULL DEFAULT 0, + failure_count INTEGER NOT NULL DEFAULT 0, + last_used_at TEXT, + last_executed_at TEXT, + last_validated_at TEXT, + stale_after_days INTEGER NOT NULL DEFAULT 90, + supersedes_procedure_id INTEGER REFERENCES procedures(id), + retired_at TEXT, + search_text TEXT NOT NULL, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_procedures_kind ON procedures(procedure_kind); +CREATE INDEX IF NOT EXISTS idx_procedures_status ON procedures(status); +CREATE INDEX IF NOT EXISTS idx_procedures_last_validated ON procedures(last_validated_at); +CREATE INDEX IF NOT EXISTS idx_procedures_execution_count ON procedures(execution_count DESC); +CREATE INDEX IF NOT EXISTS idx_procedures_scope ON procedures(applicability_scope); +CREATE INDEX IF NOT EXISTS idx_procedures_memory_id ON procedures(memory_id); +CREATE INDEX IF NOT EXISTS idx_procedures_supersedes ON procedures(supersedes_procedure_id); + +CREATE TABLE IF NOT EXISTS procedure_steps ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE, + step_order INTEGER NOT NULL, + action TEXT NOT NULL, + rationale TEXT, + tool_name TEXT, + expected_output TEXT, + stop_condition TEXT, + retry_policy TEXT, + rollback_hint TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_procedure_steps_procedure_order +ON procedure_steps(procedure_id, step_order); + +CREATE TABLE IF NOT EXISTS procedure_sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE, + memory_id INTEGER REFERENCES memories(id) ON DELETE CASCADE, + event_id INTEGER REFERENCES events(id) ON DELETE CASCADE, + decision_id INTEGER REFERENCES decisions(id) ON DELETE CASCADE, + 
entity_id INTEGER REFERENCES entities(id) ON DELETE CASCADE, + source_role TEXT NOT NULL DEFAULT 'evidence', + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_procedure_sources_procedure ON procedure_sources(procedure_id); +CREATE INDEX IF NOT EXISTS idx_procedure_sources_memory ON procedure_sources(memory_id); +CREATE INDEX IF NOT EXISTS idx_procedure_sources_event ON procedure_sources(event_id); +CREATE INDEX IF NOT EXISTS idx_procedure_sources_decision ON procedure_sources(decision_id); + +CREATE TABLE IF NOT EXISTS procedure_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE, + agent_id TEXT REFERENCES agents(id), + task_family TEXT, + task_signature TEXT, + input_summary TEXT, + outcome_summary TEXT, + success INTEGER NOT NULL DEFAULT 0, + usefulness_score REAL, + errors_seen TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_procedure_runs_procedure_created +ON procedure_runs(procedure_id, created_at DESC); + +CREATE TABLE IF NOT EXISTS procedure_candidates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + candidate_signature TEXT NOT NULL UNIQUE, + task_family TEXT, + normalized_signature TEXT NOT NULL, + support_count INTEGER NOT NULL DEFAULT 0, + evidence_json TEXT, + mean_success REAL NOT NULL DEFAULT 0.0, + promoted_procedure_id INTEGER REFERENCES procedures(id), + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_procedure_candidates_family ON procedure_candidates(task_family); +CREATE INDEX IF NOT EXISTS idx_procedure_candidates_support ON procedure_candidates(support_count DESC); + +CREATE VIRTUAL TABLE IF NOT EXISTS procedures_fts USING fts5( + title, + goal, + description, + task_family, + search_text, + content=procedures, + content_rowid=id, + tokenize='porter unicode61' +); + +CREATE TRIGGER IF NOT EXISTS 
procedures_fts_insert AFTER INSERT ON procedures BEGIN + INSERT INTO procedures_fts(rowid, title, goal, description, task_family, search_text) + VALUES (new.id, new.title, new.goal, new.description, new.task_family, new.search_text); +END; + +CREATE TRIGGER IF NOT EXISTS procedures_fts_update AFTER UPDATE ON procedures BEGIN + INSERT INTO procedures_fts(procedures_fts, rowid, title, goal, description, task_family, search_text) + VALUES ('delete', old.id, old.title, old.goal, old.description, old.task_family, old.search_text); + INSERT INTO procedures_fts(rowid, title, goal, description, task_family, search_text) + VALUES (new.id, new.title, new.goal, new.description, new.task_family, new.search_text); +END; + +CREATE TRIGGER IF NOT EXISTS procedures_fts_delete AFTER DELETE ON procedures BEGIN + INSERT INTO procedures_fts(procedures_fts, rowid, title, goal, description, task_family, search_text) + VALUES ('delete', old.id, old.title, old.goal, old.description, old.task_family, old.search_text); +END; + +COMMIT; +PRAGMA foreign_keys = ON; diff --git a/docs/PROCEDURAL_MEMORY_MIGRATION.md b/docs/PROCEDURAL_MEMORY_MIGRATION.md new file mode 100644 index 0000000..24a74f6 --- /dev/null +++ b/docs/PROCEDURAL_MEMORY_MIGRATION.md @@ -0,0 +1,88 @@ +# Procedural Memory Migration Notes + +This note documents the safety boundary for +`db/migrations/052_procedural_memory_layer.sql`. + +## What Changes + +Migration 052 adds procedural memory as a first-class layer: + +- widens `memories.memory_type` from `episodic|semantic` to + `episodic|semantic|procedural`; +- adds canonical procedure tables: + `procedures`, `procedure_steps`, `procedure_sources`, `procedure_runs`, and + `procedure_candidates`; +- adds `procedures_fts` plus triggers so procedural records are searchable + with plain SQLite FTS5; +- keeps a one-to-one bridge row in `memories` through + `procedures.memory_id` so older generic memory search surfaces still have a + human-readable synopsis. 
+ +## Transaction Safety + +Migration 052 contains its own explicit all-or-nothing transaction boundary: +it starts with `PRAGMA foreign_keys = OFF; BEGIN;` and does not re-enable +foreign keys until after `COMMIT;`. The migration runner records the +`schema_versions` row only after the SQL script completes successfully. If any +statement inside the script fails before `COMMIT`, SQLite rolls back the +in-flight schema rebuild and the migration is not marked applied. + +The `memories` table is rebuilt to widen the CHECK constraint because SQLite +cannot alter CHECK constraints in place. The rebuild copies existing rows +forward into a temporary backup table, recreates `memories` with the expanded +`episodic|semantic|procedural` constraint, restores the original IDs and column +values, then recreates the FTS/index/trigger contracts expected by fresh +install schemas. + +The procedural companion tables are additive. They do not delete or compress +episodic evidence, semantic facts, events, decisions, entities, or graph edges. + +## Backwards Compatibility + +Newer brainctl versions can read older databases and apply migration 052. + +Older brainctl versions are expected to keep reading migrated databases for +ordinary episodic and semantic rows because the existing `memories` columns are +preserved and the widened CHECK constraint still accepts their existing +`episodic` and `semantic` writes. Older versions will not understand canonical +procedure tables or `memory_type='procedural'` rows. Older code paths that +validate `memory_type` in Python may reject or ignore procedural rows, and +older query surfaces will only see the bridge synopsis row in `memories` rather +than the structured `procedures` payload. + +Forward compatibility is therefore read-mostly for older clients: legacy +episodic/semantic reads and writes should continue, but procedure creation, +procedure feedback, and procedure-aware search require the version that ships +migration 052. 
Operators that need strict mixed-version compatibility should +roll all active writers forward before enabling procedural writes. + +## Failure and Rollback + +If migration application fails before commit, SQLite rolls the transaction back +and the original schema remains in place. + +If an operator needs to roll back after a successful migration, use the normal +local-first backup path: + +1. stop writers using the target `brain.db`; +2. restore the pre-migration `brain.db` backup if one was taken; +3. otherwise run a forward-only corrective migration rather than editing + migration 052 in place. + +Migration files remain append-only. Do not modify 052 after release; add a new +numbered migration for corrections. + +## Versioning Notes + +This schema should ship with a version bump because it introduces a new +user-visible memory type and new public procedure APIs. The compatibility +matrix should state that procedural-memory writes require a version at or above +the release containing migration 052, while older clients may still read +non-procedural rows from the migrated database. + +## Fresh Install Parity + +`db/init_schema.sql` and `src/agentmemory/db/init_schema.sql` must include the +same procedural schema as migration 052 so fresh installs and upgraded +databases converge. Keep `tests/test_schema_parity.py` and +`tests/test_migrate.py` passing when changing either schema path. diff --git a/scripts/check_docs.py b/scripts/check_docs.py index 9beb1ce..1ea29e9 100755 --- a/scripts/check_docs.py +++ b/scripts/check_docs.py @@ -89,7 +89,7 @@ def main(): print(f" ✗ {e}", file=sys.stderr) sys.exit(1) - print("\nAll doc counts match implementation. ✓") + print("\nAll doc counts match implementation. 
OK") sys.exit(0) diff --git a/src/agentmemory/_impl.py b/src/agentmemory/_impl.py index 83ccb55..5b6ff5c 100644 --- a/src/agentmemory/_impl.py +++ b/src/agentmemory/_impl.py @@ -62,26 +62,26 @@ def _builtin_classify_intent(query): if any(w in q for w in ['who ', 'person', 'agent', 'team', 'assigned']): return _BuiltinIntentResult('entity_lookup', 0.8, 'keyword:entity', 'Show entity details with relations', - ['memories', 'events', 'context']) + ['memories', 'procedures', 'events', 'context']) if any(w in q for w in ['what happened', 'when did', 'history', 'timeline', 'log']): return _BuiltinIntentResult('event_lookup', 0.8, 'keyword:event', 'Show events in chronological order', - ['events', 'memories', 'context']) - if any(w in q for w in ['how to', 'how do', 'procedure', 'steps', 'guide']): + ['events', 'memories', 'context', 'procedures']) + if any(w in q for w in ['how to', 'how do', 'procedure', 'steps', 'guide', 'rollback', 'runbook', 'playbook', 'troubleshoot']): return _BuiltinIntentResult('procedural', 0.7, 'keyword:procedural', 'Show step-by-step instructions', - ['memories', 'context', 'events']) + ['procedures', 'memories', 'decisions', 'events', 'context']) if any(w in q for w in ['why ', 'decision', 'rationale', 'reason']): return _BuiltinIntentResult('decision_lookup', 0.8, 'keyword:decision', 'Show decisions with rationale', - ['memories', 'events', 'context']) + ['decisions', 'memories', 'procedures', 'events', 'context']) if any(w in q for w in ['related', 'connected', 'depends', 'link']): return _BuiltinIntentResult('graph_traversal', 0.7, 'keyword:graph', 'Show connected nodes and edges', - ['memories', 'events', 'context']) + ['memories', 'events', 'context', 'procedures']) return _BuiltinIntentResult('general', 0.5, 'default', 'Standard search results', - ['memories', 'events', 'context']) + ['memories', 'procedures', 'events', 'context']) # Quantum amplitude scorer try: @@ -157,21 +157,18 @@ def _builtin_classify_intent(query): # via 
`_CE_WARMUP_SEEN[0] = 0`. _CE_WARMUP_SEEN = [0] -# FTS5 special characters that cause sqlite3.OperationalError when unescaped. -# Strip them before passing any user query to a MATCH clause. -# -# Includes `?` and `!` — natural-language queries from agents and humans -# contain these constantly ("What does X prefer?") and used to crash -# cmd_search with "fts5: syntax error near ?". Also includes common ASCII -# punctuation (`,;:`) that has no operator meaning in FTS5 but still breaks -# tokenisation when glued to a word. -_FTS5_SPECIAL = re.compile(r'[.&|*"\'`()\-@^?!,;:]') +# FTS5 MATCH is brittle around punctuation and symbolic tokens. Strip any +# non-word, non-space character, plus `_`, before building the MATCH +# expression. This covers common natural-language queries like "$5 coupon", +# "LGBTQ+", "7/22", "#PlankChallenge", "SIAC_GEE", and smart quotes. +_FTS5_SPECIAL = re.compile(r"[^\w\s]|_") def _sanitize_fts_query(query: str) -> str: """Remove FTS5 special characters to prevent syntax errors. - Strips: . & | * \" ' ` ( ) - @ ^ ? ! , ; : + Strips every non-word, non-space character, plus `_`, as matched by + `_FTS5_SPECIAL`. Then collapses extra whitespace. Returns an empty string if nothing remains so callers can skip the MATCH clause gracefully. 
""" @@ -186,7 +183,22 @@ def _sanitize_fts_query(query: str) -> str: "a", "an", "and", "are", "as", "at", "be", "by", "do", "does", "for", "from", "has", "have", "how", "i", "in", "is", "it", "its", "of", "on", "or", "that", "the", "to", "was", "we", "what", "when", "where", - "which", "who", "why", "will", "with", "you", + "which", "who", "why", "will", "with", "you", "use", "uses", "used", "using", +} + +_FTS_QUERY_EXPANSIONS = { + "choose": ("chose", "chosen"), + "chose": ("choose", "chosen"), + "chosen": ("choose", "chose"), + "store": ("stores", "stored"), + "stores": ("store", "stored"), + "stored": ("store", "stores"), + "prefer": ("prefers", "preferred"), + "prefers": ("prefer",), + "embedding": ("embeddings", "embed"), + "embeddings": ("embedding", "embed"), + "use": ("uses", "using", "used"), + "uses": ("use", "using"), } @@ -208,7 +220,17 @@ def _build_fts_match_expression(sanitized: str) -> str: meaningful = [t for t in tokens if t.lower() not in _FTS_STOPWORDS and len(t) > 1] if not meaningful: meaningful = tokens - return " OR ".join(meaningful) + expanded: list[str] = [] + seen: set[str] = set() + for token in meaningful: + variants = (token, *_FTS_QUERY_EXPANSIONS.get(token.lower(), ())) + for variant in variants: + key = variant.lower() + if key in _FTS_STOPWORDS or key in seen: + continue + seen.add(key) + expanded.append(variant) + return " OR ".join(expanded or meaningful) # Temporal recency decay constants (lambda) — configurable per scope # half-life: global ~70d, project ~23d, agent ~14d @@ -3186,6 +3208,31 @@ def cmd_memory_add(args): memory_id = cursor.lastrowid db.commit() # ensure the INSERT (and FTS trigger) is committed before subprocess exit + procedure_id = None + if memory_type == "procedural": + try: + from agentmemory import procedural as _procedural + + proc = _procedural.ensure_procedure_for_memory( + db, + memory_id=memory_id, + agent_id=args.agent, + ) + procedure_id = proc.get("id") + db.commit() + except Exception as exc: + 
logger.debug("procedural bridge creation failed for memory %s: %s", memory_id, exc) + + indexed_row = db.execute( + "SELECT content, category, tags FROM memories WHERE id = ?", + (memory_id,), + ).fetchone() + indexed_content = indexed_row["content"] if indexed_row else args.content + indexed_category = indexed_row["category"] if indexed_row else args.category + indexed_tags = indexed_row["tags"] if indexed_row else (tags_json or "") + if indexed_content != args.content: + blob = None + # Workaround: FTS5 content-external tables may not build the inverted index # from trigger INSERTs on some SQLite versions. Force a re-index for this memory. if do_index: @@ -3193,11 +3240,11 @@ def cmd_memory_add(args): db.execute( "INSERT INTO memories_fts(memories_fts, rowid, content, category, tags) " "VALUES('delete', ?, ?, ?, ?)", - (memory_id, args.content, args.category, tags_json or '')) + (memory_id, indexed_content, indexed_category, indexed_tags or '')) db.execute( "INSERT INTO memories_fts(rowid, content, category, tags) " "VALUES (?, ?, ?, ?)", - (memory_id, args.content, args.category, tags_json or '')) + (memory_id, indexed_content, indexed_category, indexed_tags or '')) db.commit() except Exception: pass # non-fatal: FTS trigger may have already handled it @@ -3320,7 +3367,7 @@ def cmd_memory_add(args): if do_index: try: if not blob: - blob = _embed_query_safe(args.content) + blob = _embed_query_safe(indexed_content) if blob: db_vec = _try_get_db_with_vec() if db_vec: @@ -3349,6 +3396,8 @@ def cmd_memory_add(args): "conflict_logged": conflict_logged, "worthiness_score": worthiness_score, } + if procedure_id is not None: + out["procedure_id"] = procedure_id if auto_linked: out["auto_linked_entities"] = auto_linked if pii_info: @@ -6163,7 +6212,7 @@ def cmd_search(args, *, db=None, db_path: Optional[str] = None): "[brainctl] --benchmark: reranker chain disabled, returning raw FTS+vec ranking", file=sys.stderr, ) - results = {"memories": [], "events": [], "context": 
[], "decisions": []} + results = {"memories": [], "events": [], "context": [], "decisions": [], "procedures": []} # Accumulator for which signal-informativeness gates tripped this call. # Each value is a string reason like "uniform_timestamps_stdev_3.2s" or a # boolean True for benchmark-mode hard skips. Surfaced under the top-level @@ -6207,7 +6256,7 @@ def cmd_search(args, *, db=None, db_path: Optional[str] = None): os.environ.get("BRAINCTL_DISABLE_INTENT_ROUTER") ) if args.tables: - tables = args.tables.split(",") + tables = [t.strip() for t in args.tables.split(",") if t.strip()] elif _intent_router_disabled: tables = ["memories", "events", "context", "entities", "decisions"] elif _INTENT_AVAILABLE: @@ -6231,6 +6280,17 @@ def cmd_search(args, *, db=None, db_path: Optional[str] = None): and "decisions" not in tables ): tables = list(set(tables) | {"memories", "events", "context", "decisions"}) + _query_plan = None + _query_plan_dict = None + try: + from agentmemory.retrieval.query_planner import plan_query as _plan_query + + _query_plan = _plan_query(query, requested_tables=tables if args.tables else None) + _query_plan_dict = _query_plan.as_dict() + if not args.tables: + tables = list(dict.fromkeys((_query_plan.candidate_tables or []) + list(tables))) + except Exception as exc: + _debug_skips["query_plan.skipped"] = f"{type(exc).__name__}: {exc}" base_fetch = limit * 5 if not no_recency else limit * 3 fetch_limit = max(limit, round(base_fetch * _nm_breadth)) # Build an OR-expanded FTS5 MATCH expression so natural-language queries @@ -6298,6 +6358,7 @@ def _fts_memories(): "m.trust_score, m.replay_priority " "FROM memories m JOIN memories_fts f ON m.id = f.rowid " "WHERE memories_fts MATCH ? 
AND m.retired_at IS NULL " + "AND COALESCE(m.memory_type, 'episodic') != 'procedural' " "ORDER BY bm25(memories_fts, 3.0, 1.0, 1.0) LIMIT ?", (fts_query, fetch_limit) ).fetchall() @@ -6323,7 +6384,8 @@ def _vec_memories(): f"created_at, recalled_count, temporal_class, last_recalled_at, retrieval_prediction_error, alpha, beta, agent_id, " f"encoding_task_context, encoding_context_hash, q_value, confidence_phase, " f"trust_score, replay_priority " - f"FROM memories WHERE id IN ({ph}) AND retired_at IS NULL", + f"FROM memories WHERE id IN ({ph}) AND retired_at IS NULL " + f"AND COALESCE(memory_type, 'episodic') != 'procedural'", rowids ).fetchall() out = [dict(r) | {"distance": round(dist_map.get(r["id"], 1.0), 4)} for r in src_rows] @@ -6796,7 +6858,9 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke explore_rows = db.execute( "SELECT id, 'memory' as type, category, content, confidence, scope, " "created_at, recalled_count, temporal_class, last_recalled_at " - "FROM memories WHERE retired_at IS NULL ORDER BY recalled_count ASC, RANDOM() LIMIT ?", + "FROM memories WHERE retired_at IS NULL " + "AND COALESCE(memory_type, 'episodic') != 'procedural' " + "ORDER BY recalled_count ASC, RANDOM() LIMIT ?", (limit * 10,) ).fetchall() explore_list = rows_to_list(explore_rows) @@ -6966,6 +7030,73 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke trimmed.extend(graph) results["context"] = trimmed + _procedure_debug = None + _pre_answerability_candidates = [] + if "procedures" in tables: + try: + proc_scope = None + if getattr(args, "project", None): + proc_scope = f"project:{args.project}" + try: + from agentmemory.retrieval.candidate_generation import generate_procedure_candidates as _generate_procedure_candidates + from agentmemory.retrieval.evidence_graph import expand_procedure_evidence as _expand_procedure_evidence + from agentmemory.retrieval.late_reranker import rerank_procedure_candidates as 
_rerank_procedure_candidates + from agentmemory.retrieval.query_planner import plan_query as _plan_query + except ImportError: + from agentmemory import procedural as _procedural + + direct = _procedural.search_procedures( + db, + query, + limit=limit, + scope=proc_scope, + debug=True, + ) + rows = direct.get("procedures", []) or [] + for row in rows: + row.setdefault("source", "procedure_fts") + row.setdefault("type", "procedure") + results["procedures"] = rows[:limit] + generated = {"debug": {"fallback": "procedural_service", **(direct.get("debug") or {})}} + evidence = {} + else: + if _query_plan is None: + _query_plan = _plan_query(query, requested_tables=tables) + _query_plan_dict = _query_plan.as_dict() + generated = _generate_procedure_candidates( + db, + query, + _query_plan, + limit=fetch_limit, + scope=proc_scope, + ) + evidence = _expand_procedure_evidence( + db, + generated.get("candidates", []), + max_sources_per_candidate=4, + ) + reranked = _rerank_procedure_candidates( + generated.get("candidates", []), + evidence, + benchmark_mode=benchmark_mode, + ) + results["procedures"] = reranked[:limit] + _pre_answerability_candidates = list(results["procedures"]) + _procedure_debug = { + "candidate_generation": generated.get("debug") or {}, + "evidence_clusters": { + str(proc_id): { + "support_bonus": info.get("support_bonus"), + "source_count": len(info.get("sources") or []), + "edge_count": len(info.get("edges") or []), + } + for proc_id, info in evidence.items() + }, + } + except Exception as exc: + results["procedures"] = [] + _debug_skips["procedures.skipped"] = f"{type(exc).__name__}: {exc}" + # Intent-based result weighting and decision search. 
# # cmd_search accepts two intent taxonomies: @@ -7034,6 +7165,14 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke r["final_score"] = round(r.get("final_score", 0.0) * 2.0, 8) results["events"] = sorted(results.get("events", []), key=lambda r: r.get("final_score", 0), reverse=True) + elif _intent == "procedural": + for r in results.get("procedures", []): + r["final_score"] = round(r.get("final_score", 0.0) * 1.2, 8) + results["procedures"] = sorted( + results.get("procedures", []), + key=lambda r: r.get("final_score", 0.0), + reverse=True, + ) # decision_lookup → also search decisions table elif _intent == "decision_lookup": if fts_query: @@ -7068,6 +7207,65 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke extra = _graph_expand(db, top_items, tbl_key, already) results.get(tbl_key, []).extend(extra) + def _seed_bucket_score(item, position): + try: + final_score = float(item.get("final_score") or 0.0) + except (TypeError, ValueError): + final_score = 0.0 + if final_score > 0: + return final_score + try: + rrf_score = float(item.get("rrf_score") or 0.0) + except (TypeError, ValueError): + rrf_score = 0.0 + if rrf_score > 0: + return rrf_score + try: + fts_rank = float(item.get("fts_rank") or 0.0) + except (TypeError, ValueError): + fts_rank = 0.0 + if fts_rank != 0.0: + return max(-fts_rank, 0.0) + try: + confidence = float(item.get("confidence") or 0.0) + except (TypeError, ValueError): + confidence = 0.0 + if confidence > 0: + return confidence + return max(1.0 / (position + 1), 0.01) + + def _normalize_bucket_scores(bucket_name): + rows = results.get(bucket_name, []) or [] + if not rows: + return + seeds = [_seed_bucket_score(row, idx) for idx, row in enumerate(rows)] + max_seed = max(seeds) or 1.0 + for row, seed in zip(rows, seeds): + row["retrieval_score"] = round(seed, 8) + row["final_score"] = round(seed / max_seed, 8) + rows.sort(key=lambda r: r.get("final_score", 0.0), reverse=True) + 
results[bucket_name] = rows + + for _bucket_name in ("procedures", "memories", "events", "context", "entities", "decisions"): + _normalize_bucket_scores(_bucket_name) + + _intent_bucket_multipliers = { + "procedural": {"procedures": 1.15, "memories": 0.95, "events": 0.85, "decisions": 0.8, "context": 0.75}, + "troubleshooting": {"procedures": 1.05, "events": 1.0, "memories": 0.95, "decisions": 0.85, "context": 0.75}, + "decision": {"decisions": 1.15, "memories": 1.05, "procedures": 0.65, "events": 0.85, "context": 0.75}, + "temporal": {"events": 1.15, "memories": 0.9, "procedures": 0.55, "decisions": 0.8, "context": 0.75}, + "factual": {"memories": 1.1, "entities": 1.05, "decisions": 0.95, "procedures": 0.55, "events": 0.8, "context": 0.75}, + "orientation": {"memories": 1.0, "events": 0.95, "procedures": 0.75, "context": 0.8, "decisions": 0.8}, + "graph": {"memories": 1.0, "events": 0.95, "decisions": 0.95, "procedures": 0.8, "context": 0.8}, + } + _normalized_intent = (_query_plan.normalized_intent if _query_plan else "factual") + for _bucket_name, _multiplier in _intent_bucket_multipliers.get(_normalized_intent, {}).items(): + _rows = results.get(_bucket_name, []) or [] + for _row in _rows: + _row["final_score"] = round(float(_row.get("final_score") or 0.0) * _multiplier, 8) + _rows.sort(key=lambda r: r.get("final_score", 0.0), reverse=True) + results[_bucket_name] = _rows + if db_vec: db_vec.close() @@ -7082,7 +7280,7 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke # --budget: trim results from lowest-ranked first until output fits within token cap if budget_tokens is not None: # Estimate current size; trim tail entries until we fit - for key in ("memories", "events", "context", "decisions"): + for key in ("memories", "events", "context", "decisions", "procedures"): lst = results.get(key, []) if not lst: continue @@ -7090,6 +7288,31 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke lst.pop() 
# remove lowest-ranked (already sorted desc) results[key] = lst + _top_candidates = sorted( + [ + item + for bucket in ("procedures", "memories", "events", "context", "decisions") + for item in (results.get(bucket, []) or []) + ], + key=lambda item: item.get("final_score", 0.0), + reverse=True, + ) + _answerability = None + if _query_plan is not None: + try: + from agentmemory.retrieval.answerability import assess_answerability as _assess_answerability + + _answerability = _assess_answerability( + query, + _query_plan, + {k: results.get(k, []) for k in ("procedures", "memories", "events", "context", "decisions")}, + ) + if _answerability.get("abstain") and _query_plan.abstain_allowed: + for key in ("memories", "events", "context", "decisions", "procedures"): + results[key] = [] + except Exception as exc: + _debug_skips["answerability.skipped"] = f"{type(exc).__name__}: {exc}" + total = sum(len(v) for v in results.values()) tokens_out = _estimate_tokens(results) log_access(db, args.agent or "unknown", "search", query=query, result_count=total, tokens_consumed=tokens_out) @@ -7097,37 +7320,41 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke # Update recalled_count for direct (non-graph) memory hits only. # Uses retrieval-practice strengthening: hard retrievals (high prediction error) # boost confidence more than easy ones (Roediger & Karpicke 2006, Bjork 1994). - for r in results.get("memories", []): - if r.get("source") != "graph": - _retrieval_practice_boost( - db, - r["id"], - retrieval_prediction_error=r.get("retrieval_prediction_error") or 0.0, - ) + # + # Benchmark mode deliberately skips these online-learning writes so the + # retrieval corpus stays stable across repeated synthetic queries. 
+ if not benchmark_mode: + for r in results.get("memories", []): + if r.get("source") != "graph": + _retrieval_practice_boost( + db, + r["id"], + retrieval_prediction_error=r.get("retrieval_prediction_error") or 0.0, + ) - # Online phase learning: nudge confidence_phase toward constructive (0) after recall - # Uses existing db connection to avoid lock contention with uncommitted recall_count updates. - try: - _has_phase_col = any( - col[1] == "confidence_phase" - for col in db.execute("PRAGMA table_info(memories)").fetchall() - ) - if _has_phase_col: - _delta = 0.05 - for r in results.get("memories", []): - if r.get("source") != "graph": - _pm_id = r["id"] - _pm_row = db.execute( - "SELECT confidence_phase FROM memories WHERE id=? AND retired_at IS NULL", - (_pm_id,) - ).fetchone() - if _pm_row and _pm_row[0] is not None: - import math as _pmath - _ph = float(_pm_row[0]) - _ph = (_ph + _delta if _ph > _pmath.pi else max(0.0, _ph - _delta)) % (2 * _pmath.pi) - db.execute("UPDATE memories SET confidence_phase=? WHERE id=?", (_ph, _pm_id)) - except Exception: - pass # phase learning is optional; never break search + # Online phase learning: nudge confidence_phase toward constructive (0) after recall + # Uses existing db connection to avoid lock contention with uncommitted recall_count updates. + try: + _has_phase_col = any( + col[1] == "confidence_phase" + for col in db.execute("PRAGMA table_info(memories)").fetchall() + ) + if _has_phase_col: + _delta = 0.05 + for r in results.get("memories", []): + if r.get("source") != "graph": + _pm_id = r["id"] + _pm_row = db.execute( + "SELECT confidence_phase FROM memories WHERE id=? AND retired_at IS NULL", + (_pm_id,) + ).fetchone() + if _pm_row and _pm_row[0] is not None: + import math as _pmath + _ph = float(_pm_row[0]) + _ph = (_ph + _delta if _ph > _pmath.pi else max(0.0, _ph - _delta)) % (2 * _pmath.pi) + db.execute("UPDATE memories SET confidence_phase=? 
WHERE id=?", (_ph, _pm_id)) + except Exception: + pass # phase learning is optional; never break search # Post-retrieval metacognitive tier annotation # Tier 1: high-confidence fresh results (≥3 direct results, avg_conf ≥ 0.7) @@ -7136,14 +7363,19 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke # Tier 4: coverage gap (0 direct results) # Exclude graph-expanded neighbours (source="graph") — they don't reflect query coverage memory_results = [r for r in results.get("memories", []) if r.get("source") != "graph"] + procedure_results = [r for r in results.get("procedures", []) if r.get("source") != "graph"] + direct_results = memory_results + procedure_results # Keyword/both hits: FTS5 textual matches — strongest evidence of genuine coverage - keyword_hits = [r for r in memory_results if r.get("source") in ("keyword", "both")] + keyword_hits = [ + r for r in direct_results + if r.get("source") in ("keyword", "both", "procedure_fts") + ] k_count = len(keyword_hits) - if not memory_results: + if not direct_results: tier = 4 tier_label = "gap-detected" - tier_note = "COVERAGE GAP — no memories match this query" + tier_note = "COVERAGE GAP — no grounded memories or procedures match this query" try: _log_gap(db, "coverage_hole", f"query:{_sanitize_fts_query(query)[:80]}", 1.0, triggered_by=query[:200]) except Exception: @@ -7171,19 +7403,19 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke elif k_count > 0: tier = 2 tier_label = "moderate" - tier_note = f"Only {k_count} keyword match(es); {len(memory_results)} total (includes semantic)" + tier_note = f"Only {k_count} direct lexical match(es); {len(direct_results)} total direct result(s)" else: tier = 3 tier_label = "weak-coverage" - tier_note = f"No keyword matches; {len(memory_results)} semantic-only result(s) — potential gap" + tier_note = f"No lexical direct matches; {len(direct_results)} semantic/procedural result(s) — potential gap" # Passive search 
instrumentation — append row to agent_uncertainty_log try: _unc_agent = getattr(args, "agent", None) or "unknown" _unc_domain = getattr(args, "scope", None) or (tables[0] if tables else "memories") _unc_avg_conf = None - if memory_results: - _conf_vals = [r.get("confidence") for r in memory_results if r.get("confidence") is not None] + if direct_results: + _conf_vals = [r.get("confidence") for r in direct_results if r.get("confidence") is not None] if _conf_vals: _unc_avg_conf = round(sum(_conf_vals) / len(_conf_vals), 4) db.execute( @@ -7231,12 +7463,29 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke except Exception: pass # trigger check is optional; never break search + _debug_payload = {} + try: + if _query_plan_dict is not None or _procedure_debug is not None or _answerability is not None: + from agentmemory.retrieval.diagnostics import build_debug_payload as _build_debug_payload + + _debug_payload = _build_debug_payload( + query_plan=_query_plan_dict or {}, + procedure_debug=_procedure_debug, + answerability=_answerability, + top_candidates=_top_candidates, + ) + except Exception as exc: + _debug_skips["diagnostics.skipped"] = f"{type(exc).__name__}: {exc}" + _out = { "mode": mode, "metacognition": { "tier": tier, "label": tier_label, "note": tier_note, + "answerability_score": (_answerability or {}).get("score"), + "answerability_reason": (_answerability or {}).get("reason"), + "abstained": (_answerability or {}).get("abstain", False), **_intent_meta, **_rollout_meta, }, @@ -7254,8 +7503,10 @@ def _apply_recency_and_trim(merged, scope_fn, use_adaptive_salience=False, bucke # "all_signals_informative" marker so downstream tooling can rely on # the key always being present in debug mode. Without `--debug` and # no skips, stay silent to keep the default response compact. 
- if _debug_skips: - _out["_debug"] = dict(_debug_skips) + if _debug_skips or _debug_payload: + _debug_out = dict(_debug_skips) + _debug_out.update(_debug_payload) + _out["_debug"] = _debug_out elif _debug_mode: _out["_debug"] = {"all_signals_informative": True} _ofmt = getattr(args, "output", "json") @@ -16075,8 +16326,8 @@ def build_parser(): mem_add.add_argument("--confidence", type=float) mem_add.add_argument("--tags", "-t", help="Comma-separated tags") mem_add.add_argument("--source-event", type=int) - mem_add.add_argument("--type", choices=["episodic", "semantic"], default="episodic", - help="Memory type: episodic (time-bound, faster decay) or semantic (durable facts, slower decay)") + mem_add.add_argument("--type", choices=["episodic", "semantic", "procedural"], default="episodic", + help="Memory type: episodic (time-bound, faster decay), semantic (durable facts), or procedural (structured workflows and runbooks)") mem_add.add_argument("--reflexion", action="store_true", help="Shorthand for failure lessons: sets category=lesson, auto-tags with 'reflexion'") mem_add.add_argument("--attribute", action="store_true", @@ -16142,6 +16393,13 @@ def build_parser(): mem_confidence = mem_sub.add_parser("confidence", help="Show Beta(α,β) Bayesian confidence breakdown") mem_confidence.add_argument("id", type=int, help="Memory ID") + try: + from agentmemory.commands.procedure import register_parser as _register_procedure_parser + + _register_procedure_parser(sub) + except Exception: + pass + # --- trust (top-level) --- trust = sub.add_parser("trust", help="Trust Score Engine — show, audit, calibrate, decay") trust_sub = trust.add_subparsers(dest="trust_cmd") @@ -16563,7 +16821,7 @@ def build_parser(): # --- search --- srch = sub.add_parser("search", help="Universal cross-table search") srch.add_argument("query") - srch.add_argument("--tables", help="Comma-separated: memories,events,context") + srch.add_argument("--tables", help="Comma-separated: 
memories,events,context,decisions,procedures") srch.add_argument("--limit", "-l", type=int, default=10) srch.add_argument("--no-recency", action="store_true", dest="no_recency", help="Disable temporal recency weighting; return raw FTS rank order") @@ -18368,6 +18626,12 @@ def main(): "confidence": cmd_memory_confidence, "pii": cmd_memory_pii, "pii-scan": cmd_memory_pii_scan} fn = dispatch.get(args.mem_cmd) + elif args.command == "procedure": + from agentmemory.commands.procedure import dispatch as _procedure_dispatch + + if _procedure_dispatch(args): + return + fn = None elif args.command == "entity": dispatch = { "create": cmd_entity_create, "get": cmd_entity_get, "search": cmd_entity_search, diff --git a/src/agentmemory/brain.py b/src/agentmemory/brain.py index a3753cd..393433a 100644 --- a/src/agentmemory/brain.py +++ b/src/agentmemory/brain.py @@ -349,31 +349,200 @@ def __del__(self) -> None: # Core: remember, search, forget # ------------------------------------------------------------------ - def remember(self, content: str, category: str = "general", tags: Optional[Union[str, List[str]]] = None, confidence: float = 1.0) -> int: + def remember( + self, + content: str, + category: str = "general", + tags: Optional[Union[str, List[str]]] = None, + confidence: float = 1.0, + *, + memory_type: str = "episodic", + scope: str = "global", + procedure: Optional[Dict[str, Any]] = None, + ) -> int: """Add a memory. 
Returns memory ID.""" tags_json = json.dumps(tags.split(",")) if isinstance(tags, str) else (json.dumps(tags) if tags else None) now = _now_ts() with self._lock: db = self._get_conn() - cur = db.execute( - "INSERT INTO memories (agent_id, category, content, confidence, tags, created_at, updated_at) VALUES (?,?,?,?,?,?,?)", - (self.agent_id, category, content, confidence, tags_json, now, now) - ) + if procedure is not None: + from agentmemory import procedural as _procedural + + payload = dict(procedure) + payload.setdefault("description", content) + payload.setdefault("goal", payload.get("goal") or content) + payload.setdefault("title", payload.get("title") or payload["goal"]) + payload.setdefault("steps_json", payload.get("steps_json") or [{"action": payload["goal"]}]) + result = _procedural.create_procedure( + db, + agent_id=self.agent_id, + payload=payload, + category=category, + scope=scope, + confidence=confidence, + ) + mid = int(result["memory_id"]) + else: + cur = db.execute( + """ + INSERT INTO memories ( + agent_id, category, scope, content, confidence, tags, + memory_type, created_at, updated_at + ) VALUES (?,?,?,?,?,?,?,?,?) + """, + (self.agent_id, category, scope, content, confidence, tags_json, memory_type, now, now) + ) + mid = int(cur.lastrowid) + if memory_type == "procedural": + from agentmemory import procedural as _procedural + + _procedural.ensure_procedure_for_memory(db, memory_id=mid, agent_id=self.agent_id) db.commit() - mid = cur.lastrowid if _VEC_AVAILABLE: try: - # vec.index_memory opens its own connection to the same DB; - # WAL mode handles concurrent write access cleanly, and it - # does not contend with our RLock because it's a separate - # sqlite3 connection object. Leave untouched — the async - # embedding rework is tracked separately as Phase 1.2. 
- _vec.index_memory(db, mid, content) + memory_row = db.execute( + "SELECT content FROM memories WHERE id = ?", + (mid,), + ).fetchone() + _vec.index_memory(db, mid, memory_row["content"] if memory_row else content) except Exception as exc: _log.warning("vec.index_memory failed for memory %s: %s", mid, exc) return mid - def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: + def remember_procedure( + self, + *, + goal: str, + title: Optional[str] = None, + description: str = "", + steps: Optional[List[Union[str, Dict[str, Any]]]] = None, + procedure_kind: str = "workflow", + scope: str = "global", + category: str = "convention", + confidence: float = 0.9, + **extra: Any, + ) -> Dict[str, Any]: + from agentmemory import procedural as _procedural + + with self._lock: + db = self._get_conn() + result = _procedural.create_procedure( + db, + agent_id=self.agent_id, + payload={ + "title": title, + "goal": goal, + "description": description, + "procedure_kind": procedure_kind, + "steps_json": steps or [{"action": goal}], + **extra, + }, + category=category, + scope=scope, + confidence=confidence, + ) + db.commit() + return result + + def get_procedure(self, procedure_id: int) -> Dict[str, Any]: + from agentmemory import procedural as _procedural + + with self._lock: + return _procedural.get_procedure(self._get_conn(), procedure_id, include_sources=True) + + def list_procedures( + self, + *, + status: str = "all", + scope: Optional[str] = None, + limit: int = 50, + ) -> List[Dict[str, Any]]: + from agentmemory import procedural as _procedural + + with self._lock: + return _procedural.list_procedures(self._get_conn(), status=status, scope=scope, limit=limit) + + def search_procedures( + self, + query: str, + *, + limit: int = 10, + scope: Optional[str] = None, + status: str = "all", + debug: bool = False, + ) -> Dict[str, Any]: + from agentmemory import procedural as _procedural + + with self._lock: + return _procedural.search_procedures( + 
self._get_conn(), + query, + limit=limit, + scope=scope, + status=status, + debug=debug, + ) + + def procedure_feedback( + self, + procedure_id: int, + *, + success: bool, + usefulness_score: Optional[float] = None, + outcome_summary: Optional[str] = None, + errors_seen: Optional[str] = None, + validated: bool = False, + task_signature: Optional[str] = None, + input_summary: Optional[str] = None, + ) -> Dict[str, Any]: + from agentmemory import procedural as _procedural + + with self._lock: + db = self._get_conn() + result = _procedural.record_feedback( + db, + procedure_id=procedure_id, + agent_id=self.agent_id, + success=success, + usefulness_score=usefulness_score, + outcome_summary=outcome_summary, + errors_seen=errors_seen, + validated=validated, + task_signature=task_signature, + input_summary=input_summary, + ) + db.commit() + return result + + def backfill_procedures( + self, + *, + scope: Optional[str] = None, + limit: int = 100, + dry_run: bool = False, + ) -> Dict[str, Any]: + from agentmemory import procedural as _procedural + + with self._lock: + db = self._get_conn() + result = _procedural.backfill_procedures( + db, + agent_id=self.agent_id, + scope=scope, + limit=limit, + dry_run=dry_run, + ) + if not dry_run: + db.commit() + return result + + def search( + self, + query: str, + limit: int = 10, + *, + memory_type: Optional[str] = None, + ) -> List[Dict[str, Any]]: """Search memories via the unified hybrid + reranker pipeline. Delegates to ``agentmemory._impl.cmd_search`` so programmatic callers @@ -390,6 +559,8 @@ def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: """ if not query or not query.strip(): return [] + if memory_type == "procedural": + return list(self.search_procedures(query, limit=limit).get("procedures") or [])[:limit] # Primary path: unified pipeline via cmd_search. 
try: from types import SimpleNamespace @@ -414,9 +585,12 @@ def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: with self._lock: out = _cmd_search(args, db=self._get_conn(), db_path=str(self.db_path)) if isinstance(out, dict): - mems = out.get("memories") or [] - if isinstance(mems, list): - return mems[:limit] + combined: List[Dict[str, Any]] = [] + combined.extend(out.get("memories") or []) + combined.extend(out.get("procedures") or []) + if isinstance(combined, list): + combined.sort(key=lambda r: r.get("final_score", 0.0), reverse=True) + return combined[:limit] except Exception: # Fall through to the lightweight path — unified pipeline failures # should never take down Brain.search, which has a minimal @@ -628,7 +802,7 @@ def orient(self, project: Optional[str] = None, query: Optional[str] = None) -> except sqlite3.OperationalError: result["triggers"] = [] - # 4. Search for relevant memories (if query or project given) + # 4. Search for relevant memories and procedures (if query or project given) search_q = query or project if search_q: try: @@ -650,6 +824,7 @@ def orient(self, project: Optional[str] = None, query: Optional[str] = None) -> "SELECT m.id, m.content, m.category, m.confidence, m.created_at " "FROM memories_fts fts JOIN memories m ON m.id = fts.rowid " "WHERE memories_fts MATCH ? 
AND m.retired_at IS NULL " + "AND COALESCE(m.memory_type, 'episodic') != 'procedural' " "ORDER BY fts.rank LIMIT 10", (fts_q,) ).fetchall() @@ -660,6 +835,30 @@ def orient(self, project: Optional[str] = None, query: Optional[str] = None) -> result["memories"] = [] else: result["memories"] = [] + try: + if search_q: + result["procedures"] = self.search_procedures( + search_q, + limit=5, + scope=f"project:{project}" if project else None, + ).get("procedures", []) + elif result.get("handoff"): + handoff_query = " ".join( + str(result["handoff"].get(key, "") or "") + for key in ("goal", "open_loops", "next_step") + ).strip() + if handoff_query: + result["procedures"] = self.search_procedures( + handoff_query, + limit=5, + scope=f"project:{project}" if project else None, + ).get("procedures", []) + else: + result["procedures"] = [] + else: + result["procedures"] = [] + except Exception: + result["procedures"] = [] # 5. Quick stats try: @@ -667,6 +866,7 @@ def orient(self, project: Optional[str] = None, query: Optional[str] = None) -> "active_memories": db.execute( "SELECT count(*) FROM memories WHERE retired_at IS NULL" ).fetchone()[0], + "total_procedures": db.execute("SELECT count(*) FROM procedures").fetchone()[0], "total_events": db.execute("SELECT count(*) FROM events").fetchone()[0], "total_entities": db.execute("SELECT count(*) FROM entities").fetchone()[0], } @@ -844,7 +1044,16 @@ def stats(self) -> Dict[str, int]: stats: Dict[str, int] = {} with self._lock: db = self._get_conn() - for tbl in ["memories", "events", "entities", "decisions", "knowledge_edges", "affect_log"]: + for tbl in [ + "memories", + "procedures", + "procedure_candidates", + "events", + "entities", + "decisions", + "knowledge_edges", + "affect_log", + ]: try: stats[tbl] = db.execute(f"SELECT count(*) FROM {tbl}").fetchone()[0] except Exception: diff --git a/src/agentmemory/commands/procedure.py b/src/agentmemory/commands/procedure.py new file mode 100644 index 0000000..5f63c54 --- /dev/null 
+++ b/src/agentmemory/commands/procedure.py @@ -0,0 +1,260 @@ +"""CLI procedure commands.""" + +from __future__ import annotations + +import sqlite3 +from typing import Any + +from agentmemory import procedural + + +def _impl(): + from agentmemory import _impl + + return _impl + + +def _open_db() -> sqlite3.Connection: + return _impl().get_db() + + +def _payload_from_args(args) -> dict[str, Any]: + steps = [{"action": step} for step in (getattr(args, "step", None) or [])] + return { + "title": getattr(args, "title", None), + "goal": getattr(args, "goal", None), + "description": getattr(args, "description", None), + "task_family": getattr(args, "task_family", None), + "procedure_kind": getattr(args, "kind", None), + "trigger_conditions": getattr(args, "trigger", None) or [], + "preconditions": getattr(args, "precondition", None) or [], + "steps_json": steps, + "tools_json": getattr(args, "tool", None) or [], + "failure_modes_json": getattr(args, "failure", None) or [], + "rollback_steps_json": getattr(args, "rollback", None) or [], + "success_criteria_json": getattr(args, "success_criterion", None) or [], + "expected_outcomes": getattr(args, "expected_outcome", None) or [], + "applicability_scope": getattr(args, "scope", None) or "global", + "status": getattr(args, "status", None) or "active", + } + + +def cmd_procedure_add(args) -> None: + db = _open_db() + try: + payload = _payload_from_args(args) + result = procedural.create_procedure( + db, + agent_id=args.agent, + payload=payload, + category=args.category, + scope=args.scope, + confidence=args.confidence, + ) + db.commit() + _impl().json_out({"ok": True, **result}) + finally: + db.close() + + +def cmd_procedure_get(args) -> None: + db = _open_db() + try: + result = procedural.get_procedure(db, args.id, include_sources=True) + _impl().json_out({"ok": True, **result}) + finally: + db.close() + + +def cmd_procedure_list(args) -> None: + db = _open_db() + try: + result = procedural.list_procedures( + db, + 
status=args.status, + scope=args.scope, + limit=args.limit, + ) + _impl().json_out({"ok": True, "count": len(result), "procedures": result}) + finally: + db.close() + + +def cmd_procedure_search(args) -> None: + db = _open_db() + try: + result = procedural.search_procedures( + db, + args.query, + limit=args.limit, + scope=args.scope, + status=args.status, + debug=getattr(args, "debug", False), + ) + _impl().json_out(result) + finally: + db.close() + + +def cmd_procedure_update(args) -> None: + db = _open_db() + try: + changes = {k: v for k, v in {**_payload_from_args(args), "applicability_scope": getattr(args, "scope", None), "status": getattr(args, "status", None)}.items() if v not in (None, [], "")}  # overlay the raw --scope/--status flags: _payload_from_args substitutes "global"/"active" for omitted flags, which would otherwise reset those fields on every update + result = procedural.update_procedure(db, args.id, changes) + db.commit() + _impl().json_out({"ok": True, **result}) + finally: + db.close() + + +def cmd_procedure_feedback(args) -> None: + db = _open_db() + try: + result = procedural.record_feedback( + db, + procedure_id=args.id, + agent_id=args.agent, + success=bool(args.success), + usefulness_score=args.usefulness, + outcome_summary=args.outcome, + errors_seen=args.errors, + validated=args.validated, + task_signature=args.task_signature, + input_summary=args.input_summary, + ) + db.commit() + _impl().json_out({"ok": True, **result}) + finally: + db.close() + + +def cmd_procedure_backfill(args) -> None: + db = _open_db() + try: + result = procedural.backfill_procedures( + db, + agent_id=args.agent, + scope=args.scope, + limit=args.limit, + dry_run=args.dry_run, + ) + if not args.dry_run: + db.commit() + _impl().json_out(result) + finally: + db.close() + + +def cmd_procedure_stats(args) -> None: + db = _open_db() + try: + result = procedural.procedure_stats(db) + _impl().json_out(result) + finally: + db.close() + + +def register_parser(sub) -> None: + proc = sub.add_parser("procedure", help="Manage canonical procedural memories") + proc_sub = proc.add_subparsers(dest="procedure_cmd") + + add = proc_sub.add_parser("add", help="Create a structured procedure") + add.add_argument("--title") +
add.add_argument("--goal", required=True) + add.add_argument("--description", default="") + add.add_argument("--kind", default="workflow") + add.add_argument("--task-family", dest="task_family") + add.add_argument("--category", default="convention") + add.add_argument("--scope", default="global") + add.add_argument("--confidence", type=float, default=0.9) + add.add_argument("--status", default="active") + add.add_argument("--step", action="append", default=[], help="Repeatable ordered step") + add.add_argument("--trigger", action="append", default=[]) + add.add_argument("--precondition", action="append", default=[]) + add.add_argument("--tool", action="append", default=[]) + add.add_argument("--failure", action="append", default=[]) + add.add_argument("--rollback", action="append", default=[]) + add.add_argument("--success-criterion", dest="success_criterion", action="append", default=[]) + add.add_argument("--expected-outcome", dest="expected_outcome", action="append", default=[]) + + get = proc_sub.add_parser("get", help="Fetch a procedure by id") + get.add_argument("id", type=int) + + lst = proc_sub.add_parser("list", help="List procedures") + lst.add_argument("--status", default="all") + lst.add_argument("--scope") + lst.add_argument("--limit", type=int, default=50) + + search = proc_sub.add_parser("search", help="Search procedures") + search.add_argument("query") + search.add_argument("--limit", type=int, default=10) + search.add_argument("--scope") + search.add_argument("--status", default="all") + search.add_argument("--debug", action="store_true") + + update = proc_sub.add_parser("update", help="Update a procedure") + update.add_argument("id", type=int) + update.add_argument("--title") + update.add_argument("--goal") + update.add_argument("--description") + update.add_argument("--kind") + update.add_argument("--task-family", dest="task_family") + update.add_argument("--scope") + update.add_argument("--status") + update.add_argument("--step", 
action="append", default=None) + update.add_argument("--trigger", action="append", default=None) + update.add_argument("--precondition", action="append", default=None) + update.add_argument("--tool", action="append", default=None) + update.add_argument("--failure", action="append", default=None) + update.add_argument("--rollback", action="append", default=None) + update.add_argument("--success-criterion", dest="success_criterion", action="append", default=None) + update.add_argument("--expected-outcome", dest="expected_outcome", action="append", default=None) + + feedback = proc_sub.add_parser("feedback", help="Record procedural execution feedback") + feedback.add_argument("id", type=int) + feedback.add_argument("--success", action="store_true", default=False) + feedback.add_argument("--failure", dest="success", action="store_false") + feedback.add_argument("--validated", action="store_true") + feedback.add_argument("--usefulness", type=float, default=None) + feedback.add_argument("--outcome", default=None) + feedback.add_argument("--errors", default=None) + feedback.add_argument("--task-signature", dest="task_signature", default=None) + feedback.add_argument("--input-summary", dest="input_summary", default=None) + + backfill = proc_sub.add_parser("backfill", help="Backfill procedures from existing evidence") + backfill.add_argument("--scope") + backfill.add_argument("--limit", type=int, default=100) + backfill.add_argument("--dry-run", action="store_true") + + proc_sub.add_parser("stats", help="Show procedure stats") + + +def dispatch(args) -> bool: + fn = { + "add": cmd_procedure_add, + "get": cmd_procedure_get, + "list": cmd_procedure_list, + "search": cmd_procedure_search, + "update": cmd_procedure_update, + "feedback": cmd_procedure_feedback, + "backfill": cmd_procedure_backfill, + "stats": cmd_procedure_stats, + }.get(getattr(args, "procedure_cmd", None)) + if not fn: + return False + fn(args) + return True + + +__all__ = [ + "cmd_procedure_add", + 
"cmd_procedure_backfill", + "cmd_procedure_feedback", + "cmd_procedure_get", + "cmd_procedure_list", + "cmd_procedure_search", + "cmd_procedure_stats", + "cmd_procedure_update", + "dispatch", + "register_parser", +] diff --git a/src/agentmemory/db/init_schema.sql b/src/agentmemory/db/init_schema.sql index 9bb2555..33056a9 100644 --- a/src/agentmemory/db/init_schema.sql +++ b/src/agentmemory/db/init_schema.sql @@ -59,7 +59,7 @@ CREATE TABLE memories ( retracted_at TEXT, retraction_reason TEXT, version INTEGER NOT NULL DEFAULT 1, - memory_type TEXT NOT NULL DEFAULT 'episodic' CHECK(memory_type IN ('episodic','semantic')), + memory_type TEXT NOT NULL DEFAULT 'episodic' CHECK(memory_type IN ('episodic','semantic','procedural')), protected INTEGER NOT NULL DEFAULT 0, salience_score REAL NOT NULL DEFAULT 0.0, gw_broadcast INTEGER NOT NULL DEFAULT 0, @@ -854,6 +854,162 @@ CREATE TRIGGER pm_fts_delete AFTER DELETE ON policy_memories BEGIN VALUES ('delete', old.rowid, old.trigger_condition, old.action_directive, old.name); END; +CREATE TABLE procedures ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + memory_id INTEGER NOT NULL UNIQUE REFERENCES memories(id) ON DELETE CASCADE, + procedure_key TEXT UNIQUE, + title TEXT, + goal TEXT NOT NULL, + description TEXT, + task_family TEXT, + procedure_kind TEXT NOT NULL DEFAULT 'workflow', + trigger_conditions TEXT, + preconditions TEXT, + constraints_json TEXT, + steps_json TEXT NOT NULL, + tools_json TEXT, + failure_modes_json TEXT, + rollback_steps_json TEXT, + success_criteria_json TEXT, + repair_strategies_json TEXT, + tool_policy_json TEXT, + expected_outcomes TEXT, + applicability_scope TEXT NOT NULL DEFAULT 'global', + temporal_class TEXT DEFAULT 'durable', + status TEXT NOT NULL DEFAULT 'active' + CHECK(status IN ('active','candidate','stale','needs_review','superseded','retired')), + automation_ready INTEGER NOT NULL DEFAULT 0, + determinism REAL NOT NULL DEFAULT 0.5, + confidence REAL NOT NULL DEFAULT 0.5, + utility_score REAL 
NOT NULL DEFAULT 0.5, + generality_score REAL NOT NULL DEFAULT 0.5, + support_count INTEGER NOT NULL DEFAULT 0, + execution_count INTEGER NOT NULL DEFAULT 0, + success_count INTEGER NOT NULL DEFAULT 0, + failure_count INTEGER NOT NULL DEFAULT 0, + last_used_at TEXT, + last_executed_at TEXT, + last_validated_at TEXT, + stale_after_days INTEGER NOT NULL DEFAULT 90, + supersedes_procedure_id INTEGER REFERENCES procedures(id), + retired_at TEXT, + search_text TEXT NOT NULL, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX idx_procedures_kind ON procedures(procedure_kind); + +CREATE INDEX idx_procedures_status ON procedures(status); + +CREATE INDEX idx_procedures_last_validated ON procedures(last_validated_at); + +CREATE INDEX idx_procedures_execution_count ON procedures(execution_count DESC); + +CREATE INDEX idx_procedures_scope ON procedures(applicability_scope); + +CREATE INDEX idx_procedures_memory_id ON procedures(memory_id); + +CREATE INDEX idx_procedures_supersedes ON procedures(supersedes_procedure_id); + +CREATE TABLE procedure_steps ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE, + step_order INTEGER NOT NULL, + action TEXT NOT NULL, + rationale TEXT, + tool_name TEXT, + expected_output TEXT, + stop_condition TEXT, + retry_policy TEXT, + rollback_hint TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX idx_procedure_steps_procedure_order +ON procedure_steps(procedure_id, step_order); + +CREATE TABLE procedure_sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE, + memory_id INTEGER REFERENCES memories(id) ON DELETE CASCADE, + event_id INTEGER REFERENCES events(id) ON DELETE CASCADE, + decision_id INTEGER REFERENCES decisions(id) ON DELETE CASCADE, + entity_id INTEGER REFERENCES entities(id) ON DELETE CASCADE, + 
source_role TEXT NOT NULL DEFAULT 'evidence', + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX idx_procedure_sources_procedure ON procedure_sources(procedure_id); + +CREATE INDEX idx_procedure_sources_memory ON procedure_sources(memory_id); + +CREATE INDEX idx_procedure_sources_event ON procedure_sources(event_id); + +CREATE INDEX idx_procedure_sources_decision ON procedure_sources(decision_id); + +CREATE TABLE procedure_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE, + agent_id TEXT REFERENCES agents(id), + task_family TEXT, + task_signature TEXT, + input_summary TEXT, + outcome_summary TEXT, + success INTEGER NOT NULL DEFAULT 0, + usefulness_score REAL, + errors_seen TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX idx_procedure_runs_procedure_created +ON procedure_runs(procedure_id, created_at DESC); + +CREATE TABLE procedure_candidates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + candidate_signature TEXT NOT NULL UNIQUE, + task_family TEXT, + normalized_signature TEXT NOT NULL, + support_count INTEGER NOT NULL DEFAULT 0, + evidence_json TEXT, + mean_success REAL NOT NULL DEFAULT 0.0, + promoted_procedure_id INTEGER REFERENCES procedures(id), + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX idx_procedure_candidates_family ON procedure_candidates(task_family); + +CREATE INDEX idx_procedure_candidates_support ON procedure_candidates(support_count DESC); + +CREATE VIRTUAL TABLE procedures_fts USING fts5( + title, + goal, + description, + task_family, + search_text, + content=procedures, + content_rowid=id, + tokenize='porter unicode61' +); + +CREATE TRIGGER procedures_fts_insert AFTER INSERT ON procedures BEGIN + INSERT INTO procedures_fts(rowid, title, goal, description, task_family, search_text) + VALUES (new.id, new.title, new.goal, new.description, 
new.task_family, new.search_text); +END; + +CREATE TRIGGER procedures_fts_update AFTER UPDATE ON procedures BEGIN + INSERT INTO procedures_fts(procedures_fts, rowid, title, goal, description, task_family, search_text) + VALUES ('delete', old.id, old.title, old.goal, old.description, old.task_family, old.search_text); + INSERT INTO procedures_fts(rowid, title, goal, description, task_family, search_text) + VALUES (new.id, new.title, new.goal, new.description, new.task_family, new.search_text); +END; + +CREATE TRIGGER procedures_fts_delete AFTER DELETE ON procedures BEGIN + INSERT INTO procedures_fts(procedures_fts, rowid, title, goal, description, task_family, search_text) + VALUES ('delete', old.id, old.title, old.goal, old.description, old.task_family, old.search_text); +END; + CREATE TABLE agent_beliefs ( id INTEGER PRIMARY KEY AUTOINCREMENT, agent_id TEXT NOT NULL REFERENCES agents(id), diff --git a/src/agentmemory/hippocampus.py b/src/agentmemory/hippocampus.py index ce51238..1c3b9d0 100755 --- a/src/agentmemory/hippocampus.py +++ b/src/agentmemory/hippocampus.py @@ -720,6 +720,23 @@ def cmd_consolidate(args): print("\n[DRY RUN] No changes written.") else: print(f"\nDone. 
{total_clusters} cluster(s) consolidated, {total_retired} memories retired.") + try: + from agentmemory import procedural as _procedural + + synth_stats = _procedural.synthesize_procedure_candidates( + conn, + agent_id=args.agent, + dry_run=args.dry_run, + ) + print( + "Procedural synthesis: " + f"candidates_updated={synth_stats.get('candidates_updated', 0)}, " + f"promoted={synth_stats.get('promoted', 0)}" + ) + if not args.dry_run: + conn.commit() + except Exception as exc: + print(f"Procedural synthesis skipped: {exc}", file=sys.stderr) # ============================================================================= @@ -2279,6 +2296,18 @@ def cmd_consolidation_cycle(args): # Pass 7: Episodic-to-semantic promotion promotion_stats = promote_episodic_to_semantic(db) + # Pass 7b: repeated procedural traces -> procedure candidates / canonical procedures + try: + from agentmemory import procedural as _procedural + + procedural_stats = _procedural.synthesize_procedure_candidates( + db, + agent_id=args.agent, + dry_run=dry_run, + ) + except Exception as exc: + procedural_stats = {"error": str(exc), "candidates_updated": 0, "promoted": 0} + # Pass 8 (CLF): Experience replay — re-process top-10 highest-recalled memories # Prevents catastrophic forgetting by re-anchoring important old knowledge. 
replay_stats = experience_replay(db, top_k=10, now=now) @@ -2326,6 +2355,7 @@ def cmd_consolidation_cycle(args): "semantic_memories_created": promotion_stats.get("semantic_memories_created", 0), "source_memories_tagged": promotion_stats.get("source_memories_tagged", 0), }, + "procedural_synthesis": procedural_stats, "experience_replay": replay_stats, "hebbian": hebbian_stats, "causal_chain_mining": causal_stats, diff --git a/src/agentmemory/mcp_server.py b/src/agentmemory/mcp_server.py index 9f5f189..af939f4 100755 --- a/src/agentmemory/mcp_server.py +++ b/src/agentmemory/mcp_server.py @@ -53,6 +53,7 @@ mcp_tools_merge, mcp_tools_neuro, mcp_tools_policy, + mcp_tools_procedural, mcp_tools_reasoning, mcp_tools_reconcile, mcp_tools_reflexion, @@ -84,6 +85,7 @@ mcp_tools_merge, mcp_tools_neuro, mcp_tools_policy, + mcp_tools_procedural, mcp_tools_reasoning, mcp_tools_reconcile, mcp_tools_reflexion, @@ -431,8 +433,8 @@ def tool_memory_add(agent_id: str, content: str, category: str, scope: str = "gl return {"ok": False, "error": f"Invalid category: {category}. 
Must be one of: {', '.join(VALID_MEMORY_CATEGORIES)}"} if not (0.0 <= confidence <= 1.0): return {"ok": False, "error": "confidence must be between 0.0 and 1.0"} - if memory_type not in ("episodic", "semantic"): - return {"ok": False, "error": "memory_type must be 'episodic' or 'semantic'"} + if memory_type not in ("episodic", "semantic", "procedural"): + return {"ok": False, "error": "memory_type must be 'episodic', 'semantic', or 'procedural'"} if scope != "global" and not scope.startswith("project:") and not scope.startswith("agent:"): return {"ok": False, "error": "scope must be 'global', 'project:', or 'agent:'"} if source not in _SOURCE_TRUST_WEIGHTS: @@ -700,6 +702,28 @@ def tool_memory_add(agent_id: str, content: str, category: str, scope: str = "gl mid = cur.lastrowid db.commit() # ensure the INSERT (and FTS trigger) is committed + procedure_id = None + indexed_content = content + indexed_category = category + indexed_tags = tags_json or "" + if memory_type == "procedural": + try: + from agentmemory import procedural as _procedural + + proc = _procedural.ensure_procedure_for_memory(db, memory_id=mid, agent_id=agent_id) + procedure_id = proc.get("id") + db.commit() + indexed_row = db.execute( + "SELECT content, category, tags FROM memories WHERE id = ?", + (mid,), + ).fetchone() + if indexed_row: + indexed_content = indexed_row["content"] + indexed_category = indexed_row["category"] + indexed_tags = indexed_row["tags"] or "" + except Exception: + pass + # Workaround: FTS5 content-external tables may not build the inverted index # from trigger INSERTs on some SQLite versions. Force a re-index for this memory. 
if do_index: @@ -707,11 +731,11 @@ def tool_memory_add(agent_id: str, content: str, category: str, scope: str = "gl db.execute( "INSERT INTO memories_fts(memories_fts, rowid, content, category, tags) " "VALUES('delete', ?, ?, ?, ?)", - (mid, content, category, tags_json or '')) + (mid, indexed_content, indexed_category, indexed_tags)) db.execute( "INSERT INTO memories_fts(rowid, content, category, tags) " "VALUES (?, ?, ?, ?)", - (mid, content, category, tags_json or '')) + (mid, indexed_content, indexed_category, indexed_tags)) db.commit() except Exception: pass # non-fatal @@ -752,7 +776,7 @@ def tool_memory_add(agent_id: str, content: str, category: str, scope: str = "gl if do_index: try: if not blob: - blob = _embed_safe(content) + blob = _embed_safe(indexed_content) if blob: vdb = _get_vec_db() if vdb: @@ -771,6 +795,8 @@ def tool_memory_add(agent_id: str, content: str, category: str, scope: str = "gl "surprise_score": surprise, "surprise_method": surprise_method, "source": source, "trust_score": source_trust, "memory_type": memory_type} + if procedure_id is not None: + result["procedure_id"] = procedure_id if _schema_resonance_hit: result["schema_resonance"] = _schema_resonance result["schema_resonance_fast_track"] = True @@ -806,8 +832,8 @@ def tool_memory_search(agent_id: str, query: str, category: str = None, expansion adjuncts come after). Falls through gracefully if sentence-transformers isn't installed. """ - if memory_type and memory_type not in ("episodic", "semantic"): - return {"ok": False, "error": "memory_type must be 'episodic' or 'semantic'"} + if memory_type and memory_type not in ("episodic", "semantic", "procedural"): + return {"ok": False, "error": "memory_type must be 'episodic', 'semantic', or 'procedural'"} # Cross-agent borrow restricts the SQL to `scope='global'` (line ~846). 
# Combining that with an explicit non-global scope produces an @@ -2176,7 +2202,7 @@ def tool_resolve_conflict( "scope": {"type": "string", "description": "Scope: 'global', 'project:', or 'agent:'", "default": "global"}, "confidence": {"type": "number", "description": "Confidence 0.0-1.0", "default": 1.0}, "tags": {"type": "string", "description": "Comma-separated tags"}, - "memory_type": {"type": "string", "enum": ["episodic", "semantic"], "default": "episodic"}, + "memory_type": {"type": "string", "enum": ["episodic", "semantic", "procedural"], "default": "episodic"}, "force": {"type": "boolean", "description": "Bypass W(m) worthiness gate", "default": False}, "supersedes_id": {"type": "integer", "description": "ID of memory being superseded; triggers PII recency gate"}, "source": { @@ -2203,7 +2229,7 @@ def tool_resolve_conflict( "category": {"type": "string", "enum": VALID_MEMORY_CATEGORIES}, "scope": {"type": "string"}, "limit": {"type": "integer", "default": 20, "description": "Max results; capped by agent tier (7 × tier)"}, - "memory_type": {"type": "string", "enum": ["episodic", "semantic"], "description": "Filter to one CLS store. Unset = both stores, semantic gets 1.1x confidence bonus."}, + "memory_type": {"type": "string", "enum": ["episodic", "semantic", "procedural"], "description": "Filter to one memory store. Unset searches all supported memory types; semantic gets a mild confidence bonus in memory_search."}, "pagerank_boost": {"type": "number", "default": 0.0, "description": "Re-rank by graph centrality (0=FTS-only, 1=equal FTS+PageRank). Requires prior pagerank run. Implements SR retrieval."}, "borrow_from": {"type": "string", "description": "Agent ID to borrow from. 
When set, searches only that agent's scope='global' memories and logs the cross-agent access in access_log."}, "multi_pass": {"type": "boolean", "default": False, "description": "SDM-style iterative convergence: use pass-1 results to build a richer pass-2 query; merge and deduplicate both passes (items in both passes ranked first)."}, diff --git a/src/agentmemory/mcp_tools_meb.py b/src/agentmemory/mcp_tools_meb.py index 203f0bb..d15dc19 100644 --- a/src/agentmemory/mcp_tools_meb.py +++ b/src/agentmemory/mcp_tools_meb.py @@ -50,8 +50,10 @@ def _find_vec_dylib(): _MEB_TTL_HOURS_DEFAULT = 72 _MEB_MAX_DEPTH_DEFAULT = 10_000 -# FTS5 special characters — strip everything that isn't word chars or spaces -_FTS5_SPECIAL = re.compile(r'[.&|*"()\-@^?!]') +# FTS5 MATCH is brittle around punctuation and symbolic tokens. Strip any +# non-word, non-space character, plus `_`, so questions like "$5 coupon" or +# "LGBTQ+" cannot crash the tool path. +_FTS5_SPECIAL = re.compile(r"[^\w\s]|_") # --------------------------------------------------------------------------- # DB helpers diff --git a/src/agentmemory/mcp_tools_procedural.py b/src/agentmemory/mcp_tools_procedural.py new file mode 100644 index 0000000..d487347 --- /dev/null +++ b/src/agentmemory/mcp_tools_procedural.py @@ -0,0 +1,324 @@ +"""brainctl MCP tools — procedural memory system.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from mcp.types import Tool + +from agentmemory import procedural +from agentmemory.lib.mcp_helpers import open_db +from agentmemory.paths import get_db_path + +DB_PATH: Path = get_db_path() + + +def _db(): + conn = open_db(str(DB_PATH)) + procedural.ensure_procedure_schema(conn) + return conn + + +def tool_procedure_add( + agent_id: str = "mcp-client", + goal: str = "", + title: str | None = None, + description: str | None = None, + procedure_kind: str = "workflow", + task_family: str | None = None, + scope: str = "global", + category: str = 
"convention", + confidence: float = 0.9, + steps: list[str] | None = None, + trigger_conditions: list[str] | None = None, + preconditions: list[str] | None = None, + tools: list[str] | None = None, + failure_modes: list[str] | None = None, + rollback_steps: list[str] | None = None, + success_criteria: list[str] | None = None, + expected_outcomes: list[str] | None = None, + status: str = "active", + **_kw: Any, +) -> dict[str, Any]: + if not goal: + return {"ok": False, "error": "goal is required"} + db = _db() + try: + payload = { + "title": title, + "goal": goal, + "description": description or "", + "procedure_kind": procedure_kind, + "task_family": task_family, + "steps_json": [{"action": step} for step in (steps or [])], + "trigger_conditions": trigger_conditions or [], + "preconditions": preconditions or [], + "tools_json": tools or [], + "failure_modes_json": failure_modes or [], + "rollback_steps_json": rollback_steps or [], + "success_criteria_json": success_criteria or [], + "expected_outcomes": expected_outcomes or [], + "applicability_scope": scope, + "status": status, + } + result = procedural.create_procedure( + db, + agent_id=agent_id, + payload=payload, + category=category, + scope=scope, + confidence=confidence, + ) + db.commit() + return {"ok": True, **result} + except Exception as exc: + return {"ok": False, "error": str(exc)} + finally: + db.close() + + +def tool_procedure_get(procedure_id: int, **_kw: Any) -> dict[str, Any]: + db = _db() + try: + return {"ok": True, **procedural.get_procedure(db, procedure_id, include_sources=True)} + except Exception as exc: + return {"ok": False, "error": str(exc)} + finally: + db.close() + + +def tool_procedure_list(status: str = "all", scope: str | None = None, limit: int = 50, **_kw: Any) -> dict[str, Any]: + db = _db() + try: + items = procedural.list_procedures(db, status=status, scope=scope, limit=limit) + return {"ok": True, "procedures": items, "count": len(items)} + finally: + db.close() + + +def 
tool_procedure_search(query: str, limit: int = 10, scope: str | None = None, status: str = "all", debug: bool = False, **_kw: Any) -> dict[str, Any]: + if not query: + return {"ok": False, "error": "query is required"} + db = _db() + try: + return procedural.search_procedures(db, query, limit=limit, scope=scope, status=status, debug=debug) + finally: + db.close() + + +def tool_procedure_update(procedure_id: int, **changes: Any) -> dict[str, Any]: + db = _db() + try: + normalized = dict(changes) + if normalized.get("steps") is not None: + normalized["steps_json"] = [{"action": step} for step in normalized.pop("steps") or []] + if normalized.get("tools") is not None: + normalized["tools_json"] = normalized.pop("tools") + if normalized.get("scope") is not None: + normalized.setdefault("applicability_scope", normalized["scope"])  # the tool schema exposes `scope`, while the procedures table and the add path use `applicability_scope`; `scope` is left in place for backward compatibility + result = procedural.update_procedure(db, procedure_id, normalized) + db.commit() + return {"ok": True, **result} + except Exception as exc: + return {"ok": False, "error": str(exc)} + finally: + db.close() + + +def tool_procedure_feedback( + procedure_id: int, + agent_id: str = "mcp-client", + success: bool = True, + usefulness_score: float | None = None, + outcome_summary: str | None = None, + errors_seen: str | None = None, + validated: bool = False, + task_signature: str | None = None, + input_summary: str | None = None, + **_kw: Any, +) -> dict[str, Any]: + db = _db() + try: + result = procedural.record_feedback( + db, + procedure_id=procedure_id, + agent_id=agent_id, + success=success, + usefulness_score=usefulness_score, + outcome_summary=outcome_summary, + errors_seen=errors_seen, + validated=validated, + task_signature=task_signature, + input_summary=input_summary, + ) + db.commit() + return {"ok": True, **result} + except Exception as exc: + return {"ok": False, "error": str(exc)} + finally: + db.close() + + +def tool_procedure_backfill(agent_id: str = "mcp-client", scope: str | None = None, limit: int = 100, dry_run: 
def tool_procedure_backfill(
    agent_id: str = "mcp-client",
    scope: str | None = None,
    limit: int = 100,
    dry_run: bool = False,
    **_kw: Any,
) -> dict[str, Any]:
    """Run ``procedural.backfill_procedures`` and return its result unchanged.

    The transaction is committed only when ``dry_run`` is false.  Errors are
    not wrapped here; they propagate to the caller after the connection is
    closed.
    """
    connection = _db()
    try:
        outcome = procedural.backfill_procedures(
            connection,
            agent_id=agent_id,
            scope=scope,
            limit=limit,
            dry_run=dry_run,
        )
        if dry_run:
            return outcome
        connection.commit()
        return outcome
    finally:
        connection.close()


def tool_procedure_stats(**_kw: Any) -> dict[str, Any]:
    """Return ``procedural.procedure_stats`` for a freshly opened connection."""
    connection = _db()
    try:
        stats = procedural.procedure_stats(connection)
        return stats
    finally:
        connection.close()


def _schema_prop(kind: str, **extra: Any) -> dict[str, Any]:
    """Build one JSON-schema property: ``{"type": kind, **extra}``."""
    return {"type": kind, **extra}


def _schema_string_list() -> dict[str, Any]:
    """Build a fresh JSON-schema fragment for an array of strings."""
    return {"type": "array", "items": {"type": "string"}}


def _schema_object(properties: dict[str, Any], required: Any = None) -> dict[str, Any]:
    """Build an object schema; ``required`` is emitted only when given."""
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    if required is not None:
        schema["required"] = required
    return schema


# MCP tool declarations for the procedural-memory surface.  Each entry's name
# is also a key of DISPATCH below.
TOOLS = [
    Tool(
        name="procedure_add",
        description="Create a canonical structured procedure with ordered steps and provenance.",
        inputSchema=_schema_object(
            {
                "agent_id": _schema_prop("string"),
                "goal": _schema_prop("string"),
                "title": _schema_prop("string"),
                "description": _schema_prop("string"),
                "procedure_kind": _schema_prop("string"),
                "task_family": _schema_prop("string"),
                "scope": _schema_prop("string", default="global"),
                "category": _schema_prop("string", default="convention"),
                "confidence": _schema_prop("number", default=0.9),
                "steps": _schema_string_list(),
                "trigger_conditions": _schema_string_list(),
                "preconditions": _schema_string_list(),
                "tools": _schema_string_list(),
                "failure_modes": _schema_string_list(),
                "rollback_steps": _schema_string_list(),
                "success_criteria": _schema_string_list(),
                "expected_outcomes": _schema_string_list(),
                "status": _schema_prop("string", default="active"),
            },
            required=["goal"],
        ),
    ),
    Tool(
        name="procedure_get",
        description="Get a procedure by id.",
        inputSchema=_schema_object(
            {"procedure_id": _schema_prop("integer")},
            required=["procedure_id"],
        ),
    ),
    Tool(
        name="procedure_list",
        description="List procedures with optional scope/status filters.",
        inputSchema=_schema_object(
            {
                "status": _schema_prop("string", default="all"),
                "scope": _schema_prop("string"),
                "limit": _schema_prop("integer", default=50),
            },
        ),
    ),
    Tool(
        name="procedure_search",
        description="Search structured procedural memories.",
        inputSchema=_schema_object(
            {
                "query": _schema_prop("string"),
                "limit": _schema_prop("integer", default=10),
                "scope": _schema_prop("string"),
                "status": _schema_prop("string", default="all"),
                "debug": _schema_prop("boolean", default=False),
            },
            required=["query"],
        ),
    ),
    Tool(
        name="procedure_update",
        description="Update a procedure.",
        inputSchema=_schema_object(
            {
                "procedure_id": _schema_prop("integer"),
                "title": _schema_prop("string"),
                "goal": _schema_prop("string"),
                "description": _schema_prop("string"),
                "procedure_kind": _schema_prop("string"),
                "task_family": _schema_prop("string"),
                "status": _schema_prop("string"),
                "scope": _schema_prop("string"),
                "steps": _schema_string_list(),
                "tools": _schema_string_list(),
                "trigger_conditions": _schema_string_list(),
                "preconditions": _schema_string_list(),
                "failure_modes_json": _schema_string_list(),
                "rollback_steps_json": _schema_string_list(),
                "success_criteria_json": _schema_string_list(),
            },
            required=["procedure_id"],
        ),
    ),
    Tool(
        name="procedure_feedback",
        description="Record procedural execution feedback and validation outcome.",
        inputSchema=_schema_object(
            {
                "procedure_id": _schema_prop("integer"),
                "agent_id": _schema_prop("string"),
                "success": _schema_prop("boolean", default=True),
                "usefulness_score": _schema_prop("number"),
                "outcome_summary": _schema_prop("string"),
                "errors_seen": _schema_prop("string"),
                "validated": _schema_prop("boolean", default=False),
                "task_signature": _schema_prop("string"),
                "input_summary": _schema_prop("string"),
            },
            required=["procedure_id"],
        ),
    ),
    Tool(
        name="procedure_backfill",
        description="Backfill or synthesize procedures from existing memories, events, and decisions.",
        inputSchema=_schema_object(
            {
                "agent_id": _schema_prop("string"),
                "scope": _schema_prop("string"),
                "limit": _schema_prop("integer", default=100),
                "dry_run": _schema_prop("boolean", default=False),
            },
        ),
    ),
    Tool(
        name="procedure_stats",
        description="Show procedure counts and candidate promotion stats.",
        inputSchema=_schema_object({}),
    ),
]


# Tool name -> handler function.
DISPATCH = {
    "procedure_add": tool_procedure_add,
    "procedure_get": tool_procedure_get,
    "procedure_list": tool_procedure_list,
    "procedure_search": tool_procedure_search,
    "procedure_update": tool_procedure_update,
    "procedure_feedback": tool_procedure_feedback,
    "procedure_backfill": tool_procedure_backfill,
    "procedure_stats": tool_procedure_stats,
}
+""" + +from __future__ import annotations + +import hashlib +import json +import re +import sqlite3 +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Any, Iterable, Optional + +PROCEDURE_STATUSES = { + "active", + "candidate", + "stale", + "needs_review", + "superseded", + "retired", +} + +PROCEDURE_KINDS = { + "workflow", + "runbook", + "playbook", + "troubleshooting", + "rollback", + "recipe", + "routine", +} + +_STEP_RE = re.compile(r"^\s*(?:\d+[\).\:-]|[-*•])\s+(?P.+?)\s*$") +_IF_THEN_RE = re.compile(r"\bif\s+(.+?)\s+then\s+(.+)", re.IGNORECASE) +_ROLLBACK_RE = re.compile(r"\b(rollback|roll back|revert|undo)\b", re.IGNORECASE) +_HOW_TO_RE = re.compile(r"^\s*how\s+(?:to|do|does|can|should)\s+", re.IGNORECASE) +_TOOL_RE = re.compile(r"\b(?:run|use|with|via|invoke)\s+([A-Za-z0-9_./:-]+)") +_LIST_SPLIT_RE = re.compile(r"\b(?:first|then|next|after that|finally|lastly)\b", re.IGNORECASE) +_BULLET_RE = re.compile(r"[•*\-]\s+") +_TOKEN_RE = re.compile(r"[a-z0-9_./:-]+") + +_STOPWORDS = { + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "by", + "for", + "from", + "how", + "i", + "if", + "in", + "is", + "it", + "of", + "on", + "or", + "that", + "the", + "then", + "to", + "use", + "using", + "when", + "with", +} + + +@dataclass(slots=True) +class ProcedureRecord: + procedure_id: int + memory_id: int + title: str + goal: str + procedure_kind: str + status: str + + +def now_iso() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _json_dumps(value: Any) -> str: + return json.dumps(value or [], ensure_ascii=True) + + +def _json_loads_list(value: Any) -> list[Any]: + if value in (None, ""): + return [] + if isinstance(value, list): + return value + try: + parsed = json.loads(value) + except Exception: + return [] + return parsed if isinstance(parsed, list) else [] + + +def _json_loads_obj(value: Any) -> dict[str, Any]: + if value in (None, ""): + 
return {} + if isinstance(value, dict): + return value + try: + parsed = json.loads(value) + except Exception: + return {} + return parsed if isinstance(parsed, dict) else {} + + +def _tokenize(text: str) -> list[str]: + return [ + tok + for tok in _TOKEN_RE.findall((text or "").lower()) + if tok not in _STOPWORDS and len(tok) > 1 + ] + + +def _sentence_split(text: str) -> list[str]: + if not text: + return [] + parts = re.split(r"(?<=[.!?])\s+|\r?\n+", text.strip()) + return [p.strip(" -\t\r\n") for p in parts if p.strip(" -\t\r\n")] + + +def _slugify(text: str) -> str: + slug = re.sub(r"[^a-z0-9]+", "-", (text or "").lower()).strip("-") + return slug[:80] or "procedure" + + +def _procedure_key(title: str, goal: str, scope: str) -> str: + stem = f"{_slugify(title or goal)}:{scope or 'global'}:{goal or title}" + digest = hashlib.sha1(stem.encode("utf-8")).hexdigest()[:10] + return f"{_slugify(title or goal)}-{digest}" + + +def _normalize_step_item(step: Any) -> dict[str, Any]: + if isinstance(step, str): + return {"action": step.strip()} + if isinstance(step, dict): + action = (step.get("action") or step.get("step") or "").strip() + out = { + "action": action, + "rationale": (step.get("rationale") or "").strip() or None, + "tool_name": (step.get("tool_name") or step.get("tool") or "").strip() or None, + "expected_output": (step.get("expected_output") or "").strip() or None, + "stop_condition": (step.get("stop_condition") or "").strip() or None, + "retry_policy": (step.get("retry_policy") or "").strip() or None, + "rollback_hint": (step.get("rollback_hint") or "").strip() or None, + } + return {k: v for k, v in out.items() if v is not None or k == "action"} + return {"action": str(step).strip()} + + +def _normalize_steps(steps: Iterable[Any]) -> list[dict[str, Any]]: + out: list[dict[str, Any]] = [] + for raw in steps: + step = _normalize_step_item(raw) + if step.get("action"): + out.append(step) + return out + + +def _extract_tools(text: str, steps: list[dict[str, 
Any]]) -> list[str]: + tools: list[str] = [] + for step in steps: + if step.get("tool_name"): + tools.append(step["tool_name"]) + for match in _TOOL_RE.findall(step.get("action") or ""): + tools.append(match) + for match in _TOOL_RE.findall(text or ""): + tools.append(match) + seen: set[str] = set() + deduped: list[str] = [] + for tool in tools: + key = tool.lower() + if key not in seen: + seen.add(key) + deduped.append(tool) + return deduped + + +def _guess_kind(text: str) -> str: + lower = (text or "").lower() + if _ROLLBACK_RE.search(lower): + return "rollback" + if any(word in lower for word in ("troubleshoot", "debug", "fix ", "error", "failure", "incident")): + return "troubleshooting" + if any(word in lower for word in ("playbook", "runbook")): + return "runbook" + if any(word in lower for word in ("routine", "repeat", "recurring")): + return "routine" + if any(word in lower for word in ("recipe", "tool use", "tool-use")): + return "recipe" + return "workflow" + + +def looks_procedural(text: str) -> bool: + if not text or len(text.strip()) < 12: + return False + lowered = text.lower() + if _HOW_TO_RE.search(text): + return True + if _IF_THEN_RE.search(text): + return True + if _ROLLBACK_RE.search(text): + return True + if any(_STEP_RE.match(line) for line in text.splitlines()): + return True + hints = ( + "steps", + "first", + "then", + "finally", + "run ", + "deploy", + "rollback", + "revert", + "restart", + "apply migrations", + "troubleshoot", + "before ", + "after ", + ) + return sum(1 for hint in hints if hint in lowered) >= 2 + + +def parse_procedural_text( + text: str, + *, + title: Optional[str] = None, + goal: Optional[str] = None, + procedure_kind: Optional[str] = None, + scope: str = "global", +) -> dict[str, Any]: + """Deterministically coerce free text into a structured procedure payload.""" + + original = (text or "").strip() + lines = [ln.strip() for ln in original.splitlines() if ln.strip()] + steps: list[dict[str, Any]] = [] + triggers: 
list[str] = [] + preconditions: list[str] = [] + rollback_steps: list[str] = [] + failure_modes: list[str] = [] + success_criteria: list[str] = [] + + for line in lines: + match = _STEP_RE.match(line) + if match: + body = match.group("step").strip() + steps.append({"action": body}) + if _ROLLBACK_RE.search(body): + rollback_steps.append(body) + if "if " in line.lower(): + m = _IF_THEN_RE.search(line) + if m: + triggers.append(m.group(1).strip()) + steps.append({"action": m.group(2).strip()}) + else: + triggers.append(line) + if any(token in line.lower() for token in ("before ", "requires ", "ensure ", "must ", "need to ")): + preconditions.append(line) + if any(token in line.lower() for token in ("failure", "error", "incident", "stuck", "syntax error")): + failure_modes.append(line) + if any(token in line.lower() for token in ("success", "done when", "healthy", "green", "validated")): + success_criteria.append(line) + + if not steps and original: + split_chunks = [chunk.strip(" .") for chunk in _LIST_SPLIT_RE.split(original) if chunk.strip(" .")] + if len(split_chunks) > 1: + steps = [{"action": chunk} for chunk in split_chunks] + + if not steps and original: + sentences = _sentence_split(original) + if len(sentences) > 1: + steps = [{"action": sentence} for sentence in sentences] + + if not steps and original: + steps = [{"action": original}] + + steps = _normalize_steps(steps) + tools = _extract_tools(original, steps) + kind = procedure_kind or _guess_kind(original) + + if not goal: + for sentence in _sentence_split(original): + cleaned = _HOW_TO_RE.sub("", sentence).strip(" .:-") + if cleaned: + goal = cleaned[0].upper() + cleaned[1:] if len(cleaned) > 1 else cleaned + break + goal = goal or (steps[0]["action"] if steps else "Complete the procedure safely") + + if not title: + title = goal + if len(title) > 96: + title = title[:93].rstrip() + "..." 
+ + expected_outcomes: list[str] = [] + if success_criteria: + expected_outcomes.extend(success_criteria) + elif "deploy" in original.lower(): + expected_outcomes.append("Deployment completes and target environment is healthy.") + elif "rollback" in original.lower(): + expected_outcomes.append("System returns to the last known good state.") + elif "migrat" in original.lower(): + expected_outcomes.append("Schema changes apply cleanly and services remain healthy.") + + if not rollback_steps and kind == "rollback": + rollback_steps = [step["action"] for step in steps] + elif not rollback_steps: + rollback_steps = [line for line in lines if _ROLLBACK_RE.search(line)] + + search_text = compose_search_text( + { + "title": title, + "goal": goal, + "description": original, + "procedure_kind": kind, + "trigger_conditions": triggers, + "preconditions": preconditions, + "steps_json": steps, + "tools_json": tools, + "failure_modes_json": failure_modes, + "rollback_steps_json": rollback_steps, + "success_criteria_json": success_criteria, + "expected_outcomes": expected_outcomes, + "applicability_scope": scope, + } + ) + return { + "title": title, + "goal": goal, + "description": original, + "procedure_kind": kind, + "trigger_conditions": triggers, + "preconditions": preconditions, + "steps_json": steps, + "tools_json": tools, + "failure_modes_json": failure_modes, + "rollback_steps_json": rollback_steps, + "success_criteria_json": success_criteria, + "expected_outcomes": expected_outcomes, + "applicability_scope": scope, + "status": "active", + "automation_ready": 1 if tools else 0, + "determinism": 0.7 if len(steps) > 1 else 0.45, + "constraints_json": [], + "repair_strategies_json": rollback_steps or failure_modes, + "tool_policy_json": tools, + "task_family": kind, + "search_text": search_text, + } + + +def compose_search_text(payload: dict[str, Any]) -> str: + parts: list[str] = [] + for key in ( + "title", + "goal", + "description", + "task_family", + "procedure_kind", + 
"applicability_scope", + "expected_outcomes", + ): + value = payload.get(key) + if isinstance(value, str): + parts.append(value) + elif isinstance(value, list): + parts.extend(str(v) for v in value) + + for key in ( + "trigger_conditions", + "preconditions", + "tools_json", + "failure_modes_json", + "rollback_steps_json", + "success_criteria_json", + "constraints_json", + "repair_strategies_json", + "tool_policy_json", + ): + values = payload.get(key) + if isinstance(values, list): + parts.extend(str(v) for v in values) + + for step in _normalize_steps(payload.get("steps_json") or []): + parts.extend(str(v) for v in step.values() if v) + + text = " ".join(part for part in parts if part) + return re.sub(r"\s+", " ", text).strip() + + +def compose_synopsis(payload: dict[str, Any]) -> str: + title = payload.get("title") or payload.get("goal") or "Procedure" + goal = payload.get("goal") or title + steps = _normalize_steps(payload.get("steps_json") or []) + lead = f"{title}. Goal: {goal}." + if steps: + preview = " ".join( + f"{idx + 1}. {step['action']}" + for idx, step in enumerate(steps[:4]) + if step.get("action") + ) + lead += f" Steps: {preview}." + rollback = _json_loads_list(payload.get("rollback_steps_json")) + if rollback: + lead += f" Rollback: {rollback[0]}" + if len(rollback) > 1: + lead += f"; then {rollback[1]}" + lead += "." + tools = _json_loads_list(payload.get("tools_json")) + if tools: + lead += f" Tools: {', '.join(str(t) for t in tools[:5])}." 
# DDL for the procedural tables, indexes, FTS index and its sync triggers.
# Every statement is ``IF NOT EXISTS`` so the script is idempotent.
_PROCEDURE_SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS procedures (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    memory_id INTEGER NOT NULL UNIQUE REFERENCES memories(id) ON DELETE CASCADE,
    procedure_key TEXT UNIQUE,
    title TEXT,
    goal TEXT NOT NULL,
    description TEXT,
    task_family TEXT,
    procedure_kind TEXT NOT NULL DEFAULT 'workflow',
    trigger_conditions TEXT,
    preconditions TEXT,
    constraints_json TEXT,
    steps_json TEXT NOT NULL,
    tools_json TEXT,
    failure_modes_json TEXT,
    rollback_steps_json TEXT,
    success_criteria_json TEXT,
    repair_strategies_json TEXT,
    tool_policy_json TEXT,
    expected_outcomes TEXT,
    applicability_scope TEXT NOT NULL DEFAULT 'global',
    temporal_class TEXT DEFAULT 'durable',
    status TEXT NOT NULL DEFAULT 'active',
    automation_ready INTEGER NOT NULL DEFAULT 0,
    determinism REAL NOT NULL DEFAULT 0.5,
    confidence REAL NOT NULL DEFAULT 0.5,
    utility_score REAL NOT NULL DEFAULT 0.5,
    generality_score REAL NOT NULL DEFAULT 0.5,
    support_count INTEGER NOT NULL DEFAULT 0,
    execution_count INTEGER NOT NULL DEFAULT 0,
    success_count INTEGER NOT NULL DEFAULT 0,
    failure_count INTEGER NOT NULL DEFAULT 0,
    last_used_at TEXT,
    last_executed_at TEXT,
    last_validated_at TEXT,
    stale_after_days INTEGER NOT NULL DEFAULT 90,
    supersedes_procedure_id INTEGER REFERENCES procedures(id),
    retired_at TEXT,
    search_text TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_procedures_kind ON procedures(procedure_kind);
CREATE INDEX IF NOT EXISTS idx_procedures_status ON procedures(status);
CREATE INDEX IF NOT EXISTS idx_procedures_last_validated ON procedures(last_validated_at);
CREATE INDEX IF NOT EXISTS idx_procedures_execution_count ON procedures(execution_count DESC);
CREATE INDEX IF NOT EXISTS idx_procedures_scope ON procedures(applicability_scope);
CREATE INDEX IF NOT EXISTS idx_procedures_memory_id ON procedures(memory_id);
CREATE INDEX IF NOT EXISTS idx_procedures_supersedes ON procedures(supersedes_procedure_id);

CREATE TABLE IF NOT EXISTS procedure_steps (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE,
    step_order INTEGER NOT NULL,
    action TEXT NOT NULL,
    rationale TEXT,
    tool_name TEXT,
    expected_output TEXT,
    stop_condition TEXT,
    retry_policy TEXT,
    rollback_hint TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_procedure_steps_procedure_order
    ON procedure_steps(procedure_id, step_order);

CREATE TABLE IF NOT EXISTS procedure_sources (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE,
    memory_id INTEGER REFERENCES memories(id) ON DELETE CASCADE,
    event_id INTEGER REFERENCES events(id) ON DELETE CASCADE,
    decision_id INTEGER REFERENCES decisions(id) ON DELETE CASCADE,
    entity_id INTEGER REFERENCES entities(id) ON DELETE CASCADE,
    source_role TEXT NOT NULL DEFAULT 'evidence',
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_procedure_sources_procedure ON procedure_sources(procedure_id);
CREATE INDEX IF NOT EXISTS idx_procedure_sources_memory ON procedure_sources(memory_id);
CREATE INDEX IF NOT EXISTS idx_procedure_sources_event ON procedure_sources(event_id);
CREATE INDEX IF NOT EXISTS idx_procedure_sources_decision ON procedure_sources(decision_id);

CREATE TABLE IF NOT EXISTS procedure_runs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    procedure_id INTEGER NOT NULL REFERENCES procedures(id) ON DELETE CASCADE,
    agent_id TEXT REFERENCES agents(id),
    task_family TEXT,
    task_signature TEXT,
    input_summary TEXT,
    outcome_summary TEXT,
    success INTEGER NOT NULL DEFAULT 0,
    usefulness_score REAL,
    errors_seen TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_procedure_runs_procedure_created
    ON procedure_runs(procedure_id, created_at DESC);

CREATE TABLE IF NOT EXISTS procedure_candidates (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    candidate_signature TEXT NOT NULL UNIQUE,
    task_family TEXT,
    normalized_signature TEXT NOT NULL,
    support_count INTEGER NOT NULL DEFAULT 0,
    evidence_json TEXT,
    mean_success REAL NOT NULL DEFAULT 0.0,
    promoted_procedure_id INTEGER REFERENCES procedures(id),
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_procedure_candidates_family
    ON procedure_candidates(task_family);
CREATE INDEX IF NOT EXISTS idx_procedure_candidates_support
    ON procedure_candidates(support_count DESC);

CREATE VIRTUAL TABLE IF NOT EXISTS procedures_fts USING fts5(
    title,
    goal,
    description,
    task_family,
    search_text,
    content=procedures,
    content_rowid=id,
    tokenize='porter unicode61'
);
CREATE TRIGGER IF NOT EXISTS procedures_fts_insert AFTER INSERT ON procedures BEGIN
    INSERT INTO procedures_fts(rowid, title, goal, description, task_family, search_text)
    VALUES (new.id, new.title, new.goal, new.description, new.task_family, new.search_text);
END;
CREATE TRIGGER IF NOT EXISTS procedures_fts_update AFTER UPDATE ON procedures BEGIN
    INSERT INTO procedures_fts(
        procedures_fts, rowid, title, goal, description, task_family, search_text
    )
    VALUES (
        'delete', old.id, old.title, old.goal, old.description, old.task_family, old.search_text
    );
    INSERT INTO procedures_fts(rowid, title, goal, description, task_family, search_text)
    VALUES (new.id, new.title, new.goal, new.description, new.task_family, new.search_text);
END;
CREATE TRIGGER IF NOT EXISTS procedures_fts_delete AFTER DELETE ON procedures BEGIN
    INSERT INTO procedures_fts(
        procedures_fts, rowid, title, goal, description, task_family, search_text
    )
    VALUES (
        'delete', old.id, old.title, old.goal, old.description, old.task_family, old.search_text
    );
END;
"""

# Names of every object the script creates, derived from the script itself so
# the presence check cannot drift from the DDL.
_PROCEDURE_SCHEMA_OBJECTS = tuple(
    dict.fromkeys(
        re.findall(
            r"CREATE\s+(?:VIRTUAL\s+)?(?:TABLE|INDEX|TRIGGER)\s+IF\s+NOT\s+EXISTS\s+(\w+)",
            _PROCEDURE_SCHEMA_SQL,
        )
    )
)


def _procedure_schema_present(conn: sqlite3.Connection) -> bool:
    """Return True when every object of the procedural schema already exists."""
    cursor = conn.cursor()
    # Read plain tuples regardless of the connection's row factory.
    cursor.row_factory = None
    marks = ", ".join("?" * len(_PROCEDURE_SCHEMA_OBJECTS))
    cursor.execute(
        f"SELECT COUNT(DISTINCT name) FROM sqlite_master WHERE name IN ({marks})",
        _PROCEDURE_SCHEMA_OBJECTS,
    )
    return cursor.fetchone()[0] == len(_PROCEDURE_SCHEMA_OBJECTS)


def ensure_procedure_schema(conn: sqlite3.Connection) -> None:
    """Best-effort local guard so procedural APIs work on legacy DBs too.

    Sets ``sqlite3.Row`` as row factory when none is configured, then creates
    any missing procedural table, index, FTS table or trigger.

    ``Connection.executescript`` implicitly commits a pending transaction
    before running.  This function is invoked at the start of the read and
    write helpers, so running the script unconditionally would commit a
    caller's half-finished work.  The script is therefore skipped when all
    schema objects are already present; on a database that still lacks them
    the implicit commit remains unavoidable.
    """

    if conn.row_factory is None:
        conn.row_factory = sqlite3.Row

    if _procedure_schema_present(conn):
        return

    conn.executescript(_PROCEDURE_SCHEMA_SQL)


def _insert_procedure_steps(conn: sqlite3.Connection, procedure_id: int, steps: list[dict[str, Any]]) -> None:
    """Replace the ``procedure_steps`` rows of one procedure.

    Existing rows for ``procedure_id`` are deleted, then ``steps`` are
    inserted with ``step_order`` starting at 1.  Missing optional keys are
    stored as NULL.  Nothing is committed here.
    """
    conn.execute("DELETE FROM procedure_steps WHERE procedure_id = ?", (procedure_id,))
    optional_fields = (
        "rationale",
        "tool_name",
        "expected_output",
        "stop_condition",
        "retry_policy",
        "rollback_hint",
    )
    for position, step in enumerate(steps, start=1):
        values = [procedure_id, position, step.get("action")]
        values.extend(step.get(field) for field in optional_fields)
        conn.execute(
            """
            INSERT INTO procedure_steps (
                procedure_id, step_order, action, rationale, tool_name,
                expected_output, stop_condition, retry_policy, rollback_hint
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            tuple(values),
        )


def _link_knowledge_edge(
    conn: sqlite3.Connection,
    *,
    procedure_id: int,
    target_table: str,
    target_id: int,
    relation_type: str,
    weight: float = 1.0,
    agent_id: Optional[str] = None,
) -> None:
    """Insert a ``knowledge_edges`` row whose source is the given procedure.

    Uses ``INSERT OR IGNORE``, so a conflicting existing row is left
    untouched.  ``created_at`` is taken from ``now_iso()``.
    """
    edge = (procedure_id, target_table, target_id, relation_type, weight, agent_id, now_iso())
    conn.execute(
        """
        INSERT OR IGNORE INTO knowledge_edges
            (source_table, source_id, target_table, target_id, relation_type, weight, agent_id, created_at)
        VALUES ('procedures', ?, ?, ?, ?, ?, ?, ?)
        """,
        edge,
    )
def create_procedure(
    conn: sqlite3.Connection,
    *,
    agent_id: str,
    payload: dict[str, Any],
    category: str = "convention",
    scope: str = "global",
    confidence: float = 0.9,
    source_memory_ids: Optional[list[int]] = None,
    source_event_ids: Optional[list[int]] = None,
    source_decision_ids: Optional[list[int]] = None,
    source_entity_ids: Optional[list[int]] = None,
    memory_id: Optional[int] = None,
) -> dict[str, Any]:
    """Create a procedure, its bridged memory row, steps, sources and edges.

    Args:
        conn: Open connection; nothing is committed by this function itself.
        agent_id: Author recorded on the memory row and knowledge edges.
        payload: Procedure fields.  When ``steps_json`` is missing or empty,
            the structure is parsed from ``description`` (or ``goal``) and
            the caller's other non-empty fields are kept on top of the
            parsed defaults.
        category, scope, confidence: Used for a newly created memory row;
            ``confidence`` is also the default for the procedure's
            ``confidence`` and ``utility_score``.
        source_*_ids: Provenance ids; each becomes a ``procedure_sources``
            row plus a knowledge edge.
        memory_id: Existing memory to attach to.  Its row is marked
            ``procedural`` and its content is replaced by the synopsis.  If a
            procedure already exists for it, that procedure is returned.

    Returns:
        The procedure as returned by ``get_procedure(..., include_sources=True)``.

    Raises:
        ValueError: If ``memory_id`` is given but no such memory exists.
    """
    ensure_procedure_schema(conn)
    source_memory_ids = source_memory_ids or []
    source_event_ids = source_event_ids or []
    source_decision_ids = source_decision_ids or []
    source_entity_ids = source_entity_ids or []

    data = dict(payload)
    if not data.get("steps_json"):
        parsed = parse_procedural_text(
            data.get("description") or data.get("goal") or "",
            title=data.get("title"),
            goal=data.get("goal"),
            procedure_kind=data.get("procedure_kind"),
            scope=scope,
        )
        # Keep what the caller explicitly supplied (status, tags, task_family,
        # supersedes_procedure_id, ...); the parser only fills the gaps.
        supplied = {key: value for key, value in data.items() if value not in (None, "", [])}
        data = {**parsed, **supplied}

    steps = _normalize_steps(data.get("steps_json") or [])
    data["steps_json"] = steps or [{"action": data.get("goal") or "Review the procedure"}]
    for list_key in (
        "trigger_conditions",
        "preconditions",
        "tools_json",
        "failure_modes_json",
        "rollback_steps_json",
        "success_criteria_json",
        "constraints_json",
        "repair_strategies_json",
        "tool_policy_json",
    ):
        data[list_key] = list(data.get(list_key) or [])
    data["expected_outcomes"] = data.get("expected_outcomes") or []
    data["title"] = (data.get("title") or data.get("goal") or "Procedure").strip()
    data["goal"] = (data.get("goal") or data["title"]).strip()
    data["description"] = (data.get("description") or "").strip()
    data["procedure_kind"] = data.get("procedure_kind") or _guess_kind(
        " ".join([data["goal"], data["description"]])
    )
    # Unknown kinds and statuses fall back to the defaults rather than failing.
    if data["procedure_kind"] not in PROCEDURE_KINDS:
        data["procedure_kind"] = "workflow"
    data["status"] = data.get("status") or "active"
    if data["status"] not in PROCEDURE_STATUSES:
        data["status"] = "active"
    data["applicability_scope"] = data.get("applicability_scope") or scope or "global"
    data["task_family"] = data.get("task_family") or data["procedure_kind"]
    data["search_text"] = compose_search_text(data)
    synopsis = compose_synopsis(data)
    source_refs = json.dumps(
        {
            "memory_ids": source_memory_ids,
            "event_ids": source_event_ids,
            "decision_ids": source_decision_ids,
            "entity_ids": source_entity_ids,
        },
        ensure_ascii=True,
    )

    created_at = now_iso()
    if memory_id is None:
        tags = data.get("tags")
        tags_json = _json_dumps(tags) if tags else None
        cur = conn.execute(
            """
            INSERT INTO memories (
                agent_id, category, scope, content, confidence, tags, memory_type,
                derived_from_ids, created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?, 'procedural', ?, ?, ?)
            """,
            (
                agent_id,
                category,
                scope,
                synopsis,
                confidence,
                tags_json,
                source_refs,
                created_at,
                created_at,
            ),
        )
        memory_id = int(cur.lastrowid)
    else:
        exists = conn.execute(
            "SELECT id, content, scope FROM memories WHERE id = ?",
            (memory_id,),
        ).fetchone()
        if not exists:
            raise ValueError(f"memory_id {memory_id} does not exist")
        conn.execute(
            """
            UPDATE memories
            SET memory_type = 'procedural',
                scope = COALESCE(scope, ?),
                updated_at = ?,
                derived_from_ids = COALESCE(derived_from_ids, ?)
            WHERE id = ?
            """,
            (scope, created_at, source_refs, memory_id),
        )
        maybe_existing = conn.execute(
            "SELECT id FROM procedures WHERE memory_id = ?",
            (memory_id,),
        ).fetchone()
        if maybe_existing:
            return get_procedure(conn, int(maybe_existing["id"]), include_sources=True)

    proc_key = data.get("procedure_key") or _procedure_key(
        data["title"], data["goal"], data["applicability_scope"]
    )
    default_support = len(source_memory_ids) + len(source_event_ids) + len(source_decision_ids)
    cur = conn.execute(
        """
        INSERT INTO procedures (
            memory_id, procedure_key, title, goal, description, task_family,
            procedure_kind, trigger_conditions, preconditions, constraints_json,
            steps_json, tools_json, failure_modes_json, rollback_steps_json,
            success_criteria_json, repair_strategies_json, tool_policy_json,
            expected_outcomes, applicability_scope, temporal_class, status,
            automation_ready, determinism, confidence, utility_score,
            generality_score, support_count, execution_count, success_count,
            failure_count, last_used_at, last_executed_at, last_validated_at,
            stale_after_days, supersedes_procedure_id, retired_at, search_text,
            created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, 0, NULL, NULL, NULL, ?, ?, NULL, ?, ?, ?)
        """,
        (
            memory_id,
            proc_key,
            data["title"],
            data["goal"],
            data["description"],
            data["task_family"],
            data["procedure_kind"],
            _json_dumps(data["trigger_conditions"]),
            _json_dumps(data["preconditions"]),
            _json_dumps(data["constraints_json"]),
            _json_dumps(data["steps_json"]),
            _json_dumps(data["tools_json"]),
            _json_dumps(data["failure_modes_json"]),
            _json_dumps(data["rollback_steps_json"]),
            _json_dumps(data["success_criteria_json"]),
            _json_dumps(data["repair_strategies_json"]),
            _json_dumps(data["tool_policy_json"]),
            json.dumps(data["expected_outcomes"], ensure_ascii=True),
            data["applicability_scope"],
            data.get("temporal_class") or "durable",
            data["status"],
            int(bool(data.get("automation_ready", 0))),
            float(data.get("determinism", 0.5)),
            float(data.get("confidence", confidence)),
            float(data.get("utility_score", confidence)),
            float(data.get("generality_score", 0.5)),
            int(data.get("support_count", default_support)),
            int(data.get("stale_after_days", 90)),
            data.get("supersedes_procedure_id"),
            data["search_text"],
            created_at,
            created_at,
        ),
    )
    procedure_id = int(cur.lastrowid)

    # Keep the bridged memory row's text in sync with the structured record.
    conn.execute(
        "UPDATE memories SET content = ?, updated_at = ? WHERE id = ?",
        (synopsis, created_at, memory_id),
    )

    # Insert the same list that was stored in steps_json, so the fallback
    # step also gets a procedure_steps row.
    _insert_procedure_steps(conn, procedure_id, data["steps_json"])

    event_relation = "rollback_for" if data["procedure_kind"] == "rollback" else "derived_from_event"
    # (source column, ids, source_role, edge target table, edge relation, edge weight)
    provenance = (
        ("memory_id", source_memory_ids, "derived_from_memory", "memories", "derived_from_memory", 1.0),
        ("event_id", source_event_ids, "derived_from_event", "events", event_relation, 0.9),
        ("decision_id", source_decision_ids, "derived_from_decision", "decisions", "derived_from_decision", 0.95),
        ("entity_id", source_entity_ids, "applicable_to", "entities", "applicable_to", 0.8),
    )
    for column, source_ids, role, target_table, relation, weight in provenance:
        for source_id in source_ids:
            # ``column`` comes from the constant table above, never from input.
            conn.execute(
                f"""
                INSERT INTO procedure_sources (procedure_id, {column}, source_role, created_at)
                VALUES (?, ?, ?, ?)
                """,
                (procedure_id, source_id, role, created_at),
            )
            _link_knowledge_edge(
                conn,
                procedure_id=procedure_id,
                target_table=target_table,
                target_id=source_id,
                relation_type=relation,
                weight=weight,
                agent_id=agent_id,
            )

    # Link each tool to an entity of the same name (case-insensitive), if any.
    for tool in data["tools_json"]:
        conn.execute(
            """
            INSERT OR IGNORE INTO knowledge_edges
                (source_table, source_id, target_table, target_id, relation_type, weight, agent_id, created_at)
            SELECT 'procedures', ?, 'entities', e.id, 'requires_tool', 0.7, ?, ?
            FROM entities e
            WHERE lower(e.name) = lower(?)
            """,
            (procedure_id, agent_id, created_at, str(tool)),
        )

    if data.get("supersedes_procedure_id"):
        superseded_id = int(data["supersedes_procedure_id"])
        _link_knowledge_edge(
            conn,
            procedure_id=procedure_id,
            target_table="procedures",
            target_id=superseded_id,
            relation_type="supersedes_procedure",
            weight=1.0,
            agent_id=agent_id,
        )
        conn.execute(
            "UPDATE procedures SET status = 'superseded', updated_at = ? WHERE id = ?",
            (created_at, superseded_id),
        )

    return get_procedure(conn, procedure_id, include_sources=True)
def _procedure_row_to_dict(row: sqlite3.Row) -> dict[str, Any]:
    """Convert a ``procedures`` row into a plain dict with decoded JSON columns.

    Args:
        row: A row that exposes the ``procedures`` columns by name (it may also
            carry joined ``memories`` columns; they are copied through as-is).

    Returns:
        A dict copy of the row in which the list-valued JSON columns are
        decoded via ``_json_loads_list``, ``expected_outcomes`` is decoded when
        it holds a JSON list or JSON string, and a derived ``success_rate``
        (``success_count / max(execution_count, 1)``, rounded to 4 places) is
        added.
    """
    out = dict(row)
    for key in (
        "trigger_conditions",
        "preconditions",
        "constraints_json",
        "steps_json",
        "tools_json",
        "failure_modes_json",
        "rollback_steps_json",
        "success_criteria_json",
        "repair_strategies_json",
        "tool_policy_json",
    ):
        out[key] = _json_loads_list(out.get(key))

    # The write paths in this module serialise expected_outcomes with
    # json.dumps(), so a scalar string is stored as a *quoted* JSON string.
    # Decoding only the list form would hand callers the value with literal
    # quotes, and every subsequent update would add another escaping layer.
    raw_outcomes = out.get("expected_outcomes")
    if isinstance(raw_outcomes, str):
        if raw_outcomes.startswith("["):
            out["expected_outcomes"] = _json_loads_list(raw_outcomes)
        elif raw_outcomes.startswith('"'):
            try:
                decoded = json.loads(raw_outcomes)
            except ValueError:
                # Not valid JSON (e.g. legacy free text that happens to start
                # with a quote): keep the stored text untouched.
                decoded = None
            if isinstance(decoded, str):
                out["expected_outcomes"] = decoded

    out["success_rate"] = round(
        float(out.get("success_count") or 0) / max(int(out.get("execution_count") or 0), 1),
        4,
    )
    return out
sqlite3.Connection, + procedure_id: int, + *, + include_sources: bool = False, +) -> dict[str, Any]: + ensure_procedure_schema(conn) + row = conn.execute( + """ + SELECT p.*, m.content, m.category, m.scope, m.confidence AS memory_confidence, + m.memory_type, m.created_at AS memory_created_at + FROM procedures p + JOIN memories m ON m.id = p.memory_id + WHERE p.id = ? + """, + (procedure_id,), + ).fetchone() + if not row: + raise ValueError(f"procedure_id {procedure_id} not found") + out = _procedure_row_to_dict(row) + if include_sources: + out["sources"] = [dict(r) for r in conn.execute( + """ + SELECT memory_id, event_id, decision_id, entity_id, source_role, created_at + FROM procedure_sources + WHERE procedure_id = ? + ORDER BY id + """, + (procedure_id,), + ).fetchall()] + out["steps"] = [dict(r) for r in conn.execute( + """ + SELECT step_order, action, rationale, tool_name, expected_output, + stop_condition, retry_policy, rollback_hint + FROM procedure_steps + WHERE procedure_id = ? 
+ ORDER BY step_order + """, + (procedure_id,), + ).fetchall()] + return out + + +def list_procedures( + conn: sqlite3.Connection, + *, + status: Optional[str] = None, + scope: Optional[str] = None, + limit: int = 50, +) -> list[dict[str, Any]]: + ensure_procedure_schema(conn) + clauses = ["1=1"] + params: list[Any] = [] + if status and status != "all": + clauses.append("p.status = ?") + params.append(status) + if scope: + clauses.append("(p.applicability_scope = 'global' OR p.applicability_scope = ?)") + params.append(scope) + params.append(limit) + rows = conn.execute( + f""" + SELECT p.*, m.content, m.category, m.scope, m.confidence AS memory_confidence + FROM procedures p + JOIN memories m ON m.id = p.memory_id + WHERE {' AND '.join(clauses)} + ORDER BY + CASE p.status + WHEN 'active' THEN 0 + WHEN 'candidate' THEN 1 + WHEN 'needs_review' THEN 2 + WHEN 'stale' THEN 3 + WHEN 'superseded' THEN 4 + ELSE 5 + END, + COALESCE(p.last_validated_at, p.updated_at, p.created_at) DESC + LIMIT ? 
def _days_old(timestamp: Optional[str]) -> float:
    """Return the age of ``timestamp`` in fractional days, never negative.

    Args:
        timestamp: An ISO-8601 style timestamp. A trailing ``Z`` is accepted
            and a value without an offset is interpreted as UTC.

    Returns:
        Days elapsed between ``timestamp`` and the current UTC time, clamped to
        ``0.0`` for timestamps in the future. A missing *or unparseable* value
        yields ``9999.0`` so callers treat it as "very old / unknown" instead
        of failing.
    """
    if not timestamp:
        return 9999.0
    normalized = str(timestamp).strip().replace("Z", "+00:00")
    try:
        dt = datetime.fromisoformat(normalized)
    except ValueError:
        # One malformed stored value must not abort scoring or status
        # recomputation for every procedure; degrade to the same sentinel
        # that is used for a missing timestamp.
        return 9999.0
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return max(0.0, (datetime.now(timezone.utc) - dt).total_seconds() / 86400.0)
in (proc.get("search_text") or "").lower() else 0.0, + } + + status = proc.get("status") or "active" + status_multiplier = { + "active": 1.15, + "candidate": 0.95, + "needs_review": 0.75, + "stale": 0.68, + "superseded": 0.35, + "retired": 0.15, + }.get(status, 1.0) + validation_age = _days_old(proc.get("last_validated_at")) + last_exec_age = _days_old(proc.get("last_executed_at")) + validation_boost = max(0.0, 1.0 - min(validation_age / max(int(proc.get("stale_after_days") or 90), 1), 1.5)) + utility_boost = float(proc.get("utility_score") or 0.5) + confidence_boost = float(proc.get("confidence") or 0.5) + execution_count = int(proc.get("execution_count") or 0) + success_count = int(proc.get("success_count") or 0) + failure_count = int(proc.get("failure_count") or 0) + success_rate = success_count / max(execution_count, 1) + failure_penalty = min(failure_count / max(execution_count, 1), 1.0) + support_bonus = min(int(proc.get("support_count") or 0) / 5.0, 1.0) + freshness = max(0.0, 1.0 - min(last_exec_age / max(int(proc.get("stale_after_days") or 90), 1), 1.5)) + + base = sum(breakdown.values()) + score = ( + base + + validation_boost * 0.8 + + freshness * 0.4 + + success_rate * 0.8 + + support_bonus * 0.5 + + utility_boost * 0.3 + + confidence_boost * 0.4 + - failure_penalty * 0.9 + ) * status_multiplier + directness = ( + breakdown["goal_match"] + + breakdown["title_match"] + + breakdown["trigger_match"] + + breakdown["exact_phrase"] + ) + if directness < 0.6 and breakdown["step_overlap"] > 0: + score *= 0.72 + if debug: + breakdown.update( + { + "validation_boost": round(validation_boost, 4), + "freshness_boost": round(freshness, 4), + "success_rate": round(success_rate, 4), + "support_bonus": round(support_bonus, 4), + "utility_boost": round(utility_boost, 4), + "confidence_boost": round(confidence_boost, 4), + "failure_penalty": round(failure_penalty, 4), + "status_multiplier": round(status_multiplier, 4), + "directness": round(directness, 4), + } + ) + 
def search_procedures(
    conn: sqlite3.Connection,
    query: str,
    *,
    limit: int = 10,
    scope: Optional[str] = None,
    status: Optional[str] = None,
    debug: bool = False,
) -> dict[str, Any]:
    """Find procedures relevant to ``query`` and rank them with ``_score_procedure``.

    Candidates come from the ``procedures_fts`` index first; when that yields
    nothing (or the FTS query cannot be executed) a ``LIKE`` scan over goal,
    description, search text and memory content is used instead.

    Args:
        conn: Open database connection.
        query: Free-text query. Blank input returns an empty result.
        limit: Maximum number of procedures returned. Up to
            ``max(limit * 4, 12)`` candidates are fetched before re-ranking.
        scope: When given, only procedures whose applicability scope is
            ``'global'`` or equal to this value are considered.
        status: When given and not ``"all"``, restricts to that status.
        debug: When true, each result carries its ``score_breakdown``.

    Returns:
        ``{"ok": True, "procedures": [...], "debug": {...}}`` with procedures
        sorted by ``final_score`` descending.
    """
    ensure_procedure_schema(conn)
    search = query.strip()
    if not search:
        return {"ok": True, "procedures": [], "debug": {"reason": "empty_query"}}

    def _quote(term: str) -> str:
        # Double-quoting makes FTS5 treat the term as a literal string, so
        # operator keywords or punctuation inside a term cannot produce a
        # MATCH syntax error.
        return '"' + term.replace('"', '""') + '"'

    tokens = _tokenize(search)
    if tokens:
        fts_query = " OR ".join(_quote(str(tok)) for tok in tokens)
    else:
        words = re.sub(r"[^\w\s]", " ", search).split()
        fts_query = " ".join(_quote(word) for word in words)

    clauses = ["1=1"]
    params: list[Any] = []
    if status and status != "all":
        clauses.append("p.status = ?")
        params.append(status)
    if scope:
        clauses.append("(p.applicability_scope = 'global' OR p.applicability_scope = ?)")
        params.append(scope)

    candidate_cap = max(limit * 4, 12)
    rows: list[sqlite3.Row] = []
    if fts_query:
        try:
            rows = conn.execute(
                f"""
                SELECT p.*, m.content, m.category, m.scope, m.confidence AS memory_confidence,
                       bm25(procedures_fts, 3.0, 2.0, 1.5, 1.0, 2.5) AS fts_rank
                FROM procedures_fts
                JOIN procedures p ON p.id = procedures_fts.rowid
                JOIN memories m ON m.id = p.memory_id
                WHERE procedures_fts MATCH ? AND {' AND '.join(clauses)}
                ORDER BY bm25(procedures_fts, 3.0, 2.0, 1.5, 1.0, 2.5)
                LIMIT ?
                """,
                [fts_query, *params, candidate_cap],
            ).fetchall()
        except sqlite3.OperationalError:
            # A rejected MATCH expression or an unavailable FTS index should
            # degrade to the LIKE scan below rather than fail the search.
            rows = []

    if not rows:
        needle = f"%{search.lower()}%"
        rows = conn.execute(
            f"""
            SELECT p.*, m.content, m.category, m.scope, m.confidence AS memory_confidence, NULL AS fts_rank
            FROM procedures p
            JOIN memories m ON m.id = p.memory_id
            WHERE {' AND '.join(clauses)}
              AND (
                lower(p.goal) LIKE ? OR lower(COALESCE(p.description, '')) LIKE ?
                OR lower(p.search_text) LIKE ? OR lower(m.content) LIKE ?
              )
            LIMIT ?
            """,
            [*params, needle, needle, needle, needle, candidate_cap],
        ).fetchall()

    results: list[dict[str, Any]] = []
    for row in rows:
        proc = _procedure_row_to_dict(row)
        score, breakdown = _score_procedure(search, proc, debug=debug)
        proc["final_score"] = score
        proc["fts_rank"] = row["fts_rank"]
        proc["type"] = "procedure"
        proc["why_retrieved"] = (
            "goal/title match"
            if breakdown.get("goal_match", 0.0) + breakdown.get("title_match", 0.0) >= 1.0
            else "procedural evidence match"
        )
        if debug:
            proc["score_breakdown"] = breakdown
        results.append(proc)

    results.sort(key=lambda item: item.get("final_score", 0.0), reverse=True)
    return {
        "ok": True,
        "procedures": results[:limit],
        "debug": {
            "query": search,
            "fts_query": fts_query,
            "candidate_count": len(results),
        },
    }
updated_at = ? + WHERE id = ? + """, + ( + merged.get("title"), + merged.get("goal"), + merged.get("description"), + merged.get("task_family"), + merged.get("procedure_kind"), + _json_dumps(merged.get("trigger_conditions")), + _json_dumps(merged.get("preconditions")), + _json_dumps(merged.get("constraints_json")), + _json_dumps(merged.get("steps_json")), + _json_dumps(merged.get("tools_json")), + _json_dumps(merged.get("failure_modes_json")), + _json_dumps(merged.get("rollback_steps_json")), + _json_dumps(merged.get("success_criteria_json")), + _json_dumps(merged.get("repair_strategies_json")), + _json_dumps(merged.get("tool_policy_json")), + json.dumps(merged.get("expected_outcomes") or [], ensure_ascii=True), + merged.get("applicability_scope"), + merged.get("status"), + int(bool(merged.get("automation_ready", 0))), + float(merged.get("determinism", 0.5)), + float(merged.get("confidence", 0.5)), + float(merged.get("utility_score", 0.5)), + float(merged.get("generality_score", 0.5)), + int(merged.get("support_count", 0)), + int(merged.get("stale_after_days", 90)), + merged.get("supersedes_procedure_id"), + merged["search_text"], + merged["updated_at"], + procedure_id, + ), + ) + _insert_procedure_steps(conn, procedure_id, merged["steps_json"]) + conn.execute( + "UPDATE memories SET content = ?, updated_at = ? 
def _recompute_status(proc: dict[str, Any]) -> str:
    """Derive the lifecycle status of a procedure from its current fields.

    Precedence, first match wins:

    1. ``retired`` when ``retired_at`` is set.
    2. ``superseded`` when the stored status already says so.
    3. ``stale`` when the most recent of validation / update / creation
       timestamps (first non-empty, in that order) is older than
       ``stale_after_days`` (default 90).
    4. ``needs_review`` after at least three executions when failures are
       both >= 2 and >= successes.
    5. ``active`` otherwise.
    """
    if proc.get("retired_at"):
        return "retired"
    if proc.get("status") == "superseded":
        return "superseded"

    max_age_days = int(proc.get("stale_after_days") or 90)
    reference_ts = (
        proc.get("last_validated_at")
        or proc.get("updated_at")
        or proc.get("created_at")
    )
    if reference_ts and _days_old(reference_ts) > max_age_days:
        return "stale"

    losses, wins, runs = (
        int(proc.get(field) or 0)
        for field in ("failure_count", "success_count", "execution_count")
    )
    struggling = runs >= 3 and losses >= 2 and losses >= wins
    return "needs_review" if struggling else "active"
usefulness_score, errors_seen, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + procedure_id, + agent_id, + proc.get("task_family"), + task_signature, + input_summary, + outcome_summary, + 1 if success else 0, + usefulness_score, + errors_seen, + now, + ), + ) + + proc.update( + { + "execution_count": execution_count, + "success_count": success_count, + "failure_count": failure_count, + "last_used_at": now, + "last_executed_at": now, + "last_validated_at": now if validated or success else proc.get("last_validated_at"), + "utility_score": utility, + "confidence": confidence, + } + ) + proc["status"] = _recompute_status(proc) + conn.execute( + """ + UPDATE procedures + SET execution_count = ?, success_count = ?, failure_count = ?, + last_used_at = ?, last_executed_at = ?, last_validated_at = ?, + utility_score = ?, confidence = ?, status = ?, updated_at = ? + WHERE id = ? + """, + ( + execution_count, + success_count, + failure_count, + now, + now, + proc.get("last_validated_at"), + utility, + confidence, + proc["status"], + now, + procedure_id, + ), + ) + + mem = conn.execute( + "SELECT alpha, beta FROM memories WHERE id = ?", + (proc["memory_id"],), + ).fetchone() + alpha = float(mem["alpha"] if mem and mem["alpha"] is not None else 1.0) + beta = float(mem["beta"] if mem and mem["beta"] is not None else 1.0) + if success: + alpha += 1.0 + else: + beta += 1.0 + posterior = alpha / max(alpha + beta, 1e-6) + conn.execute( + """ + UPDATE memories + SET alpha = ?, beta = ?, confidence = ?, updated_at = ? + WHERE id = ? 
def synthesize_procedure_candidates(
    conn: sqlite3.Connection,
    *,
    agent_id: str,
    dry_run: bool = False,
    min_support: int = 2,
    promote_support: int = 3,
) -> dict[str, Any]:
    """Group procedural-looking episodic memories and promote recurring ones.

    Non-retired episodic memories in the lesson / integration / decision /
    convention categories are grouped by ``_candidate_signature_from_text``.
    Each group with at least ``min_support`` members is upserted into
    ``procedure_candidates``; a group is promoted to a real procedure when it
    has ``promote_support`` members, or at least two members with a mean
    confidence >= 0.75 and a decision/lesson among them.

    A signature whose candidate row already records a
    ``promoted_procedure_id`` is not promoted again, so repeated runs do not
    create duplicate procedures from the same evidence.

    Args:
        conn: Open database connection.
        agent_id: Agent recorded as author of promoted procedures.
        dry_run: When true nothing is written; counters still describe what
            would be done.
        min_support: Minimum group size to be tracked as a candidate.
        promote_support: Group size that triggers promotion on its own.

    Returns:
        Counters ``scanned``, ``candidates_updated``, ``promoted``,
        ``already_promoted`` and the list of ``signatures`` with their support.
    """
    ensure_procedure_schema(conn)
    rows = conn.execute(
        """
        SELECT id, content, category, scope, confidence
        FROM memories
        WHERE retired_at IS NULL
          AND COALESCE(memory_type, 'episodic') = 'episodic'
          AND category IN ('lesson', 'integration', 'decision', 'convention')
        ORDER BY created_at DESC
        """
    ).fetchall()

    grouped: dict[str, list[sqlite3.Row]] = {}
    for row in rows:
        if not looks_procedural(row["content"]):
            continue
        signature = _candidate_signature_from_text(row["content"])
        if not signature:
            continue
        grouped.setdefault(signature, []).append(row)

    stats: dict[str, Any] = {
        "scanned": len(rows),
        "candidates_updated": 0,
        "promoted": 0,
        "already_promoted": 0,
        "signatures": [],
    }
    now = now_iso()
    for signature, members in grouped.items():
        if len(members) < min_support:
            continue
        member_ids = [int(row["id"]) for row in members]
        mean_success = sum(float(row["confidence"] or 0.5) for row in members) / len(members)
        evidence = {
            "memory_ids": member_ids,
            "scope": members[0]["scope"],
            "category": members[0]["category"],
        }
        if not dry_run:
            # The upsert deliberately leaves promoted_procedure_id untouched,
            # which is what lets the promotion guard below work across runs.
            conn.execute(
                """
                INSERT INTO procedure_candidates (
                    candidate_signature, task_family, normalized_signature,
                    support_count, evidence_json, mean_success, updated_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(candidate_signature) DO UPDATE SET
                    support_count = excluded.support_count,
                    evidence_json = excluded.evidence_json,
                    mean_success = excluded.mean_success,
                    updated_at = excluded.updated_at
                """,
                (
                    signature,
                    _guess_kind(signature),
                    signature,
                    len(members),
                    json.dumps(evidence, ensure_ascii=True),
                    round(mean_success, 4),
                    now,
                ),
            )
        stats["candidates_updated"] += 1
        stats["signatures"].append({"signature": signature, "support": len(members)})

        should_promote = len(members) >= promote_support or (
            len(members) >= 2
            and mean_success >= 0.75
            and any(row["category"] in ("decision", "lesson") for row in members)
        )
        if not should_promote:
            continue

        # Skip signatures that were promoted by an earlier run; the source
        # memories stay episodic, so without this check every run would mint
        # another procedure for the same group.
        previous = conn.execute(
            "SELECT promoted_procedure_id FROM procedure_candidates WHERE candidate_signature = ?",
            (signature,),
        ).fetchone()
        if previous is not None and previous[0] is not None:
            stats["already_promoted"] += 1
            continue

        payload = parse_procedural_text(
            members[0]["content"],
            scope=members[0]["scope"] or "global",
        )
        payload["support_count"] = len(members)
        payload["confidence"] = round(mean_success, 4)
        payload["utility_score"] = round(mean_success, 4)
        if not dry_run:
            proc = create_procedure(
                conn,
                agent_id=agent_id,
                payload=payload,
                category=members[0]["category"] or "convention",
                scope=members[0]["scope"] or "global",
                confidence=round(mean_success, 4),
                source_memory_ids=member_ids,
            )
            conn.execute(
                """
                UPDATE procedure_candidates
                SET promoted_procedure_id = ?, updated_at = ?
                WHERE candidate_signature = ?
                """,
                (proc["id"], now, signature),
            )
        stats["promoted"] += 1
    return stats
OR m.scope = 'global')") + params.append(scope) + params.append(limit) + rows = conn.execute( + f""" + SELECT m.id, m.content, m.category, m.scope, m.confidence + FROM memories m + WHERE {' AND '.join(clauses)} + ORDER BY m.created_at DESC + LIMIT ? + """, + params, + ).fetchall() + + stats = { + "ok": True, + "scanned_memories": len(rows), + "created_procedures": 0, + "created_from_decisions": 0, + "created_from_events": 0, + "procedure_ids": [], + } + + for row in rows: + if not looks_procedural(row["content"]): + continue + stats["created_procedures"] += 1 + if dry_run: + continue + proc = ensure_procedure_for_memory(conn, memory_id=int(row["id"]), agent_id=agent_id) + stats["procedure_ids"].append(proc["id"]) + + decision_rows = conn.execute( + """ + SELECT d.id, d.title, d.rationale, d.project + FROM decisions d + WHERE NOT EXISTS ( + SELECT 1 FROM procedure_sources ps WHERE ps.decision_id = d.id + ) + ORDER BY d.created_at DESC + LIMIT ? + """, + (limit,), + ).fetchall() + for row in decision_rows: + combined = f"{row['title']}. {row['rationale']}" + if not looks_procedural(combined): + continue + stats["created_from_decisions"] += 1 + if dry_run: + continue + payload = parse_procedural_text(combined, title=row["title"], scope=f"project:{row['project']}" if row["project"] else "global") + proc = create_procedure( + conn, + agent_id=agent_id, + payload=payload, + category="decision", + scope=f"project:{row['project']}" if row["project"] else "global", + confidence=0.75, + source_decision_ids=[int(row["id"])], + ) + stats["procedure_ids"].append(proc["id"]) + + event_rows = conn.execute( + """ + SELECT e.id, e.summary, COALESCE(e.detail, '') AS detail, e.project + FROM events e + WHERE e.event_type IN ('error', 'warning', 'artifact', 'result') + AND NOT EXISTS ( + SELECT 1 FROM procedure_sources ps WHERE ps.event_id = e.id + ) + ORDER BY e.created_at DESC + LIMIT ? 
def procedure_stats(conn: sqlite3.Connection) -> dict[str, Any]:
    """Report how many procedures exist per status and how many candidates are tracked.

    Returns:
        ``{"ok": True, "total": <sum over statuses>, "by_status": {status: count},
        "candidates": <row count of procedure_candidates>}``.
    """
    ensure_procedure_schema(conn)

    by_status = {}
    status_rows = conn.execute(
        "SELECT status, COUNT(*) AS cnt FROM procedures GROUP BY status"
    ).fetchall()
    for record in status_rows:
        by_status[record["status"]] = record["cnt"]
    overall = sum(by_status.values())

    candidate_row = conn.execute(
        "SELECT COUNT(*) FROM procedure_candidates"
    ).fetchone()

    summary: dict[str, Any] = {"ok": True}
    summary["total"] = overall
    summary["by_status"] = by_status
    summary["candidates"] = candidate_row[0]
    return summary
─────────────────────────────────────────────────────────── @@ -140,6 +153,66 @@ def test_search_runs(self, cli_db): # Should either succeed with JSON or fail gracefully (no Python traceback) assert r.returncode in (0, 1) + def test_search_includes_procedures_bucket(self, cli_db): + run_brainctl( + "--agent", "tester", + "procedure", "add", + "--goal", "Deploy to staging safely", + "--title", "Staging deploy", + "--description", "Run tests, apply migrations, deploy, verify health checks.", + "--step", "Run tests", + "--step", "Apply migrations", + "--step", "Deploy", + "--step", "Verify health checks", + db_path=cli_db, + ) + r = run_brainctl( + "--agent", "tester", + "search", + "How do I deploy to staging?", + "--tables", "procedures,memories", + db_path=cli_db, + ) + data = json.loads(r.stdout) + assert "procedures" in data + assert data["procedures"] + + +class TestCLIProcedure: + def test_add_get_and_feedback(self, cli_db): + add = run_brainctl( + "--agent", "tester", + "procedure", "add", + "--goal", "Apply migrations", + "--title", "Migration runbook", + "--description", "Inspect pending migrations, run brainctl migrate, restart services.", + "--step", "Inspect pending migrations", + "--step", "Run brainctl migrate", + "--step", "Restart services", + db_path=cli_db, + ) + add_data = json.loads(add.stdout) + proc_id = add_data["id"] + + get_result = run_brainctl( + "--agent", "tester", + "procedure", "get", str(proc_id), + db_path=cli_db, + ) + get_data = json.loads(get_result.stdout) + assert get_data["title"] == "Migration runbook" + + feedback = run_brainctl( + "--agent", "tester", + "procedure", "feedback", str(proc_id), + "--success", + "--validated", + "--usefulness", "0.9", + db_path=cli_db, + ) + feedback_data = json.loads(feedback.stdout) + assert feedback_data["execution_count"] == 1 + # ── cost ──────────────────────────────────────────────────────────────────── diff --git a/tests/test_mcp_tools_procedural.py b/tests/test_mcp_tools_procedural.py 
new file mode 100644 index 0000000..9546bf1 --- /dev/null +++ b/tests/test_mcp_tools_procedural.py @@ -0,0 +1,82 @@ +"""Tests for procedural MCP tool module.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +SRC = Path(__file__).resolve().parent.parent / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +from agentmemory.brain import Brain +import agentmemory.mcp_tools_procedural as pt + + +@pytest.fixture(autouse=True) +def _isolate_db(tmp_path, monkeypatch): + db_file = tmp_path / "brain.db" + Brain(db_path=str(db_file), agent_id="test-agent") + monkeypatch.setattr(pt, "DB_PATH", db_file) + return db_file + + +class TestExports: + def test_tools_and_dispatch_exposed(self): + names = {tool.name for tool in pt.TOOLS} + assert "procedure_add" in names + assert "procedure_search" in names + assert "procedure_feedback" in names + assert "procedure_backfill" in names + assert "procedure_stats" in names + assert "procedure_add" in pt.DISPATCH + assert callable(pt.DISPATCH["procedure_add"]) + + +class TestProceduralTools: + def test_add_get_search_feedback_cycle(self): + add = pt.tool_procedure_add( + agent_id="test-agent", + goal="Deploy to staging safely", + title="Staging deploy", + description="Run tests, apply migrations, deploy, verify health checks.", + steps=["Run tests", "Apply migrations", "Deploy", "Verify health checks"], + tools=["pytest", "brainctl", "deployctl"], + ) + assert add["ok"] is True + + fetched = pt.tool_procedure_get(procedure_id=add["id"]) + assert fetched["ok"] is True + assert fetched["title"] == "Staging deploy" + + search = pt.tool_procedure_search(query="How do I deploy to staging?", limit=5) + assert search["ok"] is True + assert search["procedures"] + assert search["procedures"][0]["title"] == "Staging deploy" + + feedback = pt.tool_procedure_feedback( + agent_id="test-agent", + procedure_id=add["id"], + success=True, + usefulness_score=0.8, + validated=True, + ) + 
assert feedback["ok"] is True + assert feedback["execution_count"] == 1 + + def test_backfill_and_stats(self): + brain = Brain(db_path=str(pt.DB_PATH), agent_id="test-agent") + brain.remember( + "Rollback checklist: first pause deploys, then redeploy the previous release, finally verify health checks.", + category="lesson", + ) + brain.close() + + backfill = pt.tool_procedure_backfill(agent_id="test-agent", limit=20) + stats = pt.tool_procedure_stats() + + assert backfill["ok"] is True + assert stats["ok"] is True + assert stats["total"] >= 1 diff --git a/tests/test_migrate.py b/tests/test_migrate.py index bbbc43a..646b207 100644 --- a/tests/test_migrate.py +++ b/tests/test_migrate.py @@ -142,6 +142,11 @@ def test_sorted_by_version(self): versions = [v for v, _, _ in migrations] assert versions == sorted(versions) + def test_includes_procedural_memory_layer_migration(self): + migrations = migrate._get_migrations() + versions = [v for v, _, _ in migrations] + assert 52 in versions + def test_excludes_non_numbered_files(self): # quantum_schema_migration_sqlite.sql should NOT be included migrations = migrate._get_migrations() diff --git a/tests/test_procedural.py b/tests/test_procedural.py new file mode 100644 index 0000000..61b9fa8 --- /dev/null +++ b/tests/test_procedural.py @@ -0,0 +1,138 @@ +"""Tests for the procedural memory service and Brain API integration.""" + +from __future__ import annotations + +import sqlite3 + + +class TestBrainProcedures: + def test_remember_procedure_creates_bridge_and_structured_row(self, brain): + result = brain.remember_procedure( + goal="Deploy to staging safely", + title="Staging deploy", + description="Run tests, apply migrations, deploy, and verify health checks.", + steps=[ + "Run tests", + "Apply migrations", + "Deploy release", + "Verify health checks", + ], + tools_json=["pytest", "brainctl", "deployctl"], + ) + + conn = sqlite3.connect(str(brain.db_path)) + proc = conn.execute( + "SELECT id, memory_id, title, goal FROM 
procedures WHERE id = ?", + (result["id"],), + ).fetchone() + memory = conn.execute( + "SELECT memory_type, content FROM memories WHERE id = ?", + (result["memory_id"],), + ).fetchone() + step_count = conn.execute( + "SELECT count(*) FROM procedure_steps WHERE procedure_id = ?", + (result["id"],), + ).fetchone()[0] + conn.close() + + assert proc is not None + assert memory is not None + assert memory[0] == "procedural" + assert "Deploy to staging safely" in memory[1] + assert step_count == 4 + + def test_remember_with_procedural_type_extracts_structure(self, brain): + mid = brain.remember( + "How to roll back a release: first pause deploys, then redeploy the previous version, finally verify health checks.", + category="convention", + memory_type="procedural", + ) + + conn = sqlite3.connect(str(brain.db_path)) + proc = conn.execute( + "SELECT id, goal, procedure_kind FROM procedures WHERE memory_id = ?", + (mid,), + ).fetchone() + steps = conn.execute( + "SELECT action FROM procedure_steps WHERE procedure_id = ? 
ORDER BY step_order", + (proc[0],), + ).fetchall() + conn.close() + + assert proc is not None + assert proc[2] in {"workflow", "rollback"} + assert len(steps) >= 1 + + def test_search_prefers_active_procedure_over_stale_legacy(self, brain): + brain.remember_procedure( + goal="Deploy to staging safely", + title="Staging deploy", + description="Current runbook for staging deploys.", + steps=["Run tests", "Apply migrations", "Deploy", "Verify health checks"], + status="active", + execution_count=8, + success_count=7, + ) + brain.remember_procedure( + goal="Deploy to staging safely", + title="Legacy staging deploy", + description="Old runbook kept for audit history.", + steps=["Deploy directly", "Run tests later"], + status="stale", + execution_count=2, + success_count=1, + failure_count=1, + ) + + result = brain.search_procedures("How do I deploy to staging?", limit=5) + assert result["procedures"] + assert result["procedures"][0]["status"] == "active" + assert result["procedures"][0]["title"] == "Staging deploy" + + def test_feedback_updates_execution_and_validation(self, brain): + proc = brain.remember_procedure( + goal="Apply migrations", + title="Migration runbook", + description="Run brainctl migrate before restarting services.", + steps=["Inspect pending migrations", "Run brainctl migrate", "Restart the service"], + ) + + feedback = brain.procedure_feedback( + proc["id"], + success=True, + usefulness_score=0.9, + outcome_summary="Migrations applied cleanly", + validated=True, + ) + fetched = brain.get_procedure(proc["id"]) + + assert feedback["id"] == proc["id"] + assert fetched["execution_count"] == 1 + assert fetched["success_count"] == 1 + assert fetched["last_validated_at"] is not None + + def test_backfill_promotes_procedural_free_text(self, brain): + brain.remember( + "Deployment checklist: 1. Run pytest. 2. Apply migrations. 3. Deploy to staging. 4. 
Verify health checks.", + category="convention", + ) + + result = brain.backfill_procedures(limit=20) + procedures = brain.list_procedures(limit=20) + + assert result["ok"] is True + assert result["created_procedures"] >= 1 + assert any("Deployment checklist" in (proc.get("description") or "") for proc in procedures) + + def test_orient_surfaces_procedures(self, brain): + brain.remember_procedure( + goal="Deploy to staging safely", + title="Staging deploy", + description="Run tests, apply migrations, deploy, verify.", + steps=["Run tests", "Apply migrations", "Deploy", "Verify"], + ) + + snapshot = brain.orient(query="deploy to staging") + + assert "procedures" in snapshot + assert snapshot["procedures"] diff --git a/tests/test_validation.py b/tests/test_validation.py index 0027fb5..0993568 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -63,6 +63,19 @@ def test_valid_memory_accepted(self): result = tool_memory_add(agent_id="test", content="valid memory", category="lesson", force=True) assert result.get("ok") is True + def test_valid_procedural_memory_accepted(self): + _init() + from agentmemory.mcp_server import tool_memory_add + result = tool_memory_add( + agent_id="test", + content="How to deploy safely: run tests, apply migrations, deploy, then verify health checks.", + category="convention", + memory_type="procedural", + force=True, + ) + assert result.get("ok") is True + assert result.get("procedure_id") is not None + class TestEventValidation: def test_invalid_event_type_rejected(self):