diff --git a/README.md b/README.md
index 7b24934..ce56587 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,9 @@ AI agents lose context between sessions. This protocol gives them a structured w
 - **Auto-scaling** — RST files split at 50 entries, transparent to queries
 - **Git-native** — every memory is an RST directive, fully diffable and versioned
 - **MCP server** — expose memory as tools for Claude Desktop, VS Code Copilot, and other MCP clients
-- **CLI-first** — 12 subcommands for full lifecycle management
+- **Autonomous capture** — extract memories from Git commits, CI logs, and discussion transcripts
+- **Planning engine** — analyze the memory graph and propose maintenance actions
+- **CLI-first** — 16+ subcommands for full lifecycle management

## Installation

@@ -82,6 +84,12 @@
 memory tags [--prefix PREFIX]   # Discover tags in use
 memory stale                    # Find expired/overdue memories
 memory review                   # Show memories needing review
 memory rebuild                  # Rebuild needs.json
+memory capture git              # Extract memories from recent commits
+memory capture ci --input FILE  # Extract memories from CI/test logs
+memory capture discussion --input FILE  # Extract from conversation transcripts
+memory plan [--auto-apply]      # Analyze graph and propose maintenance
+memory apply PLAN               # Execute a generated plan
+memory doctor                   # Verify installation health
 ```

 Key flags for `recall`:

@@ -155,6 +163,11 @@ Add to `.vscode/mcp.json`:
 | `memory_tags` | List all tags with counts |
 | `memory_stale` | Find expired/overdue memories |
 | `memory_rebuild` | Rebuild needs.json index |
+| `memory_capture_git` | Extract memories from recent Git commits |
+| `memory_capture_ci` | Extract memories from CI/test log output |
+| `memory_capture_discussion` | Extract memories from conversation transcripts |
+| `memory_plan` | Analyze memory graph and propose maintenance actions |
+| `memory_apply` | Execute a generated maintenance plan |

 ## Memory Types

@@ -282,8 +295,11 @@ ai_memory_protocol/
 └── src/
     └── ai_memory_protocol/
         ├── __init__.py
-        ├── cli.py           # CLI (argparse, 12 subcommands)
-        ├── mcp_server.py    # MCP server (8 tools, stdio transport)
+        ├── cli.py           # CLI (argparse, 16+ subcommands)
+        ├── mcp_server.py    # MCP server (13 tools, stdio transport)
+        ├── capture.py       # Knowledge extraction (git, CI, discussion)
+        ├── planner.py       # Graph analysis and maintenance planning
+        ├── executor.py      # Plan execution engine
         ├── config.py        # Type definitions, constants
         ├── engine.py        # Workspace detection, search, graph walk
         ├── formatter.py     # Output formatting (brief/compact/context/json)
@@ -293,6 +309,68 @@

 Memory data lives in a **separate workspace** (e.g., `.memories/`), created with `memory init`.

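+The workspace can also be driven directly from Python. A minimal sketch of the capture API (signatures as in `capture.py` below; the log file path and source label are illustrative):
+
+```python
+from pathlib import Path
+
+from ai_memory_protocol.capture import capture_from_ci, format_candidates
+
+workspace = Path(".memories")          # workspace created by `memory init`
+log_text = Path("ci.log").read_text()  # hypothetical CI log dump
+
+# Parse the log into memory candidates, deduplicated against existing memories.
+candidates = capture_from_ci(workspace, log_text, source="ci:github-actions:run-123")
+print(format_candidates(candidates, fmt="human"))
+```
+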
+## Autonomous Workflow
+
+The protocol supports a fully autonomous memory lifecycle — agents can capture, plan, and maintain knowledge without human intervention:
+
+```
+ capture (git / CI / discussion)
+   │
+   ▼
+ plan (analyze graph → propose actions)
+   │
+   ▼
+ apply (execute plan → add/update/deprecate)
+   │
+   ▼
+ rebuild (sphinx-build → needs.json)
+   │
+   ▼
+ recall (search updated graph)
+```
+
+**Capture sources:**
+- `memory capture git` — scans recent commits and extracts decisions, bug fixes, and refactors
+- `memory capture ci --input FILE` — parses test failures, compiler errors, and deprecation warnings
+- `memory capture discussion --input FILE` — classifies a conversation into decisions, facts, preferences, risks, goals, and questions
+
+**Planning engine:**
+- `memory plan` — analyzes the memory graph for staleness, missing links, and contradictions, then proposes maintenance actions
+- `memory plan --auto-apply` — executes the plan immediately after analysis
+- `memory apply plan.json` — executes a previously saved plan
+
+All captured candidates carry a provenance label (`source`) and are deduplicated against existing memories.
+
+## Build-as-Guardian
+
+The Sphinx build acts as a quality gate for the memory graph. The `needs_warnings` mapping in `conf.py` defines constraints that fire during `memory rebuild`:
+
+```python
+needs_warnings = {
+    "missing_topic_tag": "type in ['mem','dec','fact',...] and not any(t.startswith('topic:') for t in tags)",
+    "empty_body": "description == '' or description == 'TODO: Add description.'",
+    "deprecated_without_supersede": "status == 'deprecated' and len(supersedes_back) == 0",
+}
+```
+
+With `sphinx-build -W` (warnings as errors), the build fails if any memory violates these constraints. This means:
+- Every memory must have at least one `topic:` tag
+- No empty placeholders survive to the index
+- Deprecated memories must be superseded by a replacement
+
+Agents learn to self-correct: if `rebuild` fails, they read the warning, fix the offending memory, and retry.
+
+## Human Role
+
+Humans are **observers and editors**, not gatekeepers:
+
+- **Dashboards** — `memory/dashboards.rst` contains `needtable`, `needlist`, and `needflow` directives that render the live state of the memory graph as HTML
+- **RST editing** — memories are plain RST, editable in any text editor or IDE, with full diff/blame in Git
+- **Override** — humans can update the status, confidence, or tags of any memory via the CLI or a direct RST edit
+- **Review** — `memory review` surfaces memories whose `review_after` date has passed, prompting human validation
+
+The protocol is designed so that agents maintain knowledge autonomously while humans retain full visibility and override capability.
+
 ## Contributing

 See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how to contribute.
diff --git a/src/ai_memory_protocol/capture.py b/src/ai_memory_protocol/capture.py
index 82c7ce4..925ef8c 100644
--- a/src/ai_memory_protocol/capture.py
+++ b/src/ai_memory_protocol/capture.py
@@ -1,11 +1,15 @@
 """Capture knowledge from external sources — git, CI, discussions.

-Primary use-case: extract memories from git history so the agent does
-not lose context from past development sessions.
+Extract memories from: +- **Git history** — commit messages classified by conventional commit type +- **CI logs** — test failures, build errors, warnings +- **Discussions** — decisions, preferences, goals from conversation transcripts Usage: - from ai_memory_protocol.capture import capture_from_git + from ai_memory_protocol.capture import capture_from_git, capture_from_ci, capture_from_discussion candidates = capture_from_git(workspace, repo_path, since="2 weeks ago") + candidates = capture_from_ci(workspace, log_text) + candidates = capture_from_discussion(workspace, transcript) """ from __future__ import annotations @@ -468,3 +472,358 @@ def format_candidates(candidates: list[MemoryCandidate], fmt: str = "human") -> lines.append("") return "\n".join(lines) + + +# =========================================================================== +# CI Log Capture +# =========================================================================== + +# Patterns for extracting structured data from CI logs +_CI_PATTERNS: list[tuple[str, str, str, str]] = [ + # (regex, memory_type, title_template, confidence) + # Test failures + ( + r"(?:FAILED|FAIL|ERROR)\s*:?\s*(?:test_?)?(\S+?)(?:\s*[-—]\s*(.+))?$", + "mem", + "CI test failure: {name}", + "high", + ), + # Python pytest failures + ( + r"(?:FAILED)\s+([\w/]+\.py::[\w:]+)", + "mem", + "Test failure: {name}", + "high", + ), + # Compiler errors (C/C++) + ( + r"(\S+\.\w+):(\d+):\d+:\s*error:\s*(.+)", + "mem", + "Build error in {file}:{line}", + "high", + ), + # Linker errors + ( + r"(?:undefined reference to|cannot find -l)(.+)", + "mem", + "Linker error: {name}", + "high", + ), + # Deprecation warnings + ( + r"(?:DeprecationWarning|FutureWarning):\s*(.+)", + "risk", + "Deprecation warning: {name}", + "medium", + ), + # Timeout errors + ( + r"(?:TimeoutError|timed?\s*out)\s*:?\s*(.+)?", + "mem", + "Timeout: {name}", + "high", + ), + # CMake / build configuration errors + ( + r"CMake Error.*?:\s*(.+)", + "mem", + "CMake error: {name}", + "high", + ), + # Generic error lines + ( + r"^(?:Error|ERROR)\s*:?\s*(.+)", + "mem", + "CI error: {name}", + "medium", + ), +] + +# Summary line patterns: "X passed, Y failed" +_CI_SUMMARY_PATTERN = re.compile( + r"(\d+)\s+(?:passed|succeeded).*?(\d+)\s+(?:failed|errors?)", + re.IGNORECASE, +) + + +@dataclass +class _CIMatch: + """A matched CI pattern with extracted data.""" + + mem_type: str + title: str + detail: str + confidence: str + line_num: int + + +def _parse_ci_log(text: str) -> list[_CIMatch]: + """Parse CI log text and extract structured error/failure data.""" + matches: list[_CIMatch] = [] + seen_titles: set[str] = set() + + for line_num, line in enumerate(text.splitlines(), 1): + line = line.strip() + if not line: + continue + + for pattern, mem_type, title_tpl, confidence in _CI_PATTERNS: + m = re.search(pattern, line, re.IGNORECASE) + if m: + groups = m.groups() + # Build title from template and captured groups + name = (groups[0] or "").strip() if groups else "" + file_val = "" + line_val = "" + if len(groups) >= 3: + file_val = (groups[0] or "").strip() + line_val = (groups[1] or "").strip() + name = (groups[2] or "").strip() + + title = title_tpl.format( + name=name[:80] if name else "unknown", + file=file_val, + line=line_val, + )[:120] + + # Dedup within same log + if title in seen_titles: + break + seen_titles.add(title) + + detail = line[:200] + matches.append( + _CIMatch( + mem_type=mem_type, + title=title, + detail=detail, + confidence=confidence, + line_num=line_num, + ) + ) + break # One match per 
line + + return matches + + +def capture_from_ci( + workspace: Path, + log_text: str, + source: str = "ci-log", + tags: list[str] | None = None, + deduplicate: bool = True, +) -> list[MemoryCandidate]: + """Extract memory candidates from CI log output. + + Parameters + ---------- + workspace + Path to the memory workspace (for dedup against existing). + log_text + Raw CI log text (stdout/stderr from build or test run). + source + Source label for provenance (e.g. ``"ci:github-actions:run-123"``). + tags + Additional tags to apply to all candidates. Auto-infers ``topic:ci``. + deduplicate + If True, filter out candidates that match existing memories. + + Returns + ------- + list[MemoryCandidate] + Candidate memories ready for review and optional insertion. + """ + base_tags = ["topic:ci"] + if tags: + base_tags.extend(t for t in tags if t not in base_tags) + + matches = _parse_ci_log(log_text) + if not matches: + return [] + + # Load existing for dedup + existing: dict[str, Any] = {} + if deduplicate: + try: + existing = load_needs(workspace) + except (SystemExit, Exception): + existing = {} + + candidates: list[MemoryCandidate] = [] + for match in matches: + candidate = MemoryCandidate( + type=match.mem_type, + title=match.title, + body=f"Line {match.line_num}: {match.detail}", + tags=list(base_tags), + source=source, + confidence=match.confidence, + ) + candidates.append(candidate) + + # Dedup against existing + if deduplicate and existing: + candidates = [c for c in candidates if not _is_duplicate(c, existing)] + + return candidates + + +# =========================================================================== +# Discussion / Transcript Capture +# =========================================================================== + +# Patterns for classifying discussion statements +_DISCUSSION_PATTERNS: list[tuple[str, str, str]] = [ + # Decisions + (r"(?:we\s+)?decided\s+(?:to\s+)?(.+)", "dec", "high"), + (r"(?:the\s+)?decision\s+is\s+(?:to\s+)?(.+)", "dec", "high"), + ( + r"(?:let'?s|we\s+should|we\s+will|we'?ll)\s+(?:go\s+with\s+|use\s+|adopt\s+)(.+)", + "dec", + "medium", + ), + (r"(?:I'?m\s+going\s+with|going\s+with|choosing)\s+(.+)", "dec", "medium"), + # Preferences + (r"I\s+prefer\s+(.+)", "pref", "high"), + (r"(?:let'?s|we\s+should)\s+(?:always|prefer|stick\s+with|keep)\s+(.+)", "pref", "medium"), + (r"(?:convention|standard|style):\s*(.+)", "pref", "medium"), + (r"(?:use|prefer)\s+(\S+)\s+(?:over|instead\s+of)\s+(\S+)", "pref", "medium"), + # Goals + (r"(?:the\s+)?goal\s+(?:is\s+)?(?:to\s+)?(.+)", "goal", "high"), + (r"we\s+(?:need|want|aim|plan)\s+to\s+(.+)", "goal", "medium"), + (r"(?:TODO|FIXME|HACK):\s*(.+)", "goal", "medium"), + (r"next\s+(?:step|priority|milestone):\s*(.+)", "goal", "medium"), + # Facts + (r"(?:it\s+)?turns?\s+out\s+(?:that\s+)?(.+)", "fact", "medium"), + (r"(?:TIL|FYI|note|important):\s*(.+)", "fact", "medium"), + ( + r"(?:the\s+)?(?:API|endpoint|service|server)\s+(?:is|uses|runs|supports)\s+(.+)", + "fact", + "medium", + ), + # Risks + (r"(?:risk|warning|careful|watch\s+out|danger):\s*(.+)", "risk", "high"), + (r"(?:this\s+)?(?:might|could|may)\s+(?:break|fail|cause)\s+(.+)", "risk", "medium"), + # Questions + (r"(?:should\s+we|do\s+we\s+need\s+to|how\s+(?:do|should)\s+we)\s+(.+)\??", "q", "medium"), + (r"(?:open\s+question|TBD|to\s+be\s+decided):\s*(.+)", "q", "medium"), +] + + +# Confidence ranking for tie-breaking +_CONFIDENCE_RANK = {"high": 2, "medium": 1, "low": 0} + + +def _classify_statement(text: str) -> tuple[str, str, str] | None: + """Classify a 
statement into a memory type. + + Evaluates all matching patterns and returns the highest-confidence + classification. Returns (type, extracted_title, confidence) or None + if no match. + """ + text_stripped = text.strip() + best: tuple[str, str, str] | None = None + best_rank = -1 + for pattern, mem_type, confidence in _DISCUSSION_PATTERNS: + m = re.search(pattern, text_stripped, re.IGNORECASE) + if m: + title = m.group(1).strip() + # Handle special case for "use X over Y" → "Prefer X over Y" + if mem_type == "pref" and len(m.groups()) >= 2: + title = f"{m.group(1)} over {m.group(2)}" + # Clean title + title = re.sub(r"\s+", " ", title) + title = title.rstrip(".") + if len(title) < 5: + continue + rank = _CONFIDENCE_RANK.get(confidence, 0) + if rank > best_rank: + best = (mem_type, title[:120], confidence) + best_rank = rank + return best + + +def capture_from_discussion( + workspace: Path, + transcript: str, + source: str = "discussion", + tags: list[str] | None = None, + deduplicate: bool = True, +) -> list[MemoryCandidate]: + """Extract memory candidates from a discussion transcript. + + Parses free-text conversation and identifies decisions, preferences, + goals, facts, risks, and open questions based on linguistic patterns. + + Parameters + ---------- + workspace + Path to the memory workspace (for dedup against existing). + transcript + Raw text of the discussion/conversation. + source + Source label for provenance (e.g. ``"slack:2026-02-10"``). + tags + Additional tags to apply to all candidates. + deduplicate + If True, filter out candidates that match existing memories. + + Returns + ------- + list[MemoryCandidate] + Candidate memories ready for review and optional insertion. + """ + base_tags = ["topic:discussion"] + if tags: + base_tags.extend(t for t in tags if t not in base_tags) + + # Load existing for dedup + existing: dict[str, Any] = {} + if deduplicate: + try: + existing = load_needs(workspace) + except (SystemExit, Exception): + existing = {} + + candidates: list[MemoryCandidate] = [] + seen_titles: set[str] = set() + + # Process line by line and also try multi-line sentences + lines = transcript.splitlines() + for line in lines: + line = line.strip() + if not line or len(line) < 10: + continue + + # Strip common prefixes: "> quote", "- list", "* list", "User:", timestamps + cleaned = re.sub(r"^(?:[>*\-]\s*|\d{1,2}:\d{2}\s*|[\w]+:\s*)", "", line).strip() + if not cleaned or len(cleaned) < 10: + continue + + result = _classify_statement(cleaned) + if result is None: + continue + + mem_type, title, confidence = result + + # Dedup within same transcript + title_lower = title.lower() + if title_lower in seen_titles: + continue + seen_titles.add(title_lower) + + candidate = MemoryCandidate( + type=mem_type, + title=title, + body=cleaned[:500], + tags=list(base_tags), + source=source, + confidence=confidence, + ) + candidates.append(candidate) + + # Dedup against existing + if deduplicate and existing: + candidates = [c for c in candidates if not _is_duplicate(c, existing)] + + return candidates diff --git a/src/ai_memory_protocol/cli.py b/src/ai_memory_protocol/cli.py index 5e459e7..31b2d2e 100644 --- a/src/ai_memory_protocol/cli.py +++ b/src/ai_memory_protocol/cli.py @@ -27,7 +27,7 @@ from pathlib import Path from . 
import __version__ -from .capture import capture_from_git, format_candidates +from .capture import capture_from_ci, capture_from_discussion, capture_from_git, format_candidates from .config import TYPE_FILES from .engine import ( expand_graph, @@ -558,12 +558,90 @@ def cmd_capture(args: argparse.Namespace) -> None: if not args.no_rebuild: success, message = run_rebuild(workspace) print(message) + elif args.source == "ci": + log_text = _read_capture_input(args.input) + if log_text is None: + print("Provide CI log via --input or pipe to stdin.") + sys.exit(1) + extra_tags = ( + [t.strip() for t in args.extra_tags.split(",") if t.strip()] + if args.extra_tags + else None + ) + candidates = capture_from_ci( + workspace=workspace, + log_text=log_text, + source=args.source_label or "ci-log", + tags=extra_tags, + ) + print(format_candidates(candidates, fmt=args.format)) + _auto_add_candidates(workspace, candidates, args) + elif args.source == "discussion": + transcript = _read_capture_input(args.input) + if transcript is None: + print("Provide transcript via --input or pipe to stdin.") + sys.exit(1) + extra_tags = ( + [t.strip() for t in args.extra_tags.split(",") if t.strip()] + if args.extra_tags + else None + ) + candidates = capture_from_discussion( + workspace=workspace, + transcript=transcript, + source=args.source_label or "discussion", + tags=extra_tags, + ) + print(format_candidates(candidates, fmt=args.format)) + _auto_add_candidates(workspace, candidates, args) else: print(f"Unknown capture source: {args.source}") - print("Supported sources: git") + print("Supported sources: git, ci, discussion") sys.exit(1) +def _read_capture_input(input_path: str | None) -> str | None: + """Read capture input from file, stdin, or return None.""" + if input_path: + path = Path(input_path) + if path.exists(): + return path.read_text() + print(f"File not found: {input_path}") + return None + if not sys.stdin.isatty(): + return sys.stdin.read() + return None + + +def _auto_add_candidates( + workspace: Path, + candidates: list, + args: argparse.Namespace, +) -> None: + """Add candidates to workspace if --auto-add flag is set.""" + if not getattr(args, "auto_add", False) or not candidates: + return + from .rst import append_to_rst, generate_rst_directive + + count = 0 + for c in candidates: + directive = generate_rst_directive( + mem_type=c.type, + title=c.title, + tags=c.tags, + source=c.source, + confidence=c.confidence, + scope=c.scope, + body=c.body, + ) + append_to_rst(workspace, c.type, directive) + count += 1 + print(f"\nAdded {count} memories to workspace.") + if not getattr(args, "no_rebuild", False): + success, message = run_rebuild(workspace) + print(message) + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -826,16 +904,16 @@ def build_parser() -> argparse.ArgumentParser: p_capture = sub.add_parser("capture", help="Capture memories from external sources") p_capture.add_argument( "source", - choices=["git"], + choices=["git", "ci", "discussion"], help="Capture source type", ) p_capture.add_argument( "--repo", - help="Path to git repository (default: current directory)", + help="Path to git repository (default: current directory, git only)", ) p_capture.add_argument( "--repo-name", - help="Repository name for repo: tags (auto-detected from path if omitted)", + help="Repository name for repo: tags (auto-detected from path if omitted, git only)", ) p_capture.add_argument( 
"--since", @@ -870,6 +948,18 @@ def build_parser() -> argparse.ArgumentParser: action="store_true", help="Skip rebuild after auto-add", ) + p_capture.add_argument( + "--input", + help="Input file for ci/discussion capture (reads stdin if omitted and not a TTY)", + ) + p_capture.add_argument( + "--source-label", + help="Source provenance label (e.g. 'ci:github-actions:run-123', 'slack:2026-02-10')", + ) + p_capture.add_argument( + "--extra-tags", + help="Extra tags for ci/discussion candidates, comma-separated", + ) p_capture.set_defaults(func=cmd_capture) return parser diff --git a/src/ai_memory_protocol/mcp_server.py b/src/ai_memory_protocol/mcp_server.py index 6c8d07a..4444764 100644 --- a/src/ai_memory_protocol/mcp_server.py +++ b/src/ai_memory_protocol/mcp_server.py @@ -381,6 +381,7 @@ def _build_tools() -> list: "conflicts", "tag_normalize", "split_files", + "auto_summaries", ], }, "description": "Which checks to run. Default: all.", @@ -467,6 +468,83 @@ def _build_tools() -> list: "required": [], }, ), + Tool( + name="memory_capture_ci", + description=( + "Extract memory candidates from CI log output (test failures, build errors, " + "deprecation warnings, timeouts). Parses common CI patterns and generates " + "structured memories. Pass log text directly or reference a log file." + ), + inputSchema={ + "type": "object", + "properties": { + "log_text": { + "type": "string", + "description": "Raw CI log text to parse.", + }, + "source": { + "type": "string", + "default": "ci-log", + "description": "Source label (e.g. 'ci:github-actions:run-123').", + }, + "tags": { + "type": "string", + "description": "Extra tags, comma-separated. topic:ci is auto-added.", + }, + "format": { + "type": "string", + "enum": ["human", "json"], + "default": "human", + "description": "Output format.", + }, + "auto_add": { + "type": "boolean", + "default": False, + "description": "Automatically add candidates to workspace.", + }, + }, + "required": ["log_text"], + }, + ), + Tool( + name="memory_capture_discussion", + description=( + "Extract memory candidates from a discussion or conversation transcript. " + "Identifies decisions, preferences, goals, facts, risks, and open questions " + "from natural language patterns like 'we decided to...', 'I prefer...', " + "'the goal is...', 'should we...'." + ), + inputSchema={ + "type": "object", + "properties": { + "transcript": { + "type": "string", + "description": "Raw text of the discussion/conversation.", + }, + "source": { + "type": "string", + "default": "discussion", + "description": "Source label (e.g. 'slack:2026-02-10', 'meeting:standup').", + }, + "tags": { + "type": "string", + "description": "Extra tags, comma-separated. 
topic:discussion is auto-added.", + }, + "format": { + "type": "string", + "enum": ["human", "json"], + "default": "human", + "description": "Output format.", + }, + "auto_add": { + "type": "boolean", + "default": False, + "description": "Automatically add candidates to workspace.", + }, + }, + "required": ["transcript"], + }, + ), ] @@ -594,6 +672,10 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]: return _handle_apply(arguments) elif name == "memory_capture_git": return _handle_capture_git(arguments) + elif name == "memory_capture_ci": + return _handle_capture_ci(arguments) + elif name == "memory_capture_discussion": + return _handle_capture_discussion(arguments) else: return _text_response(f"Unknown tool: {name}") except SystemExit as e: @@ -891,6 +973,88 @@ def _handle_capture_git(args: dict[str, Any]) -> list[TextContent]: return _text_response("\n".join(output_lines)) +def _handle_capture_ci(args: dict[str, Any]) -> list[TextContent]: + from .capture import capture_from_ci, format_candidates + from .rst import append_to_rst, generate_rst_directive + + workspace = _get_workspace() + log_text = args.get("log_text", "") + extra_tags = ( + [t.strip() for t in args["tags"].split(",") if t.strip()] if args.get("tags") else None + ) + candidates = capture_from_ci( + workspace=workspace, + log_text=log_text, + source=args.get("source", "ci-log"), + tags=extra_tags, + ) + + output_lines: list[str] = [] + fmt = args.get("format", "human") + output_lines.append(format_candidates(candidates, fmt=fmt)) + + if args.get("auto_add", False) and candidates: + count = 0 + for c in candidates: + directive = generate_rst_directive( + mem_type=c.type, + title=c.title, + tags=c.tags, + source=c.source, + confidence=c.confidence, + scope=c.scope, + body=c.body, + ) + append_to_rst(workspace, c.type, directive) + count += 1 + output_lines.append(f"\nAdded {count} memories to workspace.") + success, msg = run_rebuild(workspace) + output_lines.append(msg) + + return _text_response("\n".join(output_lines)) + + +def _handle_capture_discussion(args: dict[str, Any]) -> list[TextContent]: + from .capture import capture_from_discussion, format_candidates + from .rst import append_to_rst, generate_rst_directive + + workspace = _get_workspace() + transcript = args.get("transcript", "") + extra_tags = ( + [t.strip() for t in args["tags"].split(",") if t.strip()] if args.get("tags") else None + ) + candidates = capture_from_discussion( + workspace=workspace, + transcript=transcript, + source=args.get("source", "discussion"), + tags=extra_tags, + ) + + output_lines: list[str] = [] + fmt = args.get("format", "human") + output_lines.append(format_candidates(candidates, fmt=fmt)) + + if args.get("auto_add", False) and candidates: + count = 0 + for c in candidates: + directive = generate_rst_directive( + mem_type=c.type, + title=c.title, + tags=c.tags, + source=c.source, + confidence=c.confidence, + scope=c.scope, + body=c.body, + ) + append_to_rst(workspace, c.type, directive) + count += 1 + output_lines.append(f"\nAdded {count} memories to workspace.") + success, msg = run_rebuild(workspace) + output_lines.append(msg) + + return _text_response("\n".join(output_lines)) + + # --------------------------------------------------------------------------- # Entry points # --------------------------------------------------------------------------- diff --git a/src/ai_memory_protocol/planner.py b/src/ai_memory_protocol/planner.py index 3a36e76..95ce2f3 100644 --- a/src/ai_memory_protocol/planner.py 
+++ b/src/ai_memory_protocol/planner.py @@ -14,7 +14,7 @@ from collections import defaultdict from dataclasses import asdict, dataclass, field -from datetime import date +from datetime import date, timedelta from difflib import SequenceMatcher from pathlib import Path from typing import Any, Literal @@ -36,6 +36,7 @@ "conflicts", "tag_normalize", "split_files", + "auto_summaries", ] @@ -307,6 +308,63 @@ def detect_tag_normalization(needs: dict[str, Any]) -> list[Action]: return actions +def detect_auto_summaries( + needs: dict[str, Any], + min_count: int = 5, + min_age_days: int = 60, +) -> list[Action]: + """Find topics with many aged observations that could be consolidated. + + When a single ``topic:`` tag has *min_count* or more ``mem`` entries + all older than *min_age_days*, propose consolidating them into a + single ``fact``. + + O(n) — one pass over active needs, then per-topic grouping. + """ + active = _active_needs(needs) + cutoff = (date.today() - timedelta(days=min_age_days)).isoformat() + + # Group observations by topic tag (use sets to avoid duplicate IDs) + by_topic: dict[str, set[str]] = defaultdict(set) + for nid, need in active.items(): + if need.get("type") != "mem": + continue + created = need.get("created_at", "") + if not created or created > cutoff: + continue + for tag in need.get("tags", []): + if tag.startswith("topic:"): + by_topic[tag].add(nid) + + actions: list[Action] = [] + for topic_tag, ids in by_topic.items(): + if len(ids) < min_count: + continue + topic_value = topic_tag.split(":", 1)[1] + # Collect all tags from the group for the consolidated entry + all_tags: set[str] = set() + for nid in ids: + all_tags.update(active[nid].get("tags", [])) + + sorted_ids = sorted(ids) + actions.append( + Action( + kind="SUPERSEDE", + reason=( + f"Auto-summary: {len(ids)} observations on {topic_tag} " + f"older than {min_age_days} days. Consider consolidating " + f"into a single fact." + ), + old_id=",".join(sorted_ids), + new_type="fact", + new_title=f"Consolidated: {topic_value} observations", + new_tags=sorted(all_tags), + ) + ) + + return actions + + def detect_split_files(workspace: Path) -> list[Action]: """Find RST files that exceed MAX_ENTRIES_PER_FILE. 
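A quick, self-contained check of the `detect_auto_summaries` defaults (a sketch: the dict fields mirror the planner test fixtures below, and five active `mem` entries on one topic, all 90 days old, is exactly enough to trigger one consolidation under the default `min_count=5`, `min_age_days=60`):

```python
from datetime import date, timedelta

from ai_memory_protocol.planner import detect_auto_summaries

old = (date.today() - timedelta(days=90)).isoformat()
needs = {
    f"MEM_obs_{i}": {
        "id": f"MEM_obs_{i}",
        "type": "mem",
        "title": f"Observation {i} about gateway",
        "status": "active",
        "tags": ["topic:gateway"],
        "created_at": old,
    }
    for i in range(5)  # exactly the default min_count
}

actions = detect_auto_summaries(needs)
assert len(actions) == 1 and actions[0].kind == "SUPERSEDE"
assert actions[0].new_type == "fact"  # consolidate observations into one fact
```
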
@@ -343,6 +401,7 @@ def detect_split_files(workspace: Path) -> list[Action]: "conflicts": lambda needs, ws: detect_conflicts(needs), "tag_normalize": lambda needs, ws: detect_tag_normalization(needs), "split_files": lambda needs, ws: detect_split_files(ws), + "auto_summaries": lambda needs, ws: detect_auto_summaries(needs), } diff --git a/tests/test_capture.py b/tests/test_capture.py index 02b7003..03750d1 100644 --- a/tests/test_capture.py +++ b/tests/test_capture.py @@ -10,12 +10,16 @@ from ai_memory_protocol.capture import ( MemoryCandidate, _classify_commit, + _classify_statement, _extract_scope, _file_overlap, _GitCommit, _group_commits, _infer_tags, _is_duplicate, + _parse_ci_log, + capture_from_ci, + capture_from_discussion, capture_from_git, format_candidates, ) @@ -434,3 +438,307 @@ def test_empty_fields_omitted(self): d = c.to_dict() assert "body" not in d assert "source" not in d + + +# =========================================================================== +# Tests: CI Log Capture +# =========================================================================== + + +class TestParseCILog: + def test_test_failure(self): + log = "FAILED: test_gateway_health\nSome other output" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + assert matches[0].mem_type == "mem" + assert "gateway_health" in matches[0].title + + def test_pytest_failure(self): + log = "FAILED tests/test_api.py::TestHealth::test_endpoint" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + assert "test_api" in matches[0].title or "TestHealth" in matches[0].title + + def test_compiler_error(self): + log = "src/server.cpp:42:10: error: use of undeclared identifier 'foo'" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + assert matches[0].confidence == "high" + assert "server.cpp" in matches[0].title + + def test_deprecation_warning(self): + log = "DeprecationWarning: pkg_resources is deprecated" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + assert matches[0].mem_type == "risk" + + def test_timeout_error(self): + log = "TimeoutError: connection timed out after 30 seconds" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + assert "timeout" in matches[0].title.lower() + + def test_cmake_error(self): + log = "CMake Error at CMakeLists.txt:15: Could not find dependency XYZ" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + assert "cmake" in matches[0].title.lower() or "CMake" in matches[0].detail + + def test_generic_error(self): + log = "Error: file not found: config.yaml" + matches = _parse_ci_log(log) + assert len(matches) >= 1 + + def test_empty_log(self): + matches = _parse_ci_log("") + assert matches == [] + + def test_clean_log_no_matches(self): + log = "Building project...\nCompilation successful.\nAll tests passed." 
+ matches = _parse_ci_log(log) + assert matches == [] + + def test_dedup_within_log(self): + log = "FAILED: test_foo\nFAILED: test_foo\nFAILED: test_bar" + matches = _parse_ci_log(log) + titles = [m.title for m in matches] + # Should not have duplicate titles + assert len(titles) == len(set(titles)) + + +class TestCaptureFromCI: + def test_basic_capture(self, tmp_workspace): + log = "FAILED: test_health_check\nError: connection refused" + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_ci( + workspace=tmp_workspace, + log_text=log, + source="ci:test-run-123", + ) + assert len(candidates) >= 1 + assert all("topic:ci" in c.tags for c in candidates) + assert candidates[0].source == "ci:test-run-123" + + def test_extra_tags(self, tmp_workspace): + log = "FAILED: test_api" + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_ci( + workspace=tmp_workspace, + log_text=log, + tags=["repo:backend", "topic:api"], + ) + assert len(candidates) >= 1 + assert "repo:backend" in candidates[0].tags + assert "topic:ci" in candidates[0].tags + + def test_empty_log_returns_empty(self, tmp_workspace): + candidates = capture_from_ci( + workspace=tmp_workspace, + log_text="All tests passed. Build successful.", + ) + assert candidates == [] + + def test_dedup_against_existing(self, tmp_workspace): + log = "FAILED: test_health_check" + existing = { + "MEM_x": { + "title": "CI test failure: test_health_check", + "status": "active", + "source": "", + }, + } + with patch("ai_memory_protocol.capture.load_needs", return_value=existing): + candidates = capture_from_ci( + workspace=tmp_workspace, + log_text=log, + deduplicate=True, + ) + assert len(candidates) == 0 + + +# =========================================================================== +# Tests: Discussion Capture +# =========================================================================== + + +class TestClassifyStatement: + def test_decision(self): + result = _classify_statement("We decided to use FastAPI for the backend") + assert result is not None + mem_type, title, confidence = result + assert mem_type == "dec" + assert "FastAPI" in title + + def test_lets_go_with(self): + result = _classify_statement("Let's go with PostgreSQL for storage") + assert result is not None + assert result[0] == "dec" + + def test_preference(self): + result = _classify_statement("I prefer TypeScript over JavaScript") + assert result is not None + assert result[0] == "pref" + + def test_convention(self): + result = _classify_statement("Convention: all API responses use camelCase") + assert result is not None + assert result[0] == "pref" + + def test_goal(self): + result = _classify_statement("The goal is to have 80% test coverage") + assert result is not None + assert result[0] == "goal" + + def test_we_need_to(self): + result = _classify_statement("We need to optimize the database queries") + assert result is not None + assert result[0] == "goal" + + def test_todo(self): + result = _classify_statement("TODO: add retry logic for failed requests") + assert result is not None + assert result[0] == "goal" + + def test_fact_turns_out(self): + result = _classify_statement("It turns out the API uses OAuth2 internally") + assert result is not None + assert result[0] == "fact" + + def test_fact_til(self): + result = _classify_statement("TIL: Sphinx-Needs supports needextend directives") + assert result is not None + assert result[0] == "fact" + + def test_risk(self): + result = 
_classify_statement("Warning: this might break backward compatibility") + assert result is not None + assert result[0] == "risk" + + def test_could_break(self): + result = _classify_statement("This could break the CI pipeline if merged") + assert result is not None + assert result[0] == "risk" + + def test_question(self): + result = _classify_statement("Should we use Redis for caching?") + assert result is not None + assert result[0] == "q" + + def test_open_question(self): + result = _classify_statement("Open question: how do we handle rate limiting?") + assert result is not None + assert result[0] == "q" + + def test_no_match(self): + result = _classify_statement("The weather is nice today") + assert result is None + + def test_too_short(self): + result = _classify_statement("Decided ok") + assert result is None # Title too short after extraction + + +class TestCaptureFromDiscussion: + def test_basic_capture(self, tmp_workspace): + transcript = """ + We decided to use ROS 2 Jazzy for the gateway. + I prefer async/await over callbacks for all new code. + The goal is to have all endpoints documented by March. + Should we support gRPC in addition to REST? + """ + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + source="meeting:standup", + ) + assert len(candidates) >= 3 + types = {c.type for c in candidates} + assert "dec" in types + assert "pref" in types or "goal" in types + + def test_tags_applied(self, tmp_workspace): + transcript = "We decided to deploy on Kubernetes for production" + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + tags=["repo:infra"], + ) + assert len(candidates) >= 1 + assert "topic:discussion" in candidates[0].tags + assert "repo:infra" in candidates[0].tags + + def test_source_label(self, tmp_workspace): + transcript = "The goal is to launch by Q3 2026" + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + source="slack:2026-02-10", + ) + assert len(candidates) >= 1 + assert candidates[0].source == "slack:2026-02-10" + + def test_empty_transcript(self, tmp_workspace): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript="", + ) + assert candidates == [] + + def test_irrelevant_transcript(self, tmp_workspace): + transcript = """ + Good morning everyone. + How was your weekend? + Fine thanks. + """ + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + ) + assert candidates == [] + + def test_dedup_within_transcript(self, tmp_workspace): + transcript = """ + We decided to use PostgreSQL for storage. + As I said, we decided to use PostgreSQL for storage. 
+ """ + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + ) + # Should deduplicate within the same transcript + titles = [c.title.lower() for c in candidates] + assert len(titles) == len(set(titles)) + + def test_strips_prefixes(self, tmp_workspace): + transcript = """ + > We decided to adopt trunk-based development + - TODO: set up branch protection rules + 12:30 I prefer small PRs over large ones + """ + with patch("ai_memory_protocol.capture.load_needs", return_value={}): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + ) + assert len(candidates) >= 2 + + def test_dedup_against_existing(self, tmp_workspace): + transcript = "We decided to use FastAPI for the backend" + existing = { + "DEC_x": { + "title": "use FastAPI for the backend", + "status": "active", + "source": "", + }, + } + with patch("ai_memory_protocol.capture.load_needs", return_value=existing): + candidates = capture_from_discussion( + workspace=tmp_workspace, + transcript=transcript, + deduplicate=True, + ) + assert len(candidates) == 0 diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index bac271a..851ce5e 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -33,7 +33,7 @@ def test_create_server(self) -> None: class TestMCPToolDefinitions: def test_tool_count(self) -> None: - assert len(TOOLS) >= 8 + assert len(TOOLS) >= 13 def test_all_tools_have_schemas(self) -> None: for tool in TOOLS: @@ -54,6 +54,8 @@ def test_required_tools_present(self) -> None: "memory_plan", "memory_apply", "memory_capture_git", + "memory_capture_ci", + "memory_capture_discussion", ]: assert expected in names, f"Missing tool: {expected}" diff --git a/tests/test_planner.py b/tests/test_planner.py index 7052fff..092aa6c 100644 --- a/tests/test_planner.py +++ b/tests/test_planner.py @@ -9,6 +9,7 @@ from ai_memory_protocol.planner import ( Action, + detect_auto_summaries, detect_conflicts, detect_duplicates, detect_missing_tags, @@ -326,6 +327,135 @@ def test_action_type_is_retag(self, needs_with_tag_issues): assert all(a.kind == "RETAG" for a in actions) +# --------------------------------------------------------------------------- +# Tests: detect_auto_summaries +# --------------------------------------------------------------------------- + + +class TestDetectAutoSummaries: + @pytest.fixture + def needs_many_old_observations(self) -> dict: + """Six old observations on the same topic.""" + old_date = (date.today() - timedelta(days=90)).isoformat() + return { + f"MEM_obs_{i}": { + "id": f"MEM_obs_{i}", + "type": "mem", + "title": f"Observation {i} about gateway", + "status": "active", + "tags": ["topic:gateway", "repo:ros2_medkit"], + "confidence": "medium", + "created_at": old_date, + } + for i in range(6) + } + + def test_finds_consolidation_candidates(self, needs_many_old_observations): + actions = detect_auto_summaries(needs_many_old_observations) + assert len(actions) == 1 + assert actions[0].kind == "SUPERSEDE" + assert actions[0].new_type == "fact" + assert "gateway" in actions[0].new_title.lower() + + def test_includes_all_ids(self, needs_many_old_observations): + actions = detect_auto_summaries(needs_many_old_observations) + old_ids = actions[0].old_id.split(",") + assert len(old_ids) == 6 + + def test_collects_tags_from_group(self, needs_many_old_observations): + actions = detect_auto_summaries(needs_many_old_observations) + assert "topic:gateway" 
in actions[0].new_tags + assert "repo:ros2_medkit" in actions[0].new_tags + + def test_skips_below_threshold(self): + """Fewer than min_count entries should not trigger.""" + old_date = (date.today() - timedelta(days=90)).isoformat() + needs = { + f"MEM_obs_{i}": { + "id": f"MEM_obs_{i}", + "type": "mem", + "title": f"Observation {i}", + "status": "active", + "tags": ["topic:gateway"], + "created_at": old_date, + } + for i in range(4) # Only 4, below default min_count=5 + } + actions = detect_auto_summaries(needs) + assert len(actions) == 0 + + def test_skips_recent_observations(self): + """Recent observations should not be consolidated.""" + recent_date = (date.today() - timedelta(days=10)).isoformat() + needs = { + f"MEM_obs_{i}": { + "id": f"MEM_obs_{i}", + "type": "mem", + "title": f"Observation {i}", + "status": "active", + "tags": ["topic:gateway"], + "created_at": recent_date, + } + for i in range(6) + } + actions = detect_auto_summaries(needs) + assert len(actions) == 0 + + def test_skips_non_mem_types(self): + """Only 'mem' type entries should be considered.""" + old_date = (date.today() - timedelta(days=90)).isoformat() + needs = { + f"FACT_obs_{i}": { + "id": f"FACT_obs_{i}", + "type": "fact", + "title": f"Fact {i}", + "status": "active", + "tags": ["topic:gateway"], + "created_at": old_date, + } + for i in range(6) + } + actions = detect_auto_summaries(needs) + assert len(actions) == 0 + + def test_skips_deprecated(self): + """Deprecated observations should be excluded.""" + old_date = (date.today() - timedelta(days=90)).isoformat() + needs = { + f"MEM_obs_{i}": { + "id": f"MEM_obs_{i}", + "type": "mem", + "title": f"Observation {i}", + "status": "deprecated", + "tags": ["topic:gateway"], + "created_at": old_date, + } + for i in range(6) + } + actions = detect_auto_summaries(needs) + assert len(actions) == 0 + + def test_custom_thresholds(self): + """Custom min_count and min_age_days.""" + old_date = (date.today() - timedelta(days=30)).isoformat() + needs = { + f"MEM_obs_{i}": { + "id": f"MEM_obs_{i}", + "type": "mem", + "title": f"Observation {i}", + "status": "active", + "tags": ["topic:gateway"], + "created_at": old_date, + } + for i in range(3) + } + # Default thresholds: should not trigger + assert len(detect_auto_summaries(needs)) == 0 + # Lower thresholds: should trigger + actions = detect_auto_summaries(needs, min_count=3, min_age_days=20) + assert len(actions) == 1 + + # --------------------------------------------------------------------------- # Tests: detect_split_files # ---------------------------------------------------------------------------