diff --git a/docs/hermes-dogfood.md b/docs/hermes-dogfood.md index 9bd61c8..f12f417 100644 --- a/docs/hermes-dogfood.md +++ b/docs/hermes-dogfood.md @@ -161,6 +161,8 @@ Stage C starts with an internal `memory_activations` substrate. Retrieval observ `activations decay-risk-report` is the paired advisory view for weak activation evidence. It scores low repetition, weak strength, stale activity, low connectivity, and lifecycle status risk, but caps risk for approved/frequently activated/connected refs so "old" is not treated as automatically useless. It suggests review/explanation follow-up only; it does not delete traces, deprecate memories, mutate status, or change ranking. +`dogfood decay-collapse-preview` is the G5e read-only bridge from decay-risk scoring into human review. It filters stale/weak evidence candidates by `--min-decay-score`, emits ref-safe `decay_collapse_candidates`, review recommendations, and `collapse_review` guardrails, and keeps `read_only: true`, `mutated: false`, `default_retrieval_unchanged: true`. It does not persist review state, delete/deprecate/collapse memories, auto-approve ordinary conversation, print raw prompts/queries/query previews/sample values, or change retrieval ranking. Any future decay/collapse mutation must be a separate guarded apply policy with backup, audit, explicit approval, actor, reason hash, and rollback. + `consolidation candidates` starts Stage D as a read-only dogfood report over sanitized traces. It groups `experience_traces` with deterministic cluster keys, reports candidate fingerprints, evidence windows, surfaces/scopes, safe summaries, related memory/observation refs, activation/status reinforcement context, guessed memory type, and risk flags. It is an advisory review queue only: no raw prompts/queries/transcripts, no automatic long-term memory creation, no approval, no reject/snooze state yet, and no ranking change. 
def _ref_safe_decay_collapse_candidate(candidate: dict[str, Any]) -> dict[str, Any]:
    """Project one decay-risk candidate into a decay/collapse preview entry.

    A fixed set of keys is copied from ``candidate`` (``score`` is re-exposed
    as ``decay_score``); any other key on the input is dropped. Two constant
    guardrail blocks, ``review_recommendation`` and ``collapse_review``, are
    attached to every entry.

    Args:
        candidate: One entry of the decay-risk report's
            ``decay_risk_candidates`` list.

    Returns:
        A new dict for the preview's ``decay_collapse_candidates`` list. The
        copied values are shared with ``candidate`` (no deep copy is made).

    Raises:
        KeyError: If ``candidate`` lacks any of the copied keys.
    """
    return {
        "memory_ref": candidate["memory_ref"],
        "current_status": candidate["current_status"],
        "decay_score": candidate["score"],
        "activation_count": candidate["activation_count"],
        "total_strength": candidate["total_strength"],
        "signals": candidate["signals"],
        "factor_breakdown": candidate["factor_breakdown"],
        "ref_safe_evidence": candidate["ref_safe_evidence"],
        "resolution_hint": candidate["resolution_hint"],
        "review_support": candidate["review_support"],
        "sample_activation_ids": candidate["sample_activation_ids"],
        "sample_observation_ids": candidate["sample_observation_ids"],
        "activation_window": candidate["activation_window"],
        # Constant guardrails: the preview only ever recommends human review.
        "review_recommendation": {
            "decision": "ready_for_decay_collapse_review",
            "automation": "human_review_only",
            "ordinary_conversation_auto_approval": False,
            "default_retrieval_unchanged": True,
            "mutation_supported": False,
        },
        "collapse_review": {
            "candidate_action": "consider_decay_or_collapse_after_review",
            "apply_path": "not_supported_by_preview",
            "requires_separate_guarded_policy": True,
        },
    }


def _dogfood_decay_collapse_preview_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``dogfood decay-collapse-preview`` report payload.

    Runs the activation decay-risk report, keeps the candidates whose
    ``score`` is at least ``args.min_decay_score``, and wraps them in a
    payload carrying the scan parameters, a quality gate, and constant
    automation-policy / privacy declarations. The payload is handed to
    ``_write_json_report`` together with ``args.output`` before it is
    returned.

    Args:
        args: Parsed CLI namespace. Reads ``db_path``, ``limit``, ``top``,
            ``frequent_threshold``, ``min_decay_score`` and ``output``.

    Returns:
        The preview payload. ``quality_gate["pass"]`` is True only when at
        least one candidate met the threshold.

    Raises:
        ValueError: If ``args.min_decay_score`` is negative or NaN.
    """
    # Written as ``not (x >= 0)`` instead of ``x < 0`` so that NaN is rejected
    # as well: argparse's ``type=float`` accepts "nan", and every comparison
    # against NaN is False, which would otherwise filter out all candidates
    # and misreport the run as "no candidates ready".
    if not args.min_decay_score >= 0:
        raise ValueError("dogfood decay-collapse-preview min-decay-score must be >= 0")
    report = _activation_decay_risk_report(
        args.db_path,
        limit=args.limit,
        top=args.top,
        frequent_threshold=args.frequent_threshold,
    )
    candidates = [
        _ref_safe_decay_collapse_candidate(candidate)
        for candidate in report["decay_risk_candidates"]
        if candidate["score"] >= args.min_decay_score
    ]
    # The gate has a single blocking condition: nothing met the threshold.
    blocked_reasons: list[str] = []
    if not candidates:
        blocked_reasons.append("no_decay_collapse_candidates_ready")
    passed = not blocked_reasons
    payload = {
        "kind": "dogfood_decay_collapse_preview",
        "read_only": True,
        "mutated": False,
        "default_retrieval_unchanged": True,
        "db_path": str(args.db_path),
        "activation_count": report["activation_count"],
        "negative_evidence": report["negative_evidence"],
        "scan": {
            "limit": args.limit,
            "top": args.top,
            "frequent_threshold": args.frequent_threshold,
            "min_decay_score": args.min_decay_score,
            "quality_warnings": report["quality_warnings"],
        },
        "candidate_decomposition": report["candidate_decomposition"],
        "candidate_count": len(candidates),
        "decay_collapse_candidates": candidates,
        "quality_gate": {
            "pass": passed,
            "decision": (
                "decay_collapse_preview_ready_for_human_review"
                if passed
                else "continue_decay_collapse_dogfooding_before_review"
            ),
            "blocked_reasons": blocked_reasons,
        },
        "automation_policy": {
            "apply_supported": False,
            "ordinary_conversation_auto_approval": False,
            "requires_human_review": True,
            "default_retrieval_policy": "approved_only_unchanged",
            "mutation_contract": {
                "writes_review_queue": False,
                "deprecates_or_deletes_memory": False,
                "collapses_memory": False,
                "raw_content_allowed": False,
            },
        },
        "privacy": {
            "raw_conversation_content_included": False,
            "sample_values_included": False,
            "safe_summaries_included": False,
        },
        "suggested_next_steps": [
            "Review stale weak-evidence candidates before any decay/collapse apply corridor.",
            "Keep this preview read-only; do not delete, deprecate, or collapse memories from score alone.",
            "Use a separate guarded policy with backup/audit/rollback for any future mutation slice.",
        ],
    }
    _write_json_report(args.output, payload)
    return payload
def test_dogfood_decay_collapse_preview_reports_stale_weak_evidence_without_mutation(
    tmp_path: Path,
) -> None:
    """End-to-end check of ``dogfood decay-collapse-preview`` via the CLI.

    Seeds one candidate fact observed once and one approved fact observed
    four times, runs the command in a subprocess, and asserts that only the
    former is reported, that the payload carries the read-only guardrails,
    that table row counts are unchanged, and that no seeded secret text
    reaches stdout.
    """
    scope = "project:g5e-decay"
    tracked_tables = (
        "experience_traces",
        "retrieval_observations",
        "memory_activations",
        "facts",
        "relations",
    )
    database = tmp_path / "decay-collapse-preview.db"
    report_file = tmp_path / "decay-collapse-preview.json"

    # --- Seed: one source, a weak candidate fact and a strong approved fact.
    initialize_database(database)
    note = ingest_source_text(
        db_path=database,
        source_type="note",
        content="G5e decay collapse source text and token=SHOULD_NOT_LEAK must not leak.",
        metadata={"project": "g5e-decay"},
    )
    weak = create_candidate_fact(
        db_path=database,
        subject_ref="G5e stale weak evidence",
        predicate="needs",
        object_ref_or_value="collapse review preview",
        evidence_ids=[note.id],
        scope=scope,
        confidence=0.41,
    )
    strong = create_candidate_fact(
        db_path=database,
        subject_ref="G5e fresh evidence",
        predicate="needs",
        object_ref_or_value="protection from stale-only cleanup",
        evidence_ids=[note.id],
        scope=scope,
        confidence=0.94,
    )
    approve_fact(db_path=database, fact_id=strong.id)

    # A single observation for the weak fact ...
    record_retrieval_observation(
        database,
        surface="hermes-pre-llm-hook",
        query="SHOULD_NOT_LEAK stale weak collapse query",
        preferred_scope=scope,
        limit=5,
        statuses=("approved", "candidate"),
        retrieval_trace=[_fact_trace(weak.id, label="stale weak candidate")],
        response_mode="verify_first",
        metadata={"query_preview": "token=SHOULD_NOT_LEAK", "session_id": "g5e-stale"},
    )
    # ... and four for the approved one.
    for index in range(4):
        record_retrieval_observation(
            database,
            surface="cli",
            query="SHOULD_NOT_LEAK fresh decay spacing query",
            preferred_scope=scope,
            limit=5,
            statuses=("approved",),
            retrieval_trace=[_fact_trace(strong.id, label="fresh protected target")],
            response_mode="verify_first",
            metadata={"raw_prompt": "SHOULD_NOT_LEAK", "session_id": f"g5e-fresh-{index}"},
        )
    counts_before = _table_counts(database, list(tracked_tables))

    # --- Run the command exactly as a user would, from the repository root.
    command = [
        sys.executable,
        "-m",
        "agent_memory.api.cli",
        "dogfood",
        "decay-collapse-preview",
        str(database),
        "--limit",
        "20",
        "--top",
        "5",
        "--frequent-threshold",
        "3",
        "--min-decay-score",
        "0.5",
        "--output",
        str(report_file),
    ]
    completed = subprocess.run(
        command,
        cwd=Path(__file__).resolve().parents[1],
        env=dict(os.environ, PYTHONPATH="src"),
        capture_output=True,
        text=True,
    )

    # --- Payload-level guardrails.
    assert completed.returncode == 0, completed.stderr
    report = json.loads(completed.stdout)
    assert report["kind"] == "dogfood_decay_collapse_preview"
    assert report["read_only"] is True
    assert report["mutated"] is False
    assert report["default_retrieval_unchanged"] is True
    expected_policy = {
        "apply_supported": False,
        "ordinary_conversation_auto_approval": False,
        "requires_human_review": True,
        "default_retrieval_policy": "approved_only_unchanged",
        "mutation_contract": {
            "writes_review_queue": False,
            "deprecates_or_deletes_memory": False,
            "collapses_memory": False,
            "raw_content_allowed": False,
        },
    }
    assert report["automation_policy"] == expected_policy
    expected_gate = {
        "pass": True,
        "decision": "decay_collapse_preview_ready_for_human_review",
        "blocked_reasons": [],
    }
    assert report["quality_gate"] == expected_gate

    # --- Exactly the weak candidate is surfaced.
    assert report["candidate_count"] == 1
    entry = report["decay_collapse_candidates"][0]
    assert entry["memory_ref"] == f"fact:{weak.id}"
    assert entry["current_status"] == "candidate"
    assert entry["decay_score"] >= 0.5
    collapse_review = entry["collapse_review"]
    assert collapse_review["candidate_action"] == "consider_decay_or_collapse_after_review"
    assert collapse_review["apply_path"] == "not_supported_by_preview"
    assert collapse_review["requires_separate_guarded_policy"] is True
    expected_recommendation = {
        "decision": "ready_for_decay_collapse_review",
        "automation": "human_review_only",
        "ordinary_conversation_auto_approval": False,
        "default_retrieval_unchanged": True,
        "mutation_supported": False,
    }
    assert entry["review_recommendation"] == expected_recommendation
    assert entry["ref_safe_evidence"]["content_included"] is False

    # --- The written report matches stdout and nothing was mutated.
    assert report_file.exists()
    assert json.loads(report_file.read_text()) == report
    assert _table_counts(database, list(tracked_tables)) == counts_before

    # --- No raw content leaked to stdout.
    for forbidden in ("SHOULD_NOT_LEAK", "source text", "query_preview"):
        assert forbidden not in completed.stdout