Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/hermes-dogfood.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ Stage C starts with an internal `memory_activations` substrate. Retrieval observ

`activations decay-risk-report` is the paired advisory view for weak activation evidence. It scores low repetition, weak strength, stale activity, low connectivity, and lifecycle status risk, but caps risk for approved/frequently activated/connected refs so "old" is not treated as automatically useless. It suggests review/explanation follow-up only; it does not delete traces, deprecate memories, mutate status, or change ranking.

`dogfood decay-collapse-preview` is the G5e read-only bridge from decay-risk scoring into human review. It filters stale/weak evidence candidates by `--min-decay-score` (default `0.5`), emits ref-safe `decay_collapse_candidates`, review recommendations, and `collapse_review` guardrails, and keeps `read_only: true`, `mutated: false`, and `default_retrieval_unchanged: true`. When no candidate meets the threshold, the quality gate reports `pass: false` with the blocked reason `no_decay_collapse_candidates_ready`. Apart from writing the optional `--output` JSON report file, it does not persist review state, delete/deprecate/collapse memories, auto-approve ordinary conversation, print raw prompts/queries/query previews/sample values, or change retrieval ranking. Any future decay/collapse mutation must be a separate guarded apply policy with backup, audit, explicit approval, actor, reason hash, and rollback.

`consolidation candidates` starts Stage D as a read-only dogfood report over sanitized traces. It groups `experience_traces` with deterministic cluster keys, reports candidate fingerprints, evidence windows, surfaces/scopes, safe summaries, related memory/observation refs, activation/status reinforcement context, guessed memory type, and risk flags. It is an advisory review queue only: no raw prompts/queries/transcripts, no automatic long-term memory creation, no approval, no reject/snooze state yet, and no ranking change.

`consolidation explain` expands a single candidate id into a read-only explanation packet for local review. It answers why the candidate was grouped, which safe traces/activations/status signals support it, why the memory type was guessed, and which risk flags or review guardrails apply. Unknown candidate ids produce JSON with `found: false` plus a non-zero exit. The command remains local-only and advisory: it does not promote, approve, reject, snooze, mutate status, delete traces, change ranking, or print raw prompts/queries/transcripts/query previews.
Expand Down
115 changes: 115 additions & 0 deletions src/agent_memory/api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1926,6 +1926,108 @@ def _dogfood_reinforcement_refinement_preview_payload(args: argparse.Namespace)
return payload


def _ref_safe_decay_collapse_candidate(candidate: dict[str, Any]) -> dict[str, Any]:
    """Project one decay-risk candidate onto the fields exposed by the preview.

    Only an explicit whitelist of keys is copied from ``candidate``; any other
    key on the input is dropped. The input's ``score`` is published as
    ``decay_score``. Two constant sub-dictionaries (``review_recommendation``
    and ``collapse_review``) are appended to every result.

    Args:
        candidate: A decay-risk candidate mapping. Every whitelisted key must be
            present.

    Returns:
        A new dictionary whose keys appear in a fixed order. Nested values are
        the same objects as in ``candidate`` (no copying is performed).

    Raises:
        KeyError: If a whitelisted key is missing from ``candidate``.
    """
    # Field order is significant: it determines the key order of the JSON output.
    identity_fields = ("memory_ref", "current_status")
    evidence_fields = (
        "activation_count",
        "total_strength",
        "signals",
        "factor_breakdown",
        "ref_safe_evidence",
        "resolution_hint",
        "review_support",
        "sample_activation_ids",
        "sample_observation_ids",
        "activation_window",
    )

    safe_view: dict[str, Any] = {name: candidate[name] for name in identity_fields}
    safe_view["decay_score"] = candidate["score"]
    for name in evidence_fields:
        safe_view[name] = candidate[name]

    safe_view["review_recommendation"] = {
        "decision": "ready_for_decay_collapse_review",
        "automation": "human_review_only",
        "ordinary_conversation_auto_approval": False,
        "default_retrieval_unchanged": True,
        "mutation_supported": False,
    }
    safe_view["collapse_review"] = {
        "candidate_action": "consider_decay_or_collapse_after_review",
        "apply_path": "not_supported_by_preview",
        "requires_separate_guarded_policy": True,
    }
    return safe_view


def _dogfood_decay_collapse_preview_payload(args: argparse.Namespace) -> dict[str, Any]:
    """Build the read-only ``dogfood decay-collapse-preview`` report payload.

    Runs ``_activation_decay_risk_report`` for ``args.db_path``, keeps the
    candidates whose ``score`` is at least ``args.min_decay_score``, and wraps
    them in a payload carrying fixed read-only, automation-policy and privacy
    flags. The payload is passed to ``_write_json_report`` together with
    ``args.output`` and then returned to the caller.

    Args:
        args: Parsed CLI namespace providing ``db_path``, ``output``, ``limit``,
            ``top``, ``frequent_threshold`` and ``min_decay_score``.

    Returns:
        The preview payload. ``quality_gate.pass`` is ``True`` only when at
        least one candidate met the threshold; otherwise ``blocked_reasons``
        contains ``"no_decay_collapse_candidates_ready"``.

    Raises:
        ValueError: If ``min_decay_score`` is negative, NaN, or infinite.
    """
    import math  # function-scope import keeps this block self-contained

    min_decay_score = args.min_decay_score
    if min_decay_score < 0:
        raise ValueError("dogfood decay-collapse-preview min-decay-score must be >= 0")
    # NaN compares False against everything, so it slips past the ``< 0`` guard,
    # silently filters out every candidate, and is then serialised as the
    # non-standard JSON token ``NaN`` (``Infinity`` for +inf). Reject both.
    if not math.isfinite(min_decay_score):
        raise ValueError(
            "dogfood decay-collapse-preview min-decay-score must be a finite number"
        )

    report = _activation_decay_risk_report(
        args.db_path,
        limit=args.limit,
        top=args.top,
        frequent_threshold=args.frequent_threshold,
    )
    candidates = [
        _ref_safe_decay_collapse_candidate(candidate)
        for candidate in report["decay_risk_candidates"]
        if candidate["score"] >= min_decay_score
    ]

    # The gate only blocks when nothing reached the threshold.
    blocked_reasons: list[str] = []
    if not candidates:
        blocked_reasons.append("no_decay_collapse_candidates_ready")
    passed = not blocked_reasons

    payload = {
        "kind": "dogfood_decay_collapse_preview",
        "read_only": True,
        "mutated": False,
        "default_retrieval_unchanged": True,
        "db_path": str(args.db_path),
        "activation_count": report["activation_count"],
        "negative_evidence": report["negative_evidence"],
        "scan": {
            "limit": args.limit,
            "top": args.top,
            "frequent_threshold": args.frequent_threshold,
            "min_decay_score": min_decay_score,
            "quality_warnings": report["quality_warnings"],
        },
        "candidate_decomposition": report["candidate_decomposition"],
        "candidate_count": len(candidates),
        "decay_collapse_candidates": candidates,
        "quality_gate": {
            "pass": passed,
            "decision": (
                "decay_collapse_preview_ready_for_human_review"
                if passed
                else "continue_decay_collapse_dogfooding_before_review"
            ),
            "blocked_reasons": blocked_reasons,
        },
        "automation_policy": {
            "apply_supported": False,
            "ordinary_conversation_auto_approval": False,
            "requires_human_review": True,
            "default_retrieval_policy": "approved_only_unchanged",
            "mutation_contract": {
                "writes_review_queue": False,
                "deprecates_or_deletes_memory": False,
                "collapses_memory": False,
                "raw_content_allowed": False,
            },
        },
        "privacy": {
            "raw_conversation_content_included": False,
            "sample_values_included": False,
            "safe_summaries_included": False,
        },
        "suggested_next_steps": [
            "Review stale weak-evidence candidates before any decay/collapse apply corridor.",
            "Keep this preview read-only; do not delete, deprecate, or collapse memories from score alone.",
            "Use a separate guarded policy with backup/audit/rollback for any future mutation slice.",
        ],
    }
    _write_json_report(args.output, payload)
    return payload


def _decay_risk_scoring_contract() -> dict[str, Any]:
return {
"max_score": 1.0,
Expand Down Expand Up @@ -8964,6 +9066,16 @@ def _build_parser() -> argparse.ArgumentParser:
dogfood_reinforcement_refinement_preview_parser.add_argument("--limit", type=int, default=200)
dogfood_reinforcement_refinement_preview_parser.add_argument("--top", type=int, default=20)
dogfood_reinforcement_refinement_preview_parser.add_argument("--frequent-threshold", type=int, default=3)
dogfood_decay_collapse_preview_parser = dogfood_subparsers.add_parser(
"decay-collapse-preview",
help="Build a read-only G5e preview of stale weak-evidence decay/collapse candidates.",
)
dogfood_decay_collapse_preview_parser.add_argument("db_path", type=Path)
dogfood_decay_collapse_preview_parser.add_argument("--output", type=Path)
dogfood_decay_collapse_preview_parser.add_argument("--limit", type=int, default=200)
dogfood_decay_collapse_preview_parser.add_argument("--top", type=int, default=20)
dogfood_decay_collapse_preview_parser.add_argument("--frequent-threshold", type=int, default=3)
dogfood_decay_collapse_preview_parser.add_argument("--min-decay-score", type=float, default=0.5)
dogfood_trace_candidate_persist_parser = dogfood_subparsers.add_parser(
"trace-candidate-persist",
help="Persist G5 trace-cluster candidates for explicit human review without promoting memories.",
Expand Down Expand Up @@ -9999,6 +10111,9 @@ def main() -> None:
if args.dogfood_action == "reinforcement-refinement-preview":
print(json.dumps(_dogfood_reinforcement_refinement_preview_payload(args), indent=2))
return
if args.dogfood_action == "decay-collapse-preview":
print(json.dumps(_dogfood_decay_collapse_preview_payload(args), indent=2))
return
if args.dogfood_action == "trace-candidate-persist":
print(json.dumps(_dogfood_trace_candidate_persist_payload(args), indent=2))
return
Expand Down
135 changes: 135 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9215,6 +9215,141 @@ def test_dogfood_reinforcement_refinement_preview_scores_repeated_activation_wit
assert "query_preview" not in result.stdout


def test_dogfood_decay_collapse_preview_reports_stale_weak_evidence_without_mutation(
    tmp_path: Path,
) -> None:
    """Exercise ``dogfood decay-collapse-preview`` end to end through the CLI.

    Seeds one candidate fact seen in a single retrieval observation and one
    approved fact seen in four, runs the command in a subprocess, and checks
    that only the weakly observed fact is reported, that the policy and gate
    sections match exactly, that the ``--output`` file mirrors stdout, that
    tracked table row counts are unchanged, and that no seeded raw text
    appears in stdout.
    """
    database = tmp_path / "decay-collapse-preview.db"
    report_file = tmp_path / "decay-collapse-preview.json"
    tracked_tables = (
        "experience_traces",
        "retrieval_observations",
        "memory_activations",
        "facts",
        "relations",
    )

    # --- Seed data -------------------------------------------------------
    initialize_database(database)
    note = ingest_source_text(
        db_path=database,
        source_type="note",
        content="G5e decay collapse source text and token=SHOULD_NOT_LEAK must not leak.",
        metadata={"project": "g5e-decay"},
    )
    weak_fact = create_candidate_fact(
        db_path=database,
        subject_ref="G5e stale weak evidence",
        predicate="needs",
        object_ref_or_value="collapse review preview",
        evidence_ids=[note.id],
        scope="project:g5e-decay",
        confidence=0.41,
    )
    protected_fact = create_candidate_fact(
        db_path=database,
        subject_ref="G5e fresh evidence",
        predicate="needs",
        object_ref_or_value="protection from stale-only cleanup",
        evidence_ids=[note.id],
        scope="project:g5e-decay",
        confidence=0.94,
    )
    approve_fact(db_path=database, fact_id=protected_fact.id)

    # A single observation for the weak candidate...
    record_retrieval_observation(
        database,
        surface="hermes-pre-llm-hook",
        query="SHOULD_NOT_LEAK stale weak collapse query",
        preferred_scope="project:g5e-decay",
        limit=5,
        statuses=("approved", "candidate"),
        retrieval_trace=[_fact_trace(weak_fact.id, label="stale weak candidate")],
        response_mode="verify_first",
        metadata={"query_preview": "token=SHOULD_NOT_LEAK", "session_id": "g5e-stale"},
    )
    # ...and four for the approved fact, above the --frequent-threshold of 3.
    for session_number in range(4):
        record_retrieval_observation(
            database,
            surface="cli",
            query="SHOULD_NOT_LEAK fresh decay spacing query",
            preferred_scope="project:g5e-decay",
            limit=5,
            statuses=("approved",),
            retrieval_trace=[_fact_trace(protected_fact.id, label="fresh protected target")],
            response_mode="verify_first",
            metadata={"raw_prompt": "SHOULD_NOT_LEAK", "session_id": f"g5e-fresh-{session_number}"},
        )
    counts_before = _table_counts(database, list(tracked_tables))

    # --- Run the CLI -----------------------------------------------------
    command = [sys.executable, "-m", "agent_memory.api.cli", "dogfood", "decay-collapse-preview"]
    command.append(str(database))
    command += ["--limit", "20"]
    command += ["--top", "5"]
    command += ["--frequent-threshold", "3"]
    command += ["--min-decay-score", "0.5"]
    command += ["--output", str(report_file)]
    completed = subprocess.run(
        command,
        cwd=Path(__file__).resolve().parents[1],
        env=dict(os.environ, PYTHONPATH="src"),
        capture_output=True,
        text=True,
    )

    # --- Verify the report ----------------------------------------------
    assert completed.returncode == 0, completed.stderr
    report = json.loads(completed.stdout)

    expected_policy = {
        "apply_supported": False,
        "ordinary_conversation_auto_approval": False,
        "requires_human_review": True,
        "default_retrieval_policy": "approved_only_unchanged",
        "mutation_contract": {
            "writes_review_queue": False,
            "deprecates_or_deletes_memory": False,
            "collapses_memory": False,
            "raw_content_allowed": False,
        },
    }
    expected_gate = {
        "pass": True,
        "decision": "decay_collapse_preview_ready_for_human_review",
        "blocked_reasons": [],
    }
    expected_recommendation = {
        "decision": "ready_for_decay_collapse_review",
        "automation": "human_review_only",
        "ordinary_conversation_auto_approval": False,
        "default_retrieval_unchanged": True,
        "mutation_supported": False,
    }

    assert report["kind"] == "dogfood_decay_collapse_preview"
    assert report["read_only"] is True
    assert report["mutated"] is False
    assert report["default_retrieval_unchanged"] is True
    assert report["automation_policy"] == expected_policy
    assert report["quality_gate"] == expected_gate
    assert report["candidate_count"] == 1

    entry = report["decay_collapse_candidates"][0]
    assert entry["memory_ref"] == f"fact:{weak_fact.id}"
    assert entry["current_status"] == "candidate"
    assert entry["decay_score"] >= 0.5
    review = entry["collapse_review"]
    assert review["candidate_action"] == "consider_decay_or_collapse_after_review"
    assert review["apply_path"] == "not_supported_by_preview"
    assert review["requires_separate_guarded_policy"] is True
    assert entry["review_recommendation"] == expected_recommendation
    assert entry["ref_safe_evidence"]["content_included"] is False

    # The --output file must mirror what was printed.
    assert report_file.exists()
    assert json.loads(report_file.read_text()) == report

    # Row counts of the tracked tables must be unchanged by the command.
    assert _table_counts(database, list(tracked_tables)) == counts_before

    # None of the seeded raw text may surface in stdout.
    for forbidden in ("SHOULD_NOT_LEAK", "source text", "query_preview"):
        assert forbidden not in completed.stdout


def _seed_trace_cluster_for_candidate_flow(db_path: Path) -> int:
source = ingest_source_text(
db_path=db_path,
Expand Down