diff --git a/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md b/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md index b17e2be..b7bdbbd 100644 --- a/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md +++ b/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md @@ -1,7 +1,22 @@ # Memory Consolidation Current Progress and Next Steps Status: AI-authored draft. Not yet human-approved. -Last updated: 2026-05-13 17:09 KST +Last updated: 2026-05-13 17:55 KST + + + +## v0.1.153 next-step live dogfood checkpoint + +Run directory: `/Users/reddit/.agent-memory/reports/v0.1.153-next-steps-20260513T084528/`. + +Results from the requested next-step pass: + +- Metadata-rich dogfooding produced a clean dogfood-only fresh epoch: `fresh-epoch-dogfood-only-strict.json` passed with trace coverage `1.0`, empty retrieval ratio `0.0`, no unknown/classified metadata gap, no raw query/trace/content samples, and no mutation. +- Wider post-v0.1.152/post-v0.1.153 fresh epochs still fail because old rows remain in the epoch window: post-v0.1.153 has `low_epoch_observation_trace_coverage` plus `epoch_empty_retrieval_outcome_metadata_gap_classified`; dominant blocker remains `classified_legacy_missing_outcome`, not an unresolved adapter payload gap. +- Default ranking remains protected: mixed 50-task shadow eval passed `50/50` with zero baseline regressions, but `active_ranking_policy=conservative_legacy`, `candidate_ranking_policy=graph_reinforced_v1`, `default_retrieval_unchanged=true`, `mutated=false`, and migration still requires the explicit migration command/approval. +- Reviewed trace-candidate promotion remains narrow only: generation/listing are read-only, ordinary conversation auto-approval is false, raw content is not allowed, and bad apply policy/approval exits without mutation. +- Broad G4/background apply remains blocked: dogfood epoch G4 preview is read-only/no-mutation with `broad_g4_apply_allowed=false`; required green gates are retrieval ranking, rollback replay, live telemetry reconciliation/fresh epoch, and human-reviewed queue approval. +- While checking broad G4/decay paths, the live v0.1.153 decay-collapse decision hit an episode evidence snapshot bug (`episodes` use `source_ids_json`, not `evidence_ids_json`). Source now has a regression fix and test; release v0.1.154 is required before relying on live decay-collapse decision over episode candidates. ## v0.1.153 released runtime checkpoint and next runway diff --git a/.dev/status/current-handoff.md b/.dev/status/current-handoff.md index 454a649..b9a2d33 100644 --- a/.dev/status/current-handoff.md +++ b/.dev/status/current-handoff.md @@ -1,7 +1,22 @@ # agent-memory current handoff Status: AI-authored draft. Not yet human-approved. -Last updated: 2026-05-13 17:09 KST +Last updated: 2026-05-13 17:55 KST + + + +## v0.1.153 next-step live dogfood checkpoint + +Run directory: `/Users/reddit/.agent-memory/reports/v0.1.153-next-steps-20260513T084528/`. + +Results from the requested next-step pass: + +- Metadata-rich dogfooding produced a clean dogfood-only fresh epoch: `fresh-epoch-dogfood-only-strict.json` passed with trace coverage `1.0`, empty retrieval ratio `0.0`, no unknown/classified metadata gap, no raw query/trace/content samples, and no mutation. +- Wider post-v0.1.152/post-v0.1.153 fresh epochs still fail because old rows remain in the epoch window: post-v0.1.153 has `low_epoch_observation_trace_coverage` plus `epoch_empty_retrieval_outcome_metadata_gap_classified`; dominant blocker remains `classified_legacy_missing_outcome`, not an unresolved adapter payload gap. +- Default ranking remains protected: mixed 50-task shadow eval passed `50/50` with zero baseline regressions, but `active_ranking_policy=conservative_legacy`, `candidate_ranking_policy=graph_reinforced_v1`, `default_retrieval_unchanged=true`, `mutated=false`, and migration still requires the explicit migration command/approval. +- Reviewed trace-candidate promotion remains narrow only: generation/listing are read-only, ordinary conversation auto-approval is false, raw content is not allowed, and bad apply policy/approval exits without mutation. +- Broad G4/background apply remains blocked: dogfood epoch G4 preview is read-only/no-mutation with `broad_g4_apply_allowed=false`; required green gates are retrieval ranking, rollback replay, live telemetry reconciliation/fresh epoch, and human-reviewed queue approval. +- While checking broad G4/decay paths, the live v0.1.153 decay-collapse decision hit an episode evidence snapshot bug (`episodes` use `source_ids_json`, not `evidence_ids_json`). Source now has a regression fix and test; release v0.1.154 is required before relying on live decay-collapse decision over episode candidates. ## v0.1.153 released runtime checkpoint diff --git a/.dev/status/next-agent-memory-action.md b/.dev/status/next-agent-memory-action.md index 61a2f55..7889e06 100644 --- a/.dev/status/next-agent-memory-action.md +++ b/.dev/status/next-agent-memory-action.md @@ -1,7 +1,7 @@ # agent-memory next action Status: AI-authored draft. Not yet human-approved. -Last updated: 2026-05-13 17:09 KST +Last updated: 2026-05-13 17:55 KST ## Use this first when the user asks diff --git a/src/agent_memory/api/cli.py b/src/agent_memory/api/cli.py index 0befa32..67a6b12 100644 --- a/src/agent_memory/api/cli.py +++ b/src/agent_memory/api/cli.py @@ -582,10 +582,12 @@ def _ref_safe_evidence_snapshot(db_path: Path, memory_ref: str) -> dict[str, Any } memory_type, memory_id = parts table_by_type = {"fact": "facts", "procedure": "procedures", "episode": "episodes"} + evidence_column_by_type = {"fact": "evidence_ids_json", "procedure": "evidence_ids_json", "episode": "source_ids_json"} table_name = table_by_type[memory_type] + evidence_column = evidence_column_by_type[memory_type] with _open_readonly_sqlite(db_path) as connection: row = connection.execute( - f"SELECT evidence_ids_json, scope FROM {table_name} WHERE id = ?", + f"SELECT {evidence_column} AS evidence_refs_json, scope FROM {table_name} WHERE id = ?", (memory_id,), ).fetchone() relations = list_relations_for_node(db_path, node_ref=memory_ref) @@ -605,7 +607,7 @@ def _ref_safe_evidence_snapshot(db_path: Path, memory_ref: str) -> dict[str, Any "memory_type": memory_type, "memory_id": memory_id, "exists": True, - "evidence_id_count": len(_safe_json_list_from_db(row["evidence_ids_json"])), + "evidence_id_count": len(_safe_json_list_from_db(row["evidence_refs_json"])), "relation_count": len(relations), "scope_present": row["scope"] is not None, "content_included": False, diff --git a/tests/test_cli.py b/tests/test_cli.py index c255d12..8823219 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,7 @@ import pytest from agent_memory.api.cli import main -from agent_memory.core.curation import approve_fact, create_candidate_fact, supersede_fact +from agent_memory.core.curation import approve_fact, create_candidate_fact, create_episode, supersede_fact from agent_memory.core.ingestion import ingest_source_text from agent_memory.core.models import RetrievalTraceEntry from agent_memory.core.retrieval import retrieve_memory_packet @@ -9466,6 +9466,95 @@ def test_dogfood_decay_collapse_preview_reports_stale_weak_evidence_without_muta assert "query_preview" not in result.stdout + +def test_dogfood_decay_collapse_preview_handles_episode_source_ids_without_mutation( + tmp_path: Path, +) -> None: + db_path = tmp_path / "decay-collapse-episode-preview.db" + initialize_database(db_path) + source = ingest_source_text( + db_path=db_path, + source_type="note", + content="G5e episode source text and token=SHOULD_NOT_LEAK must not leak.", + metadata={"project": "g5e-decay-episode"}, + ) + episode = create_episode( + db_path=db_path, + title="G5e stale episode", + summary="Episode evidence should be counted from source ids without reading raw content.", + source_ids=[source.id], + tags=["g5e", "episode"], + importance_score=0.25, + scope="project:g5e-decay", + status="approved", + ) + record_retrieval_observation( + db_path, + surface="hermes-pre-llm-hook", + query="SHOULD_NOT_LEAK stale episode collapse query", + preferred_scope="project:g5e-decay", + limit=5, + statuses=("approved",), + retrieval_trace=[ + RetrievalTraceEntry( + memory_type="episode", + memory_id=episode.id, + label="stale episode candidate", + scope="project:g5e-decay", + scope_priority=0, + text_match_count=1, + rank_value=0.7, + total_score=0.7, + ) + ], + response_mode="verify_first", + metadata={"query_preview": "token=SHOULD_NOT_LEAK", "session_id": "g5e-episode"}, + ) + + before_counts = _table_counts( + db_path, + ["experience_traces", "retrieval_observations", "memory_activations", "facts", "procedures", "episodes"], + ) + env = {**os.environ, "PYTHONPATH": "src"} + result = subprocess.run( + [ + sys.executable, + "-m", + "agent_memory.api.cli", + "dogfood", + "decay-collapse-preview", + str(db_path), + "--limit", + "20", + "--top", + "5", + "--frequent-threshold", + "3", + "--min-decay-score", + "0.1", + ], + cwd=Path(__file__).resolve().parents[1], + env=env, + capture_output=True, + text=True, + ) + + assert result.returncode == 0, result.stderr + payload = json.loads(result.stdout) + assert payload["read_only"] is True + assert payload["mutated"] is False + candidates = {candidate["memory_ref"]: candidate for candidate in payload["decay_collapse_candidates"]} + candidate = candidates[f"episode:{episode.id}"] + assert candidate["ref_safe_evidence"]["evidence_id_count"] == 1 + assert candidate["ref_safe_evidence"]["content_included"] is False + assert _table_counts( + db_path, + ["experience_traces", "retrieval_observations", "memory_activations", "facts", "procedures", "episodes"], + ) == before_counts + assert "SHOULD_NOT_LEAK" not in result.stdout + assert "source text" not in result.stdout + + def test_dogfood_supersession_preview_reports_claim_conflicts_without_mutation( tmp_path: Path, ) -> None: