From 5cddde139fa1c8930e7121bf44ade26d75f1647b Mon Sep 17 00:00:00 2001
From: cafitac <cafitac99@gmail.com>
Date: Wed, 13 May 2026 16:26:29 +0900
Subject: [PATCH] feat: add reviewed episode promotion diagnostics

---
 .../current-progress-and-next-steps.md        | 22 +++----
 .dev/status/current-handoff.md                | 18 +++---
 .dev/status/next-agent-memory-action.md       | 24 ++++----
 src/agent_memory/api/cli.py                   | 57 ++++++++++++++++++-
 tests/test_cli.py                             | 38 ++++++++++++-
 tests/test_roadmap_contract.py                |  3 +-
 6 files changed, 125 insertions(+), 37 deletions(-)

diff --git a/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md b/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md
index 82d518c..bf5439b 100644
--- a/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md
+++ b/.dev/roadmap/memory-consolidation/current-progress-and-next-steps.md
@@ -1,11 +1,11 @@
 # Memory Consolidation Current Progress and Next Steps
 
 Status: AI-authored draft. Not yet human-approved.
-Last updated: 2026-05-13 15:48 KST
+Last updated: 2026-05-13 16:32 KST
 
 ## v0.1.152 released runtime checkpoint and next runway
 
-This document is the restartable checkpoint after the v0.1.152 release/runtime rollout: 50-task expanded retrieval fixture gate, 75 checked-in retrieval eval tasks across the fixture directory, per-candidate collapse proof artifact persistence/replay with supersession-chain evidence, one fresh non-idempotent narrow live reviewed-candidate promotion, copy/live-safe explicit approval corridor evidence, v0.1.152 `personal-oss` Hermes hook rollout, released named ranking policy/shadow-compare diagnostics, approval-gated config-only default-ranking migrate/rollback mechanics, and 50-task live-Hermes-DB representative shadow corpus evidence while keeping `conservative_legacy` as the live default.
+This document is the restartable checkpoint after the v0.1.152 release/runtime rollout: 50-task expanded retrieval fixture gate, 75 checked-in retrieval eval tasks across the fixture directory, per-candidate collapse proof artifact persistence/replay with supersession-chain evidence, one fresh non-idempotent narrow live reviewed-candidate fact promotion, one guarded live reviewed procedure/episode promotion pair, copy/live-safe explicit approval corridor evidence, v0.1.152 `personal-oss` Hermes hook rollout, released named ranking policy/shadow-compare diagnostics, approval-gated config-only default-ranking migrate/rollback mechanics, and 50-task live-Hermes-DB representative fact plus mixed fact/procedure/episode shadow corpus evidence while keeping `conservative_legacy` as the live default.
 
 Current verified release state:
 
@@ -20,28 +20,28 @@ Current verified release state:
 Fresh diagnostics:
 
 - `g4-linkage-gap-diagnose-v0138-fresh.json`: decision `fresh_trace_linkage_gap_not_detected`.
-- `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/fresh-epoch-since-v0152.json`: still blocks epoch-wide automation on `epoch_empty_retrieval_outcome_metadata_gap_classified`.
+- `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/fresh-epoch-since-v0152-with-metadata-gap-diagnostic.json`: still blocks epoch-wide automation on `low_epoch_observation_trace_coverage` and `epoch_empty_retrieval_outcome_metadata_gap_classified`; metadata-gap drilldown reports `dominant_blocker=classified_legacy_missing_outcome`, `classified_missing_outcome_count=6`, and `unresolved_adapter_payload_gap_count=0`.
 - `/tmp/agent-memory-apply-corridor-v0150/`: copy/live-safe explicit approval corridor smoke passed without unintended durable-memory mutation; live apply was idempotent.
 - `/tmp/agent-memory-telemetry-reset-decision/copy-apply.json`: copy telemetry reset passed with protected durable memory tables unchanged; live telemetry reset remains blocked.
-- 50-task expanded retrieval source fixture gate exists, the checked-in fixture directory evaluates at 75/75 pass, and the live-Hermes-DB representative 50-task fact corpus passes with zero shadow regressions/no durable mutation. The checked-in expanded fixture is still not directly replayable against the tiny live DB because project-M1 references are absent; default ranking remains unchanged until a separate explicit default-rollout decision.
+- 50-task expanded retrieval source fixture gate exists, the checked-in fixture directory evaluates at 75/75 pass, and live-Hermes-DB representative 50-task fact and mixed fact/procedure/episode corpora pass with zero shadow regressions/no durable ranking mutation. The checked-in expanded fixture is still not directly replayable against the tiny live DB because project-M1 references are absent; default ranking remains unchanged until a separate explicit default-rollout decision.
 - Collapse proof artifacts can be persisted/replayed and can reach `satisfied` with reviewed supersession-chain/relation evidence, but collapse/delete apply remains disabled.
 
 Progress estimate:
 
-- Overall north-star: 76-78%.
-- Substrate/evidence plumbing: about 86%.
-- Safe automatic mutation/promotion: about 64-68%.
-- Remaining work: about 22-24% overall.
+- Overall north-star: 78-80%.
+- Substrate/evidence plumbing: about 87%.
+- Safe automatic mutation/promotion: about 66-70%.
+- Remaining work: about 20-22% overall.
 
 Current interpretation:
 
-Fresh v0.1.152 evidence and merged G5a-G5i plus default-ranking migration mechanics are healthy enough to continue the brain-like reviewed-candidate runway. The current runway has completed the expanded retrieval source fixture gate, stronger read-only opt-in ranking comparison, supersession-chain collapse proof evidence, one fresh guarded live reviewed-candidate promotion, the explicit default-ranking opt-in-to-default migration design, released named ranking policy diagnostics plus approval-gated config-only migrate/rollback mechanics, and representative live-Hermes-DB shadow evidence preserving `conservative_legacy`. Broad G4/background apply remains blocked. Current next work is to broaden live shadow fixture coverage beyond facts into procedure/episode surfaces, continue telemetry/fresh-epoch reconciliation, and only then consider explicit operator-approved default ranking migration.
+Fresh v0.1.152 evidence and merged G5a-G5i plus default-ranking migration mechanics are healthy enough to continue the brain-like reviewed-candidate runway. The current runway has completed the expanded retrieval source fixture gate, stronger read-only opt-in ranking comparison, supersession-chain collapse proof evidence, one fresh guarded live reviewed-candidate fact promotion, one guarded live reviewed procedure/episode promotion pair, the explicit default-ranking opt-in-to-default migration design, released named ranking policy diagnostics plus approval-gated config-only migrate/rollback mechanics, and representative live-Hermes-DB fact plus mixed shadow evidence preserving `conservative_legacy`. Broad G4/background apply remains blocked. Current next work is to improve fresh-epoch telemetry coverage and reduce classified legacy missing-outcome rows through metadata-rich dogfooding before any explicit operator-approved default ranking migration.
 
 Recommended sequence from here:
 
 1. Keep live default ranking on `conservative_legacy`; do not run live `retrieval-ranking-migrate-default` until the operator gives the exact approval phrase and fresh-epoch telemetry is green.
-2. Broaden live shadow fixture coverage beyond the current 50 approved-fact tasks by seeding/approving representative procedure and episode memories through guarded review corridors.
-3. Continue telemetry/fresh-epoch reconciliation; current post-v0.1.152 telemetry-only reconciliation is green, but fresh-epoch still blocks on `epoch_empty_retrieval_outcome_metadata_gap_classified`.
+2. Continue metadata-rich dogfooding to lift fresh-epoch observation/trace linkage coverage above threshold and replace classified legacy missing-outcome rows.
+3. Keep live mixed fact/procedure/episode corpus work in read-only shadow comparison unless additional representative memories are promoted through guarded review corridors with backup/hash/actor/reason/approval evidence.
 4. Keep collapse proof evidence-driven: `satisfied` requires supersession-chain/relation evidence, and collapse/delete apply remains disabled.
 5. Keep fresh reviewed candidate promotion limited to the explicit guarded corridor with backup/hash/actor/reason/approval evidence; do not use broad apply.
 6. Preserve broad G4/background apply as blocked until ranking, rollback replay, telemetry reconciliation/fresh epoch, and reviewed queue approvals all pass on real runtime evidence.
diff --git a/.dev/status/current-handoff.md b/.dev/status/current-handoff.md
index e91cb04..e696bbe 100644
--- a/.dev/status/current-handoff.md
+++ b/.dev/status/current-handoff.md
@@ -1,7 +1,7 @@
 # agent-memory current handoff
 
 Status: AI-authored draft. Not yet human-approved.
-Last updated: 2026-05-13 15:48 KST
+Last updated: 2026-05-13 16:32 KST
 
 ## v0.1.152 released runtime checkpoint
 
@@ -16,23 +16,23 @@ Current verified state:
 - Hermes hook doctor is green for `personal-oss` after `--accept-hooks` smoke on the v0.1.152 runtime.
 - Fresh G4 report directory retained: `/Users/reddit/.agent-memory/reports/g4-v0138-20260512-132253/`.
 - Fresh linkage diagnosis retained from G4 diagnostics: `g4-linkage-gap-diagnose-v0138-fresh.json` passed with decision `fresh_trace_linkage_gap_not_detected`.
-- Current v0.1.152 source/runtime runway now includes a 50-task expanded retrieval fixture gate (`live-compatible-50-gate.json`), 75 checked-in retrieval eval tasks across the fixture directory, persisted/replayed per-candidate collapse proof artifacts with relation-equivalence/supersession-chain evidence, one fresh live G5 reviewed-candidate promotion (`candidate:29db0390b2f81bdb` -> `fact:4`) with backup/hash evidence, idempotent live G4 queue apply evidence, the explicit default-ranking opt-in-to-default migration plan at `.dev/roadmap/memory-consolidation/default-ranking-opt-in-to-default-migration.md`, and the released default-ranking migration mechanics.
-- Default-ranking migration mechanics are now released in v0.1.152: named `conservative_legacy`/`graph_reinforced_v1`/`shadow_compare` policy diagnostics, shadow compare on `retrieval-ranking-experiment`, and approval-gated config-only `retrieval-ranking-migrate-default` with protected table hash proof plus rollback metadata. Live Hermes remains on `conservative_legacy`; live shadow reports under `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/` include a 50-task representative live-Hermes-DB fact corpus with 50/50 pass, zero baseline regressions, protected default order, and no durable mutation. The checked-in expanded 50-task source fixture still fails against the tiny live DB because project-M1 references are absent; the gap artifact is `checked-in-expanded-50-live-gap.stderr.txt`.
+- Current v0.1.152 source/runtime runway now includes a 50-task expanded retrieval fixture gate (`live-compatible-50-gate.json`), 75 checked-in retrieval eval tasks across the fixture directory, persisted/replayed per-candidate collapse proof artifacts with relation-equivalence/supersession-chain evidence, one fresh live G5 reviewed-candidate promotion (`candidate:29db0390b2f81bdb` -> `fact:4`) with backup/hash evidence, one guarded live reviewed procedure/episode promotion pair (`candidate:3435fe1db562aaf2` -> `procedure:1`, `candidate:4a35c03e7130fdec` -> `episode:1`) with backup/hash evidence, idempotent live G4 queue apply evidence, the explicit default-ranking opt-in-to-default migration plan at `.dev/roadmap/memory-consolidation/default-ranking-opt-in-to-default-migration.md`, and the released default-ranking migration mechanics.
+- Default-ranking migration mechanics are now released in v0.1.152: named `conservative_legacy`/`graph_reinforced_v1`/`shadow_compare` policy diagnostics, shadow compare on `retrieval-ranking-experiment`, and approval-gated config-only `retrieval-ranking-migrate-default` with protected table hash proof plus rollback metadata. Live Hermes remains on `conservative_legacy`; live shadow reports under `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/` include a 50-task representative live-Hermes-DB fact corpus and a 50-task mixed fact/procedure/episode corpus, both with 50/50 pass, zero baseline regressions, protected default order, and no durable ranking mutation. The checked-in expanded 50-task source fixture still fails against the tiny live DB because project-M1 references are absent; the gap artifact is `checked-in-expanded-50-live-gap.stderr.txt`.
 - Broad G4/background apply remains blocked; default retrieval ranking changes, collapse/delete apply, live telemetry reset, and ordinary conversation auto-approval remain blocked. The new fact `fact:4` also records this guardrail in the live memory DB.
 
 Progress estimate:
 
-- Overall north-star: 76-78%.
-- Substrate/evidence plumbing: about 86%.
-- Safe automatic mutation/promotion: about 64-68%.
-- Remaining work: about 22-24% overall.
+- Overall north-star: 78-80%.
+- Substrate/evidence plumbing: about 87%.
+- Safe automatic mutation/promotion: about 66-70%.
+- Remaining work: about 20-22% overall.
 
 Current interpretation:
 
 - The trace/retrieval/candidate/proof substrate is healthy enough for the next safety runway.
-- Completed in the current runway: expanded retrieval gate to 50 tasks, proved the checked-in fixture directory at 75/75 pass, moved collapse proof to `satisfied` with supersession-chain evidence while keeping collapse/delete disabled, ran one fresh non-idempotent narrow live reviewed-candidate promotion with backup/hash verification, released/runtime-smoked v0.1.151, documented the explicit default-ranking opt-in-to-default migration plan, implemented and released the named-policy/shadow-compare/config-only migrate/rollback command path in v0.1.152, and smoke-tested live shadow comparison plus a 50-task representative live fact corpus without changing the live default.
+- Completed in the current runway: expanded retrieval gate to 50 tasks, proved the checked-in fixture directory at 75/75 pass, moved collapse proof to `satisfied` with supersession-chain evidence while keeping collapse/delete disabled, ran one fresh non-idempotent narrow live reviewed-candidate fact promotion plus one guarded reviewed procedure/episode promotion pair with backup/hash verification, released/runtime-smoked v0.1.151, documented the explicit default-ranking opt-in-to-default migration plan, implemented and released the named-policy/shadow-compare/config-only migrate/rollback command path in v0.1.152, and smoke-tested live shadow comparison plus both 50-task representative live fact and mixed corpora without changing the live default.
 - Broad G4/background apply remains blocked; existing docs/RED-test-only broad-G4 baseline must not be advertised as ready.
-- Retrieval ranking changes remain opt-in experiments only; the expanded 50-task source experiment and the representative 50-task live-Hermes-DB fact corpus both passed as read-only comparisons with no durable mutation. v0.1.152 adds released migration mechanics, but live default enablement still requires broader live fixture coverage, fresh-epoch telemetry green, the exact approval phrase, and explicit operator approval.
+- Retrieval ranking changes remain opt-in experiments only; the expanded 50-task source experiment, the representative 50-task live-Hermes-DB fact corpus, and the representative 50-task mixed fact/procedure/episode corpus all passed as read-only comparisons with no durable ranking mutation. v0.1.152 adds released migration mechanics, but live default enablement still requires fresh-epoch telemetry green, the exact approval phrase, and explicit operator approval.
 
 Current safe mutation boundaries:
 
diff --git a/.dev/status/next-agent-memory-action.md b/.dev/status/next-agent-memory-action.md
index 9bf0d76..276f9db 100644
--- a/.dev/status/next-agent-memory-action.md
+++ b/.dev/status/next-agent-memory-action.md
@@ -1,7 +1,7 @@
 # agent-memory next action
 
 Status: AI-authored draft. Not yet human-approved.
-Last updated: 2026-05-13 15:48 KST
+Last updated: 2026-05-13 16:32 KST
 
 ## Use this first when the user asks
 
@@ -16,7 +16,7 @@ Then verify the repo/runtime state briefly and answer from the recommendation be
 
 ## One-sentence current state
 
-`agent-memory` is released and live-runtime-smoked through `v0.1.152`; the `personal-oss` Hermes hook is healthy on the v0.1.152 runtime. The current verified runway now has a 50-task expanded retrieval fixture gate, 75 checked-in retrieval eval tasks across the fixture directory, persisted/replayed per-candidate collapse proof artifacts with supersession-chain evidence, one fresh non-idempotent narrow live reviewed-candidate promotion, copy/live-safe explicit-approval corridor evidence, an idempotent live G4 queue apply, named ranking policy/shadow-compare diagnostics, approval-gated config-only default-ranking migrate/rollback mechanics, and a live Hermes DB 50-task representative shadow corpus. Broad G4/background apply, collapse/delete apply, live telemetry reset, default ranking migration, and ordinary conversation auto-approval remain blocked. Live default ranking remains `conservative_legacy`.
+`agent-memory` is released and live-runtime-smoked through `v0.1.152`; the `personal-oss` Hermes hook is healthy on the v0.1.152 runtime. The current verified runway now has a 50-task expanded retrieval fixture gate, 75 checked-in retrieval eval tasks across the fixture directory, persisted/replayed per-candidate collapse proof artifacts with supersession-chain evidence, one fresh non-idempotent narrow live reviewed-candidate promotion, copy/live-safe explicit-approval corridor evidence, an idempotent live G4 queue apply, named ranking policy/shadow-compare diagnostics, approval-gated config-only default-ranking migrate/rollback mechanics, a live Hermes DB 50-task representative fact shadow corpus, and a new live Hermes DB 50-task mixed fact/procedure/episode shadow corpus. Broad G4/background apply, collapse/delete apply, live telemetry reset, default ranking migration, and ordinary conversation auto-approval remain blocked. Live default ranking remains `conservative_legacy`.
 
 ## Current progress estimate toward the north-star
 
@@ -24,10 +24,10 @@ The north-star is a human-memory-like, mostly automatic, graph-based memory cons
 
 Approximate progress:
 
-- Overall north-star: 76-78%.
-- Substrate/evidence plumbing: about 86%.
-- Safe automatic mutation/promotion: about 64-68%.
-- Remaining work: about 22-24% overall.
+- Overall north-star: 78-80%.
+- Substrate/evidence plumbing: about 87%.
+- Safe automatic mutation/promotion: about 66-70%.
+- Remaining work: about 20-22% overall.
 
 Reasoning:
 
@@ -50,17 +50,17 @@ Reasoning:
 - Historical scheduled dry-run retained: `/Users/reddit/.agent-memory/reports/g4-v0138-20260512-132253/scheduled-dry-run.json`.
 - Source G5a-G5i checkpoint: `dogfood trace-cluster-preview`, `dogfood trace-candidate-persist/list/update/apply`, read-only `review_score`/`review_recommendation`, `dogfood reinforcement-refinement-preview`, `dogfood decay-collapse-preview`, `dogfood supersession-preview`, lifecycle candidate registry/apply, decay deprecate apply, ranking gate/experiment, rollback confidence, `rollback-replay-validate`, `retrieval-ranking-experiment`, `decay-collapse-decision`, `telemetry-reconciliation`, telemetry reconciliation/reset safety reporting, and G4 reviewed queue preview/persist/update/apply are merged and released through v0.1.150.
 - Current local follow-up evidence: expanded fixture file `tests/fixtures/retrieval_eval/expanded/live-compatible-50-gate.json` has 50 live-compatible tasks; checked-in fixture directory evaluates at 75/75 pass; opt-in ranking experiment report `/Users/reddit/.agent-memory/reports/g5i-ranking-experiment-expanded-50-20260513T1355/ranking-experiment-expanded-50.json` is read-only with `expanded_fixture_gate_met=true`, `eval_gate_pass=true`, and `default_ranking_mutated=false`; fresh live reviewed candidate `candidate:29db0390b2f81bdb` promoted to `fact:4` only through the guarded explicit-approval corridor.
-- Current source/runtime ranking evidence: `retrieval-ranking-experiment` has named policy/shadow-compare diagnostics; `retrieval-ranking-migrate-default` provides an approval-gated config-only migration with protected table hashes, audit output, and rollback metadata. v0.1.152 published and installed this path. Live default remains `conservative_legacy`. Live shadow reports under `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/` include `live-fact4-shadow.json` and `live-hermes-approved-fact-50-corpus-v1-shadow.json`; the latter replayed 50 representative tasks against the tiny live Hermes DB with 50/50 pass, zero baseline regressions, protected default order, and no durable mutation. The checked-in 50-task fixture still is not directly runnable against the tiny live Hermes DB because project-M1 references are absent there; the gap artifact is `checked-in-expanded-50-live-gap.stderr.txt`.
+- Current source/runtime ranking evidence: `retrieval-ranking-experiment` has named policy/shadow-compare diagnostics; `retrieval-ranking-migrate-default` provides an approval-gated config-only migration with protected table hashes, audit output, and rollback metadata. v0.1.152 published and installed this path. Live default remains `conservative_legacy`. Live shadow reports under `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/` include `live-fact4-shadow.json`, `live-hermes-approved-fact-50-corpus-v1-shadow.json`, and `live-hermes-mixed-approved-50-corpus-v1-shadow.json`; the mixed corpus replayed 50 live tasks across approved fact/procedure/episode memories with 50/50 pass, zero baseline regressions, protected default order, and no durable mutation. The checked-in 50-task fixture is still not directly runnable against the tiny live Hermes DB because project-M1 references are absent there; the gap artifact is `checked-in-expanded-50-live-gap.stderr.txt`.
 
 ## Current blocker
 
 The v0.1.152 runtime is healthy, but broad brain-like automation is still intentionally blocked:
 
-- Fresh epoch report `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/fresh-epoch-since-v0152.json`: quality gate still fails with `epoch_empty_retrieval_outcome_metadata_gap_classified`; continue dogfooding before trusting epoch-wide automation.
+- Fresh epoch report `/Users/reddit/.agent-memory/reports/default-ranking-v0152-shadow/fresh-epoch-since-v0152-with-metadata-gap-diagnostic.json`: quality gate still fails with `low_epoch_observation_trace_coverage` and `epoch_empty_retrieval_outcome_metadata_gap_classified`. The new metadata-gap diagnostic shows `dominant_blocker=classified_legacy_missing_outcome`, `classified_missing_outcome_count=6`, and `unresolved_adapter_payload_gap_count=0`; continue metadata-rich dogfooding before telemetry reset or default ranking migration.
 - G4 review queue copy/live-safe smoke `/tmp/agent-memory-apply-corridor-v0150/`: live preview/list/reconciliation were read-only; copy telemetry reset and copy G4 queue apply preserved durable memory (`mutated=false`); live G4 queue apply was idempotent with `applied_count=0`, `already_applied_count=1`, `mutated=false`, and `default_retrieval_unchanged=true`.
 - Historical telemetry reconciliation via the telemetry reset copy smoke `/tmp/agent-memory-telemetry-reset-decision/copy-apply.json`: deleting 1773 historical telemetry rows on a DB copy passed with protected durable memory tables unchanged. Live DB was not reset because the fresh epoch gate still fails; live reset remains manual-only behind `telemetry-reset-v1` and `apply-telemetry-reset-v1`.
 - Collapse proof is evidence-driven and can persist/replay per-candidate proof artifacts. The current local proof path can reach `satisfied` when supersession-chain/relation evidence exists, but collapse/delete apply remains disabled even after proof satisfaction.
-- Retrieval fixture coverage now includes a 50-task live-compatible expanded source gate, 75 checked-in eval tasks across the directory, and a live-Hermes-DB representative 50-task fact corpus. The opt-in ranking experiments passed as read-only comparisons, but default retrieval ranking is still unchanged and blocked until a separate explicit default-rollout decision is made after fresh-epoch telemetry is green.
+- Retrieval fixture coverage now includes a 50-task live-compatible expanded source gate, 75 checked-in eval tasks across the directory, a live-Hermes-DB representative 50-task fact corpus, and a live-Hermes-DB representative 50-task mixed fact/procedure/episode corpus. The opt-in ranking experiments passed as read-only comparisons, but default retrieval ranking is still unchanged and blocked until a separate explicit default-rollout decision is made after fresh-epoch telemetry is green.
 - G4 broad apply contract remains blocked by policy even when a report is individually green. The guardrail now requires all of these to be green on real runtime evidence before reconsideration: retrieval ranking gate, rollback replay validation, live telemetry reconciliation, and human-reviewed queue approval; ordinary conversation auto-approval remains false.
 
 ## Recommended next work
@@ -68,8 +68,8 @@ The v0.1.152 runtime is healthy, but broad brain-like automation is still intent
 Proceed in this sequence:
 
 1. Keep live default ranking on `conservative_legacy`; do not run `retrieval-ranking-migrate-default` against the live profile until an operator gives the exact approval phrase and fresh-epoch telemetry is green.
-2. Improve live fixture coverage beyond fact-only replay: seed or approve representative procedure/episode memories in a guarded corridor, then extend the live shadow corpus beyond the current 50 fact tasks.
-3. Continue telemetry/fresh-epoch reconciliation; current post-v0.1.152 telemetry reconciliation is green as telemetry-only, but fresh-epoch still blocks on `epoch_empty_retrieval_outcome_metadata_gap_classified`.
+2. Continue metadata-rich dogfooding to lift fresh-epoch `observation_trace_coverage_ratio` above threshold and eliminate classified legacy missing-outcome rows; the latest blocker is not an unresolved adapter payload gap.
+3. Keep live mixed retrieval corpus coverage in the shadow-only lane; extend it only through guarded reviewed-candidate promotions with backup/audit evidence.
 4. Keep fresh reviewed candidate promotion limited to the guarded explicit-approval corridor.
 5. Keep broad G4/background apply blocked until ranking gate, rollback replay, telemetry reconciliation/fresh epoch, and reviewed queue approvals all pass on real runtime evidence.
 
@@ -87,7 +87,7 @@ Do not silently delete, reset, or rewrite telemetry. Historical reconciliation m
 
 If asked "다음으로 뭐해야 해?", answer:
 
-> 지금은 v0.1.152까지 릴리즈/설치/스모크가 끝났고 `personal-oss` Hermes hook도 doctor-green입니다. 전체 목표 대비 대략 76-78% 정도 왔습니다. live Hermes default는 여전히 `conservative_legacy`이고, `graph_reinforced_v1`은 shadow 후보로만 비교했습니다. 새 live-Hermes-DB 50-task representative fact corpus는 50/50 pass, zero baseline regression, no mutation으로 통과했습니다. 하지만 checked-in expanded 50-task fixture는 live DB에 project-M1 reference facts/procedures/episodes가 없어서 직접 replay는 아직 불가하고, post-v0.1.152 fresh-epoch도 `epoch_empty_retrieval_outcome_metadata_gap_classified`로 계속 block입니다. 다음은 live fixture coverage를 fact-only에서 procedure/episode까지 넓히고 fresh-epoch telemetry를 더 dogfood하는 순서입니다. broad G4/background apply, collapse/delete apply, ordinary conversation auto-approval, default ranking migration은 아직 금지입니다.
+> 지금은 v0.1.152까지 릴리즈/설치/스모크가 끝났고 `personal-oss` Hermes hook도 doctor-green입니다. 전체 목표 대비 대략 78-80% 정도 왔습니다. live Hermes default는 여전히 `conservative_legacy`이고, `graph_reinforced_v1`은 shadow 후보로만 비교했습니다. 새 live-Hermes-DB mixed 50-task corpus는 approved facts/procedure/episode를 포함해 50/50 pass, zero baseline regression, protected default order, no mutation으로 통과했습니다. 다만 post-v0.1.152 fresh-epoch는 아직 `low_epoch_observation_trace_coverage`와 `epoch_empty_retrieval_outcome_metadata_gap_classified`로 block입니다. 새 diagnostic 기준 unresolved adapter payload gap은 0이고, 남은 핵심은 classified legacy missing-outcome row를 metadata-rich dogfooding으로 밀어내는 것입니다. broad G4/background apply, collapse/delete apply, ordinary conversation auto-approval, default ranking migration, live telemetry reset은 아직 금지입니다.
 
 ## Quick verification commands
 
diff --git a/src/agent_memory/api/cli.py b/src/agent_memory/api/cli.py
index a8f85c0..0befa32 100644
--- a/src/agent_memory/api/cli.py
+++ b/src/agent_memory/api/cli.py
@@ -4614,6 +4614,18 @@ def _reviewed_promotion_payload_from_args(args: argparse.Namespace) -> dict[str,
             "success_rate": args.success_rate,
             "evidence_ids": [],
         }
+    if args.promotion_type == "episode":
+        if not args.title or not args.summary:
+            raise ValueError("dogfood trace-candidate-update episode promotion requires --title and --summary")
+        return {
+            "promotion_type": "episode",
+            "title": args.title,
+            "summary": args.summary,
+            "source_ids": [],
+            "tags": list(args.tag or []),
+            "scope": args.scope,
+            "importance_score": args.importance_score,
+        }
     raise ValueError("unsupported trace candidate promotion type")
 
 
@@ -4677,7 +4689,7 @@ def _dogfood_trace_candidate_update_payload(args: argparse.Namespace) -> dict[st
         "status_after": args.status,
         "status": args.status,
         "proposal_type": proposal_type,
-        "promotion_ready": args.status == "approved" and proposal_type in {"fact_promotion", "preference_promotion", "procedure_promotion"},
+        "promotion_ready": args.status == "approved" and proposal_type in {"fact_promotion", "preference_promotion", "procedure_promotion", "episode_promotion"},
         "reason_sha256": reason_sha256,
         "privacy": {"reviewed_payload_included": False, "raw_reason_included": False, "raw_content_included": False},
     }
@@ -4723,7 +4735,7 @@ def _dogfood_trace_candidate_apply_payload(args: argparse.Namespace) -> dict[str
         if row["status"] != "approved":
             skipped.append({"candidate_id": candidate_id, "reason": f"status_{row['status']}"})
             continue
-        if row["proposal_type"] not in {"fact_promotion", "preference_promotion", "procedure_promotion"}:
+        if row["proposal_type"] not in {"fact_promotion", "preference_promotion", "procedure_promotion", "episode_promotion"}:
             skipped.append({"candidate_id": candidate_id, "reason": f"proposal_type_{row['proposal_type']}"})
             continue
         with sqlite3.connect(db_path) as connection:
@@ -4763,6 +4775,18 @@ def _dogfood_trace_candidate_apply_payload(args: argparse.Namespace) -> dict[str
             )
             approve_procedure(db_path=db_path, procedure_id=procedure.id)
             promoted_ref = f"procedure:{procedure.id}"
+        elif promotion_type == "episode":
+            episode = create_episode(
+                db_path=db_path,
+                title=str(reviewed["title"]),
+                summary=str(reviewed["summary"]),
+                source_ids=[int(value) for value in reviewed.get("source_ids", [])],
+                tags=[str(value) for value in reviewed.get("tags", [])],
+                importance_score=float(reviewed.get("importance_score") or 0.0),
+                scope=str(reviewed.get("scope") or "global"),
+                status="approved",
+            )
+            promoted_ref = f"episode:{episode.id}"
         else:
             skipped.append({"candidate_id": candidate_id, "reason": f"reviewed_payload_not_promotable_{promotion_type or 'missing'}"})
             continue
@@ -6888,6 +6912,21 @@ def _dogfood_fresh_epoch_payload(args: argparse.Namespace) -> dict[str, Any]:
         warnings.append("high_epoch_empty_retrieval_ratio")
     unknown_empty_outcome_count = empty_by_retrieval_outcome.get("unknown", 0) + empty_by_retrieval_outcome.get("", 0)
     unresolved_unknown_empty_outcome_count = empty_unknown_outcome_drilldown.get("adapter_payload_gap", 0)
+    classified_missing_outcome_count = max(0, unknown_empty_outcome_count - unresolved_unknown_empty_outcome_count)
+    if unresolved_unknown_empty_outcome_count:
+        dominant_blocker = "adapter_payload_gap"
+        classification_confidence = "partial" if classified_missing_outcome_count else "low"
+        metadata_gap_next_action = (
+            "Fix adapter payload metadata for unresolved empty observations before treating classified legacy gaps as reset-safe."
+        )
+    elif unknown_empty_outcome_count:
+        dominant_blocker = "classified_legacy_missing_outcome"
+        classification_confidence = "classified"
+        metadata_gap_next_action = "Collect more fresh metadata-rich dogfood before telemetry reset; no adapter payload gap detected."
+    else:
+        dominant_blocker = "none"
+        classification_confidence = "complete"
+        metadata_gap_next_action = "No unknown empty-retrieval outcome metadata gap detected."
     if unresolved_unknown_empty_outcome_count:
         warnings.append("epoch_empty_retrieval_outcome_unknown")
     elif unknown_empty_outcome_count:
@@ -6949,6 +6988,14 @@ def _dogfood_fresh_epoch_payload(args: argparse.Namespace) -> dict[str, Any]:
                 "classification_rule": "metadata-only aggregate inference from hook_event_name and response_mode",
                 "next_action": "Prefer more v0.1.129+ dogfood or a targeted metadata backfill preview before telemetry reset.",
             },
+            "metadata_gap_diagnostic": {
+                "unknown_empty_outcome_count": unknown_empty_outcome_count,
+                "unresolved_adapter_payload_gap_count": unresolved_unknown_empty_outcome_count,
+                "classified_missing_outcome_count": classified_missing_outcome_count,
+                "dominant_blocker": dominant_blocker,
+                "classification_confidence": classification_confidence,
+                "next_action": metadata_gap_next_action,
+            },
             "by_hook_event_name": {key: empty_by_hook_event_name[key] for key in sorted(empty_by_hook_event_name)},
             "by_surface": {key: empty_by_surface[key] for key in sorted(empty_by_surface)},
             "by_scope": {key: empty_by_scope[key] for key in sorted(empty_by_scope)},
@@ -10911,7 +10958,7 @@ def _build_parser() -> argparse.ArgumentParser:
     dogfood_trace_candidate_update_parser.add_argument("--actor", required=True)
     dogfood_trace_candidate_update_parser.add_argument("--reason", required=True)
     dogfood_trace_candidate_update_parser.add_argument("--approval-phrase", required=True)
-    dogfood_trace_candidate_update_parser.add_argument("--promotion-type", choices=["fact", "preference", "procedure"])
+    dogfood_trace_candidate_update_parser.add_argument("--promotion-type", choices=["fact", "preference", "procedure", "episode"])
     dogfood_trace_candidate_update_parser.add_argument("--subject")
     dogfood_trace_candidate_update_parser.add_argument("--predicate")
     dogfood_trace_candidate_update_parser.add_argument("--object")
@@ -10920,6 +10967,10 @@ def _build_parser() -> argparse.ArgumentParser:
     dogfood_trace_candidate_update_parser.add_argument("--precondition", action="append")
     dogfood_trace_candidate_update_parser.add_argument("--step", action="append")
     dogfood_trace_candidate_update_parser.add_argument("--success-rate", type=float, default=0.0)
+    dogfood_trace_candidate_update_parser.add_argument("--title")
+    dogfood_trace_candidate_update_parser.add_argument("--summary")
+    dogfood_trace_candidate_update_parser.add_argument("--tag", action="append")
+    dogfood_trace_candidate_update_parser.add_argument("--importance-score", type=float, default=0.0)
     dogfood_trace_candidate_update_parser.add_argument("--scope", default="global")
     dogfood_trace_candidate_update_parser.add_argument("--confidence", type=float, default=0.7)
     dogfood_trace_candidate_apply_parser = dogfood_subparsers.add_parser(
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5bf6f6d..c255d12 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -3147,6 +3147,14 @@ def test_python_module_cli_dogfood_fresh_epoch_classifies_unknown_empty_retrieva
         "classification_rule": "metadata-only aggregate inference from hook_event_name and response_mode",
         "next_action": "Prefer more v0.1.129+ dogfood or a targeted metadata backfill preview before telemetry reset.",
     }
+    assert diagnostics["metadata_gap_diagnostic"] == {
+        "unknown_empty_outcome_count": 2,
+        "unresolved_adapter_payload_gap_count": 1,
+        "classified_missing_outcome_count": 1,
+        "dominant_blocker": "adapter_payload_gap",
+        "classification_confidence": "partial",
+        "next_action": "Fix adapter payload metadata for unresolved empty observations before treating classified legacy gaps as reset-safe.",
+    }
     assert payload["quality_gate"] == {
         "pass": False,
         "decision": "continue_fresh_epoch_dogfooding",
@@ -10819,9 +10827,37 @@ def test_dogfood_trace_candidate_apply_promotes_only_approved_reviewed_fact_cand
             "name, trigger_context, status",
             ("Run reviewed candidate promotion", "when a trace candidate is explicitly approved", "approved"),
         ),
+        (
+            "episode",
+            [
+                "--promotion-type",
+                "episode",
+                "--title",
+                "Reviewed live mixed corpus checkpoint",
+                "--summary",
+                "A reviewed episode records the live mixed retrieval shadow corpus checkpoint without raw transcript storage.",
+                "--tag",
+                "retrieval-eval",
+                "--tag",
+                "shadow-corpus",
+                "--scope",
+                "project:g5-candidates",
+                "--importance-score",
+                "0.7",
+            ],
+            "promote_reviewed_episode",
+            "episode:",
+            "episodes",
+            "title, summary, status",
+            (
+                "Reviewed live mixed corpus checkpoint",
+                "A reviewed episode records the live mixed retrieval shadow corpus checkpoint without raw transcript storage.",
+                "approved",
+            ),
+        ),
     ],
 )
-def test_dogfood_trace_candidate_apply_supports_reviewed_preference_and_procedure_promotions(
+def test_dogfood_trace_candidate_apply_supports_reviewed_preference_procedure_and_episode_promotions(
     tmp_path: Path,
     promotion_type: str,
     update_args: list[str],
diff --git a/tests/test_roadmap_contract.py b/tests/test_roadmap_contract.py
index 56c79e3..1bda550 100644
--- a/tests/test_roadmap_contract.py
+++ b/tests/test_roadmap_contract.py
@@ -71,10 +71,11 @@ def test_v0152_status_docs_record_current_brainlike_runway_and_blocked_broad_app
         assert "/Users/reddit/.agent-memory/runtime/v0.1.152/.venv/bin/agent-memory" in doc
         assert "fresh_trace_linkage_gap_not_detected" in doc
         assert "g4-v0138-20260512-132253" in doc
-        assert "Overall north-star: 76-78%" in doc
+        assert "Overall north-star: 78-80%" in doc
         assert "broad g4/background apply" in doc.lower()
         assert "50-task expanded retrieval fixture gate" in doc or "50-task expanded retrieval fixture" in doc
         assert "75 checked-in" in doc or "75/75" in doc
+        assert "mixed fact/procedure/episode" in doc or "approved facts/procedure/episode" in doc
         assert "collapse proof" in doc.lower()
 
     assert "dogfood trace-cluster-preview" in next_action