From cdd14af4a0e7286ff852db79e7fc1d4bb999cca5 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 16:52:55 +0200 Subject: [PATCH 01/17] test(f3): add RED selftest wrapper contract Locks down the contract for `bin/pos-selftest.sh` before implementing the wrapper or its Python orchestrator. Five failing tests verify: - `bin/pos-selftest.sh` exists and is executable - `bin/_selftest.py` exists (Python orchestrator) - Wrapper uses `set -euo pipefail` and delegates to `python3 _selftest.py` - Running the wrapper from the repo root exits 0 Scope ratified in Fase -1 (see `.claude/branch-approvals/feat_f3-selftest-end-to-end.approved`): gates funcionales criticos D1 / D3 / D4 / D5 / D6 stop-policy-check; informativos D2 + D6 pre-compact diferidos; sin runtime Claude Code; solo checks estaticos baratos para skills/agents. Following commits: - GREEN minimo wrapper + orchestrator (smoke exit 0) - RED/GREEN incrementales por escenario (D1, D3, D4, D5, D6) - CI job selftest - Docs-sync Co-Authored-By: Claude Opus 4.7 --- bin/tests/test_selftest_smoke.py | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 bin/tests/test_selftest_smoke.py diff --git a/bin/tests/test_selftest_smoke.py b/bin/tests/test_selftest_smoke.py new file mode 100644 index 0000000..3c2f275 --- /dev/null +++ b/bin/tests/test_selftest_smoke.py @@ -0,0 +1,52 @@ +"""F3 RED smoke — locks down bin/pos-selftest.sh contract. 
+ +Fails until: +- bin/pos-selftest.sh exists and is executable +- bin/_selftest.py exists (Python orchestrator) +- The shell wrapper delegates to python3 _selftest.py +- Running the wrapper exits 0 against current repo state +""" + +import os +import subprocess +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +SELFTEST_SH = REPO_ROOT / "bin" / "pos-selftest.sh" +SELFTEST_PY = REPO_ROOT / "bin" / "_selftest.py" + + +class TestSelftestScriptShape: + def test_wrapper_exists(self): + assert SELFTEST_SH.is_file(), f"missing: {SELFTEST_SH}" + + def test_wrapper_is_executable(self): + assert SELFTEST_SH.is_file(), f"missing: {SELFTEST_SH}" + assert os.access(SELFTEST_SH, os.X_OK), f"not executable: {SELFTEST_SH}" + + def test_orchestrator_exists(self): + assert SELFTEST_PY.is_file(), f"missing: {SELFTEST_PY}" + + def test_wrapper_delegates_to_python_orchestrator(self): + assert SELFTEST_SH.is_file(), f"missing: {SELFTEST_SH}" + body = SELFTEST_SH.read_text() + assert "python3" in body, "wrapper should invoke python3" + assert "_selftest.py" in body, "wrapper should reference _selftest.py" + assert "set -euo pipefail" in body, "wrapper should use strict bash" + + +class TestSelftestExecution: + def test_wrapper_exits_zero(self): + assert SELFTEST_SH.is_file(), f"missing: {SELFTEST_SH}" + result = subprocess.run( + [str(SELFTEST_SH)], + cwd=REPO_ROOT, + capture_output=True, + text=True, + timeout=180, + ) + assert result.returncode == 0, ( + f"selftest exited {result.returncode}\n" + f"--- stdout ---\n{result.stdout}\n" + f"--- stderr ---\n{result.stderr}" + ) From d11e9667a1eb4cb14aa1c5866d1a9b577f4c86a0 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 16:59:04 +0200 Subject: [PATCH 02/17] feat(f3): GREEN minimal wrapper + orchestrator stub Satisfies the contract locked by RED in the previous commit: - bin/pos-selftest.sh: thin bash wrapper, set -euo pipefail, execs python3 _selftest.py. Both files are executable (mode 0755). 
- bin/_selftest.py: stdlib orchestrator with empty scenario set (smoke print + return 0). Scenarios D1 / D3 / D4 / D5 / D6 stop are added in subsequent RED/GREEN commits. All 5 smoke contract tests pass. Hooks suite intact (587 passed + 1 skipped baseline preserved). Skills + agents + bin tests: 237 passed. Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 33 +++++++++++++++++++++++++++++++++ bin/pos-selftest.sh | 9 +++++++++ 2 files changed, 42 insertions(+) create mode 100755 bin/_selftest.py create mode 100755 bin/pos-selftest.sh diff --git a/bin/_selftest.py b/bin/_selftest.py new file mode 100755 index 0000000..da79c47 --- /dev/null +++ b/bin/_selftest.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +"""pos-selftest orchestrator. + +End-to-end selftest of the pos plugin: generates a synthetic project via +`npx tsx generator/run.ts --profile questionnaire/profiles/cli-tool.yaml` +and exercises the functional-critical gates against it. + +Scope (ratified in F3 Fase -1): +- D1 pre-branch-gate (deny without marker) +- D3 pre-write-guard (deny write without test pair) +- D4 pre-pr-gate (deny PR with docs-sync missing) +- D5 post-action (confirmed merge emits /pos:compound suggestion) +- D6 stop-policy-check (allow/deny skill allowlist) + +Out of scope: D2 session-start + D6 pre-compact (informative-only), +Claude Code runtime invocations, real skill/agent dispatch. + +Stdlib only. No third-party deps. +""" + +from __future__ import annotations + +import sys + + +def main() -> int: + # Scenarios registered in subsequent commits. + print("pos-selftest: smoke (no scenarios registered yet)") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/pos-selftest.sh b/bin/pos-selftest.sh new file mode 100755 index 0000000..6ea8555 --- /dev/null +++ b/bin/pos-selftest.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +# pos-selftest — end-to-end smoke for the pos plugin gates. +# Thin bash wrapper. 
Orchestration lives in bin/_selftest.py (stdlib only, +# no Claude Code runtime). + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +exec python3 "$SCRIPT_DIR/_selftest.py" "$@" From 0fb2442b4d5675b6c6b65ddffc40ca17983fcfed Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:04:18 +0200 Subject: [PATCH 03/17] test(f3): add RED D1 pre-branch-gate scenario Locks down the orchestrator contract: each registered scenario must emit `[ok] D{N} {name}` on its line. Module-scoped fixture runs the wrapper once and shares stdout across scenario tests. Fails until _selftest.py registers + implements D1 against the synthetic project. Co-Authored-By: Claude Opus 4.7 --- bin/tests/test_selftest_scenarios.py | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 bin/tests/test_selftest_scenarios.py diff --git a/bin/tests/test_selftest_scenarios.py b/bin/tests/test_selftest_scenarios.py new file mode 100644 index 0000000..66841e8 --- /dev/null +++ b/bin/tests/test_selftest_scenarios.py @@ -0,0 +1,37 @@ +"""F3 scenario contract — each scenario asserts the orchestrator emits +`[ok] D{N} {name}` on its line. + +Module-scoped fixture runs `pos-selftest.sh` once and shares stdout across +scenario tests. RED until `bin/_selftest.py` registers the scenario; GREEN +once the scenario passes against the synthetic project. 
+""" + +import subprocess +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[2] +SELFTEST_SH = REPO_ROOT / "bin" / "pos-selftest.sh" + + +@pytest.fixture(scope="module") +def selftest_run(): + assert SELFTEST_SH.is_file(), f"missing: {SELFTEST_SH}" + return subprocess.run( + [str(SELFTEST_SH)], + cwd=REPO_ROOT, + capture_output=True, + text=True, + timeout=300, + ) + + +class TestScenarios: + def test_d1_pre_branch_gate(self, selftest_run): + assert "[ok] D1 pre-branch-gate" in selftest_run.stdout, ( + f"D1 scenario did not pass\n" + f"--- exit ---\n{selftest_run.returncode}\n" + f"--- stdout ---\n{selftest_run.stdout}\n" + f"--- stderr ---\n{selftest_run.stderr}" + ) From d46ff95f0947b3881b83b7735a909915297501c7 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:07:53 +0200 Subject: [PATCH 04/17] feat(f3): GREEN D1 pre-branch-gate scenario Orchestrator generates synthetic project per scenario via real `npx tsx generator/run.ts --profile cli-tool.yaml --out {tmpdir}`, invokes meta-repo hook against synthetic cwd, asserts deny-without-marker + allow-after-touch contract. Selftest runs in ~1.2s end-to-end. Stdlib only (subprocess + tempfile + shutil + json + pathlib). Each scenario gets its own tmpdir to avoid cross-scenario contamination. 
Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/bin/_selftest.py b/bin/_selftest.py index da79c47..a1428fa 100755 --- a/bin/_selftest.py +++ b/bin/_selftest.py @@ -20,12 +20,113 @@ from __future__ import annotations +import json +import shutil +import subprocess import sys +import tempfile +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +GENERATOR = REPO_ROOT / "generator" / "run.ts" +PROFILE = REPO_ROOT / "questionnaire" / "profiles" / "cli-tool.yaml" +HOOKS_DIR = REPO_ROOT / "hooks" + + +def generate_synthetic(target: Path) -> None: + """Generate synthetic project at `target` via `npx tsx generator/run.ts`. + + Generator refuses non-empty `--out`; caller passes a fresh path. + """ + subprocess.run( + [ + "npx", "tsx", str(GENERATOR), + "--profile", str(PROFILE), + "--out", str(target), + ], + cwd=REPO_ROOT, + check=True, + capture_output=True, + text=True, + timeout=120, + ) + + +def invoke_hook(name: str, payload: dict, cwd: Path) -> subprocess.CompletedProcess: + """Invoke `hooks/{name}.py` from the meta-repo against `cwd`.""" + return subprocess.run( + [sys.executable, str(HOOKS_DIR / f"{name}.py")], + input=json.dumps(payload), + cwd=cwd, + capture_output=True, + text=True, + timeout=10, + ) + + +def scenario_d1_pre_branch_gate(synthetic: Path) -> tuple[bool, str]: + """D1: deny `git checkout -b` without marker; allow with marker present.""" + payload = { + "tool_name": "Bash", + "tool_input": {"command": "git checkout -b feat/example"}, + } + + res = invoke_hook("pre-branch-gate", payload, synthetic) + if res.returncode != 2: + return False, ( + f"deny phase: expected exit 2, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + if '"permissionDecision": "deny"' not in res.stdout: + return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + + marker = synthetic / 
".claude" / "branch-approvals" / "feat_example.approved" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.touch() + + res = invoke_hook("pre-branch-gate", payload, synthetic) + if res.returncode != 0: + return False, ( + f"allow phase: expected exit 0, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + + return True, "" + + +SCENARIOS = [ + ("D1", "pre-branch-gate", scenario_d1_pre_branch_gate), +] def main() -> int: - # Scenarios registered in subsequent commits. - print("pos-selftest: smoke (no scenarios registered yet)") + failures: list[str] = [] + for code, name, fn in SCENARIOS: + tmp = Path(tempfile.mkdtemp(prefix="pos-selftest-")) + synthetic = tmp / "synthetic" + try: + try: + generate_synthetic(synthetic) + except subprocess.CalledProcessError as e: + print( + f"[fail] {code} {name}: generator failed\n" + f"stdout: {e.stdout}\nstderr: {e.stderr}" + ) + failures.append(code) + continue + ok, reason = fn(synthetic) + if ok: + print(f"[ok] {code} {name}") + else: + print(f"[fail] {code} {name}: {reason}") + failures.append(code) + finally: + shutil.rmtree(tmp, ignore_errors=True) + + if failures: + print(f"pos-selftest: {len(failures)} scenario(s) failed: {', '.join(failures)}") + return 1 + print(f"pos-selftest: {len(SCENARIOS)} scenario(s) passed") return 0 From b29fce3a8550146fa706411a8d93bcc889add6ea Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:09:23 +0200 Subject: [PATCH 05/17] test(f3): add RED D3 pre-write-guard scenario Extracts shared diag helper. D3 scenario test fails until the orchestrator registers + implements the pre-write-guard contract (deny Write to enforced path without test pair, allow once test pair exists) against the synthetic project. 
Co-Authored-By: Claude Opus 4.7 --- bin/tests/test_selftest_scenarios.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/bin/tests/test_selftest_scenarios.py b/bin/tests/test_selftest_scenarios.py index 66841e8..6f3c681 100644 --- a/bin/tests/test_selftest_scenarios.py +++ b/bin/tests/test_selftest_scenarios.py @@ -27,11 +27,18 @@ def selftest_run(): ) +def _scenario_diag(scenario: str, run: subprocess.CompletedProcess) -> str: + return ( + f"{scenario} scenario did not pass\n" + f"--- exit ---\n{run.returncode}\n" + f"--- stdout ---\n{run.stdout}\n" + f"--- stderr ---\n{run.stderr}" + ) + + class TestScenarios: def test_d1_pre_branch_gate(self, selftest_run): - assert "[ok] D1 pre-branch-gate" in selftest_run.stdout, ( - f"D1 scenario did not pass\n" - f"--- exit ---\n{selftest_run.returncode}\n" - f"--- stdout ---\n{selftest_run.stdout}\n" - f"--- stderr ---\n{selftest_run.stderr}" - ) + assert "[ok] D1 pre-branch-gate" in selftest_run.stdout, _scenario_diag("D1", selftest_run) + + def test_d3_pre_write_guard(self, selftest_run): + assert "[ok] D3 pre-write-guard" in selftest_run.stdout, _scenario_diag("D3", selftest_run) From 1959610e21309344f57cfd9336275f3407b254b9 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:10:13 +0200 Subject: [PATCH 06/17] feat(f3): GREEN D3 pre-write-guard scenario Synthetic project's rendered policy.yaml lacks pre_write (template drift documented post-D5b), so the scenario writes a minimal policy override into synthetic/policy.yaml before invoking. Then asserts deny on `Write hooks/foo.py` without test pair, allow once hooks/tests/test_foo.py exists. 
Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/bin/_selftest.py b/bin/_selftest.py index a1428fa..914d6df 100755 --- a/bin/_selftest.py +++ b/bin/_selftest.py @@ -25,6 +25,7 @@ import subprocess import sys import tempfile +import textwrap from pathlib import Path REPO_ROOT = Path(__file__).resolve().parent.parent @@ -94,8 +95,56 @@ def scenario_d1_pre_branch_gate(synthetic: Path) -> tuple[bool, str]: return True, "" +POLICY_PRE_WRITE_ONLY = textwrap.dedent("""\ + lifecycle: + pre_write: + enforced_patterns: + - label: "hooks_top_level_py" + match_glob: "hooks/*.py" + exclude_globs: + - "hooks/_lib/**" + - "hooks/tests/**" +""") + + +def scenario_d3_pre_write_guard(synthetic: Path) -> tuple[bool, str]: + """D3: deny Write to enforced path without test pair, allow with.""" + # Synthetic project's rendered policy lacks pre_write (template drift + # documented post-D5b). Inject a minimal policy so the hook enforces. 
+ (synthetic / "policy.yaml").write_text(POLICY_PRE_WRITE_ONLY, encoding="utf-8") + + target = synthetic / "hooks" / "foo.py" + payload = { + "tool_name": "Write", + "tool_input": {"file_path": str(target)}, + } + + res = invoke_hook("pre-write-guard", payload, synthetic) + if res.returncode != 2: + return False, ( + f"deny phase: expected exit 2, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + if '"permissionDecision": "deny"' not in res.stdout: + return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + + test_pair = synthetic / "hooks" / "tests" / "test_foo.py" + test_pair.parent.mkdir(parents=True, exist_ok=True) + test_pair.touch() + + res = invoke_hook("pre-write-guard", payload, synthetic) + if res.returncode != 0: + return False, ( + f"allow phase: expected exit 0, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + + return True, "" + + SCENARIOS = [ ("D1", "pre-branch-gate", scenario_d1_pre_branch_gate), + ("D3", "pre-write-guard", scenario_d3_pre_write_guard), ] From 5a63dc0b2cea929dfac4c3e7b650c49eac3e6d55 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:11:34 +0200 Subject: [PATCH 07/17] test(f3): add RED D4 pre-pr-gate scenario Fails until the orchestrator registers + implements the pre-pr-gate contract: deny `gh pr create` when docs-sync (ROADMAP.md + HANDOFF.md) is missing from the diff, allow once docs-sync is satisfied. 
Co-Authored-By: Claude Opus 4.7 --- bin/tests/test_selftest_scenarios.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/tests/test_selftest_scenarios.py b/bin/tests/test_selftest_scenarios.py index 6f3c681..97a03b4 100644 --- a/bin/tests/test_selftest_scenarios.py +++ b/bin/tests/test_selftest_scenarios.py @@ -42,3 +42,6 @@ def test_d1_pre_branch_gate(self, selftest_run): def test_d3_pre_write_guard(self, selftest_run): assert "[ok] D3 pre-write-guard" in selftest_run.stdout, _scenario_diag("D3", selftest_run) + + def test_d4_pre_pr_gate(self, selftest_run): + assert "[ok] D4 pre-pr-gate" in selftest_run.stdout, _scenario_diag("D4", selftest_run) From d24a29db739bdd8ec1d7c357e83f3f00204dc7ce Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:14:56 +0200 Subject: [PATCH 08/17] feat(f3): GREEN D4 pre-pr-gate scenario MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthetic project's policy.yaml is overridden with a minimal pre_pr section (baseline + empty conditional — loader requires both keys). Init git on main, commit baseline, branch feat/example with a code-only commit, assert deny on `gh pr create`. Add ROADMAP/HANDOFF changes, re-invoke, assert allow. Factors `git_in()` + `init_baseline_repo()` helpers — D5 will reuse them for reflog-based detection. Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/bin/_selftest.py b/bin/_selftest.py index 914d6df..41903da 100755 --- a/bin/_selftest.py +++ b/bin/_selftest.py @@ -65,6 +65,27 @@ def invoke_hook(name: str, payload: dict, cwd: Path) -> subprocess.CompletedProc ) + +def git_in(repo: Path, *args: str) -> subprocess.CompletedProcess: + """Run `git {args}` inside `repo`. 
Raises on non-zero exit.""" + return subprocess.run( + ["git", *args], + cwd=repo, + capture_output=True, + text=True, + check=True, + timeout=10, + ) + + +def init_baseline_repo(repo: Path) -> None: + """Init a fresh git repo on `main`, configure user, commit current tree.""" + git_in(repo, "init", "-q", "-b", "main") + git_in(repo, "config", "user.email", "selftest@pos.local") + git_in(repo, "config", "user.name", "pos selftest") + git_in(repo, "add", "-A") + git_in(repo, "commit", "-q", "-m", "baseline") + + def scenario_d1_pre_branch_gate(synthetic: Path) -> tuple[bool, str]: """D1: deny `git checkout -b` without marker; allow with marker present.""" payload = { @@ -142,9 +163,60 @@ def scenario_d3_pre_write_guard(synthetic: Path) -> tuple[bool, str]: return True, "" +POLICY_DOCS_SYNC_ONLY = textwrap.dedent("""\ + lifecycle: + pre_pr: + docs_sync_required: + - "ROADMAP.md" + - "HANDOFF.md" + docs_sync_conditional: [] +""") + + +def scenario_d4_pre_pr_gate(synthetic: Path) -> tuple[bool, str]: + """D4: deny `gh pr create` when docs-sync incomplete; allow when satisfied.""" + (synthetic / "policy.yaml").write_text(POLICY_DOCS_SYNC_ONLY, encoding="utf-8") + init_baseline_repo(synthetic) + git_in(synthetic, "checkout", "-q", "-b", "feat/example") + (synthetic / "src.txt").write_text("payload\n", encoding="utf-8") + git_in(synthetic, "add", "src.txt") + git_in(synthetic, "commit", "-q", "-m", "feat: add src") + + payload = { + "tool_name": "Bash", + "tool_input": {"command": "gh pr create --title test --body test"}, + } + + res = invoke_hook("pre-pr-gate", payload, synthetic) + if res.returncode != 2: + return False, ( + f"deny phase: expected exit 2, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + if '"permissionDecision": "deny"' not in res.stdout: + return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + if "docs-sync" not in res.stdout: + return False, f"deny phase: missing 'docs-sync' in reason\nstdout: 
{res.stdout}" + + (synthetic / "ROADMAP.md").write_text("# ROADMAP\nupdated\n", encoding="utf-8") + (synthetic / "HANDOFF.md").write_text("# HANDOFF\nupdated\n", encoding="utf-8") + git_in(synthetic, "add", "ROADMAP.md", "HANDOFF.md") + git_in(synthetic, "commit", "-q", "-m", "docs: sync") + + res = invoke_hook("pre-pr-gate", payload, synthetic) + if res.returncode != 0: + return False, ( + f"allow phase: expected exit 0, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + + return True, "" + + SCENARIOS = [ ("D1", "pre-branch-gate", scenario_d1_pre_branch_gate), ("D3", "pre-write-guard", scenario_d3_pre_write_guard), + ("D4", "pre-pr-gate", scenario_d4_pre_pr_gate), ] From f84ba38cd8a8822835f23ada280bdb10038d697a Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:16:21 +0200 Subject: [PATCH 09/17] test(f3): add RED D5 post-action scenario Fails until the orchestrator registers + implements the post-action contract: a confirmed `git merge` whose diff matches a configured trigger emits the `Consider running /pos:compound` advisory. 
Co-Authored-By: Claude Opus 4.7 --- bin/tests/test_selftest_scenarios.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/tests/test_selftest_scenarios.py b/bin/tests/test_selftest_scenarios.py index 97a03b4..e018787 100644 --- a/bin/tests/test_selftest_scenarios.py +++ b/bin/tests/test_selftest_scenarios.py @@ -45,3 +45,6 @@ def test_d3_pre_write_guard(self, selftest_run): def test_d4_pre_pr_gate(self, selftest_run): assert "[ok] D4 pre-pr-gate" in selftest_run.stdout, _scenario_diag("D4", selftest_run) + + def test_d5_post_action(self, selftest_run): + assert "[ok] D5 post-action" in selftest_run.stdout, _scenario_diag("D5", selftest_run) From 7f991e0cfed0cdc30d56e8608a46332e72ff9f38 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:17:34 +0200 Subject: [PATCH 10/17] feat(f3): GREEN D5 post-action scenario Override synthetic policy with minimal post_merge trigger (fnmatch-style non-recursive globs since `**` is literal in fnmatch). Init git on main, branch feat/example, add `generator/feature.ts`, merge --no-ff, then invoke post-action with a `git merge` payload. Asserts exit 0 and `/pos:compound` advisory in stdout. 
Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/bin/_selftest.py b/bin/_selftest.py index 41903da..e70c58b 100755 --- a/bin/_selftest.py +++ b/bin/_selftest.py @@ -213,10 +213,56 @@ def scenario_d4_pre_pr_gate(synthetic: Path) -> tuple[bool, str]: return True, "" +POLICY_POST_MERGE_ONLY = textwrap.dedent("""\ + lifecycle: + post_merge: + skills_conditional: + - trigger: + touched_paths_any_of: + - "generator/*.ts" + skip_if_only: + - "*.md" + min_files_changed: 1 +""") + + +def scenario_d5_post_action(synthetic: Path) -> tuple[bool, str]: + """D5: confirmed merge whose diff matches trigger emits /pos:compound advisory.""" + (synthetic / "policy.yaml").write_text(POLICY_POST_MERGE_ONLY, encoding="utf-8") + init_baseline_repo(synthetic) + git_in(synthetic, "checkout", "-q", "-b", "feat/example") + target = synthetic / "generator" / "feature.ts" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("export const x = 1;\n", encoding="utf-8") + git_in(synthetic, "add", "generator/feature.ts") + git_in(synthetic, "commit", "-q", "-m", "feat: add generator/feature.ts") + git_in(synthetic, "checkout", "-q", "main") + git_in(synthetic, "merge", "--no-ff", "feat/example", "-m", "Merge feat/example") + + payload = { + "tool_name": "Bash", + "tool_input": {"command": "git merge --no-ff feat/example"}, + } + res = invoke_hook("post-action", payload, synthetic) + if res.returncode != 0: + return False, ( + f"expected exit 0, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + if "/pos:compound" not in res.stdout: + return False, ( + f"missing /pos:compound advisory in stdout\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + + return True, "" + + SCENARIOS = [ ("D1", "pre-branch-gate", scenario_d1_pre_branch_gate), ("D3", "pre-write-guard", scenario_d3_pre_write_guard), ("D4", "pre-pr-gate", scenario_d4_pre_pr_gate), + ("D5", 
"post-action", scenario_d5_post_action), ] From 26fd686bd540a04ddf984f0cb75ec7f44455f860 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:18:26 +0200 Subject: [PATCH 11/17] test(f3): add RED D6 stop-policy-check scenario Fails until the orchestrator registers + implements the stop-policy-check contract: with `skills_allowed` declared in policy.yaml + a rogue invocation in `.claude/logs/skills.jsonl` for the active session, the Stop hook denies exit 2; an unrelated session_id with no invocations allows exit 0. Co-Authored-By: Claude Opus 4.7 --- bin/tests/test_selftest_scenarios.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/tests/test_selftest_scenarios.py b/bin/tests/test_selftest_scenarios.py index e018787..23180c8 100644 --- a/bin/tests/test_selftest_scenarios.py +++ b/bin/tests/test_selftest_scenarios.py @@ -48,3 +48,6 @@ def test_d4_pre_pr_gate(self, selftest_run): def test_d5_post_action(self, selftest_run): assert "[ok] D5 post-action" in selftest_run.stdout, _scenario_diag("D5", selftest_run) + + def test_d6_stop_policy_check(self, selftest_run): + assert "[ok] D6 stop-policy-check" in selftest_run.stdout, _scenario_diag("D6", selftest_run) From 5081549fffc589b97494e5379ef53583d9ab3e50 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:20:49 +0200 Subject: [PATCH 12/17] feat(f3): GREEN D6 stop-policy-check scenario Override synthetic policy.yaml with `skills_allowed: ["pos:simplify"]`, seed `.claude/logs/skills.jsonl` with a rogue invocation under session_id `sess-rogue`. Deny phase: Stop payload `{session_id: "sess-rogue"}` triggers exit 2 deny. Allow phase: a different session_id with no recorded invocations passes through with exit 0. Locks down the session-scoping contract end-to-end. 
Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/bin/_selftest.py b/bin/_selftest.py index e70c58b..28064cd 100755 --- a/bin/_selftest.py +++ b/bin/_selftest.py @@ -258,11 +258,54 @@ def scenario_d5_post_action(synthetic: Path) -> tuple[bool, str]: return True, "" +POLICY_SKILLS_ALLOWED_ONLY = textwrap.dedent("""\ + skills_allowed: + - "pos:simplify" +""") + + +def scenario_d6_stop_policy_check(synthetic: Path) -> tuple[bool, str]: + """D6: enforce skill allowlist scoped by session_id; allow when clean.""" + (synthetic / "policy.yaml").write_text(POLICY_SKILLS_ALLOWED_ONLY, encoding="utf-8") + skills_log = synthetic / ".claude" / "logs" / "skills.jsonl" + skills_log.parent.mkdir(parents=True, exist_ok=True) + skills_log.write_text( + json.dumps({ + "ts": "2026-04-26T00:00:00Z", + "skill": "pos:rogue", + "session_id": "sess-rogue", + "status": "ok", + }) + "\n", + encoding="utf-8", + ) + + deny_payload = {"session_id": "sess-rogue"} + res = invoke_hook("stop-policy-check", deny_payload, synthetic) + if res.returncode != 2: + return False, ( + f"deny phase: expected exit 2, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + if '"permissionDecision": "deny"' not in res.stdout: + return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + + allow_payload = {"session_id": "sess-clean"} + res = invoke_hook("stop-policy-check", allow_payload, synthetic) + if res.returncode != 0: + return False, ( + f"allow phase: expected exit 0, got {res.returncode}\n" + f"stdout: {res.stdout}\nstderr: {res.stderr}" + ) + + return True, "" + + SCENARIOS = [ ("D1", "pre-branch-gate", scenario_d1_pre_branch_gate), ("D3", "pre-write-guard", scenario_d3_pre_write_guard), ("D4", "pre-pr-gate", scenario_d4_pre_pr_gate), ("D5", "post-action", scenario_d5_post_action), + ("D6", "stop-policy-check", scenario_d6_stop_policy_check), ] From 
f87d80677c6d5865bff9a842cd375966894d5955 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:25:22 +0200 Subject: [PATCH 13/17] ci(f3): add selftest job + sync ci-cd.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `selftest` job runs `pytest bin/tests -q` on ubuntu × Python 3.11 with Node setup (for `npx tsx generator/run.ts`). Covers smoke wrapper + 5 functional-critical scenarios end-to-end. Move integration bullet from "Diferidos" to "Aterrizado" per the invariant in `.claude/rules/ci-cd.md`. Add a dedicated H3 documenting the job's scope, what it covers, what it explicitly does not (D2 informative, Claude Code runtime), and the synthetic-policy drift. Co-Authored-By: Claude Opus 4.7 --- .claude/rules/ci-cd.md | 23 +++++++++++++++++++++-- .github/workflows/ci.yml | 29 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/.claude/rules/ci-cd.md b/.claude/rules/ci-cd.md index 0d1b547..5d9f437 100644 --- a/.claude/rules/ci-cd.md +++ b/.claude/rules/ci-cd.md @@ -18,11 +18,10 @@ El hook `pre-push.sh` corre la suite local; GitHub Actions corre la misma suite ## Workflows obligatorios (meta-repo) 1. **`.github/workflows/ci.yml`** — por PR y push a `main`. Entregado de forma incremental, rama a rama. La versión actual cubre: - - **Aterrizado**: typecheck generator (`tsc --noEmit`), validación cuestionario + profiles, render generator dry-run, unit tests generator (vitest con coverage), unit tests hooks (pytest, matriz ubuntu + macos × Python 3.10/3.11). + - **Aterrizado**: typecheck generator (`tsc --noEmit`), validación cuestionario + profiles, render generator dry-run, unit tests generator (vitest con coverage), unit tests hooks (pytest, matriz ubuntu + macos × Python 3.10/3.11), integración end-to-end (`pytest bin/tests` — smoke wrapper + 5 escenarios funcionales-críticos vía `bin/pos-selftest.sh`, ubuntu × Python 3.11) **(F3)**. 
- **Diferidos a rama dedicada** (declarados en `policy.yaml.pre_push.checks_required` como `command_meta`, no enforzados aún): - Lint + format check (`eslint`, `prettier`, `ruff`). - Typecheck hooks (`mypy hooks/`). - - Integración (`./bin/pos-selftest.sh`). - Snapshot diff check (valida templates deterministic). - **Invariante**: cuando una rama añade un check al workflow, también mueve su bullet de "Diferidos" a "Aterrizado" y ajusta el bloque `command_meta` en `policy.yaml` si procede. @@ -36,6 +35,26 @@ El hook `pre-push.sh` corre la suite local; GitHub Actions corre la misma suite - Publica release en GitHub con assets (plugin bundle). - Actualiza `javiAI/pos-marketplace` vía PR automático (cuando exista). +### Job `selftest` (entregado en F3) + +Job dedicado a integración end-to-end del propio plugin `pos`. Corre en `ubuntu-latest` × Python 3.11 (sin matriz extendida — los gates funcionales que cubre son platform-agnostic y la generación del proyecto sintético es la operación más cara). Setup: Node (`npx tsx generator/run.ts`) + Python (`pytest bin/tests`). El comando único es `pytest bin/tests -q`, que ejecuta: + +- **Smoke** (`bin/tests/test_selftest_smoke.py`): contrato del wrapper (`bin/pos-selftest.sh` existe, ejecutable, delega a `python3 bin/_selftest.py`, exit 0 al correr). Bloquea regresiones en la forma del entrypoint. +- **Scenarios** (`bin/tests/test_selftest_scenarios.py`): 5 escenarios funcionales-críticos contra un proyecto sintético generado por scenario: + - **D1 pre-branch-gate** — deny `git checkout -b` sin marker → allow tras `touch {marker}`. + - **D3 pre-write-guard** — deny `Write hooks/foo.py` sin test pair → allow tras crear `hooks/tests/test_foo.py`. + - **D4 pre-pr-gate** — deny `gh pr create` sin docs-sync (ROADMAP + HANDOFF) en el diff → allow tras commit de docs. + - **D5 post-action** — `git merge` confirmado por reflog cuyo diff matchea trigger emite advisory `Consider running /pos:compound`. 
+ - **D6 stop-policy-check** — Stop con `session_id` rogue (allowlist + `skills.jsonl` seeded) deniega; `session_id` clean allow. + +**Out of scope** (ratificado en F3 Fase -1): + +- D2 session-start (informative, exit 0 sin enforcement) y D6 pre-compact (informative). No tienen contrato deny/allow a verificar; el patrón se cubre vía sus tests unitarios en `hooks/tests/`. +- Claude Code runtime: el selftest no instancia Claude Code, no invoca skills/agents reales, no dispatchea `/pos:compound`. Skills/agents se verifican por presencia estática en sus tests dedicados. +- D5b loader: cubierto indirectamente — los hooks D3/D4/D5 lo consumen y los escenarios sobre-escriben `policy.yaml` del sintético para ejercitar el accessor live. + +**Drift sintético ↔ meta-repo**: el `policy.yaml` que emite la generación cli-tool todavía tiene el shape pre-D5b (template no migrado). Cada escenario reescribe la sección que necesita (`pre_write` / `pre_pr` / `post_merge` / `skills_allowed`) directamente en `synthetic/policy.yaml`. Esto desacopla la cobertura de D5b de la migración del template (rama propia post-F3). + ## Workflows generados (proyecto destino) El generador emite workflows según `project_profile.yaml.git_host`. 
Soportados: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a9ce879..b73bd6b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,3 +74,32 @@ jobs: - name: Pytest hooks (with coverage) run: pytest hooks/tests -q --cov=hooks --cov-report=term-missing + + selftest: + name: selftest (ubuntu, py 3.11) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Setup Node + uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 + with: + node-version-file: .nvmrc + cache: npm + + - name: Install Node deps + run: npm ci + + - name: Setup Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: requirements-dev.txt + + - name: Install dev deps + run: python -m pip install --upgrade pip && pip install -r requirements-dev.txt + + - name: Pytest selftest (smoke + scenarios) + run: pytest bin/tests -q From bb146bc6bdcfe1980885db577644a1d4fc64218a Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:41:54 +0200 Subject: [PATCH 14/17] refactor(f3): extract check_deny / check_allow helpers in selftest orchestrator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each of the 5 scenarios in bin/_selftest.py repeated the same deny phase (exit 2 + permissionDecision deny check) and allow phase (exit 0 check) boilerplate. Extracting two small helpers (check_deny, check_allow) removes ~30 lines of duplication and makes scenario intent more readable without hiding what each scenario asserts. Pre-PR simplify pass (CLAUDE.md regla #7 satisfied: 5 instances). 829 passed + 1 skipped — no regression. 
Co-Authored-By: Claude Opus 4.7 --- bin/_selftest.py | 117 +++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 70 deletions(-) diff --git a/bin/_selftest.py b/bin/_selftest.py index 28064cd..3c27a33 100755 --- a/bin/_selftest.py +++ b/bin/_selftest.py @@ -86,36 +86,46 @@ def init_baseline_repo(repo: Path) -> None: git_in(repo, "commit", "-q", "-m", "baseline") -def scenario_d1_pre_branch_gate(synthetic: Path) -> tuple[bool, str]: - """D1: deny `git checkout -b` without marker; allow with marker present.""" - payload = { - "tool_name": "Bash", - "tool_input": {"command": "git checkout -b feat/example"}, - } - - res = invoke_hook("pre-branch-gate", payload, synthetic) +def check_deny(phase: str, res: subprocess.CompletedProcess) -> tuple[bool, str]: + """Standard deny assertion: exit 2 + permissionDecision deny in stdout.""" if res.returncode != 2: return False, ( - f"deny phase: expected exit 2, got {res.returncode}\n" + f"{phase}: expected exit 2, got {res.returncode}\n" f"stdout: {res.stdout}\nstderr: {res.stderr}" ) if '"permissionDecision": "deny"' not in res.stdout: - return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + return False, f"{phase}: missing permissionDecision deny\nstdout: {res.stdout}" + return True, "" - marker = synthetic / ".claude" / "branch-approvals" / "feat_example.approved" - marker.parent.mkdir(parents=True, exist_ok=True) - marker.touch() - res = invoke_hook("pre-branch-gate", payload, synthetic) +def check_allow(phase: str, res: subprocess.CompletedProcess) -> tuple[bool, str]: + """Standard allow assertion: exit 0.""" if res.returncode != 0: return False, ( - f"allow phase: expected exit 0, got {res.returncode}\n" + f"{phase}: expected exit 0, got {res.returncode}\n" f"stdout: {res.stdout}\nstderr: {res.stderr}" ) - return True, "" +def scenario_d1_pre_branch_gate(synthetic: Path) -> tuple[bool, str]: + """D1: deny `git checkout -b` without marker; allow with marker 
present.""" + payload = { + "tool_name": "Bash", + "tool_input": {"command": "git checkout -b feat/example"}, + } + + ok, reason = check_deny("deny phase", invoke_hook("pre-branch-gate", payload, synthetic)) + if not ok: + return False, reason + + marker = synthetic / ".claude" / "branch-approvals" / "feat_example.approved" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.touch() + + return check_allow("allow phase", invoke_hook("pre-branch-gate", payload, synthetic)) + + POLICY_PRE_WRITE_ONLY = textwrap.dedent("""\ lifecycle: pre_write: @@ -140,27 +150,15 @@ def scenario_d3_pre_write_guard(synthetic: Path) -> tuple[bool, str]: "tool_input": {"file_path": str(target)}, } - res = invoke_hook("pre-write-guard", payload, synthetic) - if res.returncode != 2: - return False, ( - f"deny phase: expected exit 2, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) - if '"permissionDecision": "deny"' not in res.stdout: - return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + ok, reason = check_deny("deny phase", invoke_hook("pre-write-guard", payload, synthetic)) + if not ok: + return False, reason test_pair = synthetic / "hooks" / "tests" / "test_foo.py" test_pair.parent.mkdir(parents=True, exist_ok=True) test_pair.touch() - res = invoke_hook("pre-write-guard", payload, synthetic) - if res.returncode != 0: - return False, ( - f"allow phase: expected exit 0, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) - - return True, "" + return check_allow("allow phase", invoke_hook("pre-write-guard", payload, synthetic)) POLICY_DOCS_SYNC_ONLY = textwrap.dedent("""\ @@ -188,13 +186,9 @@ def scenario_d4_pre_pr_gate(synthetic: Path) -> tuple[bool, str]: } res = invoke_hook("pre-pr-gate", payload, synthetic) - if res.returncode != 2: - return False, ( - f"deny phase: expected exit 2, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) - if '"permissionDecision": "deny"' not 
in res.stdout: - return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" + ok, reason = check_deny("deny phase", res) + if not ok: + return False, reason if "docs-sync" not in res.stdout: return False, f"deny phase: missing 'docs-sync' in reason\nstdout: {res.stdout}" @@ -203,14 +197,7 @@ def scenario_d4_pre_pr_gate(synthetic: Path) -> tuple[bool, str]: git_in(synthetic, "add", "ROADMAP.md", "HANDOFF.md") git_in(synthetic, "commit", "-q", "-m", "docs: sync") - res = invoke_hook("pre-pr-gate", payload, synthetic) - if res.returncode != 0: - return False, ( - f"allow phase: expected exit 0, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) - - return True, "" + return check_allow("allow phase", invoke_hook("pre-pr-gate", payload, synthetic)) POLICY_POST_MERGE_ONLY = textwrap.dedent("""\ @@ -244,11 +231,9 @@ def scenario_d5_post_action(synthetic: Path) -> tuple[bool, str]: "tool_input": {"command": "git merge --no-ff feat/example"}, } res = invoke_hook("post-action", payload, synthetic) - if res.returncode != 0: - return False, ( - f"expected exit 0, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) + ok, reason = check_allow("post-action", res) + if not ok: + return False, reason if "/pos:compound" not in res.stdout: return False, ( f"missing /pos:compound advisory in stdout\n" @@ -279,25 +264,17 @@ def scenario_d6_stop_policy_check(synthetic: Path) -> tuple[bool, str]: encoding="utf-8", ) - deny_payload = {"session_id": "sess-rogue"} - res = invoke_hook("stop-policy-check", deny_payload, synthetic) - if res.returncode != 2: - return False, ( - f"deny phase: expected exit 2, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) - if '"permissionDecision": "deny"' not in res.stdout: - return False, f"deny phase: missing permissionDecision deny\nstdout: {res.stdout}" - - allow_payload = {"session_id": "sess-clean"} - res = invoke_hook("stop-policy-check", allow_payload, 
synthetic) - if res.returncode != 0: - return False, ( - f"allow phase: expected exit 0, got {res.returncode}\n" - f"stdout: {res.stdout}\nstderr: {res.stderr}" - ) + ok, reason = check_deny( + "deny phase", + invoke_hook("stop-policy-check", {"session_id": "sess-rogue"}, synthetic), + ) + if not ok: + return False, reason - return True, "" + return check_allow( + "allow phase", + invoke_hook("stop-policy-check", {"session_id": "sess-clean"}, synthetic), + ) SCENARIOS = [ From c3b76e8c187e17817ae5d3b058fcd45b5eca6d28 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 17:42:07 +0200 Subject: [PATCH 15/17] docs(f3): docs-sync ROADMAP + HANDOFF + MASTER_PLAN + ARCHITECTURE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close F3 in the standard docs-sync surfaces: - ROADMAP.md: F-row 3/4 ramas, F3 row → ✅, full progress block under § F. - HANDOFF.md: §1 snapshot updated (F3 closed, F4 next), §9 next-branch pointer flipped to F4, new §21 with F3 state (entregables, escenarios, out-of-scope, ajustes). - MASTER_PLAN.md § Rama F3: stub → realized decisions (A1.b shape, A2 functional-critical subset, A3 tmpdir + cli-tool, A4 exit + tokens, A5 single-matrix CI job, A6 no Claude runtime), implementation adjustments documented (fnmatch literal vs recursive, docs_sync_rules double-key contract, ci-cd.md H3 placement), drift open post-F3. - docs/ARCHITECTURE.md § 10: new "Selftest end-to-end (entregado en F3)" subsection inside Testing — tres niveles. Documents wrapper + orchestrator + scenarios + CI + drift. Pre-PR gate (D4 dogfooding) satisfied: ROADMAP + HANDOFF in diff; no conditional triggers apply (bin/** + .github/** outside the rules). 
Co-Authored-By: Claude Opus 4.7 --- HANDOFF.md | 55 +++++++++++++++++++++++++++++++++++------ MASTER_PLAN.md | 48 +++++++++++++++++++++++++++++++++++- ROADMAP.md | 58 ++++++++++++++++++++++++++++++++++++++++++-- docs/ARCHITECTURE.md | 23 +++++++++++++++++- 4 files changed, 173 insertions(+), 11 deletions(-) diff --git a/HANDOFF.md b/HANDOFF.md index f5ab2d8..ac138b9 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -5,9 +5,10 @@ ## 1. Snapshot - Repo: `project-operating-system` (plugin `pos`). -- Rama actual: **F2 ✅ PR pendiente** (`feat/f2-agents-subagents`, en revisión docs-sync). Anterior: **F1 ✅ PR #25** (mergeada). Siguiente: **F3 — `feat/f3-selftest-end-to-end`** (`bin/pos-selftest.sh` + escenarios end-to-end). -- F2 entregó: `agents/pos-code-reviewer.md` + `agents/pos-architect.md` (plugin subagents primitive-correct con namespace `pos-*`); flips de `pre-commit-review` y `compound` a los nuevos consumidores; 26 contract tests parametrizados (`agents/tests/test_agent_frontmatter.py`). Behavior tests del consumer flippean assertions (sin delta de count). **No** toca `policy.yaml` (`agents_allowed` diferido). `auditor` diferido (sin consumer real, regla #7). -- F1 entregó: `/pos:audit-session` (read-only advisory main-strict) — compara 3 superficies explícitas de `policy.yaml` (`skills_allowed`, `lifecycle..hooks_required`, `audit.required_logs`) vs `.claude/logs/` reales; reporta drift candidates por bucket sin auto-fix. Policy: `skills_allowed` 13→14. Fase F abierta (2/4 ramas). +- Rama actual: **F3 ✅ PR pendiente** (`feat/f3-selftest-end-to-end`, en revisión docs-sync). Anterior: **F2 ✅ PR #26** (mergeada). Siguiente: **F4 — `feat/f4-marketplace-public-repo`** (`javiAI/pos-marketplace` + release flow). 
+- F3 entregó: `bin/pos-selftest.sh` (wrapper bash mínimo) + `bin/_selftest.py` (orquestador stdlib Python) + `bin/tests/test_selftest_smoke.py` + `bin/tests/test_selftest_scenarios.py` (5 escenarios funcionales-críticos D1/D3/D4/D5/D6 sobre proyecto sintético generado real-time por `npx tsx generator/run.ts --profile cli-tool.yaml`). CI: nuevo job `selftest` (ubuntu × py 3.11) en `.github/workflows/ci.yml`. Sin Claude Code runtime, sin invocaciones reales de skills/agents. Suite: 829 passed + 1 skipped. +- F2 entregó: `agents/pos-code-reviewer.md` + `agents/pos-architect.md` (plugin subagents primitive-correct con namespace `pos-*`); flips de `pre-commit-review` y `compound` a los nuevos consumidores; 26 contract tests parametrizados (`agents/tests/test_agent_frontmatter.py`). **No** toca `policy.yaml` (`agents_allowed` diferido). `auditor` diferido (sin consumer real, regla #7). +- F1 entregó: `/pos:audit-session` (read-only advisory main-strict) — compara 3 superficies explícitas de `policy.yaml` (`skills_allowed`, `lifecycle..hooks_required`, `audit.required_logs`) vs `.claude/logs/` reales; reporta drift candidates por bucket sin auto-fix. Policy: `skills_allowed` 13→14. Fase F abierta (3/4 ramas). - Fuente de verdad ejecutable: [MASTER_PLAN.md](MASTER_PLAN.md). - Estado vivo: [ROADMAP.md](ROADMAP.md). - Arquitectura canonical: [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md). @@ -133,13 +134,14 @@ Hasta F1 el plugin reusaba subagents built-in; desde F2 los críticos son propio ## 9. Próxima rama -**F3 — `feat/f3-selftest-end-to-end`** (tras merge de F2 — tercer bloque de Fase F: selftest end-to-end del plugin). +**F4 — `feat/f4-marketplace-public-repo`** (tras merge de F3 — última rama de Fase F: marketplace público + release flow). Scope: -- `bin/pos-selftest.sh` + escenarios. Valida los gates principales (kickoff, Fase -1 marker, pre-write, pre-pr-gate, stop-policy-check, post-action) sobre un proyecto sintético generado por `npx tsx generator/run.ts`. 
-- Decisión Fase -1: ¿qué subset de gates entran en F3 vs diferimos?, ¿cómo aislar el repo sintético (tmpdir vs fixture committeado)?, ¿qué profile canónico se usa para el smoke (`nextjs-app` / `cli-tool` / `agent-sdk`)?, ¿cómo se ejecuta el selftest en CI (mismo workflow que vitest+pytest o workflow propio)? -- F3 puede dogfooding F2: el selftest invoca `pre-commit-review` y `compound` reales, lo que ejercita el resolution `pos-code-reviewer` / `pos-architect` con fallback `general-purpose` cuando el runtime sintético no expone los agents. +- Crear repo `javiAI/pos-marketplace` con `marketplace.json` + release flow. +- Docs en `docs/RELEASE.md`. +- Workflow `.github/workflows/release.yml` (en tag `v*`): valida versión `plugin.json` = tag, publica release con assets, actualiza `pos-marketplace` vía PR automático. +- Decisión Fase -1: ¿shape de `marketplace.json`?, ¿cómo se versiona el plugin (`plugin.json` vs git tag)?, ¿qué assets entran en el release bundle?, ¿`audit.yml` nightly entra aquí o se difiere a una rama post-F4? ## 10. Estado E2b (✅ merged PR #22) @@ -425,3 +427,42 @@ Entregables completados: **Resultado**: **819 passed + 1 skipped** (vs baseline F1 de 793; +26 netos del nuevo `agents/tests/test_agent_frontmatter.py` parametrizado por 2 slugs × 13 métodos. Las behavior flips de `test_skill_frontmatter.py` actualizan assertions de tests existentes — no añaden count). Sin regresión D1..D6 / E1a..E3b / F1. `stop-policy-check.py` sigue en enforcement live con `ALLOWED_SKILLS = 14` (F2 no añade skills, solo agents). El skip es el D5 intencional `TestIntegrationDiffUnavailable` por subprocess-no-cover. **Detalle + deferrals + ajustes**: ver [ROADMAP.md § feat/f2-agents-subagents](ROADMAP.md), [MASTER_PLAN.md § Rama F2](MASTER_PLAN.md), [.claude/rules/skills-map.md § Subagents del plugin](.claude/rules/skills-map.md), [.claude/rules/skills.md § Fork / delegación](.claude/rules/skills.md). + +## 21. 
Estado F3 (cerrada en rama, docs-sync en curso) + +`feat/f3-selftest-end-to-end` — **tercera rama de Fase F**. Entrega el selftest end-to-end del propio plugin: cierra el círculo "lo que el plugin promete enforce-ar contra repos generados, lo prueba sobre uno generado al vuelo", sin Claude Code runtime y sin invocaciones reales de skills/agents (cobertura estática queda en sus tests dedicados). + +**Entregables**: + +- `bin/pos-selftest.sh` (9 líneas) — wrapper bash mínimo (`#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`). Sin lógica; entrypoint estable. +- `bin/_selftest.py` (~344 líneas) — orquestador stdlib Python. Por escenario: tmpdir + `npx tsx generator/run.ts --profile cli-tool.yaml --out ` para generar proyecto sintético, sobre-escribe la sección mínima de `synthetic/policy.yaml` que el escenario necesita, monta el sintético como git repo (`git init -b main` + commit baseline), invoca el hook real (`hooks/.py`) vía subprocess con payload JSON, asserta exit + tokens en stdout/stderr/files. Imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: `. +- `bin/tests/test_selftest_smoke.py` — 4 tests pytest sobre el contrato del wrapper. +- `bin/tests/test_selftest_scenarios.py` — 5 tests pytest, fixture `module-scoped` que corre `pos-selftest.sh` una vez y comparte stdout. Cada test asserta `"[ok] D{N} {name}"`. +- `.github/workflows/ci.yml` — nuevo job `selftest` (ubuntu × Python 3.11, sin matriz extendida — gates funcionales platform-agnostic, generación sintética es la op más cara). Setup Node + Python + `npm ci` + `pip install -r requirements-dev.txt`. Comando único: `pytest bin/tests -q`. +- `.claude/rules/ci-cd.md` — bullet "integración end-to-end" promovido de "Diferidos" a "Aterrizado". H3 `### Job selftest (entregado en F3)` documenta scope + qué corre + qué queda fuera + drift sintético ↔ meta-repo. 
+ +**Escenarios cubiertos** (5 funcionales-críticos): + +- D1 pre-branch-gate: deny `git checkout -b` sin marker → allow tras `touch `. +- D3 pre-write-guard: deny `Write hooks/foo.py` sin test pair → allow tras crear `hooks/tests/test_foo.py`. +- D4 pre-pr-gate: deny `gh pr create` sin docs-sync (ROADMAP + HANDOFF) → allow tras commit que añade los docs. +- D5 post-action: tras `git merge` confirmado por reflog cuyo diff matchea trigger globs, hook emite advisory `Consider running /pos:compound`. +- D6 stop-policy-check: Stop con `session_id` rogue (allowlist + `skills.jsonl` con entry no permitida) deniega; `session_id` clean permite. + +**Out of scope** (ratificado en F3 Fase -1): + +- D2 session-start (informative, exit 0 sin enforcement) y D6 pre-compact (informative). Sin contrato deny/allow; cobertura via tests unitarios en `hooks/tests/`. +- Claude Code runtime: no instancia Claude Code, no invoca `pre-commit-review` / `compound` / `audit-session`, no dispatcha `/pos:compound`. Skills/agents se verifican estáticamente. +- D5b loader: cubierto **indirectamente** — los hooks D3/D4/D5 lo consumen y los escenarios sobre-escriben la sección relevante de `synthetic/policy.yaml`. Esto desacopla la cobertura de la migración del template `templates/policy.yaml.hbs` al shape post-D5b (drift abierto, reabrir en rama propia post-F3). + +**Decisiones Fase -1 ratificadas**: A1.b (wrapper bash + orquestador Python + smoke pytest); A2 (subset funcional-crítico D1/D3/D4/D5/D6); A3 (tmpdir + cli-tool profile + generator real, no fixture); A4 (exit code + tokens, no golden diff); A5 (job `selftest` en `ci.yml`, no workflow separado, single matrix); A6 (no Claude runtime, no skills/agents reales). + +**Ajustes durante implementación**: + +- D5 trigger globs: `fnmatch` no recursa en `**/`. Corregido `generator/**/*.ts` → `generator/*.ts` (toplevel-only); `**/*.md` → `*.md`. Lección: si una rama futura necesita recursión real, switch a `pathlib.PurePath.match` o glob walker. 
+- D4 accessor doble-clave: `docs_sync_rules()` requiere **ambas** `docs_sync_required` AND `docs_sync_conditional` o devuelve `None`. Corregido añadiendo `docs_sync_conditional: []` al override. +- ci-cd.md placement de H3: la primera versión rompió la lista ordenada (MD029/MD032). Movido a después del item 3 (`release.yml`), antes de `## Workflows generados`. + +**Resultado**: **829 passed + 1 skipped** (vs baseline F2 819 + 1 skip = +10 nuevos: 4 smoke + 5 D-scenarios + 1 GREEN smoke ya merged). Sin regresión D1..D6 + E1a..E3b + F1 + F2. Selftest end-to-end local ~1.2s. + +**Detalle + deferrals + ajustes**: ver [ROADMAP.md § feat/f3-selftest-end-to-end](ROADMAP.md), [MASTER_PLAN.md § Rama F3](MASTER_PLAN.md), [.claude/rules/ci-cd.md § Job selftest (entregado en F3)](.claude/rules/ci-cd.md), [docs/ARCHITECTURE.md § 10 Selftest end-to-end](docs/ARCHITECTURE.md). diff --git a/MASTER_PLAN.md b/MASTER_PLAN.md index 485c744..fd3fcec 100644 --- a/MASTER_PLAN.md +++ b/MASTER_PLAN.md @@ -728,7 +728,53 @@ Esperar aprobación explícita del usuario. Con OK → crear marker + rama. ### Rama F3 — `feat/f3-selftest-end-to-end` -**Scope**: `bin/pos-selftest.sh` + escenarios. Valida todos los gates con proyecto sintético. +**Scope realizado**: `bin/pos-selftest.sh` (wrapper bash mínimo) + `bin/_selftest.py` (orquestador stdlib Python) + 5 escenarios funcionales-críticos (D1/D3/D4/D5/D6) sobre proyecto sintético generado real-time por `npx tsx generator/run.ts --profile cli-tool.yaml`. CI: nuevo job `selftest` en `.github/workflows/ci.yml`. **Sin Claude Code runtime, sin invocaciones reales de skills/agents**. + +**Archivos entregados**: + +- `bin/pos-selftest.sh` — wrapper bash (`#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`). 9 líneas. Sin lógica. +- `bin/_selftest.py` — orquestador Python stdlib (~344 líneas). 
Por escenario: tmpdir + generator real + sobre-escribe sección mínima de `synthetic/policy.yaml` + monta git repo (`git init -b main` + commit baseline) + invoca hook real vía subprocess + asserta exit + tokens. +- `bin/tests/test_selftest_smoke.py` (4 tests) — contrato del wrapper. +- `bin/tests/test_selftest_scenarios.py` (5 tests) — fixture module-scoped + asserción `[ok] D{N} {name}` por escenario. +- `.github/workflows/ci.yml` — job `selftest` (ubuntu × py 3.11, single matrix). Setup Node + Python + `npm ci` + `pip install -r requirements-dev.txt`. Comando: `pytest bin/tests -q`. +- `.claude/rules/ci-cd.md` — bullet "integración end-to-end" promovido a "Aterrizado" + nuevo H3 `### Job selftest (entregado en F3)` con scope + drift sintético. +- `docs/ARCHITECTURE.md § 10 Selftest end-to-end` — subsección nueva dentro de `§ 10 Testing — tres niveles` documentando el wrapper + orquestador, escenarios cubiertos / out of scope, CI, y drift abierto. + +**Decisiones Fase -1 ratificadas (ajustes obligatorios del usuario sobre v1)**: + +- **(A1.b)** Shape: bash wrapper mínimo + Python orquestador stdlib + smoke pytest. Rechazadas A1.a (todo bash, lectura ilegible para 5 escenarios) y A1.c (Python embebido en heredoc, frágil). Bash invoke + Python orquesta = separation of concerns mínima. +- **(A2)** Gates: subset **funcional-crítico** D1/D3/D4/D5 post-action/D6 stop-policy-check. **Diferidos**: D2 session-start (informative, exit 0 sin enforcement) y D6 pre-compact (informative). No tienen contrato deny/allow. +- **(A3)** Sintético: tmpdir + cli-tool profile + `npx tsx generator/run.ts` real (no fixture committeado). Cada escenario tiene su tmpdir + cleanup. Cli-tool por simplicidad (TS + vitest, sin Next.js infra). +- **(A4)** Validación: exit code + assertions sobre stdout/stderr/files. **No** golden diff (frágil ante cambios cosméticos en wording de hooks). +- **(A5)** CI: nuevo `selftest` job en `.github/workflows/ci.yml` (no workflow separado). 
Single matrix (ubuntu × Python 3.11) — gates funcionales platform-agnostic; matriz extendida sería sobre-promesa. +- **(A6)** Skills/agents: NO Claude Code runtime, NO real invocations. Cobertura estática queda en `agents/tests/test_agent_frontmatter.py` y `.claude/skills/tests/test_skill_frontmatter.py`. F3 ejercita **hooks**, no Claude. + +**Escenarios cubiertos** (cada uno asserta `[ok] D{N} {name}` en stdout): + +- **D1 pre-branch-gate** — deny `git checkout -b feat/x` sin marker → allow tras `touch .claude/branch-approvals/feat_x.approved`. Ejercita exit 2 + `permissionDecision: deny` y resolución del slug sanitizado (`/` → `_`). +- **D3 pre-write-guard** — deny `Write hooks/foo.py` sin test pair → allow tras crear `hooks/tests/test_foo.py`. Policy override: `lifecycle.pre_write.enforced_patterns` con label `hooks_top_level_py`. Ejercita el accessor `pre_write_rules()` del loader D5b. +- **D4 pre-pr-gate** — deny `gh pr create` sin docs-sync (ROADMAP + HANDOFF en el diff) → allow tras commit que añade los docs. Policy override: `docs_sync_required: [ROADMAP.md, HANDOFF.md]` + `docs_sync_conditional: []` (ambas claves obligatorias por el accessor `docs_sync_rules()` o devuelve `None`). +- **D5 post-action** — tras `git merge` confirmado por reflog cuyo diff matchea trigger globs, hook emite advisory `Consider running /pos:compound`. Policy override: `lifecycle.post_merge.skills_conditional[0].trigger` con `touched_paths_any_of: ["generator/*.ts"]`, `skip_if_only: ["*.md"]`, `min_files_changed: 1`. Nota: `fnmatch` no recursa en `**/` — globs literales toplevel-only. +- **D6 stop-policy-check** — Stop con `session_id` rogue (allowlist + `skills.jsonl` con entry no permitida pre-seeded) deniega; `session_id` clean permite. Policy override: top-level `skills_allowed: ["pos:simplify"]`. Ejercita el filtrado por `session_id` y el tri-estado del accessor `skills_allowed_list()`. 
+ +**Ajustes durante implementación** (lecciones para ramas futuras): + +- **D5 trigger globs literales**: el primer attempt usó `generator/**/*.ts` esperando recursión; `fnmatch` lo trata como literal. Corregido a `generator/*.ts` (toplevel-only) + `*.md` para skip_if_only. Si una rama futura necesita recursión real → switch a `pathlib.PurePath.match` o glob walker. +- **D4 accessor doble-clave**: el primer attempt para D4 sólo puso `docs_sync_required` en el override; el accessor `docs_sync_rules()` requiere **ambas** `docs_sync_required` AND `docs_sync_conditional` o devuelve `None`. Corregido añadiendo `docs_sync_conditional: []`. Documentado como contrato del loader. +- **ci-cd.md placement de H3**: la primera versión del H3 "Job selftest" rompió la lista ordenada (MD029/MD032 markdown lint). Movido a después del item 3 (`release.yml`), antes de `## Workflows generados`. Convención: H3 entregables van fuera de la lista numerada. + +**Drift abierto post-F3**: + +- `templates/policy.yaml.hbs` y `generator/renderers/policy.ts` siguen emitiendo el shape **pre-D5b** (sección `pre_write` plana sin `enforced_patterns`, `docs_sync_required` flat sin `docs_sync_conditional`). Cada escenario sobre-escribe la sección que necesita en `synthetic/policy.yaml` para desacoplar la cobertura de la migración del template. Reabrir en rama propia post-F3 (no bloqueante). + +**Contexto a leer** (rangos): este §, `HANDOFF.md §1/§9`, `ROADMAP.md fila F3 + sección F`, `hooks/_lib/policy.py § docs_sync_rules + post_merge_trigger + skills_allowed_list`, `hooks/post-action.py § classify_command + reflog_confirms`, `hooks/stop-policy-check.py § _extract_invoked_skills`, `policy.yaml § lifecycle.pre_write/pre_pr/post_merge + skills_allowed`. + +**Criterio de salida**: **829 passed + 1 skipped** (vs baseline F2 819 + 1 skip; +10 netos: 4 smoke + 5 D-scenarios + 1 GREEN smoke). Sin regresión D1..D6 + E1a..E3b + F1 + F2. Selftest end-to-end local ~1.2s. 
Docs-sync dentro del PR (ROADMAP § F3 + HANDOFF §1/§9/§21 + MASTER_PLAN § Rama F3 expandida + `.claude/rules/ci-cd.md` selftest job promovido + `docs/ARCHITECTURE.md § 10 Selftest end-to-end`). `pre-pr-gate.py` aprueba este mismo PR — los conditional triggers no aplican (`bin/**` no está en `docs_sync_conditional`, `.github/**` no está bajo `generator|hooks|skills|patterns`); required `ROADMAP.md` + `HANDOFF.md` satisfecho. + +**Carry-overs a F4**: + +- `.github/workflows/release.yml` queda como entrega de F4 (no F3). El `selftest` job se reusará en `release.yml` como gate antes de publicar tag. +- Drift `templates/policy.yaml.hbs` → shape post-D5b queda diferido (no bloquea F4 ni Fase G). ### Rama F4 — `feat/f4-marketplace-public-repo` diff --git a/ROADMAP.md b/ROADMAP.md index 9136a78..ce7fd7b 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -13,7 +13,7 @@ Estado vivo. Cada fila refleja una rama de [MASTER_PLAN.md](MASTER_PLAN.md). | E1 | Skills orquestación | ✅ (E1a + E1b) | | E2 | Skills calidad | ✅ (E2a + E2b) | | E3 | Skills patterns + tests | ✅ (E3a ✅, E3b ✅) | -| F | Audit + selftest + marketplace | 🔄 (F1 ✅, F2 ✅, F3..F4 ⏳) | +| F | Audit + selftest + marketplace | 🔄 (F1 ✅, F2 ✅, F3 ✅, F4 ⏳) | | G | Knowledge Plane (opcional) | ⏳ solo planificación (scope cerrado, sin implementación) | ## Ramas @@ -44,7 +44,7 @@ Estado vivo. Cada fila refleja una rama de [MASTER_PLAN.md](MASTER_PLAN.md). 
| `feat/e3b-skill-test-scaffold-audit-coverage` | `/pos:test-scaffold` (writer-scoped), `/pos:test-audit` (read-only advisory), `/pos:coverage-explain` (read-only advisory); `skills_allowed` 10→13 | ✅ | — (PR pendiente) | | `feat/f1-skill-audit-session` | `/pos:audit-session` (read-only advisory main-strict) — compara 3 superficies de `policy.yaml` (`skills_allowed`, `lifecycle.*.hooks_required`, `audit.required_logs`) vs `.claude/logs/`; `skills_allowed` 13→14 | ✅ | — (PR pendiente) | | `feat/f2-agents-subagents` | 2 plugin subagents en `agents/` con namespace `pos-*`: `pos-code-reviewer` (consumido por `pre-commit-review`), `pos-architect` (consumido por `compound`); `auditor` diferido (sin consumer real); `agents_allowed` diferido (sin enforcement consumer) | ✅ | — (PR pendiente) | -| `feat/f3-selftest-end-to-end` | `bin/pos-selftest.sh` + escenarios | ⏳ | — | +| `feat/f3-selftest-end-to-end` | `bin/pos-selftest.sh` + orquestador Python + 5 escenarios funcionales-críticos (D1/D3/D4/D5/D6) sobre proyecto sintético | ✅ | — (PR pendiente) | | `feat/f4-marketplace-public-repo` | `javiAI/pos-marketplace` + release flow | ⏳ | — | | `feat/fx-knowledge-plane-plan` | Docs-only: abre FASE G en MASTER_PLAN (capa opcional knowledge plane) | ⏳ | — | | `feat/g1-knowledge-plane-contract` | Contrato tool-agnostic (raw/wiki/schema) + opt-in questionnaire | ⏳ | — | @@ -639,6 +639,60 @@ Contrato fijado por la suite (extiende E1..F1 sin reabrirlos): **Criterio de salida**: 819 verdes + 1 skip intencional. Sin regresión sobre F1. Docs-sync completo dentro del PR (incluye `docs/ARCHITECTURE.md § 6 Agents` reescrita post-revisión). `pre-pr-gate.py` aprueba este mismo PR — el conditional `skills/**` (porque tocamos dos `SKILL.md`) exige `skills-map.md`, satisfecho. `agents/**` no está en `policy.yaml.lifecycle.pre_pr.docs_sync_conditional` hoy (drift abierto deliberadamente — reabrir cuando un consumer enforcement justifique extender el gate). 
+### `feat/f3-selftest-end-to-end` — ✅ (PR pendiente) + +Tercera rama de Fase F — entrega el **selftest end-to-end del propio plugin `pos`**: un wrapper bash mínimo que delega a un orquestador stdlib Python que ejercita los gates funcionales-críticos del plugin contra un proyecto sintético generado real-time por `npx tsx generator/run.ts`. F3 cierra el círculo "lo que el plugin promete enforce-ar contra repos generados, lo prueba sobre uno generado al vuelo" — sin Claude Code runtime, sin invocaciones reales de skills/agents. + +Entregables: + +- `bin/pos-selftest.sh` (9 líneas) — wrapper bash mínimo (`#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`). No contiene lógica; es entrypoint estable que tests + CI consumen sin dependencia de path absoluto. +- `bin/_selftest.py` (~344 líneas) — orquestador stdlib (sin dependencias externas). Por cada escenario: crea un tmpdir, ejecuta `npx tsx generator/run.ts --profile questionnaire/profiles/cli-tool.yaml --out ` para generar un proyecto sintético, sobre-escribe `synthetic/policy.yaml` con la sección mínima que el escenario necesita, monta el repo sintético como git repo (`git init -b main` + commit baseline), e invoca el hook real (`hooks/.py`) vía subprocess con payload JSON. Asserta exit code + presencia de tokens en stdout/stderr/files. Imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: `. Exit 0/1 según pass/fail. +- `bin/tests/test_selftest_smoke.py` — 4 tests pytest sobre el contrato del wrapper (existe, ejecutable, delega a `python3 bin/_selftest.py`, exit 0 al correr). Bloquea regresiones en la forma del entrypoint. +- `bin/tests/test_selftest_scenarios.py` — 5 tests pytest, fixture `module-scoped` que corre `pos-selftest.sh` una vez y comparte stdout entre los tests. Cada test asserta `"[ok] D{N} {name}"` en stdout. 
+- `.github/workflows/ci.yml` — nuevo job `selftest` (ubuntu × Python 3.11, sin matriz extendida — los gates funcionales son platform-agnostic y la generación del proyecto sintético es la operación más cara). Setup: Node + Python + `npm ci` + `pip install -r requirements-dev.txt`. Comando único: `pytest bin/tests -q`. +- `.claude/rules/ci-cd.md` — bullet "integración end-to-end (`pytest bin/tests` — smoke + 5 escenarios funcionales-críticos vía `bin/pos-selftest.sh`, ubuntu × Python 3.11)" promovido de "Diferidos" a "Aterrizado". H3 `### Job selftest (entregado en F3)` documenta scope, qué corre, qué queda fuera, y el drift sintético ↔ meta-repo. + +Escenarios cubiertos (5 funcionales-críticos): + +- **D1 pre-branch-gate** (`hooks/pre-branch-gate.py`): deny `git checkout -b` sin marker → allow tras `touch .claude/branch-approvals/.approved`. Ejercita exit 2 + `permissionDecision: deny` y resolución del slug sanitizado. +- **D3 pre-write-guard** (`hooks/pre-write-guard.py`): deny `Write hooks/foo.py` sin test pair → allow tras crear `hooks/tests/test_foo.py`. Policy override mínima: `lifecycle.pre_write.enforced_patterns` con label `hooks_top_level_py`. Ejercita el accessor `pre_write_rules()` del loader D5b. +- **D4 pre-pr-gate** (`hooks/pre-pr-gate.py`): deny `gh pr create` sin docs-sync (ROADMAP + HANDOFF en el diff) → allow tras commit que añade los docs. Policy override: `docs_sync_required: [ROADMAP.md, HANDOFF.md]` + `docs_sync_conditional: []` (ambas claves obligatorias por el accessor `docs_sync_rules()` del loader D5b). +- **D5 post-action** (`hooks/post-action.py`): tras `git merge` confirmado por reflog cuyo diff matchea trigger globs, el hook emite advisory `Consider running /pos:compound`. Policy override: `lifecycle.post_merge.skills_conditional[0].trigger` con `touched_paths_any_of: ["generator/*.ts"]`, `skip_if_only: ["*.md"]`, `min_files_changed: 1`. Nota: `fnmatch` no recursa en `**/` — globs literales toplevel-only. 
+- **D6 stop-policy-check** (`hooks/stop-policy-check.py`): Stop hook con `session_id` rogue (allowlist + `skills.jsonl` con entry no permitida pre-seeded) deniega; `session_id` clean (allowlisted skill o sin invocaciones) permite. Policy override: top-level `skills_allowed: ["pos:simplify"]`. Ejercita el filtrado por `session_id` y el tri-estado del accessor `skills_allowed_list()`. + +Gates explícitamente fuera de scope (ratificados en F3 Fase -1): + +- D2 session-start (informative, exit 0 sin enforcement) y D6 pre-compact (informative). No tienen contrato deny/allow a verificar; el patrón se cubre vía sus tests unitarios en `hooks/tests/`. +- Claude Code runtime: el selftest no instancia Claude Code, no invoca skills/agents reales (no `pre-commit-review`, no `compound`, no `audit-session`), no dispatcha `/pos:compound`. Skills/agents se verifican por presencia estática en `agents/tests/test_agent_frontmatter.py` y `.claude/skills/tests/test_skill_frontmatter.py`. +- D5b loader: cubierto **indirectamente**. Los hooks D3/D4/D5 lo consumen en cada escenario, y los escenarios sobre-escriben la sección relevante del `synthetic/policy.yaml` directamente — esto desacopla la cobertura de D5b de la migración del template `templates/policy.yaml.hbs` al shape post-D5b (drift abierto, reabrir en rama propia post-F3). + +Suite global post-F3: **829 passed + 1 skipped** (vs baseline F2 de 819 + 1 skip; delta +10 = 5 D-scenarios + 4 smoke + 1 GREEN smoke ya merged; sin regresión en D1..D6 + E1a..E3b + F1 + F2). Selftest end-to-end local ~1.2s. + +Contrato fijado por la suite (extiende E1..F2 sin reabrirlos): + +- Wrapper shape inmutable: `bin/pos-selftest.sh` es bash + delega a `python3 bin/_selftest.py`. No lógica en bash. Smoke tests bloquean cualquier reescritura. +- Cada escenario corre contra **un proyecto sintético generado de cero** por el generador real (`npx tsx generator/run.ts --profile cli-tool.yaml --out <tmpdir>`). No fixture committeado, no mock del generador.
+- Cada escenario sobre-escribe sólo la sección de `synthetic/policy.yaml` que necesita — no toca el resto del shape — para desacoplar la cobertura de la migración del template. +- Selftest **nunca instancia Claude Code**, nunca invoca skills/agents reales, nunca dispatcha `/pos:*`. La cobertura de skills/agents queda en sus tests dedicados (frontmatter + behavior). +- CI `selftest` job es **single-matrix** (ubuntu × Python 3.11). Los gates funcionales son platform-agnostic; matriz extendida sería sobre-promesa. + +**Decisiones cerradas en Fase -1 (ratificadas por el usuario)**: + +- (A1.b) Shape: `bin/pos-selftest.sh` mínimo + `bin/_selftest.py` orquestador stdlib + `bin/tests/test_selftest_smoke.py`. **No** A1.a (todo bash) ni A1.c (Python embebido inline). El bash mínimo facilita el invoke, el Python orquesta. +- (A2) Gates: subset funcional-crítico D1/D3/D4/D5/D6 stop-policy-check. D2 + D6 pre-compact diferidos por ser informative (sin contrato deny/allow). +- (A3) Sintético: tmpdir + cli-tool profile + `npx tsx generator/run.ts` real (no fixture committeado). Cada escenario lleva su propio tmpdir + cleanup. +- (A4) Validación: exit code + assertions sobre stdout/stderr/files. **No** golden diff (frágil ante cambios cosméticos en hooks). +- (A5) CI: nuevo `selftest` job en `.github/workflows/ci.yml` (no workflow separado). Single matrix. +- (A6) Skills/agents: NO Claude Code runtime, NO real invocations. Solo cheap static checks (presencia de archivos, no ejecución). + +**Ajustes vs plan original**: + +- **D5 trigger globs literales**: el primer attempt usó `generator/**/*.ts` esperando recursión; `fnmatch` lo trata como literal. Corregido a `generator/*.ts` (toplevel-only) + `*.md` para skip_if_only. Lección documentada en gotchas: si una rama futura necesita recursión real, switch a `pathlib.PurePath.match` o glob walker. 
+- **D4 accessor doble-clave**: el primer attempt para D4 puso solo `docs_sync_required` en el override; el accessor `docs_sync_rules()` requiere **ambas** `docs_sync_required` AND `docs_sync_conditional` o devuelve `None`. Corregido añadiendo `docs_sync_conditional: []`. Documentado como contrato del loader. +- **ci-cd.md placement de H3**: la primera versión del H3 "Job selftest" rompió la lista ordenada (MD029/MD032). Movido a después del item 3 (`release.yml`), antes de `## Workflows generados`. Convención: H3 entregables van fuera de la lista de workflows. + +**Criterio de salida**: 829 verdes + 1 skip. Sin regresión sobre F2. Docs-sync dentro del PR (ROADMAP § F3 + HANDOFF §1/§9/§21 + MASTER_PLAN § Rama F3 expandida + `.claude/rules/ci-cd.md` selftest job promovido + `docs/ARCHITECTURE.md § 10 Selftest end-to-end`). `pre-pr-gate.py` aprueba este mismo PR — los conditional triggers no aplican (`bin/**` no está en `docs_sync_conditional`, `.github/**` no está bajo `generator|hooks|skills|patterns`); required `ROADMAP.md` + `HANDOFF.md` satisfecho. + ## Convenciones de este archivo - Una fila por rama. `⏳` pendiente, `🔄` en vuelo, `✅` completada, `❌` abandonada, `🚫` bloqueada. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index cf8ab8b..b1cf463 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -598,7 +598,7 @@ PR separado `chore/compound-YYYY-MM-DD` con: - Unit (vitest/pytest). - Integration. -- Selftest (`bin/pos-selftest.sh`): escenarios end-to-end con proyecto sintético. +- Selftest (`bin/pos-selftest.sh`): escenarios end-to-end con proyecto sintético (ver subsección). - Coverage gate (threshold de `policy.yaml.testing.unit.coverage_threshold`). Hook `pre-push.sh` corre todos antes de permitir `git push`. @@ -607,6 +607,27 @@ Hook `pre-push.sh` corre todos antes de permitir `git push`. Mismo set que local + matriz (2 OS × 2 versiones runtime). Branch protection requiere todos verdes para merge a main. 
+### Selftest end-to-end (entregado en F3) + +`bin/pos-selftest.sh` cierra el círculo "lo que el plugin promete enforce-ar contra repos generados, lo prueba sobre uno generado al vuelo". Estructura: + +- **Wrapper** `bin/pos-selftest.sh` (bash mínimo): `#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`. Sin lógica; entrypoint estable que tests + CI consumen sin path absoluto. +- **Orquestador** `bin/_selftest.py` (stdlib only, sin deps externas). Por escenario: + 1. crea tmpdir, + 2. ejecuta `npx tsx generator/run.ts --profile questionnaire/profiles/cli-tool.yaml --out <tmpdir>` (el generador real, no fixture committeado), + 3. sobre-escribe la sección mínima de `synthetic/policy.yaml` que el escenario necesita (desacopla la cobertura de la migración pendiente del template `policy.yaml.hbs` al shape post-D5b), + 4. monta el sintético como git repo (`git init -b main` + commit baseline), + 5. invoca el hook real (`hooks/<hook>.py`) vía subprocess con payload JSON, + 6. asserta exit code + presencia de tokens en stdout/stderr/files, + 7. imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: <reason>`. +- **Pytest harness** `bin/tests/test_selftest_smoke.py` (4 tests, contrato del wrapper) + `bin/tests/test_selftest_scenarios.py` (5 tests, fixture module-scoped que corre `pos-selftest.sh` una vez y comparte stdout). + +**Escenarios cubiertos** (5 funcionales-críticos): D1 pre-branch-gate, D3 pre-write-guard, D4 pre-pr-gate, D5 post-action (advisory `/pos:compound`), D6 stop-policy-check. **Out of scope**: D2 session-start + D6 pre-compact (informative, sin contrato deny/allow), Claude Code runtime (no instancia Claude, no invoca skills/agents reales — cobertura estática queda en `agents/tests/test_agent_frontmatter.py` y `.claude/skills/tests/test_skill_frontmatter.py`), D5b loader (cubierto indirectamente vía hooks consumidores).
+ +**CI**: nuevo job `selftest` en `.github/workflows/ci.yml` (ubuntu × Python 3.11, single matrix — gates funcionales platform-agnostic; matriz extendida sería sobre-promesa). Comando único: `pytest bin/tests -q`. Ejecución end-to-end local ~1.2s. + +**Drift abierto**: `templates/policy.yaml.hbs` y `generator/renderers/policy.ts` siguen emitiendo el shape pre-D5b. F3 lo evade sobre-escribiendo la sección relevante en `synthetic/policy.yaml` por escenario. Reabrir migración del template en rama propia post-F3. + ### Generador emite test harness El generador emite, según stack del profile: From 925d198a09db3f941ecc2212a6b698dfb53ea8ae Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 20:00:07 +0200 Subject: [PATCH 16/17] docs(f3): open refactor/template-policy-d5b-migration stub for post-F3 drift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F3 documented an open drift: templates/policy.yaml.hbs + generator/renderers/policy.ts still emit pre-D5b shape, evaded in F3 via per-scenario overlays in bin/_selftest.py. Open the stub branch slot now so the carry-over has a concrete home: - MASTER_PLAN.md § Rama F3b — full stub (scope, contexto a leer, decisiones a cerrar en Fase -1, criterio de salida, rationale de no entregarlo en F3). Position F3b mirrors precedent of refactor/d5-policy-loader as Rama D5b. - ROADMAP.md — new row between F3 and F4. F3 row updated with PR #27. - HANDOFF.md § 7 Gotchas — drift bullet now points to the stub instead of saying "diferida a rama propia post-D6". No code changes. Stub is informational; Fase -1 happens when F4 closes (or when a consumer requires post-D5b shape in generator output). 
--- HANDOFF.md | 2 +- MASTER_PLAN.md | 42 +++++++++++++++++++++++++++++++++++++++++- ROADMAP.md | 3 ++- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/HANDOFF.md b/HANDOFF.md index ac138b9..a851fee 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -101,7 +101,7 @@ Ejecuta §2.1 Fase -1 completo. Espera aprobación explícita antes de `git chec - El hook `pre-write-guard.py` **ya está vivo** desde D3: PreToolUse(Write) blocker. Bloquea con exit 2 la creación de archivos en paths enforced (`hooks/*.py` top-level + `generator/**/*.ts` excluyendo tests/fixtures) sin test pair co-located. Los writes sobre archivos existentes en esos paths enforced sí pasan, pero siguen logueándose (allow / audit trail del flujo de edición). El pass-through silencioso (sin log) aplica solo a `hooks/_lib/**`, tests/docs/templates/meta y paths fuera del repo. Bypass legítimo: crear primero `hooks/tests/test_.py` o `.test.ts` con un test que falle (RED), luego escribir la implementación. - El hook `pre-pr-gate.py` **ya está vivo** desde D4: PreToolUse(Bash) blocker sobre `gh pr create` únicamente. Resuelve base con `git merge-base HEAD main` y calcula archivos tocados con `git diff --name-only HEAD`. Bloquea con exit 2 cuando ese diff no contiene los docs exigidos (required `ROADMAP.md` + `HANDOFF.md`; conditional por prefijo: `generator/**` | `hooks/**` excl. `hooks/tests/` | `.claude/patterns/**` → `docs/ARCHITECTURE.md`; `skills/**` → `.claude/rules/skills-map.md`). Skip advisory (pass-through + log explícito en hook log, no en phase-gates) en `main` / `master` / HEAD detached / cwd no-git / main borrada localmente / `git diff` subprocess falla (`diff_files() is None`). Empty diff (`[]`) → deny dedicado con reason `empty PR`, separado textualmente del reason docs-sync. 3 entradas advisory `deferred` (skills_required / ci_dry_run_required / invariants_check) se loguean en cada decisión real como scaffold activable sin cambio de shape cuando sus ramas dedicadas aporten sustrato. 
Reglas hardcoded (mirror de `policy.yaml.lifecycle.pre_pr.docs_sync_required` + `docs_sync_conditional`); divergencia deliberada D4: la lista `hooks/**` de la policy es uniforme, el hook excluye `hooks/tests/` — convergencia diferida a la rama policy-loader. Migración a parser declarativo en esa misma rama (junto con los paths hardcoded de D3). - **Loader declarativo `hooks/_lib/policy.py`** vivo desde D5b: los tres hooks D3/D4/D5 ya **no** hardcodean policy — leen via `docs_sync_rules()` / `post_merge_trigger()` / `pre_write_rules()`. Failure mode canónico (c.2): `policy.yaml` ausente o corrupto → loader devuelve `None` → hook degrada a pass-through advisory con `status: policy_unavailable` en su propio log. Nunca deny blind. Consumo único (stdlib + `pyyaml==6.0.2`, primera dependencia no-stdlib en `hooks/_lib/`, justificada en kickoff D5b). Ver `.claude/rules/hooks.md § Policy loader`. -- **Drift temporal meta-repo ↔ template** abierto tras D5b: `policy.yaml` (meta-repo) tiene el shape nuevo (`pre_write.enforced_patterns` + `docs_sync_conditional.hooks/**` con `excludes: ["hooks/tests/**"]`); `templates/policy.yaml.hbs` y el renderer `generator/renderers/policy.ts` **no** — un proyecto generado hoy con `pos` emite un `policy.yaml` con el shape previo. Reconciliación diferida a rama propia post-D6 (update template + renderer + snapshots + `pyyaml` a requirements-dev de proyectos Python generados). +- **Drift temporal meta-repo ↔ template** abierto tras D5b: `policy.yaml` (meta-repo) tiene el shape nuevo (`pre_write.enforced_patterns` + `docs_sync_conditional.hooks/**` con `excludes: ["hooks/tests/**"]`); `templates/policy.yaml.hbs` y el renderer `generator/renderers/policy.ts` **no** — un proyecto generado hoy con `pos` emite un `policy.yaml` con el shape previo. 
Reconciliación con stub abierto en `refactor/template-policy-d5b-migration` (ver [MASTER_PLAN.md § Rama F3b](MASTER_PLAN.md)) — no bloquea F4 ni Fase G; F3 lo evade vía overlays por escenario en `bin/_selftest.py`. - El hook `post-action.py` **ya está vivo** desde D5: PostToolUse(Bash) hook **non-blocking** (exit 0 siempre; no emite `permissionDecision`). Detección jerárquica 2 tiers — Tier 1 (`shlex.split`): matcher A `git merge ` (excluye `--abort/--quit/--continue/--skip`), matcher C `git pull` (excluye `--rebase/-r`). Tier 2 (`git reflog HEAD -1 --format=%gs`): confirma `"merge "` (A) o `"pull:" | "pull "` y no `"pull --rebase"` (C). `gh pr merge` (matcher B) descartado en Fase -1 por ausencia de `tool_response.exit_code` garantizado. Con ambos tiers confirmados: `git diff --name-only HEAD@{1} HEAD` + `fnmatch` contra `TRIGGER_GLOBS` mirror de `policy.yaml.lifecycle.post_merge.skills_conditional[0]` (`generator/lib/**`, `generator/renderers/**`, `hooks/**`, `skills/**`, `templates/**/*.hbs`), respetando `SKIP_IF_ONLY_GLOBS` (`docs/**`, `*.md`, `.claude/patterns/**`) y `MIN_FILES_CHANGED = 2`. Si matchea, emite `hookSpecificOutput.additionalContext` sugiriendo `/pos:compound` (4 líneas, cap 3 paths + `(+N more)`). **Nunca dispatcha la skill** — advisory-only; D5 sólo sugiere, E3a la entrega. Double log: `post-action.jsonl` (4 status distinguidos: `tier2_unconfirmed`, `diff_unavailable`, `confirmed_no_triggers`, `confirmed_triggers_matched`) + `phase-gates.jsonl` (evento `post_merge`, sólo en los dos status confirmed — los advisory tier2/diff no cruzan la puerta del lifecycle). Pass-throughs (Tier 1 no matchea) NO loguean. Reuso `_lib/`: `append_jsonl` + `now_iso`. Hardcode mirror de `policy.yaml` (segunda repetición tras D4) — regla #7 CLAUDE.md **cumplida dos veces** para el parser declarativo, precondición ready para la rama policy-loader. 
- El hook `pre-compact.py` **ya está vivo** desde D6: PreCompact **informative** (shape D2) — exit 0 siempre, nunca `permissionDecision`. Lee `pre_compact_rules(cwd).persist` del loader (D5b) y emite `additionalContext` con checklist de items a persistir antes del compact. Failure mode canónico (c.2): policy ausente o sección `lifecycle.pre_compact` ausente → loader devuelve `None` → hook emite contexto informativo mínimo que señala policy no disponible + log `status: policy_unavailable`. Safe-fail informative ante stdin corrupto: contexto degradado que señala el error de payload + log `status: payload_error`. Double log: `pre-compact.jsonl` siempre; `phase-gates.jsonl` evento `pre_compact` **solo** en happy path. (Wording exacto del contexto no es contrato — no se citan strings; ver `hooks/pre-compact.py` y sus tests si algún consumidor necesita inspeccionarlo.) - El hook `stop-policy-check.py` **ya está vivo** desde D6 como **scaffold activable** — shape D1 blocker (safe-fail canónico deny en payload malformado), pero **sin enforcement en producción hoy**. Lee `skills_allowed_list(cwd)` como tri-estado: `None` → `status: deferred` + pass-through silencioso (sección `skills_allowed` ausente del `policy.yaml` del meta-repo hoy); `SKILLS_ALLOWED_INVALID` → `status: policy_misconfigured` + pass-through (clave presente pero mal formada — **observable, NO silenciosa**: un typo en la policy ya no apaga enforcement como si fuera deferred); `()` → explicit deny-all; `tuple[str, ...]` poblada → enforcement live. Las invocaciones se leen de `.claude/logs/skills.jsonl` **filtradas por el `session_id`** del payload Stop actual (entradas de sesiones anteriores o sin `session_id` se ignoran); sin `session_id` en el payload → deny safe-fail (no se puede scopiar enforcement). Double log solo en decisiones reales (`deferred`/`policy_misconfigured`/`policy_unavailable` van solo al hook log, no cruzan `phase-gates.jsonl`). 
Framing **anti-sobrerrepresentación**: hoy el hook NO protege nada en producción; la entrega D6 aporta el shape y la suite de tests que valida el contrato — el switch-on real llega cuando una skill poblada declare su allowlist. diff --git a/MASTER_PLAN.md b/MASTER_PLAN.md index fd3fcec..5ece4e2 100644 --- a/MASTER_PLAN.md +++ b/MASTER_PLAN.md @@ -774,7 +774,47 @@ Esperar aprobación explícita del usuario. Con OK → crear marker + rama. **Carry-overs a F4**: - `.github/workflows/release.yml` queda como entrega de F4 (no F3). El `selftest` job se reusará en `release.yml` como gate antes de publicar tag. -- Drift `templates/policy.yaml.hbs` → shape post-D5b queda diferido (no bloquea F4 ni Fase G). +- Drift `templates/policy.yaml.hbs` → shape post-D5b queda diferido (no bloquea F4 ni Fase G). Stub abierto en `refactor/template-policy-d5b-migration` (ver siguiente sección). + +### Rama F3b — `refactor/template-policy-d5b-migration` (stub) + +**Status**: stub abierto post-F3. Sub-rama refactor que cierra el drift `meta-repo ↔ template` documentado en D5b (rama D5b decidió explícitamente no migrar el template) y reforzado en F3 (cada escenario sobre-escribe `synthetic/policy.yaml` para evadir el drift). No bloquea F4 ni Fase G — se programa cuando un consumer real (`pos:audit-session` corriendo sobre proyecto generado, o futuro test contractual del template) requiera el shape post-D5b en el output del generator. + +**Scope previsto**: + +- `templates/policy.yaml.hbs` — migrar a shape post-D5b: bloque `pre_write.enforced_patterns` (lista, no flat) + `lifecycle.pre_pr.docs_sync_conditional[].excludes` + cualquier otra sección que el loader (`hooks/_lib/policy.py`) consuma vía dataclass tipada. +- `generator/renderers/policy.ts` — adaptar el render para emitir el shape nuevo. Validar que las 3 ramas del profile (`nextjs-app` / `cli-tool` / `agent-sdk`) compilan sin patches manuales. +- `generator/__snapshots__//policy.yaml.snap` — re-snapshotear los 3 perfiles canónicos. 
+- `templates/requirements-dev.txt.hbs` (o equivalente del stack Python emitido) — añadir `pyyaml==6.0.2` cuando el profile sea Python, consistente con el meta-repo (loader depende de pyyaml). +- `bin/_selftest.py` — limpieza opcional: una vez el template emite shape post-D5b, los escenarios D3/D4/D5 pueden simplificarse (sólo override del campo específico, no la sección entera). Reabrir las constants `POLICY_PRE_WRITE_ONLY` / `POLICY_DOCS_SYNC_ONLY` / `POLICY_POST_MERGE_ONLY` para reducir. +- Tests: + - `generator/lib/__tests__/policy.test.ts` — actualizar fixtures + asserciones del renderer. + - `bin/tests/test_selftest_scenarios.py` — debe seguir verde sin cambios (los hooks consumen el loader, no el shape literal). Si rompe, ahí está la regresión que justifica la rama. + - Considerar añadir un test contractual nuevo: render del policy del profile X parsea limpio con `hooks/_lib/policy.py.load_policy` (cierra el drift por construcción). + +**Contexto a leer**: + +- `policy.yaml` (meta-repo, shape post-D5b) vs `templates/policy.yaml.hbs` (shape pre-D5b) — diff manual. +- `hooks/_lib/policy.py § dataclasses + accessors` — contrato que el template debe cumplir. +- `bin/_selftest.py § POLICY_*_ONLY constants` — overrides actuales por escenario, son la referencia de qué shape espera cada hook. +- `MASTER_PLAN.md § Rama D5b` — decisiones (b.1 strings/globs en YAML, c.2 failure mode `None`). +- `generator/renderers/policy.ts` + sus tests + snapshots actuales. + +**Decisiones a cerrar en Fase -1**: + +- Ámbito: ¿migrar todos los profiles a la vez o uno por commit (pattern incremental F3)? Probablemente uno por commit: `cli-tool` primero (es el que usa el selftest), luego `nextjs-app` y `agent-sdk` con re-snapshot. +- ¿Añadir test contractual `template render → loader parse` o dejarlo implícito por el selftest? El test contractual cierra el drift por construcción y pertenece a `generator/lib/__tests__/`. 
+- ¿Limpieza de overlays en `bin/_selftest.py` se hace en esta rama o se difiere? Probablemente en esta rama — la justificación de la rama es exactamente que los overlays dejen de ser necesarios. + +**Criterio de salida (preliminar)**: + +- Los 3 profiles canónicos generan `policy.yaml` que parsea con `hooks/_lib/policy.py` sin warnings ni `policy_unavailable`. +- `bin/tests/test_selftest_scenarios.py` verde sin cambios funcionales (sólo simplificación de overlays si se hace). +- Snapshots actualizados con diff revisado. +- Drift `meta-repo ↔ template` cerrado en HANDOFF + ARCHITECTURE. +- Sin regresión en tests del generator ni de los 3 hooks D3/D4/D5. + +**Razón para no entregarlo en F3**: F3 es selftest. Mezclar migración del template inflaría el scope, retrasaría la cobertura D-gates, y los overlays por escenario son una solución limpia y auto-contenida que **prueba** la independencia hook/loader respecto al template. Documentar el drift como abierto + abrir stub explícito (este §) es la decisión correcta. ### Rama F4 — `feat/f4-marketplace-public-repo` diff --git a/ROADMAP.md b/ROADMAP.md index ce7fd7b..c1c6f5a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -44,7 +44,8 @@ Estado vivo. Cada fila refleja una rama de [MASTER_PLAN.md](MASTER_PLAN.md). 
| `feat/e3b-skill-test-scaffold-audit-coverage` | `/pos:test-scaffold` (writer-scoped), `/pos:test-audit` (read-only advisory), `/pos:coverage-explain` (read-only advisory); `skills_allowed` 10→13 | ✅ | — (PR pendiente) | | `feat/f1-skill-audit-session` | `/pos:audit-session` (read-only advisory main-strict) — compara 3 superficies de `policy.yaml` (`skills_allowed`, `lifecycle.*.hooks_required`, `audit.required_logs`) vs `.claude/logs/`; `skills_allowed` 13→14 | ✅ | — (PR pendiente) | | `feat/f2-agents-subagents` | 2 plugin subagents en `agents/` con namespace `pos-*`: `pos-code-reviewer` (consumido por `pre-commit-review`), `pos-architect` (consumido por `compound`); `auditor` diferido (sin consumer real); `agents_allowed` diferido (sin enforcement consumer) | ✅ | — (PR pendiente) | -| `feat/f3-selftest-end-to-end` | `bin/pos-selftest.sh` + orquestador Python + 5 escenarios funcionales-críticos (D1/D3/D4/D5/D6) sobre proyecto sintético | ✅ | — (PR pendiente) | +| `feat/f3-selftest-end-to-end` | `bin/pos-selftest.sh` + orquestador Python + 5 escenarios funcionales-críticos (D1/D3/D4/D5/D6) sobre proyecto sintético | ✅ | #27 | +| `refactor/template-policy-d5b-migration` | Migrar `templates/policy.yaml.hbs` + renderer + snapshots al shape post-D5b; cerrar drift documentado en D5b/F3 | ⏳ | — | | `feat/f4-marketplace-public-repo` | `javiAI/pos-marketplace` + release flow | ⏳ | — | | `feat/fx-knowledge-plane-plan` | Docs-only: abre FASE G en MASTER_PLAN (capa opcional knowledge plane) | ⏳ | — | | `feat/g1-knowledge-plane-contract` | Contrato tool-agnostic (raw/wiki/schema) + opt-in questionnaire | ⏳ | — | From 4fbffe1dfc47251649df025a1419dbc0149a13d4 Mon Sep 17 00:00:00 2001 From: Javier Date: Sun, 26 Apr 2026 20:14:57 +0200 Subject: [PATCH 17/17] =?UTF-8?q?fix:=20address=20PR=20#27=20review=20?= =?UTF-8?q?=E2=80=94=20drop=20hardcoded=20line=20counts=20in=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot flagged 
"~344 líneas" hardcoded for bin/_selftest.py in 3 docs (ROADMAP § F3, HANDOFF §21, MASTER_PLAN § Rama F3). Already stale: the file is 321 lines after the simplify pass (bb146bc), and will keep drifting as scenarios evolve. Drop the precise count entirely — the description carries the magnitude signal without the maintenance debt. Wrapper count ("9 líneas" for bin/pos-selftest.sh) kept: minimal-by-design, stable as long as the wrapper just exec's the orchestrator. Triage: 3 inline comments, all FIX (low value / trivial effort). 0 issue/conversation comments. Review-body was the PR overview summary. --- HANDOFF.md | 2 +- MASTER_PLAN.md | 2 +- ROADMAP.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/HANDOFF.md b/HANDOFF.md index a851fee..f3a6261 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -435,7 +435,7 @@ Entregables completados: **Entregables**: - `bin/pos-selftest.sh` (9 líneas) — wrapper bash mínimo (`#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`). Sin lógica; entrypoint estable. -- `bin/_selftest.py` (~344 líneas) — orquestador stdlib Python. Por escenario: tmpdir + `npx tsx generator/run.ts --profile cli-tool.yaml --out ` para generar proyecto sintético, sobre-escribe la sección mínima de `synthetic/policy.yaml` que el escenario necesita, monta el sintético como git repo (`git init -b main` + commit baseline), invoca el hook real (`hooks/.py`) vía subprocess con payload JSON, asserta exit + tokens en stdout/stderr/files. Imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: `. +- `bin/_selftest.py` — orquestador stdlib Python. Por escenario: tmpdir + `npx tsx generator/run.ts --profile cli-tool.yaml --out ` para generar proyecto sintético, sobre-escribe la sección mínima de `synthetic/policy.yaml` que el escenario necesita, monta el sintético como git repo (`git init -b main` + commit baseline), invoca el hook real (`hooks/.py`) vía subprocess con payload JSON, asserta exit + tokens en stdout/stderr/files. 
Imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: `. - `bin/tests/test_selftest_smoke.py` — 4 tests pytest sobre el contrato del wrapper. - `bin/tests/test_selftest_scenarios.py` — 5 tests pytest, fixture `module-scoped` que corre `pos-selftest.sh` una vez y comparte stdout. Cada test asserta `"[ok] D{N} {name}"`. - `.github/workflows/ci.yml` — nuevo job `selftest` (ubuntu × Python 3.11, sin matriz extendida — gates funcionales platform-agnostic, generación sintética es la op más cara). Setup Node + Python + `npm ci` + `pip install -r requirements-dev.txt`. Comando único: `pytest bin/tests -q`. diff --git a/MASTER_PLAN.md b/MASTER_PLAN.md index 5ece4e2..c8a71f3 100644 --- a/MASTER_PLAN.md +++ b/MASTER_PLAN.md @@ -733,7 +733,7 @@ Esperar aprobación explícita del usuario. Con OK → crear marker + rama. **Archivos entregados**: - `bin/pos-selftest.sh` — wrapper bash (`#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`). 9 líneas. Sin lógica. -- `bin/_selftest.py` — orquestador Python stdlib (~344 líneas). Por escenario: tmpdir + generator real + sobre-escribe sección mínima de `synthetic/policy.yaml` + monta git repo (`git init -b main` + commit baseline) + invoca hook real vía subprocess + asserta exit + tokens. +- `bin/_selftest.py` — orquestador Python stdlib. Por escenario: tmpdir + generator real + sobre-escribe sección mínima de `synthetic/policy.yaml` + monta git repo (`git init -b main` + commit baseline) + invoca hook real vía subprocess + asserta exit + tokens. - `bin/tests/test_selftest_smoke.py` (4 tests) — contrato del wrapper. - `bin/tests/test_selftest_scenarios.py` (5 tests) — fixture module-scoped + asserción `[ok] D{N} {name}` por escenario. - `.github/workflows/ci.yml` — job `selftest` (ubuntu × py 3.11, single matrix). Setup Node + Python + `npm ci` + `pip install -r requirements-dev.txt`. Comando: `pytest bin/tests -q`. 
diff --git a/ROADMAP.md b/ROADMAP.md index c1c6f5a..1ff6e14 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -647,7 +647,7 @@ Tercera rama de Fase F — entrega el **selftest end-to-end del propio plugin `p Entregables: - `bin/pos-selftest.sh` (9 líneas) — wrapper bash mínimo (`#!/usr/bin/env bash` + `set -euo pipefail` + delega a `python3 bin/_selftest.py`). No contiene lógica; es entrypoint estable que tests + CI consumen sin dependencia de path absoluto. -- `bin/_selftest.py` (~344 líneas) — orquestador stdlib (sin dependencias externas). Por cada escenario: crea un tmpdir, ejecuta `npx tsx generator/run.ts --profile questionnaire/profiles/cli-tool.yaml --out ` para generar un proyecto sintético, sobre-escribe `synthetic/policy.yaml` con la sección mínima que el escenario necesita, monta el repo sintético como git repo (`git init -b main` + commit baseline), e invoca el hook real (`hooks/.py`) vía subprocess con payload JSON. Asserta exit code + presencia de tokens en stdout/stderr/files. Imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: `. Exit 0/1 según pass/fail. +- `bin/_selftest.py` — orquestador stdlib (sin dependencias externas). Por cada escenario: crea un tmpdir, ejecuta `npx tsx generator/run.ts --profile questionnaire/profiles/cli-tool.yaml --out ` para generar un proyecto sintético, sobre-escribe `synthetic/policy.yaml` con la sección mínima que el escenario necesita, monta el repo sintético como git repo (`git init -b main` + commit baseline), e invoca el hook real (`hooks/.py`) vía subprocess con payload JSON. Asserta exit code + presencia de tokens en stdout/stderr/files. Imprime `[ok] D{N} {name}` o `[fail] D{N} {name}: `. Exit 0/1 según pass/fail. - `bin/tests/test_selftest_smoke.py` — 4 tests pytest sobre el contrato del wrapper (existe, ejecutable, delega a `python3 bin/_selftest.py`, exit 0 al correr). Bloquea regresiones en la forma del entrypoint. 
- `bin/tests/test_selftest_scenarios.py` — 5 tests pytest, fixture `module-scoped` que corre `pos-selftest.sh` una vez y comparte stdout entre los tests. Cada test asserta `"[ok] D{N} {name}"` en stdout. - `.github/workflows/ci.yml` — nuevo job `selftest` (ubuntu × Python 3.11, sin matriz extendida — los gates funcionales son platform-agnostic y la generación del proyecto sintético es la operación más cara). Setup: Node + Python + `npm ci` + `pip install -r requirements-dev.txt`. Comando único: `pytest bin/tests -q`.