diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..9117447 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,46 @@ +--- +name: Bug report +about: Something cma-mcp does that does not match the documented behavior +title: '[bug] ' +labels: bug +--- + +## Expected behavior + + + +## Actual behavior + + + +## Reproduction + + + +## Install fingerprint + +``` +$ cma-mcp --version + +``` + +## bash cma version + +``` +$ cma --version + +``` + +## Environment + +- OS: +- Python version: +- MCP client (Claude Desktop / Cursor / Cline / etc.) and version: + +## Logs + + diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..9314156 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,11 @@ +blank_issues_enabled: false +contact_links: + - name: Security issue + url: mailto:lovro.lucic@gmail.com?subject=%5Bcma-mcp%20security%5D + about: Email security issues directly per SECURITY.md. Do NOT open public issues for vulnerabilities. + - name: bash cma issue (the wrapped binary) + url: https://github.com/Clarethium/cma/issues + about: Issues with cma's seven primitives, hooks, or shell wrappers belong in the cma repo, not here. + - name: Lodestone methodology question + url: https://github.com/Clarethium/lodestone/issues + about: Methodology canon questions (FM catalog, surface protocols, compound practice) live with Lodestone, not cma-mcp. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..fb42ea2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,45 @@ +--- +name: Feature request +about: A capability cma-mcp could expose +title: '[feature] ' +labels: enhancement +--- + +## What you want cma-mcp to do + + + +## Why it matters + + + +## Proposed shape + + + +## STRATEGY check + + + +## Companion impact + + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..0343550 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,50 @@ + + +## Summary + + + +## Type + + + +- [ ] Bug fix (no schema change) +- [ ] New tool, resource, or schema field (additive; minor SERVER_VERSION bump) +- [ ] Schema-breaking change (major SERVER_VERSION bump; STRATEGY review required) +- [ ] Documentation only +- [ ] Test addition / refactor +- [ ] Architectural decision (DECISIONS.md entry added) +- [ ] Durable decision change (STRATEGY.md §6; rationale below) + +## Reviewer checklist + +- [ ] DCO sign-off on every commit (`git log` shows `Signed-off-by:` trailer) +- [ ] `python3 -m pytest -q` passes locally +- [ ] If surface changed: `mcp_schema.py`, `mcp_server.py`, the relevant test, and `docs/MCP_SERVER.md` all updated together +- [ ] If payload shape changed: `tests/test_payload_determinism.py` updated +- [ ] If runtime behavior changed: `CHANGELOG.md` `[Unreleased]` updated +- [ ] No new runtime dependency added (cma-mcp's runtime stays stdlib-only unless STRATEGY says otherwise) + +## Companion-link impact + + + +## STRATEGY / DECISIONS + + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..dbadc2b --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,55 @@ +# Dependabot configuration for the cma project. 
+#
+# Two ecosystems are tracked:
+#
+#   - GitHub Actions (workflow YAML): floating action versions
+#     (`actions/checkout@v4`, `actions/setup-python@v5`, etc.) get
+#     weekly major/minor PRs so security advisories land without
+#     manual chasing.
+#   - Python (cma-mcp): runtime deps are intentionally empty
+#     (DECISIONS AD-001) so the dev-deps block (`pytest`,
+#     `pytest-timeout`, `pytest-cov`) is the only watched surface.
+#     Scoped to `cma-mcp/pyproject.toml` per the AD-008 monorepo
+#     layout.
+#
+# bash cma has no package dependencies (Python stdlib only for JSON
+# escape) so it does not need a Dependabot ecosystem.
+
+version: 2
+updates:
+  - package-ecosystem: github-actions  # action pins in workflow `uses:` lines
+    directory: '/'
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    commit-message:
+      prefix: 'ci'
+      include: scope
+    labels:
+      - dependencies
+      - github-actions
+    reviewers:  # NOTE(review): GitHub has announced replacing this key with CODEOWNERS; confirm it is still honored
+      - llucic
+
+  - package-ecosystem: pip  # test-only dependencies declared under /cma-mcp
+    directory: '/cma-mcp'
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    commit-message:
+      prefix: 'deps(cma-mcp)'
+      include: scope
+    labels:
+      - dependencies
+      - cma-mcp
+    reviewers:
+      - llucic
+    # Every watched dependency is a test-time one, so batch them:
+    # a pytest bump and a pytest-cov bump then land as a single
+    # PR instead of one PR each.
+    groups:
+      test-deps:
+        patterns:
+          - 'pytest*'
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 0000000..4a15e5d
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,34 @@
+name: codeql
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+  schedule:
+    # Weekly CodeQL re-scan on Mondays so a vulnerability landing
+    # in a transitive dependency surfaces even when the repo is idle.
+    - cron: "17 6 * * 1"
+
+permissions:
+  contents: read
+  security-events: write
+  actions: read
+
+jobs:
+  analyze:
+    name: CodeQL (python)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v3
+        with:
+          languages: python
+          queries: security-and-quality
+      - name: Perform CodeQL analysis
+        uses: github/codeql-action/analyze@v3
+        with:
+          category: "/language:python"
diff --git a/.github/workflows/dco-check.yml b/.github/workflows/dco-check.yml
new file mode 100644
index 0000000..f96c382
--- /dev/null
+++ b/.github/workflows/dco-check.yml
@@ -0,0 +1,49 @@
+name: dco-check
+
+# Enforce Developer Certificate of Origin sign-off on every commit
+# in a pull request. Per CONTRIBUTING.md: contributors must sign off
+# every commit with `git commit -s`, certifying the contribution
+# under https://developercertificate.org/.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+jobs:
+  dco:
+    name: Verify DCO sign-off
+    runs-on: ubuntu-latest
+    steps:
+      # Full history so every commit between the PR base and head
+      # is available to `git rev-list` below.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Check Signed-off-by trailer on every PR commit
+        # Pass the event SHAs through the environment instead of
+        # expanding `${{ }}` inside the shell source, following
+        # GitHub's hardening guidance for inline scripts.
+        env:
+          BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          missing=0
+          # Walk each commit reachable from head but not from base.
+          for commit in $(git rev-list "$BASE_SHA..$HEAD_SHA"); do
+            # A valid trailer is a whole line: `Signed-off-by: Name <email>`.
+            if ! git log -1 --format='%B' "$commit" | grep -qE '^Signed-off-by: .+ <.+@.+>$'; then
+              echo "::error::Commit $commit missing Signed-off-by trailer (DCO)"
+              missing=$((missing + 1))
+            fi
+          done
+          if [ "$missing" -gt 0 ]; then
+            echo ""
+            echo "Add the trailer with: git commit -s --amend (or rebase + sign-off-by-each-commit)"
+            echo "Background: https://developercertificate.org/"
+            exit 1
+          fi
+          echo "All commits carry Signed-off-by."
diff --git a/.github/workflows/publish-mcp.yml b/.github/workflows/publish-mcp.yml new file mode 100644 index 0000000..8235fd1 --- /dev/null +++ b/.github/workflows/publish-mcp.yml @@ -0,0 +1,198 @@ +# Publish workflow for cma-mcp. +# +# Builds the cma-mcp wheel + sdist on every cma-mcp tag push +# (`cma-mcp-X.Y.Z` per DECISIONS AD-008's tag-prefix convention), +# bakes the build-time git SHA into the artifacts, validates them, +# smoke-tests the installed wheel, and uploads them as workflow +# artifacts. The PyPI / TestPyPI upload jobs are intentionally +# commented out — publication is held until the operator lifts it. +# +# To enable publishing: +# +# 1. Bump version in cma-mcp/pyproject.toml from `0.1.0.dev0` to +# `0.1.0`. Bump SERVER_VERSION in cma-mcp/mcp_server.py to the +# same value. Move cma-mcp/CHANGELOG.md `[Unreleased]` to +# `[0.1.0]` with the release date. +# 2. Confirm Zenodo DOI allocation has landed (see CITATION.cff). +# 3. Configure PyPI Trusted Publishing for `cma-mcp` against this +# repo (https://docs.pypi.org/trusted-publishers/), adding the +# `testpypi` and `pypi` GitHub environments. +# 4. Uncomment `publish-to-testpypi` below. Push tag +# `cma-mcp-0.1.0`. The workflow builds, validates, and uploads +# to TestPyPI. +# 5. Verify the TestPyPI install: +# `pipx run --index-url https://test.pypi.org/simple/ \ +# --pip-args='--extra-index-url https://pypi.org/simple/' \ +# cma-mcp==0.1.0 --version` +# 6. Uncomment `publish-to-pypi`. Push the same tag again (or a +# new tag); the workflow uploads to real PyPI. +# 7. Zenodo issues a DOI automatically if GitHub-Zenodo +# integration is enabled (one-time repo-settings toggle). +# +# Releases of bash cma (tags `cma-X.Y.Z`) flow through a separate +# release process — they do not invoke this workflow. 
+
+name: publish-mcp
+
+on:
+  push:
+    tags:
+      - 'cma-mcp-*'
+  workflow_dispatch:  # manual runs must target a cma-mcp-* tag ref; the version check below rejects any other ref
+
+permissions:
+  contents: read
+
+defaults:
+  run:
+    working-directory: cma-mcp
+
+jobs:
+  build:
+    name: Build wheel + sdist
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history so the SHA bake survives if a contributor
+          # ever invokes setup.py without the env var path.
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+
+      - name: Verify the tag matches pyproject + SERVER_VERSION
+        env:
+          GITHUB_REF: ${{ github.ref }}
+        run: |
+          python - <<'PY'
+          import os, pathlib, re, sys, tomllib
+          ref = os.environ["GITHUB_REF"]
+          prefix = "refs/tags/cma-mcp-"
+          if not ref.startswith(prefix):
+              sys.exit(f"unexpected ref {ref!r}; expected refs/tags/cma-mcp-...")
+          tag_version = ref[len(prefix):]
+          with open("pyproject.toml", "rb") as f:
+              pyver = tomllib.load(f)["project"]["version"]
+          text = pathlib.Path("mcp_server.py").read_text(encoding="utf-8")  # closes the handle; decoding no longer depends on the runner locale
+          m = re.search(r'^SERVER_VERSION\s*=\s*"([^"]+)"', text, re.MULTILINE)
+          codever = m.group(1) if m else None
+          mismatches = []
+          if pyver != tag_version:
+              mismatches.append(f"pyproject.toml version={pyver!r} != tag {tag_version!r}")
+          if codever != tag_version:
+              mismatches.append(f"mcp_server.SERVER_VERSION={codever!r} != tag {tag_version!r}")
+          if mismatches:
+              for line in mismatches:
+                  print(f"::error::{line}", file=sys.stderr)
+              sys.exit(1)
+          print(f"Versions in sync at {tag_version}")
+          PY
+
+      - name: Install build tooling
+        run: python -m pip install --upgrade build twine
+
+      - name: Build wheel + sdist
+        env:
+          CMA_MCP_BUILD_SHA: ${{ github.sha }}
+        run: python -m build
+
+      - name: Validate artifacts with twine
+        run: python -m twine check dist/*
+
+      - name: Inspect wheel contents
+        run: |
+          python - <<'PY'
+          import glob, zipfile, sys
+          wheel = glob.glob("dist/cma_mcp-*-py3-none-any.whl")[0]
+          with zipfile.ZipFile(wheel) as z:  # release the archive handle once the member list is read
+              files = z.namelist()
+          required = [
+              "_build_info.py",
+              "mcp_server.py",
+              "mcp_protocol.py",
+              "mcp_schema.py",
+              "mcp_resources.py",
+              "mcp_compose.py",
+              "mcp_log.py",
+              "cma_subprocess.py",
+              "cma_jsonl.py",
+          ]
+          missing = [r for r in required if r not in files]
+          if missing:
+              sys.exit(f"missing from wheel: {missing}")
+          # License + notice must ship in dist-info/licenses/ (PEP 639)
+          dist_info_licenses = [f for f in files if f.startswith("cma_mcp-") and "/licenses/" in f]
+          required_licenses = {"LICENSE", "NOTICE"}
+          got_licenses = {f.rsplit("/", 1)[-1] for f in dist_info_licenses}
+          if not required_licenses <= got_licenses:
+              sys.exit(f"wheel missing license artifacts: {required_licenses - got_licenses}")
+          print(f"Wheel OK: {wheel}")
+          print(f" modules: {len([f for f in files if f.endswith('.py')])}")
+          print(f" licenses: {sorted(got_licenses)}")
+          PY
+
+      - name: Smoke-test installed wheel
+        env:
+          GITHUB_SHA: ${{ github.sha }}
+        run: |
+          python -m venv /tmp/cma-mcp-publish-smoke
+          /tmp/cma-mcp-publish-smoke/bin/pip install --quiet dist/cma_mcp-*-py3-none-any.whl
+          /tmp/cma-mcp-publish-smoke/bin/cma-mcp --help >/dev/null
+          fingerprint=$(/tmp/cma-mcp-publish-smoke/bin/cma-mcp --version)
+          echo "$fingerprint"
+          /tmp/cma-mcp-publish-smoke/bin/python - "$fingerprint" "$GITHUB_SHA" <<'PY'
+          import json, sys
+          fp = json.loads(sys.argv[1])
+          expected = sys.argv[2]
+          got = fp.get("git_sha")
+          if got != expected:
+              sys.exit(f"git_sha bake mismatch: wheel reports {got!r}, expected {expected!r}")
+          if fp.get("server_name") != "cma-mcp":
+              sys.exit(f"server_name mismatch: {fp.get('server_name')!r}")
+          print(f"Smoke OK: server_version={fp['server_version']} git_sha={got}")
+          PY
+
+      - name: Upload artifacts to GitHub
+        uses: actions/upload-artifact@v4
+        with:
+          name: cma-mcp-dist
+          path: cma-mcp/dist/*
+
+  # publish-to-testpypi:
+  #   name: Publish to TestPyPI
+  #   needs: build
+  #   runs-on: ubuntu-latest
+  #   environment: testpypi
+  #   permissions:
+  #     id-token: write  # OIDC token for PyPI Trusted Publishing; PEP 740 attestations use the same identity
+  #   steps:
+  #     - uses: actions/download-artifact@v4
+  #       with:
+  #         name: cma-mcp-dist
+  #         path: dist/
+  #     - uses: pypa/gh-action-pypi-publish@release/v1
+  #       with:
+  #         repository-url: https://test.pypi.org/legacy/
+
+  # publish-to-pypi:
+  #   name: Publish to PyPI
+  #   needs: publish-to-testpypi
+  #   runs-on: ubuntu-latest
+  #   environment: pypi
+  #   permissions:
+  #     id-token: write
+  #   steps:
+  #     # Lift-only: upload only final versions (no dev / rc / a / b marker after the
+  #     # `cma-mcp-` prefix). Testing the whole ref is wrong: "refs/tags/cma-mcp-" contains "a".
+  #     - id: gate
+  #       working-directory: ${{ github.workspace }}  # no checkout in this job, so the default cma-mcp/ directory does not exist
+  #       run: case "${GITHUB_REF_NAME#cma-mcp-}" in *dev*|*rc*|*a*|*b*) echo final=false ;; *) echo final=true ;; esac >> "$GITHUB_OUTPUT"
+  #     - uses: actions/download-artifact@v4
+  #       if: steps.gate.outputs.final == 'true'
+  #       with:
+  #         name: cma-mcp-dist
+  #         path: dist/
+  #     - uses: pypa/gh-action-pypi-publish@release/v1
+  #       if: steps.gate.outputs.final == 'true'
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index dc62206..b3c6e0b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -17,3 +17,20 @@ jobs:
 
       - name: Run test suite
         run: ./test.sh
+
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install shellcheck
+        run: sudo apt-get update && sudo apt-get install -y shellcheck  # refresh the package index first so the install cannot hit a stale entry
+
+      - name: Lint cma and hooks
+        run: |
+          shellcheck cma
+          shellcheck test.sh
+          shellcheck bench.sh
+          shellcheck hooks/cma-pre
+          shellcheck hooks/claude-code-pre-tool-use.sh
+          shellcheck hooks/claude-code-session-start.sh
diff --git a/.github/workflows/tests-mcp.yml b/.github/workflows/tests-mcp.yml
new file mode 100644
index 0000000..6cb41fd
--- /dev/null
+++ b/.github/workflows/tests-mcp.yml
@@ -0,0 +1,105 @@
+name: tests-mcp
+
+# pytest suite for the cma-mcp Python MCP wrapper. Exercises protocol
+# conformance, three-section payload determinism, JSONL parsing
+# tolerance, and subprocess-wrapper isolation. Subprocess-bound tests
+# auto-skip when the cma binary is not installed in CI.
+#
+# bash cma's test.yml runs separately for the bash CLI; the two
+# workflows together validate both interfaces of the cma project.
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - "cma-mcp/**"
+      - ".github/workflows/tests-mcp.yml"
+  pull_request:
+    branches: [main]
+    paths:
+      - "cma-mcp/**"
+      - ".github/workflows/tests-mcp.yml"
+
+permissions:
+  contents: read
+
+defaults:
+  run:
+    working-directory: cma-mcp
+
+jobs:
+  pytest:
+    name: pytest (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # Make the in-repo cma binary discoverable on $PATH so the
+      # subprocess-bound tests (argv-injection-resistance probe,
+      # cma --version smoke, etc.) actually execute in CI rather
+      # than silently skip via the `cma_binary_available` fixture.
+      # Without this, a coverage gap masquerades as green CI.
+      - name: Install bash cma to PATH
+        working-directory: ${{ github.workspace }}
+        run: |
+          chmod +x cma
+          echo "$GITHUB_WORKSPACE" >> "$GITHUB_PATH"
+
+      - name: Verify cma is reachable
+        run: cma --version
+
+      - name: Install package and test deps
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[test]"  # quoted: unquoted, [test] is a shell glob bracket expression
+      - name: Run pytest with coverage
+        # Coverage measurement scopes the eight runtime modules per
+        # [tool.coverage.run] in pyproject.toml. The reported number
+        # reflects in-process dispatch only — `tests/test_mcp_wire.py`
+        # spawns cma-mcp as a real subprocess (closes ANTICIPATED_
+        # CRITIQUES C-8), and pytest-cov does not follow subprocess
+        # paths without a sitecustomize hook. Lines exercised only
+        # via the wire-protocol tests therefore count as uncovered
+        # in this report even though they are exercised end-to-end.
+        # Treat the number as a floor on coverage, not a ceiling.
+        run: python -m pytest -q --strict-markers --cov --cov-report=term-missing
+
+      # Wheel install smoke. Builds the sdist + wheel exactly as PyPI
+      # publication would, installs the wheel into a fresh virtualenv,
+      # and exercises the console-script entry point. Catches
+      # packaging regressions (missing py-modules, broken entry
+      # points, _build_info.py absence) that the editable-install
+      # pytest path cannot see. Also verifies the build-time SHA
+      # bake survives PEP 517 build isolation.
+      - name: Build wheel and sdist
+        env:
+          CMA_MCP_BUILD_SHA: ${{ github.sha }}
+        run: |
+          pip install --upgrade build
+          python -m build
+      - name: Smoke-test installed wheel in clean venv
+        env:
+          GITHUB_SHA: ${{ github.sha }}
+        run: |
+          python -m venv /tmp/cma-mcp-smoke
+          /tmp/cma-mcp-smoke/bin/pip install dist/cma_mcp-*.whl
+          /tmp/cma-mcp-smoke/bin/cma-mcp --help >/dev/null
+          fingerprint=$(/tmp/cma-mcp-smoke/bin/cma-mcp --version)
+          echo "$fingerprint"
+          /tmp/cma-mcp-smoke/bin/python - "$fingerprint" "$GITHUB_SHA" <<'PY'
+          import json, sys
+          fp = json.loads(sys.argv[1])
+          expected = sys.argv[2]
+          got = fp.get("git_sha")
+          if got != expected:
+              sys.exit(f"git_sha mismatch: wheel reports {got!r}, expected {expected!r}")
+          print(f"git_sha bake verified: {got}")
+          PY
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bf92168
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,52 @@
+# Python (cma-mcp)
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg-info/
+*.egg
+build/
+dist/
+.eggs/
+develop-eggs/
+sdist/
+wheels/
+share/python-wheels/
+MANIFEST
+
+# Python venv
+.venv/
+venv/
+env/
+ENV/
+
+# pytest / coverage (cma-mcp)
+.pytest_cache/
+.coverage
+.coverage.*
+htmlcov/
+.tox/
+coverage.xml
+*.cover
+
+# editors
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# os
+.DS_Store
+Thumbs.db
+
+# project-local
+*.log
+.cma-mcp.local/
+
+# build-time generated (cma-mcp)
+# setup.py writes _build_info.py at sdist/wheel build time so the
+# installed wheel carries the git SHA it was built from. The file is
+# regenerated on every build and must never be committed.
+cma-mcp/_build_info.py diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..eb48d99 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,356 @@ +# cma Architecture + +This document specifies the architecture of cma's action-time injection layer: how surfaced warnings reach the operator at the moment of action, and how the data produced supports validation that the compound practice loop is closing. + +The document is the contract between cma and any integration that connects it to an operator's environment (AI client, shell, IDE, CI). Specific integrations are interchangeable; this architecture is what they implement. + +## 1. Purpose and scope + +cma is the executable of compound practice, the operator-side discipline defined in [Lodestone](https://github.com/Clarethium/lodestone). The compound practice loop (Lodestone Section VIII) has five steps: + +1. **Capture** a failure, decision, rejection, or prevention. +2. **Surface** relevant prior captures when context matches a future action. +3. **Catch** the repeat: the surfaced capture changes operator behavior. +4. **Record prevention** linking the catch to the original capture. +5. **Strengthen** the warning's weight from the prevention evidence. + +The loop closes only if step 2 (surface) happens reliably at the moment of step 3 (catch). Manual `cma surface` invocation is sufficient in principle but unreliable in practice: operators forget. Action-time injection makes surfacing automatic and reliable. + +This document covers the action-time injection layer: the pattern by which surfacing is triggered automatically by an external interception point, the data it produces, and the criteria that distinguish state-of-the-art integrations from bolt-on ones. + +The document does not cover the seven primitives themselves (see [DESIGN.md](DESIGN.md)) or the methodology defined in Lodestone. + +## 2. The five-stage architecture + +The action-time injection layer is decomposed into five stages. 
Each is independently testable; each can be implemented differently per environment without affecting the others. + +``` +Interception → Context extraction → Query → Injection → Logging +``` + +### 2.1 Interception + +The first stage observes that an action is about to happen. Different environments expose different interception points: + +- **Claude Code**: `PreToolUse` hook fires before each tool call. +- **Shell (zsh, bash with bash-preexec)**: `preexec` function fires before each command. +- **IDE**: editor pre-save or pre-execute hooks. +- **CI**: pre-commit, pre-push, pre-merge hooks. +- **Manual**: explicit wrapper invocation (`cma-pre <command>`). + +The interception layer must: + +- Fire reliably before the action it observes (not after, not during). +- Provide enough raw data for the next stage to identify what is about to happen. +- Add minimal overhead to the wrapped action. + +Interception and injection (Section 2.4) are the environment-specific stages; the rest of the pipeline is shared. + +### 2.2 Context extraction + +The second stage parses raw interception data into a normalized context. A context describes the action in a form the query stage can use. + +Standard context fields: + +| Field | Description | +|-------|-------------| +| `tool_name` | Which interception channel fired (`Edit`, `Bash`, `git`, `vim`, etc.) | +| `file_path` | Absolute or relative path of the file being acted on, if applicable | +| `command` | Full command line, if applicable | +| `working_directory` | Current working directory at time of action | +| `project_root` | Detected project root (git toplevel, etc.), if available | +| `surface` | Domain area inferred from `file_path` and `command` | + +Surface detection is heuristic: lexical matching of path components and command keywords against a configurable rule table. Heuristics will produce false positives and false negatives. State-of-the-art integrations expose the rule table for operator tuning. 
+ +### 2.3 Query + +The third stage calls `cma surface` with filters derived from the context. + +Query strategy (default): + +1. If surface is detected, query with `--surface <surface>` (broader, more useful). +2. Else if file_path is detected, query with `--file <file_path>`. +3. Else skip the query entirely (no actionable filter; would dump all recent captures, which is noise). + +Query result limit: a small number (default 3). The point is to surface the most relevant matches, not to dump the archive. + +The query is read-only from the operator's perspective. Logging happens as a side effect of `cma surface`, not as additional work for the integration. + +For decisions, `cma surface` additionally matches the decision's `applies_when` field against the context keywords. A decision with `applies_when="auth"` surfaces when the context contains "auth" (in surface or file), even if the decision's stored surface is something else. This closes the decision-surfacing loop: decisions surface at the moment their conditions match, not only when explicitly queried by stored surface. `applies_when` matching is decision-specific; misses, rejections, and preventions match only by their surface and file fields. + +### 2.4 Injection + +The fourth stage delivers surfaced captures to the operator's context. The injection channel is environment-specific: + +| Environment | Channel | +|-------------|---------| +| Claude Code | `stdout` becomes additional context passed to the model | +| Shell | `stderr` is visible to the operator before the next prompt | +| IDE | Inline notice in the editor (status bar, popover, comment) | +| CI | Comment on the commit, PR, or pipeline run | +| Manual | `stdout` of the wrapper command | + +The injection layer must: + +- Be visible to the operator or agent at the moment of decision (not after the action is complete). +- Be distinguishable from normal output (clearly attributed to cma). +- Be silent on no-match (a noisy signal trains the operator to ignore it). 
+- Not interrupt the action flow when matches do exist (display, then proceed). + +### 2.5 Logging + +The fifth stage records the surface event in `surface_events.jsonl` for validation analysis. + +Logging is performed by `cma surface` itself, not by the integration layer. This centralizes the logging schema and ensures consistency across all integrations. + +The surface event schema is documented in Section 4 (Data contracts). + +## 3. Reference implementations + +The cma repository includes the following reference implementations of this architecture. Each is a complete worked example; integrations targeting other environments may follow the same pattern. + +### 3.1 Claude Code PreToolUse hook + +`hooks/claude-code-pre-tool-use.sh` + +Interception via Claude Code's `PreToolUse` hook event. Reads stdin JSON (current format) or environment variables (legacy fallback). Surfaces captures to stdout, where Claude Code injects them as additional context. Silent for non-relevant tools (`Read`, etc.) and when no captures match. + +Status: implemented, tested. + +### 3.1a Claude Code SessionStart hook + +`hooks/claude-code-session-start.sh` + +Interception via Claude Code's `SessionStart` hook event. Surfaces priming context at the start of each session: recurring failure patterns (from `cma stats --recurrence`), active rejections (from `cma stats --rejections`), and optionally behavior pivots (from `cma stats --behavior`). Configurable via `CMA_SESSION_START_SECTIONS` env var. + +Together with the PreToolUse hook, the two cover both ends of the architecture: session-priming context at session boundary and per-action surfacing during work. + +Status: implemented, tested. + +### 3.2 Shell wrapper (in development) + +`hooks/cma-pre.sh` plus `hooks/cma-pre` + +Interception via zsh's native `preexec` or bash's bash-preexec library. Multi-shell support. Configurable command rules. Stderr injection. Failure-isolated: if cma errors, the wrapped command still runs. 
+ +Status: in design. + +### 3.3 Manual wrapper + +`hooks/cma-pre` standalone invocation (`cma-pre <command>`). + +For environments where automatic interception is unavailable or undesirable. Operators wrap substantive commands explicitly. Same pipeline as the shell wrapper, just triggered manually. + +Status: in design. + +## 4. Data contracts + +The architecture produces three data shapes that downstream analysis depends on. These shapes are stable across environments; integrations writing to or reading from them must conform. + +### 4.1 Surface event + +Written to `$CMA_DIR/surface_events.jsonl` by `cma surface`: + +```json +{ + "type": "surface_event", + "id": "20260505-070100-abc12345", + "timestamp": "2026-05-05T07:01:00Z", + "filter_surface": "auth", + "filter_file": "", + "filter_type": "", + "filter_limit": 3, + "matched": [ + { + "id": "20260504-...", + "type": "miss", + "surface": "auth", + "fm": "" + } + ] +} +``` + +`matched` may be empty. Empty events still record that surfacing was attempted. + +### 4.2 Miss with texture + +Written to `$CMA_DIR/misses.jsonl` by `cma miss`: + +```json +{ + "type": "miss", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "...", + "surface": "auth", + "fm": "", + "files": "src/auth/jwt.ts", + "intended": "patch only the failing test", + "corrected": "trace upstream defect, fix at root", + "excerpt": "operator: ...\nassistant: ..." +} +``` + +Texture fields (`intended`, `corrected`, `excerpt`) are optional but recommended. They are the data substrate for behavior-layer validation. + +### 4.3 Prevention + +Written to `$CMA_DIR/preventions.jsonl` by `cma prevented`: + +```json +{ + "type": "prevention", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "almost X, did Y instead after seeing surfaced warning", + "miss_id": "20260504-...", + "warning_id": "..." +} +``` + +Linking via `miss_id` lets validation analysis compute prevention rates per original miss. + +## 5. 
Validation framework + +The architecture is designed to produce data supporting three independent layers of evidence about whether the compound practice loop is working. + +### 5.1 Process layer + +**Question**: Does the loop run at all? + +**Metrics**, computable from cma's existing data: + +- Capture rate (captures per session, per day, per project). +- Surface event rate (surfacings per session). +- Coverage: fraction of operator actions that produce a surface event. + +**Strength**: easy to measure. **Weakness**: a journal that never affects behavior would still produce these metrics. Necessary but not sufficient. + +### 5.2 Behavior layer + +**Question**: Do surfaced warnings change operator action? + +**Metrics**, computable from texture-preserved captures: + +- Counterfactual rate: misses with both `intended` and `corrected` set, where the two differ. Direct evidence of mid-course correction. +- Prevention-to-leak ratio: `cma prevented` captures versus leak detections. Higher ratio means warnings are working. +- Decision-shift rate: decisions captured that contradict prior decisions (operator reflection). + +**Strength**: structural data; not self-report alone. **Weakness**: requires operators to capture texture (the `--intended`, `--corrected` flags). Coverage depends on capture discipline. + +### 5.3 Outcome layer + +**Question**: Does the operator's work get measurably better? + +**Metrics**, requiring [Touchstone](https://github.com/Clarethium/touchstone) integration: + +- Touchstone score trends on the operator's actual outputs over time. +- Score deltas correlated with cma usage intensity (captures per task, surfacings per task). +- Failure-shape distribution shifts (fewer recurrences of named failure shapes). + +**Strength**: objective, model-independent measurement. **Weakness**: requires operator's actual work to be amenable to Touchstone evaluation, which depends on output type. 
Currently aspirational; the bridge between Lodestone, cma, and Touchstone is the empire's intellectual spine and is the next major architectural project after this one. + +The three layers are independent. A system that passes the Process layer alone is a journal. A system that passes Process + Behavior is a working compound practice loop. A system that passes all three is a methodology with empirical grounding. + +## 6. Quality criteria for an integration + +A state-of-the-art integration of this architecture meets the following criteria. + +### Reliability + +- The interception layer fires before the action, on every relevant action. +- Failures in the cma layer do not block the wrapped action. +- The integration is testable in isolation per stage. + +### Performance + +- End-to-end overhead under 50 ms in the typical case. +- Bounded worst-case overhead (timeout on cma queries, default 5 seconds). +- Empty-result path is fast: no surface match should not slow the action significantly. + +### Signal quality + +- Surface detection rules are configurable; defaults are tuned for canonical surfaces (auth, payments, db, ui, docs, test, api). +- False-positive surface matches are rare; false-negatives are acceptable (operators can capture without surface and still hit the file filter). +- Empty matches produce silent output (no dialog box, no chatter). + +### Composability + +- The integration does not require global state in the operator's environment. +- Aliasing commands is not required; preexec or hook mechanisms are preferred where available. +- Multiple integrations can coexist (Claude Code hook + shell wrapper + IDE plugin). + +### Validation contribution + +- Every surface event is logged. +- Texture preservation flags are exposed when the integration captures. +- Output is distinguishable from normal output (parseable by future analysis tooling). + +## 7. Failure modes to avoid + +State-of-the-art integrations explicitly handle each of the following. 
+ +### Bolt-on integration + +Aliasing every command with `cma-wrap` and stopping there. Fails because it does not compose, scales poorly, and produces inconsistent context across commands. The five-stage architecture exists to prevent this. + +### Eager logging + +Logging every keystroke, file save, or shell command regardless of relevance. Fails because log noise drowns signal. Surface event logging is bounded to actual `cma surface` calls; the architecture does not log raw interception data. + +### Silent failure + +If cma is missing or errors, the integration does nothing visible. Fails because operators stop trusting the signal. State-of-the-art: the wrapped action still runs cleanly, but the operator gets a notice that cma was unavailable. + +### Performance regression + +Integration adds noticeable latency. Fails because operators disable the integration. The 50 ms criterion is the practical bar. + +### Surface noise + +Surface detection is too aggressive; irrelevant captures get surfaced. Fails because operators stop reading the output. State-of-the-art: heuristics are configurable, defaults are tuned, false-positive rate is monitored. + +### Stale signal + +Surface events accumulate indefinitely; recent events drown in historical noise. Mitigation: a recency-weighted scoring strategy in `cma surface`, or a log rotation policy in `$CMA_DIR`. The current implementation uses simple recency-sort; future versions may add scoring. + +### Schema drift + +Integrations writing to `surface_events.jsonl` without conforming to the schema in Section 4 break downstream analysis. Mitigation: cma owns the writer (via `cma surface`); integrations call cma rather than writing directly. + +## 8. Versioning and evolution + +The architecture's contract is: integrations call `cma surface` and respect its output schema (Section 4). Adding new fields to surface events (additive) is backward-compatible. Removing fields requires a major version bump. 
+ +The contract is documented in this file and in [DESIGN.md](DESIGN.md). Changes to the contract follow the project's versioning policy (see [CHANGELOG.md](CHANGELOG.md)). + +## 9. Methodology integration + +cma is methodology-agnostic. The `--fm` field on captures and any methodology-specific tags are opaque strings from cma's perspective. Semantics are owned by the methodology in use. + +When operators use cma alongside a methodology that defines a canonical failure-mode catalog (Lodestone is the canonical operator-AI methodology under Clarethium; others may emerge), they tag captures with the methodology's canonical names. Analysis tooling that reads cma data interprets the tags according to the methodology context. + +The integration is by convention (shared vocabulary), not by code. cma does not depend on any methodology. Methodologies do not depend on cma. Each evolves independently. cma documentation does not replicate methodology catalogs; methodology documents own their catalogs and their meaning. + +### 9.1 Plugin point: classifier + +When the `CMA_FM_CLASSIFIER` env var is set, `cma miss` invokes the named command as a shell expression if `--fm` is not provided explicitly. The command receives the description on stdin and is expected to emit the classified failure-mode tag on its first line of stdout. + +Failure modes: + +- `--fm` provided explicitly: the classifier is not invoked. Operator override is absolute. +- `CMA_FM_CLASSIFIER` not set: no classifier invocation; `fm` stays empty unless `--fm` was provided. +- Classifier command not found, errors, or exits non-zero: `fm` stays empty; the capture proceeds. +- Classifier exceeds 5 second timeout: `fm` stays empty; the capture proceeds. + +The plugin point exists so operators can wire methodology-aware classification (a Lodestone-aware classifier, for instance) without coupling cma to any specific methodology. Operators using a different methodology wire a different classifier. 
Operators not using auto-classification leave the env var unset and tag manually with `--fm`. + +The classifier is operator-side: cma ships no classifier in this repository. Methodologies that wish to provide one ship it as a separate companion tool (or document the wiring pattern in their own docs). This preserves the loose coupling. + +## 10. References + +- [DESIGN.md](DESIGN.md): the seven cma 1.0 primitives. +- [Lodestone Section VIII](https://github.com/Clarethium/lodestone): the compound practice loop (the methodology this architecture serves). +- [Touchstone](https://github.com/Clarethium/touchstone): the measurement infrastructure for the outcome layer of validation. diff --git a/CHANGELOG.md b/CHANGELOG.md index 27c4229..62cdf2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,18 +18,47 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and - Action-time injection: PreToolUse hook for Claude Code (`hooks/claude-code-pre-tool-use.sh`) surfaces relevant prior captures automatically when Claude is about to edit a file or run a command. Heuristic surface detection from file path or command. Closes the compound loop without requiring manual `cma surface` calls. - Hook test coverage: silent for non-relevant tools, silent when no captures match, surfaces matched captures via stdin JSON, env var fallback for legacy hook protocol. - Texture preservation on misses: `--excerpt ` or `--excerpt-from ` (multi-line, preserves newlines and quotes through JSON encoding), `--intended ` (counterfactual: what was about to happen), `--corrected ` (what happened instead after correction). Texture fields preserve the conditions of failure so future surfacing can match by situation, not just keywords. 
+- ARCHITECTURE.md specifying the five-stage action-time injection pattern (interception, context extraction, query, injection, logging), reference implementation status, data contracts (surface event, miss with texture, prevention), the three-layer validation framework (process, behavior, outcome), quality criteria for integrations, failure modes to avoid, and versioning policy. The contract integrations conform to. +- Shell wrapper (`hooks/cma-pre`) implementing the action-time injection architecture for zsh and bash environments. Native preexec integration (zsh) or via bash-preexec library (bash). Manual invocation supported. `CMA_PRE_TRIGGERS` env var overrides the default trigger command list. Failure-isolated (cma missing or errored does not block the wrapped command), bounded latency (5-second timeout on cma queries), surface detection consistent with the Claude Code hook. +- `cma stats --behavior`: behavior-layer validation view. Reads texture-preserved misses (those captured with `--intended` and `--corrected`), groups by surface/fm, and surfaces representative pivots. Makes the behavior layer of the validation framework (ARCHITECTURE.md Section 5.2) computable from cma's own data without external tooling. Recurring pivots in the same surface/fm pair are evidence that surfaced warnings consistently change operator behavior. +- Performance benchmarks (`bench.sh`) measuring `cma-pre --check`, `cma surface`, and `cma stats` latency against a synthetic 100-capture data set. All operations measured under 50ms at p95, validating the ARCHITECTURE.md Section 6 latency target with concrete numbers rather than aspirational claims. Uses Python `time.perf_counter` for portability and warmup iterations to discount cold-start. +- Decision applies-when matching: `cma surface` now matches decisions by their `applies_when` field against context keywords. 
A decision with `applies_when="auth db"` surfaces whenever surface or file context contains "auth" or "db", even if the decision's stored surface differs. Closes the decision-surfacing loop: decisions surface at the moment their conditions match (action time), not only at session start or when explicitly queried by stored surface. Matching is decision-specific; misses and other capture types unchanged. Documented in ARCHITECTURE.md Section 2.3. +- Claude Code SessionStart hook (`hooks/claude-code-session-start.sh`): surfaces priming context at the start of each session — recurring failure patterns and active rejections by default, optionally behavior pivots. Configurable via `CMA_SESSION_START_SECTIONS` env var. Together with the PreToolUse hook, covers both ends of the action-time injection theme: session-boundary context and per-action surfacing during work. ARCHITECTURE.md Section 3.1a. + +### Reliability and forward-compatibility (Phase 1 polish) + +- Schema versioning on all captures: every record (miss, decision, rejection, prevention, core learning, surface event) now includes `"schema_version":"1.0"` as the first field. Future schema changes can gate migrations against this field. Forward-looking polish for 3-year corpus stability. +- Atomic write semantics: capture writes use a single python3 `f.write` syscall on the fully-composed JSON record. Atomic for records under PIPE_BUF (typically 4096 bytes); best-effort atomic for longer texture-bearing records. Replaces the prior bash `>>` append, which could interleave on long writes from concurrent cma processes. +- Tolerant read: corrupted JSONL lines are skipped with a per-file stderr warning of the form `cma: skipped N corrupted line(s) in `, instead of breaking the entire query. The corpus stays usable even when individual records are damaged. 
Implemented in `cma surface` and `cma stats --leaks`; remaining query paths (recurrence, behavior, distill --review, distill --retire) silently skip with corruption counters that will surface in Phase 2 polish. + +### Methodology integration (loose-coupling polish) + +- Documentation sanitization: removed Lodestone-coined failure-shape names from cma's docs and code examples (replaced with `` placeholders or `fm-1` generic tags). Documentation now explicitly states cma is methodology-agnostic and references Lodestone as the canonical methodology when present. Protects the methodology asset by keeping the catalog where it belongs (in Lodestone), not replicated in cma's docs. +- `CMA_FM_CLASSIFIER` plugin hook: when set and `--fm` is not provided, `cma miss` invokes the configured shell command with description on stdin and uses its first line of stdout as the failure-mode tag. Failure-isolated (5-second timeout, classifier errors do not block the capture). Operator-side wiring; cma ships no classifier. Documented in ARCHITECTURE.md Section 9. Enables operators to wire methodology-aware classification (Lodestone-aware or otherwise) without coupling cma to any specific methodology. + +### Operator confidence (Phase 2 polish) + +- DATA.md: complete schema documentation for the data directory. Layout, per-record-type schemas with examples, schema versioning policy, atomicity guarantees, tolerant-read behavior, backup recommendations, and migration policy for future schema versions. The contract for the durable corpus. +- `cma init` command: explicitly creates `~/.cma/` with an inline README pointing to DATA.md. Idempotent. Operators can run `cma init` immediately after install instead of waiting for first capture to materialize the directory. - Test suite (`test.sh`) with 42 cases covering all functional paths, edge cases (special characters, missing args, unknown flags), and JSON validity. 
- CI workflow (GitHub Actions) running the test suite on every push and pull request. - DESIGN.md specifying the seven-primitive surface and the migration from the working version. - README with quick-start, status, and license information. -### Pending for 1.0.0 +### Pending for 1.0.0 (lift checklist) -(none — all seven primitives functional in this dev branch) +The seven-primitive surface is functional and frozen in this dev +branch; the `1.0.0-dev` suffix on `VERSION` reflects the items below +that gate the lift, not unfinished feature work. -### Future (post-1.0) +- Zenodo DOI allocation (one-time GitHub-Zenodo integration in repo + settings; first DOI fires on the next published release tag). +- `cma 1.0.0` release tag (`cma-1.0.0`) cut and a corresponding + CHANGELOG.md `[1.0.0]` entry replacing this `[Unreleased]` block. +- Cross-component release notes covering the cma-mcp 0.1 lift if + the two ship in the same window (see `cma-mcp/CHANGELOG.md`). -- Generic CLI wrapper for action-time injection in environments other than Claude Code (terminal-based tools, other AI clients). +### Future (post-1.0) - Counterfactual capture: explicit "what was about to happen versus what happened" data structure for studying basin transitions. - Per-project data scoping: optional separation of captures by project rather than a single global directory. - Trained classifier on accumulated labeled corpus (long-term, requires data accumulation). diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..1e863e5 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,66 @@ +cff-version: 1.2.0 +message: >- + If you use cma in academic, applied, or compliance work, please + cite it using the metadata below. The methodology canon + (Lodestone) and the MCP distribution wrapper (cma-mcp, in this + repository under cma-mcp/) are separately citeable artifacts; + see the references block. 
+title: "cma: executable compound practice loop" +type: software +authors: + - family-names: Lucic + given-names: Lovro + orcid: "https://orcid.org/0009-0008-9976-6933" + website: "https://blog.clarethium.com" +repository-code: "https://github.com/Clarethium/cma" +url: "https://github.com/Clarethium/cma" +abstract: >- + cma is the executable companion to Lodestone, Clarethium's + canonical operator methodology for AI-coupled work. cma runs the + compound practice loop on the operator's local machine: it + captures failures, surfaces relevant prior context at the moment + of action, tracks decisions and rejected alternatives, detects + recurrence, and captures preventions. Action-time injection ships + for Claude Code (PreToolUse and SessionStart hooks) and shell + environments (zsh native preexec, bash via bash-preexec). cma is + methodology-agnostic at the substrate level; vocabulary lives in + the methodology, not in cma. +keywords: + - compound-practice + - failure-capture + - decision-tracking + - prevention + - lodestone + - clarethium +license: Apache-2.0 +version: "1.0.0" +date-released: "2026-05-06" +references: + - type: software + title: "cma-mcp: Model Context Protocol distribution for the cma compound practice loop" + authors: + - family-names: Lucic + given-names: Lovro + orcid: "https://orcid.org/0009-0008-9976-6933" + url: "https://github.com/Clarethium/cma/tree/main/cma-mcp" + license: Apache-2.0 + notes: >- + Lives in this repository under cma-mcp/. Brings the same + compound practice loop to MCP-compatible AI clients (Claude + Desktop, Cursor, Cline, Continue.dev) by wrapping cma's CLI + as a subprocess. See cma-mcp/CITATION.cff for its citable + form. 
+ - type: article + title: "Lodestone: canonical operator methodology for AI-coupled work" + authors: + - family-names: Lucic + given-names: Lovro + orcid: "https://orcid.org/0009-0008-9976-6933" + url: "https://github.com/Clarethium/lodestone" + license: CC-BY-4.0 + notes: >- + The methodology canon Clarethium publishes alongside cma. + Defines stance, the loop, calibration, altitude, failure + shapes, quality levels, surface protocols, and compound + practice (Section VIII). cma is the executable instantiation + of Lodestone Section VIII. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..effa6df --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,181 @@ +# Contributing + +The cma project ships two components in this repository: + +- **cma** (bash, repository root) — the canonical compound practice + loop reference implementation. Contributions touch `cma`, `hooks/`, + `test.sh`, `bench.sh`, and the design docs (`DESIGN.md`, + `ARCHITECTURE.md`, `DATA.md`). +- **cma-mcp** (Python, `cma-mcp/` subdirectory) — the Model Context + Protocol distribution wrapper. Contributions touch the Python + modules (`mcp_*.py`, `cma_*.py`), `cma-mcp/tests/`, and + `cma-mcp/docs/`. + +This document covers **how** contributions happen mechanically: file +layout, test requirements, PR process, and what a reviewer will +check. For **who** decides and **when** a contribution becomes +canon, see `GOVERNANCE.md`. For strategic positioning and durable +decisions, see `STRATEGY.md`. 
+ +--- + +## Repository layout + +``` +cma/ +├── cma bash CLI (the seven primitives) +├── hooks/ Claude Code + shell preexec integrations +├── test.sh, bench.sh bash test and benchmark harnesses +├── DESIGN.md ARCHITECTURE.md DATA.md cma's surface, architecture, schema +├── CHANGELOG.md cma's release history +├── README.md cma operator-facing overview +├── cma-mcp/ Python MCP distribution wrapper +│ ├── mcp_server.py mcp_*.py cma_*.py flat-modules wheel layout +│ ├── tests/ pytest suite +│ ├── docs/ MCP_SERVER, ANTICIPATED_CRITIQUES, VALIDATION_PROGRAM +│ ├── pyproject.toml PyPI metadata, build config, py-modules list +│ ├── README.md cma-mcp quickstart (also rendered on PyPI) +│ └── CHANGELOG.md cma-mcp's release history (independent of cma) +├── STRATEGY.md DECISIONS.md GOVERNANCE.md project-level governance (covers both) +├── CONTRIBUTING.md SECURITY.md CITATION.cff NOTICE LICENSE cross-cutting +└── .github/ + ├── workflows/test.yml cma's bash test workflow + ├── workflows/tests-mcp.yml cma-mcp's pytest workflow + ├── workflows/dco-check.yml Sign-off-by enforcement (entire repo) + ├── workflows/codeql.yml CodeQL scan for Python in cma-mcp/ + ├── PULL_REQUEST_TEMPLATE.md + └── ISSUE_TEMPLATE/ +``` + +The two release tracks are independent. cma's `CHANGELOG.md` at root +tracks the bash CLI's releases (cma 1.0, etc.). `cma-mcp/CHANGELOG.md` +tracks the Python wrapper's releases (cma-mcp 0.1, etc.). Tags are +prefixed accordingly (`cma-1.1`, `cma-mcp-0.2`). + +--- + +## Before you start + +1. **Read `STRATEGY.md`.** Especially §6 Durable Decisions. + Contributions that require overturning a durable decision need an + explicit overturn proposal, not a silent PR. +2. **Read `DECISIONS.md`.** New architectural changes append a new + entry; do not silently overturn an existing one. +3. **For bash cma contributions:** ensure `bash` and a working + `python3` are available (cma uses python3 for JSON escape only). 
+ Run `./test.sh` from the repository root before opening a PR. +4. **For cma-mcp contributions:** confirm bash cma is installed and + on `PATH` (cma-mcp's tests exercise real subprocess calls). + ```bash + cma --help + ``` + If cma is missing, build it from this repository: `ln -s + "$(pwd)/cma" ~/.local/bin/cma` (per cma's README quickstart). +5. **Run the full test suite for the component you touched.** + ```bash + ./test.sh # bash cma tests + cd cma-mcp && pip install -e .[test] && python3 -m pytest -q + ``` + The suite for the touched component must stay green. + +--- + +## Contribution types + +### Bug fix + +Open a PR with the fix and at least one test that fails before the +fix and passes after. Reference the issue number in the PR +description. + +### MCP protocol surface change + +Tool or resource additions, removals, or schema changes touch four +files together: + +1. `mcp_schema.py` (input/output schema) +2. `mcp_server.py` (dispatch) +3. `tests/test_mcp_server.py` (conformance test) +4. `docs/MCP_SERVER.md` (operator-facing reference) + +A PR that moves only one of the four is incomplete. Reviewers will +ask for the others. + +Surface changes also bump `SERVER_VERSION` in `mcp_server.py`: + +- patch: bug fix, no schema change +- minor: new optional field, new tool/resource +- major: schema-breaking change + +### Subprocess wrapper extension + +`cma_subprocess.py` wraps bash cma's CLI. New cma flags or behaviors +land here when: + +- cma releases a new flag that operators want exposed via MCP +- a defensive timeout, retry, or error-shape adjustment is needed + +Extensions must respect AD-004 (argv-array, never shell=True) and +AD-003 (5-second timeout). 
+ +### Test addition + +cma-mcp's adversarial coverage is split across the existing test +files: `cma-mcp/tests/test_mcp_server.py` (protocol-level boundaries: +parse error, invalid JSON-RPC, unknown method, malformed params), +`cma-mcp/tests/test_resources.py` (JSONL corruption recovery, +unknown schema-version surfacing, missing-file graceful return), +and `cma-mcp/tests/test_subprocess.py` (subprocess timeout, +missing-binary path, argv-injection-resistance probe). New +adversarial cases are always welcome; add to whichever file fits +the layer being exercised. + +`cma-mcp/tests/test_payload_determinism.py` pins the three-section +payload shape on every tool and resource. Any change that affects +payload shape requires a determinism-test update. + +--- + +## Pull request requirements + +1. **Sign off your commits with the Developer Certificate of Origin + (DCO).** This is enforced by CI. Use `git commit -s` to add + `Signed-off-by: Your Name <your.email@example.com>` to each + commit. By signing off, you certify the contribution as your work + under the rules of [DCO 1.1](https://developercertificate.org/). +2. **Tests pass on Python 3.10, 3.11, 3.12.** CI runs all three. +3. **No new external runtime dependencies** without an architectural + decision in `DECISIONS.md`. cma-mcp's runtime dependency footprint + is currently the Python standard library only; adding a + dependency requires explicit rationale. +4. **Three-section payload discipline preserved.** If the PR changes + any tool or resource response, every changed surface still + returns `{analysis, agent_guidance, provenance}`. +5. **No silent behavior change.** If a fix changes observable + behavior, document the change in `CHANGELOG.md` under + `[Unreleased]`. 
+ +--- + +## Reviewer checklist + +A PR that lands meets all of the following: + +- [ ] DCO sign-off present on every commit (CI green) +- [ ] Tests pass on all supported Python versions (CI green) +- [ ] CodeQL scan green (CI) +- [ ] Three-section payload preserved on all changed surfaces +- [ ] Schema, server, test, and docs updated together for surface changes +- [ ] CHANGELOG.md `[Unreleased]` updated +- [ ] No new runtime dependency without architectural decision +- [ ] STRATEGY.md `§6` durable decisions not silently overturned + +--- + +## Reporting issues + +Bug reports, feature requests, and protocol questions go to +[GitHub Issues](https://github.com/Clarethium/cma/issues). For +issues specific to one component, prefix the title with `[cma]` +or `[cma-mcp]` so triage can disambiguate. Security issues go to +`lovro.lucic@gmail.com` per `SECURITY.md`. diff --git a/DATA.md b/DATA.md new file mode 100644 index 0000000..4dbdaf4 --- /dev/null +++ b/DATA.md @@ -0,0 +1,217 @@ +# cma Data Directory + +This document specifies the layout and schema of cma's data directory, the durable artifact of compound practice. The data is the asset; the tool is the interface. This document is the contract for what's in `~/.cma/`. + +The default data directory is `~/.cma/`, overridable with the `CMA_DIR` environment variable. cma creates the directory on first capture; running `cma init` creates it explicitly with a README placed inside. + +## 1. Layout + +``` +$CMA_DIR/ +├── misses.jsonl Captures of failures (cma miss) +├── decisions.jsonl Captures of architectural choices (cma decision) +├── rejections.jsonl Captures of eliminated options (cma reject) +├── preventions.jsonl Captures of caught warnings (cma prevented) +├── core.jsonl Distilled learnings + retirements (cma distill) +└── surface_events.jsonl Surface query events (logged by cma surface) +``` + +All files are JSON Lines (JSONL): one JSON object per line, append-only, never edited in place. 
The format is durable, recoverable line by line, and parseable by any JSON-aware tool. + +## 2. Schema + +Every record begins with `schema_version`, `type`, `id`, and `timestamp`. Type-specific fields follow. + +### 2.1 Common fields + +| Field | Type | Description | +|-------|------|-------------| +| `schema_version` | string | Schema version. Currently `"1.0"`. Future schema changes gate migrations against this. | +| `type` | string | Record type: `miss`, `decision`, `rejection`, `prevention`, `core`, `retirement`, `surface_event`. | +| `id` | string | Unique record ID, format `YYYYMMDD-HHMMSS-<8-hex>`. UTC timestamp + random suffix. | +| `timestamp` | string | ISO 8601 UTC, format `YYYY-MM-DDTHH:MM:SSZ`. | +| `description` | string | One-line statement of what the record captures. Required on captures; not present on retirement and surface_event records. | + +### 2.2 Miss + +Captures a failure. Written by `cma miss`. + +```json +{ + "schema_version": "1.0", + "type": "miss", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "...", + "surface": "auth", + "fm": "", + "files": "src/auth/jwt.ts", + "intended": "patch only the failing test", + "corrected": "trace upstream defect, fix at root", + "excerpt": "operator: ...\nassistant: ..." +} +``` + +Optional fields: `surface`, `fm`, `files`, `intended`, `corrected`, `excerpt`. Texture fields (`intended`, `corrected`, `excerpt`) preserve the conditions of failure and enable behavior-layer validation analysis (`cma stats --behavior`). + +### 2.3 Decision + +Captures an architectural or strategic choice. Written by `cma decision`. + +```json +{ + "schema_version": "1.0", + "type": "decision", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "TOPIC: choice (rationale)", + "surface": "infra", + "applies_when": "auth db migration" +} +``` + +Optional fields: `surface`, `applies_when`. 
The `applies_when` predicate is matched against context keywords at action time so the decision surfaces when its conditions are met (see ARCHITECTURE.md Section 2.3). + +### 2.4 Rejection + +Captures an eliminated option. Written by `cma reject`. + +```json +{ + "schema_version": "1.0", + "type": "rejection", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "OPTION: reason for elimination", + "surface": "infra", + "revisit_when": "if performance becomes critical" +} +``` + +Optional fields: `surface`, `revisit_when`. + +### 2.5 Prevention + +Captures a moment where a surfaced warning prevented a recurrence. Written by `cma prevented`. + +```json +{ + "schema_version": "1.0", + "type": "prevention", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "almost X, did Y instead", + "miss_id": "20260504-...", + "warning_id": "20260504-..." +} +``` + +Optional fields: `miss_id`, `warning_id`. Linking a prevention to its original miss enables the miss's prevention rate to be computed (behavior-layer validation; see ARCHITECTURE.md Section 5.2). + +### 2.6 Core learning + +A promoted learning that surfaces permanently. Written by `cma distill <description>`. + +```json +{ + "schema_version": "1.0", + "type": "core", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "description": "the distilled rule", + "scope": "project", + "surface": "general" +} +``` + +Optional fields: `scope`, `surface`. + +### 2.7 Retirement + +Marks a core learning as retired. Written by `cma distill --retire <pattern>`. Retirements live in `core.jsonl` alongside core learnings; `cma surface` filters them out automatically. + +```json +{ + "schema_version": "1.0", + "type": "retirement", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "retires": "20260504-...", + "pattern": "auth" +} +``` + +Required fields: `retires` (the ID of the core learning being retired), `pattern` (the substring that matched). No `description` field. 
+ +### 2.8 Surface event + +Records a `cma surface` invocation and what it matched. Written automatically by `cma surface` (suppressible with `--no-log`). Used by `cma stats --leaks` to detect failures despite surfaced warnings. + +```json +{ + "schema_version": "1.0", + "type": "surface_event", + "id": "20260505-...", + "timestamp": "2026-05-05T...", + "filter_surface": "auth", + "filter_file": "", + "filter_type": "", + "filter_limit": 3, + "matched": [ + {"id": "...", "type": "miss", "surface": "auth", "fm": "..."} + ] +} +``` + +`matched` may be empty (the surface query found no records). Empty events are still recorded as evidence that surfacing was attempted. + +## 3. Schema versioning policy + +The schema follows semantic versioning. + +- **Patch versions** (1.0.x): clarifications to documentation, no record changes. +- **Minor versions** (1.x.0): backward-compatible additions. Old records remain valid; readers ignore unknown fields. Examples: adding optional fields to existing record types, adding new record types. +- **Major versions** (x.0.0): breaking changes. Old records may require migration. cma will provide migration tooling at the major version boundary. + +Readers MUST gracefully ignore unknown fields. A reader written against schema 1.0 should still parse schema 1.1 records, treating new fields as opaque metadata. + +The current schema version is `1.0`. There are currently no announced schema changes. + +## 4. Atomicity and durability + +cma writes records via a single `write()` syscall on the encoded record bytes. POSIX guarantees atomicity for `write()` calls up to `PIPE_BUF` (typically 4096 bytes on Linux). Records exceeding `PIPE_BUF` (rare; possible with long `excerpt` fields) may interleave under concurrent writes from multiple cma processes. + +For single-operator usage with manual or hook-driven captures, concurrent-write risk is negligible. Future versions may add `fcntl.flock`-based locking for multi-process scenarios. + +## 5. 
Tolerant reads + +Queries (`cma surface`, `cma stats`, `cma distill --review`, etc.) tolerate corrupted lines: a JSON parse failure on a single line is reported via stderr (`cma: skipped N corrupted line(s) in <file>`) and the query continues with the remaining records. The corpus stays usable even when individual records are damaged. + +Empty lines are silently ignored, not counted as corruption. + +## 6. Backup recommendations + +The data directory is purely append-only JSONL. Backup is straightforward: + +- **Snapshot**: copy the entire `~/.cma/` directory. +- **Incremental**: git-track the directory and commit periodically. The append-only structure makes diffs informative. +- **Synced**: store on a synced filesystem (Dropbox, iCloud, etc.). Concurrent writes across machines may interleave; prefer one machine writing at a time. + +The data is plain text JSONL; any tooling that handles JSONL handles cma data. + +## 7. Migration to future versions + +When cma 2.0 ships, migration tooling will: + +1. Read records of any prior schema version. +2. Apply any field renames, type conversions, or structural changes. +3. Write a new file alongside the original (e.g., `misses.jsonl.v2`) without modifying the original. +4. Atomically swap the new file into place after operator confirmation. + +Operators choosing to stay on schema 1.0 can do so indefinitely; cma 2.0 readers will continue to parse 1.0 records. + +## 8. References + +- [DESIGN.md](DESIGN.md): the seven cma 1.0 primitives that produce these records. +- [ARCHITECTURE.md](ARCHITECTURE.md): the action-time injection layer and three-layer validation framework that uses this data. +- [CHANGELOG.md](CHANGELOG.md): record of schema and feature changes over time. 
diff --git a/DECISIONS.md b/DECISIONS.md new file mode 100644 index 0000000..c97e449 --- /dev/null +++ b/DECISIONS.md @@ -0,0 +1,185 @@ +# Architectural Decisions + +This file records architectural decisions for the cma project at +finer grain than `STRATEGY.md` durable decisions. Entries are dated +and named. Decisions here can be revised through normal pull +request review; durable decisions in `STRATEGY.md §6` require an +explicit overturn proposal. + +Entries to date are scoped to the **cma-mcp** component (the +Python MCP wrapper under `cma-mcp/`). Architectural decisions +governing the bash cma reference implementation are documented in +`DESIGN.md`, `ARCHITECTURE.md`, and `DATA.md` at the repository +root. + +Newest first. + +--- + +## AD-008: cma-mcp lives inside Clarethium/cma as a subdirectory, not as a separate sibling repository + +**Date:** 2026-05-06 + +**Decision.** cma-mcp ships under `cma-mcp/` in the Clarethium/cma +repository alongside the canonical bash CLI rather than as a +separate `Clarethium/cma-mcp` repository. One repository, one +governance scaffold (root-level `STRATEGY.md`, `DECISIONS.md`, +`GOVERNANCE.md`, `CONTRIBUTING.md`, `SECURITY.md`, `CITATION.cff`, +`NOTICE`), two release tracks (tags prefixed `cma-1.x` and +`cma-mcp-0.x`), two CHANGELOGs (`CHANGELOG.md` for cma; `cma-mcp/CHANGELOG.md` +for cma-mcp), two CI workflows (`test.yml` for bash cma; +`tests-mcp.yml` for the Python wrapper, path-filtered to +`cma-mcp/**`). + +**Rationale.** cma-mcp stands in a *wrapper-of* relationship to cma, not a +*uses-as-substrate* relationship: every cma flag is a tool argument, +every JSONL field a parser concern, the cma `surface_events.jsonl` +schema is directly load-bearing for cma-mcp's leak-detection coverage. +Wrapper-of relationships couple their subjects tightly enough that +drift is the failure mode (STRATEGY DD-1). 
Same-repo prevents drift +structurally: schema changes, new flags, and leak-detection logic +must update wrapper and wrapped together in one PR. Separate repos +would create a coordination tax that the empire's compounding +logic actively works against. + +**Why frame-check-mcp's separate-repo pattern doesn't apply.** That +project *uses* Touchstone as a substrate; Touchstone evolves +independently. cma-mcp wraps cma. Treating frame-check-mcp's repo +shape as the empire-wide rule was the early misread that produced +the discarded `Clarethium/cma-mcp` repo on 2026-05-06; the +correction landed in this commit's predecessor. + +**Trade-off accepted.** Repo size grows with both Python and bash +content. Contributor population is slightly more mixed. Independent +release cadence is preserved through tag prefixing and per-component +CHANGELOG files; same-repo does not force same-release. + +**Reversibility.** If a future evidence point demands separation, +the `cma-mcp/` directory can be extracted to its own repo via +`git filter-repo`, preserving history. The decision is not +load-bearing on irreversible structure. + +--- + +## AD-007: Tool surface is seven verbs, resource surface is four URIs + +**Date:** 2026-05-06 + +**Decision.** Tool surface mirrors bash cma's seven primitives: +`cma_miss`, `cma_decision`, `cma_reject`, `cma_prevented`, +`cma_distill`, `cma_surface`, `cma_stats`. Resource surface is four +URIs for read-only context: `cma://decisions`, `cma://rejections`, +`cma://core`, `cma://stats`. + +**Rationale.** A model that knows bash cma's CLI knows cma-mcp's +tools without retraining. `cma_distill` and `cma_stats` carry +`mode`/`view` arguments rather than splitting into multiple tools to +keep the tool count at the bash cma 1.0 surface (seven). `cma_surface` +remains a tool, not a resource, because it logs `surface_events.jsonl` +as a side effect (load-bearing for `cma stats --leaks`). 
+ +Resources are reserved for context the agent reads to orient itself +(decisions, rejections, core learnings, stats summary). Calling +`cma stats` for non-default views (`--leaks`, `--recurrence`, +`--behavior`, `--preventions`, `--rejections`) goes through the +`cma_stats` tool with a `view` arg. + +--- + +## AD-006: cma-mcp does not bundle Lodestone's failure-shape catalog + +**Date:** 2026-05-06 + +**Decision.** No `cma://failure-shapes` resource. Tool descriptions +for `cma_miss` and `cma_prevented` reference Lodestone's FM-1..10 as +an example methodology but do not enumerate it. + +**Rationale.** STRATEGY DD-4. Methodology vocabulary lives in +Lodestone; bundling a frozen copy in cma-mcp couples release cadence +and inverts canon-vs-companion separation. Operators who want the FM +catalog read Lodestone directly; operators who want autoclassification +wire `CMA_FM_CLASSIFIER` per cma's plugin convention. + +--- + +## AD-005: Stdio transport only + +**Date:** 2026-05-06 + +**Decision.** cma-mcp ships stdio transport. SSE, WebSocket, and HTTP +transports are explicitly out of scope for v0.1. + +**Rationale.** Stdio is the universally supported transport across MCP +clients (Claude Desktop, Cursor, Cline, Continue.dev). Operators who +need multi-client server-side deployment can use one of the +forthcoming MCP gateway projects. Adding transports inside cma-mcp +would expand the surface beyond its distribution-wrapper role. + +--- + +## AD-004: subprocess.run with argv-array, never shell=True + +**Date:** 2026-05-06 + +**Decision.** Every bash cma invocation goes through +`subprocess.run([...], shell=False)` with an argv array. Operator +input never gets concatenated into a shell-interpreted string. + +**Rationale.** Argument injection is the most likely abuse path for a +local MCP server. 
 The argv-array discipline makes injection +structurally impossible: any operator-supplied string lands in a +single `argv[i]` slot and bash cma's argument parser treats it as +data, not as code. + +--- + +## AD-003: 5-second timeout on every subprocess call + +**Date:** 2026-05-06 + +**Decision.** `subprocess.run` calls all carry `timeout=5`. On +timeout, cma-mcp returns an `isError: true` response naming the +timeout and the partial command. The MCP server stays responsive; the +caller decides whether to retry. + +**Rationale.** Matches bash cma's own failure-isolated discipline +(`hooks/cma-pre` 5-second timeout on `cma surface`). A hung cma +process must not hang the MCP server. + +--- + +## AD-002: schema_version pinned to "1.0", any new schema_version emitted by bash cma surfaces as a parse warning + +**Date:** 2026-05-06 + +**Decision.** cma-mcp's JSONL parser treats records with +`schema_version: "1.0"` as native, records without that field as +legacy (parses leniently), and records with any other +`schema_version` value as a parse warning surfaced in `provenance`. + +**Rationale.** Per cma's DATA.md, schema_version is the migration +gate. cma-mcp's wrapper role means it must not silently interpret a +schema it does not recognize; surfacing the unknown schema in +`provenance` lets the caller (model and downstream user) know the +data carries assumptions cma-mcp cannot validate. + +--- + +## AD-001: Manual JSON-RPC, no MCP SDK dependency + +**Date:** 2026-05-06 + +**Decision.** Implement the MCP protocol directly in `mcp_server.py` +using JSON-RPC 2.0 over stdio. No third-party MCP SDK in +`pyproject.toml` dependencies. + +**Rationale.** STRATEGY DD-2. Echoes frame-check-mcp's +self-containment convention. The protocol surface used here +(initialize, tools/list, tools/call, resources/list, resources/read, +ping, notifications) fits in a few hundred lines and removes a class +of version-skew failures. 
+ +--- + +*Future architectural decisions append above this line, newest first.* diff --git a/DESIGN.md b/DESIGN.md index 75686f0..8d580a2 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -35,7 +35,7 @@ cma miss - `` (required, positional). One-line statement of what failed. Phrased actively: "Treated X as Y without verifying" rather than "X was treated as Y." - `--surface` (optional). The domain area: `auth`, `db`, `docs`, `ui`, `infra`, `general`, `git`. Auto-detected from file paths when `--files` is provided. -- `--fm` (optional). The failure shape from the Lodestone canonical catalog (e.g., `assumption-over-verification`, `basin-capture`). Auto-detected from keywords when not specified. +- `--fm` (optional). A failure-mode tag. cma stores the value opaquely; interpretation is the methodology's responsibility. When using a methodology with a canonical catalog (such as [Lodestone](https://github.com/Clarethium/lodestone)), tag with that methodology's canonical names. Auto-classification can be plugged in via the `CMA_FM_CLASSIFIER` env var (see ARCHITECTURE.md). - `--files` (optional). Files involved in the failure. Comma-separated list. **Output:** Confirmation with the captured description, surface, fm, and a unique miss ID. If a similar miss exists in the last 90 days, output flags the recurrence and indicates which warning weight has been incremented. diff --git a/GOVERNANCE.md b/GOVERNANCE.md new file mode 100644 index 0000000..325846a --- /dev/null +++ b/GOVERNANCE.md @@ -0,0 +1,125 @@ +# Governance + +**Scope:** Covers the cma project as a whole. The project ships as +a single repository ([Clarethium/cma](https://github.com/Clarethium/cma)) +with two components: the bash cma reference implementation +(repository root) and the cma-mcp Python distribution wrapper +(`cma-mcp/` subdirectory). One curator, one governance model, one +issue tracker. + +**Status:** Minimal v0. Documents the current de-facto governance +model. 
Formal review process and dissent handling are deferred until +a real external contributor or reviewer engages. + +**Date:** 2026-05-06 + +**Curator:** Lovro Lucic (single-curator BDFL model for v0.x). + +--- + +## Purpose + +This document names who decides what in the cma project, where +those decisions are recorded, and what is explicitly held for +future specification. It closes the reference to `GOVERNANCE.md` +in `CONTRIBUTING.md` without overcommitting to a formal review +process that has not yet been tested against a real external +reviewer. + +Strategy and durable decisions live in `STRATEGY.md`. Architectural +decisions live in `DECISIONS.md`. This document covers governance +mechanics only: who has authority, over what, by what process, and +what happens when governance itself needs to change. + +--- + +## Current state: single-curator + +The cma project is a single-curator project. **Lovro Lucic** is the +curator. +For v0.x of the project, the curator carries benevolent-dictator +authority (BDFL-style) over: + +- Which architectural decisions land in `DECISIONS.md` +- Which durable decisions land or are amended in `STRATEGY.md §6` +- Which tool/resource surface changes ship +- Which pull requests merge (reviewer, per `CONTRIBUTING.md`) +- Release timing and version numbers +- Companion-link maintenance with cma, Lodestone, Touchstone, + frame-check-mcp + +Named authorship is the project's primary credibility asset, in line +with the broader Clarethium discipline (see frame-check-mcp's +`GOVERNANCE.md` for the parallel statement). The curator is the +named author on every published release. + +--- + +## Where decisions already live + +A decision is not a decision until it lands in an authoritative +source. 
The sources below are current: + +| Decision type | Authoritative source | +|---|---| +| Strategy and durable product decisions (require overturn proposal) | `STRATEGY.md` (especially §6) | +| Architectural decisions | `DECISIONS.md` | +| Contribution workflow (PR process, tests, sign-off) | `CONTRIBUTING.md` | +| MCP protocol surface | `docs/MCP_SERVER.md` | +| Release history | `CHANGELOG.md` | +| Security policy and reporting | `SECURITY.md` | +| License | `LICENSE` (Apache-2.0); `NOTICE` (per-component summary) | +| Citable form | `CITATION.cff` | +| Anticipated critiques (construct-honesty) | `docs/ANTICIPATED_CRITIQUES.md` | +| Validation program | `docs/VALIDATION_PROGRAM.md` | + +--- + +## How a change becomes canon + +For any of the following, open a pull request against `main` with a +description that names the affected source(s) above: + +1. **Bug fix or test addition.** Reviewer approves; merge. +2. **New architectural decision.** PR adds an entry to `DECISIONS.md` + (newest-first append). Reviewer approves; merge. +3. **Durable decision change** (anything in `STRATEGY.md §6`). PR + includes both the strategy change and an explicit overturn + rationale. Curator review required. Default disposition is + conservative: durable decisions stay durable absent material new + evidence. +4. **Tool or resource surface change.** PR updates the schema in + `mcp_schema.py`, the dispatch in `mcp_server.py`, the tests in + `tests/test_mcp_server.py`, and the docs in `docs/MCP_SERVER.md`. + All four must move together. +5. **Companion-link change** (text in `STRATEGY.md §3` or `README.md` + that references Lodestone, Touchstone, cma, frame-check-mcp). + Coordinate with the affected companion repo's curator before + merge. Currently the curator is the same person; that simplifies + coordination but does not exempt it. 
+ +--- + +## Explicitly deferred + +The following are deferred until evidence or external engagement +forces a position: + +- **Multi-contributor governance.** Formal review process with + named reviewers, dissent procedure, and conflict resolution. + Deferred until a sustained external contributor exists. +- **Suggestion/RFC process** modeled on Touchstone's + `SUGGESTIONS/PROCESS.md`. cma-mcp is small enough that PR review + is sufficient for v0.x; an RFC layer may add unwanted weight. +- **Trademark and brand policy.** cma-mcp is published under + Clarethium; brand decisions defer to the Clarethium-level + curator. + +--- + +## When governance itself needs to change + +Curator amends this document via PR. New external contributors +sustained over 90 days warrant moving from BDFL to a named-reviewer +model; the move itself is recorded as a `STRATEGY.md` durable +decision. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..313fad0 --- /dev/null +++ b/NOTICE @@ -0,0 +1,63 @@ +cma project (cma + cma-mcp) +Copyright 2026 Lovro Lucic + +Published under Clarethium (https://github.com/Clarethium). + +This product includes software developed by Lovro Lucic. + +Components +---------- + +This repository ships two components, both Apache-2.0 licensed: + +- **cma** (repository root): the canonical bash reference + implementation of the compound practice loop. Seven primitives + (miss, decision, reject, prevented, surface, distill, stats), + texture preservation, recurrence detection, leak detection. + Action-time injection via Claude Code hooks (PreToolUse + + SessionStart) and shell preexec wrappers (zsh native, bash via + bash-preexec). +- **cma-mcp** (cma-mcp/ subdirectory): the Python Model Context + Protocol distribution wrapper. Exposes the same seven primitives + and four context-resource surfaces to MCP-compatible AI clients + (Claude Desktop, Cursor, Cline, Continue.dev). 
Subprocess + wrapper around the bash cma binary; methodology-agnostic + substrate; three-section payload discipline. + +Licensing +--------- + +Code in this repository is licensed under Apache 2.0; see LICENSE +at repository root. + +External works referenced +------------------------- + +Reference methodology lives in Lodestone +(https://github.com/Clarethium/lodestone), licensed CC-BY 4.0. +Neither cma nor cma-mcp bundles Lodestone content. Operators who +tag captures with a methodology's failure-mode vocabulary (such as +Lodestone's FM-1..10) are responsible for citing that methodology +in their own work. + +Verification substrate referenced in design documents is Touchstone +(https://github.com/Clarethium/touchstone), Apache 2.0 (library) +and CC-BY 4.0 (Standard). + +Companion projects in the Clarethium body +----------------------------------------- + +- Lodestone: canonical operator methodology (CC-BY 4.0) +- Touchstone: model-independent measurement substrate +- frame-check-mcp: applied vehicle, sibling MCP using Touchstone + (https://github.com/Clarethium/frame-check-mcp) + +Sibling-MCP pattern conformance +------------------------------- + +cma-mcp's three-section payload discipline, governance scaffolding, +and adversarial test architecture follow the conventions established +by frame-check-mcp. Where this project diverges (e.g., bundling the +MCP wrapper alongside the wrapped binary in one repository rather +than as a separate sibling repo), the divergence is named in +DECISIONS.md. 
diff --git a/README.md b/README.md index 3c16c7e..9af0a2c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ # cma +[![tests](https://github.com/Clarethium/cma/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/Clarethium/cma/actions/workflows/test.yml) +[![tests-mcp](https://github.com/Clarethium/cma/actions/workflows/tests-mcp.yml/badge.svg?branch=main)](https://github.com/Clarethium/cma/actions/workflows/tests-mcp.yml) +[![codeql](https://github.com/Clarethium/cma/actions/workflows/codeql.yml/badge.svg?branch=main)](https://github.com/Clarethium/cma/actions/workflows/codeql.yml) +[![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](LICENSE) + Executable compound practice loop. The terminal-side companion to [Lodestone](https://github.com/Clarethium/lodestone). ## What this is @@ -10,31 +15,34 @@ The methodology lives in Lodestone. cma is what running that methodology looks l ## Status -cma 1.0 reference implementation. All seven primitives fully functional: `cma miss`, `cma decision`, `cma reject`, `cma prevented`, `cma surface`, `cma distill` (default + `--review` + `--retire`), and `cma stats` (default + `--rejections` + `--preventions` + `--recurrence` + `--leaks`). Test suite (55 cases) covers functional paths, edge cases, JSON validity, and the leak-detection join. +cma 1.0 reference implementation. All seven primitives fully functional: `cma miss`, `cma decision`, `cma reject`, `cma prevented`, `cma surface`, `cma distill` (default + `--review` + `--retire`), and `cma stats` (default + `--rejections` + `--preventions` + `--recurrence` + `--leaks` + `--behavior`). Action-time injection (Claude Code hook + shell wrapper). Texture preservation on misses. Test suite (98 cases) covers functional paths, edge cases, JSON validity, the leak-detection join, hook integration, and shell wrapper modes. The full surface is specified in [DESIGN.md](DESIGN.md). 
 Additive features (action-time injection, texture preservation, counterfactual capture, recurrence detection) layer on without changing the locked surface. ## Quick start +**Requirements:** bash 3.2 or newer (Linux, macOS native, and Windows-WSL all work; the `cma` binary uses no bash 4+ features so macOS's system `/bin/bash` is sufficient), and `python3` (used only for JSON escaping). No package-manager dependencies. + Clone the repository and add the script to your `PATH`: ```bash git clone https://github.com/Clarethium/cma.git ln -s "$(pwd)/cma/cma" ~/.local/bin/cma # or copy to anywhere on PATH +cma init # create the data directory with a README ``` Capture a failure: ```bash cma miss "fix removed the error message instead of addressing the defect" \ - --surface infra --fm speed-over-understanding + --surface infra --fm <failure-mode> ``` For richer capture (texture preservation), add the situational fields: ```bash -cma miss "missed JWT expiration in middleware" \ - --surface auth --fm assumption-over-verification \ +cma miss "missed validation in middleware" \ + --surface auth --fm <failure-mode> \ --intended "patch only the failing test" \ --corrected "trace upstream defect, fix at root" \ --excerpt-from /tmp/conversation-excerpt.txt @@ -42,22 +50,39 @@ cma miss "missed JWT expiration in middleware" \ The texture fields (`--excerpt`, `--intended`, `--corrected`) preserve the conditions of the failure so future surfacing can match by situation, not just keywords. -Captures are written to `~/.cma/` as JSON Lines files (one record per line, append-only). The data directory can be overridden with `CMA_DIR=/path/to/data cma ...`. +The `--fm` value is an opaque string from the operator's perspective; cma stores it without interpretation. When using a methodology with a canonical failure-mode catalog (such as [Lodestone](https://github.com/Clarethium/lodestone)), tag with the methodology's canonical names so analysis tooling can interpret them. 
cma is methodology-agnostic; the catalog and its meaning live in the methodology, not in cma. + +Captures are written to `~/.cma/` as JSON Lines files (one record per line, append-only). The data directory can be overridden with `CMA_DIR=/path/to/data cma ...`. The full schema, atomicity guarantees, and migration policy are documented in [DATA.md](DATA.md). Run `cma --help` for the full command surface. -## Action-time injection (Claude Code) +## Action-time injection + +cma surfaces relevant prior captures automatically when an operator (or AI assistant) is about to act. The five-stage architecture (interception, context extraction, query, injection, logging) is documented in [ARCHITECTURE.md](ARCHITECTURE.md). Two reference integrations ship in this repository. + +### Claude Code + +Two hooks for Claude Code: a `PreToolUse` hook for per-action surfacing and a `SessionStart` hook for session-priming context. -cma includes a PreToolUse hook for Claude Code in [`hooks/claude-code-pre-tool-use.sh`](hooks/claude-code-pre-tool-use.sh). When Claude is about to edit a file or run a command, the hook surfaces relevant prior captures automatically — the surfacing step of the compound loop without manual `cma surface` invocation. +**Per-action surfacing** (`hooks/claude-code-pre-tool-use.sh`): when Claude is about to use a tool that touches a file or runs a command, the hook detects surface heuristically from the tool input, queries `cma surface`, and writes matched captures to stdout. Claude Code injects them as additional context. Silent for non-relevant tools (`Read`, etc.) and when no captures match. -Install: +**Session-priming context** (`hooks/claude-code-session-start.sh`): at the start of each session, surfaces recurring failure patterns and active rejections so the assistant has orientation before the first tool call. Configurable via `CMA_SESSION_START_SECTIONS` (default `recurrence,rejections`; set to `all` for `recurrence,rejections,behavior`). -1. 
Ensure `cma` is on your `PATH` (see Quick start above). -2. Add a hook entry to `~/.claude/settings.json`: +Install both: ```json { "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "bash /path/to/cma/hooks/claude-code-session-start.sh" + } + ] + } + ], "PreToolUse": [ { "hooks": [ @@ -72,9 +97,52 @@ Install: } ``` -The hook detects the relevant surface heuristically from the file path or command (`auth`, `payments`, `db`, `api`, `ui`, `docs`, `test`), then queries `cma surface --surface ` for matching captures. Output goes to the assistant's context. Non-matching tool calls and tools that don't touch files are silent. +Together the two hooks cover both ends of the action-time injection theme: priming context at session start, relevant captures at each action. + +### Shell (zsh, bash) + +`hooks/cma-pre` is a wrapper for shell environments. Surface detection uses the same heuristics as the Claude Code hook, so behavior is consistent across integrations. + +**zsh** (native preexec). Add to `~/.zshrc`: + +```bash +preexec() { /path/to/cma/hooks/cma-pre --check "$1"; } +``` + +**bash** (requires [bash-preexec](https://github.com/rcaloras/bash-preexec)): + +```bash +source /path/to/bash-preexec.sh +preexec_functions+=("cma_pre_hook") +cma_pre_hook() { /path/to/cma/hooks/cma-pre --check "$1"; } +``` + +**Manual wrapping**: + +```bash +/path/to/cma/hooks/cma-pre git commit -m "fix auth bug" +# Surfaces relevant captures, then runs the command +``` + +Triggers fire on commands likely to warrant surfacing: editors (`vim`, `nvim`, `emacs`, `code`, `subl`), version control (`git`), language toolchains (`npm`, `cargo`, `python`, `node`), and build tools (`make`, `gradle`, `mvn`). Override the trigger list with `CMA_PRE_TRIGGERS` (space-separated). + +Failure isolation: if cma is missing, errors, or times out (default 5 seconds), the wrapped command still runs cleanly. The wrapper never blocks an action on its own failure. 
-Every fire is logged as a surface event, so `cma stats --leaks` can later flag failures that occurred despite a relevant warning being surfaced — the closing step of the compound loop turning into evidence. +Both integrations log surface events to `~/.cma/surface_events.jsonl`. `cma stats --leaks` later joins these events against subsequent misses to flag failures that occurred despite a relevant warning being surfaced — the validation evidence that the loop closes. + +## MCP distribution (cma-mcp) + +The bash CLI and shell hooks above cover Claude Code and any +shell-environment operator. For operators reaching the loop +through other MCP-compatible AI clients (Claude Desktop, Cursor, +Cline, Continue.dev), the same compound practice loop ships as a +Python MCP server at [`cma-mcp/`](cma-mcp/). Subprocess wrapper +around this binary; methodology-agnostic substrate; three-section +payload discipline. PyPI: `pip install cma-mcp`. + +See [`cma-mcp/README.md`](cma-mcp/README.md) for the MCP-specific +quickstart and tool surface, and [STRATEGY.md](STRATEGY.md) for +the durable decisions governing the wrapper. ## Testing @@ -86,19 +154,51 @@ Tests cover all capture verbs (normal and edge cases including special character ## The Clarethium body -cma sits alongside three reference artifacts published by Clarethium: +cma sits alongside two open reference artifacts published by Clarethium: - **Touchstone** validates work against quality standards. -- **Whetstone** sharpens craft. - **Lodestone** orients practice. cma is the executable companion to Lodestone. The doctrine is in Lodestone; the running code is here. +## Methodology integration + +cma is methodology-agnostic. The `--fm` field on captures is an opaque string; cma stores it without interpretation. When using a methodology with a canonical failure-mode catalog (such as [Lodestone](https://github.com/Clarethium/lodestone)), tag with the methodology's canonical names. 
 The methodology owns the vocabulary and its meaning; cma owns the data substrate. + +For automatic classification at capture time, set `CMA_FM_CLASSIFIER` to a command that reads the description on stdin and emits the failure-mode tag on stdout: + +```bash +export CMA_FM_CLASSIFIER=/path/to/your-classifier + +cma miss "the operator skipped verification before deploying" +# Classifier auto-tags the --fm value based on the description. +``` + +The classifier is operator-side. It can be Lodestone-aware (mapping descriptions to Lodestone's canonical failure shapes), methodology-specific, or generic. cma calls it as an opaque command. Failure-isolated: if the classifier errors, is missing, or times out (5s), the capture proceeds without an `--fm` value. See [ARCHITECTURE.md Section 9](ARCHITECTURE.md) for the full integration pattern. + +## Architecture + +cma's action-time injection layer follows a five-stage architecture (interception, context extraction, query, injection, logging). The pattern, reference implementations, data contracts, and validation framework are specified in [ARCHITECTURE.md](ARCHITECTURE.md). Read it before writing a new integration; conform to its contracts so downstream analysis tooling stays consistent. + +### Performance + +ARCHITECTURE.md Section 6 specifies <50ms typical latency for action-time injection. Measured against a synthetic 100-capture data set (`./bench.sh`): + +| Operation | Median | p95 | +|-----------|--------|-----| +| `cma-pre --check` (no match) | 6ms | 10ms | +| `cma-pre --check` (matched surface) | 36ms | 43ms | +| `cma surface --surface <surface>` | 27ms | 31ms | +| `cma stats --recurrence` | 26ms | 31ms | +| `cma stats` (default summary) | 8ms | 9ms | + +All operations stay under the 50ms target at p95. Cold-start invocations (first call in a fresh shell) may run higher; the wrapper warms up after a few hooks fire. + ## Roadmap -The 1.0 surface is locked (see [DESIGN.md](DESIGN.md)) and all seven primitives are functional. 
Action-time injection is implemented for Claude Code (see [Action-time injection](#action-time-injection-claude-code) above). +The 1.0 surface is locked (see [DESIGN.md](DESIGN.md)) and all seven primitives are functional. Action-time injection ships for Claude Code (PreToolUse and SessionStart hooks) and for shell environments (zsh native preexec, bash via bash-preexec). Both follow the five-stage architecture in [ARCHITECTURE.md](ARCHITECTURE.md). -Beyond 1.0: generic CLI wrapper (for environments other than Claude Code), texture preservation on misses (conversation excerpt, intended action, corrected action), counterfactual capture, per-project data scoping. See [CHANGELOG.md](CHANGELOG.md) for the full pending list. +Beyond 1.0: counterfactual capture analysis tooling, per-project data scoping, recency-weighted surface ranking. See [CHANGELOG.md](CHANGELOG.md) for the full pending list. ## License diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..cb97585 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,72 @@ +# Security Policy + +This policy covers both components in this repository: bash cma +(repository root) and cma-mcp (`cma-mcp/` subdirectory). + +## Supported versions + +| Component | Version | Supported | +|---|---|---| +| cma | 1.0.x | Yes | +| cma | < 1.0 | No | +| cma-mcp | 0.1.x | Yes | +| cma-mcp | < 0.1 | No | + +Security fixes land on the latest minor release line of each +component; older minor releases are not backported. Operators +tracking security posture should pin to the latest published +artifact and update on each minor release. + +## Reporting a vulnerability + +If you find a security issue in either component, do not open a +public GitHub issue. Email `lovro.lucic@gmail.com` with +`[cma security]` in the subject line. Include: + +- Affected component (cma or cma-mcp) and version + (`cma --version` or `cma-mcp --version`) +- Reproduction steps +- Impact assessment if you have one + +Acknowledgement within 7 days. 
Disclosure timeline negotiated on the +specifics; default is 90 days from acknowledgement to public +disclosure or coordinated release of a fix, whichever comes first. + +## Threat model + +Both components run locally on the operator's machine. The bash +cma CLI runs in an interactive terminal session (or as a hook +invoked by Claude Code, zsh, or bash-preexec). cma-mcp runs as an +MCP server (stdio transport) spawned by the operator's MCP client. +Their combined threat surface is limited: + +1. **Untrusted input from MCP clients.** Tool arguments and resource + read URIs originate in the MCP client (which itself runs an LLM + agent). All inputs are validated against schemas before being + passed to the bash cma subprocess. Argument injection into the + subprocess is prevented by passing arguments as an argv array, + never as a shell-interpolated string. +2. **Filesystem reads.** cma-mcp reads `$CMA_DIR/*.jsonl` files + (default `~/.cma/`). It honors the `CMA_DIR` environment variable + for redirection. cma-mcp does not read files outside this + directory, and resource URIs do not accept arbitrary filesystem + paths. +3. **Subprocess execution.** cma-mcp invokes the `cma` bash binary + from the operator's `PATH`. Operators are responsible for + confirming the `cma` binary on their `PATH` is the canonical one + (run `cma --version` and verify the SHA against the + [Clarethium/cma](https://github.com/Clarethium/cma) release). +4. **No network calls.** cma-mcp performs zero network I/O. No + telemetry, no remote configuration, no external dependencies at + runtime beyond the Python standard library. + +## Out of scope + +- Protection against a malicious MCP client. An operator who + deliberately wires their MCP client to an untrusted server is + outside this threat model. cma-mcp is the server; the trust + boundary is the operator's local machine. +- Protection against malicious local processes. 
A process running + with the operator's filesystem permissions can already read or + modify `~/.cma/` directly. cma-mcp does not add or subtract from + that surface. diff --git a/STRATEGY.md b/STRATEGY.md new file mode 100644 index 0000000..1fe2349 --- /dev/null +++ b/STRATEGY.md @@ -0,0 +1,252 @@ +# cma Project Strategy + +**Scope.** This document covers the cma project as a whole. The +project ships under one repository +([Clarethium/cma](https://github.com/Clarethium/cma)) with two +components: + +- **cma** — the canonical bash reference implementation of the + compound practice loop. Surface, primitives, and architecture + live in `DESIGN.md`, `ARCHITECTURE.md`, `DATA.md`, and the CLI's + own README. cma is the project's anchor; design decisions about + the loop's shape are documented there and in + [Lodestone](https://github.com/Clarethium/lodestone) (the + methodology canon). +- **cma-mcp** — the Python MCP distribution wrapper. Lives under + `cma-mcp/` in this repository. Most durable decisions in §6 + govern this component. + +Both components are Apache-2.0, share one governance model +(`GOVERNANCE.md`), one set of cross-cutting docs (`SECURITY.md`, +`CITATION.cff`, `NOTICE`, `CONTRIBUTING.md`), one issue tracker, +and this strategy document. They release independently with +separate version tags and separate CHANGELOG files (`CHANGELOG.md` +for cma; `cma-mcp/CHANGELOG.md` for cma-mcp). + +**Status:** v0.1 strategy. Versioned alongside the codebase; +durable decisions in §6 require an explicit overturn proposal to +change. + +**Curator:** Lovro Lucic (BDFL, single-curator model for v0.x). + +--- + +## 1. What cma-mcp is, in one sentence + +cma-mcp is the Model Context Protocol distribution wrapper that brings +the [cma](https://github.com/Clarethium/cma) compound practice loop +to operator environments outside Claude Code's native hook surface. + +## 2. What cma-mcp is not + +- Not a reimplementation of cma. 
cma-mcp invokes the canonical bash + cma binary as a subprocess. Drift is the enemy. +- Not a replacement for cma. Operators with shell access continue to + use bash cma directly; cma-mcp is the layer for operators reaching + the loop through an MCP-compatible client. +- Not a methodology. Failure-mode vocabulary, the practice loop's + shape, calibration, and altitude all live in Lodestone. cma-mcp is + methodology-agnostic at the substrate level: `--fm` is an opaque + string passed through to cma. +- Not an applied vehicle in the [Frame Check](https://github.com/Clarethium/frame-check-mcp) + sense. Frame Check applies Touchstone substrate to a specific + context (structural framing analysis). cma-mcp does not apply a + measurement substrate; it distributes a practice loop. + +## 3. Position in the Clarethium body + +The [Clarethium](https://github.com/Clarethium) empire publishes four +substantive open reference artifacts: + +- **Lodestone** orients the operator's practice (methodology canon). +- **Touchstone** validates work against quality standards (measurement + substrate). +- **cma** runs the compound practice loop on the operator's machine + (terminal-side companion to Lodestone). +- **Frame Check** applies Clarethium methodology in a specific + context (the first applied vehicle, distributed as an MCP). + +cma-mcp is a **distribution channel for cma**. Same loop, broader reach: + +| Operator environment | Integration | +|---|---| +| Bash, zsh, fish | cma CLI directly | +| Claude Code | cma's PreToolUse + SessionStart hooks | +| Claude Desktop, Cursor, Cline, Continue.dev, any MCP client | cma-mcp | + +The org profile names cma-mcp under "companion tooling": `cma + cma-mcp`. + +## 4. Why a thin distribution layer is the right shape + +cma-mcp's contribution is reach, not new capability. 
Three principles +follow: + +- **Drift is the enemy.** Reimplementing cma's seven primitives in + Python would duplicate a 1.0 surface (98-test suite, texture + preservation, recurrence detection, leak detection) and lag behind + canonical cma's evolution. The subprocess wrapper picks up cma's + evolution automatically. +- **Empire-conformant payload discipline.** Every cma-mcp tool and + resource response carries three sections (`analysis`, + `agent_guidance`, `provenance`), matching the construct-honesty + pattern established by frame-check-mcp. Agents passing cma-mcp + output to users without attribution would strip the discipline that + makes the loop's evidence worth citing; the agent_guidance block + exists to prevent that. +- **Methodology-agnostic substrate.** Lodestone owns the failure-mode + vocabulary; cma stores `--fm` as an opaque string; cma-mcp passes + it through unchanged. An MCP server that bundled Lodestone's FM-1..10 + catalog would couple cma-mcp to Lodestone's release cadence and + invert the canon-vs-companion separation. + +## 5. Distribution + +PyPI under the package name `cma-mcp`. Entry point installed as +`cma-mcp` (matching frame-check-mcp's convention: `frame-check-mcp = +"mcp_server:cli"`). + +Version 0.1.0 is the first release; semver, with PEP 440 pre-release +markers (`.dev0`) during the dev window. CHANGELOG.md tracks the +release history in Keep a Changelog format. + +DOI on Zenodo for citable form (allocated at first PyPI release). + +## 6. Durable decisions + +Decisions in this section are durable: they require an explicit +overturn proposal to change. New durable decisions land here when +their consequences span multiple components or when an early +contributor would otherwise need to rediscover the rationale. + +### DD-1: Subprocess wrapper, not reimplementation + +**Decision.** cma-mcp invokes the canonical `cma` bash binary as a +subprocess for every captured action. 
cma-mcp does not reimplement +cma's seven primitives in Python. + +**Rationale.** bash cma is the canonical 1.0 reference implementation +with a 98-test suite. Any reimplementation introduces drift; the +empire's compounding logic favors thin wrappers over parallel codebases. +Reimplementation would also duplicate texture preservation, recurrence +detection, and leak-detection logic that bash cma already validates. + +**Trade-off accepted.** cma-mcp requires a working `cma` binary on +the operator's `PATH`. See DD-3 for the platform stance. + +### DD-2: Manual JSON-RPC, no MCP SDK dependency + +**Decision.** cma-mcp implements the Model Context Protocol over +stdio using JSON-RPC 2.0 line-delimited, in-repo. No dependency on a +third-party MCP SDK. + +**Rationale.** The MCP protocol surface used here (initialize, +tools/list, tools/call, resources/list, resources/read, ping, +notifications) is small enough that implementing it in-repo keeps +cma-mcp self-contained: no extra install step, no SDK version drift, +no transitive dependency exposure. This matches frame-check-mcp's +convention. + +### DD-3: Bash dependency, WSL-universal stance + +**Decision.** cma-mcp requires a working bash environment to invoke +the `cma` binary. On Linux and macOS, bash is part of the operating +system. On Windows, cma-mcp requires WSL. + +**Rationale.** Every operator running an MCP-compatible AI client on +Windows (Claude Desktop, Cursor, Cline, Continue.dev) is reasonably +expected to have WSL available. Claude operators specifically tend to +use WSL because Claude Code's own integration patterns favor it. +Standalone Python reimplementation would lift this dependency at the +cost of DD-1's drift-vs-canonical concerns; the bash dependency is +the deliberately-paid price of canonical-cma alignment. + +**Trade-off accepted.** Operators on a pure Windows host with no WSL +cannot run cma-mcp. This is named clearly in the README so no operator +reaches install-time confusion. 
+ +### DD-4: Methodology-agnostic substrate + +**Decision.** cma-mcp stores `--fm` (failure-mode tag) as an opaque +string. cma-mcp does not bundle Lodestone's FM-1..10 catalog or any +other methodology's vocabulary. + +**Rationale.** Vocabulary is the methodology's responsibility (in +Clarethium's case, Lodestone). Bundling vocabulary into the substrate +inverts the canon-vs-companion separation. Operators who want +methodology-aware classification at capture time use bash cma's +`CMA_FM_CLASSIFIER` plugin hook, which cma-mcp inherits because it +shells out to bash cma. + +### DD-5: Three-section payload discipline + +**Decision.** Every tool response and every resource read returns a +JSON payload with three top-level sections: `analysis` (the data), +`agent_guidance` (what the tool can and cannot tell the agent, how +to cite faithfully), and `provenance` (versions, license, cost, +citation). Adversarial tests pin the structure (see +`tests/test_payload_determinism.py`). + +**Rationale.** Established by frame-check-mcp; preserved here because +agents passing cma-mcp output to users without attribution would +strip the construct-honesty discipline. Surfacing "how to cite +faithfully" inside the payload is the structure that carries the +discipline forward. + +### DD-6: Schema parity with bash cma + +**Decision.** cma-mcp reads the same JSONL files bash cma writes +(`misses.jsonl`, `decisions.jsonl`, `rejections.jsonl`, +`preventions.jsonl`, `core.jsonl`, `surface_events.jsonl`). cma-mcp +does not write records itself; bash cma does. Any future Python-side +write path must produce records byte-identical in field set to bash +cma's writes. + +**Rationale.** Forward and reverse compatibility with bash cma are +required for operators who run both interfaces. cma's DATA.md is the +canonical schema reference. + +### DD-7: Apache-2.0 + CC-BY-4.0 licensing aligned with empire + +**Decision.** Code under Apache-2.0. Documentation and any reference +data under CC-BY-4.0. 
Matches the rest of the Clarethium body. + +--- + +## 7. Explicitly deferred + +Items deferred until evidence accumulates or an explicit forcing +function arrives: + +- **Resource-update notifications** (`notifications/resources/updated`). + Most MCP clients re-fetch resources per read; the notification + channel adds complexity without clear demand. Defer until a client + asks. +- **Session-priming resource analogous to bash cma's SessionStart + hook.** MCP has no native session-start hook; the closest equivalent + is the `instructions` field on the `initialize` handshake. v0.1 + ships a minimal `instructions` field. Richer session-priming + awaits operator feedback on whether `instructions` is enough. +- **Validation program for the loop closing through cma-mcp.** + Designing the empirical claim ("with cma-mcp installed, + prevention/miss ratio measured on a longitudinal corpus") and the + protocol to test it is post-launch work. Tracked in + `cma-mcp/docs/VALIDATION_PROGRAM.md`. + +## 8. Where decisions live + +A decision is not a decision until it lands in an authoritative +source. 
+ +| Decision type | Authoritative source | +|---|---| +| Strategy and durable decisions | `STRATEGY.md` (this file) | +| Architectural decisions | `DECISIONS.md` | +| Governance mechanics (who decides, by what process) | `GOVERNANCE.md` | +| Contribution mechanics | `CONTRIBUTING.md` | +| Release history | `CHANGELOG.md` | +| MCP protocol surface | `cma-mcp/docs/MCP_SERVER.md` | +| Anticipated critiques (construct-honesty discipline) | `cma-mcp/docs/ANTICIPATED_CRITIQUES.md` | +| Validation program | `cma-mcp/docs/VALIDATION_PROGRAM.md` | +| cma's CLI surface, primitives, schema | `DESIGN.md`, `ARCHITECTURE.md`, `DATA.md` | +| cma's release history | `CHANGELOG.md` | +| cma-mcp's release history | `cma-mcp/CHANGELOG.md` | diff --git a/bench.sh b/bench.sh new file mode 100755 index 0000000..84a30a3 --- /dev/null +++ b/bench.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# bench.sh - Performance benchmarks for cma's action-time injection layer. +# +# ARCHITECTURE.md Section 6 specifies <50ms typical end-to-end overhead for +# integration calls. This script measures `cma-pre --check` (the hook path) +# and `cma surface` (the underlying query) so the claim is verifiable rather +# than aspirational. +# +# Run from repository root: ./bench.sh + +set -uo pipefail + +CMA="$(cd "$(dirname "$0")" && pwd)/cma" +PRE="$(cd "$(dirname "$0")" && pwd)/hooks/cma-pre" + +# Set up a clean data directory and populate it with realistic data +CMA_DIR=$(mktemp -d) +export CMA_DIR +trap 'rm -rf "$CMA_DIR"' EXIT + +# Make cma command findable by cma-pre (which calls it via PATH) +PATH_BIN=$(mktemp -d) +ln -sf "$CMA" "$PATH_BIN/cma" +export PATH="$PATH_BIN:$PATH" +trap 'rm -rf "$CMA_DIR" "$PATH_BIN"' EXIT + +# Populate with 100 realistic captures across multiple surfaces +echo "Populating $CMA_DIR with 100 captures..." 
+surfaces=(auth payments db api ui docs test) +fms=(fm-1 fm-2 fm-3 fm-4 fm-5) +for i in $(seq 1 100); do + surface=${surfaces[$((RANDOM % ${#surfaces[@]}))]} + fm=${fms[$((RANDOM % ${#fms[@]}))]} + cma miss "synthetic miss $i for benchmark" --surface "$surface" --fm "$fm" >/dev/null +done + +# Time a single invocation in milliseconds. Uses python3's perf_counter for +# portability (Linux %N differs from macOS BSD date, NTP corrections can +# cause negative deltas with naive arithmetic). +time_ms() { + python3 -c ' +import subprocess, sys, time +cmd = sys.argv[1:] +t0 = time.perf_counter() +subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) +print(int((time.perf_counter() - t0) * 1000)) +' "$@" +} + +# Run a benchmark with warmup, then N timed iterations. Report min/median/p95. +bench() { + local name="$1" + local n="$2" + shift 2 + # Warmup: 3 throwaway iterations so cold-start does not skew first sample + for i in 1 2 3; do + time_ms "$@" >/dev/null + done + local samples=() + for i in $(seq 1 "$n"); do + samples+=( "$(time_ms "$@")" ) + done + local sorted=() + mapfile -t sorted < <(printf '%s\n' "${samples[@]}" | sort -n) + local min="${sorted[0]}" + local median="${sorted[$((n / 2))]}" + local p95_idx=$(( (n * 95) / 100 )) + [[ "$p95_idx" -ge "$n" ]] && p95_idx=$((n - 1)) + local p95="${sorted[$p95_idx]}" + printf " %-40s min=%4sms median=%4sms p95=%4sms\n" "$name" "$min" "$median" "$p95" +} + +echo "" +echo "Latency benchmarks (lower is better; ARCHITECTURE.md target: <50ms typical)" +echo "" + +bench "cma surface --surface auth" 20 "$CMA" surface --surface auth +bench "cma surface --type miss" 20 "$CMA" surface --type miss --limit 5 +bench "cma stats (default summary)" 20 "$CMA" stats +bench "cma stats --recurrence" 20 "$CMA" stats --recurrence +bench "cma-pre --check (matched surface)" 20 bash "$PRE" --check "git commit -m fix-auth" +bench "cma-pre --check (no match)" 20 bash "$PRE" --check "ls /tmp" +bench "cma-pre --check 
(non-trigger)" 20 bash "$PRE" --check "echo hello" + +echo "" +echo "Captures: 100 misses across 7 surfaces, 5 failure shapes." +echo "Data directory: $CMA_DIR" diff --git a/cma b/cma index b330f5c..c97312b 100755 --- a/cma +++ b/cma @@ -8,6 +8,7 @@ set -euo pipefail VERSION="1.0.0-dev" +SCHEMA_VERSION="1.0" CMA_DIR="${CMA_DIR:-$HOME/.cma}" mkdir -p "$CMA_DIR" @@ -51,6 +52,11 @@ cma decision [--surface ] [--applies-when ] Capture an architectural or strategic choice. Format the description as "TOPIC: choice (rationale)". + +The --applies-when predicate is matched against context at action time: +when surface or file context contains any keyword in applies-when, the +decision surfaces alongside other captures. Decisions thus apply when their +conditions are met, not only when explicitly queried. EOF ;; reject) @@ -108,16 +114,33 @@ write_capture() { id=$(new_id) timestamp=$(iso_now) - { - printf '{"type":%s,"id":"%s","timestamp":"%s","description":%s' \ - "$(json_escape "$type")" "$id" "$timestamp" "$(json_escape "$description")" + # Compose the entire JSON record, then append it via a single write() + # syscall. POSIX append (>>) is atomic for writes up to PIPE_BUF + # (typically 4096 bytes on Linux). Texture fields (excerpt, intended, + # corrected) can exceed this. Using python3's f.write of the full + # record is a single write() syscall, the best we can do without + # explicit file locking, and is atomic for the common case. 
+ local record + record=$( + printf '{"schema_version":"%s","type":%s,"id":"%s","timestamp":"%s","description":%s' \ + "$SCHEMA_VERSION" "$(json_escape "$type")" "$id" "$timestamp" "$(json_escape "$description")" while [[ $# -gt 1 ]]; do local key="$1" value="$2" shift 2 [[ -n "$value" ]] && printf ',%s:%s' "$(json_escape "$key")" "$(json_escape "$value")" done - printf '}\n' - } >> "$CMA_DIR/$file" + printf '}' + ) + + # Bash command substitution strips trailing newlines; we add the JSONL + # record terminator inside the python writer to ensure each record is + # one line, atomically. + python3 -c ' +import sys +path, data = sys.argv[1], sys.argv[2] +with open(path, "ab") as f: + f.write((data + "\n").encode("utf-8")) +' "$CMA_DIR/$file" "$record" echo "Captured $type $id" } @@ -143,6 +166,19 @@ cmd_miss() { *) echo "cma miss: unknown flag: $1" >&2; usage_for miss; exit 1 ;; esac done + + # Plugin point: when --fm is not provided and CMA_FM_CLASSIFIER is set, + # invoke the classifier with description on stdin and use its first + # line of output as the fm tag. Failure-isolated: classifier errors, + # missing commands, or timeouts do not block the capture. + # See ARCHITECTURE.md Section 9 (Methodology integration). 
+ if [[ -z "$fm" ]] && [[ -n "${CMA_FM_CLASSIFIER:-}" ]]; then + fm=$(printf '%s' "$description" | timeout 5 sh -c "$CMA_FM_CLASSIFIER" 2>/dev/null | head -1 || true) + # Trim leading/trailing whitespace + fm="${fm#"${fm%%[![:space:]]*}"}" + fm="${fm%"${fm##*[![:space:]]}"}" + fi + write_capture miss misses.jsonl "$description" \ surface "$surface" fm "$fm" files "$files" \ excerpt "$excerpt" intended "$intended" corrected "$corrected" @@ -226,6 +262,7 @@ for path in sorted(glob(os.path.join(data_dir, "*.jsonl"))): # Don't include surface_events.jsonl in the captures-to-surface set if os.path.basename(path) == "surface_events.jsonl": continue + corrupted = 0 with open(path) as f: for line in f: line = line.strip() @@ -234,15 +271,35 @@ for path in sorted(glob(os.path.join(data_dir, "*.jsonl"))): try: rec = json.loads(line) except json.JSONDecodeError: + corrupted += 1 continue # Skip retirement records when surfacing if rec.get("type") == "retirement": continue captures.append(rec) + if corrupted: + print("cma: skipped {} corrupted line(s) in {}".format(corrupted, os.path.basename(path)), file=sys.stderr) def matches(rec): if type_filter and rec.get("type") != type_filter: return False + + # Decisions: applies-when matches against context keywords. A decision + # with applies-when="auth db" surfaces whenever surface or file context + # contains "auth" or "db", even if the decision's own surface differs. + # This closes the decision-surfacing loop (decisions surface at action + # time when conditions match, not just by stored surface field). 
+ if rec.get("type") == "decision": + applies_when = (rec.get("applies_when") or "").lower() + if applies_when: + keywords = [k for k in applies_when.replace(",", " ").split() if k] + context_parts = [] + if surface_filter: context_parts.append(surface_filter.lower()) + if file_filter: context_parts.append(file_filter.lower()) + context_str = " ".join(context_parts) + if context_str and any(kw in context_str for kw in keywords): + return True + if surface_filter and rec.get("surface") != surface_filter: return False if file_filter: @@ -260,6 +317,7 @@ filtered = filtered[:limit] if should_log: event_id = "{}-{}".format(time.strftime("%Y%m%d-%H%M%S", time.gmtime()), os.urandom(4).hex()) event = { + "schema_version": "1.0", "type": "surface_event", "id": event_id, "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), @@ -329,6 +387,7 @@ groups = Counter() total = 0 misses_path = os.path.join(data_dir, "misses.jsonl") +corrupted = 0 if os.path.exists(misses_path): with open(misses_path) as f: for line in f: @@ -338,11 +397,15 @@ if os.path.exists(misses_path): try: rec = json.loads(line) except json.JSONDecodeError: + corrupted += 1 continue key = (rec.get("surface", ""), rec.get("fm", "")) groups[key] += 1 total += 1 +if corrupted: + print("cma: skipped {} corrupted line(s) in misses.jsonl".format(corrupted), file=sys.stderr) + if total == 0: print("No misses recorded yet.") sys.exit(0) @@ -447,17 +510,86 @@ PYEOF } cmd_stats() { - local view="" recurrence_mode=false leaks_mode=false + local view="" recurrence_mode=false leaks_mode=false behavior_mode=false while [[ $# -gt 0 ]]; do case "$1" in --rejections) view="rejection"; shift ;; --preventions) view="prevention"; shift ;; --recurrence) recurrence_mode=true; shift ;; --leaks) leaks_mode=true; shift ;; + --behavior) behavior_mode=true; shift ;; *) echo "cma stats: unknown flag: $1" >&2; usage_for stats; exit 1 ;; esac done + if $behavior_mode; then + python3 - "$CMA_DIR" <<'PYEOF' +import json, os, sys 
+from collections import defaultdict + +data_dir = sys.argv[1] +misses_path = os.path.join(data_dir, "misses.jsonl") + +if not os.path.exists(misses_path): + print("No misses recorded yet.") + sys.exit(0) + +with_texture = [] +total = 0 +with open(misses_path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + rec = json.loads(line) + except json.JSONDecodeError: + continue + total += 1 + if rec.get("intended") or rec.get("corrected"): + with_texture.append(rec) + +if not with_texture: + print("{} miss(es) recorded; none have intended/corrected texture yet.".format(total)) + print("Capture with --intended and --corrected to enable behavior-layer analysis.") + sys.exit(0) + +groups = defaultdict(list) +for rec in with_texture: + key = (rec.get("surface", ""), rec.get("fm", "")) + groups[key].append(rec) + +print("Behavior-layer signals from {} miss(es) ({} with texture):".format(total, len(with_texture))) +print() + +sorted_groups = sorted(groups.items(), key=lambda x: len(x[1]), reverse=True) + +def truncate(text, n=70): + text = " ".join(text.split()) + return text if len(text) <= n else text[:n-3] + "..." + +for (surface, fm), records in sorted_groups: + s = surface if surface else "(no surface)" + f = fm if fm else "(no fm)" + print(" surface={} fm={} ({} with texture)".format(s, f, len(records))) + + recent = max(records, key=lambda r: r.get("timestamp", "")) + intended = recent.get("intended", "").strip() + corrected = recent.get("corrected", "").strip() + + if intended: + print(" intended: {}".format(truncate(intended))) + if corrected: + print(" corrected: {}".format(truncate(corrected))) + print() + +print("Behavior-layer analysis: each group shows where the operator had to pivot") +print("from an initial impulse to a different action. 
Recurring pivots indicate") +print("patterns where surfaced warnings consistently change operator behavior.") +PYEOF + return 0 + fi + if $leaks_mode; then python3 - "$CMA_DIR" <<'PYEOF' import json, os, sys @@ -468,6 +600,7 @@ def load_jsonl(path): records = [] if not os.path.exists(path): return records + corrupted = 0 with open(path) as f: for line in f: line = line.strip() @@ -476,7 +609,10 @@ def load_jsonl(path): try: records.append(json.loads(line)) except json.JSONDecodeError: + corrupted += 1 continue + if corrupted: + print("cma: skipped {} corrupted line(s) in {}".format(corrupted, os.path.basename(path)), file=sys.stderr) return records misses = load_jsonl(os.path.join(data_dir, "misses.jsonl")) @@ -538,6 +674,7 @@ groups = Counter() total = 0 misses_path = os.path.join(data_dir, "misses.jsonl") +corrupted = 0 if os.path.exists(misses_path): with open(misses_path) as f: for line in f: @@ -547,11 +684,15 @@ if os.path.exists(misses_path): try: rec = json.loads(line) except json.JSONDecodeError: + corrupted += 1 continue key = (rec.get("surface", ""), rec.get("fm", "")) groups[key] += 1 total += 1 +if corrupted: + print("cma: skipped {} corrupted line(s) in misses.jsonl".format(corrupted), file=sys.stderr) + if total == 0: print("No misses recorded yet.") sys.exit(0) @@ -603,6 +744,38 @@ PYEOF # Help and dispatch # --------------------------------------------------------------------------- +cmd_init() { + mkdir -p "$CMA_DIR" + cat > "$CMA_DIR/README.md" <] [--file ] [--type ] [--limit ] cma distill [--scope ] [--surface ] | --retire

| --review - cma stats [--rejections] [--leaks] [--preventions] [--recurrence] + cma stats [--rejections] [--leaks] [--preventions] [--recurrence] [--behavior] + +Setup: + cma init Create the data directory with a README Other: cma --version cma --help Specification: DESIGN.md in this repository. +Architecture: ARCHITECTURE.md in this repository. +Schema: DATA.md in this repository. Methodology: https://github.com/Clarethium/lodestone Data directory: \$CMA_DIR (default: ~/.cma) @@ -637,6 +815,7 @@ case "${1:-}" in surface) shift; cmd_surface "$@" ;; distill) shift; cmd_distill "$@" ;; stats) shift; cmd_stats "$@" ;; + init) shift; cmd_init "$@" ;; --version|-v) echo "cma $VERSION" ;; --help|-h|help|"") cmd_help ;; *) diff --git a/cma-mcp/CHANGELOG.md b/cma-mcp/CHANGELOG.md new file mode 100644 index 0000000..b720d17 --- /dev/null +++ b/cma-mcp/CHANGELOG.md @@ -0,0 +1,114 @@ +# Changelog (cma-mcp) + +All notable changes to **cma-mcp** are documented in this file. + +cma-mcp ships in this repository alongside the canonical bash cma +reference implementation. The two components release independently: + +- bash cma's release history is in the repository's + [CHANGELOG.md](../CHANGELOG.md) at root. +- This file tracks cma-mcp's release history. + +Version tags carry the component prefix (`cma-mcp-0.1.0` for this +component, `cma-1.0.0` for bash cma). + +The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) +and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +--- + +## [Unreleased] (0.1.0-dev) + +### Added + +- Initial cma-mcp 0.1.0 reference implementation. Python ≥3.10, + MCP protocol manual JSON-RPC over stdio (no SDK dependency, per + [STRATEGY DD-2](../STRATEGY.md) / [DECISIONS AD-001](../DECISIONS.md)). +- Subprocess wrapper around the canonical bash cma binary + (STRATEGY DD-1 / DECISIONS AD-004): argv-array, no shell + interpolation, 5-second timeout per call (AD-003). 
+- Seven tools mirroring bash cma's seven primitives: `cma_miss`, + `cma_decision`, `cma_reject`, `cma_prevented`, `cma_distill` + (modes: default / retire / review), `cma_surface` (instrumented + query; logs `surface_events.jsonl` for `cma stats --leaks` + validation), `cma_stats` (views: default / leaks / recurrence / + preventions / rejections / behavior). +- Four resources for read-only context: `cma://decisions`, + `cma://rejections`, `cma://core`, `cma://stats`. +- Three-section payload (`analysis` + `agent_guidance` + + `provenance`) on every tool response and resource read. + Adversarial tests pin the structure (STRATEGY DD-5). +- Methodology-agnostic substrate (STRATEGY DD-4): `--fm` is + opaque. No Lodestone vocabulary bundled. +- Schema-version handling (DECISIONS AD-002): records with + `schema_version: "1.0"` are native; legacy records (no + schema_version field) parse leniently; unknown schema_version + surfaces in `provenance`. +- `--version` install fingerprint emitting server_version, + protocol version, git_sha (with `+dirty` flag if working tree + dirty), cma_binary_version (from `cma --version`), python + version, absolute script path. The git_sha resolves via two + paths: a runtime probe of the script's directory (works in + development clones) and a build-time bake into `_build_info.py` + via `setup.py` (works for PyPI installs where the runtime probe + has no `.git` to read). CI sets the `CMA_MCP_BUILD_SHA` env var + before `python -m build` so PEP 517 build isolation does not + drop the SHA. +- `--test` offline sanity check: prints the full three-section + payload for a canned tool call without requiring an MCP client + handshake. +- Initialize handshake carrying the standard MCP fields plus a + top-level `instructions` field with cross-tool orientation prose + (matches frame-check-mcp's pattern). 
+- pytest suite (48 cases) covering protocol conformance, + subprocess wrapping, JSONL parsing, three-section payload + determinism, install-fingerprint git_sha fallback, adversarial + inputs (boundary, malformed, argv-injection-resistance probe), + and wire-protocol subprocess roundtrips + (`tests/test_mcp_wire.py` — closes + `docs/ANTICIPATED_CRITIQUES.md` C-8). Coverage in CI scopes the + eight runtime modules; reported number is a floor (subprocess + paths in wire tests are not counted by pytest-cov without a + sitecustomize hook). +- `docs/ARCHITECTURE.md`: module map, data flow for tool calls + and resource reads, three-section payload contract, subprocess + discipline, JSONL read tolerance, install fingerprint two-path + resolution. Reading map for new contributors. +- `docs/FAQ.md` and `docs/TROUBLESHOOTING.md`: conceptual and + operator-side gotchas, MCP-client config patterns across Claude + Desktop / Cursor / Cline / Continue.dev, the four-command + diagnostic loop, and reproducible bug-report template. +- `bench.py`: latency benchmark mirroring bash cma's `bench.sh` + shape — measures wire-level round-trip latency for each tool + and resource through real stdin/stdout pipes against a + 100-capture synthetic corpus. Reveals the wrapper itself adds + essentially zero overhead; subprocess-bound calls inherit + cma's latency. +- Publish workflow (`.github/workflows/publish-mcp.yml`) builds + the wheel + sdist on `cma-mcp-X.Y.Z` tag pushes, validates with + twine, smoke-tests the installed wheel against the baked SHA, + and stages the artifacts. PyPI / TestPyPI upload steps are + intentionally commented out pending the lift checklist + documented in the workflow header. 
+- CI wheel-install smoke step in `tests-mcp.yml` builds and + installs the wheel into a clean virtualenv on every push, + catching packaging regressions (missing modules, broken entry + points, dropped license-files, dropped `_build_info.py`) that + the editable-install pytest path cannot see. + +### Notes + +- 0.1.0 is the first release. PyPI publication is gated on the DOI + allocation from Zenodo and a final pre-flight conformance pass + against the installed wheel. Until publication, install from + source: `pip install -e .` from this directory. +- Versioning convention: PEP 440 pre-release markers (`.dev0`) + decorate the underlying semver M.m.p during the dev-build + window. At lift, the suffix drops and `pyproject.toml` aligns + character-for-character with `SERVER_VERSION` in + `mcp_server.py`. + +--- + +*cma-mcp internal prototype work prior to 0.1.0 is not documented +here.* diff --git a/cma-mcp/CITATION.cff b/cma-mcp/CITATION.cff new file mode 100644 index 0000000..af80032 --- /dev/null +++ b/cma-mcp/CITATION.cff @@ -0,0 +1,57 @@ +cff-version: 1.2.0 +message: >- + If you use cma-mcp in academic, applied, or compliance work, please + cite it using the metadata below. The methodology canon (Lodestone) + and the executable companion (cma) are separately citeable artifacts + with their own canonical references. +title: "cma-mcp: Model Context Protocol distribution for the compound practice loop" +type: software +authors: + - family-names: Lucic + given-names: Lovro + orcid: "https://orcid.org/0009-0008-9976-6933" + website: "https://blog.clarethium.com" +repository-code: "https://github.com/Clarethium/cma" +url: "https://github.com/Clarethium/cma/tree/main/cma-mcp" +abstract: >- + cma-mcp is the Model Context Protocol server that exposes the cma + compound practice loop to any MCP-compatible AI client (Claude + Desktop, Cursor, Cline, Continue.dev, and others). 
The server is a + thin distribution wrapper around the canonical cma reference + implementation; vocabulary, methodology, and the loop's design live + in Lodestone (the methodology canon) and cma (the bash reference + implementation). cma-mcp's contribution is reach: bringing the same + capture-and-surface discipline to operator environments where the + cma CLI's shell hooks do not run. +keywords: + - mcp + - model-context-protocol + - compound-practice + - failure-capture + - decision-tracking + - prevention + - lodestone + - cma + - clarethium +license: Apache-2.0 +version: "0.1.0" +date-released: "2026-05-06" +references: + - type: software + title: "cma: executable compound practice loop" + authors: + - family-names: Lucic + given-names: Lovro + orcid: "https://orcid.org/0009-0008-9976-6933" + url: "https://github.com/Clarethium/cma" + license: Apache-2.0 + notes: "The canonical bash reference implementation that cma-mcp wraps. cma-mcp invokes its CLI as a subprocess; the data substrate (`~/.cma/*.jsonl`) is shared between cma and cma-mcp." + - type: article + title: "Lodestone: canonical operator methodology for AI-coupled work" + authors: + - family-names: Lucic + given-names: Lovro + orcid: "https://orcid.org/0009-0008-9976-6933" + url: "https://github.com/Clarethium/lodestone" + license: CC-BY-4.0 + notes: "The methodology canon. Defines stance, the loop, calibration, altitude, failure shapes, quality levels, surface protocols, and compound practice (Section VIII). cma-mcp is methodology-agnostic at the substrate level; operators tag captures with a methodology's vocabulary (Lodestone's FM-1..10 or otherwise) by passing it through as opaque strings." diff --git a/cma-mcp/LICENSE b/cma-mcp/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/cma-mcp/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/cma-mcp/NOTICE b/cma-mcp/NOTICE new file mode 100644 index 0000000..313fad0 --- /dev/null +++ b/cma-mcp/NOTICE @@ -0,0 +1,63 @@ +cma project (cma + cma-mcp) +Copyright 2026 Lovro Lucic + +Published under Clarethium (https://github.com/Clarethium). + +This product includes software developed by Lovro Lucic. + +Components +---------- + +This repository ships two components, both Apache-2.0 licensed: + +- **cma** (repository root): the canonical bash reference + implementation of the compound practice loop. Seven primitives + (miss, decision, reject, prevented, surface, distill, stats), + texture preservation, recurrence detection, leak detection. + Action-time injection via Claude Code hooks (PreToolUse + + SessionStart) and shell preexec wrappers (zsh native, bash via + bash-preexec). 
+- **cma-mcp** (cma-mcp/ subdirectory): the Python Model Context + Protocol distribution wrapper. Exposes the same seven primitives + and four context-resource surfaces to MCP-compatible AI clients + (Claude Desktop, Cursor, Cline, Continue.dev). Subprocess + wrapper around the bash cma binary; methodology-agnostic + substrate; three-section payload discipline. + +Licensing +--------- + +Code in this repository is licensed under Apache 2.0; see LICENSE +at repository root. + +External works referenced +------------------------- + +Reference methodology lives in Lodestone +(https://github.com/Clarethium/lodestone), licensed CC-BY 4.0. +Neither cma nor cma-mcp bundles Lodestone content. Operators who +tag captures with a methodology's failure-mode vocabulary (such as +Lodestone's FM-1..10) are responsible for citing that methodology +in their own work. + +Verification substrate referenced in design documents is Touchstone +(https://github.com/Clarethium/touchstone), Apache 2.0 (library) +and CC-BY 4.0 (Standard). + +Companion projects in the Clarethium body +----------------------------------------- + +- Lodestone: canonical operator methodology (CC-BY 4.0) +- Touchstone: model-independent measurement substrate +- frame-check-mcp: applied vehicle, sibling MCP using Touchstone + (https://github.com/Clarethium/frame-check-mcp) + +Sibling-MCP pattern conformance +------------------------------- + +cma-mcp's three-section payload discipline, governance scaffolding, +and adversarial test architecture follow the conventions established +by frame-check-mcp. Where this project diverges (e.g., bundling the +MCP wrapper alongside the wrapped binary in one repository rather +than as a separate sibling repo), the divergence is named in +DECISIONS.md. 
diff --git a/cma-mcp/README.md b/cma-mcp/README.md new file mode 100644 index 0000000..bdd316e --- /dev/null +++ b/cma-mcp/README.md @@ -0,0 +1,208 @@ +# cma-mcp + +[![tests-mcp](https://github.com/Clarethium/cma/actions/workflows/tests-mcp.yml/badge.svg?branch=main)](https://github.com/Clarethium/cma/actions/workflows/tests-mcp.yml) +[![codeql](https://github.com/Clarethium/cma/actions/workflows/codeql.yml/badge.svg?branch=main)](https://github.com/Clarethium/cma/actions/workflows/codeql.yml) +[![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://pypi.org/project/cma-mcp/) +[![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://github.com/Clarethium/cma/blob/main/LICENSE) +[![Companions](https://img.shields.io/badge/Clarethium-empire-blue.svg)](https://github.com/Clarethium) + +The Model Context Protocol distribution layer for [cma](https://github.com/Clarethium/cma#readme), +Clarethium's executable compound practice loop. + +## Where this lives + +cma-mcp is one component of the [cma project](https://github.com/Clarethium/cma). +The repository root holds the canonical bash cma reference +implementation; this `cma-mcp/` subdirectory holds the Python +wrapper that exposes the same loop to MCP-compatible AI clients. +The two components release independently: + +- **bash cma**: see the [parent README](https://github.com/Clarethium/cma#readme) for the CLI + surface, install, and Claude Code / shell hook integrations. +- **cma-mcp**: this README, focused on the PyPI installation and + MCP client configuration. + +Cross-cutting governance (license, citation, security, strategy, +contribution) lives at the repository root. + +## What this is + +Most MCP servers expose new capability. cma-mcp exposes an +existing capability (bash cma's seven primitives) to a wider set +of operator environments: Claude Desktop, Cursor, Cline, +Continue.dev, and any other MCP-compatible client. The contribution +is reach. 
Drift is the named enemy: cma-mcp invokes the canonical +bash cma binary as a subprocess for every captured action, so +cma-mcp picks up cma's evolution automatically and never diverges +from the 1.0 reference implementation. + +## Quickstart + +cma-mcp wraps the canonical bash cma binary. Install bash cma first +from the [parent repository](https://github.com/Clarethium/cma#readme), then confirm it is on +`PATH`: + + cma --help + +Install cma-mcp from PyPI: + + pip install cma-mcp + +Point your MCP client at the installed entry point. For Claude +Desktop, add to `claude_desktop_config.json`: + + { + "mcpServers": { + "cma": { + "command": "cma-mcp" + } + } + } + +Restart the client. Then in any conversation: *"Record a miss: I +claimed verified without testing the cross-tenant write path"*, or +*"What active rejections do I have?"*, or *"What does cma stats +show for prevention/miss ratio over the last 30 days?"* + +For Cursor, Cline, Continue.dev, and other MCP-compatible clients, +the same pattern applies (point the client at the `cma-mcp` +command; the stdio handshake runs). 
+ +## What it exposes + +**Seven tools** mirroring bash cma's seven primitives: + +| Tool | Wraps | When the agent invokes it | +|---|---|---| +| `cma_miss` | `cma miss` | A failure happened that may recur | +| `cma_decision` | `cma decision` | A non-trivial architectural choice was made | +| `cma_reject` | `cma reject` | An option was eliminated and should not be silently rebuilt | +| `cma_prevented` | `cma prevented` | A surfaced warning actually changed behavior | +| `cma_distill` | `cma distill` (modes: default / retire / review) | Promote, retire, or preview a distilled learning | +| `cma_surface` | `cma surface` | Pull relevant prior captures before acting (logs surface event for leak detection) | +| `cma_stats` | `cma stats` (views: default / leaks / recurrence / preventions / rejections / behavior) | Inspect loop-closing evidence | + +**Four resources** for read-only context: + +| URI | Reads | +|---|---| +| `cma://decisions` | Active decisions in the last 180 days | +| `cma://rejections` | Active rejections in the last 30 days | +| `cma://core` | Active core learnings (retired filtered) | +| `cma://stats` | Default stats summary | + +## Three-section payload + +Every tool response and resource read returns a JSON payload with +three top-level sections: + + { + "analysis": { ... data and stdout }, + "agent_guidance": { what to tell the user, how to cite }, + "provenance": { server_version, license, cost: 0.0, ... } + } + +The `agent_guidance` and `provenance` sections exist because an +agent passing cma-mcp output to a user without attribution would +strip the construct-honesty discipline that makes the loop's +evidence worth citing. Surfacing "how to cite faithfully" inside +the payload is the structure that carries the discipline forward. +This convention is established by [frame-check-mcp](https://github.com/Clarethium/frame-check-mcp); +cma-mcp inherits it. Adversarial tests in +`tests/test_payload_determinism.py` pin the structure. 
+ +## Approach + +**Subprocess over reimplementation.** cma-mcp invokes bash cma as +a subprocess for every captured action. cma-mcp does not +reimplement cma's seven primitives in Python. See +[STRATEGY.md](https://github.com/Clarethium/cma/blob/main/STRATEGY.md) §6 DD-1 for the durable decision. + +**Methodology-agnostic substrate.** cma stores `--fm` (failure +mode) as an opaque string. cma-mcp does not bundle any +methodology's failure-mode catalog. Operators tag captures with +their methodology's vocabulary (Lodestone's FM-1..10 or otherwise) +by passing the tag through; for autoclassification, set +`CMA_FM_CLASSIFIER` per cma's plugin convention. + +**No external runtime dependencies.** cma-mcp implements MCP +directly in-repo using JSON-RPC 2.0 over stdio. No third-party MCP +SDK; no pip-installed runtime requirements beyond the Python +standard library. (Test-time deps: pytest.) + +## Platform support + +Linux and macOS native. Windows operators run cma-mcp under WSL +because cma-mcp shells out to the bash cma binary. This is +deliberate ([STRATEGY.md](https://github.com/Clarethium/cma/blob/main/STRATEGY.md) DD-3): canonical-cma +alignment beats standalone Python reach. Any operator running an +MCP-compatible AI client on Windows is reasonably expected to have +WSL available. + +## Install fingerprint + + cma-mcp --version + +Emits a one-line JSON fingerprint with `server_version`, +`protocol_version`, `git_sha` (with `+dirty` flag if the working +tree has uncommitted changes), `cma_binary_version` (probed from +`cma --version`), `python` version, and `script` path. Lets an +operator confirm the cma-mcp install configured in their MCP +client is the expected one. + +## Offline sanity check + + cma-mcp --test + +Prints the full three-section payload for a `cma_stats` (default +view) call against the operator's `~/.cma/` data. Useful to verify +pipeline wiring and that the cma binary is reachable. 
+ +## Documentation + +Project-level (repository root): + +- [README.md](https://github.com/Clarethium/cma#readme): cma's CLI overview +- [STRATEGY.md](https://github.com/Clarethium/cma/blob/main/STRATEGY.md): durable decisions and empire positioning +- [DECISIONS.md](https://github.com/Clarethium/cma/blob/main/DECISIONS.md): architectural decisions log +- [GOVERNANCE.md](https://github.com/Clarethium/cma/blob/main/GOVERNANCE.md): BDFL governance, named curator +- [CONTRIBUTING.md](https://github.com/Clarethium/cma/blob/main/CONTRIBUTING.md): contribution mechanics, DCO sign-off +- [SECURITY.md](https://github.com/Clarethium/cma/blob/main/SECURITY.md): threat model and reporting +- [LICENSE](https://github.com/Clarethium/cma/blob/main/LICENSE) (Apache-2.0), [NOTICE](https://github.com/Clarethium/cma/blob/main/NOTICE), [CITATION.cff](https://github.com/Clarethium/cma/blob/main/CITATION.cff) + +cma-mcp specific (this directory): + +- [CHANGELOG.md](CHANGELOG.md): cma-mcp release history +- [docs/MCP_SERVER.md](docs/MCP_SERVER.md): protocol reference +- [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md): module layout, data + flow, contracts; reading map for new contributors +- [docs/FAQ.md](docs/FAQ.md): conceptual questions, install gotchas, + cross-client config patterns +- [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md): symptoms and + fixes; the diagnostic loop is the four-command sequence at the top +- [docs/ANTICIPATED_CRITIQUES.md](docs/ANTICIPATED_CRITIQUES.md): + self-enumerated adversarial readings of cma-mcp's design +- [docs/VALIDATION_PROGRAM.md](docs/VALIDATION_PROGRAM.md): empirical + validation plan for whether the loop closes through MCP exposure + +## Running tests + +From this directory: + + pip install -e .[test] + python3 -m pytest -q + +The suite covers MCP protocol conformance, three-section payload +determinism, JSONL parsing tolerance, and subprocess-wrapper +isolation (argv-array discipline, timeout discipline). 
Tests that +require the bash cma binary skip when it is not on `PATH`. + +## Issues + +Bug reports and feature requests at +[github.com/Clarethium/cma/issues](https://github.com/Clarethium/cma/issues). +Use the `cma-mcp` label or include `[cma-mcp]` in the title to +disambiguate from bash cma issues. + +Security issues go to `lovro.lucic@gmail.com` per +[SECURITY.md](https://github.com/Clarethium/cma/blob/main/SECURITY.md). diff --git a/cma-mcp/bench.py b/cma-mcp/bench.py new file mode 100644 index 0000000..5d99439 --- /dev/null +++ b/cma-mcp/bench.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +"""bench.py - Latency benchmarks for cma-mcp's MCP wrapper layer. + +bash cma's `bench.sh` measures the cma binary's own latency +(`cma surface`, `cma stats`, `cma-pre --check`) against a synthetic +100-capture data set. cma-mcp adds two layers of overhead on top of +that: `subprocess.run` to spawn the cma binary per call, and +JSON-RPC framing over stdio between the MCP client and cma-mcp. + +This benchmark measures cma-mcp's actual wire-level round-trip +latency: the MCP client writes one JSON-RPC line on cma-mcp's stdin +and reads one JSON-RPC line back from cma-mcp's stdout. The result +is the operator's actual cost: "how much does an MCP call cost +compared to running cma directly". + +Run from cma-mcp/: + python3 bench.py + +Requires the cma binary on PATH (the wrapper spawns it). Uses a +disposable CMA_DIR populated with 100 synthetic captures so results +do not depend on the operator's corpus. + +The benchmark spawns one cma-mcp subprocess for the whole run (the +expected MCP-client lifecycle) and reuses it across all measured +calls. Each tool call goes through: + + client → JSON-RPC over stdin → cma-mcp dispatch + → subprocess.run([cma, *argv]) → cma binary I/O + → three-section payload composition + → JSON-RPC over stdout → client + +so the reported latency captures the full operator-experienced cost. 
from __future__ import annotations

import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
from collections.abc import Callable
from pathlib import Path
from statistics import median


HERE = Path(__file__).parent.resolve()
CORPUS_SIZE = 100
WARMUP_ITERATIONS = 3
MEASURED_ITERATIONS = 20
# Nearest-rank p95: the sample at ordinal rank ceil(0.95 * N), expressed as a
# zero-based index. Integer ceil-division avoids float rounding. With N = 20
# this is index 18; index 19 would be the maximum, not the 95th percentile.
P95_INDEX = max(-(-MEASURED_ITERATIONS * 95 // 100) - 1, 0)
SURFACES = ("auth", "payments", "db", "api", "ui", "docs", "test", "general")
FMS = ("fm-1", "fm-2", "fm-3", "fm-4", "fm-5")


def populate_corpus(cma_binary: str, cma_dir: str, n: int = 100) -> None:
    """Populate a fresh CMA_DIR with synthetic captures.

    Runs `<cma_binary> miss ...` `n` times with CMA_DIR pointed at
    `cma_dir`, cycling through SURFACES and FMS so the corpus is spread
    across tags.

    Raises
    ------
    subprocess.CalledProcessError
        If any invocation exits non-zero (check=True).
    subprocess.TimeoutExpired
        If any invocation exceeds the 5-second timeout.
    """
    env = os.environ.copy()
    env["CMA_DIR"] = cma_dir
    for i in range(n):
        surface = SURFACES[i % len(SURFACES)]
        fm = FMS[i % len(FMS)]
        subprocess.run(
            [cma_binary, "miss",
             f"synthetic bench miss {i} for cma-mcp wire-latency probe",
             "--surface", surface, "--fm", fm],
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
            timeout=5,
        )


class WireClient:
    """Drive cma-mcp over real stdin/stdout pipes.

    Spawns `mcp_server.py` (next to this file) with CMA_DIR set to the
    given directory, performs the MCP `initialize` handshake, and then
    exchanges one JSON-RPC line per request.
    """

    def __init__(self, cma_dir: str):
        env = os.environ.copy()
        env["CMA_DIR"] = cma_dir
        # The server's stderr goes to an anonymous temp file rather than a
        # pipe: nothing drains stderr while the benchmark runs, so a pipe
        # could fill up and block the server mid-response.
        self._stderr_log = tempfile.TemporaryFile()
        try:
            self.proc = subprocess.Popen(
                [sys.executable, str(HERE / "mcp_server.py")],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=self._stderr_log,
                env=env,
                bufsize=0,
            )
        except BaseException:
            self._stderr_log.close()
            raise
        self._next_id = 1
        try:
            self._initialize()
        except BaseException:
            # Do not leak the child process when the handshake fails.
            self.close()
            raise

    def _initialize(self) -> None:
        """Send `initialize` followed by the `notifications/initialized` notice."""
        self.call("initialize", {"protocolVersion": "2024-11-05",
                                 "capabilities": {},
                                 "clientInfo": {"name": "bench.py", "version": "0"}})
        self.notify("notifications/initialized", {})

    def _server_stderr(self) -> str:
        """Return everything the server has written to stderr so far."""
        try:
            # Give an exiting server a moment to flush its diagnostics.
            self.proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            pass
        self._stderr_log.seek(0)
        return self._stderr_log.read().decode(errors="replace")

    def call(self, method: str, params: dict | None = None) -> dict:
        """Send one JSON-RPC request and return the reply's `result`.

        Raises
        ------
        RuntimeError
            If the server closes its stdout, answers with a different
            id, or answers with an `error` member.
        """
        req_id = self._next_id
        self._next_id += 1
        line = json.dumps({"jsonrpc": "2.0", "id": req_id, "method": method,
                           "params": params or {}})
        self.proc.stdin.write((line + "\n").encode())
        self.proc.stdin.flush()
        reply_line = self.proc.stdout.readline()
        if not reply_line:
            stderr = self._server_stderr()
            raise RuntimeError(f"cma-mcp closed pipe; stderr=\n{stderr}")
        reply = json.loads(reply_line)
        if reply.get("id") != req_id:
            raise RuntimeError(f"id mismatch: sent {req_id}, got {reply.get('id')}")
        if "error" in reply:
            raise RuntimeError(f"server error: {reply['error']}")
        return reply["result"]

    def notify(self, method: str, params: dict | None = None) -> None:
        """Send a JSON-RPC notification (no id, no reply is read)."""
        line = json.dumps({"jsonrpc": "2.0", "method": method,
                           "params": params or {}})
        self.proc.stdin.write((line + "\n").encode())
        self.proc.stdin.flush()

    def close(self) -> None:
        """Close stdin, wait up to 2 s for exit, then kill and reap."""
        try:
            self.proc.stdin.close()
        except Exception:
            pass
        try:
            self.proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            self.proc.kill()
            # Reap the killed child so it does not linger as a zombie.
            self.proc.wait()
        finally:
            if self.proc.stdout is not None:
                self.proc.stdout.close()
            self._stderr_log.close()


def time_ms(fn) -> int:
    """Run `fn()` once and return its wall-clock duration in whole milliseconds."""
    t0 = time.perf_counter()
    fn()
    return int((time.perf_counter() - t0) * 1000)


def bench(client: WireClient, label: str, fn) -> tuple[int, int, int]:
    """Time `fn(client)` and return `(min, median, p95)` in milliseconds.

    Runs WARMUP_ITERATIONS untimed calls, then MEASURED_ITERATIONS timed
    ones. The median is the upper-middle sample; p95 is the nearest-rank
    95th percentile (see P95_INDEX). `label` is accepted for call-site
    symmetry with `report` and is not used here.
    """
    for _ in range(WARMUP_ITERATIONS):
        fn(client)
    samples = sorted(time_ms(lambda: fn(client)) for _ in range(MEASURED_ITERATIONS))
    return samples[0], samples[len(samples) // 2], samples[min(P95_INDEX, len(samples) - 1)]


def report(label: str, mn: int, md: int, p95: int) -> None:
    """Print one aligned result row for a benchmark case."""
    print(f" {label:<46} min={mn:>4}ms median={md:>4}ms p95={p95:>4}ms")


def main() -> int:
    """Build a disposable corpus, run every benchmark case, print the table.

    Returns 1 when the cma binary is not on PATH, otherwise 0.
    """
    cma_binary = shutil.which("cma")
    if cma_binary is None:
        print("ERROR: cma binary not on PATH. Install bash cma first "
              "(see ../README.md).", file=sys.stderr)
        return 1

    with tempfile.TemporaryDirectory(prefix="cma-mcp-bench-") as cma_dir:
        print(f"Populating disposable CMA_DIR={cma_dir} with {CORPUS_SIZE} captures...",
              flush=True)
        populate_corpus(cma_binary, cma_dir, n=CORPUS_SIZE)
        print()
        print("MCP wire-latency benchmarks (lower is better)")
        print("Each call: stdin write → cma-mcp dispatch → cma binary spawn → "
              "stdout read")
        print()

        client = WireClient(cma_dir)
        try:
            cases: list[tuple[str, Callable[[WireClient], dict]]] = [
                ("ping",
                 lambda c: c.call("ping")),
                ("tools/list",
                 lambda c: c.call("tools/list")),
                ("resources/list",
                 lambda c: c.call("resources/list")),
                ("tools/call cma_stats (default)",
                 lambda c: c.call("tools/call",
                                  {"name": "cma_stats",
                                   "arguments": {"view": "default"}})),
                ("tools/call cma_stats (recurrence)",
                 lambda c: c.call("tools/call",
                                  {"name": "cma_stats",
                                   "arguments": {"view": "recurrence"}})),
                ("tools/call cma_surface (surface=auth)",
                 lambda c: c.call("tools/call",
                                  {"name": "cma_surface",
                                   "arguments": {"surface": "auth", "limit": 5}})),
                # Note: this case appends a real miss record on every call,
                # so the disposable corpus grows during the run.
                ("tools/call cma_miss",
                 lambda c: c.call("tools/call",
                                  {"name": "cma_miss",
                                   "arguments": {
                                       "description": "bench probe",
                                       "surface": "general",
                                       "fm": "fm-1",
                                   }})),
                ("resources/read cma://decisions",
                 lambda c: c.call("resources/read",
                                  {"uri": "cma://decisions"})),
                ("resources/read cma://stats",
                 lambda c: c.call("resources/read",
                                  {"uri": "cma://stats"})),
            ]
            for label, fn in cases:
                mn, md, p95 = bench(client, label, fn)
                report(label, mn, md, p95)
        finally:
            client.close()

        print()
        print(f"Iterations per case: {MEASURED_ITERATIONS} measured "
              f"(after {WARMUP_ITERATIONS} warmup).")
        print("Reported latency is the full client-perceived round trip "
              "(stdin write → stdout read).")
    return 0


if __name__ == "__main__":
    sys.exit(main())
0000000..b792e7c --- /dev/null +++ b/cma-mcp/cma_jsonl.py @@ -0,0 +1,165 @@ +""" +JSONL reader for cma's data directory. + +cma writes append-only JSON Lines files to `$CMA_DIR/` (default +`~/.cma/`): + + misses.jsonl + decisions.jsonl + rejections.jsonl + preventions.jsonl + core.jsonl + surface_events.jsonl + +Schema is documented in cma's DATA.md. This module reads those files +without writing — cma-mcp's tools always shell out to bash cma for +writes; reads happen here directly because they are simpler and +faster than spawning a subprocess for every resource fetch. + +Tolerance discipline matches bash cma's own (CHANGELOG, "Tolerant +read"): corrupt lines are skipped with a counter the caller can +surface, never raised as an exception. The whole corpus stays usable +even when individual records are damaged. + +Schema-version handling follows DECISIONS AD-002: records with +`schema_version: "1.0"` are native; legacy records with no +schema_version field parse leniently; records with any other +schema_version surface the unknown version to the caller for +inclusion in `provenance`. +""" + +from __future__ import annotations + +import json +import os +from dataclasses import dataclass, field +from typing import Iterator + + +# The set of schema_versions cma-mcp parses natively. Records with a +# schema_version not in this set are still parsed (best-effort) but +# the unknown version is reported up to the caller so it lands in +# `provenance` for the read. 
NATIVE_SCHEMA_VERSIONS: frozenset[str] = frozenset({"1.0"})


def cma_dir() -> str:
    """Return the resolved cma data directory.

    Uses `$CMA_DIR` when it is set and non-empty, otherwise `~/.cma`;
    a leading `~` is expanded in either case.
    """
    explicit = os.environ.get("CMA_DIR")
    return os.path.expanduser(explicit if explicit else "~/.cma")


@dataclass
class ReadResult:
    """Result of reading a JSONL file: records plus parse provenance."""

    # Parsed JSON objects, in file order.
    records: list[dict] = field(default_factory=list)
    # Lines skipped because they were undecodable, invalid JSON, or not objects.
    corrupt_lines: int = 0
    # Records that carry no `schema_version` field.
    legacy_records: int = 0
    # String forms of every schema_version outside NATIVE_SCHEMA_VERSIONS.
    unknown_schema_versions: set[str] = field(default_factory=set)
    # False when the file was missing at read time.
    file_existed: bool = False
    # Full path that was (or would have been) read.
    file_path: str = ""

    def merge_into(self, other: "ReadResult") -> None:
        """Accumulate records and counts from `other` into this result.

        `file_existed` and `file_path` are left untouched.
        """
        self.records.extend(other.records)
        self.corrupt_lines += other.corrupt_lines
        self.legacy_records += other.legacy_records
        self.unknown_schema_versions |= other.unknown_schema_versions


def read_jsonl(filename: str) -> ReadResult:
    """
    Read a single JSONL file from cma's data directory.

    Returns a ReadResult with the parsed records and a provenance
    summary. Missing files return an empty result with `file_existed
    = False` rather than raising. A damaged line never aborts the read:
    lines that are not valid UTF-8, not valid JSON, or not JSON objects
    are skipped and counted in `corrupt_lines`.

    A `schema_version` that is absent (or null) counts the record as
    legacy; any value other than a string in NATIVE_SCHEMA_VERSIONS is
    reported via `unknown_schema_versions` as `str(value)`. The record
    is kept in every one of those cases.
    """
    path = os.path.join(cma_dir(), filename)
    result = ReadResult(file_path=path)

    if not os.path.exists(path):
        return result

    try:
        fh = open(path, "rb")
    except FileNotFoundError:
        # The file vanished between the existence check and the open.
        return result

    result.file_existed = True

    with fh:
        # Bytes are decoded line by line so that one bad byte sequence only
        # costs its own line. bytes.splitlines() splits on \n, \r and \r\n,
        # the same boundaries text mode used.
        for raw in fh.read().splitlines():
            try:
                line = raw.decode("utf-8").strip()
            except UnicodeDecodeError:
                result.corrupt_lines += 1
                continue
            if not line:
                continue
            try:
                record = json.loads(line)
            except (ValueError, RecursionError):
                # ValueError covers json.JSONDecodeError; RecursionError
                # covers pathologically nested input.
                result.corrupt_lines += 1
                continue

            if not isinstance(record, dict):
                # NDJSON files always carry objects, never bare values
                result.corrupt_lines += 1
                continue

            sv = record.get("schema_version")
            if sv is None:
                result.legacy_records += 1
            elif not (isinstance(sv, str) and sv in NATIVE_SCHEMA_VERSIONS):
                # The isinstance guard keeps unhashable values (lists,
                # objects) from raising TypeError in the set lookup.
                result.unknown_schema_versions.add(str(sv))

            result.records.append(record)

    return result


def read_misses() -> ReadResult:
    """Read `misses.jsonl`."""
    return read_jsonl("misses.jsonl")


def read_decisions() -> ReadResult:
    """Read `decisions.jsonl`."""
    return read_jsonl("decisions.jsonl")


def read_rejections() -> ReadResult:
    """Read `rejections.jsonl`."""
    return read_jsonl("rejections.jsonl")


def read_preventions() -> ReadResult:
    """Read `preventions.jsonl`."""
    return read_jsonl("preventions.jsonl")


def read_core() -> ReadResult:
    """Read `core.jsonl`."""
    return read_jsonl("core.jsonl")


def read_surface_events() -> ReadResult:
    """Read `surface_events.jsonl`."""
    return read_jsonl("surface_events.jsonl")


def parse_provenance(result: ReadResult) -> dict:
    """
    Render a ReadResult as a provenance dict suitable for inclusion
    in a three-section payload's `provenance.data_source` field.

    `file`, `exists` and `records_parsed` are always present. The
    corrupt-line count, legacy-record count and sorted list of unknown
    schema versions are included only when non-zero / non-empty.
    """
    prov: dict = {
        "file": result.file_path,
        "exists": result.file_existed,
        "records_parsed": len(result.records),
    }
    if result.corrupt_lines > 0:
        prov["corrupt_lines_skipped"] = result.corrupt_lines
    if result.legacy_records > 0:
        prov["legacy_records_no_schema_version"] = result.legacy_records
    if result.unknown_schema_versions:
        prov["unknown_schema_versions"] = sorted(result.unknown_schema_versions)
    return prov


def iter_records_sorted_by_timestamp_desc(records: list[dict]) -> Iterator[dict]:
    """Yield records newest first by timestamp field, missing-last.

    Timestamps are compared as strings. A record whose `timestamp` is
    absent or not a string sorts after every timestamped record; ties
    keep their input order.
    """
    def _sort_key(record: dict) -> str:
        ts = record.get("timestamp", "")
        # Non-string values (null, numbers) cannot be ordered against
        # strings, so they are treated like a missing timestamp.
        return ts if isinstance(ts, str) else ""

    yield from sorted(records, key=_sort_key, reverse=True)
# Per DECISIONS AD-003: every cma call carries this timeout.
DEFAULT_TIMEOUT_SECONDS = 5

# The bash cma binary is resolved from PATH by default. Operators
# can override with CMA_BIN to point at a specific cma checkout.
# The variable is read once, when this module is imported.
_CMA_BIN_OVERRIDE = os.environ.get("CMA_BIN")


class CmaError(Exception):
    """
    Raised when a cma subprocess invocation fails.

    Attributes
    ----------
    argv : list of str
        The full argv that was attempted. Starts with the resolved
        cma binary path (or the unresolved name when resolution
        itself failed). Useful for debugging in operator logs.
    returncode : int or None
        The cma process exit status. None if the process did not
        return (timeout, missing binary, etc.).
    stdout : str
        Captured stdout up to the failure point. May be empty.
    stderr : str
        Captured stderr up to the failure point. cma writes
        operator-facing diagnostics here.
    reason : str
        One of: "missing_binary", "timeout", "non_zero_exit",
        "unexpected".
    """

    def __init__(
        self,
        argv: list[str],
        returncode: int | None,
        stdout: str,
        stderr: str,
        reason: str,
    ):
        self.argv = argv
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
        self.reason = reason
        super().__init__(self._format_message())

    def _format_message(self) -> str:
        """Build the exception message: the failure reason plus the attempted command."""
        cmd = " ".join(self.argv)
        return f"cma subprocess failed ({self.reason}): {cmd}"


@dataclass
class CmaResult:
    """Successful cma invocation result."""

    argv: list[str]
    returncode: int
    stdout: str
    stderr: str


def resolve_cma_binary() -> str:
    """
    Resolve the bash cma binary path.

    The CMA_BIN override (captured at import time) wins when set;
    otherwise the binary is looked up on PATH.

    Raises
    ------
    CmaError
        With reason "missing_binary" when the override is not a file
        or when no `cma` is found on PATH.
    """
    if _CMA_BIN_OVERRIDE:
        if os.path.isfile(_CMA_BIN_OVERRIDE):
            return _CMA_BIN_OVERRIDE
        raise CmaError(
            argv=[_CMA_BIN_OVERRIDE],
            returncode=None,
            stdout="",
            stderr=f"CMA_BIN override points to a path that is not a file: {_CMA_BIN_OVERRIDE}",
            reason="missing_binary",
        )

    found = shutil.which("cma")
    if found:
        return found
    raise CmaError(
        argv=["cma"],
        returncode=None,
        stdout="",
        stderr="cma binary not found on PATH; install from https://github.com/Clarethium/cma",
        reason="missing_binary",
    )


def _as_text(captured: str | bytes | None) -> str:
    """Normalise captured subprocess output to str (None -> "", bytes decoded)."""
    if captured is None:
        return ""
    if isinstance(captured, bytes):
        # TimeoutExpired carries raw bytes even when text=True was
        # requested. The partial output is diagnostic only, so decode
        # leniently rather than fail on a truncated multi-byte sequence.
        return captured.decode("utf-8", errors="replace")
    return captured


def run_cma(args: list[str], timeout: int | None = None) -> CmaResult:
    """
    Invoke `cma <args>` as a subprocess.

    Parameters
    ----------
    args : list of str
        Arguments passed to cma. Do not include the binary itself;
        this function resolves it.
    timeout : int, optional
        Override the default 5-second timeout. None uses
        DEFAULT_TIMEOUT_SECONDS.

    Returns
    -------
    CmaResult
        On success (returncode == 0).

    Raises
    ------
    CmaError
        On any failure (missing binary, timeout, non-zero exit, or an
        OSError while spawning, reported as reason "unexpected").
        `stdout` and `stderr` on the error are always str.
    """
    binary = resolve_cma_binary()
    argv = [binary] + list(args)
    t = timeout if timeout is not None else DEFAULT_TIMEOUT_SECONDS

    try:
        # shell=False with an argv list: each caller-supplied value
        # stays in its own argv slot and is never shell-interpreted.
        proc = subprocess.run(
            argv,
            shell=False,
            capture_output=True,
            text=True,
            timeout=t,
        )
    except subprocess.TimeoutExpired as exc:
        raise CmaError(
            argv=argv,
            returncode=None,
            stdout=_as_text(exc.stdout),
            stderr=_as_text(exc.stderr),
            reason="timeout",
        ) from exc
    except OSError as exc:
        raise CmaError(
            argv=argv,
            returncode=None,
            stdout="",
            stderr=str(exc),
            reason="unexpected",
        ) from exc

    if proc.returncode != 0:
        raise CmaError(
            argv=argv,
            returncode=proc.returncode,
            stdout=proc.stdout,
            stderr=proc.stderr,
            reason="non_zero_exit",
        )

    return CmaResult(
        argv=argv,
        returncode=proc.returncode,
        stdout=proc.stdout,
        stderr=proc.stderr,
    )


# Match a "Version X.Y[.Z][-suffix]" line in `cma help` output for cma
# binaries (or forks) that do not support the `--version` flag.
# Anchored to the literal token "Version" with at least one space, so a
# stray "version" word in prose never matches.
_HELP_VERSION_PATTERN = re.compile(r"^Version\s+([0-9][\w.\-]*)$", re.MULTILINE)


def cma_version() -> str | None:
    """
    Probe the cma binary for its version string.

    Strategy:
    1. Try `cma --version` (canonical cma's documented surface).
       Returns the stripped stdout when it is non-empty.
    2. Fall back to parsing `cma help` for a `Version X.Y.Z` line
       (handles forks or older cma installs that use subcommand-only
       syntax without a `--version` flag). The result is rendered as
       `cma X.Y.Z`.
    3. Return None when neither probe succeeds (graceful: operators
       see `cma_binary_version: null` in the install fingerprint
       rather than a crash).

    Used by `cma-mcp --version` to show operators which cma binary
    their MCP server is wrapping. CmaError from either probe is
    absorbed; failure surfaces as None.
    """
    # Primary probe: --version flag.
    try:
        reported = run_cma(["--version"]).stdout.strip()
    except CmaError:
        reported = ""
    if reported:
        return reported

    # Fallback: scan `cma help` output. Some cma binaries return
    # exit 1 from `cma help` despite emitting useful output, so the
    # stdout carried on CmaError is parsed as well.
    try:
        help_out = run_cma(["help"]).stdout
    except CmaError as exc:
        help_out = exc.stdout

    match = _HELP_VERSION_PATTERN.search(help_out) if help_out else None
    return f"cma {match.group(1)}" if match else None
The contribution is reach, +not new capability. STRATEGY §3. + +The trade-off: each new MCP client expands cma-mcp's surface even +though cma-mcp itself stays thin. That is by design. + +## C-2: "A subprocess wrapper is fragile. Why not reimplement cma in Python?" + +**Position: accepted as a trade-off, deliberately paid.** + +Reimplementation would lift the bash dependency and remove +subprocess-launch overhead. It would also duplicate cma's +seven-primitive surface (98-test suite as of 1.0), texture +preservation, recurrence detection, and leak-detection logic in a +parallel codebase that lags whenever bash cma evolves. Drift is the +enemy. + +The wrapper is fragile in a narrow sense: a missing `cma` binary +fails the tool dispatch. cma-mcp surfaces this clearly (the +`isError` payload names `reason: missing_binary` and points the +operator at the install URL). No silent failures. + +STRATEGY DD-1; further evidence that thin distribution wrappers are +the empire-correct shape: frame-check-mcp's similar choice to keep +the analysis library separate from the MCP packaging. + +## C-3: "Bash dependency means Windows-native operators are excluded." + +**Position: accepted; WSL is the documented stance.** + +STRATEGY DD-3 is explicit. Operators on a pure Windows host with no +WSL cannot run cma-mcp. The README and `pyproject.toml` classifiers +document the platform stance up front so no operator reaches +install-time confusion. Every operator running an MCP-compatible AI +client on Windows is reasonably expected to have WSL available, and +Claude operators specifically tend to use WSL because Claude Code's +own integration patterns favor it. + +If WSL universality fails — if a meaningful population of Windows +operators using MCP clients lacks WSL — the answer is a separate +Python-native cma reimplementation, not a hybrid. cma-mcp would +remain the thin wrapper. + +## C-4: "The three-section payload is verbose. Plain stdout would be cleaner." + +**Position: deflected. 
Verbosity is the construct-honesty tax.** + +Plain stdout would let an agent paraphrase cma's output as the +agent's own observation, stripping the citation discipline that +makes the loop's evidence worth keeping. The agent_guidance and +provenance blocks are not decoration; they are the structure that +carries the discipline forward to whatever the agent shows the +user. + +frame-check-mcp ships the same pattern (its +`how_to_cite_faithfully` field exists for the same reason). Both +projects accept the verbosity tax because the alternative is +construct-honesty erosion at the agent boundary. + +## C-5: "cma-mcp does not parse cma stdout into structured records. Why?" + +**Position: deferred to v0.2; v0.1 passes stdout through unchanged.** + +cma's stdout shape varies by verb (a confirmation for `cma miss`, a +table for `cma stats --leaks`). Robust parsing requires knowing each +verb's exact output format, which couples cma-mcp to bash cma's +formatting choices. v0.1 includes the raw stdout in +`analysis.cma_stdout` so callers see what cma reported; v0.2 may add +structured `analysis.record` extraction for the capture verbs whose +output format is stable. + +Forward-compat: when bash cma changes its output format, cma-mcp +v0.1 keeps working (the raw text passes through). v0.2's structured +extraction would require versioning. + +## C-6: "cma-mcp is methodology-agnostic but the README references Lodestone repeatedly." + +**Position: deflected. Reference is not bundling.** + +Methodology-agnostic means the substrate (cma's data files, cma-mcp's +schemas) does not encode any specific methodology's vocabulary. +Operators using Lodestone tag captures with FM-1..10; operators using +a different methodology tag with that methodology's catalog. cma-mcp +does not validate, expand, or interpret the tag. + +The README and tool descriptions reference Lodestone because it is +the canonical Clarethium methodology and the empirical case study +operators are most likely to read. 
References are pointers, not +enforcement. STRATEGY DD-4; pinned by the +`test_tool_descriptions_reference_lodestone_for_methodology` test +which forbids bundling FM definitions while requiring the pointer. + +## C-7: "Single-curator BDFL governance is fragile. What if Lovro disappears?" + +**Position: accepted; named-authorship is the v0.x credibility asset.** + +GOVERNANCE.md is honest: cma-mcp is a single-curator project. The +move to a named-reviewer model is a `STRATEGY.md` durable decision +trigger when a sustained external contributor exists. Until then, +the named curator is the credibility asset (matching frame-check-mcp's +explicit position). + +Disappearance is a real risk for any single-curator open-source +project. The mitigations: Apache-2.0 license (anyone can fork); +public methodology in Lodestone (non-cma-mcp operators retain access +to the canon); bash cma is canonical and lives independently. + +## C-8: "cma-mcp's tests do not exercise the live MCP wire protocol over a real subprocess pair." + +**Position: accepted gap; in-process dispatch tests cover the +handler logic, but full wire-level adversarial testing is a v0.2 +target.** + +Current tests invoke the dispatcher's request handlers directly +(via `conftest.call_handler`). They cover schema validation, +three-section payload discipline, JSONL parsing tolerance, and +error envelopes. They do not exercise the JSON-RPC parser through +real stdin/stdout pipes against a separate process. + +frame-check-mcp's `test_mcp_adversarial.py` runs subprocess +roundtrips at the wire level (rapid-fire sequential stdio, +determinism normalization). cma-mcp v0.2 will add an equivalent. + +## C-9: "Schema-version handling is permissive: legacy records and unknown schema versions both pass." + +**Position: deliberate, with surface in provenance.** + +A strict schema would reject legacy records (no schema_version +field) and break operators with pre-1.0 cma data. 
cma-mcp's read +path accepts both legacy and unknown-schema-version records, but +counts each in the `provenance.data_source` block so the caller +sees the parse-trust signal. The discipline is "tolerant read, +honest provenance" — match cma's own stance (`cma`'s tolerant-read +discipline, CHANGELOG "Tolerant read"). + +If a future schema-version is genuinely incompatible, cma-mcp will +add a strict check at parse time and emit an `isError` for that +specific schema. Until then, permissiveness with full provenance is +the right balance. + +## C-11: "Same-repo with cma is a monorepo and monorepos rot. Why not a separate Clarethium/cma-mcp like frame-check-mcp?" + +**Position: deflected. Wrapper-of relationships belong with their +wrapped subject; substrate-uses relationships do not.** + +cma-mcp wraps cma as a thin subprocess layer: every flag is a tool +argument, every JSONL field a parser concern, the surface-events +schema load-bearing for leak detection. That coupling makes drift +the failure mode and the wrapper-vs-wrapped repo split a +coordination tax the empire's compounding logic actively works +against (DECISIONS AD-008). + +frame-check-mcp's separate-repo pattern doesn't apply because it +*uses* Touchstone as a substrate. Touchstone can ship a new +measurement layer without forcing a frame-check-mcp release; the +relationship is loose enough that separation has value. +Substrate-uses and wrapper-of are structurally different and +accept different repo shapes. + +The monorepo cost is mitigated by: two release tracks via tag +prefixing (`cma-1.x`, `cma-mcp-0.x`); per-component CHANGELOG; +path-filtered CI (`tests-mcp.yml` only fires on `cma-mcp/**` +changes, `test.yml` only on bash cma changes); clear component +partition in the repo tree. The repo can be split via +`git filter-repo` if a future evidence point demands it +(AD-008 §Reversibility). 
+ +**Concrete worked example of the drift the consolidation prevents.** +cma's DATA.md schema names a JSONL field `revisit_when` on +rejection records. If cma adds a new optional field tomorrow +(say `revisit_after_date`) and the wrapper lives in a separate +repo, three states become possible: cma-mcp parses the new field +(but cma's release lags), cma writes the new field but cma-mcp +ignores it, or both update but releases interleave. Same-repo +collapses the three to one: a PR that adds the field touches both +sides and a single review confirms the alignment. + +## C-10: "`cma_surface` is a tool, not a resource. That's surprising for a read-only query." + +**Position: deliberate; the side effect is load-bearing.** + +`cma surface` writes to `surface_events.jsonl` for every invocation. +The leak-detection view (`cma_stats view=leaks`) joins these events +against subsequent misses to flag failures that occurred despite a +warning being surfaced. Without the log, leak detection cannot +function. + +Modeling `cma_surface` as a resource would either suppress the log +(breaking leak detection) or have the resource act as a side-effect +producer (violating the MCP norm that resources are read-only). +Tool semantics fit the actual behavior. DECISIONS AD-007. + +--- + +*Open critiques worth raising are welcome at the issue tracker. +Construct-honesty improves when the questions arrive earlier.* diff --git a/cma-mcp/docs/ARCHITECTURE.md b/cma-mcp/docs/ARCHITECTURE.md new file mode 100644 index 0000000..e5437b7 --- /dev/null +++ b/cma-mcp/docs/ARCHITECTURE.md @@ -0,0 +1,344 @@ +# cma-mcp Architecture + +This document is the orientation map for cma-mcp: how the modules fit +together, where the contracts live, and which structural decisions +hold the design in place. New contributors should read this before +editing code; reviewers should use it as the diff-against-claim +when evaluating changes. 
+ +For the protocol surface (every tool argument, every resource URI), +see [`MCP_SERVER.md`](MCP_SERVER.md). For the project's reasoning +about its own design, see [`ANTICIPATED_CRITIQUES.md`](ANTICIPATED_CRITIQUES.md). +For the longer-arc strategy, see [`STRATEGY.md`](../../STRATEGY.md) +and [`DECISIONS.md`](../../DECISIONS.md) at the repository root. + +--- + +## What cma-mcp is + +cma-mcp is a **subprocess wrapper** that exposes the seven primitives +of bash cma to MCP-compatible AI clients. It does not reimplement +the loop. It does not own a corpus. It does not interpret captures. +For every tool call it spawns the canonical `cma` binary with an +argv array, captures the result, and composes a three-section +payload (`analysis` + `agent_guidance` + `provenance`) that the +caller's agent passes through to its user. + +The contribution is **reach**, not new capability. STRATEGY DD-1 +locks subprocess-over-reimplementation; DECISIONS AD-008 locks +same-repo-as-cma; DECISIONS AD-001 locks no-MCP-SDK-dependency. + +--- + +## The five layers + +``` + MCP client (Claude Desktop, Cursor, Cline, ...) 
+ │ stdio (JSON-RPC 2.0 line-delimited) + ▼ + ┌──────────────────────────────────────────────────────────────────┐ + │ L1 protocol mcp_protocol.py Dispatcher, Request, │ + │ parse_line, write_response │ + ├──────────────────────────────────────────────────────────────────┤ + │ L2 schema mcp_schema.py seven tools + four │ + │ resources, JSONSchema │ + │ inputs, descriptions │ + ├──────────────────────────────────────────────────────────────────┤ + │ L3 dispatch mcp_server.py _handle_* per method: │ + │ initialize, tools/list, │ + │ tools/call, resources/list, │ + │ resources/read, ping │ + ├──────────────────────────────────────────────────────────────────┤ + │ L4 data path cma_subprocess.py argv-array invocation, │ + │ cma_jsonl.py 5s timeout, raw stdout │ + │ mcp_resources.py capture; tolerant JSONL │ + │ reader for read-only views │ + ├──────────────────────────────────────────────────────────────────┤ + │ L5 composition mcp_compose.py three-section payload: │ + │ analysis + agent_guidance + │ + │ provenance │ + └──────────────────────────────────────────────────────────────────┘ + │ spawn / read + ▼ + bash cma binary (~/.cma/*.jsonl) +``` + +Plus two cross-cutting modules: + +- `mcp_log.py` — stderr-only structured logging. Stdout is reserved + for JSON-RPC. STRATEGY DD-6. +- `_build_info.py` — auto-generated at build time by `setup.py`, + bakes the git SHA into the wheel so `--version` reports a real + value after `pip install` (see "Install fingerprint" below). 
+ +--- + +## Two execution modes + +The `cli()` entry point in `mcp_server.py` routes by argument: + +| Invocation | Path | Use | +|---|---|---| +| `cma-mcp` | builds dispatcher, runs `Dispatcher.serve()` blocking on stdin | normal MCP client startup | +| `cma-mcp --version` | `_emit_version_fingerprint()` prints one-line JSON, exits | operator confirms which install is wired up | +| `cma-mcp --test` | `_emit_test_payload()` runs a canned `cma_stats` against the operator's `~/.cma/`, prints the full three-section payload, exits | offline pipeline check without an MCP client handshake | +| `cma-mcp --help` | argparse default | discoverability | + +Unknown flags exit non-zero with a usage message — the CLI never +silently swallows misconfiguration. + +--- + +## Data flow: a tool call + +``` +1. client writes a JSON-RPC line on cma-mcp's stdin: + {"jsonrpc":"2.0","id":42,"method":"tools/call", + "params":{"name":"cma_miss","arguments":{...}}} + +2. Dispatcher.serve() reads the line. parse_line() returns a + Request. _dispatch_one() looks up the handler: + _handle_tools_call(params) + +3. _handle_tools_call validates `name` against mcp_schema.TOOLS, + raises ProtocolError(INVALID_PARAMS) if unknown. Routes to the + per-tool handler: + _handle_cma_miss(arguments) + +4. _handle_cma_miss (in mcp_server.py) calls + _build_capture_argv("miss", arguments, ["surface","fm","files", + "intended","corrected","excerpt"]). The result is an argv list + like ["miss", "", "--surface", "auth", "--fm", + "FM-3"]. Operator-supplied values land in distinct argv slots + only — never concatenated into a shell-interpreted string + (DECISIONS AD-004). + +5. _wrap_cma_call invokes cma_subprocess.run_cma(argv) which calls + subprocess.run([cma_binary] + argv, capture_output=True, + timeout=5, shell=False). If cma is missing it raises CmaError + with reason="missing_binary". If cma exits non-zero it raises + with the stderr. + +6. On success, mcp_compose.compose_capture_response(...) 
builds + the three-section payload: + + { + "analysis": {"tool": "cma_miss", "cma_stdout": "..."}, + "agent_guidance": {"what_this_tool_does": "...", + "what_this_tool_does_not_do": "...", + "how_to_cite_faithfully": "..."}, + "provenance": {"server_version": "0.1.0", + "license": "Apache-2.0", + "cost_usd": 0.0, + "cma_argv": ["/usr/local/bin/cma", + "miss", "", ...], + "cma_returncode": 0, + "cma_binary_version": "cma 1.0.0", + "git_sha": "abc123...", + "deterministic": true, + "timestamp": "2026-05-07T..."} + } + +7. Dispatcher.write_response() encodes it as a tools/call result + on stdout. The JSON-RPC reply lands on the client's stdin. + +8. The agent reading the response uses `agent_guidance` to compose + its message to the user. `provenance.citation` carries the + canonical reference string for any quoting. +``` + +Resource reads (`resources/read`) take a similar path through +`_handle_resources_read` → `mcp_resources.read_*` → `parse_provenance` +→ `mcp_compose.compose_resource_response`. They never invoke the +cma binary; they read JSONL directly with a tolerant parser. + +--- + +## The three-section payload contract + +Every tool response and every resource read returns: + +``` +{ + "analysis": , + "agent_guidance": { + "what_this_tool_does": "...", + "what_this_tool_does_not_do": "...", + "how_to_cite_faithfully": "..." + }, + "provenance": +} +``` + +**Why three sections, not one.** A plain stdout passthrough lets the +agent paraphrase cma's output as its own observation, stripping the +attribution that makes the loop's evidence worth citing. The +`agent_guidance` section is the structure that carries the citation +discipline forward into whatever the agent shows the user. The +`provenance` section is the structure that lets a downstream auditor +reproduce or verify the call. + +Adversarial determinism tests in `tests/test_payload_determinism.py` +pin the shape on every tool and resource. 
Any change that affects +the payload requires updating those tests. + +The convention is inherited from frame-check-mcp; STRATEGY DD-5 +locks it for cma-mcp. + +--- + +## Subprocess discipline + +Three rules govern every cma invocation. They are enforced +structurally, not by review. + +1. **argv-array, never shell=True** (DECISIONS AD-004). + `subprocess.run([cma_binary, *argv], shell=False)`. Operator- + supplied strings land in single argv slots. cma's argument + parser treats them as data; bash does not interpolate them. + The argv-injection-resistance test in + `tests/test_subprocess.py` writes a malicious filename + (`'; rm benign'`) and confirms cma sees it as one argv slot, + not as a shell command. + +2. **5-second timeout on every call** (DECISIONS AD-003). + `subprocess.run(..., timeout=5)`. On timeout, cma_subprocess + raises `CmaError(reason="timeout")`. The MCP server stays + responsive; the caller decides whether to retry. Matches bash + cma's own failure-isolated `hooks/cma-pre` discipline. + +3. **Missing binary surfaces as `isError`, not silent failure** + (DECISIONS AD-006-style discipline). When `cma` is not on + `PATH`, the response carries + `{"isError": true, "reason": "missing_binary", + "install": "https://github.com/Clarethium/cma#readme"}`. + No silent skip. + +The cma binary path resolution: `shutil.which("cma")`. Operators +who need a non-default path set the `CMA_BIN` environment +variable and `cma_subprocess.run_cma` uses it. (Tests cover both +paths.) + +--- + +## JSONL read tolerance + +`cma_jsonl.py` reads the operator's data directly for the four +read-only resource URIs (`cma://decisions`, `cma://rejections`, +`cma://core`, `cma://stats`). 
The reader is **tolerant** — DECISIONS +AD-002 — and reports the parse-trust signal in `provenance`: + +``` +ReadResult( + file_path=<str>, file_existed=<bool>, + records=[<dict>, ...], + corrupt_lines=<int>, + legacy_records=<int>, + unknown_schema_versions=<{seen_value, ...}>, +) +``` + +`mcp_resources.parse_provenance` rolls these counts into the +`provenance.data_source` block on every resource read. A caller who +wants strict reading checks the counts; the wrapper itself does not +reject. This matches bash cma's own tolerant-read discipline. + +If a future schema_version is genuinely incompatible, cma-mcp will +add a strict gate at parse time and emit `isError` for that schema +specifically. Until then, permissive read with honest provenance is +the right balance (see ANTICIPATED_CRITIQUES C-9). + +--- + +## Install fingerprint + +`cma-mcp --version` emits one-line JSON: + +``` +{"server_name": "cma-mcp", + "server_version": "0.1.0", + "protocol_version": "2024-11-05", + "git_sha": "<git sha>", + "cma_binary_version": "<cma version string>", + "python": "3.12.3", + "script": "/path/to/installed/mcp_server.py"} +``` + +`git_sha` resolves via two paths: + +1. **Runtime probe** (`_git_sha()` in `mcp_server.py`): + `git rev-parse HEAD` against the script's directory. Works in + development clones (`pip install -e .`). +2. **Build-time bake** (fallback): + `from _build_info import BUILD_GIT_SHA`. `setup.py` writes + `_build_info.py` at sdist/wheel build time, preferring the + `CMA_MCP_BUILD_SHA` environment variable (CI sets it to + `$GITHUB_SHA`) over a local `git rev-parse`. Works for installs + from a wheel where the runtime probe sees no `.git`. + +Without the bake, PyPI installs would silently report +`git_sha: null` and the forensic-traceability claim would degrade +on the most common install path. 
+ +--- + +## Module map + +| File | Lines | Responsibility | +|---|---:|---| +| `mcp_server.py` | ~550 | CLI entry point; `cli()` argument parser; per-tool/per-method handlers; install fingerprint; `_build_dispatcher()` wiring. | +| `mcp_protocol.py` | ~230 | JSON-RPC 2.0 over stdio: `Request`, `parse_line`, `write_response`, `Dispatcher`, `ProtocolError`, JSON-RPC error codes. | +| `mcp_schema.py` | ~520 | Tool and resource catalogs: descriptions, `inputSchema` JSONSchemas, parameter validation. The agent-facing surface lives here. | +| `mcp_compose.py` | ~340 | Three-section payload composers, per tool + per resource. `configure_provenance()` once at startup; helpers thereafter. | +| `mcp_resources.py` | ~210 | Resource-read business logic: read JSONL, sort newest-first, attach `parse_provenance`, return composed payload. | +| `cma_subprocess.py` | ~245 | The single ingress to bash cma. `run_cma`, `cma_version`, error shapes (`CmaError` with `reason` field). | +| `cma_jsonl.py` | ~165 | Tolerant JSONL reader. Counts schema-version trust signal. No interpretation. | +| `mcp_log.py` | ~70 | Stderr-only structured logging. Stdout reserved for JSON-RPC. | +| `_build_info.py` | 2 | Auto-generated by `setup.py` at build time. `BUILD_GIT_SHA = "..."`. Gitignored. | + +Test suite (`tests/`): 36 cases plus the wheel-install smoke step +in CI. + +--- + +## What this design rejects + +- **No MCP SDK dependency.** Manual JSON-RPC keeps the runtime + surface to the Python standard library. STRATEGY DD-2. +- **No methodology vocabulary bundled.** `--fm` is opaque. + Operators tag with their methodology's catalog (Lodestone's + FM-1..10 or otherwise). STRATEGY DD-4. +- **No transports beyond stdio.** SSE / WebSocket / HTTP are out of + scope; gateways exist for multi-client deployment. DECISIONS + AD-005. +- **No reimplementation of cma's primitives.** Every flag is a + subprocess argv slot. STRATEGY DD-1. + +These rejections are not "we'll get to them later." 
They are the +shape that keeps cma-mcp thin and drift-resistant. + +--- + +## How to extend safely + +A surface change (adding a tool, adding a resource, changing a +schema) touches **four files together**: + +1. `mcp_schema.py` — add the entry to `TOOLS` or `RESOURCES`. +2. `mcp_server.py` — add the `_handle_*` dispatcher and wire it. +3. `tests/test_mcp_server.py` — add a conformance test that + exercises the surface. +4. `docs/MCP_SERVER.md` — document the operator-facing reference. + +A PR that moves only one of the four is incomplete. Reviewers will +ask for the others. See [`CONTRIBUTING.md`](../../CONTRIBUTING.md) +for the full PR checklist. + +--- + +*Updated when the architecture changes. Last revision tracks the +build-time SHA bake mechanism and the wire-protocol subprocess test +landing.* diff --git a/cma-mcp/docs/FAQ.md b/cma-mcp/docs/FAQ.md new file mode 100644 index 0000000..a3529fe --- /dev/null +++ b/cma-mcp/docs/FAQ.md @@ -0,0 +1,223 @@ +# cma-mcp FAQ + +Common questions about cma-mcp, ordered roughly by where operators +hit them in setup. For symptoms-and-fixes, see +[`TROUBLESHOOTING.md`](TROUBLESHOOTING.md). + +--- + +## Conceptual + +### What does cma-mcp give me that bash cma doesn't? + +Reach. Bash cma already integrates with Claude Code (via the +PreToolUse and SessionStart hooks) and with shell environments +(zsh native preexec, bash via bash-preexec). cma-mcp brings the same +seven primitives to MCP-compatible AI clients that have no shell +hook surface: Claude Desktop, Cursor, Cline, Continue.dev, and +others. The contribution is which audiences can run the loop, not +new loop semantics. STRATEGY DD-1. + +### Does cma-mcp add LLM cost? + +No. cma-mcp is a deterministic subprocess wrapper. Every response +carries `provenance.cost_usd: 0.0` and `provenance.deterministic: +true`. The agent that calls cma-mcp is the only LLM in the path, +and it pays its own normal token cost; cma-mcp does not call any +model. + +### Where does my data live? 
+ +In `~/.cma/` (the canonical location used by bash cma). cma-mcp +never owns or relocates data; it shells out to bash cma which writes +to the operator's `~/.cma/` per its DATA.md schema. On WSL, that is +the WSL home (`/home/<user>/.cma/`), not the Windows side. + +The operator can override via `CMA_DIR=/some/other/path` and bash +cma honors it — cma-mcp passes the env through subprocess +inheritance. + +### Is cma-mcp methodology-specific? + +No. cma stores `--fm` as an opaque string. Operators using +[Lodestone](https://github.com/Clarethium/lodestone) tag captures +with FM-1..10; operators using a different methodology tag with +that catalog. cma-mcp does not validate, expand, or interpret the +tag. Tool descriptions reference Lodestone as the canonical +methodology but bundle no vocabulary. STRATEGY DD-4. + +--- + +## Install + +### Do I need bash cma installed before installing cma-mcp? + +Yes. cma-mcp wraps the canonical bash cma binary as a subprocess. +Every tool call spawns `cma <verb> ...`. Without the +binary on `PATH`, every call returns `isError: true` with +`reason: missing_binary`. Install bash cma first per the +[parent README](https://github.com/Clarethium/cma#readme), then +`pip install cma-mcp`. + +### Where do I put the MCP client config? + +| Client | Config path | Block name | +|---|---|---| +| Claude Desktop | `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS), `%APPDATA%\Claude\claude_desktop_config.json` (Windows) | `mcpServers.cma` | +| Cursor | `~/.cursor/mcp.json` (or via Cursor settings UI) | `mcpServers.cma` | +| Cline | VS Code settings UI → Cline → MCP servers | `cma` | +| Continue.dev | `~/.continue/config.json` | `mcpServers.cma` | + +The block content is the same across clients — point at the +installed `cma-mcp` entry point: + +``` +{ + "mcpServers": { + "cma": { + "command": "cma-mcp" + } + } +} +``` + +### Can I run cma-mcp without a virtualenv (`pipx install`, etc.)? + +Yes. 
`pipx install cma-mcp` puts `cma-mcp` on the user-level PATH +and the MCP client config above works unchanged. `uv tool install +cma-mcp` works the same way. The wheel ships the `cma-mcp` console +script as the only entry point. + +### Why isn't there a Windows-native install? + +bash cma is the canonical implementation and uses `/bin/bash`. +Windows operators run cma-mcp under WSL, which gives them the same +binary on `PATH`. STRATEGY DD-3 documents the platform stance. +Operators on a pure Windows host with no WSL cannot run cma-mcp +today; that is the deliberate trade-off between canonical-binary +alignment and standalone Python reach. + +--- + +## Operation + +### How do I know cma-mcp is wired up correctly? + +Run `cma-mcp --version` directly in a terminal. It emits a one-line +JSON fingerprint with `server_version`, `protocol_version`, +`git_sha`, `cma_binary_version`, `python` version, and `script` +path. If `cma_binary_version` is `null`, bash cma is missing or +silent — fix that first. + +For a deeper check, run `cma-mcp --test`. It emits a full +three-section payload for `cma_stats` (default view) against your +real `~/.cma/` corpus — the same shape an MCP client would see for +that tool call, without needing to spin up a client. + +### Why does my agent paraphrase cma's output instead of quoting it? + +The agent guidance section of every payload includes +`how_to_cite_faithfully`: a one-line instruction telling the agent +exactly how to quote without smoothing the numbers. If the agent +still paraphrases, surface the issue with the agent's prompt +configuration rather than cma-mcp's payload — the discipline lives +in the agent's reading, not in the wire format. + +### Can I use cma-mcp and the bash hooks at the same time? + +Yes. They are independent integration paths over the same +underlying corpus (`~/.cma/*.jsonl`). 
The PreToolUse hook in Claude +Code surfaces priming context before tool calls; cma-mcp tools +surface or capture on demand from any MCP-compatible client. Both +write through bash cma's atomic-write discipline — captures from +either path interleave correctly. + +### How fast is each MCP call? + +Lightweight calls (ping, tools/list, resources/list) round-trip in +under 5ms. Subprocess-bound calls inherit bash cma's latency: +~50ms for `cma_stats` (default), ~5–15ms for `cma_surface` and +`cma_miss`. Run `python3 bench.py` from the cma-mcp directory for +numbers against your machine. The MCP wrapper itself adds +essentially zero overhead. + +### Is the schema stable across releases? + +The three-section payload contract (`analysis` + `agent_guidance` + +`provenance`) is stable across cma-mcp 0.x. Tool argument schemas +are stable within a major version (`SERVER_VERSION` — see +`mcp_server.py`). bash cma's JSONL data schema is stable across +the `1.0` line per its DATA.md. Schema changes that are not +backwards-compatible bump the relevant major version explicitly. + +--- + +## Limits + +### What's not in cma-mcp 0.1? + +- **Wire-protocol fuzzing.** v0.1 ships subprocess roundtrip tests + (`tests/test_mcp_wire.py`); broader fuzzing of the JSON-RPC + parser is a v0.2 target. +- **Structured stdout parsing** for capture verbs. cma-mcp passes + `cma_stdout` through unchanged; v0.2 may extract structured + records from verbs whose output format is stable. + ANTICIPATED_CRITIQUES C-5. +- **Cancellation / progress notifications.** cma calls are + sub-second; the MCP cancellation surface and `progress` + notifications are out of scope for the current call shape. +- **Native Windows install.** WSL is the documented path. + +### Does cma-mcp validate that captures are well-formed? + +It validates the MCP-side input schema (every argument's type per +the `inputSchema` in `mcp_schema.py`) and surfaces validation +errors as `isError: true` with reason. 
It does not enforce +methodology rules (which `--fm` values are legal, what shape a +"good" miss has). That belongs in the methodology layer +(Lodestone), not the substrate. STRATEGY DD-4. + +### What happens if cma writes to a corrupted JSONL line? + +Resource reads use a tolerant parser +(`cma_jsonl.read_jsonl`) that skips malformed lines and counts the +parse failures. The count surfaces in +`provenance.data_source.corrupt_lines_skipped` (see the +`data_source` example in `MCP_SERVER.md`) so the caller knows what +the trust signal is. This matches bash cma's own tolerant-read +discipline. ANTICIPATED_CRITIQUES C-9. + +--- + +## Citation + +### How do I cite cma-mcp? + +Every response carries `provenance.citation`: + +> `cma-mcp 0.1.0 (Clarethium, 2026). https://github.com/Clarethium/cma/tree/main/cma-mcp` + +Also in `CITATION.cff` at the repository root and in the project's +PyPI metadata. Once a Zenodo DOI is allocated, the citation will +include it. + +### Can I publish a paper using cma-mcp? + +Yes; the project is Apache-2.0 licensed and the methodology canon +(Lodestone) is CC-BY-4.0. Cite cma-mcp via the field above and +Lodestone separately if you reference its vocabulary. + +--- + +## Where things live + +- **Operator-facing reference:** [`MCP_SERVER.md`](MCP_SERVER.md) — + every tool argument, every resource URI, the exact response + shapes. +- **Architecture map:** [`ARCHITECTURE.md`](ARCHITECTURE.md) — + module layout, data flow, contracts. +- **Validation plan:** [`VALIDATION_PROGRAM.md`](VALIDATION_PROGRAM.md) — + what claims this project makes and how they are tested. +- **Self-criticism:** [`ANTICIPATED_CRITIQUES.md`](ANTICIPATED_CRITIQUES.md) — + the strongest readings against the design, named openly with + positions and trade-offs. +- **Symptoms and fixes:** [`TROUBLESHOOTING.md`](TROUBLESHOOTING.md). 
diff --git a/cma-mcp/docs/MCP_SERVER.md b/cma-mcp/docs/MCP_SERVER.md new file mode 100644 index 0000000..c7bded4 --- /dev/null +++ b/cma-mcp/docs/MCP_SERVER.md @@ -0,0 +1,200 @@ +# cma-mcp Server Reference + +This document is the authoritative reference for cma-mcp's MCP +protocol surface: the initialize handshake, tools/list, tools/call, +resources/list, resources/read, ping, and notifications. It covers +the request and response shapes a client sees. + +For the rationale behind these choices, see `STRATEGY.md` (durable +decisions) and `DECISIONS.md` (architectural decisions). + +## Transport + +Stdio. Each line on stdin is one JSON-RPC 2.0 request or +notification; each response is one line on stdout. Stderr is +reserved for cma-mcp's logs (timestamp + level + key=value); MCP +protocol traffic never touches stderr. + +## Initialize handshake + +Request: + + { + "jsonrpc": "2.0", + "id": 1, + "method": "initialize", + "params": { + "protocolVersion": "2024-11-05", + "capabilities": {}, + "clientInfo": {"name": "claude-desktop", "version": "1.x"} + } + } + +Response: + + { + "jsonrpc": "2.0", + "id": 1, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {"listChanged": false}, + "resources": {"listChanged": false, "subscribe": false} + }, + "serverInfo": {"name": "cma-mcp", "version": "0.1.0"}, + "instructions": "cma-mcp distributes the cma compound practice loop ..." + } + } + +The `instructions` field carries cross-tool orientation prose for +agents and for MCP clients whose UI surfaces the field. Names the +use case, the default invocation pattern, and the methodology-canon +boundary (Lodestone owns vocabulary; cma stores `--fm` opaque). + +## Tools + +`tools/list` returns seven tools. Each tool definition has +`name`, `title`, `description`, and `inputSchema`. The full schema +shapes are in `mcp_schema.py`. 
+ +### Common shapes + +All tool results have `content` (a one-element array of `{type: +"text", text: <json>}`) and `isError` (boolean). + +The text payload is always a three-section JSON document: + + { + "analysis": { ... }, + "agent_guidance": { ... }, + "provenance": { ... } + } + +`isError` is true when the analysis block carries an error (cma +binary missing, subprocess timeout, validation failure on a +required field). The error detail and reason are inside +`analysis.error` / `analysis.reason` / `analysis.detail`. + +### Surface labels + +Tools that take a `surface` argument (see the tool list below) +accept it as an optional `string` parameter +(min 2, max 20 chars). cma's data substrate stores it as an opaque +label. Canonical examples used by the bash cma reference +implementation: `auth`, `db`, `docs`, `ui`, `infra`, `general`, +`git`. Custom values are accepted (e.g., `test`, `ml`, `frontend`, +`mobile`, `ops`). cma-mcp does not enforce a closed enum. + +### Failure-mode tags (`fm`) + +`cma_miss` and `cma_prevented` accept `fm` as an optional opaque +string. cma-mcp does not bundle Lodestone's FM-1..10 catalog +(STRATEGY DD-4); operators using a methodology with a canonical +catalog (such as Lodestone) pass that methodology's tag here. +Operators who want autoclassification at capture time wire the +`CMA_FM_CLASSIFIER` plugin per cma's CLI convention; cma-mcp +inherits the wiring transparently. 
+ +### Tool list + +| Tool | Required params | Optional params | +|---|---|---| +| `cma_miss` | `description` (string, min 8) | `surface`, `fm`, `files`, `intended`, `corrected`, `excerpt` | +| `cma_decision` | `description` (string, min 15) | `surface`, `applies_when` | +| `cma_reject` | `description` (string, min 8) | `surface`, `revisit_when` | +| `cma_prevented` | `description` (string, min 8) | `miss_id`, `warning_id` | +| `cma_distill` | `mode` (enum: default / retire / review) | `description` (mode=default), `pattern` (mode=retire), `scope`, `surface` | +| `cma_surface` | (none) | `surface`, `file`, `type`, `limit` (int 1..50) | +| `cma_stats` | (none) | `view` (enum: default / leaks / recurrence / preventions / rejections / behavior) | + +## Resources + +`resources/list` returns four resources: + +| URI | mimeType | Lookback | Scope | +|---|---|---|---| +| `cma://decisions` | application/json | 180 days | All projects (cma 1.0 is single-project) | +| `cma://rejections` | application/json | 30 days | All projects | +| `cma://core` | application/json | indefinite | All scopes; retired filtered | +| `cma://stats` | application/json | per-view | All projects | + +`resources/read` returns the same three-section JSON shape as tool +calls, wrapped in `contents[0].text`. + +The `analysis.records` array on `cma://decisions`, `cma://rejections`, +and `cma://core` carries raw JSONL records as cma wrote them. The +`provenance.data_source` block reports parse outcomes: + + "data_source": { + "file": "/home/.../misses.jsonl", + "exists": true, + "records_parsed": 200, + "corrupt_lines_skipped": 0, + "legacy_records_no_schema_version": 12, + "unknown_schema_versions": [] + } + +## Error envelope + +JSON-RPC 2.0 errors. 
Standard codes: + +| Code | Meaning | +|---|---| +| -32700 | Parse error (malformed JSON) | +| -32600 | Invalid request (not a JSON-RPC 2.0 envelope) | +| -32601 | Method not found | +| -32602 | Invalid params | +| -32603 | Internal error | + +MCP-specific: + +| Code | Meaning | +|---|---| +| -32002 | Resource not found | + +When a tool dispatch produces a logical error (cma binary missing, +subprocess timeout, invalid argument), the response is a normal +`tools/call` result with `isError: true` rather than a JSON-RPC +error envelope. This matches the MCP spec recommendation: protocol +errors are for protocol violations; tool errors live in the tool's +content. + +## Pinned conformance + +`tests/test_mcp_server.py` and `tests/test_payload_determinism.py` +pin the surface contracts. The following are pinned surface changes that +require a CHANGELOG entry and a SERVER_VERSION bump (minor for a +pure addition, major otherwise): + +- Tool name added or removed +- Tool input schema changes (added required field, removed field, + changed type) +- Resource URI added or removed +- Three top-level payload sections changed in name or required + presence +- Provenance drops any of: `server_name`, `server_version`, + `protocol_version`, `license`, `cost_usd`, `citation`, + `deterministic`, `timestamp` +- isError semantics changed for any tool + +Server version follows semver: + +- patch: bug fix, no schema change +- minor: new optional field in tool responses, new tool/resource +- major: any item from the pinned-changes list above other than a + pure tool or resource addition + +## Install fingerprint (`--version`) + +`cma-mcp --version` emits one JSON line with: + + { + "server_name": "cma-mcp", + "server_version": "0.1.0", + "protocol_version": "2024-11-05", + "git_sha": "abc12345" or "abc12345+dirty" or null, + "cma_binary_version": "<cma --version output>" or null, + "python": "3.12.3", + "script": "/abs/path/to/cma-mcp/mcp_server.py" + } + +`cma_binary_version` is null when cma is not on `PATH` or +`cma --version` returns non-zero. 
The MCP server still runs in that +case; tool calls that need cma fail with a structured error. diff --git a/cma-mcp/docs/TROUBLESHOOTING.md b/cma-mcp/docs/TROUBLESHOOTING.md new file mode 100644 index 0000000..b6203db --- /dev/null +++ b/cma-mcp/docs/TROUBLESHOOTING.md @@ -0,0 +1,264 @@ +# cma-mcp Troubleshooting + +Symptoms, diagnostics, fixes. Read top-down — the diagnostic loop +at the start gives the right answer for ~80% of issues. + +For "why" questions and conceptual orientation, see +[`FAQ.md`](FAQ.md). + +--- + +## The diagnostic loop + +When something is wrong, run these four commands in order. Each +one rules out a specific class of problem; the first failure +identifies the layer to fix. + +``` +1. cma --version # is bash cma installed and runnable? +2. cma stats # does bash cma read your corpus? +3. cma-mcp --version # is cma-mcp installed and importable? +4. cma-mcp --test # does the wrapper round-trip a real call? +``` + +If `1` fails: install bash cma per the +[parent README](https://github.com/Clarethium/cma#readme). + +If `2` fails (binary works but corpus errors): run `cma init` to +materialize `~/.cma/`, or check that `CMA_DIR` is not pointing +somewhere unexpected. + +If `3` fails: re-run `pip install cma-mcp` (or `pipx install +cma-mcp`). If the script is missing on PATH, the install location +is not on `PATH`. + +If `4` fails: the bug is in the wrapper layer or its connection to +cma. The error output names the layer (subprocess timeout, +parse failure, schema mismatch). + +--- + +## The MCP client cannot see cma-mcp + +**Symptom.** Claude Desktop / Cursor / Cline show no cma tools in +the tool list. + +**Cause hierarchy** (likely-first): + +1. **Config file path is wrong for the platform.** + - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json` + - Windows (Claude Desktop): `%APPDATA%\Claude\claude_desktop_config.json` + - Cursor: `~/.cursor/mcp.json` (or settings UI) + +2. 
**JSON syntax error in the config.** Validate with + `python3 -m json.tool < <config-path>`. A trailing comma after + the last entry is the most common cause. + +3. **The client was not restarted after the config change.** MCP + servers are loaded at client start; a config change is invisible + until the next launch. + +4. **The `cma-mcp` command is not on the PATH the client sees.** + GUI applications do not always inherit shell PATH (Claude + Desktop on macOS is the canonical example). Use the absolute + path: + + ```json + { + "mcpServers": { + "cma": { + "command": "/Users/yourname/.local/bin/cma-mcp" + } + } + } + ``` + + Find the absolute path with `which cma-mcp`. + +5. **cma-mcp itself is failing on startup.** Run `cma-mcp --version` + directly. If it errors, that error is what the client sees. + +**Diagnostic.** Most clients log MCP server startup. For Claude +Desktop on macOS, the log is at +`~/Library/Logs/Claude/mcp-server-cma.log`. The first line of that +log tells you what failed. + +--- + +## Tool calls return "isError: true" with "missing_binary" + +**Symptom.** Every tool call returns: +``` +{"isError": true, "reason": "missing_binary", + "install": "https://github.com/Clarethium/cma#readme"} +``` + +**Cause.** The cma binary is not on the PATH that cma-mcp sees. + +**Fix.** + +```bash +which cma # confirm the binary is on YOUR PATH +echo "$PATH" # confirm what your PATH is +cma-mcp --version | python3 -m json.tool # check what cma-mcp sees +``` + +If `which cma` returns a path but `cma-mcp --version` reports +`cma_binary_version: null`, the MCP client is launching cma-mcp +without your shell PATH. 
Set the binary path explicitly via the +`CMA_BINARY` environment variable in the MCP client config: + +```json +{ + "mcpServers": { + "cma": { + "command": "cma-mcp", + "env": { + "CMA_BINARY": "/usr/local/bin/cma" + } + } + } +} +``` + +--- + +## Tool calls hang, then return "isError: true" with "timeout" + +**Symptom.** Calls that previously completed in milliseconds now +hang for 5 seconds and return: +``` +{"isError": true, "reason": "timeout"} +``` + +**Cause.** bash cma is taking longer than 5 seconds. This is a +hard ceiling enforced by cma-mcp (DECISIONS AD-003) so a hung cma +process does not hang the MCP server. + +**Diagnose.** + +```bash +time cma stats # baseline: should be sub-second +ls -la ~/.cma/ # check corpus size +wc -l ~/.cma/*.jsonl # individual file sizes +``` + +The most common causes: + +1. A JSONL file has grown into the millions of records. Aggregation + stats (`cma stats --leaks`, `--recurrence`) become O(N²) past + some threshold. Mitigate via `cma distill --retire <pattern>` + to graduate frequent recurrences into core learnings, or split + the corpus per project via `CMA_DIR`. +2. The file is on a slow / network filesystem. Move the corpus to a + local SSD path via `CMA_DIR`. +3. A corrupted record is causing cma's parser to thrash. Run + `cma stats` and look for `cma: skipped N corrupted line(s)` + warnings. + +--- + +## "Unknown schema_version" warnings on every read + +**Symptom.** Resource reads (`cma://decisions`, `cma://core`, etc.) +include a non-empty +`provenance.data_source.unknown_schema_versions` list. + +**Cause.** Records in your corpus carry a `schema_version` other +than `"1.0"`. cma-mcp parses them tolerantly (DECISIONS AD-002) so +the read still succeeds, but the trust signal flags the +unrecognized schema. + +**Fix.** Two paths. + +1. If the records came from a non-Clarethium cma fork (different + `schema_version` value), confirm the schema is read-compatible + with cma 1.0. 
Most additive changes are; check the fork's + schema docs. +2. If the records carry a future cma schema, upgrade to a cma + version that recognizes it. Until then, the + `provenance.data_source.unknown_schema_versions` list is the + honest read signal. + +--- + +## `cma-mcp --version` reports `git_sha: null` + +**Symptom.** + +```json +{"server_name": "cma-mcp", ..., "git_sha": null, ...} +``` + +**Causes**, depending on install path. + +1. **From a wheel where `_build_info.py` was not bundled.** Should + not happen post 0.1.0 because `pyproject.toml` lists + `_build_info` in `py-modules` and `setup.py` writes the file. + If you are running an early-cut wheel, rebuild from source. +2. **Build-time SHA was unavailable.** `setup.py` writes an empty SHA + when `CMA_MCP_BUILD_SHA` is not set and there is no `.git` + directory accessible (e.g., a ZIP source download). Rebuild + inside a real clone. +3. **A development clone, not a wheel install, but the `git` + binary is missing or unreadable.** Install git and confirm + `git rev-parse HEAD` works from the cma-mcp directory. + +`git_sha: null` is honest and not a fatal error — the install +fingerprint surfaces the gap rather than hiding it. + +--- + +## Coverage report shows a sudden drop after a refactor + +Coverage scopes the eight runtime modules per `pyproject.toml +[tool.coverage.run]`. The wire-protocol tests +(`tests/test_mcp_wire.py`) spawn cma-mcp as a real subprocess and +do not increment coverage counters because pytest-cov does not +follow subprocess code paths without a `sitecustomize` hook. + +If you renamed a module or moved code into a function only +exercised through wire tests, coverage will appear to drop even +though end-to-end behavior is unchanged. Confirm by running: + +```bash +python3 -m pytest tests/test_mcp_wire.py -v +``` + +If those pass, the behavior is intact; the coverage number is a +floor, not a ceiling. 
+ +--- + +## Where to file a bug + +Reproducible bugs go to +[github.com/Clarethium/cma/issues](https://github.com/Clarethium/cma/issues) +with the `cma-mcp` label or `[cma-mcp]` in the title to +disambiguate from bash cma issues. + +A useful bug report includes: + +``` +## What I expected + + +## What I observed + + +## Reproduction + + +## Environment +$ cma-mcp --version +{...paste the install fingerprint...} + +$ cma --version + + +OS: +MCP client: +``` + +Security issues go to `lovro.lucic@gmail.com` per +[SECURITY.md](https://github.com/Clarethium/cma/blob/main/SECURITY.md), +not the public issue tracker. diff --git a/cma-mcp/docs/VALIDATION_PROGRAM.md b/cma-mcp/docs/VALIDATION_PROGRAM.md new file mode 100644 index 0000000..76d28f9 --- /dev/null +++ b/cma-mcp/docs/VALIDATION_PROGRAM.md @@ -0,0 +1,284 @@ +# Validation Program + +This document specifies the empirical validation plan for cma-mcp: +the claims the project makes, the data needed to test them, and the +post-launch protocol for accumulating evidence. The program is +pre-registered so the test design is fixed before the data arrives. + +The structure follows frame-check-mcp's +[VALIDATION_PROGRAM.md](https://github.com/Clarethium/frame-check-mcp/blob/master/docs/VALIDATION_PROGRAM.md) +pattern: separate the protocol-conformance layer (does cma-mcp +faithfully expose cma's loop) from the loop-closing layer (does the +loop actually close when distributed via MCP). + +--- + +## Layer 1: Protocol conformance (validated at every release) + +**Claim.** Every cma-mcp release faithfully exposes bash cma's seven +primitives and four resource surfaces with the three-section payload +discipline intact. + +**Evidence.** The pytest suite at `tests/`. Every release passes: + +- pytest cases (currently 36) across protocol conformance, + three-section payload determinism, JSONL parsing tolerance, + subprocess-wrapper isolation, and install-fingerprint + git_sha fallback. 
Wire-protocol subprocess roundtrip tests + live in `tests/test_mcp_wire.py`; broader fuzzing of the JSON-RPC + parser is a v0.2 target (see `docs/ANTICIPATED_CRITIQUES.md` C-8). +- The `--version` install fingerprint matches the published wheel's + metadata. +- The `--test` offline sanity check produces a valid three-section + payload against the operator's `~/.cma/` data. + +**Status.** Validated continuously via CI on every PR (pytest runs +on Python 3.10, 3.11, 3.12). Coverage extension toward +adversarial-stdio roundtrips planned for v0.2 (see `docs/ANTICIPATED_CRITIQUES.md` C-8). + +--- + +## Layer 2: Distribution faithfulness (one-shot validation) + +**Claim.** Records captured through cma-mcp are identical to +records captured through bash cma directly in every field except +`id` and `timestamp`. An operator can switch +between cma CLI and cma-mcp without producing a heterogeneous +corpus. + +**Test design.** + +1. Empty `~/.cma/` directory. +2. Capture a fixed sequence of 20 records via bash cma directly: + 5 misses (mix of texture-preserved and bare), 5 decisions (with + `applies_when`), 5 rejections (with `revisit_when`), 5 + preventions (with `miss_id` linkages). +3. Read all `*.jsonl` files; record byte-content as snapshot A. +4. Empty `~/.cma/` again. +5. Capture the same 20 records via cma-mcp tool calls (identical + field values). +6. Read all `*.jsonl` files; record byte-content as snapshot B. +7. Compare: A and B must differ only in `id` (random suffix) and + `timestamp` (UTC clock). Every other field byte-identical. + +**Status.** Designed; one-shot pre-PyPI-publish. The result is published to +`docs/internal/DISTRIBUTION_FAITHFULNESS_<date>.md` with the +captured snapshots. + +--- + +## Layer 3: Loop closure through MCP exposure (longitudinal) + +**Claim (load-bearing for the whole project).** Operators who run +the cma compound practice loop through an MCP client (Claude +Desktop, Cursor, etc.) 
experience the loop closing — surfaced +warnings actually catching repeats — at a rate not statistically +distinguishable from operators running the same loop through cma's +shell hooks or CLI directly. + +This is the empire's core compounding claim, instantiated for +cma-mcp's distribution layer specifically. cma's loop is supposed +to close. cma-mcp's value depends on the loop closing through MCP +exposure too. + +**Test design.** + +1. **Cohort.** Operators who install cma-mcp from PyPI and opt in + to the validation program by setting `CMA_MCP_VALIDATION=1` (the + variable is read once at server start; opt-in only). +2. **Measurement window.** Rolling 90-day window per operator, with + minimum 30 days of activity to qualify. +3. **Metrics.** Per operator, computed monthly: + - **prevention/miss ratio**: count(preventions) / + count(misses) over the window. + - **leak rate**: count(leaks from `cma stats --leaks`) / + count(misses). + - **recurrence rate**: count(misses where prior similar miss + existed at capture time) / count(misses). +4. **Comparison.** Two cohorts: + - **Cohort A (cma-mcp)**: operators capturing primarily through + cma-mcp tool calls. + - **Cohort B (cma direct)**: operators capturing primarily + through cma CLI / Claude Code hooks. +5. **Test.** Two-sample t-test on prevention/miss ratio with α=0.05 + (pre-registered; no peeking). +6. **Falsification criterion.** If Cohort A's mean prevention/miss + ratio is statistically significantly lower than Cohort B's + (p < 0.05) at the 90-day mark with N ≥ 20 per cohort, cma-mcp's + distribution claim is falsified. Public report. Strategy + document update. + +**Cohort assignment.** Operator self-classifies primary capture +channel via `CMA_MCP_VALIDATION_COHORT` env var (values: `mcp`, +`direct`, `mixed`). Operators with `mixed` are excluded from the +two-cohort comparison. 
Cohort assignment is recorded once per +operator per measurement window; cross-window switching is +disallowed (the operator stays in their first declared cohort +through the window's end to prevent post-hoc cohort selection). + +**Data shipping.** Operators export their `~/.cma/` corpus as a +zip and submit through a separate intake (not part of cma-mcp's +runtime; cma-mcp ships zero telemetry). Submission is opt-in, +explicit, and named. Anonymization happens at the operator's choice +before export. + +**Status.** Designed; awaits release of cma-mcp on PyPI plus +sufficient cohort N (≥20 per arm). First report at 90 days +post-launch or N=20 per arm, whichever comes later. + +--- + +## Layer 4: Anti-claim (what we are NOT testing) + +cma-mcp's validation program does NOT claim: + +- That the cma loop is more effective than no loop at all (that is + cma's claim, not cma-mcp's). +- That cma-mcp produces "better" captures than the CLI (the + capture quality is a function of operator discipline, not of the + distribution channel). +- That MCP exposure improves loop adoption (a separate empirical + question; would require comparing operator counts pre and post + MCP availability). + +The narrow claim cma-mcp owns is **distribution faithfulness** +(Layer 2) plus **loop-closing parity through MCP exposure** (Layer +3). Anything broader belongs to cma's or Lodestone's validation. + +--- + +## Interim evidence (single-operator pilot, 2025-12-10 → 2026-05-07) + +The Layer 3 cohort study is the formal validation. It requires +N≥20 operators per arm and 90 days of activity, conditions that +cannot be met before PyPI publication. The evidence below is the +single-operator pilot the project author has run on themselves +during cma's pre-1.0 development. It is published here because +not publishing it would underclaim, and overclaiming it would +substitute lived experience for a cohort study. Both are wrong. + +**Conditions.** + +- N = 1 operator (the project author). 
+- Window: 2025-12-10 (earliest dated miss in the active corpus; + the table below shows a few decisions and lessons dated up to + five days earlier) + through 2026-05-07. ~150 days continuous, with daily activity in + most weeks. Earlier captures exist in archived files; the active + set was preserved through one distillation cycle. +- Daily-driver binary: a working variant of cma that predates the + canonical 1.0 reference implementation. Schema differs in field + names (`ts` rather than `timestamp`, no `schema_version`). The + methodology is identical; the operator's evidence carries a + cma-binary-fidelity caveat that the cohort study is designed to + remove. +- AI client: Claude Code with bash cma's PreToolUse and + SessionStart hooks throughout the window. Context-window upgrade + to ~1M tokens (Claude Sonnet 4.5+) landed mid-window. + +**What the corpus shows.** + +| Capture type | Active count | Window | +|---|---:|---| +| Misses | 208 | 2025-12-10 → 2026-05-07 | +| Decisions | 47 | 2025-12-05 → 2026-04-17 | +| Rejections | 15 | 2026-01-27 → 2026-03-14 | +| Core distillations | 6 | 2026-02-26 → 2026-04-24 | +| Lessons | 63 | 2025-12-09 → 2026-01-15 | + +Failure-mode distribution: FM-3 (Happy Path Only) 109; FM-1 +(Speed Over Understanding) 26; FM-6 (Assumption Over +Verification) 25; FM-8 (Scope Abandonment) 11. Surface +distribution: general 99, ui 48, docs 38, infra 9, git 7, db 5, +api 2. + +**What appears to compound.** + +The corpus itself. 339 active records survived ~5 months of +continuous use, ~30 context compactions, and at least one model +generation upgrade without manual reset. Decisions and rejections +captured early in the window remain queryable late in the window. +The artifact persists where session state does not. This is the +strongest replicated observation. + +Distillation to core. 6 core promotions across ~3 months, +hand-curated by the operator from the recurrence patterns. +Cadence is slow and deliberate by design — the gate to core is +"does this restructure how thinking works", not "did this happen +twice". 
+ +Decision tracking. 47 decisions captured. The operator's +qualitative observation: silently-rebuilt rejected branches — +the failure mode that motivates `cma reject` — happens +materially less often when prior rejections are surfaced at +session start. Not measured against a counterfactual; reported as +operator-experience. + +**What did not compound the way the design predicted.** + +Per-turn surface injection. With a 1M-token context window, the +marginal value of surfacing prior captures into each turn is +smaller than it was at 200K. The model already carries enough +state in-context that the surfaced 5–15 captures contribute less +to that turn's reasoning than the equivalent surface did at +narrower windows. The CAPTURE side of the loop continued to +compound through the upgrade; the SURFACE side has variable +marginal value depending on context size. + +Implication: the load-bearing function of cma is shifting from +session-state augmentation toward durable-corpus retention. The +artifact survives across context windows; the per-session +injection's contribution is window-size-dependent. cma-mcp's MCP +distribution path inherits this — the value of `cma_surface` will +also be context-window-dependent in the receiving client. + +**What is missing from the data.** + +`cma prevented` captures: zero in the active corpus. The operator +does not formally record when a surfaced warning catches a repeat. +This is an operator-recording habit gap, not necessarily a +loop-function gap — the operator's qualitative experience is that +warnings do catch repeats — but without the prevention record the +loop's prevention/miss ratio cannot be computed. Layer 3's +falsification criterion depends on this metric. Closing the +recording gap is a methodology discipline (`cma prevented` becomes +part of the post-correction reflex) rather than a code change. + +The cohort study (Layer 3) is the test that turns "appears to +compound" into "compounds at rate X relative to control." 
Single- +operator evidence is necessary for the project to ship at all but +is not sufficient for the empire-grade compounding claim. + +**Honest summary.** + +cma's CAPTURE + DISTILL + RETAIN cycle compounds on single-operator +evidence: the corpus persists, distillation produces durable +learnings, decision tracking visibly reduces silent rebuilds. +cma's SURFACE → CATCH → PREVENT cycle is operator-experienced as +working but is not formally measured (zero prevention captures); +its claim is interim. cma-mcp's distribution path adds reach +without changing this evidence — the MCP layer is thin, and the +loop closes (or doesn't) in cma's substrate, not in the wrapper. + +What the project ships at 0.1 is a publication of the +infrastructure, the methodology, and the single-operator pilot +evidence in honest shape. The cohort comparison is what comes +next. + +--- + +## Reporting cadence + +- **Layer 1**: every release. CI green required to publish. +- **Layer 2**: once, pre-PyPI-publish. Re-run if bash cma's record + schema or cma-mcp's wrapper changes shape. +- **Layer 3**: 90-day post-launch first report; quarterly after. + Reports land in `docs/receipts/` and on `blog.clarethium.com`. +- **Interim evidence**: refreshed at major project milestones + (post-publish, post-cohort-study). The section above gets dated + updates rather than rewrites; previous snapshots are preserved + in git history so the trajectory is visible. + +--- + +*Pre-registration is the construct-honesty discipline that makes +empirical claims about compound learning worth taking. The plan +above is fixed before data arrives; revisions to the plan happen +only via explicit `STRATEGY.md` durable-decision overturns and are +named publicly.* diff --git a/cma-mcp/mcp_compose.py b/cma-mcp/mcp_compose.py new file mode 100644 index 0000000..2b6b7a6 --- /dev/null +++ b/cma-mcp/mcp_compose.py @@ -0,0 +1,340 @@ +""" +Three-section payload composer. 
+ +Every cma-mcp tool response and resource read returns a JSON payload +with three top-level sections: + + { + "analysis": {...}, # the data + "agent_guidance": {...}, # what to tell the user, how to cite + "provenance": {...}, # versions, license, cost, citation + } + +The `agent_guidance` and `provenance` blocks exist because an agent +passing cma-mcp output to a user without attribution would strip the +construct-honesty discipline that makes the loop's evidence worth +citing. Surfacing "how to cite faithfully" inside the payload is the +structure that carries the discipline forward (this convention is +established by frame-check-mcp; see STRATEGY DD-5). + +Composers in this module produce the shape; tests in +test_payload_determinism.py pin every surface to assert all three +sections always present and the provenance block byte-deterministic +across calls (after timestamp normalization). +""" + +from __future__ import annotations + +import time +from typing import Any + +from cma_subprocess import cma_version + + +# These are populated by mcp_server at startup so compose calls don't +# repeat the discovery work. The cma binary version probe is gated +# behind a try/except in cma_subprocess.cma_version() so a missing +# cma binary surfaces here as None rather than crashing the server. +_SERVER_NAME: str = "cma-mcp" +_SERVER_VERSION: str = "0.1.0" +_PROTOCOL_VERSION: str = "2024-11-05" +_GIT_SHA: str | None = None +_CMA_BINARY_VERSION: str | None = None + + +def configure_provenance( + *, + server_name: str, + server_version: str, + protocol_version: str, + git_sha: str | None, +) -> None: + """ + Called once by mcp_server at startup. Caches the static parts of + the provenance block so every payload reuses the same dict + skeleton. 
+ """ + global _SERVER_NAME, _SERVER_VERSION, _PROTOCOL_VERSION, _GIT_SHA + global _CMA_BINARY_VERSION + _SERVER_NAME = server_name + _SERVER_VERSION = server_version + _PROTOCOL_VERSION = protocol_version + _GIT_SHA = git_sha + _CMA_BINARY_VERSION = cma_version() + + +def _now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def base_provenance() -> dict[str, Any]: + """ + Build the standard provenance block. Callers extend this with + request-specific fields (e.g., data_source, latency_ms). + """ + prov: dict[str, Any] = { + "server_name": _SERVER_NAME, + "server_version": _SERVER_VERSION, + "protocol_version": _PROTOCOL_VERSION, + "license": "Apache-2.0", + "cost_usd": 0.0, + "citation": ( + f"cma-mcp {_SERVER_VERSION} (Clarethium, 2026). " + f"https://github.com/Clarethium/cma/tree/main/cma-mcp" + ), + "deterministic": True, + "timestamp": _now_iso(), + } + if _GIT_SHA: + prov["git_sha"] = _GIT_SHA + if _CMA_BINARY_VERSION: + prov["cma_binary_version"] = _CMA_BINARY_VERSION + return prov + + +# ── agent guidance presets ────────────────────────────────────────── + +# Each tool / resource has a default agent_guidance block that the +# composer attaches. Callers may override fields; defaults are below. + +_GUIDANCE_CAPTURE = { + "what_this_tool_does": ( + "Persists a capture record to the operator's cma data " + "directory via the canonical bash cma binary. The record is " + "append-only and survives session compaction." + ), + "what_this_tool_does_not_do": ( + "Does not interpret the capture, does not classify " + "automatically, and does not compose with the operator's " + "methodology unless the operator has wired CMA_FM_CLASSIFIER. " + "cma-mcp is a substrate; vocabulary lives in the methodology " + "(see https://github.com/Clarethium/lodestone for the " + "canonical methodology Clarethium publishes)." 
+ ), + "how_to_cite_faithfully": ( + "Cite the capture explicitly when telling the operator about " + "it: name the cma tool that ran ('cma_miss' / 'cma_decision' " + "/ etc.), the id returned in analysis.record.id (or visible " + "in analysis.cma_stdout), and the stored surface/fm. Do not " + "paraphrase as 'I noted that...' — paraphrase strips the " + "durability the operator chose this tool to obtain." + ), +} + +_GUIDANCE_SURFACE = { + "what_this_tool_does": ( + "Queries the operator's cma corpus for captures relevant to " + "the current context (surface, file, type). The query is " + "logged to surface_events.jsonl so leak detection (cma_stats " + "view=leaks) can later flag failures that occurred despite a " + "warning being surfaced." + ), + "what_this_tool_does_not_do": ( + "Does not modify any captures. The matched captures are " + "context for the agent's next action; treat them as " + "warnings, not as instructions." + ), + "how_to_cite_faithfully": ( + "When telling the operator about surfaced captures, attribute " + "to cma ('cma surfaced N prior captures matching this " + "context') and reproduce the matched captures' descriptions " + "verbatim or as direct quotes. Do not paraphrase the operator's " + "prior captures as your own observations." + ), +} + +_GUIDANCE_STATS = { + "what_this_tool_does": ( + "Computes the evidence dashboard from the operator's cma " + "corpus. Counts and ratios are deterministic functions of " + "the underlying records." + ), + "what_this_tool_does_not_do": ( + "Does not interpret whether the prevention/miss ratio is " + "good or bad; does not assess loop health. Surfacing the " + "numbers is the contribution; the operator interprets them." + ), + "how_to_cite_faithfully": ( + "Quote the numbers as cma reports them. Do not round, " + "smooth, or characterize ratios with adjectives ('strong', " + "'weak', 'concerning') unless the operator asks for an " + "interpretation." 
+ ), +} + +_GUIDANCE_RESOURCE_CONTEXT = { + "what_this_resource_provides": ( + "Read-only context from the operator's cma corpus. Records " + "are sorted newest-first and filtered to scope (current " + "project + global where applicable)." + ), + "how_to_cite_faithfully": ( + "When using these records to inform downstream tool calls, " + "attribute decisions / rejections / core learnings to the " + "operator (cma stores them as the operator's articulated " + "choices). Do not present them as your own conclusions." + ), +} + + +# ── composers ────────────────────────────────────────────────────── + + +def compose_capture_response( + *, + tool_name: str, + record: dict | None, + cma_stdout: str, + cma_stderr: str, + extra_provenance: dict | None = None, +) -> dict: + """ + Build a three-section response for a capture tool (miss, decision, + reject, prevented, distill). + + `record` is the parsed cma JSONL record when cma-mcp can recover + it (by reading the corresponding *.jsonl file's last line after + the subprocess returns). May be None if recovery failed; the + cma_stdout text is always present and reliable. 
+ """ + analysis: dict[str, Any] = { + "tool": tool_name, + "cma_stdout": cma_stdout.strip(), + } + if cma_stderr.strip(): + analysis["cma_stderr"] = cma_stderr.strip() + if record is not None: + analysis["record"] = record + + prov = base_provenance() + if extra_provenance: + prov.update(extra_provenance) + + return { + "analysis": analysis, + "agent_guidance": dict(_GUIDANCE_CAPTURE), + "provenance": prov, + } + + +def compose_surface_response( + *, + matched: list[dict], + cma_stdout: str, + cma_stderr: str, + filters: dict, + extra_provenance: dict | None = None, +) -> dict: + """Build a three-section response for cma_surface.""" + analysis: dict[str, Any] = { + "tool": "cma_surface", + "filters": filters, + "matched_count": len(matched), + "matched": matched, + "cma_stdout": cma_stdout.strip(), + } + if cma_stderr.strip(): + analysis["cma_stderr"] = cma_stderr.strip() + + prov = base_provenance() + if extra_provenance: + prov.update(extra_provenance) + + return { + "analysis": analysis, + "agent_guidance": dict(_GUIDANCE_SURFACE), + "provenance": prov, + } + + +def compose_stats_response( + *, + view: str, + cma_stdout: str, + cma_stderr: str, + extra_provenance: dict | None = None, +) -> dict: + """Build a three-section response for cma_stats.""" + analysis: dict[str, Any] = { + "tool": "cma_stats", + "view": view, + "cma_stdout": cma_stdout.strip(), + } + if cma_stderr.strip(): + analysis["cma_stderr"] = cma_stderr.strip() + + prov = base_provenance() + if extra_provenance: + prov.update(extra_provenance) + + return { + "analysis": analysis, + "agent_guidance": dict(_GUIDANCE_STATS), + "provenance": prov, + } + + +def compose_resource_response( + *, + uri: str, + records: list[dict], + data_provenance: dict, + summary: dict | None = None, +) -> dict: + """Build a three-section response for a resource read.""" + analysis: dict[str, Any] = { + "uri": uri, + "records": records, + "record_count": len(records), + } + if summary is not None: + analysis["summary"] 
= summary + + prov = base_provenance() + prov["data_source"] = data_provenance + + return { + "analysis": analysis, + "agent_guidance": dict(_GUIDANCE_RESOURCE_CONTEXT), + "provenance": prov, + } + + +def compose_error_response( + *, + tool_or_uri: str, + reason: str, + detail: str, + is_user_error: bool = False, +) -> dict: + """Build a three-section error payload.""" + analysis = { + "tool_or_uri": tool_or_uri, + "error": True, + "reason": reason, + "detail": detail, + "user_error": is_user_error, + } + return { + "analysis": analysis, + "agent_guidance": { + "what_this_tool_does": ( + "An error occurred. The error reason is in " + "analysis.reason; analysis.detail carries the " + "subprocess or parser detail." + ), + "what_this_tool_does_not_do": ( + "Does not retry automatically. The caller (agent or " + "MCP client) decides whether to surface the error to " + "the operator, retry with adjusted arguments, or " + "abandon the action." + ), + "how_to_cite_faithfully": ( + "Surface the error reason verbatim ('cma binary " + "missing on PATH', 'cma subprocess timeout', etc.). " + "Do not paraphrase as 'something went wrong' — that " + "robs the operator of the actionable detail." + ), + }, + "provenance": base_provenance(), + } diff --git a/cma-mcp/mcp_log.py b/cma-mcp/mcp_log.py new file mode 100644 index 0000000..633fa61 --- /dev/null +++ b/cma-mcp/mcp_log.py @@ -0,0 +1,71 @@ +""" +Stderr logging helper. + +cma-mcp logs to stderr because stdio is reserved for the MCP protocol +itself. MCP clients capture stderr separately; operators inspecting +the logs use their client's MCP-server logs view. + +Format: ISO-8601 UTC timestamp + level + key=value pairs. Single +line per event. Timestamps are normalized to second resolution to +keep determinism tests stable across machines. + +The module-level logger instance is shared across all callers; this +matches the singleton stderr stream and avoids duplicate handler +attachment. 
+""" + +from __future__ import annotations + +import os +import sys +import time +from typing import Any + + +_LEVEL = os.environ.get("CMA_MCP_LOG_LEVEL", "INFO").upper() +_LEVELS = {"DEBUG": 10, "INFO": 20, "WARN": 30, "ERROR": 40, "OFF": 100} +_LEVEL_NUM = _LEVELS.get(_LEVEL, 20) + + +def _now_iso() -> str: + """Return UTC ISO-8601 timestamp at second resolution.""" + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + +def _emit(level: str, event: str, fields: dict[str, Any]) -> None: + """Write a single log line to stderr.""" + parts = [f"{k}={_format_value(v)}" for k, v in fields.items() if v is not None] + line = f"[{_now_iso()}] {level} event={event}" + if parts: + line += " " + " ".join(parts) + print(line, file=sys.stderr, flush=True) + + +def _format_value(v: Any) -> str: + """Render a log value: quote strings with whitespace, otherwise raw.""" + s = str(v) + if " " in s or "=" in s or "\t" in s: + # Replace newlines so multi-line values stay on one log line. + s = s.replace("\n", "\\n").replace("\r", "") + return f'"{s}"' + return s + + +def info(event: str, **fields: Any) -> None: + if _LEVEL_NUM <= 20: + _emit("INFO", event, fields) + + +def warn(event: str, **fields: Any) -> None: + if _LEVEL_NUM <= 30: + _emit("WARN", event, fields) + + +def error(event: str, **fields: Any) -> None: + if _LEVEL_NUM <= 40: + _emit("ERROR", event, fields) + + +def debug(event: str, **fields: Any) -> None: + if _LEVEL_NUM <= 10: + _emit("DEBUG", event, fields) diff --git a/cma-mcp/mcp_protocol.py b/cma-mcp/mcp_protocol.py new file mode 100644 index 0000000..344fe58 --- /dev/null +++ b/cma-mcp/mcp_protocol.py @@ -0,0 +1,229 @@ +""" +JSON-RPC 2.0 over stdio for the Model Context Protocol. + +Per DECISIONS AD-001: cma-mcp implements MCP directly without a +third-party SDK. 
The protocol surface used here is small enough +(initialize, tools/list, tools/call, resources/list, resources/read, +ping, notifications) that an in-repo implementation removes a class +of version-skew failures and keeps cma-mcp's runtime dependency +footprint at zero. + +This module owns the I/O loop and the envelope discipline. It does +not know about cma-specific tools or resources; mcp_server registers +handlers and this module dispatches. + +Transport contract: + +- Each line on stdin is exactly one JSON-RPC request or notification. +- Each response is exactly one line on stdout, JSON-encoded, no + embedded newlines (json.dumps default). +- Notifications (no `id` field) receive no response. +- stderr is for logging only; protocol traffic never touches it. +""" + +from __future__ import annotations + +import json +import sys +from dataclasses import dataclass +from typing import Any, Callable + +import mcp_log + + +# JSON-RPC 2.0 standard error codes. +PARSE_ERROR = -32700 +INVALID_REQUEST = -32600 +METHOD_NOT_FOUND = -32601 +INVALID_PARAMS = -32602 +INTERNAL_ERROR = -32603 + +# MCP-specific +RESOURCE_NOT_FOUND = -32002 + + +@dataclass +class Request: + """A parsed JSON-RPC request or notification.""" + + method: str + params: dict + id: Any = None # missing for notifications + + @property + def is_notification(self) -> bool: + return self.id is None + + +class ProtocolError(Exception): + """Raised when a request cannot be processed; carries JSON-RPC code.""" + + def __init__(self, code: int, message: str, data: Any = None): + self.code = code + self.message = message + self.data = data + super().__init__(message) + + +def parse_line(line: str) -> Request: + """ + Parse a single JSON-RPC line into a Request. + + Raises ProtocolError on parse or shape errors. Returned id is + None for notifications, otherwise carries the request id (string, + integer, or null per JSON-RPC 2.0). 
+ """ + try: + msg = json.loads(line) + except json.JSONDecodeError as exc: + raise ProtocolError(PARSE_ERROR, f"parse error: {exc}", None) + + if not isinstance(msg, dict): + raise ProtocolError(INVALID_REQUEST, "request must be an object") + if msg.get("jsonrpc") != "2.0": + raise ProtocolError(INVALID_REQUEST, "jsonrpc must be '2.0'") + method = msg.get("method") + if not isinstance(method, str) or not method: + raise ProtocolError(INVALID_REQUEST, "method must be a non-empty string") + params = msg.get("params") or {} + if not isinstance(params, dict): + raise ProtocolError(INVALID_PARAMS, "params must be an object") + return Request(method=method, params=params, id=msg.get("id")) + + +def write_response(req_id: Any, result: dict) -> None: + """Write a JSON-RPC success response to stdout.""" + payload = {"jsonrpc": "2.0", "id": req_id, "result": result} + _write(payload) + + +def write_error(req_id: Any, code: int, message: str, data: Any = None) -> None: + """Write a JSON-RPC error response to stdout.""" + err: dict[str, Any] = {"code": code, "message": message} + if data is not None: + err["data"] = data + payload = {"jsonrpc": "2.0", "id": req_id, "error": err} + _write(payload) + + +def _write(payload: dict) -> None: + """Serialize and write a single JSON-RPC envelope on one line.""" + # ensure_ascii=False so unicode passes through; default behavior + # of json.dumps already produces no embedded newlines. + line = json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + print(line, file=sys.stdout, flush=True) + + +# ── server loop ──────────────────────────────────────────────────── + + +# Handler signature: (params: dict) -> dict (the result body). +# Notification handlers take params and return None. 
+Handler = Callable[[dict], dict] +NotificationHandler = Callable[[dict], None] + + +class Dispatcher: + """Method registry plus stdio loop.""" + + def __init__(self) -> None: + self._request_handlers: dict[str, Handler] = {} + self._notification_handlers: dict[str, NotificationHandler] = {} + + def on_request(self, method: str, handler: Handler) -> None: + """Register a handler for a request method.""" + self._request_handlers[method] = handler + + def on_notification(self, method: str, handler: NotificationHandler) -> None: + """Register a handler for a notification method.""" + self._notification_handlers[method] = handler + + def serve_forever(self) -> int: + """ + Read lines from stdin and dispatch until EOF or an + unrecoverable error. + + Returns + ------- + int + Exit code: 0 on clean EOF, non-zero if the loop terminates + from an unhandled error. + """ + mcp_log.info("loop_start") + for raw_line in sys.stdin: + line = raw_line.strip() + if not line: + continue + self._dispatch_one(line) + mcp_log.info("loop_eof") + return 0 + + def _dispatch_one(self, line: str) -> None: + """Parse, route, and respond to a single JSON-RPC line.""" + try: + req = parse_line(line) + except ProtocolError as exc: + # Parse errors carry no id (we never read one); per + # JSON-RPC the response id is null. + write_error(None, exc.code, exc.message, exc.data) + mcp_log.warn("parse_error", code=exc.code, message=exc.message) + return + + if req.is_notification: + handler = self._notification_handlers.get(req.method) + if handler is None: + # Notifications without handlers are silently ignored + # per JSON-RPC 2.0; log so operators can debug. + mcp_log.debug("notification_ignored", method=req.method) + return + try: + handler(req.params) + except Exception as exc: + # Notifications cannot return errors; log and move on. + mcp_log.error( + "notification_handler_failed", + method=req.method, + error=str(exc), + ) + return + + # Request: must produce a response. 
+ handler = self._request_handlers.get(req.method) + if handler is None: + write_error( + req.id, + METHOD_NOT_FOUND, + f"method not found: {req.method}", + ) + mcp_log.warn("method_not_found", method=req.method) + return + + try: + result = handler(req.params) + except ProtocolError as exc: + write_error(req.id, exc.code, exc.message, exc.data) + mcp_log.warn( + "request_protocol_error", + method=req.method, + code=exc.code, + message=exc.message, + ) + return + except Exception as exc: + # Unexpected handler crash. Surface as INTERNAL_ERROR + # without leaking the traceback to clients (which would + # be a debug-info disclosure on remote-MCP setups). + mcp_log.error( + "request_handler_failed", + method=req.method, + error=str(exc), + ) + write_error( + req.id, + INTERNAL_ERROR, + "internal server error", + {"hint": "check cma-mcp stderr logs"}, + ) + return + + write_response(req.id, result) diff --git a/cma-mcp/mcp_resources.py b/cma-mcp/mcp_resources.py new file mode 100644 index 0000000..10175cb --- /dev/null +++ b/cma-mcp/mcp_resources.py @@ -0,0 +1,206 @@ +""" +Resource read handlers. + +Resources are read-only context surfaces an MCP client can pull at +will. cma-mcp ships four: + + cma://decisions active decisions in window + cma://rejections active rejections in window + cma://core active core learnings (retired filtered) + cma://stats compound-practice stats summary + +Reads bypass the bash cma subprocess and parse JSONL directly via +cma_jsonl. The exception is `cma://stats`, which shells out to +`cma stats` so the summary text matches what an operator would see +in their terminal — drift between the resource and the CLI would +violate STRATEGY DD-1. + +cma 1.0 is single-project (per-project scoping is on cma's roadmap +beyond 1.0). cma-mcp follows: no project filtering at v0.1; all +records in the operator's `~/.cma/` surface to the operator's MCP +clients. When cma adds project scoping, cma-mcp will follow. 
+""" + +from __future__ import annotations + +from datetime import datetime, timedelta, timezone +from typing import Any + +import cma_jsonl +import cma_subprocess +import mcp_compose +from cma_subprocess import CmaError + + +DECISIONS_LOOKBACK_DAYS = 180 +DECISIONS_LIMIT = 30 + +REJECTIONS_LOOKBACK_DAYS = 30 +REJECTIONS_LIMIT = 30 + +CORE_LIMIT = 30 + + +def _cutoff_iso(days: int) -> str: + """Return ISO-8601 timestamp `days` ago in UTC.""" + cutoff = datetime.now(timezone.utc) - timedelta(days=days) + return cutoff.strftime("%Y-%m-%dT%H:%M:%SZ") + + +def _filter_within_days(records: list[dict], days: int) -> list[dict]: + """Filter records to those with timestamp >= now - days.""" + cutoff = _cutoff_iso(days) + return [ + r for r in records + if isinstance(r.get("timestamp"), str) and r["timestamp"] >= cutoff + ] + + +def _newest_first(records: list[dict]) -> list[dict]: + """Sort records newest-first by timestamp; missing-last.""" + return sorted(records, key=lambda r: r.get("timestamp", ""), reverse=True) + + +# ── cma://decisions ──────────────────────────────────────────────── + + +def read_decisions() -> dict: + """Active decisions in window.""" + result = cma_jsonl.read_decisions() + in_window = _filter_within_days( + [r for r in result.records if r.get("type") == "decision"], + DECISIONS_LOOKBACK_DAYS, + ) + sorted_records = _newest_first(in_window)[:DECISIONS_LIMIT] + + summary = { + "lookback_days": DECISIONS_LOOKBACK_DAYS, + "in_window": len(in_window), + "shown": len(sorted_records), + "limit": DECISIONS_LIMIT, + } + + return mcp_compose.compose_resource_response( + uri="cma://decisions", + records=sorted_records, + data_provenance=cma_jsonl.parse_provenance(result), + summary=summary, + ) + + +# ── cma://rejections ─────────────────────────────────────────────── + + +def read_rejections() -> dict: + """Active rejections in window.""" + result = cma_jsonl.read_rejections() + in_window = _filter_within_days( + [r for r in result.records if r.get("type") 
== "rejection"], + REJECTIONS_LOOKBACK_DAYS, + ) + sorted_records = _newest_first(in_window)[:REJECTIONS_LIMIT] + + summary = { + "lookback_days": REJECTIONS_LOOKBACK_DAYS, + "in_window": len(in_window), + "shown": len(sorted_records), + "limit": REJECTIONS_LIMIT, + } + + return mcp_compose.compose_resource_response( + uri="cma://rejections", + records=sorted_records, + data_provenance=cma_jsonl.parse_provenance(result), + summary=summary, + ) + + +# ── cma://core ───────────────────────────────────────────────────── + + +def read_core() -> dict: + """ + Active core learnings. + + cma's core.jsonl mixes two record types: `core` (the learning) and + `retirement` (a marker that retires a core by id). A core is + active iff no retirement record references its id. + """ + result = cma_jsonl.read_core() + cores = [r for r in result.records if r.get("type") == "core"] + retired_ids = { + r.get("retires") + for r in result.records + if r.get("type") == "retirement" and isinstance(r.get("retires"), str) + } + active = [c for c in cores if c.get("id") not in retired_ids] + sorted_records = _newest_first(active)[:CORE_LIMIT] + + summary = { + "active": len(active), + "retired": len(cores) - len(active), + "shown": len(sorted_records), + "limit": CORE_LIMIT, + } + + return mcp_compose.compose_resource_response( + uri="cma://core", + records=sorted_records, + data_provenance=cma_jsonl.parse_provenance(result), + summary=summary, + ) + + +# ── cma://stats ──────────────────────────────────────────────────── + + +def read_stats() -> dict: + """ + Default stats summary. + + Shells out to `cma stats` so the summary text matches what an + operator would see in their terminal. Other views (leaks, + recurrence, behavior, etc.) go through the cma_stats tool with a + `view` argument. 
+ """ + try: + result = cma_subprocess.run_cma(["stats"]) + return mcp_compose.compose_stats_response( + view="default", + cma_stdout=result.stdout, + cma_stderr=result.stderr, + extra_provenance={ + "cma_argv": result.argv, + "cma_returncode": result.returncode, + }, + ) + except CmaError as exc: + return mcp_compose.compose_error_response( + tool_or_uri="cma://stats", + reason=exc.reason, + detail=exc.stderr or str(exc), + ) + + +# ── dispatch ──────────────────────────────────────────────────────── + + +READERS = { + "cma://decisions": read_decisions, + "cma://rejections": read_rejections, + "cma://core": read_core, + "cma://stats": read_stats, +} + + +def read(uri: str) -> dict: + """Dispatch a resource read by URI.""" + reader = READERS.get(uri) + if reader is None: + return mcp_compose.compose_error_response( + tool_or_uri=uri, + reason="unknown_resource", + detail=f"no resource reader registered for uri: {uri}", + is_user_error=True, + ) + return reader() diff --git a/cma-mcp/mcp_schema.py b/cma-mcp/mcp_schema.py new file mode 100644 index 0000000..ac7277c --- /dev/null +++ b/cma-mcp/mcp_schema.py @@ -0,0 +1,521 @@ +""" +MCP tool input schemas and metadata. + +This module is the single source of truth for cma-mcp's tool surface. +Each tool entry mirrors a bash cma primitive (see cma's DESIGN.md); +the schema describes the input shape an MCP client passes; the +description teaches the agent both how to invoke the tool and when +to invoke it (operator says X, OR the agent itself recognizes a +capture-worthy moment). + +Field-name discipline: snake_case in MCP schema (e.g., +`applies_when`, `revisit_when`, `miss_id`). cma_subprocess translates +these to bash cma's CLI flag form (`--applies-when`, `--revisit-when`, +`--miss-id`). + +Surface labels are open-ended. 
cma's data substrate stores `surface` +as an opaque string; the canonical examples (`auth`, `db`, `docs`, +`ui`, `infra`, `general`, `git`) are listed in the field description +but operators may pass any short label that fits their work. + +`fm` (failure-mode) is opaque per STRATEGY DD-4. cma-mcp does not +bundle a failure-mode catalog. Tool descriptions reference Lodestone +as the canonical methodology that owns the FM-1..10 vocabulary; +operators using a different methodology pass that methodology's tag +through as opaque data. +""" + +from __future__ import annotations + +from typing import Any + + +# ── shared schema fragments ───────────────────────────────────────── + +_SURFACE_DESCRIPTION = ( + "Surface label (the domain area). Canonical values used by the cma " + "reference implementation: auth, db, docs, ui, infra, general, git. " + "Custom values are accepted; cma stores the surface as an opaque " + "string." +) + +_FM_DESCRIPTION = ( + "Failure-mode tag, opaque to cma. When the operator uses a " + "methodology with a canonical catalog (such as Lodestone's " + "FM-1..10, see https://github.com/Clarethium/lodestone), pass " + "that tag here as a string. cma-mcp does not bundle the catalog " + "itself. If unset, cma falls back to the operator's " + "CMA_FM_CLASSIFIER plugin (if configured) or stores the miss " + "with no fm." +) + + +def _surface_field() -> dict: + return {"type": "string", "description": _SURFACE_DESCRIPTION} + + +# ── per-tool definitions ──────────────────────────────────────────── + +CMA_MISS = { + "name": "cma_miss", + "title": "Record a miss", + "description": ( + "Capture a failure: a specific moment where work fell short of " + "intent and is likely to recur. 
Wraps `cma miss`.\n\n" + "Invoke when (a) the operator says 'record a miss', 'log this', " + "'this was wrong', or similar, OR (b) you yourself notice a " + "miss worth surfacing in future similar work.\n\n" + "Description should preserve enough specifics that the same " + "failure-shape would be recognizable next time it appears. " + "Generic captures ('I made a mistake') are useless; specific " + "captures ('I claimed verified without testing the cross-tenant " + "write path') fire as warnings later via cma's surface-time " + "matching.\n\n" + "Texture preservation: pass `intended` (what was about to " + "happen), `corrected` (what happened instead), and `excerpt` " + "(the conversation excerpt) when available. Texture-preserved " + "misses surface in cma's behavior-layer validation view " + "(`cma_stats` with view=behavior)." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "required": ["description"], + "properties": { + "description": { + "type": "string", + "minLength": 8, + "description": ( + "What failed, in the operator's own words. Phrase " + "actively ('Treated X as Y without verifying') " + "rather than passively. Specific enough that the " + "same shape of failure would be recognizable next " + "time." + ), + }, + "surface": _surface_field(), + "fm": {"type": "string", "description": _FM_DESCRIPTION}, + "files": { + "type": "string", + "description": ( + "Comma-separated list of files involved in the " + "failure. Used for surface auto-detection at " + "surface-time matching." + ), + }, + "intended": { + "type": "string", + "description": ( + "What was about to happen (the counterfactual). " + "Texture field; preserves the conditions of the " + "failure so cma_stats view=behavior can identify " + "behavior pivots." + ), + }, + "corrected": { + "type": "string", + "description": ( + "What happened instead, after correction. Texture " + "field; pairs with `intended` to capture the " + "behavior pivot." 
+ ), + }, + "excerpt": { + "type": "string", + "description": ( + "Multi-line excerpt of the conversation or session " + "that produced the miss. Newlines and quotes are " + "preserved through bash cma's JSON encoding." + ), + }, + }, + }, +} + +CMA_DECISION = { + "name": "cma_decision", + "title": "Record an architectural or strategic decision", + "description": ( + "Capture a non-trivial choice the operator wants surfaced in " + "future similar work. Wraps `cma decision`.\n\n" + "Format: 'TOPIC: choice (rationale)'. The TOPIC is the decision " + "domain; the choice is what was decided; the rationale is the " + "why.\n\n" + "Invoke when (a) the operator articulates a decision, OR " + "(b) you yourself are about to commit to or recommend a " + "non-trivial choice (pattern, architecture, security stance, " + "configuration philosophy) whose rationale matters more than " + "its mechanics.\n\n" + "Read the cma://decisions resource first to avoid duplicating " + "an existing decision. Implementation tweaks, bug fixes, " + "polish, refactors are not decisions.\n\n" + "Pass `applies_when` to set the predicate cma matches against " + "context keywords at surface time. A decision with " + "applies_when='auth db' surfaces whenever the current action's " + "surface or file path contains 'auth' or 'db', closing the " + "decision-surfacing loop." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "required": ["description"], + "properties": { + "description": { + "type": "string", + "minLength": 15, + "description": ( + "TOPIC: choice (rationale). Real-world decisions " + "like 'GIT: Commit only' (16 chars) are valid." + ), + }, + "surface": _surface_field(), + "applies_when": { + "type": "string", + "description": ( + "Predicate matched against context keywords at " + "surface time. Coarse predicates (surface name, " + "file pattern) are sufficient; cma performs " + "substring matching." 
+ ), + }, + }, + }, +} + +CMA_REJECT = { + "name": "cma_reject", + "title": "Record an explicit rejection", + "description": ( + "Capture an option considered and ruled out. Survives session " + "compaction and prevents silently rebuilding what was " + "deliberately not built. Wraps `cma reject`.\n\n" + "Format: 'OPTION: reason for elimination'.\n\n" + "Invoke when (a) the operator states a rejection, OR (b) you " + "yourself recognize that you have just eliminated an option " + "whose rationale is non-obvious enough that a future you " + "(or another model) might rebuild it without context.\n\n" + "Read the cma://rejections resource first to see what is " + "already eliminated.\n\n" + "Pass `revisit_when` to name the trigger that would warrant " + "reconsidering the rejection ('if performance becomes " + "critical', 'if Python 4 ships', etc.)." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "required": ["description"], + "properties": { + "description": { + "type": "string", + "minLength": 8, + "description": "OPTION: reason for elimination.", + }, + "surface": _surface_field(), + "revisit_when": { + "type": "string", + "description": ( + "Trigger that would warrant reconsidering this " + "rejection. Surfaces alongside the rejection so " + "operators see the reopen condition in context." + ), + }, + }, + }, +} + +CMA_PREVENTED = { + "name": "cma_prevented", + "title": "Record a prevention catch", + "description": ( + "Capture a moment where a surfaced warning actually changed " + "behavior. The catch is the evidence compound learning works; " + "without preventions captured, the loop's effect is invisible. " + "Wraps `cma prevented`.\n\n" + "Invoke immediately after the catch, while the chain (warning " + "→ recognition → different choice) is still legible. 
" + "Triggered by either (a) operator request, OR (b) your own " + "recognition that you almost did something a warning had " + "named, and you stopped.\n\n" + "Description names what was almost done versus what was done " + "instead. If the warning came from a specific prior miss, " + "pass that miss's id as `miss_id` so cma can compute the " + "miss's prevention rate." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "required": ["description"], + "properties": { + "description": { + "type": "string", + "minLength": 8, + "description": ( + "What was almost done versus what was done " + "instead. The chain is most useful when explicit." + ), + }, + "miss_id": { + "type": "string", + "description": ( + "ID of the original miss this prevention links to " + "(format: YYYYMMDD-HHMMSS-<8-hex>). Lets cma " + "compute the miss's prevention rate." + ), + }, + "warning_id": { + "type": "string", + "description": ( + "ID of the surface event whose warning was heeded. " + "Optional; cma uses it to track which warnings " + "actually catch repeats." + ), + }, + }, + }, +} + +CMA_DISTILL = { + "name": "cma_distill", + "title": "Promote, retire, or review distilled learnings", + "description": ( + "Operate on cma's core-learnings layer: promote a captured " + "pattern to permanent surfacing, retire one that no longer " + "applies, or preview the patterns that have accumulated since " + "the last distillation. Wraps `cma distill`.\n\n" + "Three modes:\n" + "- `default`: promote a learning. Pass `description` (the " + " distilled rule), optional `scope` (project / language / " + " general; default general) and optional `surface`.\n" + "- `retire`: mark matching core learnings as retired. Pass " + " `pattern` (substring matched against existing learnings).\n" + "- `review`: read-only preview of patterns that would warrant " + " distillation. 
No other arguments.\n\n" + "Invoke when the operator articulates a distilled learning, " + "or when reviewing recurring miss patterns and wanting to " + "promote one to permanent surfacing." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "required": ["mode"], + "properties": { + "mode": { + "type": "string", + "enum": ["default", "retire", "review"], + "description": ( + "Operation mode. `default` promotes; `retire` " + "removes from active surfacing; `review` previews." + ), + }, + "description": { + "type": "string", + "minLength": 8, + "description": ( + "Distilled rule (mode=default only). Phrased as a " + "permanent rule, not a one-off observation." + ), + }, + "pattern": { + "type": "string", + "minLength": 1, + "description": ( + "Substring pattern (mode=retire only). Matches " + "against existing core learnings; matches are " + "marked retired." + ), + }, + "scope": { + "type": "string", + "enum": ["project", "language", "general"], + "description": ( + "Scope of the learning (mode=default only). " + "Defaults to 'general'." + ), + }, + "surface": _surface_field(), + }, + }, +} + +CMA_SURFACE = { + "name": "cma_surface", + "title": "Surface relevant prior captures for the current context", + "description": ( + "Bring relevant prior captures into view for the current " + "context: misses with matching surface or file path, decisions " + "with matching applies_when, active rejections, and core " + "learnings. Wraps `cma surface`.\n\n" + "This call has a side effect: cma writes a record to " + "surface_events.jsonl with the filters used and the matched " + "captures. The leak-detection view (cma_stats view=leaks) " + "later joins these events against subsequent misses to flag " + "failures that occurred despite a relevant warning being " + "surfaced. 
Disabling the log defeats leak detection, so cma-mcp " + "does not expose the --no-log flag.\n\n" + "Invoke when about to act on a domain (file edit, command " + "execution, design decision) and the agent wants to inherit " + "prior context relevant to that action. Without arguments, " + "surfaces the most relevant captures for the current working " + "directory." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "properties": { + "surface": { + "type": "string", + "description": ( + "Filter by domain area. Matches captures whose " + "stored surface equals this value, or whose " + "applies_when predicate (decisions only) " + "substring-matches it." + ), + }, + "file": { + "type": "string", + "description": ( + "Filter by file path (or basename). Matches " + "captures whose `files` field includes this value." + ), + }, + "type": { + "type": "string", + "enum": ["miss", "decision", "rejection", "prevention"], + "description": "Filter by capture type.", + }, + "limit": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Maximum number of results. Default 10.", + }, + }, + }, +} + +CMA_STATS = { + "name": "cma_stats", + "title": "Compound-practice evidence dashboard", + "description": ( + "Compute the evidence dashboard for compound practice over " + "time. Wraps `cma stats`.\n\n" + "Views:\n" + "- `default`: summary (totals, recent activity, top surfaces, " + " top failure shapes, prevention rate, recurrence trends).\n" + "- `leaks`: failures that occurred despite an active warning " + " having been surfaced. Each leak increments the warning's " + " weight. The empirical signal that compound learning is " + " working (or not).\n" + "- `recurrence`: failure shapes ordered by recurrence rate. " + " Identifies preventions that are not working.\n" + "- `preventions`: captured preventions with linked misses. 
" + " Evidence of the loop closing.\n" + "- `rejections`: active rejections with surfaces, ages, and " + " revisit triggers.\n" + "- `behavior`: behavior pivots from texture-preserved misses " + " (those captured with `intended` and `corrected`). Surfaces " + " patterns where surfaced warnings consistently changed " + " operator behavior.\n\n" + "Invoke when the operator wants quantitative evidence the " + "loop is closing, or when the agent is evaluating which " + "captures matter most for the current work." + ), + "inputSchema": { + "type": "object", + "additionalProperties": False, + "properties": { + "view": { + "type": "string", + "enum": [ + "default", + "leaks", + "recurrence", + "preventions", + "rejections", + "behavior", + ], + "description": ( + "Which view to compute. Defaults to 'default' " + "(summary)." + ), + }, + }, + }, +} + + +# Ordered list (the order defines the sequence in tools/list response). +TOOLS: list[dict[str, Any]] = [ + CMA_MISS, + CMA_DECISION, + CMA_REJECT, + CMA_PREVENTED, + CMA_DISTILL, + CMA_SURFACE, + CMA_STATS, +] + + +# ── resources ───────────────────────────────────────────────────── + +RESOURCES: list[dict[str, Any]] = [ + { + "uri": "cma://decisions", + "name": "decisions", + "title": "Active decisions in scope", + "description": ( + "Architectural and strategic decisions the operator has " + "captured, filtered to those in scope for the current " + "project (plus global-scope decisions). Sorted by recency." + ), + "mimeType": "application/json", + }, + { + "uri": "cma://rejections", + "name": "rejections", + "title": "Active rejections", + "description": ( + "Options the operator has explicitly rejected and the " + "reason. Filtered to the current project. Sorted by " + "recency." + ), + "mimeType": "application/json", + }, + { + "uri": "cma://core", + "name": "core", + "title": "Active core learnings", + "description": ( + "Distilled learnings promoted to permanent surfacing via " + "cma_distill. 
def get_tool(name: str) -> dict[str, Any] | None:
    """Return the entry of ``TOOLS`` whose ``"name"`` equals ``name``.

    Entries are examined in list order and the first match wins.
    Returns ``None`` when no entry carries that name.
    """
    return next((entry for entry in TOOLS if entry["name"] == name), None)


def get_resource(uri: str) -> dict[str, Any] | None:
    """Return the entry of ``RESOURCES`` whose ``"uri"`` equals ``uri``.

    Entries are examined in list order and the first match wins.
    Returns ``None`` when no entry carries that URI.
    """
    return next((entry for entry in RESOURCES if entry["uri"] == uri), None)
provenance cma-mcp version, wrapped cma binary version, + license, cost (always 0 USD; deterministic), + citation string + +The agent_guidance and provenance blocks exist because an agent +passing cma-mcp output to a user without attribution would strip the +construct-honesty discipline that makes the compound-practice +evidence worth citing. + +Protocol +-------- +Implements the Model Context Protocol over stdio using JSON-RPC 2.0 +line-delimited. No external dependency on an MCP SDK (DECISIONS +AD-001). The protocol surface used here (initialize, tools/list, +tools/call, resources/list, resources/read, ping, notifications) is +small enough that implementing it in-repo keeps cma-mcp +self-contained: no extra install step, no SDK version drift. + +License: Apache-2.0. See LICENSE at repo root. +""" + +from __future__ import annotations + +import argparse +import json +import os +import platform +import subprocess +import sys +from typing import Any + +import cma_subprocess +import mcp_compose +import mcp_log +import mcp_protocol +import mcp_resources +import mcp_schema +from cma_subprocess import CmaError, run_cma + + +# ── version constants ────────────────────────────────────────────── + +# SERVER_VERSION is exposed via the MCP initialize handshake; clients +# see it on connect. Bump on every user-visible capability change. +# Strict M.m.p form (no suffixes here) is enforced by +# tests/test_mcp_server.py::test_server_version_is_strict_semver. +# pyproject.toml carries the PEP 440 .dev0 marker during the dev +# window; at lift the suffix drops and the strings align. +SERVER_NAME = "cma-mcp" +SERVER_VERSION = "0.1.0" +PROTOCOL_VERSION = "2024-11-05" + +# Cross-tool orientation prose for MCP clients whose UI surfaces the +# initialize response's `instructions` field (e.g., Claude Desktop). +# Names the use case, the default invocation shape, and the tool / +# resource set so an agent reading this gets orientation that the +# per-tool descriptions cannot carry. 
+SERVER_INSTRUCTIONS = ( + "cma-mcp distributes the cma compound practice loop to " + "MCP-compatible clients. Use cma_miss to record a failure worth " + "surfacing later, cma_decision for an architectural choice, " + "cma_reject for an option ruled out, and cma_prevented for a " + "moment where a surfaced warning changed behavior. Use " + "cma_surface before substantive work to inherit relevant prior " + "context (this also logs a surface event used by leak detection). " + "Use cma_stats and the cma:// resources to inspect the corpus. " + "cma-mcp is methodology-agnostic: vocabulary lives in Lodestone " + "(https://github.com/Clarethium/lodestone). Three-section payload " + "(analysis + agent_guidance + provenance) on every response; " + "preserve attribution when relaying tool output to the operator." +) + + +# ── tool dispatch ────────────────────────────────────────────────── + + +def _to_cma_flag(field_name: str) -> str: + """Convert a snake_case schema field name to a `--kebab-case` flag.""" + return "--" + field_name.replace("_", "-") + + +def _build_capture_argv(verb: str, params: dict, optional_fields: list[str]) -> list[str]: + """ + Build argv for a cma capture verb (miss, decision, reject, + prevented). The first positional arg is `description`; named + flags follow. 
+ """ + description = params.get("description", "") + if not isinstance(description, str) or len(description) < 1: + raise mcp_protocol.ProtocolError( + mcp_protocol.INVALID_PARAMS, + f"{verb}: description is required and must be a non-empty string", + ) + argv = [verb, description] + for field in optional_fields: + value = params.get(field) + if value is None: + continue + if not isinstance(value, str): + raise mcp_protocol.ProtocolError( + mcp_protocol.INVALID_PARAMS, + f"{verb}: optional field '{field}' must be a string", + ) + argv.extend([_to_cma_flag(field), value]) + return argv + + +def _wrap_cma_call(tool_name: str, argv: list[str]) -> dict: + """Run cma , compose a three-section response.""" + try: + result = run_cma(argv) + except CmaError as exc: + return mcp_compose.compose_error_response( + tool_or_uri=tool_name, + reason=exc.reason, + detail=exc.stderr or str(exc), + ) + return mcp_compose.compose_capture_response( + tool_name=tool_name, + record=None, + cma_stdout=result.stdout, + cma_stderr=result.stderr, + extra_provenance={ + "cma_argv": result.argv, + "cma_returncode": result.returncode, + }, + ) + + +def _handle_cma_miss(params: dict) -> dict: + argv = _build_capture_argv( + "miss", + params, + optional_fields=["surface", "fm", "files", "intended", "corrected", "excerpt"], + ) + return _wrap_cma_call("cma_miss", argv) + + +def _handle_cma_decision(params: dict) -> dict: + argv = _build_capture_argv( + "decision", + params, + optional_fields=["surface", "applies_when"], + ) + return _wrap_cma_call("cma_decision", argv) + + +def _handle_cma_reject(params: dict) -> dict: + argv = _build_capture_argv( + "reject", + params, + optional_fields=["surface", "revisit_when"], + ) + return _wrap_cma_call("cma_reject", argv) + + +def _handle_cma_prevented(params: dict) -> dict: + argv = _build_capture_argv( + "prevented", + params, + optional_fields=["miss_id", "warning_id"], + ) + return _wrap_cma_call("cma_prevented", argv) + + +def 
def _handle_cma_surface(params: dict) -> dict:
    """Run `cma surface` with the requested filters and compose the response.

    Recognised keys in ``params``:

    - ``surface``, ``file``, ``type``: strings forwarded as ``--surface``,
      ``--file`` and ``--type``. Absent or empty values are omitted from
      the command line.
    - ``limit``: integer between 1 and 50, forwarded as ``--limit``.

    Returns:
        The payload built by ``mcp_compose.compose_surface_response``, or
        the payload built by ``mcp_compose.compose_error_response`` when
        ``run_cma`` raises ``CmaError``.

    Raises:
        mcp_protocol.ProtocolError: INVALID_PARAMS when a supplied string
            filter is not a string, or when ``limit`` is not an integer
            between 1 and 50 (booleans are rejected).
    """
    argv = ["surface"]
    for field in ("surface", "file", "type"):
        value = params.get(field)
        if not value:
            # Absent or empty filter: leave the flag off.
            continue
        if not isinstance(value, str):
            # Reject here rather than hand a non-string argv element to
            # run_cma unvalidated; mirrors the capture verbs' type check.
            raise mcp_protocol.ProtocolError(
                mcp_protocol.INVALID_PARAMS,
                f"cma_surface: '{field}' must be a string",
            )
        argv.extend([f"--{field}", value])

    limit = params.get("limit")
    if limit is not None:
        # bool is a subclass of int: without the explicit bool check,
        # `True` passes the range test and is sent as `--limit True`.
        if isinstance(limit, bool) or not isinstance(limit, int) or not 1 <= limit <= 50:
            raise mcp_protocol.ProtocolError(
                mcp_protocol.INVALID_PARAMS,
                "cma_surface: 'limit' must be an integer between 1 and 50",
            )
        argv.extend(["--limit", str(limit)])

    try:
        result = run_cma(argv)
    except CmaError as exc:
        return mcp_compose.compose_error_response(
            tool_or_uri="cma_surface",
            reason=exc.reason,
            detail=exc.stderr or str(exc),
        )
    return mcp_compose.compose_surface_response(
        matched=[],  # cma's stdout carries the rendering; structured matched-list reserved for v0.2
        cma_stdout=result.stdout,
        cma_stderr=result.stderr,
        filters={k: v for k, v in params.items() if v is not None},
        extra_provenance={
            "cma_argv": result.argv,
            "cma_returncode": result.returncode,
        },
    )
def _handle_tools_call(params: dict) -> dict:
    """Dispatch a tool call by name, return MCP `content`-shaped result.

    Args:
        params: The `tools/call` request params. `name` (string) selects
            the handler from `_TOOL_HANDLERS`; `arguments` (object,
            optional) is passed to that handler.

    Returns:
        A dict with one text `content` item holding the handler's payload
        serialised as indented JSON, and `isError` set when the payload's
        `analysis.error` is truthy.

    Raises:
        mcp_protocol.ProtocolError: INVALID_PARAMS when `name` is not a
            string, when `arguments` is present but not an object, or
            when no handler is registered under `name`.
    """
    name = params.get("name")
    if not isinstance(name, str):
        raise mcp_protocol.ProtocolError(
            mcp_protocol.INVALID_PARAMS,
            "tools/call: 'name' is required",
        )
    args = params.get("arguments")
    if args is None:
        # Only a missing/null `arguments` defaults to an empty object.
        # Using `or {}` here would also turn [], "", 0 and false into {}
        # and let them slip past the object check below.
        args = {}
    if not isinstance(args, dict):
        raise mcp_protocol.ProtocolError(
            mcp_protocol.INVALID_PARAMS,
            "tools/call: 'arguments' must be an object",
        )

    handler = _TOOL_HANDLERS.get(name)
    if handler is None:
        raise mcp_protocol.ProtocolError(
            mcp_protocol.INVALID_PARAMS,
            f"tools/call: unknown tool '{name}'",
        )

    mcp_log.info("tool_call", tool=name)
    payload = handler(args)
    is_error = bool(payload.get("analysis", {}).get("error"))
    return {
        "content": [
            {
                "type": "text",
                "text": json.dumps(payload, ensure_ascii=False, indent=2),
            }
        ],
        "isError": is_error,
    }
[ + { + "uri": uri, + "mimeType": "application/json", + "text": json.dumps(payload, ensure_ascii=False, indent=2), + } + ] + } + + +def _handle_ping(_: dict) -> dict: + return {} + + +def _handle_notification_initialized(_: dict) -> None: + mcp_log.info("client_initialized") + + +# ── server bootstrap ──────────────────────────────────────────────── + + +def _git_sha() -> str | None: + """Resolve the repo's git SHA + dirty flag, or None if unavailable. + + Two paths: the runtime probe (works in a development clone) and a + build-time-baked fallback (works for installs from a wheel where + `.git` no longer sits next to the script). The fallback ships in + `_build_info.py`, generated by `setup.py` at sdist/wheel build time. + """ + script_dir = os.path.dirname(os.path.abspath(__file__)) + try: + sha = subprocess.check_output( + ["git", "rev-parse", "HEAD"], + cwd=script_dir, + stderr=subprocess.DEVNULL, + timeout=2, + ).decode("utf-8").strip() + try: + dirty = subprocess.check_output( + ["git", "status", "--porcelain"], + cwd=script_dir, + stderr=subprocess.DEVNULL, + timeout=2, + ).decode("utf-8").strip() + if dirty: + sha = sha + "+dirty" + except Exception: + pass + return sha + except Exception: + pass + try: + from _build_info import BUILD_GIT_SHA # type: ignore[import-not-found] + except ImportError: + return None + sha = (BUILD_GIT_SHA or "").strip() + return sha or None + + +def _build_dispatcher() -> mcp_protocol.Dispatcher: + d = mcp_protocol.Dispatcher() + d.on_request("initialize", _handle_initialize) + d.on_request("tools/list", _handle_tools_list) + d.on_request("tools/call", _handle_tools_call) + d.on_request("resources/list", _handle_resources_list) + d.on_request("resources/read", _handle_resources_read) + d.on_request("ping", _handle_ping) + d.on_notification("notifications/initialized", _handle_notification_initialized) + return d + + +def _emit_version_fingerprint() -> None: + """ + Print a single-line install fingerprint covering server_version, 
def _emit_test_payload() -> None:
    """
    Offline wiring check: configure provenance, run the cma_stats
    handler for the default view, and print the resulting payload as
    indented JSON on stdout. No MCP client is involved.
    """
    provenance = {
        "server_name": SERVER_NAME,
        "server_version": SERVER_VERSION,
        "protocol_version": PROTOCOL_VERSION,
        "git_sha": _git_sha(),
    }
    mcp_compose.configure_provenance(**provenance)
    rendered = json.dumps(
        _handle_cma_stats({"view": "default"}),
        ensure_ascii=False,
        indent=2,
    )
    print(rendered)
[build-system]
# `license = "Apache-2.0"` under [project] is a PEP 639 SPDX expression.
# setuptools accepts that string form only from 77.0.3 onwards; older
# releases reject it during metadata validation, so the floor must not
# admit them.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
+readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +authors = [ + {name = "Lovro Lucic", email = "lovro.lucic@gmail.com"}, +] +keywords = [ + "mcp", + "model-context-protocol", + "compound-practice", + "failure-capture", + "decision-tracking", + "prevention", + "lodestone", + "cma", + "clarethium", + "claude-desktop", + "cursor", +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development", + "Topic :: System :: Logging", +] +# Runtime dependencies are intentionally empty: cma-mcp uses Python +# standard library only. The MCP protocol is implemented manually +# (DECISIONS AD-001), and the bash cma binary is invoked as a +# subprocess (STRATEGY DD-1, DECISIONS AD-004). 
+dependencies = [] + +[project.optional-dependencies] +test = [ + "pytest>=7.0", + "pytest-timeout>=2.0", + "pytest-cov>=4.0", +] + +[project.urls] +Homepage = "https://github.com/Clarethium/cma/tree/main/cma-mcp" +Repository = "https://github.com/Clarethium/cma" +Issues = "https://github.com/Clarethium/cma/issues" +Changelog = "https://github.com/Clarethium/cma/blob/main/cma-mcp/CHANGELOG.md" +Security = "https://github.com/Clarethium/cma/blob/main/SECURITY.md" +Strategy = "https://github.com/Clarethium/cma/blob/main/STRATEGY.md" +Decisions = "https://github.com/Clarethium/cma/blob/main/DECISIONS.md" +"MCP Server Reference" = "https://github.com/Clarethium/cma/blob/main/cma-mcp/docs/MCP_SERVER.md" +"cma (the wrapped CLI, same repo)" = "https://github.com/Clarethium/cma" +"Methodology Canon (Lodestone)" = "https://github.com/Clarethium/lodestone" + +[project.scripts] +cma-mcp = "mcp_server:cli" + +[tool.setuptools] +# Flat-modules layout matching frame-check-mcp's wheel-bundle +# convention. Each *.py at the repo root ships as a top-level import +# on the installed wheel. +py-modules = [ + "mcp_server", + "mcp_protocol", + "mcp_schema", + "mcp_resources", + "mcp_compose", + "mcp_log", + "cma_subprocess", + "cma_jsonl", + "_build_info", +] + +# Apache-2.0 §4(d) requires a readable copy of NOTICE in distributed +# artifacts. license-files instructs setuptools to bundle both +# LICENSE and NOTICE in the wheel's `*.dist-info` metadata so PyPI +# operators receive the attribution notices unconditionally. 
+license-files = ["LICENSE", "NOTICE"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = "-q --strict-markers" +markers = [ + "subprocess: tests that invoke the bash cma binary as a real subprocess (skipped if cma not on PATH)", + "adversarial: boundary, malformed, and abuse-path tests", +] + +[tool.coverage.run] +source = [ + "mcp_server", + "mcp_protocol", + "mcp_schema", + "mcp_resources", + "mcp_compose", + "mcp_log", + "cma_subprocess", + "cma_jsonl", +] diff --git a/cma-mcp/setup.py b/cma-mcp/setup.py new file mode 100644 index 0000000..b6cb384 --- /dev/null +++ b/cma-mcp/setup.py @@ -0,0 +1,84 @@ +"""Build-time hook: bake the repo git SHA into _build_info.py. + +The pyproject.toml file is the authoritative metadata source — this +shim runs only during sdist/wheel builds (and editable installs) so +the resulting artifact carries the git SHA it was built from. After +`pip install` from a wheel there is no `.git` directory next to the +installed module, so the runtime probe in `mcp_server._git_sha()` +returns None; the baked SHA fills that gap and preserves the +forensic-traceability claim of the install fingerprint. + +Resolution order (first non-empty wins): + +1. CMA_MCP_BUILD_SHA environment variable. CI sets this to + $GITHUB_SHA before `python -m build` so PyPA's PEP 517 build + isolation (which copies sources into a temp dir without `.git`) + does not lose the SHA. +2. `git rev-parse HEAD` against this file's directory and its + parent. Catches the editable-install path (`pip install -e .`) + where setup.py runs in the live source tree and `git` walks up + to the worktree root. +3. Empty string. The runtime probe will return `git_sha: null` + and the install fingerprint reports the missing trace honestly. + +The generated file is regenerated on every build and `.gitignore`d +so local development never commits a stale value. 
def _write_build_info() -> None:
    """Write `_build_info.py` next to this file with the resolved git SHA.

    The generated module holds a docstring and a single assignment,
    `BUILD_GIT_SHA = <string literal>`, encoded as UTF-8. The value comes
    from `_resolve_git_sha()` and may be the empty string.
    """
    sha = _resolve_git_sha()
    target = Path(__file__).parent / "_build_info.py"
    target.write_text(
        '"""Auto-generated at build time. Do not edit. Do not commit."""\n'
        # The SHA may come verbatim from the CMA_MCP_BUILD_SHA environment
        # variable. repr() escapes quotes, backslashes and newlines, so no
        # value can terminate the literal early and yield broken or
        # injected code in the generated module.
        f"BUILD_GIT_SHA = {sha!r}\n",
        encoding="utf-8",
    )
+""" + +from __future__ import annotations + +import json +import os +import shutil +import sys +from pathlib import Path + +import pytest + + +_REPO_ROOT = Path(__file__).resolve().parent.parent +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + + +# ── env isolation ───────────────────────────────────────────────── + + +@pytest.fixture +def isolated_cma_dir(tmp_path, monkeypatch): + """ + Point CMA_DIR at a temp directory so JSONL reads run against a + known-empty corpus. Does not invoke the bash cma binary; tests + that need the binary use the `cma_binary_available` fixture. + """ + monkeypatch.setenv("CMA_DIR", str(tmp_path)) + yield tmp_path + + +@pytest.fixture +def seeded_cma_dir(isolated_cma_dir): + """ + Returns a temp CMA_DIR pre-seeded with a small, schema-1.0 corpus + covering each record type. Tests build assertions against this + fixed seed. + """ + seeds = { + "misses.jsonl": [ + { + "schema_version": "1.0", + "type": "miss", + "id": "20260501-100000-aaaa1111", + "timestamp": "2026-05-01T10:00:00Z", + "description": "claimed verified without testing the cross-tenant write path", + "surface": "auth", + "fm": "FM-3", + }, + ], + "decisions.jsonl": [ + { + "schema_version": "1.0", + "type": "decision", + "id": "20260502-110000-bbbb2222", + "timestamp": "2026-05-02T11:00:00Z", + "description": "AUTH: JWT over sessions because stateless scales horizontally", + "surface": "auth", + "applies_when": "auth jwt", + }, + ], + "rejections.jsonl": [ + { + "schema_version": "1.0", + "type": "rejection", + "id": "20260503-120000-cccc3333", + "timestamp": "2026-05-03T12:00:00Z", + "description": "GraphQL: overhead for this project", + "surface": "api", + "revisit_when": "if mobile clients are added", + }, + ], + "preventions.jsonl": [ + { + "schema_version": "1.0", + "type": "prevention", + "id": "20260504-130000-dddd4444", + "timestamp": "2026-05-04T13:00:00Z", + "description": "almost claimed verified, ran the cross-tenant test instead", 
@pytest.fixture(scope="session")
def cma_binary_available() -> bool:
    """Session-scoped flag: True exactly when `shutil.which` finds `cma` on PATH."""
    resolved = shutil.which("cma")
    return resolved is not None
def call_handler(dispatcher, method: str, params: dict | None = None) -> dict:
    """
    Look up the request handler registered for `method` on the
    dispatcher and call it directly, returning whatever it returns.

    The JSON-RPC envelope serializer is skipped so tests can assert on
    the raw result body. Missing or empty `params` are passed to the
    handler as a fresh empty dict. Fails with an AssertionError when no
    handler is registered under `method`.
    """
    registry = dispatcher._request_handlers
    target = registry.get(method)
    assert target is not None, f"no handler registered for {method}"
    payload = params if params else {}
    return target(payload)
def test_server_version_is_strict_semver():
    """Per CHANGELOG / SERVER_VERSION discipline, version is M.m.p only.

    Each component must be a plain ASCII non-negative integer with no
    leading zero. `str.isdigit()` alone is too permissive for this: it
    accepts non-ASCII digit characters (for example superscripts) and
    zero-padded components such as "01".
    """
    import re

    version = mcp_server.SERVER_VERSION
    parts = version.split(".")
    assert len(parts) == 3, f"SERVER_VERSION must be M.m.p; got {version}"
    for p in parts:
        # "0" alone is fine; any other component must not start with 0.
        assert re.fullmatch(r"0|[1-9][0-9]*", p), (
            "SERVER_VERSION component must be ASCII digits without a "
            f"leading zero; got {p!r}"
        )
def test_invalid_jsonrpc_line_emits_parse_error(fresh_dispatcher, capsys):
    """A line that is not JSON must produce a parse-error envelope.

    The envelope carries `id: null` and the PARSE_ERROR code. The
    dispatcher writes its response to stdout, which is captured here
    through `capsys`.
    """
    fresh_dispatcher._dispatch_one("this is not json")
    out = capsys.readouterr().out.strip()
    # Guard first: without it, a dispatcher that emits nothing fails
    # below with an opaque JSONDecodeError instead of a clear message.
    assert out, "dispatcher must emit a response on stdout"
    payload = json.loads(out)
    assert payload["id"] is None
    assert payload["error"]["code"] == mcp_protocol.PARSE_ERROR
def test_git_sha_falls_back_to_baked_build_info(monkeypatch):
    """When the runtime git probe fails (PyPI install layout — no `.git`
    next to the script), `_git_sha()` must fall back to the build-time
    value baked into `_build_info.BUILD_GIT_SHA` by `setup.py`.

    Without this fallback, the install fingerprint silently degrades
    to `git_sha: null` on the most common install path, weakening the
    forensic-traceability claim documented in cma-mcp/README.md.
    """
    import subprocess as _subprocess
    import sys
    import types

    baked_sha = "deadbeefcafefade1234567890abcdef00000000"

    def _always_fail(*args, **kwargs):
        raise FileNotFoundError("git probe disabled for this test")

    # NOTE(review): this only disables the probe if mcp_server reaches git
    # through `subprocess.check_output` — confirm against mcp_server.
    monkeypatch.setattr(_subprocess, "check_output", _always_fail)

    # Stand-in for the generated module; monkeypatch removes it from
    # sys.modules again at teardown.
    fake_module = types.ModuleType("_build_info")
    fake_module.BUILD_GIT_SHA = baked_sha
    monkeypatch.setitem(sys.modules, "_build_info", fake_module)

    assert mcp_server._git_sha() == baked_sha
class WireServer:
    """A real cma-mcp subprocess driven over stdin/stdout pipes.

    Reads one JSON-RPC line per response. Notifications produce no
    response. The class deliberately stays minimal — it is the
    test's leverage point, not a general-purpose MCP client.

    Two daemon threads pump the child's stdout and stderr:

    * stdout lines go into a queue so `read_reply` can enforce its
      timeout. A bare `readline()` blocks until the child writes or
      exits, so a hung server would otherwise hang the test run.
    * stderr is drained continuously so a chatty server cannot fill
      the pipe buffer and stall on its own logging.
    """

    def __init__(self, cma_dir: Path):
        """Spawn the server with CMA_DIR pointed at `cma_dir` and start the pumps."""
        # Function-scope imports: only this class needs these modules.
        import queue
        import threading

        env = os.environ.copy()
        env["CMA_DIR"] = str(cma_dir)
        self.proc = subprocess.Popen(
            [sys.executable, str(SERVER_PATH)],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=env,
            bufsize=0,
        )
        self._next_id = 1
        # Raw stdout lines in arrival order; a trailing None marks EOF.
        self._replies = queue.Queue()
        # Raw stderr chunks, kept for the diagnostic in read_reply.
        self._stderr_chunks = []
        self._stdout_pump = threading.Thread(target=self._pump_stdout, daemon=True)
        self._stderr_pump = threading.Thread(target=self._pump_stderr, daemon=True)
        self._stdout_pump.start()
        self._stderr_pump.start()

    def _pump_stdout(self) -> None:
        """Forward each stdout line to the reply queue, then the EOF marker."""
        try:
            for line in iter(self.proc.stdout.readline, b""):
                self._replies.put(line)
        except (OSError, ValueError):
            # Pipe was closed underneath us during teardown; treat as EOF.
            pass
        finally:
            self._replies.put(None)

    def _pump_stderr(self) -> None:
        """Accumulate stderr so the child never blocks on a full pipe."""
        try:
            for chunk in iter(lambda: self.proc.stderr.read(4096), b""):
                self._stderr_chunks.append(chunk)
        except (OSError, ValueError):
            # Pipe was closed underneath us during teardown; keep what we have.
            pass

    def send_line(self, line: str) -> None:
        """Write an arbitrary line to the server's stdin (no framing)."""
        self.proc.stdin.write((line + "\n").encode("utf-8"))
        self.proc.stdin.flush()

    def send_request(self, method: str, params: dict | None = None) -> int:
        """Send a JSON-RPC request, return its id."""
        req_id = self._next_id
        self._next_id += 1
        line = json.dumps(
            {"jsonrpc": "2.0", "id": req_id, "method": method,
             "params": params or {}}
        )
        self.send_line(line)
        return req_id

    def send_notification(self, method: str, params: dict | None = None) -> None:
        """Send a JSON-RPC notification (no id, so no reply is expected)."""
        line = json.dumps({"jsonrpc": "2.0", "method": method,
                           "params": params or {}})
        self.send_line(line)

    def read_reply(self, timeout_s: float = DEFAULT_TIMEOUT_S) -> dict:
        """Read one JSON-RPC line and return it parsed.

        Raises AssertionError when no line arrives within `timeout_s`
        seconds, or when the server has exited (the message then
        carries the exit code and the captured stderr).
        """
        import queue

        deadline = time.monotonic() + timeout_s
        try:
            line = self._replies.get(timeout=max(0.0, timeout_s))
        except queue.Empty:
            raise AssertionError(f"no reply within {timeout_s}s") from None
        if line is not None:
            return json.loads(line)

        # stdout hit EOF. Put the marker back so later reads fail fast too.
        self._replies.put(None)
        try:
            self.proc.wait(timeout=max(0.0, deadline - time.monotonic()))
        except subprocess.TimeoutExpired:
            # stdout is closed but the process is still alive.
            raise AssertionError(f"no reply within {timeout_s}s") from None
        # Give the stderr pump a moment to collect the final output.
        self._stderr_pump.join(timeout=1)
        stderr = b"".join(self._stderr_chunks).decode("utf-8", errors="replace")
        raise AssertionError(
            f"server exited with code {self.proc.returncode}; "
            f"stderr=\n{stderr}"
        )

    def call(self, method: str, params: dict | None = None) -> dict:
        """Send a request and read its reply. Convenience for the common case."""
        req_id = self.send_request(method, params)
        reply = self.read_reply()
        assert reply.get("id") == req_id, (
            f"id mismatch: sent {req_id}, got {reply.get('id')}"
        )
        return reply

    def close(self) -> None:
        """Shut the server down: close stdin, wait, kill if needed, release pipes."""
        try:
            if self.proc.stdin and not self.proc.stdin.closed:
                self.proc.stdin.close()
        except (OSError, ValueError):
            # Best effort: the child may already have exited (broken pipe).
            pass
        try:
            self.proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            self.proc.kill()
            self.proc.wait(timeout=2)
        for pump, stream in (
            (self._stdout_pump, self.proc.stdout),
            (self._stderr_pump, self.proc.stderr),
        ):
            pump.join(timeout=2)
            # Close only once the pump is done with the stream. A pump that
            # is still blocked is a daemon thread and dies with the process.
            if not pump.is_alive():
                try:
                    stream.close()
                except (OSError, ValueError):
                    pass
def test_unknown_method_emits_method_not_found_envelope(tmp_path):
    """An unregistered method yields an error envelope and no `result`."""
    method_not_found = -32601  # JSON-RPC 2.0 method-not-found
    with wire_server(tmp_path) as srv:
        envelope = srv.call("does/not/exist")
    assert "error" in envelope
    assert envelope["error"]["code"] == method_not_found
    assert "result" not in envelope
def test_resources_read_unknown_uri_emits_resource_not_found(tmp_path):
    """Reading a URI the server does not expose returns an error envelope."""
    resource_not_found = -32002  # MCP-specific code for resource-not-found
    handshake = {"protocolVersion": "2024-11-05"}
    with wire_server(tmp_path) as srv:
        srv.call("initialize", handshake)
        envelope = srv.call("resources/read", {"uri": "cma://nope"})
    assert "error" in envelope
    assert envelope["error"]["code"] == resource_not_found
def test_oversized_request_does_not_crash_the_server(tmp_path):
    """Send cma_miss a 64 KiB description and require a well-formed reply.

    Either outcome is acceptable: a `result` (captured, or a clean
    isError payload) or a JSON-RPC `error`. A missing reply, mangled
    JSON, or a server exit is not. A follow-up ping must still be
    answered.
    """
    oversized_description = "x" * (64 * 1024)
    request_params = {
        "name": "cma_miss",
        "arguments": {
            "description": oversized_description,
            "surface": "general",
        },
    }
    with wire_server(tmp_path) as srv:
        srv.call("initialize", {"protocolVersion": "2024-11-05"})
        outcome = srv.call("tools/call", request_params)
        assert any(key in outcome for key in ("result", "error"))
        # The server must still be alive and answering.
        liveness = srv.call("ping")
        assert "result" in liveness
The + server must answer all three with parse-error envelopes (id + null) and then return a normal result for the valid request.""" + with wire_server(tmp_path) as server: + for _ in range(3): + server.send_line("garbage }{") + reply = server.read_reply() + assert reply["id"] is None + assert reply["error"]["code"] == -32700 + ping = server.call("ping") + assert "result" in ping + + +# ── tool dispatch end-to-end (requires bash cma) ─────────────────── + + +@pytest.mark.subprocess +def test_tools_call_cma_stats_round_trips_three_section_payload( + tmp_path, cma_binary_available, +): + """Full-stack invocation: client → cma-mcp dispatch → bash cma + subprocess → mcp_compose → JSON-RPC reply. Pins the empty-corpus + case (no captures yet) so the test does not depend on the + operator's data.""" + if not cma_binary_available: + pytest.skip("cma binary not on PATH") + with wire_server(tmp_path) as server: + server.call("initialize", {"protocolVersion": "2024-11-05"}) + reply = server.call("tools/call", { + "name": "cma_stats", + "arguments": {"view": "default"}, + }) + result = reply["result"] + # MCP tools/call returns content[] with type="text" carrying + # the JSON-stringified payload. + assert "content" in result + text = result["content"][0]["text"] + payload = json.loads(text) + assert set(payload.keys()) >= {"analysis", "agent_guidance", + "provenance"} + assert payload["provenance"]["server_name"] == "cma-mcp" + assert payload["provenance"]["cost_usd"] == 0.0 + assert payload["provenance"]["deterministic"] is True + # cma_argv is the audit trail: the exact argv the wrapper + # passed to cma. On an empty corpus the call still succeeds. 
def _assert_provenance_canonical(provenance: dict) -> None:
    """Check that a provenance section has every required key and the fixed values.

    Required keys come from REQUIRED_PROVENANCE_KEYS. The fixed values
    are the server name, license, zero cost, the `deterministic` flag,
    and a citation that mentions Clarethium.
    """
    missing = REQUIRED_PROVENANCE_KEYS - set(provenance)
    assert not missing, f"provenance missing required keys: {missing}"

    pinned_values = (
        ("server_name", "cma-mcp"),
        ("license", "Apache-2.0"),
        ("cost_usd", 0.0),
    )
    for field, wanted in pinned_values:
        assert provenance[field] == wanted
    # Identity check on purpose: a truthy non-bool must not pass.
    assert provenance["deterministic"] is True
    assert "Clarethium" in provenance["citation"]
def test_error_response_is_three_section():
    """Error payloads keep the three-section shape and flag `analysis.error`."""
    _setup_provenance()
    error_kwargs = {
        "tool_or_uri": "cma_miss",
        "reason": "missing_binary",
        "detail": "cma not on PATH",
    }
    composed = mcp_compose.compose_error_response(**error_kwargs)
    _assert_three_section(composed)
    _assert_provenance_canonical(composed["provenance"])
    assert composed["analysis"]["error"] is True
def test_provenance_timestamp_is_iso8601_zulu():
    """The provenance timestamp must be a real `YYYY-MM-DDTHH:MM:SSZ` instant.

    Checking only the length and separator positions would also accept
    strings such as "abcd-ef-ghTij:kl:mnZ" or month 13, so the value is
    parsed as well.
    """
    from datetime import datetime

    _setup_provenance()
    payload = mcp_compose.compose_stats_response(
        view="default",
        cma_stdout="",
        cma_stderr="",
    )
    ts = payload["provenance"]["timestamp"]
    # strptime tolerates missing zero-padding, so the fixed width is
    # still asserted separately.
    assert len(ts) == 20
    assert ts.endswith("Z")
    assert ts[4] == "-" and ts[7] == "-" and ts[10] == "T"
    try:
        datetime.strptime(ts, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError as exc:
        raise AssertionError(
            f"timestamp is not a valid YYYY-MM-DDTHH:MM:SSZ instant: {ts!r}"
        ) from exc
def test_core_resource_filters_retired_via_retirement_records(seeded_cma_dir):
    """cma://core lists active cores only; retired ones are just counted.

    The seed holds two core records plus one retirement record that
    targets the second core.
    """
    analysis = mcp_resources.read("cma://core")["analysis"]
    counts = analysis["summary"]
    assert counts["active"] == 1
    assert counts["retired"] == 1
    surfaced_ids = {rec["id"] for rec in analysis["records"]}
    assert "20260301-090000-eeee5555" in surfaced_ids
    assert "20260302-090000-ffff6666" not in surfaced_ids
def test_legacy_record_no_schema_version_parses_leniently(isolated_cma_dir):
    """A record without `schema_version` is still returned, and counted as legacy."""
    legacy_record = {
        "type": "decision",
        "id": "20251201-080000-legacy00",
        "timestamp": "2025-12-01T08:00:00Z",
        "description": "LEGACY: a record from before schema_version was introduced",
    }
    target = isolated_cma_dir / "decisions.jsonl"
    target.write_text(json.dumps(legacy_record) + "\n", encoding="utf-8")

    result = mcp_resources.read("cma://decisions")
    source_info = result["provenance"]["data_source"]
    assert result["analysis"]["record_count"] == 1
    assert source_info["legacy_records_no_schema_version"] == 1
def test_cma_bin_override_wrong_path_raises(monkeypatch):
    """CMA_BIN pointing at a non-existent file fails with clear reason.

    The module is reloaded after CMA_BIN is set so that it picks up the
    override. The reload is undone in a `finally` block, so a failing
    assertion cannot leave `cma_subprocess` configured with the bogus
    path for later tests. CMA_BIN is put back to the value it had before
    the test (not always removed) before that second reload.
    """
    import importlib

    original_cma_bin = os.environ.get("CMA_BIN")
    monkeypatch.setenv("CMA_BIN", "/nonexistent/path/to/cma")
    try:
        # Reload the module so it picks up the env var.
        importlib.reload(cma_subprocess)
        # Look CmaError up after the reload: reloading re-creates the class.
        with pytest.raises(cma_subprocess.CmaError) as excinfo:
            cma_subprocess.resolve_cma_binary()
        assert excinfo.value.reason == "missing_binary"
    finally:
        # Restore the pre-test environment, then reload against it.
        if original_cma_bin is None:
            monkeypatch.delenv("CMA_BIN", raising=False)
        else:
            monkeypatch.setenv("CMA_BIN", original_cma_bin)
        importlib.reload(cma_subprocess)
def test_argv_injection_is_structurally_impossible(cma_binary_available, isolated_cma_dir):
    """
    Per DECISIONS AD-004: argv-array discipline. Passing a
    shell-metacharacter-laden description must NOT execute the
    metacharacter. Each payload below would create a sentinel file if
    any layer handed the description to a shell for interpretation.
    """
    if not cma_binary_available:
        pytest.skip("cma binary not on PATH")
    sentinel = isolated_cma_dir / "INJECTED_SENTINEL"
    # One payload per common injection vector: command chaining, pipes,
    # newline separation, and both command-substitution syntaxes.
    payloads = [
        f"rm benign; touch {sentinel}; echo done",
        f"benign && touch {sentinel}",
        f"benign | touch {sentinel}",
        f"benign\ntouch {sentinel}",
        f"$(touch {sentinel})",
        f"`touch {sentinel}`",
    ]
    for payload in payloads:
        # Even if cma errors on this input, the sentinel must NOT be created.
        try:
            cma_subprocess.run_cma(["miss", payload])
        except cma_subprocess.CmaError:
            pass
        assert not sentinel.exists(), (
            "argv-array discipline broken: shell metacharacter executed "
            f"(payload={payload!r})"
        )


def test_cma_version_returns_string_or_none():
    """cma_version() never raises; returns string when cma exists, None otherwise."""
    version = cma_subprocess.cma_version()
    if version is not None:
        assert isinstance(version, str)
        assert len(version) > 0
set -uo pipefail

# Drain stdin (Claude Code may pass JSON; we do not need to parse it for
# this hook since the work is pulling priming context, not responding to
# tool input).
if [[ ! -t 0 ]]; then
    cat > /dev/null
fi

# Stage 3: query — failure-isolated. If cma missing, silent exit.
if ! command -v cma >/dev/null 2>&1; then
    exit 0
fi

sections="${CMA_SESSION_START_SECTIONS:-recurrence,rejections}"
if [[ "$sections" == "all" ]]; then
    sections="recurrence,rejections,behavior"
fi

# bounded_cma ARGS...
# Runs `cma ARGS...` under a 5-second cap when a timeout utility is present.
# `timeout` is GNU coreutils and is not on every platform (stock macOS lacks
# it; Homebrew installs it as `gtimeout`). Without this fallback the hook
# would silently surface nothing there, because the "command not found"
# error is discarded by the callers' `2>/dev/null || true`.
bounded_cma() {
    if command -v timeout >/dev/null 2>&1; then
        timeout 5 cma "$@"
    elif command -v gtimeout >/dev/null 2>&1; then
        gtimeout 5 cma "$@"
    else
        # No timeout utility: run unbounded rather than surface nothing.
        cma "$@"
    fi
}

# get_section NAME
# Prints the full `cma stats` output for one section (recurrence, rejections,
# or behavior), or an empty line when cma reports that there is no data.
# Each subsection's full output is preserved (header + data + footer)
# because the framing text helps the assistant interpret the data.
# Unknown section names print an empty line.
get_section() {
    local name="$1"
    local out=""
    case "$name" in
        recurrence)
            out=$(bounded_cma stats --recurrence 2>/dev/null || true)
            # Suppress empty/no-data outputs: only return content if we
            # actually have recurring patterns.
            if [[ -z "$out" || "$out" == *"no patterns are recurring"* || "$out" == *"No misses recorded"* ]]; then
                out=""
            fi
            ;;
        rejections)
            out=$(bounded_cma stats --rejections 2>/dev/null || true)
            # Suppress when there are no active rejections.
            if [[ -z "$out" || "$out" == *"No captures match"* ]]; then
                out=""
            fi
            ;;
        behavior)
            out=$(bounded_cma stats --behavior 2>/dev/null || true)
            if [[ -z "$out" || "$out" == *"No misses recorded"* || "$out" == *"none have intended/corrected"* ]]; then
                out=""
            fi
            ;;
    esac
    # printf, not echo: the content is arbitrary cma output and must never be
    # interpreted as an echo option.
    printf '%s\n' "$out"
}

# Stage 4: injection — assemble output, write to stdout for Claude Code
# to inject as session context. Silent when nothing to show.

declare -a parts=()
IFS=',' read -ra requested <<< "$sections"
for s in "${requested[@]}"; do
    # Tolerate "recurrence, rejections" style lists by stripping whitespace.
    s="${s//[[:space:]]/}"
    [[ -z "$s" ]] && continue
    content=$(get_section "$s")
    if [[ -n "$content" ]]; then
        parts+=( "## $s" "$content" "" )
    fi
done

if [[ ${#parts[@]} -eq 0 ]]; then
    exit 0
fi

printf '%s\n' "# cma session-start context" "" "${parts[@]}"

# Stage 5: logging — handled by cma stats invocations themselves where
# applicable (stats commands are aggregate views that do not log surface
# events; this is intentional, since session-start priming is broad rather
# than action-specific).
set -uo pipefail

# ---------------------------------------------------------------------------
# Mode detection
# ---------------------------------------------------------------------------

# "execute" surfaces and then runs the wrapped command; "check" (selected by
# a leading --check) surfaces only.
mode=execute
if [[ "${1:-}" == "--check" ]]; then
    mode=check
    shift
fi

# Nothing left to inspect or run: print usage to stderr and fail.
if [[ $# -eq 0 ]]; then
    cat <<'EOF' >&2
cma-pre: action-time injection wrapper.

Usage:
  cma-pre <command> [args...]      Surface relevant captures, then execute
  cma-pre --check "<command-line>" Surface relevant captures only

Shell integration (zsh):
  preexec() { cma-pre --check "$1" }

Shell integration (bash, requires bash-preexec):
  preexec_functions+=("cma_pre_hook")
  cma_pre_hook() { cma-pre --check "$1"; }

See ARCHITECTURE.md for the design contract.
EOF
    exit 1
fi

# ---------------------------------------------------------------------------
# Stage 2: context extraction
# ---------------------------------------------------------------------------

# In --check mode, $1 is a single string with the full command line.
# In execute mode, "$@" is properly split argv.
if [[ "$mode" == "check" ]]; then
    command_line="$1"
else
    command_line="$*"
fi

# Command name (first word, stripped of any path)
cmd_name="${command_line%% *}"
cmd_name="${cmd_name##*/}"
default_triggers="vim nvim vi nano emacs code subl micro hx helix kakoune git npm yarn pnpm cargo make gradle mvn bazel python python3 node ruby go rustc gcc clang"
triggers="${CMA_PRE_TRIGGERS:-$default_triggers}"

# Exact-name match of the command against the (space-separated) trigger list.
is_trigger=0
for t in $triggers; do
    if [[ "$cmd_name" == "$t" ]]; then
        is_trigger=1
        break
    fi
done

# detect_surface COMMAND_LINE
# Prints the first surface label whose keyword patterns match the lowercased
# command line, or an empty line when none match. Order matters: earlier
# branches win (e.g. "auth" beats "api").
# Same heuristics as the Claude Code PreToolUse hook for consistency.
detect_surface() {
    local lower
    lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
    case "$lower" in
        *auth*|*login*|*session*|*jwt*|*password*|*credential*) echo "auth"; return ;;
        *payment*|*stripe*|*billing*|*checkout*) echo "payments"; return ;;
        *schema*|*migration*|*database*|*/db/*) echo "db"; return ;;
        *.test.*|*.spec.*|*/__tests__/*|*/tests/*|*/test/*) echo "test"; return ;;
        *api/*|*/api*|*route*|*endpoint*|*controller*|*handler*) echo "api"; return ;;
        *.tsx*|*.jsx*|*.vue*|*/ui/*|*component*) echo "ui"; return ;;
        *.md*|*/docs/*|*readme*) echo "docs"; return ;;
    esac
    echo ""
}

surface=$(detect_surface "$command_line")

# If neither a trigger command nor a detected surface, skip query.
# (Surface alone can fire on non-trigger commands when keywords match;
# trigger alone without surface lets editor opens of unsurfaced files pass.)
if [[ "$is_trigger" == "0" && -z "$surface" ]]; then
    if [[ "$mode" == "execute" ]]; then exec "$@"; fi
    exit 0
fi

# Without a surface filter, querying is too broad. Skip in that case too.
# NOTE(review): this check subsumes the one above, so is_trigger (and hence
# CMA_PRE_TRIGGERS) does not currently change whether a query runs. Confirm
# that is the intended contract.
if [[ -z "$surface" ]]; then
    if [[ "$mode" == "execute" ]]; then exec "$@"; fi
    exit 0
fi

# ---------------------------------------------------------------------------
# Stage 3: query (failure-isolated)
# ---------------------------------------------------------------------------

if ! command -v cma >/dev/null 2>&1; then
    # cma not installed; pass through silently
    if [[ "$mode" == "execute" ]]; then exec "$@"; fi
    exit 0
fi

# bounded_cma ARGS...
# Runs `cma ARGS...` under a 5-second cap when a timeout utility is present.
# `timeout` is GNU coreutils and is not on every platform (stock macOS lacks
# it; Homebrew installs it as `gtimeout`). Without the fallback, a missing
# `timeout` would be swallowed by `2>/dev/null || true` below and the wrapper
# would never surface anything.
bounded_cma() {
    if command -v timeout >/dev/null 2>&1; then
        timeout 5 cma "$@"
    elif command -v gtimeout >/dev/null 2>&1; then
        gtimeout 5 cma "$@"
    else
        cma "$@"
    fi
}

# 5-second timeout protects against pathological cases (huge data dirs, etc.)
output=$(bounded_cma surface --surface "$surface" --limit 3 2>/dev/null || true)

# ---------------------------------------------------------------------------
# Stage 4: injection (stderr, clearly attributed)
# ---------------------------------------------------------------------------

if [[ -n "$output" && "$output" != "No captures match." ]]; then
    {
        printf '>>> cma surfaced relevant captures for: %s\n' "$cmd_name"
        printf '%s\n' "$output" | sed 's/^/    /'
        printf '>>>\n'
    } >&2
fi

# ---------------------------------------------------------------------------
# Stage 5: logging — handled by `cma surface` invocation above.
# ---------------------------------------------------------------------------

# Execute the wrapped command (if not in check-only mode)
if [[ "$mode" == "execute" ]]; then
    exec "$@"
fi
exit 0
# Schema versioning: every capture has schema_version field
reset
"$CMA" miss "v" --surface auth >/dev/null
"$CMA" decision "v" --surface infra >/dev/null
"$CMA" reject "v" --surface ui >/dev/null
"$CMA" prevented "v" >/dev/null
"$CMA" distill "v" --scope project >/dev/null
# The inline script prints "ok" only when every record in every *.jsonl file
# carries schema_version 1.0; otherwise it prints a diagnostic plus "fail".
schema_ok=$(python3 -c "
import json, glob
ok = True
for path in glob.glob('$CMA_DIR/*.jsonl'):
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line: continue
            rec = json.loads(line)
            if rec.get('schema_version') != '1.0':
                ok = False
                print('missing schema_version in', path, rec.get('type'))
                break
print('ok' if ok else 'fail')
")
if [[ "$schema_ok" == "ok" ]]; then
    printf "PASS  %s\n" "all captures include schema_version 1.0"
    pass=$((pass + 1))
else
    # Include the script's diagnostic so the offending file/type is visible.
    printf "FAIL  %s (detail=%q)\n" "all captures include schema_version 1.0" "$schema_ok"
    fail=$((fail + 1))
fi

# Tolerant read: corrupted JSONL line is skipped with stderr warning
reset
# The two descriptions share no substring with each other or with the corrupt
# line, so each pattern below can only be satisfied by its own record.
"$CMA" miss "first-intact-record" --surface auth >/dev/null
echo "this is not valid json" >> "$CMA_DIR/misses.jsonl"
"$CMA" miss "second-intact-record" --surface auth >/dev/null
# stdout only: a stderr warning that echoes the corrupt line must not be able
# to satisfy the record assertions.
output=$("$CMA" surface --surface auth 2>/dev/null)
# stderr only.
err_output=$("$CMA" surface --surface auth 2>&1 >/dev/null)
if [[ "$output" == *"first-intact-record"* ]] && [[ "$output" == *"second-intact-record"* ]]; then
    printf "PASS  %s\n" "tolerant read: valid records still surfaced after corruption"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "tolerant read: valid records still surfaced after corruption" "$output"
    fail=$((fail + 1))
fi
if [[ "$err_output" == *"corrupted"* ]]; then
    printf "PASS  %s\n" "tolerant read: corrupted line warned to stderr"
    pass=$((pass + 1))
else
    printf "FAIL  %s (err=%q)\n" "tolerant read: corrupted line warned to stderr" "$err_output"
    fail=$((fail + 1))
fi

# cma init creates the data directory with a README
# The target is a not-yet-existing child of a fresh temp dir, so init itself
# must create it.
init_dir=$(mktemp -d)/cma-init-test
expect_exit "init creates directory" 0 env CMA_DIR="$init_dir" "$CMA" init
if [[ -d "$init_dir" && -f "$init_dir/README.md" ]]; then
    printf "PASS  %s\n" "init writes README inside data directory"
    pass=$((pass + 1))
else
    printf "FAIL  %s\n" "init writes README inside data directory"
    fail=$((fail + 1))
fi
expect_exit "init is idempotent" 0 env CMA_DIR="$init_dir" "$CMA" init
expect_contains "init README references DATA.md" "DATA.md" cat "$init_dir/README.md"
# Remove the mktemp parent, which takes the init directory with it.
rm -rf "$(dirname "$init_dir")"
"$CMA" miss "auth check missed" --surface auth >/dev/null

# Surface filter on auth: matches AUTH decision (via applies-when) and miss (via surface field)
output=$("$CMA" surface --surface auth 2>&1)
if [[ "$output" == *"AUTH: validate JWT"* ]]; then
    printf "PASS  %s\n" "decision surfaces by applies-when keyword"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "decision surfaces by applies-when keyword" "$output"
    fail=$((fail + 1))
fi

# Multi-keyword applies-when: db decision matches both "db" and "migration"
output=$("$CMA" surface --surface migration 2>&1)
if [[ "$output" == *"DB: use migrations"* ]]; then
    printf "PASS  %s\n" "decision applies-when matches any of multiple keywords"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "decision applies-when matches any of multiple keywords" "$output"
    fail=$((fail + 1))
fi

# Decision without applies-when only matches by stored surface
output=$("$CMA" surface --surface infra 2>&1)
if [[ "$output" == *"AUTH: validate JWT"* && "$output" == *"DB: use migrations"* && "$output" != *"STYLE: early returns"* ]]; then
    printf "PASS  %s\n" "decision without applies-when matches only by stored surface"
    pass=$((pass + 1))
else
    # Print the captured output like the sibling checks do: with three
    # conditions in play, the FAIL line alone does not say which one broke.
    printf "FAIL  %s (out=%q)\n" "decision without applies-when matches only by stored surface" "$output"
    fail=$((fail + 1))
fi

# Misses do NOT get applies-when matching (only their surface field matters)
# A miss with surface="auth" should not match --surface=migration even if a decision with applies-when does
output=$("$CMA" surface --surface migration --type miss 2>&1)
if [[ "$output" == *"No captures match"* ]]; then
    printf "PASS  %s\n" "misses do not surface via applies-when (decision-specific)"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "misses do not surface via applies-when (decision-specific)" "$output"
    fail=$((fail + 1))
fi
--------------------------------------------------------------------------- # Distill (default mode + --review/--retire stubs) # --------------------------------------------------------------------------- @@ -203,9 +363,9 @@ expect_exit "distill --bogus exits 1" 1 "$CMA" distill --bogus # Build a pattern of recurring misses and check --review surfaces it reset -"$CMA" miss "first" --surface auth --fm assumption-over-verification >/dev/null -"$CMA" miss "second" --surface auth --fm assumption-over-verification >/dev/null -"$CMA" miss "third elsewhere" --surface ui --fm basin-capture >/dev/null +"$CMA" miss "first" --surface auth --fm fm-1 >/dev/null +"$CMA" miss "second" --surface auth --fm fm-1 >/dev/null +"$CMA" miss "third elsewhere" --surface ui --fm fm-2 >/dev/null expect_contains "review identifies recurring pattern" "2x" "$CMA" distill --review expect_contains "review reports surface in pattern" "auth" "$CMA" distill --review @@ -238,24 +398,36 @@ expect_exit "stats --bogus exits 1" 1 "$CMA" stats --bogus # stats --recurrence reset expect_contains "recurrence empty data" "No misses" "$CMA" stats --recurrence -"$CMA" miss "x" --surface auth --fm assumption-over-verification >/dev/null +"$CMA" miss "x" --surface auth --fm fm-1 >/dev/null expect_contains "recurrence single miss not recurring" "no patterns are recurring" "$CMA" stats --recurrence -"$CMA" miss "y" --surface auth --fm assumption-over-verification >/dev/null +"$CMA" miss "y" --surface auth --fm fm-1 >/dev/null expect_contains "recurrence detects pattern" "2x" "$CMA" stats --recurrence expect_contains "recurrence frames as not working" "not working" "$CMA" stats --recurrence # stats --leaks reset expect_contains "leaks with no events" "No surface events" "$CMA" stats --leaks -"$CMA" miss "old" --surface auth --fm assumption-over-verification >/dev/null +"$CMA" miss "old" --surface auth --fm fm-1 >/dev/null "$CMA" surface --surface auth >/dev/null expect_contains "leaks with surface but no later miss" 
# ---------------------------------------------------------------------------
# SessionStart hook
# ---------------------------------------------------------------------------

reset
SS_HOOK="$(cd "$(dirname "$0")" && pwd)/hooks/claude-code-session-start.sh"

# Every invocation below redirects stdin from /dev/null and merges stderr into
# the capture. The hook drains stdin whenever it is not a TTY, so an inherited
# pipe that never closes would otherwise block the test run; merging stderr
# means any hook-side error also breaks the "silent" assertions.

# Empty data: silent
out=$(bash "$SS_HOOK" </dev/null 2>&1)
if [[ -z "$out" ]]; then
    printf "PASS  %s\n" "session-start silent on empty data"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "session-start silent on empty data" "$out"
    fail=$((fail + 1))
fi

# With recurring pattern: outputs recurrence section
"$CMA" miss "first" --surface auth --fm fm-1 >/dev/null
"$CMA" miss "second" --surface auth --fm fm-1 >/dev/null
out=$(bash "$SS_HOOK" </dev/null 2>&1)
if [[ "$out" == *"## recurrence"* ]] && [[ "$out" == *"2x"* ]]; then
    printf "PASS  %s\n" "session-start surfaces recurrence section"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "session-start surfaces recurrence section" "$out"
    fail=$((fail + 1))
fi

# With rejection: includes rejections section
"$CMA" reject "OPTION: rejected for testing" --surface infra >/dev/null
out=$(bash "$SS_HOOK" </dev/null 2>&1)
if [[ "$out" == *"## rejections"* ]] && [[ "$out" == *"OPTION: rejected"* ]]; then
    printf "PASS  %s\n" "session-start includes rejections section"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "session-start includes rejections section" "$out"
    fail=$((fail + 1))
fi

# CMA_SESSION_START_SECTIONS env var override (only rejections)
out=$(CMA_SESSION_START_SECTIONS=rejections bash "$SS_HOOK" </dev/null 2>&1)
if [[ "$out" == *"## rejections"* ]] && [[ "$out" != *"## recurrence"* ]]; then
    printf "PASS  %s\n" "session-start respects CMA_SESSION_START_SECTIONS"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "session-start respects CMA_SESSION_START_SECTIONS" "$out"
    fail=$((fail + 1))
fi

# CMA_SESSION_START_SECTIONS=all includes behavior
"$CMA" miss "with texture" --surface auth --fm fm-1 \
    --intended "patch symptom" --corrected "fix root" >/dev/null
out=$(CMA_SESSION_START_SECTIONS=all bash "$SS_HOOK" </dev/null 2>&1)
if [[ "$out" == *"## behavior"* ]]; then
    printf "PASS  %s\n" "session-start sections=all includes behavior"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "session-start sections=all includes behavior" "$out"
    fail=$((fail + 1))
fi

# Failure isolation: cma missing → silent
# env -i with a minimal PATH hides cma from the hook.
# NOTE(review): assumes cma is not installed under /usr/bin or /bin.
out=$(env -i PATH=/usr/bin:/bin HOME="$HOME" CMA_DIR="$CMA_DIR" bash "$SS_HOOK" </dev/null 2>&1)
if [[ -z "$out" ]]; then
    printf "PASS  %s\n" "session-start silent when cma missing"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "session-start silent when cma missing" "$out"
    fail=$((fail + 1))
fi
# Surface event logged by cma-pre invocations
# Both counts default to 0 when the log does not exist. Guarding only the
# "before" read would make the "after" read fail noisily and compare an empty
# string.
events_before=0
[[ -f "$CMA_DIR/surface_events.jsonl" ]] && events_before=$(wc -l < "$CMA_DIR/surface_events.jsonl" | tr -d ' ')
bash "$PRE" --check "git commit auth-fix" >/dev/null 2>&1
events_after=0
[[ -f "$CMA_DIR/surface_events.jsonl" ]] && events_after=$(wc -l < "$CMA_DIR/surface_events.jsonl" | tr -d ' ')
if [[ "$events_after" -gt "$events_before" ]]; then
    printf "PASS  %s\n" "cma-pre logs surface event via cma surface"
    pass=$((pass + 1))
else
    printf "FAIL  %s (before=%s after=%s)\n" "cma-pre logs surface event via cma surface" "$events_before" "$events_after"
    fail=$((fail + 1))
fi

# CMA_PRE_TRIGGERS env var override
# NOTE(review): cma-pre queries whenever a surface keyword is detected, even
# for non-trigger commands, so "auth-config" alone makes this pass with or
# without the override. The check cannot currently fail because of
# CMA_PRE_TRIGGERS handling; revisit if triggers are meant to gate the query.
output=$(CMA_PRE_TRIGGERS="custom_tool" bash "$PRE" --check "custom_tool auth-config" 2>&1)
if [[ "$output" == *"test wrapper miss"* ]]; then
    printf "PASS  %s\n" "cma-pre respects CMA_PRE_TRIGGERS override"
    pass=$((pass + 1))
else
    printf "FAIL  %s (out=%q)\n" "cma-pre respects CMA_PRE_TRIGGERS override" "$output"
    fail=$((fail + 1))
fi