From ca69ca9cde8473f94cd32e4ec5c1b72ebf5025f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Martinez?= Date: Fri, 8 May 2026 17:59:29 -0300 Subject: [PATCH] Use normal RunProof workflow in demo --- demo/.gitignore | 1 + demo/README.md | 116 +++++++++++++++++++++++++++++++++++ demo/broken-app/app.js | 5 ++ demo/broken-app/package.json | 9 +++ demo/broken-app/test.js | 5 ++ demo/scripts/run-demo.ps1 | 110 +++++++++++++++++++++++++++++++++ demo/scripts/run-demo.sh | 114 ++++++++++++++++++++++++++++++++++ 7 files changed, 360 insertions(+) create mode 100644 demo/.gitignore create mode 100644 demo/README.md create mode 100644 demo/broken-app/app.js create mode 100644 demo/broken-app/package.json create mode 100644 demo/broken-app/test.js create mode 100644 demo/scripts/run-demo.ps1 create mode 100755 demo/scripts/run-demo.sh diff --git a/demo/.gitignore b/demo/.gitignore new file mode 100644 index 0000000..3088ce6 --- /dev/null +++ b/demo/.gitignore @@ -0,0 +1 @@ +.demo-workdir/ diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000..e0f85b8 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,116 @@ +# Demo: RunProof en 60 segundos + +Esta carpeta muestra el potencial de RunProof con una historia mínima y reproducible: + +1. Un agente dice: **"listo, los tests pasan"**. +2. El código todavía está roto. +3. RunProof ejecuta el comando real y bloquea el avance. +4. Se aplica una corrección de una línea. +5. RunProof acepta la verificación solo después de capturar evidencia de ejecución exitosa. 
+ +## Qué hay en esta demo + +```text +demo/ +├── broken-app/ +│ ├── app.js # bug intencional: suma usando resta +│ ├── package.json # sin dependencias externas +│ └── test.js # una aserción que debe fallar al inicio +└── scripts/ + ├── run-demo.sh # demo automatizada para macOS/Linux + └── run-demo.ps1 # demo automatizada para Windows PowerShell +``` + +El bug intencional está en `broken-app/app.js`: + +```js +function sum(a, b) { + return a - b; +} +``` + +## Ejecutar la demo completa + +Desde la raíz del repo: + +```bash +./demo/scripts/run-demo.sh +``` + +En Windows PowerShell: + +```powershell +.\demo\scripts\run-demo.ps1 +``` + +Los scripts crean `demo/.demo-workdir/` y corren el flujo normal de usuario: `runproof init`, `runproof run`, edición de artefactos, `runproof ready`, `runproof transition`, verificación fallida, fix de una línea y verificación exitosa. La carpeta temporal está ignorada por git. + +## Recorrido manual + +### 1. Demuestra que el código está roto + +```bash +npm test --prefix demo/broken-app +``` + +Salida esperada: Node lanza un `AssertionError` porque `sum(2, 2)` devuelve `0` en vez de `4`. + +### 2. Lee la promesa falsa del agente + +> "Listo, los tests pasan." + +RunProof no acepta esa frase como evidencia. Necesita ejecutar el comando. + +### 3. 
Observa cómo RunProof bloquea el cierre falso + +El script automatizado sigue primero los pasos normales del workflow: + +```bash +python -m runproof init --no-prompt --root demo/.demo-workdir +python -m runproof run demo-sum-bug --profile quick --title "Fix broken sum demo" --root demo/.demo-workdir +# editar proposal.md +python -m runproof ready demo-sum-bug --root demo/.demo-workdir +python -m runproof transition demo-sum-bug task --root demo/.demo-workdir +# editar tasks.md +python -m runproof ready demo-sum-bug --root demo/.demo-workdir +``` + +Después ejecuta la verificación real dentro del workspace desechable: + +```bash +python -m runproof verify demo-sum-bug --command "npm test --prefix broken-app" --root demo/.demo-workdir +``` + +Con el bug presente, RunProof devuelve un error similar a: + +```text +✗ ERROR: .runproof/evidence/demo-sum-bug: verification command failed (exit 1): npm test --prefix broken-app +``` + +### 4. Aplica la corrección real + +Cambia la implementación a: + +```js +function sum(a, b) { + return a + b; +} +``` + +### 5. Vuelve a verificar + +Cuando el comando realmente pasa, RunProof registra la evidencia: + +```text +✔ Verification recorded: demo-sum-bug +``` + +## Por qué esto muestra el potencial + +RunProof convierte una afirmación informal —"ya está"— en una regla verificable del repositorio: + +- si el comando no se ejecutó, no hay evidencia; +- si el comando falló, el cambio queda bloqueado; +- si el comando pasó, queda un registro con salida y checksum bajo `.runproof/evidence/`. + +**Promesa corta:** RunProof evita cierres falsos de agentes y solo acepta progreso respaldado por ejecución real. 
diff --git a/demo/broken-app/app.js b/demo/broken-app/app.js
new file mode 100644
index 0000000..fd44f11
--- /dev/null
+++ b/demo/broken-app/app.js
@@ -0,0 +1,7 @@
+// Demo fixture: the subtraction below is the intentional bug. The demo scripts
+// patch their own copy of this file, so leave the operator as-is here.
+function sum(a, b) {
+  return a - b;
+}
+
+module.exports = { sum };
diff --git a/demo/broken-app/package.json b/demo/broken-app/package.json
new file mode 100644
index 0000000..e871d15
--- /dev/null
+++ b/demo/broken-app/package.json
@@ -0,0 +1,9 @@
+{
+  "name": "runproof-demo-broken-app",
+  "private": true,
+  "version": "1.0.0",
+  "description": "Tiny intentionally broken app used by the RunProof demo.",
+  "scripts": {
+    "test": "node test.js"
+  }
+}
diff --git a/demo/broken-app/test.js b/demo/broken-app/test.js
new file mode 100644
index 0000000..7eac8f1
--- /dev/null
+++ b/demo/broken-app/test.js
@@ -0,0 +1,7 @@
+// Single check for the demo: the strict assertion throws (non-zero exit) while
+// sum(2, 2) is not 4; the PASS line is printed only when it holds.
+const assert = require("node:assert/strict");
+const { sum } = require("./app");
+
+assert.equal(sum(2, 2), 4);
+console.log("PASS: sum(2, 2) === 4");
diff --git a/demo/scripts/run-demo.ps1 b/demo/scripts/run-demo.ps1
new file mode 100644
index 0000000..8119dc2
--- /dev/null
+++ b/demo/scripts/run-demo.ps1
@@ -0,0 +1,127 @@
+# RunProof demo (PowerShell): rebuilds demo/.demo-workdir from scratch and walks
+# the seven demo steps against a disposable copy of broken-app.
+$ErrorActionPreference = "Stop"
+
+$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
+$DemoDir = Resolve-Path (Join-Path $ScriptDir "..")
+$RepoRoot = Resolve-Path (Join-Path $DemoDir "..")
+$WorkDir = Join-Path $DemoDir ".demo-workdir"
+
+# Prepend the repo root to PYTHONPATH (presumably so `python -m runproof`
+# resolves to this checkout), keeping any value the caller already set.
+if ($env:PYTHONPATH) {
+    $env:PYTHONPATH = "$RepoRoot$([IO.Path]::PathSeparator)$env:PYTHONPATH"
+} else {
+    $env:PYTHONPATH = "$RepoRoot"
+}
+
+# Prints a blank line followed by a step heading.
+function Section($Text) {
+    Write-Host ""
+    Write-Host "-- $Text"
+}
+
+# Runs $Command and throws if it exits 0; used for steps that must fail.
+function Invoke-ExpectFail([scriptblock]$Command, [string]$Label) {
+    & $Command
+    if ($LASTEXITCODE -eq 0) {
+        throw "Expected failure, but command passed: $Label"
+    }
+    Write-Host "✓ blocked as expected (exit $LASTEXITCODE)"
+}
+
+# Runs $Command and throws if it exits non-zero. Needed because
+# $ErrorActionPreference = "Stop" does not react to native exit codes by
+# default, so a failed python/npm step would otherwise be silently ignored.
+function Invoke-ExpectPass([scriptblock]$Command, [string]$Label) {
+    & $Command
+    if ($LASTEXITCODE -ne 0) {
+        throw "Command failed (exit $LASTEXITCODE): $Label"
+    }
+}
+
+# Start from a clean disposable workspace on every run.
+if (Test-Path $WorkDir) {
+    Remove-Item -Recurse -Force $WorkDir
+}
+New-Item -ItemType Directory -Path $WorkDir | Out-Null
+Copy-Item -Recurse (Join-Path $DemoDir "broken-app") (Join-Path $WorkDir "broken-app")
+
+Section "1/7 Start the normal RunProof workflow"
+Invoke-ExpectPass { python -m runproof init --no-prompt --root $WorkDir } "runproof init"
+Invoke-ExpectPass { python -m runproof run demo-sum-bug --profile quick --title "Fix broken sum demo" --root $WorkDir } "runproof run"
+
+Section "2/7 User edits proposal.md, then marks it ready"
+@'
+---
+schema: sdd.artifact.v1
+artifact: proposal
+change_id: demo-sum-bug
+profile: quick
+status: draft
+created: 2026-05-08
+updated: 2026-05-08
+---
+# Proposal
+
+## Intent
+
+Demonstrate that RunProof blocks a broken test run even when an agent claims the fix is complete.
+
+## Scope
+
+- Keep one intentionally broken function under `broken-app/`.
+- Verify the change with `npm test --prefix broken-app`.
+
+## Non-Scope
+
+- No UI.
+- No external dependencies.
+'@ | Set-Content -Path (Join-Path $WorkDir ".runproof/changes/demo-sum-bug/proposal.md") -NoNewline
+Invoke-ExpectPass { python -m runproof ready demo-sum-bug --root $WorkDir } "runproof ready (proposal)"
+Invoke-ExpectPass { python -m runproof transition demo-sum-bug task --root $WorkDir } "runproof transition"
+Invoke-ExpectPass { python -m runproof run demo-sum-bug --no-create --root $WorkDir } "runproof run --no-create"
+
+Section "3/7 User edits tasks.md, then marks it ready"
+@'
+---
+schema: sdd.artifact.v1
+artifact: tasks
+change_id: demo-sum-bug
+profile: quick
+status: draft
+created: 2026-05-08
+updated: 2026-05-08
+---
+# Tasks
+
+- [x] T-001 Reproduce the failing test for the broken sum demo.
+  - Requirement: failing baseline is visible
+  - Evidence: `npm test --prefix broken-app`
+- [x] T-002 Verify RunProof blocks the failing command before the fix.
+  - Requirement: fake completion is blocked
+  - Evidence: `runproof verify demo-sum-bug --command "npm test --prefix broken-app"`
+- [x] T-003 Apply the one-line fix and capture passing evidence.
+  - Requirement: real execution passes
+  - Evidence: `npm test --prefix broken-app`
+'@ | Set-Content -Path (Join-Path $WorkDir ".runproof/changes/demo-sum-bug/tasks.md") -NoNewline
+Invoke-ExpectPass { python -m runproof ready demo-sum-bug --root $WorkDir } "runproof ready (tasks)"
+Invoke-ExpectPass { python -m runproof run demo-sum-bug --no-create --root $WorkDir } "runproof run --no-create"
+
+Section "4/7 An agent claims: 'done, tests pass'"
+Write-Host "🤖 Agent: done, tests pass."
+
+Section "5/7 Reality check: the command fails"
+Invoke-ExpectFail { npm test --prefix (Join-Path $WorkDir "broken-app") } "npm test"
+
+Section "6/7 RunProof blocks the fake completion"
+Invoke-ExpectFail { python -m runproof verify demo-sum-bug --command "npm test --prefix broken-app" --root $WorkDir } "runproof verify"
+
+Section "7/7 Apply the one-line fix and record real passing evidence"
+$AppPath = Join-Path $WorkDir "broken-app/app.js"
+(Get-Content $AppPath -Raw).Replace("return a - b;", "return a + b;") | Set-Content -Path $AppPath -NoNewline
+Invoke-ExpectPass { npm test --prefix (Join-Path $WorkDir "broken-app") } "npm test"
+Invoke-ExpectPass { python -m runproof verify demo-sum-bug --command "npm test --prefix broken-app" --root $WorkDir } "runproof verify"
+
+Write-Host ""
+Write-Host "✅ Demo complete. Evidence is in $WorkDir/.runproof/evidence/demo-sum-bug/"
diff --git a/demo/scripts/run-demo.sh b/demo/scripts/run-demo.sh
new file mode 100755
index 0000000..a71d1cf
--- /dev/null
+++ b/demo/scripts/run-demo.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# RunProof demo (bash): rebuilds demo/.demo-workdir from scratch and walks the
+# seven demo steps against a disposable copy of broken-app.
+set -u
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DEMO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+REPO_ROOT="$(cd "$DEMO_DIR/.." && pwd)"
+WORKDIR="$DEMO_DIR/.demo-workdir"
+RUNPROOF=(python -m runproof)
+
+# Prepend the repo root (presumably so `python -m runproof` resolves to this checkout).
+export PYTHONPATH="$REPO_ROOT${PYTHONPATH:+:$PYTHONPATH}"
+
+# Prints a blank line followed by a step heading.
+section() {
+  printf '\n── %s\n' "$1"
+}
+
+# Runs the given command with errexit suspended and aborts the demo if it
+# unexpectedly succeeds. errexit is switched back on before returning.
+run_expect_fail() {
+  set +e
+  "$@"
+  status=$?
+  set -e
+  if [ "$status" -eq 0 ]; then
+    printf 'Expected failure, but command passed: %s\n' "$*" >&2
+    exit 1
+  fi
+  printf '✓ blocked as expected (exit %s)\n' "$status"
+}
+
+# From here on, any command that fails outside run_expect_fail aborts the demo.
+set -e
+rm -rf "$WORKDIR"
+mkdir -p "$WORKDIR"
+cp -R "$DEMO_DIR/broken-app" "$WORKDIR/broken-app"
+
+section "1/7 Start the normal RunProof workflow"
+"${RUNPROOF[@]}" init --no-prompt --root "$WORKDIR"
+"${RUNPROOF[@]}" run demo-sum-bug --profile quick --title "Fix broken sum demo" --root "$WORKDIR"
+
+section "2/7 User edits proposal.md, then marks it ready"
+cat > "$WORKDIR/.runproof/changes/demo-sum-bug/proposal.md" <<'MARKDOWN'
+---
+schema: sdd.artifact.v1
+artifact: proposal
+change_id: demo-sum-bug
+profile: quick
+status: draft
+created: 2026-05-08
+updated: 2026-05-08
+---
+# Proposal
+
+## Intent
+
+Demonstrate that RunProof blocks a broken test run even when an agent claims the fix is complete.
+
+## Scope
+
+- Keep one intentionally broken function under `broken-app/`.
+- Verify the change with `npm test --prefix broken-app`.
+
+## Non-Scope
+
+- No UI.
+- No external dependencies.
+MARKDOWN
+"${RUNPROOF[@]}" ready demo-sum-bug --root "$WORKDIR"
+"${RUNPROOF[@]}" transition demo-sum-bug task --root "$WORKDIR"
+"${RUNPROOF[@]}" run demo-sum-bug --no-create --root "$WORKDIR"
+
+section "3/7 User edits tasks.md, then marks it ready"
+cat > "$WORKDIR/.runproof/changes/demo-sum-bug/tasks.md" <<'MARKDOWN'
+---
+schema: sdd.artifact.v1
+artifact: tasks
+change_id: demo-sum-bug
+profile: quick
+status: draft
+created: 2026-05-08
+updated: 2026-05-08
+---
+# Tasks
+
+- [x] T-001 Reproduce the failing test for the broken sum demo.
+  - Requirement: failing baseline is visible
+  - Evidence: `npm test --prefix broken-app`
+- [x] T-002 Verify RunProof blocks the failing command before the fix.
+  - Requirement: fake completion is blocked
+  - Evidence: `runproof verify demo-sum-bug --command "npm test --prefix broken-app"`
+- [x] T-003 Apply the one-line fix and capture passing evidence.
+  - Requirement: real execution passes
+  - Evidence: `npm test --prefix broken-app`
+MARKDOWN
+"${RUNPROOF[@]}" ready demo-sum-bug --root "$WORKDIR"
+"${RUNPROOF[@]}" run demo-sum-bug --no-create --root "$WORKDIR"
+
+section "4/7 An agent claims: 'done, tests pass'"
+printf '🤖 Agent: done, tests pass.\n'
+
+section "5/7 Reality check: the command fails"
+run_expect_fail npm test --prefix "$WORKDIR/broken-app"
+
+section "6/7 RunProof blocks the fake completion"
+run_expect_fail "${RUNPROOF[@]}" verify demo-sum-bug --command "npm test --prefix broken-app" --root "$WORKDIR"
+
+section "7/7 Apply the one-line fix and record real passing evidence"
+# Patch the copied app.js in place: swap the subtraction for an addition.
+python - <<'PY' "$WORKDIR/broken-app/app.js"
+from pathlib import Path
+import sys
+path = Path(sys.argv[1])
+text = path.read_text()
+path.write_text(text.replace("return a - b;", "return a + b;"))
+PY
+npm test --prefix "$WORKDIR/broken-app"
+"${RUNPROOF[@]}" verify demo-sum-bug --command "npm test --prefix broken-app" --root "$WORKDIR"
+
+printf '\n✅ Demo complete. Evidence is in %s/.runproof/evidence/demo-sum-bug/\n' "$WORKDIR"