Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions scenarios.json
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,39 @@
"notes": "AI-powered lead scoring with CRM sync and human-in-the-loop approval. Validates CRM adapter, approval state machine, and cross-plugin composition with sidecar containers.",
"lastTested": null,
"lastResult": null
},
"85-self-improving-api": {
"status": "draft",
"namespace": "wf-scenario-self-improving-api",
"deployed": false,
"testCount": 0,
"passCount": 0,
"failCount": 0,
"notes": "Self-improving API scenario. AI agent (Ollama + Gemma 4) improves a SQLite task CRUD API by adding FTS5 search, cursor-based pagination, rate limiting, and structured logging. Validates agent.guardrails, step.agent_execute, step.blackboard_post, and hot_reload deploy strategy via Docker Compose.",
"lastTested": null,
"lastResult": null
},
"86-self-extending-mcp": {
"status": "draft",
"namespace": "wf-scenario-86",
"deployed": false,
"testCount": 0,
"passCount": 0,
"failCount": 0,
"notes": "Self-extending MCP tooling scenario. Agent creates task_analytics and task_forecast as mcp_tool trigger pipelines, uses them iteratively. Validates mcp:self_improve:* permission, guardrails, and two-iteration tool chain. Real Ollama + Gemma 4 via Docker Compose.",
"lastTested": null,
"lastResult": null
},
"87-autonomous-agile-agent": {
"status": "draft",
"namespace": "wf-scenario-87",
"deployed": false,
"testCount": 0,
"passCount": 0,
"failCount": 0,
"notes": "Autonomous agile agent scenario. Agent audits, plans, deploys, and verifies up to 5 iterations without human direction. Full loop: audit \u2192 plan \u2192 validate \u2192 deploy \u2192 verify \u2192 git_commit. Real Ollama + Gemma 4 via Docker Compose.",
"lastTested": null,
"lastResult": null
}
}
}
30 changes: 30 additions & 0 deletions scenarios/85-self-improving-api/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Scenario 85 helper targets for the Docker Compose stack and the Go tests.
# Every target here names a command rather than a file, so all of them are
# declared phony; otherwise a file or directory with the same name would make
# the target look up to date and silently skip its recipe.
.PHONY: up down pull-model logs test test-config test-guardrails test-short clean

SCENARIO := 85-self-improving-api

# Pull the model first, then start the stack in the background.
up: pull-model
	docker compose up -d

# Stop the stack; volumes are kept (see `clean` to remove them).
down:
	docker compose down

# Download the gemma4 model via a one-off ollama container.
pull-model:
	docker compose run --rm ollama bash -c "ollama pull gemma4 && echo 'Model ready.'"

# Follow the logs of all services.
logs:
	docker compose logs -f

# Run the whole test tree with a 30-minute timeout.
test:
	go test ./tests/... -v -timeout 30m

# Run only the tests matching TestConfigValidation.
test-config:
	go test ./tests/ -run TestConfigValidation -v

# Run only the tests matching TestGuardrails.
test-guardrails:
	go test ./tests/ -run TestGuardrails -v

# Run the whole test tree in -short mode.
test-short:
	go test ./tests/... -v -short

# Stop the stack and remove its volumes.
clean:
	docker compose down -v
72 changes: 72 additions & 0 deletions scenarios/85-self-improving-api/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Scenario 85 — Self-Improving API

An AI agent autonomously improves a task CRUD API using the workflow
self-improvement loop with Ollama + Gemma 4, guardrails, and MCP tools.

## Overview

The agent starts with a simple SQLite-backed task API and iteratively improves it:

1. **FTS5 full-text search** with BM25 ranking (custom Yaegi module)
2. **Cursor-based pagination** for the list endpoint
3. **Rate limiting per IP** to protect the API
4. **Structured JSON logging** with response times

## Architecture

```mermaid
graph LR
Agent["Self-Improvement Agent\n(Gemma 4 via Ollama)"] -->|MCP tools| WFCTL["wfctl / LSP"]
Agent -->|propose config| Guardrails["Guardrails\n(immutable sections, command policy)"]
Guardrails -->|validated diff| Deploy["Deploy\n(hot reload)"]
Deploy -->|updated config| App["Task API\n(HTTP + SQLite)"]
App -->|healthcheck| Agent
```

## Self-Improvement Loop

```
load_config → designer (LLM + MCP tools) → blackboard_post
→ self_improve_validate → self_improve_diff → self_improve_deploy
```

Each iteration is committed to a local git repo so progress can be audited.

## Running

```bash
# Pull Gemma 4 and start all services
make up

# Stream logs
make logs

# Run all tests in short mode (-short; skips the Docker-dependent tests)
make test-short

# Run full e2e (requires Docker + Ollama + GPU)
make test
```

## Config Files

| File | Purpose |
|------|---------|
| `config/base-app.yaml` | Starting point: 5-endpoint task CRUD API |
| `config/agent-config.yaml` | Agent provider, guardrails, improvement pipeline |

## Guardrails

- **Immutable sections:** `modules.guardrails` cannot be modified without a challenge token
- **Command policy:** allowlist mode — only `go build`, `go test`, `wfctl`, `curl` permitted
- **Blocked:** pipe-to-shell (`curl ... | bash`) and script execution; static analysis is enabled on all commands
- **Tool scope:** agent limited to `mcp:wfctl:*` and `mcp:lsp:*` namespaces

## Tests

```bash
make test-config # Config validation (wfctl validate)
make test-guardrails # Guardrails config checks
make test-short # All tests with -short (skip Docker)
make test # Full e2e (requires running Docker stack)
```
150 changes: 150 additions & 0 deletions scenarios/85-self-improving-api/config/agent-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# ============================================================
# Scenario 85 — Self-Improving API: Agent + Guardrails Config
#
# The self-improvement agent uses Ollama + Gemma 4 to analyze
# and propose improvements to base-app.yaml. Guardrails enforce
# immutability of the guardrails section itself and block
# dangerous shell commands.
# ============================================================

# Modules instantiated for the agent process.
modules:
# SQLite store at /data/agent.db with WAL mode enabled.
- name: db
type: storage.sqlite
config:
dbPath: /data/agent.db
walMode: true

# Second SQLite store in its own file (/data/agent-state.db), WAL mode enabled.
# NOTE(review): nothing in this file references `db` or `agent_db` by name —
# confirm which consumer uses each store.
- name: agent_db
type: storage.sqlite
config:
dbPath: /data/agent-state.db
walMode: true

# HTTP server for the agent, bound to port 8081 on all interfaces.
- name: server
type: http.server
config:
address: ":8081"

# Router attached to the server above.
- name: router
type: http.router
dependsOn: [server]

# LLM provider: Ollama serving the gemma4 model at http://ollama:11434.
- name: ai
type: agent.provider
config:
provider: ollama
model: gemma4
base_url: http://ollama:11434
max_tokens: 8192

# Guardrails applied to the self-improvement agent.
- name: guardrails
type: agent.guardrails
config:
defaults:
enable_self_improvement: true
enable_iac_modification: false
# No human approval gate; diff review is still required.
require_human_approval: false
require_diff_review: true
max_iterations_per_cycle: 5
deploy_strategy: hot_reload
# Tool patterns the agent may use: only the wfctl and lsp MCP namespaces.
allowed_tools:
- "mcp:wfctl:*"
- "mcp:lsp:*"
# Shell command policy: allowlist mode with four permitted commands.
command_policy:
mode: allowlist
allowed_commands:
- "go build"
- "go test"
- "wfctl"
- "curl"
enable_static_analysis: true
block_pipe_to_shell: true
block_script_execution: true
# Config paths that are protected from modification; this entry protects the
# guardrails module itself.
immutable_sections:
- path: "modules.guardrails"
override: challenge_token
# NOTE(review): `override` appears both as a per-section value above and as a
# standalone mapping here — confirm the intended nesting against the
# agent.guardrails schema.
override:
mechanism: challenge_token
# Name of an environment variable; presumably holds the admin secret used for
# the challenge token — TODO confirm.
admin_secret_env: "WFCTL_ADMIN_SECRET"

# HTTP workflow wiring: binds the `router` and `server` modules by name.
workflows:
http:
router: router
server: server
# No static routes are declared here.
# NOTE(review): presumably the endpoints come from the pipelines' HTTP
# triggers (/improve, /healthz) — confirm.
routes: []

# Pipelines exposed by the agent process.
pipelines:
# Self-improvement pipeline, triggered by POST /improve.
self_improvement_loop:
trigger:
type: http
config:
path: /improve
method: POST
steps:
# Read the application config from /data/config/app.yaml, the same path the
# deploy step names as its config_path.
- name: load_config
type: step.read_file
config:
path: /data/config/app.yaml

# LLM design step using the `ai` provider. Every tool listed below falls under
# the guardrails allowed_tools patterns (mcp:wfctl:* and mcp:lsp:*).
# NOTE(review): max_iterations is 15 here while guardrails sets
# max_iterations_per_cycle to 5 — confirm these are independent limits.
- name: designer
type: step.agent_execute
config:
provider: ai
system_prompt: |
You are a workflow config designer. You have been given a task
to improve a workflow application. Analyze the current config
and propose improvements using the available MCP tools.
Always validate your proposals before submitting.
Target improvements:
- FTS5 full-text search with ranking (custom Yaegi module)
- Cursor-based pagination for list endpoints
- Rate limiting per IP
- Structured JSON logging with response times
tools:
- "mcp:wfctl:validate_config"
- "mcp:wfctl:inspect_config"
- "mcp:wfctl:get_module_schema"
- "mcp:wfctl:get_step_schema"
- "mcp:wfctl:list_module_types"
- "mcp:wfctl:list_step_types"
- "mcp:lsp:diagnose"
max_iterations: 15

# Post to the blackboard, tagged as phase `design` with artifact type
# `config_proposal`.
- name: post_design
type: step.blackboard_post
config:
phase: design
artifact_type: config_proposal

# Validation step: strict level, zero errors required.
- name: validate
type: step.self_improve_validate
config:
validation_level: strict
require_zero_errors: true

# Diff step.
# NOTE(review): meaning of `force: true` is not visible here — confirm it
# does not bypass guardrails' require_diff_review.
- name: diff
type: step.self_improve_diff
config:
force: true

# Deploy step using the hot_reload strategy, matching guardrails'
# deploy_strategy.
- name: deploy
type: step.self_improve_deploy
config:
strategy: hot_reload
config_path: /data/config/app.yaml

# Health endpoint: GET /healthz returns status 200 with a fixed JSON body.
health_check:
trigger:
type: http
config:
path: /healthz
method: GET
steps:
- name: respond
type: step.json_response
config:
status: 200
body:
status: healthy
scenario: "85-self-improving-api"
component: agent
Loading
Loading