Skip to content

Commit cb76263

Browse files
authored
Merge pull request #58 from VoynichLabs/feature/git-state-machine-enrichment
proposal: post-plan enrichment swarm via git-as-state-machine
2 parents 38ac99a + 2d586ac commit cb76263

1 file changed

Lines changed: 370 additions & 0 deletions

File tree

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
# Post-Plan Enrichment: Git as State Machine
2+
3+
## Core Idea
4+
5+
Each enrichment agent commits its output to the plan's GitHub repo. The git log IS the state. No separate state store needed.
6+
7+
## How It Works
8+
9+
Each enrichment agent:
10+
1. Reads from the repo (plan artifact + prior enrichment commits)
11+
2. Does its work
12+
3. Commits output to the repo with a structured commit message
13+
4. Signals completion
14+
15+
The orchestrator:
16+
1. Reads the repo commit log
17+
2. Determines which agents haven't run yet
18+
3. Triggers the next agent
19+
4. Repeats until all agents complete
20+
21+
## State Transitions (Example)
22+
23+
```
24+
commit a1b2 — [repo-agent] plan artifact initialized
25+
commit c3d4 — [research-agent] market research added
26+
commit e5f6 — [issues-agent] WBS converted to GitHub issues
27+
commit g7h8 — [scaffold-agent] folder structure + boilerplate committed
28+
commit i9j0 — [copy-agent] website copy drafted
29+
commit k1l2 — [reviewer-agent] critique and revision suggestions
30+
```
31+
32+
Each commit = a state transition. Full audit trail. Human-readable.
33+
34+
## Properties
35+
36+
**Durable:** Survives crashes. Restart from last commit, no data loss.
37+
38+
**Resumable:** Every agent run is idempotent: if the agent's output commit already exists, the orchestrator skips it, so a swarm can resume mid-run after a failure.
39+
40+
**Auditable:** Full enrichment history as git log. Each agent's contribution is isolated to its commit(s).
41+
42+
**Reviewable:** Humans (or Simon) can review enrichment between commits, approve/reject, branch at any point.
43+
44+
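**Parallelizable:** Independent agents (e.g. Research, Domain, Scaffold) can run on separate branches and merge when complete. Note that the reference orchestrator below runs agents sequentially; parallel branches are a possible extension, not something it implements.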
45+
46+
## The Orchestrator
47+
48+
### Complete Python Implementation
49+
50+
Create `enrichment_orchestrator.py`:
51+
52+
```python
53+
#!/usr/bin/env python3
54+
"""
55+
Git-as-state-machine enrichment orchestrator.
56+
Reads git log to determine which enrichment agents have run,
57+
then triggers remaining agents in sequence.
58+
"""
59+
60+
import json
import logging
import os
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional
68+
69+
# Configure root logging once at import time; this module is meant to be run
# as a script, so INFO-level progress output goes straight to the console.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("enrichment-orchestrator")
74+
75+
76+
@dataclass
class EnrichmentAgent:
    """An enrichment agent that runs as part of the swarm.

    An agent counts as "done" once any commit in the repository mentions
    ``[<name>]`` in its message; that marker is the only state that is kept.

    Attributes:
        name: Unique agent identifier, used as the ``[name]`` commit marker.
        description: Human-readable summary, placed in the commit subject.
        command: Shell command that performs the agent's work.
        depends_on: Names of agents that must complete first. ``None`` is
            normalised to an empty list.
    """

    name: str
    description: str
    command: str  # Exact shell command to execute
    depends_on: Optional[List[str]] = None  # Agent names that must complete first

    def __post_init__(self):
        # A fresh list per instance avoids sharing one mutable default.
        if self.depends_on is None:
            self.depends_on = []

    def has_committed(self, repo_path: str) -> bool:
        """Check if this agent's commit exists in the git log.

        Args:
            repo_path: Path of the git repository to inspect.

        Returns:
            True if at least one commit on any ref contains ``[name]``;
            False otherwise, including when ``git log`` cannot be run.
        """
        try:
            result = subprocess.run(
                [
                    "git", "log", "--all", "--oneline",
                    # Match the marker literally so regex metacharacters in
                    # an agent name cannot widen or break the search.
                    "--fixed-strings", "--grep", f"[{self.name}]",
                ],
                cwd=repo_path,
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError) as e:
            logger.error(f"Error checking commit for {self.name}: {e}")
            return False

        if result.returncode != 0:
            logger.error(f"git log failed for {self.name}: {result.stderr}")
            return False
        return bool(result.stdout.strip())

    def _commit_message(self, output: str) -> str:
        """Build the structured commit message that records a completed run."""
        return (
            f"enrichment: [{self.name}] {self.description}\n\n"
            f"Agent: {self.name}\n"
            # Timezone-aware so timestamps from different machines compare.
            f"Timestamp: {datetime.now().astimezone().isoformat()}\n"
            f"Status: completed\n\n"
            f"Output:\n{output}"
        )

    def run(self, repo_path: str, context: Dict[str, Any]) -> bool:
        """Execute the agent and commit its output.

        The command runs in ``repo_path`` with ``PLANEXE_REPO``,
        ``PLANEXE_AGENT_NAME`` and ``PLANEXE_AGENT_CONTEXT`` (JSON) added to
        the environment. On success all changes are staged and committed.

        Args:
            repo_path: Path of the git repository the agent works in.
            context: JSON-serialisable plan data handed to the agent.

        Returns:
            True only if the command exited with 0 AND the completion commit
            was recorded; False on any failure or timeout.
        """
        logger.info(f"Running agent: {self.name}")

        try:
            # Execute the agent command with environment context
            env = {
                **os.environ,
                "PLANEXE_REPO": str(repo_path),
                "PLANEXE_AGENT_NAME": self.name,
                "PLANEXE_AGENT_CONTEXT": json.dumps(context),
            }

            # shell=True: `command` is a shell string taken from the agent
            # definitions (trusted configuration), not from user input.
            result = subprocess.run(
                self.command,
                cwd=repo_path,
                shell=True,
                capture_output=True,
                text=True,
                timeout=300,
                env=env,
            )
        except subprocess.TimeoutExpired:
            logger.error(f"Agent {self.name} timed out (>300s)")
            return False
        except Exception as e:
            logger.error(f"Error running agent {self.name}: {e}")
            return False

        if result.returncode != 0:
            logger.error(f"Agent {self.name} failed:")
            logger.error(f"STDERR: {result.stderr}")
            return False

        logger.info(f"Agent {self.name} completed")

        try:
            add_result = subprocess.run(
                ["git", "add", "-A"],
                cwd=repo_path,
                capture_output=True,
                text=True,
                timeout=10,
            )
            if add_result.returncode != 0:
                logger.error(
                    f"git add failed for {self.name}: {add_result.stderr}"
                )
                return False

            # --allow-empty: the commit is the state marker, so it must be
            #   recorded even when the agent changed no files; otherwise the
            #   agent would be re-run on every resume.
            # -F -: pass the message on stdin so large agent output cannot
            #   exceed the OS limit on command-line argument size.
            commit_result = subprocess.run(
                ["git", "commit", "--allow-empty", "-F", "-"],
                input=self._commit_message(result.stdout),
                cwd=repo_path,
                capture_output=True,
                text=True,
                timeout=10,
            )
        except subprocess.TimeoutExpired:
            logger.error(f"git timed out while committing output of {self.name}")
            return False
        except OSError as e:
            logger.error(f"Error committing output of {self.name}: {e}")
            return False

        if commit_result.returncode != 0:
            # Without the commit the run is not recorded, so report failure
            # instead of pretending the state transition happened.
            logger.error(
                f"Commit failed for {self.name}: {commit_result.stderr}"
            )
            return False

        logger.info(f"Committed output from {self.name}")
        return True
171+
172+
173+
class EnrichmentOrchestrator:
    """Orchestrates the enrichment swarm using git as state machine.

    Agents are run one at a time in dependency order. Whether an agent has
    already run is decided by asking the agent to look for its commit in the
    repository, so an interrupted run can simply be started again.
    """

    def __init__(self, repo_path: str, agents: List[EnrichmentAgent]):
        """Set up the orchestrator.

        Args:
            repo_path: Path of the plan's git repository.
            agents: Agents managed by this run, keyed internally by name.
        """
        self.repo_path = repo_path
        self.agents = {agent.name: agent for agent in agents}
        self.completed = set()
        self.failed = set()
        self.context = self._load_plan_artifact()

    def _load_plan_artifact(self) -> Dict[str, Any]:
        """Load ``plan.json`` from the repo; return ``{}`` if it is absent."""
        artifact_path = Path(self.repo_path) / "plan.json"
        if artifact_path.exists():
            # Explicit encoding: do not depend on the platform default.
            with open(artifact_path, "r", encoding="utf-8") as f:
                return json.load(f)
        return {}

    def _dependency_satisfied(self, dep: str) -> bool:
        """Return True if the dependency named ``dep`` has completed."""
        if dep in self.completed:
            return True
        if dep in self.agents:
            # Managed by this run: it has either not run yet or has failed.
            return False
        # Not managed by this run (e.g. filtered out on the command line).
        # The git log is the state, so consult it through a probe agent that
        # is only used for its commit lookup and is never executed.
        probe = EnrichmentAgent(name=dep, description="", command="")
        return probe.has_committed(self.repo_path)

    def _check_dependencies(self, agent_name: str) -> bool:
        """Check if all dependencies for an agent have completed."""
        agent = self.agents[agent_name]
        for dep in agent.depends_on:
            if not self._dependency_satisfied(dep):
                logger.debug(f"Blocking {agent_name}: waiting for {dep}")
                return False
        return True

    def run(self, max_steps: int = 50) -> Dict[str, Any]:
        """Run the orchestration loop.

        Args:
            max_steps: Upper bound on scheduling rounds.

        Returns:
            A dict with ``completed``, ``failed`` and ``pending`` agent name
            lists, the number of ``steps`` taken, and ``status``, which is
            ``"success"`` only when no agent failed and none is left
            pending; otherwise ``"partial"``.
        """
        logger.info(f"Starting enrichment orchestration in {self.repo_path}")
        logger.info(f"Found {len(self.agents)} agents: {list(self.agents.keys())}")

        step_count = 0

        while step_count < max_steps:
            step_count += 1
            logger.info(f"=== Step {step_count} ===")

            # Find next agent(s) to run
            ready_agents = [
                agent_name
                for agent_name in self.agents
                if agent_name not in self.completed
                and agent_name not in self.failed
                and self._check_dependencies(agent_name)
            ]

            if not ready_agents:
                logger.info("No more agents ready to run")
                break

            # Run agents sequentially
            for agent_name in ready_agents:
                agent = self.agents[agent_name]

                if agent.has_committed(self.repo_path):
                    logger.info(f"Agent {agent_name} already completed (skipping)")
                    self.completed.add(agent_name)
                    continue

                if agent.run(self.repo_path, self.context):
                    self.completed.add(agent_name)
                else:
                    self.failed.add(agent_name)

        # Agents that neither completed nor failed were blocked by an unmet
        # dependency or cut off by max_steps; the run is then not a success.
        pending = [
            agent_name
            for agent_name in self.agents
            if agent_name not in self.completed and agent_name not in self.failed
        ]
        if pending:
            logger.warning(f"Agents that did not run: {pending}")

        return {
            "completed": list(self.completed),
            "failed": list(self.failed),
            "pending": pending,
            "status": "success" if not self.failed and not pending else "partial",
            "steps": step_count,
        }
246+
247+
248+
# Default agent definitions.
# The depends_on links form a single linear chain:
# research -> issues -> scaffold -> copy -> reviewer.
DEFAULT_AGENTS = [
    EnrichmentAgent(
        name="research-agent",
        description="Conduct market research and collect contextual information",
        command="python -m planexe.enrichment.research_agent",
    ),
    EnrichmentAgent(
        name="issues-agent",
        description="Convert WBS to GitHub issues",
        command="python -m planexe.enrichment.issues_agent",
        depends_on=["research-agent"],
    ),
    EnrichmentAgent(
        name="scaffold-agent",
        description="Generate folder structure and boilerplate",
        command="python -m planexe.enrichment.scaffold_agent",
        depends_on=["issues-agent"],
    ),
    EnrichmentAgent(
        name="copy-agent",
        description="Draft website copy and documentation",
        command="python -m planexe.enrichment.copy_agent",
        depends_on=["scaffold-agent"],
    ),
    EnrichmentAgent(
        name="reviewer-agent",
        description="Review enrichments and provide critique",
        command="python -m planexe.enrichment.reviewer_agent",
        depends_on=["copy-agent"],
    ),
]
280+
281+
282+
if __name__ == "__main__":
    # CLI entry point: <repo_path> followed by an optional list of agent
    # names that restricts the run to those agents.
    if len(sys.argv) < 2:
        print("Usage: enrichment_orchestrator.py <repo_path> [agent_name ...]")
        sys.exit(1)

    repo_path = sys.argv[1]
    requested_agents = sys.argv[2:]

    if not Path(repo_path).is_dir():
        print(f"Repository path not found: {repo_path}", file=sys.stderr)
        sys.exit(2)

    # Filter agents if specific ones requested
    agents = DEFAULT_AGENTS
    if requested_agents:
        known_names = [a.name for a in DEFAULT_AGENTS]
        unknown = [name for name in requested_agents if name not in known_names]
        if unknown:
            # Fail loudly: silently dropping a misspelled name would run
            # nothing and still exit 0.
            print(
                f"Unknown agent(s): {', '.join(unknown)}. "
                f"Available: {', '.join(known_names)}",
                file=sys.stderr,
            )
            sys.exit(2)
        agents = [a for a in DEFAULT_AGENTS if a.name in requested_agents]

    orchestrator = EnrichmentOrchestrator(repo_path, agents)
    result = orchestrator.run()

    logger.info(f"Orchestration complete: {result}")
    print(json.dumps(result, indent=2))

    sys.exit(0 if result["status"] == "success" else 1)
302+
```
303+
304+
### Running the Orchestrator
305+
306+
**Direct execution:**
307+
```bash
308+
# Run all agents
309+
python enrichment_orchestrator.py /path/to/plan/repo
310+
311+
# Run specific agents only
312+
python enrichment_orchestrator.py /path/to/plan/repo research-agent issues-agent
313+
```
314+
315+
**GitHub Action workflow** (save as `.github/workflows/enrichment.yml`):
316+
```yaml
317+
name: Enrichment Swarm

on:
  push:
    paths:
      - 'plan.json'
    branches:
      - main

# The job pushes enrichment commits back to the repository,
# so the workflow token needs write access to repository contents.
permissions:
  contents: write

jobs:
  enrich:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history: the orchestrator reads `git log` to decide which
          # agents have already run; a shallow clone would hide earlier commits.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: pip install -r requirements.txt
      - name: Configure git identity
        # Must run before the orchestrator, which creates the commits.
        run: |
          git config user.name "Enrichment Bot"
          git config user.email "bot@planexe.local"
      - name: Run enrichment swarm
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: python enrichment_orchestrator.py "${{ github.workspace }}"
      - name: Push enrichment commits
        # Push even after a partial run so completed agents are not redone.
        if: always()
        run: git push origin HEAD:${{ github.ref }}
346+
```
347+
348+
**Railway cron job:**
349+
```bash
350+
# In Railway dashboard: create Job with schedule "0 * * * *" (hourly)
351+
# Build command: pip install -r requirements.txt
352+
# Start command: python enrichment_orchestrator.py /data/plans/${PLAN_ID}
353+
```
354+
355+
## Relationship to Session State
356+
357+
Session state (in-memory) is optimal for single-task, single-session work (coding agent fixing one bug). Git state is optimal for multi-step enrichment that:
358+
- Spans hours or days
359+
- Involves human review between steps
360+
- Needs to be resumable after failure
361+
- Benefits from parallel enrichment branches
362+
363+
These aren't mutually exclusive. An agent can use in-memory session state *within* its own run, then commit the result to git when done.
364+
365+
## Open Questions
366+
367+
1. Should enrichment run sequentially or in parallel branches?
368+
2. What triggers the orchestrator — plan generation webhook, or on-demand?
369+
3. Should humans approve enrichment commits via PR before merge?
370+
4. How does credit metering work — per agent run, or per enrichment session?

0 commit comments

Comments
 (0)