diff --git a/Codegen/REPO_NAME_OPERATE.md b/Codegen/REPO_NAME_OPERATE.md new file mode 100644 index 00000000..e69de29b diff --git a/Codegen/analysis.md b/Codegen/analysis.md new file mode 100644 index 00000000..61c25167 --- /dev/null +++ b/Codegen/analysis.md @@ -0,0 +1,161 @@ +--- +name: analysis +description: Spawn parallel agents to produce a deep, comprehensive, multi-dimensional codebase analysis — architecture, flows, APIs, quality, and onboarding +--- + +Perform a complete, exhaustive analysis of this codebase. Spawn **9 parallel agents** using the Task tool (subagent_type: Explore) in a **single response**. Each agent owns one analytical dimension. No agent may speculate — every finding must reference actual file paths, line numbers, or content read from the repository. + +--- + +## Agent Assignments + +### Agent 1 — Repository Topology & Module Map +- List every top-level directory with its precise purpose +- Identify sub-modules, workspaces, packages, or monorepo members +- Identify major architectural layers (e.g., API, domain, data access, UI, infrastructure, scripts, shared libs) and describe how they relate to one another +- Produce a text tree of the repo at 2–3 levels deep with inline annotations +- Flag any directories whose purpose is ambiguous or redundant + +### Agent 2 — Entrypoints & Execution Flows +- Find ALL entrypoints: CLIs, HTTP servers, background workers, schedulers, event listeners, framework bootstraps (main(), app factories, WSGI/ASGI apps, server start scripts, lambda handlers) +- For each entrypoint, trace the high-level control flow from external trigger → request parsing → business logic dispatch → response/side effect +- Note middleware chains, plugin hooks, and lifecycle hooks involved +- Identify startup/teardown sequences and what they initialize or release +- Flag any entrypoints that are dead, unreachable, or unregistered + +### Agent 3 — Data Flows & Transformation Paths +- Trace all major data flows: where data enters 
(HTTP, CLI args, message queues, files, DB reads, environment), how it is transformed, and where it exits (HTTP response, DB write, file write, queue publish, external API call) +- Identify every read/write path to persistent stores (databases, caches, files, object storage) +- Summarize key data transformation steps: parsing, validation, enrichment, serialization +- Produce text descriptions ready to render as: + - **Component Diagram**: list every major module/service and its named dependencies + - **Sequence Diagram (primary use-case)**: step-by-step actor→system message flow for the single most important operation (e.g., core API endpoint or main CLI command) + - **Sequence Diagram (secondary use-case)**: next most important operation +- Flag any data that flows without validation, sanitization, or error handling + +### Agent 4 — APIs, Interfaces & Public Contracts +- Enumerate ALL public interfaces: exported functions, classes, REST endpoints, gRPC services, CLI commands, WebSocket events, plugin extension points, SDK entry surfaces +- For each, document: purpose, parameters (name + type), return type/shape, side effects, error conditions, and expected caller behavior +- Identify which interfaces are versioned, deprecated, or unstable +- Identify interfaces that lack documentation, input validation, or error contracts +- Flag any breaking changes risk between layers (e.g., internal API used externally) + +### Agent 5 — Core Files, Functions & Data Structures +- List the 15–25 most central files in the codebase (highest dependency, most critical logic) +- For each critical function or class, summarize: inputs, outputs, algorithm, and side effects +- Enumerate all core domain models, entities, DTOs, schemas, and database models — including their fields, types, relationships, and validation constraints +- Identify shared utilities, helpers, and constants that are used across 3+ modules +- Document configuration loading: which files, env vars, feature flags, and 
secrets are read — and when +- Flag any god files, god classes, or functions with excessive cyclomatic complexity + +### Agent 6 — Frameworks, Libraries & Tech Stack +- Identify all programming languages, runtimes, and their versions (from lock files, toolchain files, or manifests) +- List all major frameworks (web, ORM, CLI, testing, auth, queuing, etc.) with versions +- Document the full build pipeline: package manager, bundler/compiler, transpilation steps, asset pipeline +- Document how to run the project locally: all required commands from zero to running +- Document how tests are run, and what coverage tooling is present +- Identify containerization (Docker, Compose, K8s manifests) and CI/CD scripts +- Flag any dependency version conflicts, unresolved peer deps, or critically outdated packages + +### Agent 7 — Capabilities, Features & Use-Cases +- Summarize what this program does from an end-user perspective — its core value proposition +- List every discrete user-facing feature or capability +- Produce 5 concrete example use-cases in this format: + ``` + Use-case N: [User goal] + Trigger: [How user initiates] + Flow: [Modules A → B → C involved] + Output: [What the user gets] + ``` +- Identify features that are partially implemented, stubbed out, or marked TODO +- Identify any capability gaps relative to what the README or documentation promises + +### Agent 8 — Code Quality, Consistency & Onboarding +- Assess naming consistency: files, functions, variables, constants, types — are conventions followed uniformly? +- Assess modularity: single-responsibility adherence, coupling/cohesion balance, circular dependency presence +- Assess test coverage: what is tested vs. what is untested; identify the riskiest untested paths +- Assess documentation level: inline comments, JSDoc/docstrings, README completeness, architecture docs +- Assess error handling consistency: are errors caught, typed, logged, and propagated uniformly? 
+- Rate onboarding difficulty (Easy / Medium / Hard / Very Hard) with specific justification +- Identify the top 5 most confusing or undiscoverable parts of the codebase for a new developer + +### Agent 9 — Strengths, Risks & Strategic Assessment +- Identify the top 5 architectural strengths with specific evidence (file/pattern references) +- Identify the top 5 technical risks: scalability bottlenecks, single points of failure, security exposure, maintainability debt +- Identify any anti-patterns present (e.g., anemic domain model, leaky abstractions, spaghetti dependencies) +- Rate overall implementation comprehensiveness on this scale — with justification: + - `1 — Skeleton`: scaffolding only, nothing functional + - `2 — Prototype`: core path works, major gaps elsewhere + - `3 — MVP`: primary use-cases work end-to-end, many edge cases missing + - `4 — Solid`: production-capable, tested, documented + - `5 — Production-Grade`: hardened, observable, fully documented, extensible +- State explicitly: what is this codebase best suited for, and where would it be ill-suited? + +--- + +## Agent Rules + +1. Read actual source files — no assumptions about what code probably does +2. Every claim must reference a specific file path or line number +3. If a file cannot be read, note it explicitly and skip rather than guess +4. Do not report opinions or preferences — only structural facts and verified patterns +5. Agents 1–8 are purely descriptive; Agent 9 is the only agent permitted to make evaluative judgments + +--- + +## Synthesis & Output + +After all 9 agents complete, synthesize their findings into a single `ANALYSIS.md` file at the project root using this exact structure: + +```markdown +# CODEBASE ANALYSIS: [Project Name] +Generated: [date] +Analyst: Claude (parallel 9-agent exploration) + +--- + +## 1. Repository Topology + +[From Agent 1 — tree + layer map] + +## 2. Entrypoints & Execution Flows + +[From Agent 2 — each entrypoint with control flow] + +## 3. 
Data Flows & Architecture Diagrams + +### 3a. Component Diagram (text) +### 3b. Sequence Diagram — [Primary Use-Case Name] +### 3c. Sequence Diagram — [Secondary Use-Case Name] + +[From Agent 3] + +## 4. APIs, Interfaces & Public Contracts + +[From Agent 4 — full enumeration with signatures] + +## 5. Core Files, Functions & Data Structures + +[From Agent 5 — central files, critical functions, domain models] + +## 6. Frameworks, Libraries & Tech Stack + +[From Agent 6 — full stack + run instructions] + +## 7. Capabilities, Features & Use-Cases + +[From Agent 7 — feature list + 5 use-cases] + +## 8. Code Quality & Onboarding Assessment + +[From Agent 8 — quality metrics + onboarding rating] + +## 9. Strengths, Risks & Strategic Assessment + +[From Agent 9 — strengths, risks, comprehensiveness rating, suitability] + +--- +*Analysis produced by parallel codebase exploration. All findings reference actual source files.* +``` + +Write the file, then tell the user it's ready and how many files were analyzed. \ No newline at end of file diff --git a/Codegen/candy.md b/Codegen/candy.md new file mode 100644 index 00000000..1a8a66aa --- /dev/null +++ b/Codegen/candy.md @@ -0,0 +1,45 @@ +--- +name: candy +description: Find low-risk, high-reward wins across the codebase using parallel exploration agents +--- + +Find quick wins in this codebase. Spawn 5 explore agents in parallel using the Task tool (subagent_type: Explore), each focusing on one area. Adapt each area to what's relevant for THIS project's stack and architecture. 
+ +**Agent 1 - Performance**: Inefficient algorithms, unnecessary work, missing early returns, blocking operations, things that scale poorly + +**Agent 2 - Dead Weight**: Unused code, unreachable paths, stale comments/TODOs, obsolete files, imports to nowhere + +**Agent 3 - Lurking Bugs**: Unhandled edge cases, missing error handling, resource leaks, race conditions, silent failures + +**Agent 4 - Security**: Hardcoded secrets, injection risks, exposed sensitive data, overly permissive access, unsafe defaults + +**Agent 5 - Dependencies & Config**: Unused packages, vulnerable dependencies, misconfigured settings, dead environment variables, orphaned config files + +## The Only Valid Findings + +A finding is ONLY valid if it falls into one of these categories: + +1. **Dead** - Code that literally does nothing. Unused, unreachable, no-op. +2. **Broken** - Will cause errors, crashes, or wrong behavior. Not "might" - WILL. +3. **Dangerous** - Security holes, data exposure, resource exhaustion. + +That's it. Three categories. If it doesn't fit, don't report it. + +**NOT valid findings:** +- "This works but could be cleaner" - NO +- "Modern best practice suggests..." - NO +- "This is verbose/repetitive but functional" - NO +- "You could use X instead of Y" - NO +- "This isn't how I'd write it" - NO + +If the code works, isn't dangerous, and does something - leave it alone. + +## Output Format + +For each finding: +``` +[DEAD/BROKEN/DANGEROUS] file:line - What it is +Impact: What happens if left unfixed +``` + +Finding nothing is a valid outcome. Most codebases don't have easy wins - that's fine. diff --git a/Codegen/carrot.md b/Codegen/carrot.md new file mode 100644 index 00000000..8e4d2a60 --- /dev/null +++ b/Codegen/carrot.md @@ -0,0 +1,55 @@ +--- +name: carrot +description: Verify implementations against real-world code samples and official documentation using parallel agents +--- + +Verify this codebase against current best practices and official documentation. 
Spawn 8 explore agents in parallel using the Task tool (subagent_type: Explore), each focusing on one category. Each agent must VERIFY findings using Grep MCP (real code samples) or WebSearch (official docs) - no assumptions allowed. + +**Agent 1 - Core Framework**: Detect the main framework (React, Next, Express, Django, Rails, etc.), verify usage patterns against official documentation via WebSearch + +**Agent 2 - Dependencies/Libraries**: Check if library APIs being used are current or deprecated. Verify against library documentation and Grep MCP for how modern codebases use these libraries + +**Agent 3 - Language Patterns**: Identify the primary language (TypeScript, Python, Go, etc.), verify idioms and patterns are current. Use Grep MCP to see how modern projects write similar code + +**Agent 4 - Configuration**: Examine build tools, bundlers, linters, and config files. Verify settings against current tool documentation via WebSearch + +**Agent 5 - Security Patterns**: Review auth, data handling, secrets management. Verify against current security guidance and OWASP recommendations via WebSearch + +**Agent 6 - Testing**: Identify test framework in use, verify testing patterns match current library recommendations. Check via docs and Grep MCP for modern test patterns + +**Agent 7 - API/Data Handling**: Review data fetching, state management, storage patterns. Verify against current patterns via Grep MCP and framework docs + +**Agent 8 - Error Handling**: Examine error handling patterns, verify they match library documentation. Use Grep MCP to compare against real-world implementations + +## Agent Workflow + +Each agent MUST follow this process: +1. **Identify** - What's relevant in THIS project for your category +2. **Find** - Locate specific implementations in the codebase +3. **Verify** - Check against Grep MCP (real code) OR WebSearch (official docs) +4. 
# ── Config ──────────────────────────────────────────────────────────────────────
# Credentials and machine-specific paths are read from the environment so they
# never have to be committed alongside the code.
#
#   CODEGEN_API_TOKEN    bearer token for the Codegen API (no default)
#   CODEGEN_DEFAULT_TPL  default instruction template file
#   CODEGEN_DIR          folder scanned for .md / .txt instruction files
API_BASE = "https://api.codegen.com/v1"
ORG_ID = 323

# SECURITY: this used to be a literal "sk-…" token checked into source control.
# Any token that was ever committed must be treated as compromised and revoked.
# With the variable unset the token is empty, so requests are presumably
# rejected as unauthenticated rather than sent with a leaked credential.
API_TOKEN = os.environ.get("CODEGEN_API_TOKEN", "")

# Built once at import time; every request in the API layer reuses this dict.
HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json",
}

POLL_SEC = 15

# The historical Windows locations remain the defaults, so an existing setup
# keeps working without any environment configuration.
DEFAULT_TPL = os.environ.get(
    "CODEGEN_DEFAULT_TPL", r"C:\Users\L\Documents\Codegen\analysis.md")
CODEGEN_DIR = os.environ.get(
    "CODEGEN_DIR", r"C:\Users\L\Documents\Codegen")
def attach_edit_menu(widget):
    """Attach a right-click Cut/Copy/Paste/Select-All context menu to any text widget.

    Works for ``tk.Text`` (and subclasses such as ``ScrolledText``) as well as
    entry-style widgets that provide ``select_range`` / ``icursor``.

    Args:
        widget: the Tk widget that should receive the context menu.
    """
    is_text = isinstance(widget, tk.Text)  # ScrolledText is a subclass of tk.Text

    def _fire(virtual_event):
        # Delegate to Tk's clipboard virtual events; a widget that cannot
        # honour the request raises TclError, which is deliberately ignored so
        # a menu click never surfaces an error.
        try:
            widget.event_generate(virtual_event)
        except tk.TclError:
            pass

    def _select_all():
        try:
            if is_text:
                widget.tag_add("sel", "1.0", "end")
            else:
                widget.select_range(0, tk.END)
                widget.icursor(tk.END)
        except (tk.TclError, AttributeError):
            # AttributeError: a non-Text widget without select_range/icursor.
            pass

    menu = tk.Menu(widget, tearoff=0, bg=CARD, fg=TEXT,
                   activebackground=ACCENT, activeforeground="white",
                   font=FONT_SMALL, bd=0)
    # The virtual-event names were empty ("<>") before, so these three entries
    # silently did nothing.
    menu.add_command(label="Cut", command=lambda: _fire("<<Cut>>"))
    menu.add_command(label="Copy", command=lambda: _fire("<<Copy>>"))
    menu.add_command(label="Paste", command=lambda: _fire("<<Paste>>"))
    menu.add_separator()
    menu.add_command(label="Select All", command=_select_all)

    def _show(event):
        # Focus first so the clipboard events act on this widget.
        widget.focus_set()
        try:
            menu.tk_popup(event.x_root, event.y_root)
        finally:
            menu.grab_release()

    # The sequence was an empty string before, which is not a valid Tk event
    # pattern; <Button-3> is the right mouse button.
    widget.bind("<Button-3>", _show)
def _build(self):
    """Create the picker's widgets: header, filter box, file list and footer.

    Sets ``self._filter_var``, ``self._dir_lbl``, ``self._lb`` and
    ``self._count_lbl``, which the other methods of the dialog read.
    """
    tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)
    hdr = tk.Frame(self, bg=PANEL)
    hdr.pack(fill=tk.X)
    lbl(hdr, "📄 Select File", fg=ACCENT, font=FONT_TITLE, bg=PANEL
        ).pack(side=tk.LEFT, padx=18, pady=12)
    btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
        side=tk.RIGHT, padx=10, pady=8)

    # Search / filter — every edit of the entry re-runs _apply_filter().
    sf = tk.Frame(self, bg=BG)
    sf.pack(fill=tk.X, padx=14, pady=(8, 4))
    lbl(sf, "Filter:", fg=MUTED, font=FONT_SMALL).pack(side=tk.LEFT, padx=(0, 6))
    self._filter_var = tk.StringVar()
    self._filter_var.trace_add("write", lambda *_: self._apply_filter())
    fe = ttk.Entry(sf, textvariable=self._filter_var, width=30)
    fe.pack(side=tk.LEFT)
    attach_edit_menu(fe)
    fe.focus()

    # Shows the directory being listed; the text is filled in by _scan().
    self._dir_lbl = lbl(self, "", fg=MUTED, font=FONT_SMALL)
    self._dir_lbl.pack(anchor="w", padx=14, pady=(0, 2))

    # File list
    lf = tk.Frame(self, bg=BG)
    lf.pack(fill=tk.BOTH, expand=True, padx=14)
    vsb = ttk.Scrollbar(lf)
    vsb.pack(side=tk.RIGHT, fill=tk.Y)
    self._lb = tk.Listbox(lf, bg=PANEL, fg=TEXT, font=FONT,
                          selectbackground=ACCENT, bd=0, relief="flat",
                          yscrollcommand=vsb.set, activestyle="none",
                          height=16, cursor="hand2")
    self._lb.pack(fill=tk.BOTH, expand=True)
    vsb.config(command=self._lb.yview)
    # Both bindings previously used an empty event sequence, which is not a
    # valid Tk pattern.
    # NOTE(review): the original event names were lost; double-click and
    # Return are the presumed "confirm selection" gestures — confirm.
    self._lb.bind("<Double-Button-1>", lambda _: self._select())
    self._lb.bind("<Return>", lambda _: self._select())

    # Footer: result count on the left, action buttons on the right.
    # "Browse…" is the fallback to the native file dialog.
    foot = tk.Frame(self, bg=PANEL)
    foot.pack(fill=tk.X)
    self._count_lbl = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
    self._count_lbl.pack(side=tk.LEFT, padx=14, pady=10)
    btn(foot, "Browse…", self._browse, CARD).pack(side=tk.RIGHT, padx=4, pady=8)
    btn(foot, "Select", self._select, HOT).pack(side=tk.RIGHT, padx=4, pady=8)
    btn(foot, "Cancel", self.destroy, CARD).pack(side=tk.RIGHT, padx=4, pady=8)
FLOW_FILE = Path.home() / ".codegen_manager_flows.json"


class FlowStore:
    """Load / save named flows from disk.

    Flows are stored as one JSON object in ``FLOW_FILE``, mapping a flow name
    to its list of step dicts.
    """

    @staticmethod
    def load():
        """Return the stored flows as a dict.

        Returns:
            The decoded mapping, or ``{}`` when the file is missing,
            unreadable, not valid JSON, or does not contain a JSON object.
        """
        try:
            raw = json.loads(FLOW_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            return {}
        return raw if isinstance(raw, dict) else {}

    @staticmethod
    def save(flows: dict):
        """Persist *flows* to ``FLOW_FILE``.

        The data is written to a sibling temporary file and then moved over
        the real file, so a failed write never truncates previously saved
        flows.

        Args:
            flows: mapping of flow name to list of step dicts.

        Returns:
            ``True`` when the file was written, ``False`` when serialisation
            or the write failed. Failures are reported through the return
            value instead of being raised, so existing callers that ignore
            the result behave as before.
        """
        tmp = FLOW_FILE.with_name(FLOW_FILE.name + ".tmp")
        try:
            payload = json.dumps(flows, indent=2)
            tmp.write_text(payload, encoding="utf-8")
            os.replace(tmp, FLOW_FILE)
        except (OSError, TypeError, ValueError):
            # Best-effort cleanup of a partially written temp file.
            try:
                tmp.unlink()
            except OSError:
                pass
            return False
        return True
def _build(self):
    """Create the flow builder's widgets: header, name field, step list, footer.

    Sets ``self._name_var``, ``self._canvas``, ``self._steps_frame``,
    ``self._cwin`` and ``self._msg``, and renders one step panel for every
    entry already present in ``self._steps``.
    """
    tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)
    hdr = tk.Frame(self, bg=PANEL)
    hdr.pack(fill=tk.X)
    lbl(hdr, "⛓ Flow Builder", fg=ACCENT, font=FONT_TITLE, bg=PANEL
        ).pack(side=tk.LEFT, padx=20, pady=14)
    btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
        side=tk.RIGHT, padx=12, pady=8)

    body = tk.Frame(self, bg=BG)
    body.pack(fill=tk.BOTH, expand=True, padx=18, pady=10)

    # Flow name (pre-filled when editing an existing flow)
    name_row = tk.Frame(body, bg=BG)
    name_row.pack(fill=tk.X, pady=(0, 10))
    lbl(name_row, "Flow Name:", fg=MUTED, font=FONT_SMALL).pack(
        side=tk.LEFT, padx=(0, 8))
    self._name_var = tk.StringVar(value=self._edit_name or "")
    ttk.Entry(name_row, textvariable=self._name_var, width=36).pack(
        side=tk.LEFT)

    # Steps list in a scrollable canvas
    lbl(body, "Steps (each step is sent as a sequential follow-up resume)",
        fg=MUTED, font=FONT_SMALL).pack(anchor="w", pady=(0, 4))

    canvas_frame = tk.Frame(body, bg=BG)
    canvas_frame.pack(fill=tk.BOTH, expand=True)

    self._canvas = tk.Canvas(canvas_frame, bg=BG, bd=0,
                             highlightthickness=0)
    vsb = ttk.Scrollbar(canvas_frame, orient="vertical",
                        command=self._canvas.yview)
    self._canvas.configure(yscrollcommand=vsb.set)
    vsb.pack(side=tk.RIGHT, fill=tk.Y)
    self._canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # The step panels live in a frame embedded in the canvas.
    self._steps_frame = tk.Frame(self._canvas, bg=BG)
    self._cwin = self._canvas.create_window(
        (0, 0), window=self._steps_frame, anchor="nw")
    # Both bindings previously used an empty event sequence, which is not a
    # valid Tk pattern. The handlers read the new width and recompute the
    # scroll region, i.e. they react to size changes: <Configure>.
    self._canvas.bind(
        "<Configure>",
        lambda e: self._canvas.itemconfig(self._cwin, width=e.width))
    self._steps_frame.bind(
        "<Configure>",
        lambda e: self._canvas.configure(
            scrollregion=self._canvas.bbox("all")))

    # Render existing steps
    for step in self._steps:
        self._add_step_ui(step)

    # Footer
    foot = tk.Frame(self, bg=PANEL)
    foot.pack(fill=tk.X)
    btn(foot, "+ Add Step", self._add_step, CARD).pack(
        side=tk.LEFT, padx=(12, 4), pady=10)
    # Status line used by _save() for validation and success messages.
    self._msg = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
    self._msg.pack(side=tk.LEFT, padx=8)
    btn(foot, "Cancel", self.destroy, CARD).pack(
        side=tk.RIGHT, padx=8, pady=10)
    btn(foot, "💾 Save Flow", self._save, ACCENT).pack(
        side=tk.RIGHT, padx=4, pady=10)
+ self._renumber() + btn(hrow, "✕", _remove, CARD, fg=MUTED, pady=2, padx=6).pack( + side=tk.RIGHT) + + # Up / Down + def _move_up(f=sf): + self._move_step(f, -1) + def _move_down(f=sf): + self._move_step(f, +1) + btn(hrow, "↑", _move_up, CARD, fg=MUTED, pady=2, padx=6).pack(side=tk.RIGHT) + btn(hrow, "↓", _move_down, CARD, fg=MUTED, pady=2, padx=6).pack(side=tk.RIGHT) + + # ── File section ────────────────────────────────────────────────────── + file_outer = tk.Frame(sf, bg=PANEL) + file_outer.pack(fill=tk.X, padx=8, pady=(2, 0)) + + frow = tk.Frame(file_outer, bg=PANEL) + frow.pack(fill=tk.X, padx=6, pady=(6, 2)) + lbl(frow, "📄 File:", fg=MUTED, font=FONT_SMALL, bg=PANEL + ).pack(side=tk.LEFT, padx=(0, 6)) + path_var = tk.StringVar(value=step_data.get("path", "")) + path_entry = ttk.Entry(frow, textvariable=path_var, width=40) + path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 4)) + attach_edit_menu(path_entry) + + file_status = lbl(frow, "", fg=MUTED, font=FONT_SMALL, bg=PANEL) + file_status.pack(side=tk.LEFT, padx=4) + + # preview widget (initially hidden) + prev_frame = tk.Frame(file_outer, bg=PANEL) + prev_frame.pack(fill=tk.X, padx=6, pady=(0, 4)) + file_prev = scrolledtext.ScrolledText( + prev_frame, bg="#0e0e22", fg="#88ccff", + insertbackground=TEXT, font=FONT_MONO, + height=4, bd=0, wrap=tk.WORD, relief="flat", + padx=6, pady=4) + # don't pack yet — shown only after a file is loaded + file_prev.config(state=tk.DISABLED) + + def _load_file(pv=path_var, fs=file_status, fp=file_prev, pf=prev_frame): + p = pv.get().strip() + if not p: + return + if not os.path.isfile(p): + fs.config(text="File not found", fg=C_FAIL) + pf.pack_forget() + return + try: + content = open(p, encoding="utf-8").read() + fs.config( + text=f"✓ {os.path.basename(p)} ({len(content):,} chars)", + fg=GREEN) + fp.config(state=tk.NORMAL) + fp.delete("1.0", tk.END) + fp.insert("1.0", + content[:1200] + ("\n…(truncated)" if len(content) > 1200 else "")) + 
fp.config(state=tk.DISABLED) + pf.pack(fill=tk.X) + except Exception as e: + fs.config(text=f"Error: {e}", fg=C_FAIL) + + def _browse_step(pv=path_var, load=_load_file, dlg=self): + p = filedialog.askopenfilename( + parent=dlg, + title="Select file for this step", + filetypes=[("Markdown","*.md"),("Text","*.txt"),("All","*.*")]) + if p: + pv.set(p) + load() + + btn(frow, "Browse", _browse_step, CARD).pack(side=tk.LEFT, padx=2) + btn(frow, "Load Preview", _load_file, CARD).pack(side=tk.LEFT, padx=2) + + # Auto-load if path already set + if step_data.get("path"): + self.after(50, _load_file) + + # ── Additional text ──────────────────────────────────────────────────── + trow = tk.Frame(sf, bg=CARD) + trow.pack(fill=tk.X, padx=8, pady=(4, 8)) + lbl(trow, "✏ Additional Text:", fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(side=tk.LEFT, anchor="n", padx=(0, 6)) + text_box = scrolledtext.ScrolledText( + trow, bg=PANEL, fg=TEXT, insertbackground=TEXT, + font=FONT, height=3, bd=0, wrap=tk.WORD, + relief="flat", padx=6, pady=4) + text_box.pack(side=tk.LEFT, fill=tk.X, expand=True) + attach_edit_menu(text_box) + if step_data.get("text"): + text_box.insert("1.0", step_data["text"]) + + entry = {"frame": sf, "label": label_var, + "path": path_var, "text_box": text_box, + "num_lbl": step_num} + self._step_frames.append(entry) + + def _move_step(self, frame_widget, direction): + frames = [e["frame"] for e in self._step_frames + if e["frame"].winfo_exists()] + try: + idx = frames.index(frame_widget) + except ValueError: + return + new_idx = idx + direction + if new_idx < 0 or new_idx >= len(frames): + return + # Re-pack in new order + frames.insert(new_idx, frames.pop(idx)) + for f in frames: + f.pack_forget() + for f in frames: + f.pack(fill=tk.X, pady=4, padx=2) + self._step_frames = [e for f in frames + for e in self._step_frames if e["frame"] is f] + self._renumber() + + def _renumber(self): + for i, e in enumerate(self._step_frames): + if e["frame"].winfo_exists(): + 
class FlowRunner:
    """Send the queued steps of a flow to an agent run, one after another.

    The constructor starts a daemon thread. That thread polls the current
    run and, each time the run reports a finished status, resumes it with the
    next step's prompt. Status messages are handed to ``on_status`` through
    ``root.after()``.
    """
    POLL = 12   # seconds between status checks

    def __init__(self, root, run_id, steps, on_status):
        """Store the flow state and start the background worker.

        Args:
            root: Object whose ``after()`` is used to schedule UI callbacks.
            run_id: Id of the run the flow starts from.
            steps: Iterable of step dicts (``label`` / ``path`` / ``text``);
                copied, so the caller's sequence is not consumed.
            on_status: Callable ``(msg, colour)`` receiving progress messages.
        """
        self.root = root
        self.run_id = run_id
        self.steps = list(steps)          # remaining steps (index 0 is next)
        self.on_status = on_status        # callable(msg, colour)
        self._current_run_id = run_id
        self._stop = False
        threading.Thread(target=self._loop, daemon=True).start()

    def stop(self):
        """Ask the worker to stop; it checks the flag between sleeps and sends."""
        self._stop = True

    @staticmethod
    def _step_prompt(step):
        """Build the prompt for one step: file content, then the extra text.

        A missing, unreadable or undecodable file contributes nothing
        (best effort). ``None`` for ``path`` or ``text`` is treated as empty.
        Returns the parts joined by a blank line, stripped; ``""`` when the
        step has neither readable file content nor text.
        """
        parts = []
        path = step.get("path") or ""
        if path and os.path.isfile(path):
            try:
                # Context manager so the handle is closed even if read() fails.
                with open(path, encoding="utf-8") as fh:
                    parts.append(fh.read())
            except (OSError, ValueError):
                # ValueError covers UnicodeDecodeError; the step then falls
                # back to its text only.
                pass
        text = (step.get("text") or "").strip()
        if text:
            parts.append(text)
        return "\n\n".join(parts).strip()

    def _wait_for_run(self, sent, total):
        """Poll the current run until it is done or ``stop()`` was called."""
        while not self._stop:
            time.sleep(self.POLL)
            try:
                data = API._get(
                    f"/organizations/{ORG_ID}/agent/run/{self._current_run_id}")
                status = data.get("status") or ""
                if is_done(status):
                    return
                self._post(
                    f"Flow [{sent}/{total}] — waiting for #{self._current_run_id}"
                    f" ({status})", MUTED)
            except Exception as e:
                self._post(f"Flow poll error: {e}", C_FAIL)
                # Extra pause before the next attempt after a failed poll.
                time.sleep(self.POLL)

    def _loop(self):
        """Worker body: wait for the run, send the next step, repeat."""
        total = len(self.steps)
        sent = 0
        failed = False
        self._post(f"Flow started — {total} step(s) queued", C_RUN)

        while not self._stop and self.steps:
            self._wait_for_run(sent, total)
            if self._stop:
                break

            # Send next step
            step = self.steps.pop(0)
            sent += 1
            prompt = self._step_prompt(step)
            label = step.get("label") or f"Step {sent}"
            if not prompt:
                self._post(f"Flow: skipping empty step {sent}", MUTED)
                continue

            self._post(f"Flow: sending {label} ({sent}/{total})…", C_PEND)
            try:
                result = API.resume_run(self._current_run_id, prompt)
                self._current_run_id = result.get("id", self._current_run_id)
            except Exception as e:
                self._post(f"Flow error on {label}: {e}", C_FAIL)
                # Remember the failure so the success message below is not
                # posted right after the error message.
                failed = True
                break
            self._post(
                f"Flow: {label} sent → run #{self._current_run_id}", C_RUN)

        if not self._stop and not failed:
            self._post(f"✅ Flow complete — all {total} step(s) sent", GREEN)

    def _post(self, msg, colour):
        """Schedule ``on_status(msg, colour)`` through ``root.after()``."""
        self.root.after(0, lambda m=msg, c=colour: self.on_status(m, c))
expand=True) + self._tree.tag_configure("has_file", foreground=C_DONE) + self._tree.tag_configure("text_only", foreground=TEXT) + + for i, s in enumerate(self.steps): + path = s.get("path", "") or "" + fname = os.path.basename(path) if path else "—" + text = (s.get("text") or "").replace("\n", " ")[:80] + label = s.get("label") or f"Step {i+1}" + tag = "has_file" if path and os.path.isfile(path) else "text_only" + self._tree.insert("", tk.END, iid=str(i), + values=(i + 1, label, fname, text), tags=(tag,)) + + self._tree.bind("", self._preview_step) + + # Preview pane + lbl(self, " File Preview", fg=MUTED, font=FONT_SMALL + ).pack(anchor="w", padx=14, pady=(2, 1)) + self._preview = scrolledtext.ScrolledText( + self, bg=PANEL, fg="#88ccff", insertbackground=TEXT, + font=FONT_MONO, height=8, bd=0, wrap=tk.WORD, + relief="flat", padx=10, pady=6) + self._preview.pack(fill=tk.X, padx=14, pady=(0, 4)) + self._preview.insert("1.0", "Select a step above to preview its file content.") + self._preview.config(state=tk.DISABLED) + + foot = tk.Frame(self, bg=PANEL) + foot.pack(fill=tk.X) + btn(foot, "Close", self.destroy, CARD).pack( + side=tk.RIGHT, padx=12, pady=8) + + def _preview_step(self, _event=None): + sel = self._tree.selection() + if not sel: + return + idx = int(sel[0]) + step = self.steps[idx] + path = step.get("path", "") or "" + self._preview.config(state=tk.NORMAL) + self._preview.delete("1.0", tk.END) + if path and os.path.isfile(path): + try: + content = open(path, encoding="utf-8").read() + self._preview.insert("1.0", content[:3000] + + ("\n…(truncated)" if len(content) > 3000 else "")) + except Exception as e: + self._preview.insert("1.0", f"Could not read file: {e}") + elif path: + self._preview.insert("1.0", f"File not found:\n{path}") + else: + text = step.get("text", "") or "(no text)" + self._preview.insert("1.0", text[:3000]) + self._preview.config(state=tk.DISABLED) + + def _edit(self): + self.destroy() + self.on_edit() + + +# 
class FlowManagerDialog(tk.Toplevel):
    """Modal dialog that lists the stored flows and lets the user preview,
    create, edit and delete them.

    Flows are read from and written to ``FlowStore``; after every save or
    delete the list is reloaded and the optional ``on_changed`` callback is
    invoked.
    """

    def __init__(self, parent, on_changed=None):
        """Create the window, build its widgets and load the flow list.

        Args:
            parent: Parent widget of the dialog.
            on_changed: Optional zero-argument callable invoked after a flow
                was saved or deleted (see ``_on_saved``).
        """
        super().__init__(parent)
        self.on_changed = on_changed
        self.title("Flows")
        self.geometry("620x500")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._flows = {}    # flow name -> list of step dicts, filled by _reload()
        self._build()
        self._reload()

    def _build(self):
        """Create the header, toolbar, flow table, hint line and footer."""
        # Thin accent bar across the top of the window
        tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)

        # Header
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)
        lbl(hdr, "⛓ Flows", fg=ACCENT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=20, pady=14)
        btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.RIGHT, padx=12, pady=8)

        # Sub-toolbar
        tb = tk.Frame(self, bg=PANEL)
        tb.pack(fill=tk.X)
        btn(tb, "+ New Flow", self._new, HOT ).pack(side=tk.LEFT, padx=(12,4), pady=8)
        btn(tb, "✏ Edit", self._edit, CARD ).pack(side=tk.LEFT, padx=4, pady=8)
        btn(tb, "🗑 Delete", self._delete, CARD ).pack(side=tk.LEFT, padx=4, pady=8)
        # Doubles as the flow counter and as the place for short hints
        self._tb_msg = lbl(tb, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._tb_msg.pack(side=tk.LEFT, padx=12)

        # Flow list treeview
        tree_f = tk.Frame(self, bg=BG)
        tree_f.pack(fill=tk.BOTH, expand=True, padx=14, pady=10)

        cols = ("Flow Name", "Steps", "Step Labels")
        self._tree = ttk.Treeview(tree_f, columns=cols,
                                  show="headings", selectmode="browse")
        self._tree.heading("Flow Name", text="Flow Name")
        self._tree.heading("Steps", text="Steps")
        self._tree.heading("Step Labels", text="Step Labels")
        self._tree.column("Flow Name", width=180, anchor="w")
        self._tree.column("Steps", width=52, anchor="center")
        # Only the last column stretches with the window
        self._tree.column("Step Labels", width=0, anchor="w", stretch=True)

        vsb = ttk.Scrollbar(tree_f, orient=tk.VERTICAL, command=self._tree.yview)
        self._tree.configure(yscrollcommand=vsb.set)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        self._tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # NOTE(review): the event sequence strings are empty in this copy of
        # the source. The hint label below mentions double-click (preview)
        # and right-click (options) — confirm the intended sequences.
        self._tree.bind("", lambda _: self._view())
        self._tree.bind("", lambda _: self._view())
        self._tree.bind("", self._ctx)

        # Hint
        lbl(self, " Double-click to preview · Right-click for options",
            fg=MUTED, font=FONT_SMALL).pack(anchor="w", padx=14, pady=(0, 4))

        # Footer
        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        btn(foot, "Close", self.destroy, CARD).pack(
            side=tk.RIGHT, padx=12, pady=8)

    def _reload(self):
        """Re-read the flows from ``FlowStore`` and repopulate the table."""
        for row in self._tree.get_children():
            self._tree.delete(row)
        self._flows = FlowStore.load()
        for name, steps in self._flows.items():
            # Steps without a label are shown as "Step N"
            labels = ", ".join(
                s.get("label") or f"Step {i+1}"
                for i, s in enumerate(steps))
            # The flow name is used as the row id, so the selection maps
            # straight back to a key of self._flows
            self._tree.insert("", tk.END, iid=name,
                              values=(name, len(steps), labels))
        count = len(self._flows)
        self._tb_msg.config(
            text=f"{count} flow{'s' if count != 1 else ''}")

    def _selected_name(self):
        """Return the name (row id) of the selected flow, or None."""
        sel = self._tree.selection()
        return sel[0] if sel else None

    def _view(self):
        """Open a ``FlowViewDialog`` for the selected flow, if any."""
        name = self._selected_name()
        if not name or name not in self._flows:
            return
        FlowViewDialog(self, name, self._flows[name],
                       on_edit=lambda n=name: self._edit_named(n))

    def _new(self):
        """Open the create dialog for a new flow."""
        FlowCreateDialog(self, on_saved=self._on_saved)

    def _edit(self):
        """Edit the selected flow, or show a hint when nothing is selected."""
        name = self._selected_name()
        if name:
            self._edit_named(name)
        else:
            self._tb_msg.config(text="Select a flow first", fg=C_PEND)

    def _edit_named(self, name):
        """Open the create dialog in edit mode for the flow called *name*."""
        FlowCreateDialog(self, on_saved=self._on_saved, edit_name=name)

    def _delete(self):
        """Delete the selected flow after the user confirms."""
        name = self._selected_name()
        if not name:
            self._tb_msg.config(text="Select a flow first", fg=C_PEND)
            return
        if messagebox.askyesno("Delete Flow",
                               f'Delete flow "{name}"?',
                               parent=self):
            # Load the store again instead of reusing self._flows, then
            # write it back without the deleted flow
            flows = FlowStore.load()
            flows.pop(name, None)
            FlowStore.save(flows)
            self._on_saved()

    def _ctx(self, event):
        """Select the row under the pointer and show the context menu."""
        row = self._tree.identify_row(event.y)
        if not row:
            return
        # Select the row first: the menu commands act on the selection
        self._tree.selection_set(row)
        m = tk.Menu(self, tearoff=0, bg=CARD, fg=TEXT,
                    activebackground=ACCENT, activeforeground="white",
                    font=FONT, bd=0)
        m.add_command(label="🔍 Preview", command=self._view)
        m.add_command(label="✏ Edit", command=self._edit)
        m.add_separator()
        m.add_command(label="🗑 Delete", command=self._delete)
        m.post(event.x_root, event.y_root)

    def _on_saved(self):
        """Refresh the table and notify ``on_changed`` when one was given."""
        self._reload()
        if self.on_changed:
            self.on_changed()
def _refresh_flow_combo(self):
    """Reload the flow names into the flow selector.

    The choices are ``"None"`` followed by the stored flow names in sorted
    order. A current selection that is no longer among the choices is reset
    to ``"None"``; the step-count label is then refreshed.
    """
    choices = ["None", *sorted(FlowStore.load())]
    self._flow_combo["values"] = choices

    current = self._flow_var.get()
    if current not in choices:
        self._flow_var.set("None")

    self._on_flow_selected()
def _launch(self):
    """Create a new agent run from the loaded template and the typed prompt.

    The prompt is the template text followed by the typed text (empty parts
    are dropped). With an empty prompt a warning is shown and nothing is
    sent. The flow picked in the selector, if any, is remembered in
    ``self._selected_flow``. The API call runs on a daemon thread; its result
    (``_done``) or an error message is delivered through ``after()``.
    """
    extra = self._prompt.get("1.0", tk.END).strip()
    parts = [p for p in [self._tpl_text, extra] if p and p.strip()]
    prompt = "\n\n".join(parts).strip()
    if not prompt:
        self._foot_msg.config(text="⚠ Enter a prompt or load a template.",
                              fg=C_PEND)
        return

    flow_name = self._flow_var.get()
    self._selected_flow = None
    if flow_name != "None":
        flows = FlowStore.load()
        self._selected_flow = flows.get(flow_name)
    self._foot_msg.config(text="Launching…", fg=C_PEND)

    def _bg():
        try:
            res = API.create_run(prompt, model="claude-opus-4-6")
        except Exception as e:
            # Format the message now: the name bound by "except ... as e" is
            # cleared when the except clause ends, which is before the
            # scheduled callback runs.
            message = f"Error: {e}"
            self.after(0, lambda: self._foot_msg.config(
                text=message, fg=C_FAIL))
        else:
            self.after(0, lambda: self._done(res))

    threading.Thread(target=_bg, daemon=True).start()
def __init__(self, parent, run, on_refreshed, on_start_flow=None) -> None:
    """Open the detail dialog for one run and start loading its logs.

    Args:
        parent: Parent widget of the dialog.
        run: Run record; ``run["id"]`` is required, ``status`` is read with
            a default of ``""``.
        on_refreshed: Callable stored for later use; ``_resumed`` calls it
            with no arguments.
        on_start_flow: Optional callable stored for later use; ``_run_flow``
            calls it with the run id and the selected flow's steps.
    """
    super().__init__(parent)
    self.run = run
    self.on_refreshed = on_refreshed
    self.on_start_flow = on_start_flow
    rid = run["id"]
    status = run.get("status", "")
    self.title(f"Run #{rid} · {status}")
    self.geometry("900x700")
    self.configure(bg=BG)
    self.resizable(True, True)
    self.grab_set()
    self.lift()
    # Build the widgets first: _load_logs() renders into them
    self._build(status)
    self._load_logs()
font=FONT_SMALL, bg=PANEL, cursor="hand2") + lk.pack(side=tk.LEFT) + lk.bind("", + lambda e, u=pr.get("url",""): webbrowser.open(u)) + + # Summary / result strip + summary = (self.run.get("summary") or self.run.get("result") or "").strip() + if summary: + sf = tk.Frame(self, bg=CARD) + sf.pack(fill=tk.X, padx=14, pady=(4, 0)) + lbl(sf, "Summary", fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(anchor="w", padx=12, pady=(6, 1)) + st = tk.Text(sf, bg=CARD, fg=TEXT, font=FONT_SMALL, + height=3, bd=0, wrap=tk.WORD, relief="flat", + padx=10, pady=4) + st.pack(fill=tk.X, padx=10, pady=(0, 8)) + st.insert("1.0", summary) + st.config(state=tk.DISABLED) + + # Conversation view + lbl(self, " Conversation Log", fg=MUTED, font=FONT_SMALL + ).pack(anchor="w", padx=14, pady=(8, 2)) + + self._conv = scrolledtext.ScrolledText( + self, bg=PANEL, fg=TEXT, insertbackground=TEXT, + font=FONT_MONO, bd=0, wrap=tk.WORD, relief="flat", + padx=12, pady=10) + self._conv.pack(fill=tk.BOTH, expand=True, padx=14, pady=(0, 4)) + self._conv.tag_configure("ts", foreground=MUTED, font=FONT_SMALL) + self._conv.tag_configure("tool", foreground="#88aaff", font=("Consolas",9,"bold")) + self._conv.tag_configure("thought", foreground="#c0a0ff") + self._conv.tag_configure("inp", foreground="#80d8c0") + self._conv.tag_configure("out", foreground=TEXT) + self._conv.tag_configure("div", foreground=BORDER) + self._conv.insert(tk.END, "Loading logs…", "ts") + self._conv.config(state=tk.DISABLED) + + # Resume panel — shown for all done runs + if is_done(status): + rf = tk.Frame(self, bg=CARD) + rf.pack(fill=tk.X, padx=14, pady=(2, 4)) + tk.Frame(rf, bg=BORDER, height=1).pack(fill=tk.X) + + # --- Single prompt resume (existing) --- + lbl(rf, " Follow‑up prompt (single message)", + fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(anchor="w", padx=10, pady=(8, 3)) + row = tk.Frame(rf, bg=CARD) + row.pack(fill=tk.X, padx=10, pady=(0, 10)) + self._resume_box = scrolledtext.ScrolledText( + row, bg=PANEL, fg=TEXT, 
insertbackground=TEXT, + font=FONT, height=4, bd=0, wrap=tk.WORD, + relief="flat", padx=8, pady=6) + self._resume_box.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) + self._resume_box.focus() + sb = tk.Frame(row, bg=CARD) + sb.pack(side=tk.LEFT, padx=(8, 0), fill=tk.Y) + btn(sb, "▶ Send", self._resume, HOT).pack(fill=tk.X, pady=2) + self._res_msg = lbl(sb, "", fg=MUTED, font=FONT_SMALL, bg=CARD) + self._res_msg.pack(pady=2) + self._resume_box.bind("", lambda _: self._resume()) + + # ⭐ NEW: Flow resume section + tk.Frame(rf, bg=BORDER, height=1).pack(fill=tk.X, padx=10, pady=(8, 4)) + flow_row = tk.Frame(rf, bg=CARD) + flow_row.pack(fill=tk.X, padx=10, pady=(0, 10)) + + lbl(flow_row, "⛓ Run a flow instead:", + fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(anchor="w", padx=0, pady=(0, 4)) + + flow_sel_row = tk.Frame(flow_row, bg=CARD) + flow_sel_row.pack(fill=tk.X) + lbl(flow_sel_row, "Flow:", fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(side=tk.LEFT, padx=(0, 6)) + self._flow_var = tk.StringVar(value="None") + self._flow_combo = ttk.Combobox( + flow_sel_row, textvariable=self._flow_var, + width=26, state="readonly") + self._flow_combo.pack(side=tk.LEFT, padx=(0, 6)) + self._flow_combo.bind("<>", self._on_flow_selected) + btn(flow_sel_row, "Manage Flows", self._open_flow_manager, + CARD).pack(side=tk.LEFT, padx=2) + self._flow_info = lbl(flow_sel_row, "", fg=MUTED, font=FONT_SMALL, bg=CARD) + self._flow_info.pack(side=tk.LEFT, padx=8) + + run_flow_btn = btn(flow_sel_row, "▶ Run Flow", self._run_flow, ACCENT) + run_flow_btn.pack(side=tk.LEFT, padx=4) + + self._refresh_flow_combo() + else: + self._resume_box = None + self._flow_combo = None + + # Footer + foot = tk.Frame(self, bg=PANEL) + foot.pack(fill=tk.X) + self._log_lbl = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL) + self._log_lbl.pack(side=tk.LEFT, padx=16, pady=8) + btn(foot, "Close", self.destroy, CARD).pack( + side=tk.RIGHT, padx=12, pady=8) + + # ── Logs 
──────────────────────────────────────────────────────────────────── + + def _load_logs(self): + rid = self.run["id"] + def _bg(): + try: + data = API.fetch_all_logs(rid) + self.after(0, lambda d=data: self._render(d)) + except Exception as e: + self.after(0, lambda: self._render_err(str(e))) + threading.Thread(target=_bg, daemon=True).start() + + def _render_err(self, msg): + """Display an error message in the conversation pane.""" + if not self._conv.winfo_exists(): + return + self._conv.config(state=tk.NORMAL) + self._conv.delete("1.0", tk.END) + self._conv.insert(tk.END, f"⚠ {msg}", "ts") + self._conv.config(state=tk.DISABLED) + + def _render(self, data): + """Render the logs in the conversation text widget.""" + if not self._conv.winfo_exists(): + return + logs = (data or {}).get("logs", []) + self._conv.config(state=tk.NORMAL) + self._conv.delete("1.0", tk.END) + + if not logs: + self._conv.insert(tk.END, "(No log entries found)\n", "ts") + else: + for lg in logs: + ts = fmt_dt(lg.get("created_at")) + tool = lg.get("tool_name") or "" + mtype = lg.get("message_type") or "" + thought = (lg.get("thought") or "").strip() + inp = lg.get("tool_input") + out = lg.get("tool_output") + obs = lg.get("observation") + + # timestamp + tool header + self._conv.insert(tk.END, f"[{ts}] ", "ts") + if tool: + self._conv.insert(tk.END, f"⚙ {tool}", "tool") + if mtype: + self._conv.insert(tk.END, f" ({mtype})", "ts") + self._conv.insert(tk.END, "\n") + + if thought: + preview = thought[:400] + ("…" if len(thought) > 400 else "") + self._conv.insert(tk.END, f" 💭 {preview}\n", "thought") + if inp: + raw = json.dumps(inp, indent=2) if isinstance(inp, (dict,list)) else str(inp) + preview = raw[:500] + ("…" if len(raw) > 500 else "") + self._conv.insert(tk.END, f" ▸ {preview}\n", "inp") + if out: + raw = json.dumps(out, indent=2) if isinstance(out, (dict,list)) else str(out) + preview = raw[:500] + ("…" if len(raw) > 500 else "") + self._conv.insert(tk.END, f" ◂ {preview}\n", "out") 
+ if obs and obs not in (inp, out): + raw = json.dumps(obs, indent=2) if isinstance(obs, (dict,list)) else str(obs) + self._conv.insert(tk.END, + f" 👁 {raw[:200]}{'…' if len(raw)>200 else ''}\n", "ts") + + self._conv.insert(tk.END, "─" * 66 + "\n", "div") + + self._conv.see(tk.END) + + self._conv.config(state=tk.DISABLED) + self._log_lbl.config(text=f"{len(logs)} log entries") + + # ── Resume ────────────────────────────────────────────────────────────────── + + def _resume(self): + if not self._resume_box: + return + prompt = self._resume_box.get("1.0", tk.END).strip() + if not prompt: + self._res_msg.config(text="Enter a prompt", fg=C_PEND) + return + self._res_msg.config(text="Sending…", fg=C_PEND) + + rid = self.run["id"] + def _bg(): + try: + res = API.resume_run(rid, prompt) + new_id = res.get("id", rid) + self.after(0, lambda: self._resumed(new_id)) + except Exception as e: + self.after(0, lambda: self._res_msg.config( + text=f"Error: {e}", fg=C_FAIL)) + + threading.Thread(target=_bg, daemon=True).start() + + def _resumed(self, new_id): + self._res_msg.config(text=f"✅ #{new_id} resumed!", fg=GREEN) + self.on_refreshed() + self.after(1500, self.destroy) + + + def _refresh_flow_combo(self): + flows = FlowStore.load() + names = ["None"] + sorted(flows.keys()) + self._flow_combo["values"] = names + if self._flow_var.get() not in names: + self._flow_var.set("None") + self._on_flow_selected() + + def _on_flow_selected(self, _event=None): + name = self._flow_var.get() + if name == "None": + self._flow_info.config(text="", fg=MUTED) + return + flows = FlowStore.load() + steps = flows.get(name, []) + self._flow_info.config(text=f"{len(steps)} step(s)", fg=ACCENT) + + def _open_flow_manager(self): + FlowManagerDialog(self, on_changed=self._refresh_flow_combo) + + def _run_flow(self): + """Start a flow runner for the selected flow.""" + if not self.on_start_flow: + self._res_msg.config(text="Flow runner not available", fg=C_FAIL) + return + name = self._flow_var.get() 
def __init__(self, root: tk.Tk) -> None:
    """Configure the main window, build the UI and start polling.

    Args:
        root: The Tk root window the application lives in.
    """
    self.root = root
    self.root.title("Codegen Agent Manager")
    self.root.geometry("1240x760")
    self.root.minsize(900, 580)
    self.root.configure(bg=BG)

    self._runs = []
    self._prev_statuses = {}
    self._polling = True
    # Initial table sort: "Created At" column, reversed
    self._sort_col = "Created At"
    self._sort_rev = True
    # Starred runs are kept in a JSON file in the user's home directory
    self._star_file = Path.home() / ".codegen_manager_stars.json"
    # NOTE(review): _load_stars() is defined outside this view; presumably it
    # reads self._star_file, which is why the path is assigned first — confirm
    self._starred = self._load_stars()
    self._flow_runners = {}  # run_id -> FlowRunner

    self._style()
    self._build()
    # Background polling thread, plus a first refresh once the UI is up
    threading.Thread(target=self._poll_loop, daemon=True).start()
    self.root.after(300, self._refresh)
def _update_active_badge(self, runs):
    """Remember the active runs and show how many there are on the badge.

    Args:
        runs: Iterable of run dicts; a run counts as active when
            ``is_active`` accepts its ``status``.

    The filtered list is stored in ``self._active_runs``. The badge is
    highlighted when at least one run is active and muted otherwise.
    """
    self._active_runs = list(
        filter(lambda run: is_active(run.get("status")), runs))

    if self._active_runs:
        text, background, foreground = (
            str(len(self._active_runs)), "#0d2a1a", C_RUN)
    else:
        text, background, foreground = "0", CARD, MUTED

    self._active_badge.config(text=text, bg=background, fg=foreground)
_badge_leave(self, event): + # Only hide if mouse didn't move into the dropdown window + self.root.after(200, self._maybe_hide_dropdown) + + def _badge_click(self, event): + if self._dropdown_win and self._dropdown_win.winfo_exists(): + self._dropdown_hide() + else: + self._dropdown_show() + + def _dropdown_show(self): + if self._dropdown_win and self._dropdown_win.winfo_exists(): + return + active = getattr(self, "_active_runs", []) + + win = tk.Toplevel(self.root) + win.overrideredirect(True) + win.attributes("-topmost", True) + win.configure(bg=BORDER) + self._dropdown_win = win + + # Position below badge + self._active_badge.update_idletasks() + bx = self._active_badge.winfo_rootx() + by = self._active_badge.winfo_rooty() + self._active_badge.winfo_height() + 2 + win.geometry(f"+{bx}+{by}") + + inner = tk.Frame(win, bg=CARD, padx=1, pady=1) + inner.pack(fill=tk.BOTH, expand=True) + + if not active: + lbl(inner, " No active runs ", fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(pady=10, padx=10) + else: + lbl(inner, f" {len(active)} active run(s) — click to inspect", + fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(anchor="w", padx=10, pady=(8, 4)) + tk.Frame(inner, bg=BORDER, height=1).pack(fill=tk.X, padx=8) + for run in active: + rid = run["id"] + stat = run.get("status") or "" + ts = fmt_dt(run.get("created_at")) + summ = (run.get("summary") or run.get("result") or "(no summary)") + summ = summ.replace("\n", " ")[:60] + row = tk.Frame(inner, bg=CARD, cursor="hand2") + row.pack(fill=tk.X, padx=0) + tk.Frame(row, bg=CARD, height=1).pack(fill=tk.X) + ri = tk.Frame(row, bg=CARD) + ri.pack(fill=tk.X, padx=12, pady=6) + lbl(ri, f"#{rid}", fg=C_RUN, font=FONT_BOLD, bg=CARD + ).pack(side=tk.LEFT, padx=(0, 8)) + lbl(ri, stat, fg=C_RUN, font=FONT_SMALL, bg=CARD + ).pack(side=tk.LEFT, padx=(0, 10)) + lbl(ri, ts, fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(side=tk.LEFT, padx=(0, 10)) + lbl(ri, summ + "…", fg=TEXT, font=FONT_SMALL, bg=CARD + ).pack(side=tk.LEFT) + + def 
_on_enter(e, r=row): r.config(bg="#1e2a3a"); [c.config(bg="#1e2a3a") for c in r.winfo_children() + [w for c in r.winfo_children() for w in (c.winfo_children() if hasattr(c,"winfo_children") else [])]] + def _on_leave(e, r=row): r.config(bg=CARD); [c.config(bg=CARD) for c in r.winfo_children() + [w for c in r.winfo_children() for w in (c.winfo_children() if hasattr(c,"winfo_children") else [])]] + def _on_click(e, run=run): self._dropdown_hide(); self._open_run_by(run) + for w in [row, ri] + ri.winfo_children(): + w.bind("", _on_enter) + w.bind("", _on_leave) + w.bind("", _on_click) + + tk.Frame(inner, bg=BORDER, height=1).pack(fill=tk.X, padx=8) + lbl(inner, " Click to open logs & resume", fg=MUTED, font=FONT_SMALL, bg=CARD + ).pack(anchor="w", padx=10, pady=(4, 8)) + + win.bind("", lambda e: self.root.after(250, self._maybe_hide_dropdown)) + win.update_idletasks() + # Clamp to screen + sw = self.root.winfo_screenwidth() + ww = win.winfo_width() + if bx + ww > sw: + bx = sw - ww - 10 + win.geometry(f"+{bx}+{by}") + + def _dropdown_hide(self): + if self._dropdown_win and self._dropdown_win.winfo_exists(): + self._dropdown_win.destroy() + self._dropdown_win = None + + def _maybe_hide_dropdown(self): + if not self._dropdown_win or not self._dropdown_win.winfo_exists(): + return + # Check if mouse is over badge or dropdown + x, y = self.root.winfo_pointerx(), self.root.winfo_pointery() + try: + wx = self._dropdown_win.winfo_rootx() + wy = self._dropdown_win.winfo_rooty() + ww = self._dropdown_win.winfo_width() + wh = self._dropdown_win.winfo_height() + bx = self._active_badge.winfo_rootx() + by = self._active_badge.winfo_rooty() + bw = self._active_badge.winfo_width() + bh = self._active_badge.winfo_height() + over_win = wx <= x <= wx+ww and wy <= y <= wy+wh + over_badge = bx <= x <= bx+bw and by <= y <= by+bh + if not over_win and not over_badge: + self._dropdown_hide() + except Exception: + self._dropdown_hide() + + def _toolbar(self): + tb = tk.Frame(self.root, 
bg=PANEL) + tb.pack(fill=tk.X, padx=14, pady=(0, 6)) + btn(tb, "+ New Run", self._open_create, HOT ).pack( + side=tk.LEFT, padx=(8, 4), pady=8) + btn(tb, "⛓ Flows", self._open_flows, CARD ).pack( + side=tk.LEFT, padx=4, pady=8) + btn(tb, "⟳ Refresh", self._refresh, ACCENT).pack( + side=tk.LEFT, padx=4, pady=8) + + tk.Frame(tb, bg=BORDER, width=1).pack( + side=tk.LEFT, fill=tk.Y, pady=8, padx=10) + + lbl(tb, "Status:", fg=MUTED, font=FONT_SMALL, bg=PANEL).pack( + side=tk.LEFT) + self._filt = ttk.Combobox( + tb, values=["All","ACTIVE","COMPLETE","FAILED"], + width=11, state="readonly") + self._filt.set("All") + self._filt.pack(side=tk.LEFT, padx=6) + self._filt.bind("<>", lambda _: self._repopulate()) + + lbl(tb, " Search:", fg=MUTED, font=FONT_SMALL, bg=PANEL).pack( + side=tk.LEFT) + self._svar = tk.StringVar() + self._svar.trace_add("write", lambda *_: self._repopulate()) + ttk.Entry(tb, textvariable=self._svar, width=24).pack( + side=tk.LEFT, padx=6) + + self._cnt_lbl = lbl(tb, "", fg=MUTED, font=FONT_SMALL, bg=PANEL) + self._cnt_lbl.pack(side=tk.RIGHT, padx=16) + + def _make_tree(self, parent): + """Build a styled Treeview with scrollbars inside parent frame.""" + cols = ("★", "ID", "Status", "Created At", "Summary", "PRs", "Source") + widths = {"★": 28, "ID": 68, "Status": 112, "Created At": 162, + "Summary": 0, "PRs": 38, "Source": 90} + anchors = {"★": "center", "ID": "center", "Status": "center", "PRs": "center"} + + tree = ttk.Treeview(parent, columns=cols, show="headings", + selectmode="browse") + for c in cols: + tree.heading(c, text=c, + command=lambda cc=c: self._sort(cc)) + tree.column(c, width=widths.get(c, 110), + anchor=anchors.get(c, "w"), + stretch=(c == "Summary"), + minwidth=widths.get(c, 40)) + + vsb = ttk.Scrollbar(parent, orient=tk.VERTICAL, command=tree.yview) + hsb = ttk.Scrollbar(parent, orient=tk.HORIZONTAL, command=tree.xview) + tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set) + vsb.pack(side=tk.RIGHT, fill=tk.Y) + 
hsb.pack(side=tk.BOTTOM, fill=tk.X) + tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) + + for tag, bg in (("running", "#0c2218"), ("completed", "#0b1a33"), + ("failed", "#280b0b"), ("other", PANEL), + ("starred", "#1e1a08"), ("star_run", "#0d2218")): + tree.tag_configure(tag, background=bg) + + tree.bind("", lambda e, t=tree: self._open_from_tree(t)) + tree.bind("", lambda e, t=tree: self._open_from_tree(t)) + tree.bind("", self._ctx_menu) + return tree + + def _split_tables(self): + pw = tk.PanedWindow(self.root, orient=tk.VERTICAL, bg=BG, + sashwidth=6, sashrelief="flat", sashpad=2) + pw.pack(fill=tk.BOTH, expand=True, padx=14, pady=(0, 2)) + + # ── Top pane: Pinned & Active ──────────────────────────────────────── + top_pane = tk.Frame(pw, bg=BG) + pw.add(top_pane, height=200, minsize=60) + + top_hdr = tk.Frame(top_pane, bg=PANEL, height=26) + top_hdr.pack(fill=tk.X) + top_hdr.pack_propagate(False) + lbl(top_hdr, " ★ Pinned & Active", fg="#f0c040", + font=FONT_BOLD, bg=PANEL).pack(side=tk.LEFT, padx=6) + self._top_cnt = lbl(top_hdr, "", fg=MUTED, font=FONT_SMALL, bg=PANEL) + self._top_cnt.pack(side=tk.RIGHT, padx=10) + + top_tree_frame = tk.Frame(top_pane, bg=BG) + top_tree_frame.pack(fill=tk.BOTH, expand=True) + self._top_tree = self._make_tree(top_tree_frame) + + # ── Bottom pane: Past Runs ─────────────────────────────────────────── + bot_pane = tk.Frame(pw, bg=BG) + pw.add(bot_pane, minsize=80) + + bot_hdr = tk.Frame(bot_pane, bg=PANEL, height=26) + bot_hdr.pack(fill=tk.X) + bot_hdr.pack_propagate(False) + lbl(bot_hdr, " ☰ Past Runs", fg=MUTED, + font=FONT_BOLD, bg=PANEL).pack(side=tk.LEFT, padx=6) + self._bot_cnt = lbl(bot_hdr, "", fg=MUTED, font=FONT_SMALL, bg=PANEL) + self._bot_cnt.pack(side=tk.RIGHT, padx=10) + + bot_tree_frame = tk.Frame(bot_pane, bg=BG) + bot_tree_frame.pack(fill=tk.BOTH, expand=True) + self._bot_tree = self._make_tree(bot_tree_frame) + + # Keep a ref so _open_run() still works for backward compat + self._tree = self._bot_tree + + 
lbl(self.root, " Double-click to view logs & resume · Right-click to star/unstar", + fg=MUTED, font=FONT_SMALL).pack(anchor="w", padx=14) + + def _flow_statusbar(self): + self._fsb = tk.Frame(self.root, bg="#0d1a0d", height=22) + self._fsb.pack(fill=tk.X, side=tk.BOTTOM) + self._fsb.pack_propagate(False) + self._flow_sv = tk.StringVar(value="") + self._flow_clr = C_RUN + self._flow_msg_lbl = tk.Label( + self._fsb, textvariable=self._flow_sv, + fg=C_RUN, font=FONT_SMALL, bg="#0d1a0d") + self._flow_msg_lbl.pack(side=tk.LEFT, padx=12) + self._fsb.pack_forget() # hidden until a flow is active + + def _statusbar(self): + sb = tk.Frame(self.root, bg=PANEL, height=22) + sb.pack(fill=tk.X, side=tk.BOTTOM) + sb.pack_propagate(False) + self._sv = tk.StringVar(value="Initialising…") + lbl(sb, "", fg=MUTED, font=FONT_SMALL, bg=PANEL, + textvariable=self._sv).pack(side=tk.LEFT, padx=12) + + # ── Poll ───────────────────────────────────────────────────────────────────── + + def _poll_loop(self): + while self._polling: + time.sleep(POLL_SEC) + try: + runs = API.fetch_all_runs() + self.root.after(0, lambda r=runs: self._apply(r)) + except Exception as e: + self.root.after(0, lambda msg=str(e): self._sv.set(f"Poll error: {msg}")) + + def _refresh(self): + self._sv.set("Fetching all runs (paginating)…") + def _bg(): + try: + runs = API.fetch_all_runs() + self.root.after(0, lambda r=runs: self._apply(r)) + except Exception as e: + self.root.after(0, lambda msg=str(e): self._sv.set(f"Error: {msg}")) + threading.Thread(target=_bg, daemon=True).start() + + def _apply(self, runs): + for run in runs: + rid = run.get("id") + new = run.get("status") or "" + old = self._prev_statuses.get(rid) + if old and old != new and is_active(old) and is_done(new): + self._notify(f"Run #{rid} finished", f"{old} → {new}") + self._prev_statuses[rid] = new + + self._runs = runs + self._update_active_badge(runs) + self._repopulate() + now = datetime.now().strftime("%H:%M:%S") + 
self._last_upd.config(text=f"Updated {now}") + self._sv.set(f"Loaded {len(runs)} run(s) · paginated") + + + + # ── Table ──────────────────────────────────────────────────────────────────── + + def _row_values(self, run): + """Build treeview value tuple for a run.""" + rid = run["id"] + s = run.get("status") or "" + summary = (run.get("summary") or run.get("result") or "").replace("\n", " ") + prs = len(run.get("github_pull_requests") or []) + star = "★" if rid in self._starred else "" + return (star, rid, s, fmt_dt(run.get("created_at")), + summary[:130], prs or "", run.get("source_type") or "") + + def _row_tag(self, run): + rid = run["id"] + s = run.get("status") or "" + if rid in self._starred and is_active(s): return "star_run" + if rid in self._starred: return "starred" + return status_tag(s) + + def _repopulate(self): + filt = self._filt.get() + query = self._svar.get().lower() + + for t in (self._top_tree, self._bot_tree): + for row in t.get_children(): + t.delete(row) + + top_n = bot_n = 0 + for run in self._runs: + rid = run["id"] + s = run.get("status") or "" + summary = (run.get("summary") or run.get("result") or "").replace("\n", " ") + + # Apply filter & search (filter only applies to bottom pane) + if query and query not in str(rid).lower() \ + and query not in s.lower() \ + and query not in summary.lower(): + continue + + starred = rid in self._starred + active = is_active(s) + filt_ok = (filt == "All" or filt.lower() in s.lower()) + + if starred or active: + # Always shown in top pane regardless of filter + self._top_tree.insert("", tk.END, iid=f"t_{rid}", + values=self._row_values(run), + tags=(self._row_tag(run),)) + top_n += 1 + + if not active and filt_ok: + # Past runs go to bottom — starred ones still appear here too (dimmed) + self._bot_tree.insert("", tk.END, iid=f"b_{rid}", + values=self._row_values(run), + tags=(self._row_tag(run),)) + bot_n += 1 + + self._top_cnt.config(text=f"{top_n} shown") + self._bot_cnt.config(text=f"{bot_n} shown") 
+ total = len(self._runs) + self._cnt_lbl.config(text=f"{top_n + bot_n} / {total}") + + def _sort(self, col): + if self._sort_col == col: + self._sort_rev = not self._sort_rev + else: + self._sort_col, self._sort_rev = col, False + key_map = { + "ID": lambda r: r.get("id", 0), + "Status": lambda r: r.get("status") or "", + "Created At": lambda r: r.get("created_at") or "", + "Summary": lambda r: r.get("summary") or "", + "PRs": lambda r: len(r.get("github_pull_requests") or []), + "Source": lambda r: r.get("source_type") or "", + } + self._runs.sort(key=key_map.get(col, lambda r: ""), + reverse=self._sort_rev) + self._repopulate() + + # ── Dialogs ────────────────────────────────────────────────────────────────── + + def _open_create(self): + CreateRunDialog( + self.root, + on_created=lambda _: self._refresh(), + on_flow_runner=self._start_flow_runner) + + def _open_flows(self): + FlowManagerDialog(self.root) + + def _start_flow_runner(self, run_id, steps): + runner = FlowRunner( + self.root, run_id, steps, + on_status=self._on_flow_status) + self._flow_runners[run_id] = runner + self._fsb.pack(fill=tk.X, side=tk.BOTTOM) + self._on_flow_status( + f"⛓ Flow attached to run #{run_id} — {len(steps)} steps", C_RUN) + + def _on_flow_status(self, msg, colour): + self._flow_sv.set(f"⛓ {msg}") + self._flow_msg_lbl.config(fg=colour) + self._fsb.pack(fill=tk.X, side=tk.BOTTOM) + # Auto-hide "complete" messages after 8s + if "complete" in msg.lower() or "✅" in msg: + self.root.after(8000, self._maybe_hide_flow_bar) + + def _maybe_hide_flow_bar(self): + if "complete" in self._flow_sv.get().lower() or "✅" in self._flow_sv.get(): + self._fsb.pack_forget() + + def _iid_to_rid(self, iid): + """Strip t_/b_ prefix and return int run id.""" + return int(str(iid).lstrip("tb_").replace("_","")) + + def _open_from_tree(self, tree): + sel = tree.selection() + if not sel: + return + try: + rid = self._iid_to_rid(sel[0]) + except Exception: + return + run = next((r for r in self._runs if 
r["id"] == rid), None) + if run: + RunDialog(self.root, run, + on_refreshed=self._refresh, + on_start_flow=self._start_flow_runner) + + def _open_run(self): + # Try both trees + for tree in (self._top_tree, self._bot_tree): + sel = tree.selection() + if sel: + self._open_from_tree(tree) + return + + def _open_run_by(self, run): + RunDialog(self.root, run, + on_refreshed=self._refresh, + on_start_flow=self._start_flow_runner) + + def _toggle_star(self, rid): + if rid in self._starred: + self._starred.discard(rid) + else: + self._starred.add(rid) + self._save_stars() + self._repopulate() + + def _load_stars(self): + try: + data = json.loads(self._star_file.read_text(encoding="utf-8")) + return set(data) + except Exception: + return set() + + def _save_stars(self): + try: + self._star_file.write_text( + json.dumps(list(self._starred)), encoding="utf-8") + except Exception: + pass + + def _ctx_menu(self, event): + # Figure out which tree was right-clicked + widget = event.widget + row = widget.identify_row(event.y) + if not row: + return + widget.selection_set(row) + try: + rid = self._iid_to_rid(row) + except Exception: + return + run = next((r for r in self._runs if r["id"] == rid), None) + if not run: + return + starred = rid in self._starred + star_label = "☆ Remove Star" if starred else "★ Star this Run" + m = tk.Menu(self.root, tearoff=0, bg=CARD, fg=TEXT, + activebackground=ACCENT, activeforeground="white", + font=FONT, bd=0) + m.add_command(label="🔍 View / Resume", + command=lambda: self._open_run_by(run)) + m.add_separator() + m.add_command(label=star_label, + command=lambda: self._toggle_star(rid)) + m.add_separator() + if run.get("web_url"): + m.add_command(label="🌐 Open in Browser", + command=lambda: webbrowser.open(run["web_url"])) + m.add_command(label="📋 Copy Run ID", + command=lambda: (self.root.clipboard_clear(), + self.root.clipboard_append(str(rid)), + self._sv.set(f"Copied #{rid}"))) + m.post(event.x_root, event.y_root) + + # ── Notifications 
──────────────────────────────────────────────────────────── + + def _notify(self, title, message): + try: + from plyer import notification + notification.notify(title=title, message=message, + app_name="Codegen Manager", timeout=6) + except Exception: + pass + self.root.after(0, lambda: self._toast(title, message)) + + def _toast(self, title, msg): + t = tk.Toplevel(self.root) + t.overrideredirect(True) + t.attributes("-topmost", True) + t.configure(bg=ACCENT) + inner = tk.Frame(t, bg=CARD) + inner.pack(fill=tk.BOTH, expand=True, padx=2, pady=2) + lbl(inner, f"🔔 {title}", fg=HOT, font=FONT_BOLD, bg=CARD + ).pack(anchor="w", padx=14, pady=(10, 2)) + lbl(inner, msg, fg=TEXT, font=FONT, bg=CARD + ).pack(anchor="w", padx=14, pady=(0, 10)) + t.update_idletasks() + sw = self.root.winfo_screenwidth() + sh = self.root.winfo_screenheight() + t.geometry(f"340x74+{sw-356}+{sh-110}") + t.after(5000, t.destroy) + + +# ════════════════════════════════════════════════════════════════════════════════ +# Entry point +# ════════════════════════════════════════════════════════════════════════════════ + +if __name__ == "__main__": + import subprocess, sys + for pkg in ("requests", "plyer"): + try: + __import__(pkg) + except ImportError: + subprocess.check_call([sys.executable, "-m", "pip", + "install", pkg, "-q"]) + root = tk.Tk() + try: + root.iconbitmap(default="") + except Exception: + pass + CodegenManager(root) + try: + root.mainloop() + except KeyboardInterrupt: + pass \ No newline at end of file diff --git a/Codegen/desktop.ini b/Codegen/desktop.ini new file mode 100644 index 00000000..ac3d0f4c --- /dev/null +++ b/Codegen/desktop.ini @@ -0,0 +1,6 @@ +[.ShellClassInfo] +IconResource=C:\WINDOWS\System32\SHELL32.dll,258 +[ViewState] +Mode= +Vid= +FolderType=Documents diff --git a/Codegen/integrate.md b/Codegen/integrate.md new file mode 100644 index 00000000..af89b43f --- /dev/null +++ b/Codegen/integrate.md @@ -0,0 +1,198 @@ +--- +name: integrate +description: Safely integrate a 
new feature into the codebase using parallel analysis, contract-first design, staged implementation, and zero-regression verification +--- + +Integrate the requested feature into the codebase with zero regressions, full contract definition, and verified consistency with existing architecture. This command enforces a rigorous multi-phase process before a single line of production code is written. + +**Usage**: Describe the feature to integrate. If no description is given, ask: *"What feature are you integrating? Describe its inputs, outputs, and expected behavior."* + +Store the feature description as `$FEATURE_DESC`. + +--- + +## Phase 1 — Pre-Integration Intelligence (Parallel, 5 Agents) + +Spawn **5 parallel explore agents** in a single response before writing any code. + +### Agent A — Codebase Impact Map +- Identify every file, module, and layer that the new feature will touch, extend, or depend on +- Find all existing patterns the feature must conform to (naming conventions, file organization, module boundaries) +- Identify the exact insertion points: where new code must be added, where existing code must be modified +- Flag any areas where the feature would introduce coupling that doesn't currently exist +- Output: `IMPACT.md` — list of files to create, files to modify, risk level per file (Low/Medium/High) + +### Agent B — Contract & Interface Definition +- Define the complete interface contract for the feature BEFORE implementation: + - Function/method signatures with full parameter types and return types + - REST endpoint shape (method, path, request body schema, response schema, error responses) + - Event/message schema if applicable + - Data model changes (new fields, new tables, migrations required) +- Verify that proposed interfaces don't conflict with existing ones +- Define the feature's error contract: every possible failure mode and its error type/code +- Output: `CONTRACT.md` — machine-readable interface specification + +### Agent C — Dependency & 
Compatibility Audit +- Determine if the feature requires new external dependencies +- For any new dependency: verify it's actively maintained (via WebSearch), check for license compatibility, check for conflicts with existing deps +- Check if any existing dependency already provides the required capability (avoid redundant deps) +- Identify any version constraint implications +- Output: list of required dependency changes with justification + +### Agent D — Test Strategy Design +- Design the complete test plan for this feature before implementation: + - Unit test cases: every function, every branch, every edge case + - Integration test cases: every interaction with external systems (DB, APIs, queues) + - E2E test cases: every user-facing flow the feature enables + - Negative tests: invalid inputs, missing data, permission failures, network failures + - Performance test considerations if the feature is on a hot path +- Identify which existing tests might be affected or need updating +- Output: `TEST_PLAN.md` — complete test case list with descriptions + +### Agent E — Architecture Consistency Review +- Verify that the proposed integration follows the existing architectural patterns exactly: + - Layer separation (does the feature respect existing boundaries?) + - Dependency direction (does data flow in the established direction?) + - Error propagation style (does it match how errors are handled elsewhere?) 
+ - Logging and observability patterns + - Authentication/authorization patterns +- Flag any deviations from existing patterns and require explicit justification before proceeding +- Output: architecture compliance checklist + +--- + +## Phase 2 — Integration Plan Review + +**Before writing any production code**, synthesize Phase 1 outputs and present to the user: + +``` +INTEGRATION PLAN: [Feature Name] + +Files to CREATE: [N files] + - [path] — [purpose] + +Files to MODIFY: [N files] + - [path] — [what changes] + +New dependencies: [list or "none"] + +Contract summary: + [brief interface description] + +Test plan: [N unit, N integration, N E2E tests] + +Architecture risks: [list or "none"] + +Estimated complexity: [Low / Medium / High / Very High] +``` + +**Pause here.** Ask the user: *"Does this plan look correct? Shall I proceed with implementation?"* + +--- + +## Phase 3 — Staged Implementation + +Implement in this strict order. Do not skip stages. Do not combine stages. + +### Stage 1: Data Layer +- Implement any new data models, schemas, migrations, or storage changes first +- Run existing data-layer tests to confirm no regressions +- Do not proceed if any existing test fails + +### Stage 2: Core Business Logic +- Implement domain/service layer logic +- Write unit tests for every function as it's implemented (test-alongside, not test-after) +- Every function must have its error contract handled explicitly +- No silent failures, no bare `catch` blocks that swallow errors + +### Stage 3: Interface Layer +- Implement the API endpoint, CLI command, or UI component that exposes the feature +- Wire to business logic only — no business logic in the interface layer +- Apply input validation at the interface boundary (not inside business logic) +- Add request/response logging consistent with existing patterns + +### Stage 4: Integration Wiring +- Connect all layers end-to-end +- Add integration tests verifying the full path +- Verify the feature's behavior against 
the CONTRACT.md specification exactly + +### Stage 5: Cross-Cutting Concerns +- Add observability: metrics, structured logs, traces — consistent with existing instrumentation +- Add any feature flags if the feature requires progressive rollout +- Update configuration handling if new env vars or config keys are required +- Update documentation: README, API docs, inline docstrings/JSDoc + +--- + +## Phase 4 — Post-Integration Verification (Parallel, 3 Agents) + +Spawn **3 parallel agents** after all stages complete. + +### Verifier 1 — Regression Check +- Run the full existing test suite +- Report any failures with full error output +- Cross-reference failures against the IMPACT.md list — are all impacted files accounted for? + +### Verifier 2 — Contract Compliance +- Verify the implementation matches CONTRACT.md exactly: + - All defined inputs accepted and validated + - All defined outputs produced correctly + - All defined error cases return the correct error type/code + - No undocumented behaviors introduced + +### Verifier 3 — Consistency Audit +- Verify the new code follows all project conventions: + - Naming conventions match existing code + - File organization matches existing structure + - Error handling style matches existing patterns + - No linting or type errors introduced + - No hardcoded values that should be config + +--- + +## Phase 5 — Integration Report + +Output a final `INTEGRATION_REPORT.md`: + +```markdown +# Integration Report: [Feature Name] +Date: [date] +Status: [COMPLETE / PARTIAL / BLOCKED] + +## What Was Built +[Description of what was implemented] + +## Files Changed +| File | Change Type | Risk | +|------|------------|------| +| ... 
| Created / Modified / Deleted | Low/Med/High | + +## Tests Added +- Unit: [N] tests +- Integration: [N] tests +- E2E: [N] tests + +## Regressions +[None / list of issues found and resolved] + +## Deviations from Plan +[None / list of deviations with justifications] + +## Known Limitations +[Any unimplemented edge cases or deferred work] + +## How to Test This Feature +[Exact commands or steps] +``` + +--- + +## Integration Rules (Non-Negotiable) + +1. **Contract first** — interfaces are defined and agreed before any code is written +2. **No side-channel modifications** — do not refactor unrelated code during integration +3. **Test alongside** — tests are written as each stage is implemented, never deferred +4. **Zero new linting errors** — the integration must leave lint/typecheck status no worse than it found it +5. **No silent failures** — every error path must be explicitly handled and logged +6. **Layer discipline** — business logic never lives in interface layer; data access never lives in business layer +7. **One feature per integration** — do not bundle multiple features; if scope creep is detected, stop and flag it \ No newline at end of file diff --git a/Codegen/modernize.md b/Codegen/modernize.md new file mode 100644 index 00000000..bc7e6a89 --- /dev/null +++ b/Codegen/modernize.md @@ -0,0 +1,359 @@ +--- +name: modernize_upgrade +description: Modernize a codebase toward a user-defined trajectory — replace hand-rolled code with best-in-class libraries, eliminate wheel-reinvention, and upgrade all specified contextual targets with verified 2025-2026 ecosystem intelligence +--- + +Modernize this codebase toward `$TRAJECTORY`. Eliminate hand-rolled implementations where battle-tested libraries exist. Replace outdated patterns with current idioms. Upgrade every specified context to match the target trajectory. 
+ +**Usage**: `/modernize_upgrade [trajectory description]` + +Examples: +- `/modernize_upgrade toward a production-grade REST API with full observability` +- `/modernize_upgrade to modern TypeScript with strict types and edge-ready runtime` +- `/modernize_upgrade toward event-driven microservices architecture` +- `/modernize_upgrade to 2026 Python async stack with full type safety` + +If no trajectory is provided, ask: +*"What direction are you modernizing toward? Describe your target architecture, runtime, scale, or quality bar."* + +Store as `$TRAJECTORY`. + +--- + +## Phase 1 — Modernization Intelligence Gathering (Parallel, 4 Agents) + +Spawn **4 parallel agents** in a single response before any code changes. + +### Agent 1 — Wheel Reinvention Audit +Hunt for every instance where custom code duplicates functionality that a well-maintained library provides better. + +**Scan for these patterns:** +- Custom HTTP clients instead of `axios`, `got`, `ky`, `httpx`, `aiohttp` +- Custom date/time parsing instead of `date-fns`, `dayjs`, `Temporal`, `arrow`, `pendulum` +- Custom validation schemas instead of `zod`, `valibot`, `pydantic`, `joi`, `yup` +- Custom environment/config parsing instead of `dotenv-safe`, `envalid`, `pydantic-settings`, `viper` +- Custom retry/backoff logic instead of `p-retry`, `tenacity`, `backoff`, `resilience4j` +- Custom queue/job processing instead of `BullMQ`, `bee-queue`, `celery`, `arq`, `temporal` +- Custom rate limiting instead of `bottleneck`, `limiter`, `slowapi`, `ratelimit` +- Custom deep clone/merge instead of `structuredClone`, `lodash/cloneDeep`, `immer` +- Custom UUID/ID generation instead of `uuid`, `nanoid`, `ulid`, `cuid2` +- Custom CSV/JSON/YAML parsing instead of `papaparse`, `fast-csv`, `pyyaml`, `orjson` +- Custom auth flows instead of `passport`, `lucia`, `better-auth`, `authjs`, `python-jose` +- Custom caching layers instead of `node-cache`, `lru-cache`, `cachetools`, `dogpile.cache` +- Custom test factories/fixtures 
instead of `faker`, `fishery`, `factory-boy`, `polyfactory` +- Custom logging instead of `pino`, `winston`, `structlog`, `loguru`, `zerolog` +- Custom metric collection instead of `prom-client`, `opentelemetry`, `statsd`, `micrometer` +- Custom migration tooling instead of `knex`, `drizzle`, `alembic`, `flyway`, `goose` +- Custom ORM queries instead of using the ORM's built-in advanced features +- Custom cryptography instead of `bcrypt`, `argon2`, `nacl`, `cryptography` (Python) +- Any `for` loop doing what `.map()`, `.filter()`, `.reduce()`, `itertools`, or stream APIs handle +- Any regex-based router or template engine instead of a proper framework feature +- Any hand-written state machine instead of `xstate`, `stately`, `transitions`, `robot3` + +For each finding: +``` +[REINVENTION] file:line +What it does: [description] +Replace with: [library name + version] +Benefit: [what the library provides that custom code doesn't — e.g., edge cases handled, battle-tested, maintained] +Migration complexity: [Drop-in / Low / Medium / High] +``` + +### Agent 2 — Trajectory Gap Analysis +Map the current codebase against `$TRAJECTORY` and identify every gap. + +For each dimension of `$TRAJECTORY`: +- What does the trajectory require? +- What does the codebase currently have? +- What is the gap? +- What is the specific library, pattern, or change that closes the gap? + +**Trajectory dimensions to evaluate** (filter to those relevant to `$TRAJECTORY`): + +| Dimension | Questions to Answer | +|-----------|-------------------| +| **Runtime/Platform** | Is the runtime (Node version, Python version, Go version) current for the trajectory? | +| **Type Safety** | Is there full type coverage? Are `any`/untyped patterns present? | +| **Async Patterns** | Are async patterns modern? (callbacks→promises→async/await→streaming) | +| **API Design** | REST/GraphQL/tRPC/gRPC — does the current shape match the trajectory? 
| +| **Data Layer** | Is the ORM/query layer appropriate for the trajectory's data needs? | +| **Auth & Security** | Does auth match modern standards (JWTs, OAuth2, PKCE, passkeys)? | +| **Observability** | Are logs structured? Are traces distributed? Are metrics exported? | +| **Error Handling** | Are errors typed, propagated cleanly, and user-safe? | +| **Build Pipeline** | Is the build system modern for the trajectory (ESM, Turbopack, Rye, etc.)? | +| **Testing Stack** | Does the test framework match the trajectory's requirements? | +| **Deployment Target** | Is code shaped for the target deployment (edge, serverless, container, monolith)? | +| **Performance** | Are bottlenecks present that the trajectory would expose at scale? | +| **Developer Experience** | Hot reload, type-checking, linting — are they fast enough for the trajectory? | + +For each gap: +``` +[GAP] Dimension: [name] +Current state: [what exists] +Trajectory requires: [what's needed] +Library/change: [specific recommendation — verified via WebSearch] +Priority: [Blocking / High / Medium / Low] +``` + +### Agent 3 — Ecosystem Intelligence (WebSearch Required) +For every library identified by Agents 1 and 2, verify currency using WebSearch. No assumptions from training data. + +For each candidate library: +1. Search: `"[library name] latest version 2025"` or `"[library name] changelog"` +2. Verify: latest stable version number +3. Verify: last commit date / release date (is it actively maintained?) +4. Verify: any known breaking changes between current installed version and latest +5. Verify: whether a newer alternative has overtaken it (e.g., `moment` → `date-fns` → `Temporal`) +6. Check: GitHub stars trajectory (growing/stable/declining) +7. 
Check: any security advisories (CVEs) against the library + +Output per library: +``` +Library: [name] +Current best version: [X.Y.Z] (verified [date]) +Maintained: [yes/no — last release: date] +Migration notes: [any breaking changes from older versions] +Verdict: [ADOPT / ADOPT_WITH_MIGRATION / SUPERSEDED_BY: name / AVOID: reason] +Source: [URL] +``` + +### Agent 4 — Dead Code & Modernization Blockers +Identify what must be cleaned up BEFORE modernization can proceed safely: + +- **Circular dependencies** that would make library injection hard +- **God files** that mix concerns and will need splitting before a library swap makes sense +- **Implicit global state** that would break when switching to a stateless/functional library +- **Type `any` / untyped surfaces** that would cause silent failures after library migration +- **Hardcoded magic values** that need extracting before config libraries can manage them +- **Dead imports** that inflate dependency surface unnecessarily +- **Duplicate implementations** of the same thing across files (consolidate before replacing) +- **Test coverage gaps** on code that will be replaced (needs tests first so replacement is safe) + +For each blocker: +``` +[BLOCKER] file:line +Type: [Circular / GodFile / GlobalState / Untyped / Magic / Dead / Duplicate / NoTest] +Description: [what the problem is] +Must fix before: [which modernization step this blocks] +Fix: [minimal pre-flight change] +``` + +--- + +## Phase 2 — Modernization Plan + +Synthesize Phase 1 findings into a structured plan. 
Present to the user before executing: + +``` +MODERNIZATION PLAN +Trajectory: [user's $TRAJECTORY] +Date: [date] + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +WHEEL REINVENTIONS TO REPLACE: [N] + Drop-in replacements: [N] (no logic change required) + Low-effort migrations: [N] (< 2 hours each) + Medium migrations: [N] (2–8 hours each) + High-effort rewrites: [N] (flag for separate planning) + +TRAJECTORY GAPS TO CLOSE: [N] + Blocking gaps: [N] (must fix before trajectory is achievable) + High priority: [N] + Medium/Low: [N] + +PRE-FLIGHT BLOCKERS: [N] + [list each — these run FIRST] + +LIBRARIES TO ADD: [N] + [list with versions] + +LIBRARIES TO REMOVE: [N] + [list — replaced by above] + +ESTIMATED TOTAL EFFORT: [Trivial / Small / Medium / Large / Very Large] +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +**Pause.** Ask the user: *"Does this modernization plan match your intent? Any items to skip, reprioritize, or add before I proceed?"* + +--- + +## Phase 3 — Pre-Flight Cleanup + +Before ANY library swaps or trajectory changes, resolve all blockers from Agent 4. + +Each blocker is fixed minimally — do not over-engineer. The goal is to make modernization safe, not to refactor the entire codebase. + +After all pre-flight fixes: +- Run the full test suite — must pass at baseline before continuing +- Run lint/typecheck — must pass at baseline before continuing +- If either fails, stop and fix before proceeding + +--- + +## Phase 4 — Staged Modernization (Parallel per Domain) + +Group modernization items into non-conflicting domains. Spawn **parallel agents per domain** so independent changes don't conflict. 
+ +**Recommended domain groupings** (adapt based on actual codebase): + +### Domain A — Type System Hardening +If trajectory involves type safety: +- Enable strict TypeScript (`strict: true`, `noUncheckedIndexedAccess`, `exactOptionalPropertyTypes`) +- Replace all `any` with proper types, `unknown`, or type guards +- Add Zod/Valibot schemas at all I/O boundaries (API inputs, env vars, config files, DB results) +- Generate types from schemas rather than maintaining parallel type definitions + +### Domain B — Library Swap — Drop-in Replacements +Handle all `Migration complexity: Drop-in` items from Agent 1: +- Remove the custom implementation +- Install the library +- Replace usages (API is compatible — this is near-mechanical) +- Run tests after each swap to catch any behavioral difference +- Remove the now-dead custom code + +### Domain C — Library Swap — Behavioral Migrations +Handle `Low` and `Medium` complexity migrations: +- For each: write characterization tests against the OLD implementation first (capture current behavior) +- Install new library +- Implement new version alongside old (do not delete yet) +- Verify characterization tests pass with new implementation +- Switch call sites to new implementation +- Delete old implementation +- Run full test suite + +### Domain D — Trajectory-Specific Upgrades +Implement the gaps identified by Agent 2, in priority order: +- Blocking gaps first +- Each gap gets its own sub-task with: implement → test → verify approach +- Apply the library or pattern recommended in the gap analysis +- Update all affected files to use the new approach +- Do not leave hybrid states (half old, half new) — complete each gap fully + +### Domain E — Observability & Production Readiness +If trajectory includes production-grade requirements: +- Add structured logging (replace `console.log` / `print` with `pino`/`structlog`/`zerolog`) +- Add OpenTelemetry instrumentation (traces, metrics, logs) +- Add health check endpoints (`/health`, 
`/ready`, `/live`) +- Add graceful shutdown handling +- Ensure all errors are logged with context (request ID, user ID, trace ID) +- Ensure no secrets appear in logs or error responses + +--- + +## Phase 5 — Post-Modernization Verification (Parallel, 3 Agents) + +Spawn **3 parallel agents** after all domain changes complete. + +### Verifier 1 — Regression Suite +- Run the full test suite +- Every failure must be triaged: is this a test that needs updating (behavior intentionally changed) or a regression (behavior was broken)? +- Report: pass/fail delta vs. pre-flight baseline + +### Verifier 2 — Wheel Reinvention Rescan +- Re-run the Agent 1 scan patterns on the updated codebase +- Confirm every identified reinvention was replaced +- Flag any NEW reinventions introduced during modernization (they sometimes appear during refactors) +- Confirm removed custom implementations have zero remaining references + +### Verifier 3 — Trajectory Compliance Check +- For each gap identified in Agent 2, verify the gap is now closed +- Verify the codebase demonstrably moves toward `$TRAJECTORY` — not just that changes were made +- Produce a before/after comparison per trajectory dimension +- Identify any remaining gaps that were scoped out (document them as deferred) + +--- + +## Phase 6 — Dependency Audit & Lock + +After all changes: + +```bash +# Confirm no unused dependencies remain +# [npm: depcheck | Python: deptry | Go: go mod tidy | Rust: cargo machete] + +# Confirm no duplicate packages at different versions +# [npm: npm dedupe | Python: pip check] + +# Confirm no security vulnerabilities introduced +# [npm: npm audit | Python: pip-audit | Go: govulncheck | Rust: cargo audit] + +# Update lockfile to reflect final state +# [appropriate lock command for the detected package manager] +``` + +Fix any audit findings before completing. 
+ +--- + +## Phase 7 — Modernization Report + +Write `MODERNIZATION_REPORT.md`: + +```markdown +# Modernization Report +Trajectory: [user's $TRAJECTORY] +Date: [date] + +## Before State +- Wheel reinventions found: [N] +- Trajectory gaps found: [N] +- Libraries replaced: [N] +- Test suite baseline: [N passing / N failing] + +## Changes Made + +### Wheel Reinventions Replaced +| Was | Replaced With | Version | Complexity | +|-----|--------------|---------|-----------| +| [custom code description] | [library] | [ver] | Drop-in/Low/Med | + +### Trajectory Gaps Closed +| Dimension | Was | Now | Library/Change | +|-----------|-----|-----|---------------| +| [name] | [before] | [after] | [what was added] | + +### Libraries Added +| Package | Version | Purpose | +|---------|---------|---------| + +### Libraries Removed +| Package | Replaced By | Reason | +|---------|------------|--------| + +## After State +- Test suite: [N passing / N failing] +- Lint/typecheck: [clean / N issues] +- Security audit: [clean / N findings] +- Unused deps: [none / N removed] + +## Deferred Items +[Anything scoped out with justification — forms the next modernization backlog] + +## Trajectory Progress +| Dimension | Before | After | Complete? | +|-----------|--------|-------|-----------| +| [each trajectory dimension] | [rating] | [rating] | ✅ / ⚠️ partial / ❌ deferred | + +## Overall Trajectory Alignment +[Before: X% → After: Y%] +[One paragraph: what the codebase can now do that it couldn't before] + +--- +*Modernization verified by parallel rescan. All replaced libraries confirmed absent.* +``` + +--- + +## Modernization Rules (Non-Negotiable) + +1. **Trajectory is the north star** — every change must move toward `$TRAJECTORY`, not just improve code generally +2. **Verify before recommending** — every library recommendation must be confirmed current via WebSearch; no training-data version assumptions +3. 
**Characterization tests before replacement** — any behavioral migration must have tests capturing current behavior before the swap +4. **Complete each swap** — do not leave codebases in hybrid states; each replacement is fully done before the next begins +5. **Drop-ins first** — execute no-risk drop-in replacements before tackling complex behavioral migrations +6. **Zero new reinventions** — do not introduce new hand-rolled code that a library could provide during the modernization itself +7. **Library not bloat** — only add a library if it replaces more code than it introduces; a 3-line utility does not justify a new dependency +8. **Blockers before modernization** — pre-flight cleanup always runs before library swaps; never swap into unstable ground +9. **Audit before lock** — dependency security audit always runs after all changes, before reporting complete +10. **Deferred is documented** — any item scoped out must appear in the MODERNIZATION_REPORT deferred section with specific justification; nothing is silently dropped \ No newline at end of file diff --git a/Codegen/npx-research.md b/Codegen/npx-research.md new file mode 100644 index 00000000..102f3dc3 --- /dev/null +++ b/Codegen/npx-research.md @@ -0,0 +1,62 @@ +# Using NPX Claude Flow based systems +!! use npx when you want to use claude flow as a research, sparc or coding tool +!! use npm when you want it to be part of a software application + +!! run npx in a separate terminal from claude code because sometimes claude code avoids using it (although that may change now that claude code itself can spawn multiple agents) + +!! assuming npx claude-flow and npx + +Instead of installing npm globally. +Can use NPX to run the latest versions of claude-flow etc + +- npx research-swarm init - Initialize SQLite database (run first, i think only needs to be run once?) 
+ - npx research-swarm research - Run single research task + - npx research-swarm swarm - Run parallel research swarm + - npx research-swarm goal-research - GOAP-based goal research + - npx research-swarm list - List research jobs + - npx research-swarm stats - Show learning statistics + + + | --swarm-size | 5 | Number of swarm agents (3-7) | + | --max-concurrent | 4 | Max concurrent agents running | + | -t, --time | 120 | Time budget in minutes | + | -d, --depth | 5 | Research depth (1-10) | + | -f, --focus | balanced | Focus: narrow, balanced, or broad | + | --anti-hallucination | high | Anti-hallucination: low, medium, high | + | --verbose | - | Verbose output from agents | + | --single-agent | - | Use single-agent mode (legacy) | + | --no-citations | - | Disable citation requirements | + | --no-ed2551 | - | Disable ED2551 enhanced mode | + + + Example - Maximum Configuration: + npx research-swarm research researcher "Analyze quantum computing market trends 2025" \ + --swarm-size 7 \ + --max-concurrent 6 \ + --time 240 \ + --depth 10 \ + --focus broad \ + --anti-hallucination high \ + --verbose + + npx research-swarm goal-research [options] + + Options: + + | Option | Default | Description | + |---------------------------|---------|-----------------------------| + | --swarm-size | 5 | Base swarm size (3-7) | + | --max-concurrent | 3 | Max concurrent agents | + | -t, --time | 120 | Total time budget | + | -d, --depth | 5 | Research depth per sub-goal | + | --verbose | - | Verbose output | + + Example - Complex Goal with Max Agents: + npx research-swarm goal-research \ + "Comprehensive analysis of emerging AI hardware startups with revenue > $10M" \ + --swarm-size 7 \ + --max-concurrent 5 \ + --time 300 \ + --depth 9 \ + --verbose + diff --git a/Codegen/reflect.md b/Codegen/reflect.md new file mode 100644 index 00000000..c4ca29ae --- /dev/null +++ b/Codegen/reflect.md @@ -0,0 +1,176 @@ +--- +name: reflect +description: Perform a structured retrospective on recent 
work — what was built, what was decided, what went wrong, what was learned, and what must change before continuing +--- + +Perform a deep, honest retrospective on the work done in this session or on the current state of the codebase. This command forces a full stop — no new code, no new features — until the reflection is complete and its outputs are recorded. + +**Usage**: +- Run after completing a feature, fixing a bug, or finishing a work session +- Run when something broke unexpectedly and you need to understand why +- Run when the codebase feels messy, confusing, or hard to reason about +- Run before starting a new phase of work to clear accumulated confusion + +--- + +## Phase 1 — Work Inventory + +Spawn **2 parallel agents**: + +### Agent 1 — What Was Actually Built +- Read git log, diff, or recent file modifications to catalog all changes made +- For each changed file: what was the intent, what was actually done, do they match? +- Identify any work that was started but not finished (partial implementations, TODOs added, commented-out code) +- Identify any work that was done but not committed or documented +- Output: exact list of changes with intent vs. reality comparison + +### Agent 2 — Decision Log Reconstruction +- Identify all architectural, implementation, and design decisions made during this work +- For each decision: what alternatives were available, why was this choice made, what assumptions does it rely on? +- Flag any decisions that were made under time pressure, uncertainty, or incomplete information +- Flag any decisions that contradict earlier decisions in the same codebase +- Output: decision log with risk ratings + +--- + +## Phase 2 — Failure & Friction Analysis + +Answer these questions honestly. Read actual code and git history — do not rely on memory. + +### What Broke? 
+- List every bug introduced, every test that failed, every unexpected behavior +- For each: what was the root cause — logic error, wrong assumption, missing edge case, dependency issue? +- Classify root cause type: + - `ASSUMPTION` — the code was based on a false assumption about inputs, state, or behavior + - `COMPLEXITY` — the logic was too complex and a case was missed + - `COUPLING` — a change in one place unexpectedly broke another place + - `MISSING_CONTEXT` — the right information wasn't available at decision time + - `RUSHED` — the decision was made too quickly without sufficient analysis + +### What Was Harder Than Expected? +- Identify every place where progress stalled, required rework, or took longer than it should +- For each: was the difficulty inherent in the problem, or was it caused by the codebase's structure? +- Identify any areas where the existing code made the work unnecessarily hard + +### What Was Confusing? +- Identify any part of the codebase that was misunderstood during this work +- Identify any naming, structure, or behavior that was misleading +- Identify any documentation that was missing or wrong + +--- + +## Phase 3 — Quality Delta Assessment + +Compare the codebase state before and after the work on these dimensions: + +| Dimension | Before | After | Direction | +|-----------|--------|-------|-----------| +| Test coverage | [%/description] | [%/description] | ↑ / ↓ / → | +| Lint/type errors | [count] | [count] | ↑ / ↓ / → | +| Architectural clarity | [rating] | [rating] | ↑ / ↓ / → | +| Documentation completeness | [rating] | [rating] | ↑ / ↓ / → | +| Technical debt | [rating] | [rating] | ↑ / ↓ / → | + +**Rule**: If any dimension went in the wrong direction (↓), it must be listed as a remediation item. + +--- + +## Phase 4 — Learnings Extraction + +Produce concrete, actionable learnings — not vague observations. Each learning must be a rule that changes future behavior. 
+ +### Format for each learning: +``` +LEARNING [N] +Observation: [What happened] +Root cause: [Why it happened] +Rule: [The specific behavior change this requires going forward] +Applies to: [This project only / All projects / This language / This pattern] +Priority: [Must apply immediately / Apply next session / Good to know] +``` + +**Minimum 3 learnings required.** If fewer than 3 genuine learnings exist, the reflection is not deep enough — dig further. + +--- + +## Phase 5 — Debt Register Update + +Identify all technical debt created or discovered during this work. Classify each item: + +``` +DEBT [N] +File: [path:line] +Type: [Shortcut / Missing test / Missing docs / Architectural compromise / Hardcoded value / TODO] +Description: [What the problem is] +Risk if left: [Low / Medium / High / Critical] +Estimated effort to fix: [Trivial / Small / Medium / Large] +Fix before: [Next commit / Next feature / Next sprint / Someday] +``` + +--- + +## Phase 6 — Pre-Continuation Checklist + +Before any new work begins, verify each item: + +``` +□ All broken tests are fixed (or explicitly deferred with documented reason) +□ No new lint or type errors were introduced +□ All TODOs added during this work are registered in the debt register +□ All partial implementations are documented (not left as silent dead code) +□ Decision log is written and justified +□ At least 3 learnings are extracted and written as rules +□ Any confusion about the codebase is resolved (re-read confusing code, add comments, ask questions) +□ The next step is clearly defined (not "continue working" — a specific, bounded task) +``` + +**Do not continue to new work until all boxes are checked or explicitly deferred with written justification.** + +--- + +## Output: REFLECTION.md + +Write all findings to `REFLECTION.md` (append if file exists, do not overwrite history): + +```markdown +# Reflection — [date] [session/feature name] + +## Work Inventory +[Agent 1 output] + +## Decisions Made +[Agent 2 
output] + +## What Broke & Why +[Phase 2 findings] + +## Quality Delta +[Phase 3 table] + +## Learnings +[Phase 4 — each learning in structured format] + +## Debt Register +[Phase 5 — each debt item] + +## Pre-Continuation Checklist +[Phase 6 — with check/defer status] + +## Next Step +[Single, specific, bounded task to start next] + +--- +``` + +Tell the user: reflection is complete, summary of key findings, and the single next step. + +--- + +## Reflection Rules (Non-Negotiable) + +1. **Full stop** — no new implementation work until reflection is written to file +2. **Evidence-based** — every finding must reference actual code, actual errors, or actual git history +3. **No minimization** — do not soften failures or debt; name them precisely +4. **Forward-looking learnings** — every observation must convert to a behavioral rule, not just an acknowledgment +5. **Debt is permanent record** — debt items are never deleted from REFLECTION.md, only marked resolved +6. **One next step** — reflection ends with exactly one bounded, specific next action — not a list of things to do \ No newline at end of file diff --git a/Codegen/research-it.md b/Codegen/research-it.md new file mode 100644 index 00000000..99ca14bf --- /dev/null +++ b/Codegen/research-it.md @@ -0,0 +1,84 @@ +--- +name: research-it +description: Research the best 2026 tools, deps, and patterns for what you want to build, then output a concise RESEARCH.md +--- + +Research the best tools, dependencies, and architecture for the user's project. The user will describe what they want to build. If no description is provided, analyze the current codebase to infer the project type and goals. + +First, ask the user: **"What are you building? Describe features, target platform, and any constraints."** (Skip if they already provided this with the command.) + +Store the user's description as `$PROJECT_DESC`. + +Then spawn **6 agents in parallel** using the Agent tool (subagent_type: Explore). 
Every agent receives `$PROJECT_DESC` and must verify ALL recommendations using WebSearch or Grep MCP (mcp__grep__searchGitHub) - no training-data assumptions allowed. + +**Agent 1 - Project Scan**: Read the current working directory. Catalog what already exists: package.json, config files, installed deps, directory structure, language/framework already chosen. Report exactly what's in place so other agents don't duplicate it. + +**Agent 2 - Stack Validation**: Given `$PROJECT_DESC` and what Agent 1 would find in a typical scaffold, research via WebSearch whether the current framework/language is the best choice for this project in 2026. Compare top 2-3 alternatives on performance, ecosystem, and developer experience. Pick ONE winner. If the current stack is already the best choice, confirm it with evidence. + +**Agent 3 - Core Dependencies**: For EACH feature in `$PROJECT_DESC`, find the single best library for this stack in 2026. Use WebSearch to confirm latest stable version numbers. Use Grep MCP to verify real projects actually use these libraries. No outdated packages. No "popular in 2023" picks. Output: package name, exact latest version, one-line purpose. + +**Agent 4 - Dev Tooling**: Research the best 2026 dev tooling for this stack: package manager, bundler, linter, formatter, test framework, type checker. Use WebSearch to verify current recommendations. Pick ONE per category. Include exact versions. + +**Agent 5 - Architecture**: Use Grep MCP to find how real 2026 projects of this type structure their code. Look for directory layouts, file naming conventions, and key patterns (state management, routing, data fetching, etc.). Output a concrete directory tree and list of patterns to follow. + +**Agent 6 - Config & Integration**: Research required config files for the chosen stack and tools. Use WebSearch for current config best practices. Cover: linter config, formatter config, TS/type config, env setup, CI/CD basics, deployment target config. 
Provide exact file contents or key settings. + +## Agent Rules + +1. Every recommendation MUST be verified via WebSearch or Grep MCP - no guessing +2. Confirm 2026 latest stable versions - do not assume version numbers from training data +3. Pick ONE best option per category - no "you could also use X" +4. No prose, no hedging, no alternatives lists - decisive answers only +5. If something already exists in the project scaffold, note it and don't re-recommend it unless it should be replaced + +## Output + +After all agents complete, synthesize their findings into a single `RESEARCH.md` file written to the project root. The file must be optimized for LLM consumption - zero fluff, maximum actionability. Use this exact structure: + +```markdown +# RESEARCH: [short project description] +Generated: [today's date] +Stack: [framework + language + runtime] + +## INSTALL +[exact shell commands to run - copy-paste ready, in order] + +## DEPENDENCIES +| package | version | purpose | +|---------|---------|---------| +[each purpose max 5 words] + +## DEV DEPENDENCIES +| package | version | purpose | +|---------|---------|---------| +[each purpose max 5 words] + +## CONFIG FILES TO CREATE +### [filename] +[exact file contents or key settings] +[repeat for each config file] + +## PROJECT STRUCTURE +[tree showing recommended directories and key files] + +## SETUP STEPS +1. [concrete action] +2. 
[concrete action] +[ordered, each step is one command or action] + +## KEY PATTERNS +[brief list of architectural patterns to follow, with one-line descriptions] + +## SOURCES +[URLs used for verification, grouped by section] +``` + +Rules for RESEARCH.md: +- No alternatives sections +- No explanations of "why" - just what to do +- No "you could also use X" hedging +- Every version number must be verified, not assumed +- Commands must be copy-paste ready +- The entire file should be readable by another LLM session that can immediately execute the setup + +Write the file using the Write tool, then tell the user it's ready and summarize what was researched. diff --git a/Codegen/setup-claude-md.md b/Codegen/setup-claude-md.md new file mode 100644 index 00000000..b22f2e47 --- /dev/null +++ b/Codegen/setup-claude-md.md @@ -0,0 +1,116 @@ +--- +name: setup-claude-md +description: Generate or update a minimal CLAUDE.md with project guidelines and structure +--- + +Generate or update a minimal CLAUDE.md with project structure, guidelines, and quality checks. + +## Step 1: Check if CLAUDE.md Exists + +If `CLAUDE.md` exists: +- Read the existing file +- Preserve custom sections the user may have added +- Update the structure, quality checks, and organization rules + +If `CLAUDE.md` does NOT exist: +- Create a new one from scratch + +## Step 2: Analyze Project (Use Explore Agents in Parallel) + +Spawn parallel Explore agents to understand the codebase: + +1. **Project Purpose Agent**: Analyze README, package.json description, main files to understand what the project does +2. **Directory Structure Agent**: Map out the folder structure and what each folder contains +3. **Tech Stack Agent**: Identify languages, frameworks, tools, dependencies + +Wait for all agents to complete, then synthesize the information. 
+ +## Step 3: Detect Project Type & Commands + +Check for config files: +- `package.json` → JavaScript/TypeScript (extract lint, typecheck, server scripts) +- `pyproject.toml` or `requirements.txt` → Python +- `go.mod` → Go +- `Cargo.toml` → Rust + +Extract: +- Linting commands +- Typechecking commands +- Server start command (if applicable) + +## Step 4: Generate Project Tree + +Create a concise tree structure showing key directories and files with brief descriptions. + +Example format: +``` +src/ + ├── api/ # API endpoints and routes + ├── components/ # Reusable UI components + ├── utils/ # Helper functions and utilities + ├── types/ # TypeScript type definitions + └── main.ts # Application entry point +``` + +## Step 5: Generate or Update CLAUDE.md + +Create `CLAUDE.md` with this structure: + +```markdown +# [Project Name] + +[Brief 1-2 sentence description of what this project does] + +## Project Structure + +[INSERT TREE HERE] + +## Organization Rules + +**Keep code organized and modularized:** +- API routes → `/api` folder, one file per route/resource +- Components → `/components`, one component per file +- Utilities → `/utils`, grouped by functionality +- Types/Interfaces → `/types` or co-located with usage +- Tests → Next to the code they test or in `/tests` + +**Modularity principles:** +- Single responsibility per file +- Clear, descriptive file names +- Group related functionality together +- Avoid monolithic files + +## Code Quality - Zero Tolerance + +After editing ANY file, run: + +```bash +[EXACT COMMANDS FROM PROJECT] +``` + +Fix ALL errors/warnings before continuing. + +[IF SERVER EXISTS:] +If changes require server restart (not hot-reloadable): +1. Restart server: `[SERVER COMMAND]` +2. Read server output/logs +3. 
Fix ALL warnings/errors before continuing +``` + +**Keep total file under 100 lines.** + +## Step 6: Preserve Custom Sections + +If updating an existing CLAUDE.md: +- Keep any custom sections the user added +- Update the generated sections (Project Structure, Quality Checks) +- Merge carefully without losing user content + +## Step 7: Confirm Completion + +Tell the user: +- ✅ CLAUDE.md [created/updated] +- 📋 Project: [brief description] +- 🗂️ Structure mapped with [X] directories +- 📐 Organization rules enforced +- 🎯 Zero-tolerance quality checks active diff --git a/Codegen/setup-code-quality.md b/Codegen/setup-code-quality.md new file mode 100644 index 00000000..150fd56a --- /dev/null +++ b/Codegen/setup-code-quality.md @@ -0,0 +1,168 @@ +--- +name: setup-code-quality +description: Detect project tools and generate a /fix command for linting and typechecking +--- + +You are setting up a project for automated code quality checks. Follow these steps carefully: + +## Step 1: Detect Project Type + +Check for these files in the current directory to determine the project type: +- `package.json` → JavaScript/TypeScript (Node.js) +- `pyproject.toml` or `requirements.txt` or `setup.py` → Python +- `go.mod` → Go +- `Cargo.toml` → Rust +- `composer.json` → PHP +- `build.gradle` or `pom.xml` → Java + +Read the relevant config file to understand the project structure. 
+ +## Step 2: Check Existing Tools + +Based on the project type, check if these tools are already configured: + +### JavaScript/TypeScript: +- Check `package.json` for: `eslint`, `prettier`, `typescript`, `@typescript-eslint/*` +- Check for config files: `.eslintrc.*`, `.prettierrc.*`, `tsconfig.json` +- Check `package.json` scripts for: `lint`, `typecheck`, `type-check`, or `tsc` + +### Python: +- Check for: `mypy`, `pylint`, `black`, `ruff`, `flake8` in dependencies +- Check for config files: `mypy.ini`, `.pylintrc`, `pyproject.toml` +- Look for linting/type checking configurations + +### Go: +- Check for: `golint`, `gofmt`, `staticcheck` +- Go has built-in tools, check if project uses them + +### Rust: +- Check for: `clippy`, `rustfmt` (built-in to Rust toolchain) +- Check `Cargo.toml` for workspace configuration + +## Step 3: Install Missing Tools (if needed) + +If tools are missing, install them based on the project type: + +### JavaScript/TypeScript: +```bash +# Detect package manager (npm, yarn, pnpm, bun) +# Install missing tools, e.g.: +npm install --save-dev eslint prettier typescript @typescript-eslint/parser @typescript-eslint/eslint-plugin + +# Add scripts to package.json if missing: +# "lint": "eslint ." +# "typecheck": "tsc --noEmit" +``` + +### Python: +```bash +pip install mypy pylint black ruff +# or add to requirements-dev.txt / pyproject.toml +``` + +### Go: +```bash +go install golang.org/x/lint/golint@latest +go install honnef.co/go/tools/cmd/staticcheck@latest +``` + +### Rust: +```bash +rustup component add clippy rustfmt +``` + +**IMPORTANT**: Always check if tools exist first. Only install if missing. 
+ +## Step 4: Generate /fix Command + +Create a file at `.claude/commands/fix.md` with the following structure: + +```markdown +--- +name: fix +description: Run typechecking and linting, then spawn parallel agents to fix all issues +--- + +# Project Code Quality Check + +This command runs all linting and typechecking tools for this project, collects errors, groups them by domain, and spawns parallel agents to fix them. + +## Step 1: Run Linting and Typechecking + +Run the appropriate commands for this project: + +[INSERT PROJECT-SPECIFIC COMMANDS HERE] + +## Step 2: Collect and Parse Errors + +Parse the output from the linting and typechecking commands. Group errors by domain: +- **Type errors**: Issues from TypeScript, mypy, etc. +- **Lint errors**: Issues from eslint, pylint, ruff, clippy, etc. +- **Format errors**: Issues from prettier, black, rustfmt, gofmt + +Create a list of all files with issues and the specific problems in each file. + +## Step 3: Spawn Parallel Agents + +For each domain that has issues, spawn an agent in parallel using the Task tool: + +**IMPORTANT**: Use a SINGLE response with MULTIPLE Task tool calls to run agents in parallel. + +Example: +- Spawn a "type-fixer" agent for type errors +- Spawn a "lint-fixer" agent for lint errors +- Spawn a "format-fixer" agent for formatting errors + +Each agent should: +1. Receive the list of files and specific errors in their domain +2. Fix all errors in their domain +3. Run the relevant check command to verify fixes +4. Report completion + +## Step 4: Verify All Fixes + +After all agents complete, run the full check again to ensure all issues are resolved. +``` + +**Replace `[INSERT PROJECT-SPECIFIC COMMANDS HERE]` with the actual commands for the detected project type.** + +### JavaScript/TypeScript Example: +```bash +npm run lint +npm run typecheck +``` + +### Python Example: +```bash +mypy . +pylint src/ +black --check . +``` + +### Go Example: +```bash +go vet ./... +staticcheck ./... +gofmt -l . 
+``` + +### Rust Example: +```bash +cargo clippy -- -D warnings +cargo fmt -- --check +``` + +## Step 5: Confirm Completion + +After generating the `/fix` command, inform the user: +1. What project type was detected +2. Which tools were already present +3. Which tools were installed (if any) +4. That the `/fix` command has been created at `.claude/commands/fix.md` +5. How to use it: "Run `/fix` to lint, typecheck, and auto-fix all issues" + +**Important Notes**: +- Always create the `.claude/commands/` directory if it doesn't exist +- Ensure the YAML frontmatter includes both `name` and `description` +- The generated `/fix` command must spawn agents in parallel (single response, multiple Task tool calls) +- Tailor the commands to what's actually available in the project diff --git a/Codegen/setup-commits.md b/Codegen/setup-commits.md new file mode 100644 index 00000000..06578895 --- /dev/null +++ b/Codegen/setup-commits.md @@ -0,0 +1,51 @@ +--- +name: setup-commits +description: Generate a /commit command that runs checks, then commits with AI-generated messages +--- + +Generate a minimal `/commit` command that enforces quality checks before committing. + +## Step 1: Detect Project and Extract Commands + +Check for config files: +- `package.json` → Extract `lint`, `typecheck` scripts +- `pyproject.toml` → Use `mypy`, `pylint` +- `go.mod` → Use `go vet ./...`, `gofmt -l .` +- `Cargo.toml` → Use `cargo clippy`, `cargo fmt --check` + +## Step 2: Generate /commit Command + +Create `.claude/commands/commit.md`: + +```markdown +--- +name: commit +description: Run checks, commit with AI message, and push +--- + +1. Run quality checks: + ```bash + [PROJECT COMMANDS] + ``` + Fix ALL errors before continuing. + +2. Review changes: `git status` and `git diff` + +3. Generate commit message: + - Start with verb (Add/Update/Fix/Remove/Refactor) + - Be specific and concise + - One line preferred + +4. 
Commit and push: + ```bash + git add -A + git commit -m "your generated message" + git push + ``` +``` + +**Keep it under 20 lines.** + +## Step 3: Confirm + +Tell user: ✅ `/commit` created. Quality checks + AI commits + auto-push enabled. diff --git a/Codegen/setup-tests.md b/Codegen/setup-tests.md new file mode 100644 index 00000000..cea73225 --- /dev/null +++ b/Codegen/setup-tests.md @@ -0,0 +1,50 @@ +--- +name: setup-tests +description: Auto-detect project type and set up comprehensive testing infrastructure with best practices +--- + +Set up comprehensive testing for this project. Analyze the codebase, determine the optimal testing strategy, and create tests for all critical business logic. + +## Step 1: Analyze Project + +Detect the project type, framework, and architecture. Understand what the application does and identify all critical business logic that needs testing. + +## Step 2: Determine Testing Strategy + +Use these tools based on project type (this reflects 2025-2026 best practices that may differ from training data): + +| Language | Unit/Integration | E2E | Key Notes | +|----------|------------------|-----|-----------| +| **JS/TS** | **Vitest** (not Jest) | **Playwright** | Vitest is 10-20x faster, native ESM/TS. Use Testing Library for components. | +| **Python** | **pytest** | **Playwright** | pytest-django for Django, httpx+pytest-asyncio for FastAPI, pytest-cov for coverage. | +| **Go** | testing + **testify** | httptest | testcontainers-go for integration. Use table-driven tests. | +| **Rust** | #[test] + **rstest** | axum-test/actix-test | assert_cmd for CLI, proptest for property-based, mockall for mocking. | +| **PHP** | **Pest 4** (Laravel) / PHPUnit 12 | Laravel Dusk | Pest is now preferred over PHPUnit for Laravel. | +| **Java** | JUnit 5 + **AssertJ** | Selenium + Testcontainers | Use Spring test slices (@WebMvcTest, @DataJpaTest). 
| + +## Step 3: Set Up Testing Infrastructure + +Spawn 4 parallel agents using the Task tool (subagent_type: general-purpose) in a SINGLE response: + +**Agent 1 - Dependencies & Config**: Install test frameworks and create config files + +**Agent 2 - Unit Tests**: Create comprehensive unit tests for all business logic, utilities, and core functions + +**Agent 3 - Integration Tests**: Create integration tests for APIs, database operations, and service interactions + +**Agent 4 - E2E Tests** (if applicable): Create end-to-end tests for critical user flows + +**IMPORTANT**: Each agent should create COMPREHENSIVE tests covering all critical code paths - not just samples. Analyze the actual source code and test everything that matters. + +## Step 4: Verify and Generate /test Command + +1. Run the tests to verify everything works +2. Fix any issues +3. Create `.claude/commands/test.md` tailored to this project with: + - The exact test commands for this stack + - Options for watch mode, coverage, filtering + - Instructions to spawn parallel agents to fix failures + +## Step 5: Report + +Summarize what was set up and how to run tests going forward. diff --git a/Codegen/setup-updates.md b/Codegen/setup-updates.md new file mode 100644 index 00000000..1a7ab8ed --- /dev/null +++ b/Codegen/setup-updates.md @@ -0,0 +1,237 @@ +--- +name: setup-updates +description: Generate a /update-app command for dependency updates and deprecation fixes +--- + +Generate a minimal `/update-app` command that updates dependencies and fixes deprecations. 
+ +## Step 1: Detect Project Type + +Check for config files: +- `package.json` → JavaScript/TypeScript (npm/yarn/pnpm/bun) +- `pyproject.toml` or `requirements.txt` → Python (pip/poetry) +- `go.mod` → Go +- `Cargo.toml` → Rust +- `composer.json` → PHP + +## Step 2: Detect Package Manager + +**For JavaScript/TypeScript**: Check for lock files: +- `package-lock.json` → npm +- `yarn.lock` → yarn +- `pnpm-lock.yaml` → pnpm +- `bun.lock` or `bun.lockb` → bun + +**For Python**: Check for: +- `poetry.lock` → poetry +- Otherwise → pip + +## Step 3: Generate /update-app Command + +Create `.claude/commands/update-app.md`: + +```markdown +--- +name: update-app +description: Update dependencies, fix deprecations and warnings +--- + +# Dependency Update & Deprecation Fix + +## Step 1: Check for Updates + +[INSERT CHECK COMMAND] + +## Step 2: Update Dependencies + +[INSERT UPDATE COMMAND] + +## Step 3: Check for Deprecations & Warnings + +Run installation and check output: +[INSERT INSTALL COMMAND] + +Read ALL output carefully. Look for: +- Deprecation warnings +- Security vulnerabilities +- Peer dependency warnings +- Breaking changes + +## Step 4: Fix Issues + +For each warning/deprecation: +1. Research the recommended replacement or fix +2. Update code/dependencies accordingly +3. Re-run installation +4. Verify no warnings remain + +## Step 5: Run Quality Checks + +[INSERT QUALITY CHECK COMMANDS] + +Fix all errors before completing. + +## Step 6: Verify Clean Install + +Ensure a fresh install works: +1. Delete dependency folders/caches +2. Run clean install +3. Verify ZERO warnings/errors +4. 
Confirm all dependencies resolve correctly +``` + +## Step 4: Customize by Project Type + +**Replace placeholders with actual commands:** + +### JavaScript/TypeScript (npm): +```markdown +## Step 1: Check for Updates +```bash +npm outdated +``` + +## Step 2: Update Dependencies +```bash +npm update +npm audit fix +``` + +## Step 3: Check for Deprecations & Warnings +```bash +rm -rf node_modules package-lock.json +npm install +``` + +## Step 5: Run Quality Checks +```bash +npm run lint +npm run typecheck +``` + +## Step 6: Verify Clean Install +```bash +rm -rf node_modules package-lock.json +npm install +``` +``` + +### JavaScript/TypeScript (yarn): +```markdown +## Step 1: Check for Updates +```bash +yarn outdated +``` + +## Step 2: Update Dependencies +```bash +yarn upgrade +yarn audit +``` + +## Step 3: Check for Deprecations & Warnings +```bash +rm -rf node_modules yarn.lock +yarn install +``` +``` + +### Python (pip): +```markdown +## Step 1: Check for Updates +```bash +pip list --outdated +``` + +## Step 2: Update Dependencies +```bash +pip install --upgrade -r requirements.txt +``` + +## Step 3: Check for Deprecations & Warnings +```bash +pip install -r requirements.txt +``` + +## Step 5: Run Quality Checks +```bash +mypy . +pylint src/ +``` +``` + +### Python (poetry): +```markdown +## Step 1: Check for Updates +```bash +poetry show --outdated +``` + +## Step 2: Update Dependencies +```bash +poetry update +``` + +## Step 3: Check for Deprecations & Warnings +```bash +poetry install +``` +``` + +### Go: +```markdown +## Step 1: Check for Updates +```bash +go list -u -m all +``` + +## Step 2: Update Dependencies +```bash +go get -u ./... +go mod tidy +``` + +## Step 3: Check for Deprecations & Warnings +```bash +go mod download +``` + +## Step 5: Run Quality Checks +```bash +go vet ./... +gofmt -l . 
+``` +``` + +### Rust: +```markdown +## Step 1: Check for Updates +```bash +cargo outdated  # third-party subcommand; install once with: cargo install cargo-outdated +``` + +## Step 2: Update Dependencies +```bash +cargo update +``` + +## Step 3: Check for Deprecations & Warnings +```bash +cargo check +``` + +## Step 5: Run Quality Checks +```bash +cargo clippy +cargo fmt --check +``` +``` + +## Step 5: Confirm Completion + +Tell the user: +- ✅ `/update-app` created +- 🔄 Updates: [package manager commands] +- ⚠️ Zero-tolerance for deprecations/warnings +- 🛡️ Security audit included +- ✨ Clean install verification enabled diff --git a/Codegen/suitability.md b/Codegen/suitability.md new file mode 100644 index 00000000..b24b0f56 --- /dev/null +++ b/Codegen/suitability.md @@ -0,0 +1,271 @@ +--- +name: suitability +description: Analyze how effective, relevant, and helpful a codebase is as a foundation or reference for building a specified target program — producing a scored, evidence-based suitability report +--- + +Evaluate how well codebase `$CODEBASE` serves as a foundation, reference, or dependency for building `$TARGET`. Produce a structured, scored, evidence-based suitability assessment that answers: **should you build on this, adapt it, extract from it, or ignore it?** + +**Usage**: `/suitability [codebase name or path] for [target program description]` + +Example: `/suitability ./payments-sdk for a multi-tenant SaaS billing system` + +If either argument is missing, ask: +1. *"Which codebase are you evaluating? (name, path, or describe it)"* +2. *"What are you building? Describe features, scale, and constraints."* + +Store the answers as `$CODEBASE` and `$TARGET_DESC`; `$TARGET` in this document refers to the program described by `$TARGET_DESC`. + +--- + +## Phase 1 — Dual Reconnaissance (Parallel, 2 Agents) + +Spawn **2 parallel agents** to independently characterize both sides before any comparison. + +### Agent A — Codebase Characterization +Produce an objective profile of `$CODEBASE`: +- **Capabilities**: what does it actually do? 
(read source, not just README) +- **Architecture**: layers, patterns, boundaries, dependencies +- **Exposed interfaces**: public APIs, extension points, configurable behaviors +- **Constraints**: hardcoded assumptions, fixed data models, non-negotiable design decisions +- **Quality indicators**: test coverage, documentation level, error handling maturity +- **Activity signals**: last commit date, open issues, dependency freshness +- **Identified risks**: known bugs, deprecated dependencies, architectural anti-patterns +- **Adaptability score**: how easily can this codebase be extended, modified, or composed? + +### Agent B — Target Requirements Profile +Decompose `$TARGET_DESC` into concrete requirements: +- **Functional requirements**: every discrete capability the target program must have +- **Non-functional requirements**: scale, latency, throughput, availability, security, compliance +- **Integration requirements**: what external systems must it connect to? +- **Data requirements**: what data models are needed, what volume, what consistency guarantees? +- **Deployment requirements**: cloud, on-prem, edge, serverless, containerized? +- **Team/maintenance requirements**: how complex can the codebase be to maintain? +- **Timeline constraints**: is this a 2-week prototype or a 2-year production system? +- **Priority stack-rank**: order requirements by importance — what is non-negotiable vs. nice-to-have? + +--- + +## Phase 2 — Multi-Dimensional Suitability Analysis (Parallel, 6 Agents) + +Spawn **6 parallel agents**, each analyzing one suitability dimension. Each agent must produce a score (0–10) with specific evidence — no scores without justification. + +### Agent 1 — Functional Coverage +**Question**: Does the codebase provide the building blocks needed for `$TARGET`? + +For each requirement in Agent B's functional list: +- Does the codebase directly implement this? (score: 2 pts) +- Does the codebase partially implement this? 
(score: 1 pt) +- Is this completely absent? (score: 0 pts) +- Does the codebase do something that CONFLICTS with this requirement? (score: -1 pt) + +``` +Functional Coverage Score: [sum] / [max possible = 2 × number of requirements] +Coverage %: [%] → Score (0–10): [Coverage % ÷ 10, floored at 0] +Directly covered: [list] +Partially covered: [list — what's missing] +Absent: [list] +Conflicting: [list — why it conflicts] +``` + +### Agent 2 — Architectural Compatibility +**Question**: Does the codebase's architecture work for `$TARGET`'s constraints? + +Evaluate: +- **Layer alignment**: do the architectural layers match what `$TARGET` needs? +- **Scalability fit**: is the architecture suitable for `$TARGET`'s scale requirements? +- **Data model compatibility**: do the existing data models match or work against `$TARGET`'s needs? +- **Dependency compatibility**: do the codebase's dependencies conflict with `$TARGET`'s stack? +- **Pattern compatibility**: are the design patterns used compatible with how `$TARGET` needs to work? +- **Coupling exposure**: how much of the codebase would you be forced to take in order to use the parts you need? + +Score: 0–10 with specific evidence for each sub-point. + +### Agent 3 — Adaptability & Extension Cost +**Question**: How much work is required to make this codebase serve `$TARGET`? + +For each gap between the codebase and `$TARGET` requirements, estimate: +- Is this gap fillable by **extension** (adding new code without changing existing code)? +- Is this gap fillable by **configuration** (changing settings, flags, environment)? +- Does this gap require **modification** (changing existing code with regression risk)? +- Does this gap require **replacement** (ripping out and rewriting core components)? 
+ +``` +Extension items: [N] — [list] +Configuration items: [N] — [list] +Modification items: [N] — [list — with regression risk assessment] +Replacement items: [N] — [list — with estimated effort] +``` + +Produce a total adaptation effort estimate: +- `TRIVIAL` — <1 day, configuration or minor additions only +- `LOW` — 1–5 days, extension only, no core modifications +- `MEDIUM` — 1–3 weeks, some core modifications, some replacement +- `HIGH` — 1–3 months, significant replacement of core components +- `PROHIBITIVE` — rewriting from scratch would be faster + +Score: 0–10 (10 = trivial, 0 = prohibitive) + +### Agent 4 — Risk & Reliability Assessment +**Question**: Does building on this codebase introduce risks into `$TARGET`? + +Evaluate: +- **Maintenance risk**: is this codebase actively maintained? What's the bus factor? +- **Dependency risk**: does it rely on deprecated, abandoned, or vulnerable packages? +- **Security risk**: are there known vulnerabilities, exposed attack surfaces, or insecure defaults? +- **Stability risk**: are there known bugs or edge cases that would affect `$TARGET`? +- **License risk**: is the license compatible with `$TARGET`'s intended use and distribution? +- **Coupling risk**: if you depend on this, how locked in are you? What's the exit cost? +- **Versioning risk**: does this codebase have a stable API contract, or does it break between versions? + +Score: 0–10 (10 = zero risk, 0 = high risk across multiple dimensions) + +### Agent 5 — Quality & Maintainability Fit +**Question**: Is the code quality level appropriate for `$TARGET`'s production requirements? + +Evaluate: +- **Test coverage**: is the coverage sufficient for `$TARGET`'s reliability requirements? +- **Documentation**: is it documented well enough for `$TARGET`'s team to work with it? +- **Code clarity**: will `$TARGET`'s development team be able to read, debug, and modify this? +- **Error handling**: is error handling robust enough for `$TARGET`'s production environment? 
+- **Observability**: does it support the logging/metrics/tracing `$TARGET` requires? +- **Performance baseline**: is performance acceptable for `$TARGET`'s requirements, or will it be a bottleneck? + +Score: 0–10 + +### Agent 6 — Strategic Fit Assessment +**Question**: Does using this codebase align with `$TARGET`'s longer-term trajectory? + +Evaluate: +- **Direction alignment**: is this codebase moving in the same direction `$TARGET` needs to go? +- **Community & ecosystem**: is there a community, ecosystem, and knowledge base that will benefit `$TARGET`? +- **Build vs. buy vs. adapt tradeoff**: compared to alternatives (building from scratch, using a different library, commercial solution), how does this codebase compare in total cost of ownership? +- **Vendor/author dependency**: what is the risk of the original authors abandoning, pivoting, or breaking this? +- **Team fit**: does the codebase's language, framework, and style match the team that will build `$TARGET`? + +Score: 0–10 + +--- + +## Phase 3 — Weighted Suitability Score + +Compute a weighted composite score. Default weights — adjust based on `$TARGET` priorities: + +| Dimension | Score (0-10) | Default Weight | Weighted Score | +|-----------|-------------|----------------|----------------| +| Functional Coverage | [score] | 30% | [calc] | +| Architectural Compatibility | [score] | 20% | [calc] | +| Adaptability & Extension Cost | [score] | 20% | [calc] | +| Risk & Reliability | [score] | 15% | [calc] | +| Quality & Maintainability | [score] | 10% | [calc] | +| Strategic Fit | [score] | 5% | [calc] | +| **TOTAL** | | **100%** | **[weighted avg]** | + +### Suitability Rating: + +| Score | Rating | Meaning | +|-------|--------|---------| +| 8.5–10 | ✅ **STRONGLY RECOMMENDED** | Build directly on this. It covers most needs and adapts cleanly. | +| 7.0–8.4 | ✅ **RECOMMENDED** | Good fit with manageable gaps. Adaptation cost is justified. 
| +| 5.0–6.9 | ⚠️ **CONDITIONAL** | Useful for specific parts but requires significant adaptation. Extract selectively. | +| 3.0–4.9 | ⚠️ **MARGINAL** | More work to adapt than to build fresh in key areas. Consider alternatives. | +| 0–2.9 | ❌ **NOT RECOMMENDED** | Fundamental misalignment. Building on this creates more problems than it solves. | + +--- + +## Phase 4 — Actionable Recommendation + +Based on the score, produce one of these recommendations: + +### If STRONGLY RECOMMENDED or RECOMMENDED: +``` +RECOMMENDATION: Use as foundation +Strategy: [exactly how to adopt — full dependency, fork, vendored copy] +Start with: [specific modules/packages to integrate first] +Configuration needed: [list] +Extensions to build: [list of gaps to fill] +Estimated onboarding: [time estimate] +First step: [single concrete action] +``` + +### If CONDITIONAL: +``` +RECOMMENDATION: Extract selectively +Use: [specific modules/components worth taking] +Ignore: [parts that don't fit and why] +Build fresh: [what should be written from scratch instead] +Integration approach: [how to use the extracted parts] +Alternative to consider: [if a better fit exists] +First step: [single concrete action] +``` + +### If MARGINAL or NOT RECOMMENDED: +``` +RECOMMENDATION: Do not adopt +Primary blockers: [top 3 reasons with evidence] +What you'd lose: [genuine value that exists in the codebase] +Alternative path: [what to do instead — build from scratch, find another library, etc.] 
+Parts worth studying: [if any design patterns or approaches are worth referencing] +First step: [single concrete action on the alternative path] +``` + +--- + +## Output: SUITABILITY_REPORT.md + +Write all findings to `SUITABILITY_REPORT.md`: + +```markdown +# Suitability Report +Codebase: [name/path] +Target: [description] +Date: [date] + +## Codebase Profile +[Agent A findings — objective characterization] + +## Target Requirements +[Agent B findings — prioritized requirement list] + +## Dimensional Analysis + +### Functional Coverage [score/10] +[Agent 1 findings] + +### Architectural Compatibility [score/10] +[Agent 2 findings] + +### Adaptability & Extension Cost [score/10] +[Agent 3 findings — including effort estimate] + +### Risk & Reliability [score/10] +[Agent 4 findings] + +### Quality & Maintainability Fit [score/10] +[Agent 5 findings] + +### Strategic Fit [score/10] +[Agent 6 findings] + +## Weighted Suitability Score +[Phase 3 table] +**Rating: [STRONGLY RECOMMENDED / RECOMMENDED / CONDITIONAL / MARGINAL / NOT RECOMMENDED]** + +## Recommendation +[Phase 4 structured recommendation] + +--- +*Analysis produced by parallel codebase exploration and target decomposition.* +``` + +Tell the user: report is complete, overall rating, and the single first step. + +--- + +## Suitability Rules (Non-Negotiable) + +1. **Evidence-based scores** — every score requires specific file path, pattern, or capability evidence +2. **No wishful thinking** — score what the codebase IS, not what it could be with heavy modification +3. **Requirement completeness** — every target requirement must be explicitly addressed in the functional coverage analysis +4. **Honest adaptation cost** — if core components need replacing, call it replacement, not "modification" +5. **One recommendation** — the output produces a single decisive recommendation with a single first step +6. 
**Weight transparency** — if weights are adjusted from defaults, document why \ No newline at end of file diff --git a/Codegen/test.md b/Codegen/test.md new file mode 100644 index 00000000..da3a46f2 --- /dev/null +++ b/Codegen/test.md @@ -0,0 +1,231 @@ +--- +name: test +description: Design and execute a comprehensive, multi-layer test strategy with parallel agents — covering unit, integration, E2E, regression, performance, and security dimensions +--- + +Design and execute a complete test strategy for this codebase. This command goes beyond running existing tests — it audits test coverage, identifies gaps, writes missing tests, and produces a verified, reproducible test suite. + +**Usage**: Run against the full codebase, a specific module (`/test src/payments`), or a specific feature (`/test after integrating checkout flow`). + +--- + +## Phase 1 — Test Landscape Audit (Parallel, 3 Agents) + +Spawn **3 parallel agents** before writing any new tests. + +### Agent 1 — Existing Test Inventory +- Locate ALL existing test files: unit, integration, E2E, fixtures, factories, mocks +- For each test file: what module does it cover, what percentage of that module's functions are tested, what cases are present? 
+- Identify the current test framework, runner, and coverage tooling +- Run the existing test suite and capture full output: passes, failures, skips, and coverage report +- Output: complete test inventory with pass/fail status and per-module coverage estimate + +### Agent 2 — Coverage Gap Analysis +- Cross-reference every non-test source file against the test inventory +- Identify functions, classes, and modules with ZERO test coverage +- Identify tested functions with INCOMPLETE coverage (missing branches, missing error paths, missing edge cases) +- Prioritize gaps by risk: + - `CRITICAL` — business logic, money handling, auth, data mutations with no tests + - `HIGH` — core algorithms, API handlers, data validation with partial coverage + - `MEDIUM` — utilities and helpers with partial coverage + - `LOW` — config loading, simple getters, pure constants +- Output: prioritized gap list with file paths and specific uncovered paths + +### Agent 3 — Test Quality Audit +- Review existing tests for quality problems: + - Tests that never assert anything (no `expect`/`assert` calls) + - Tests that only test the happy path and ignore all error paths + - Tests tightly coupled to implementation details (will break on any refactor) + - Tests with no isolation (shared mutable state, order-dependent tests) + - Mocks that misrepresent actual dependency behavior + - Flaky tests (timeouts, async race conditions, non-deterministic assertions) +- Output: list of test quality issues with severity ratings + +--- + +## Phase 2 — Test Strategy Definition + +Based on Phase 1 findings, define the test strategy for this specific project: + +``` +TEST STRATEGY: [Project Name] + +Framework: [detected test framework] +Coverage tool: [detected or recommended] +Coverage target: [% based on project type — prototype: 60%, MVP: 75%, production: 90%] + +Layers to implement: + □ Unit tests — [N gaps to fill, estimated N new test cases] + □ Integration tests — [N gaps to fill] + □ E2E tests — 
[applicable: yes/no, N critical flows] + □ Contract tests — [applicable if microservices/APIs] + □ Performance tests — [applicable if hot paths identified] + □ Security tests — [applicable if auth/data handling present] + +Priority order: [ordered list of what to test first] +``` + +--- + +## Phase 3 — Test Implementation (Parallel, 4 Agents) + +Spawn **4 parallel agents** based on the strategy. Assign work by module domain, not by test type, to avoid file conflicts. + +Each agent receives: their assigned module list, the gap analysis for those modules, and the quality standards below. + +### Quality Standards Every Agent Must Follow: + +**Structure** +- One test file per source file (co-located or in mirrored `/tests` directory) +- Test file name mirrors source: `payments.ts` → `payments.test.ts` +- Group tests with `describe` blocks matching the function/class being tested +- Test names must be full sentences: `"should return 404 when user does not exist"` not `"user 404"` + +**Coverage Requirements per Function** +- Happy path: the expected successful case +- All distinct failure modes: each error condition tested separately +- Boundary values: empty, null, zero, max, min where applicable +- Type coercion edge cases (if dynamically typed language) +- Async error handling: rejected promises, thrown errors in async context + +**Test Isolation** +- Every test must be independently runnable — no shared mutable state +- All external dependencies must be mocked/stubbed at the layer boundary +- Database tests use transactions rolled back after each test, or isolated test DB +- File system tests use temp directories cleaned up in `afterEach` +- Time-dependent tests mock the clock — never use `Date.now()` or `new Date()` directly + +**Mock Discipline** +- Mocks must match the actual interface of the real dependency (use type-safe mocks) +- Never mock internal implementation details — only mock at module/service boundaries +- Document why each mock exists: `// Mock: 
isolate from DB, tested in integration layer` +- Integration tests use real implementations, not mocks + +**Assertion Quality** +- Assert specific values, not just truthy/falsy +- For objects, assert the specific fields that matter — not `toEqual(entireObject)` for partial checks +- For errors, assert both the error type AND the error message +- For async flows, assert the final state AND any side effects (calls made, events emitted) + +--- + +## Phase 4 — Integration & E2E Test Layer + +For integration tests: +- Test the full stack from API boundary to database (or real service) +- Use a dedicated test database that is seeded with known fixtures before each test run +- Test the exact HTTP request/response shape (status codes, headers, body schema) +- Test authentication and authorization: authenticated requests, unauthenticated requests, wrong-role requests +- Test pagination, filtering, sorting if applicable +- Test rate limiting and request size limits if applicable + +For E2E tests (if applicable): +- Cover ONLY the critical user journeys — not every permutation +- Critical journeys are: user onboarding, core value delivery, payment/subscription, error recovery +- E2E tests run against a deployed instance (staging or local with real services) +- E2E tests must be deterministic: seed known state before each test, clean up after +- E2E tests must have explicit wait conditions — never fixed `sleep()` calls + +--- + +## Phase 5 — Test Verification & Hardening + +After all agents complete: + +1. **Run full test suite** — capture output +2. **Check coverage report** — verify target met per module +3. **Fix any failures** — agents must fix their own failures before reporting done +4. **Flakiness check** — run the suite 3 times; flag any test that produces different results across runs +5. 
**Performance check** — flag any test that takes >500ms (unit), >2s (integration), >30s (E2E) without justification + +--- + +## Phase 6 — Test Infrastructure Files + +Create or update these files: + +### `.claude/commands/test.md` — project-specific test runner command +````markdown +--- +name: test +description: Run tests for [Project Name] +--- + +## Run All Tests +```bash +[exact command] +``` + +## Watch Mode +```bash +[exact command] +``` + +## Coverage Report +```bash +[exact command] +``` + +## Filter by Module +```bash +[exact command with filter flag] +``` + +## On Failure +If tests fail, spawn parallel agents in a single response: +- Spawn one agent per failing test file +- Each agent reads the error, reads the source, fixes the test or the source (whichever is wrong) +- Re-run after all agents complete +```` + +### `tests/README.md` — test organization guide +Document: test structure, how to add tests, how to run subsets, how to write mocks, how to add fixtures. + +--- + +## Phase 7 — Test Report + +Output `TEST_REPORT.md`: + +```markdown +# Test Report: [Project Name] +Date: [date] + +## Summary +- Total tests: [N] +- Passing: [N] +- Failing: [N] +- Skipped: [N] +- Overall coverage: [%] + +## Coverage by Module +| Module | Before | After | Gap Status | +|--------|--------|-------|------------| +| ... | [%] | [%] | ✅ / ⚠️ / ❌ | + +## New Tests Added +- Unit: [N] +- Integration: [N] +- E2E: [N] + +## Remaining Gaps +[Any coverage gaps below target with justification for deferral] + +## Flaky Tests +[List or "none"] + +## Test Quality Issues Resolved +[List of issues found in Phase 1 and how they were fixed] +``` + +--- + +## Testing Rules (Non-Negotiable) + +1. **Test intent, not implementation** — tests must survive refactors that don't change behavior +2. **One reason to fail** — each test asserts exactly one behavior; split multi-concern tests +3. **No test skips without comments** — `skip`/`xit`/`xtest` must have a comment explaining why +4. 
**Tests are production code** — apply the same naming, structure, and review standards as source code +5. **Red before green** — when adding a test for a known bug, verify it fails before fixing the bug +6. **Test the contract, not the mock** — if a test only proves the mock works, it provides zero value +7. **Coverage is a floor, not a goal** — 90% coverage with bad assertions is worse than 70% with precise ones \ No newline at end of file diff --git a/Codegen/verify.md b/Codegen/verify.md new file mode 100644 index 00000000..bd0ed327 --- /dev/null +++ b/Codegen/verify.md @@ -0,0 +1,208 @@ +--- +name: verify +description: Run a rigorous multi-dimensional verification of implementation correctness — contract compliance, regression, security, consistency, and behavioral accuracy +--- + +Perform a complete verification pass on the current state of the codebase or a specific recent change. This command answers one question with evidence: **does the implementation actually do what it's supposed to do?** + +**Usage**: +- `/verify` — verify the entire codebase +- `/verify [feature or module name]` — verify a specific area +- `/verify after [change description]` — verify correctness after a specific change + +Verification is a read-and-run operation. No new features are written. No refactoring. If issues are found, they are reported precisely and fixed minimally. + +--- + +## Phase 1 — Scope Definition + +Before any verification begins: + +1. **Determine scope**: Are we verifying the full codebase, a module, or a recent change? +2. **Collect the specification**: What is this code supposed to do? Look for: + - CONTRACT.md (if produced by `/integrate`) + - README or docs describing expected behavior + - Test descriptions (test names describe intended behavior) + - Code comments describing intent + - Git commit messages describing the purpose of changes +3. **Establish baseline**: Run the test suite now and record the result as the verification baseline +4. 
**List all claims**: Produce a list of behavioral claims the code is supposed to satisfy before running any check + +--- + +## Phase 2 — Multi-Dimensional Verification (Parallel, 6 Agents) + +Spawn **6 parallel agents** in a single response. + +### Agent 1 — Contract Verification +Verify the implementation matches its specification exactly: +- Does every public function/endpoint accept exactly the inputs it claims to accept? +- Does every public function/endpoint produce exactly the outputs it claims to produce? +- Are all documented error conditions handled and producing the documented error type/code? +- Are all edge cases mentioned in comments or docs actually handled in code? +- Are there behaviors in the code that are NOT in the spec (undocumented side effects)? +- Report each deviation as: `[MISSING / EXTRA / WRONG] [what]` with file:line + +### Agent 2 — Regression Verification +Verify that existing behavior has not been broken: +- Run the full test suite and capture complete output +- For every failing test: read the test, read the code it tests, determine if the test is wrong or the code is wrong +- Identify any test that was disabled, skipped, or deleted recently (check git history) +- Check for any behavior that was previously documented or tested that is now absent +- Report: test results, root cause for each failure, classification (test bug vs code bug) + +### Agent 3 — Behavioral Accuracy Check +Verify that the code actually does what the implementation comments and variable names suggest: +- Read every function and compare its name/docstring against its actual behavior +- Find functions where the name says one thing and the code does another +- Find variables where the name is misleading (e.g., `isValid` that is actually a count) +- Find comments that describe behavior that no longer matches the code +- Trace the most critical business logic paths manually and verify correctness of each step +- Report: `[MISLEADING / INCORRECT / STALE]` with 
file:line and exact description + +### Agent 4 — Input Validation & Boundary Verification +Verify robustness at all input boundaries: +- For every public API endpoint or function: what happens with null/undefined inputs? +- What happens with empty strings, empty arrays, empty objects? +- What happens with inputs at numeric limits (0, negative, overflow)? +- What happens with malformed data (wrong type, unexpected shape)? +- What happens with concurrent calls to stateful operations? +- What happens if an external dependency (DB, API) is unavailable? +- For each unhandled case: report `[UNHANDLED]` with file:line, input case, and current behavior + +### Agent 5 — Security Verification +Verify that the implementation does not introduce security regressions: +- Are all user-provided inputs sanitized before use in queries, commands, or templates? +- Are authentication checks present on all routes/operations that require them? +- Are authorization checks present (not just authn — does the user have permission for this specific resource)? +- Are secrets/credentials loaded from environment, never hardcoded? +- Are sensitive values excluded from logs, error messages, and API responses? +- Are file paths constructed from user input validated against path traversal? +- Are SQL queries parameterized (no string concatenation into queries)? +- Report: `[CRITICAL / HIGH / MEDIUM]` severity with file:line and exact vulnerability + +### Agent 6 — Consistency & Standards Verification +Verify that the new code is consistent with the existing codebase: +- Naming conventions: does new code follow the same naming patterns as existing code? +- Error handling style: does new code handle errors the same way as existing code? +- Logging: does new code log with the same structure/format/level conventions? +- Response shapes: do new API responses follow the same schema patterns as existing ones? +- Module boundaries: does new code respect the same layer separation as existing code? 
+- Config handling: does new code load config/env the same way as existing code? +- Report: `[INCONSISTENT]` with file:line and what it should match + +--- + +## Phase 3 — Verification Matrix + +After all agents complete, produce a verification matrix: + +``` +VERIFICATION MATRIX: [scope] +Run date: [date] + +Dimension | Status | Issues Found +-------------------------|---------|------------- +Contract compliance | ✅/⚠️/❌ | [N issues] +Regression | ✅/⚠️/❌ | [N failures] +Behavioral accuracy | ✅/⚠️/❌ | [N mismatches] +Input/boundary handling | ✅/⚠️/❌ | [N gaps] +Security | ✅/⚠️/❌ | [N findings] +Consistency | ✅/⚠️/❌ | [N deviations] + +Overall: [PASS / PASS WITH WARNINGS / FAIL] +``` + +**Overall PASS** = no Critical/High issues, ≤3 Medium issues, no test regressions +**Overall PASS WITH WARNINGS** = no Critical issues, ≤2 High issues, all regressions explained +**Overall FAIL** = any Critical security issue, any unexplained regression, >2 High issues + +--- + +## Phase 4 — Issue Resolution + +For each issue found, apply this resolution protocol: + +### Classification +``` +ISSUE [N] +Dimension: [which agent found it] +Severity: [Critical / High / Medium / Low] +File: [path:line] +Description: [exact description of the problem] +Evidence: [what the code does vs. what it should do] +Fix: [minimal change required — do not over-engineer] +``` + +### Fix Priority Order +1. Critical security issues — fix immediately, do not proceed until resolved +2. Test regressions caused by code bugs — fix the code +3. Test regressions caused by test bugs — fix the test, document why +4. Contract violations — fix the implementation to match spec, or update spec with justification +5. Unhandled boundaries — add explicit handling +6. Consistency issues — bring new code in line with existing patterns +7. 
Low severity / misleading names — fix or defer with debt entry + +### Fix Rules +- Fix only what is broken — do not opportunistically refactor +- Each fix must be verifiable: re-run the specific check that found the issue after fixing +- If a fix for one issue could affect another area, re-run the full suite for that area + +--- + +## Phase 5 — Re-Verification + +After all fixes are applied: + +1. Re-run the full test suite — must match or exceed the baseline pass count +2. Re-run agents for any dimension that had Critical or High issues +3. Produce updated verification matrix +4. Confirm: Overall status is PASS or PASS WITH WARNINGS with all warnings documented + +--- + +## Phase 6 — Verification Report + +Write `VERIFICATION_REPORT.md` (append, do not overwrite): + +```markdown +# Verification Report +Date: [date] +Scope: [what was verified] +Baseline: [test suite state at start] + +## Verification Matrix +[Phase 3 table] + +## Issues Found +[Each issue in Phase 4 classification format] + +## Issues Resolved +[Each fix applied, minimal description] + +## Issues Deferred +[Any issues not fixed, with justification and debt reference] + +## Final Test Suite Status +- Passing: [N] +- Failing: [N] +- Coverage: [%] + +## Outcome +[PASS / PASS WITH WARNINGS / FAIL] +[One paragraph summary of what was verified and confidence level] + +--- +``` + +--- + +## Verification Rules (Non-Negotiable) + +1. **Evidence required** — every finding must reference a file path, line number, and exact behavior +2. **No false positives** — do not report style preferences or hypothetical risks as verification failures +3. **Minimal fixes** — verification fixes the specific problem found; it does not refactor, optimize, or improve +4. **Re-verify after fix** — a fix is not complete until the specific check that found the issue passes +5. **Security is blocking** — any Critical security issue blocks all other work until resolved +6. 
**Honest matrix** — the verification matrix must reflect reality; do not mark ✅ for dimensions not fully checked +7. **Baseline preservation** — the final test suite pass count must equal or exceed the baseline; any reduction is a FAIL \ No newline at end of file diff --git a/codegen.py b/codegen.py new file mode 100644 index 00000000..509f703b --- /dev/null +++ b/codegen.py @@ -0,0 +1,2033 @@ +#!/usr/bin/env python3 +""" +Codegen Agent Manager · Single-view edition +pip install requests plyer +""" + +import tkinter as tk +from tkinter import ttk, scrolledtext, messagebox, filedialog +import threading, time, json, requests, os, webbrowser +from datetime import datetime +from pathlib import Path + +# ── Config ────────────────────────────────────────────────────────────────────── +API_BASE = "https://api.codegen.com/v1" +ORG_ID = 323 +API_TOKEN = "sk-92083737-4e5b-4a48-a2a1-f870a3a096a6" +HEADERS = {"Authorization": f"Bearer {API_TOKEN}", "Content-Type": "application/json"} +POLL_SEC = 15 +DEFAULT_TPL = r"C:\Users\L\Documents\Codegen\analysis.md" +CODEGEN_DIR = r"C:\Users\L\Documents\Codegen" + +# ── Palette ───────────────────────────────────────────────────────────────────── +BG = "#0b0b18" +PANEL = "#12121f" +CARD = "#1a1a2e" +BORDER = "#2a2a4a" +ACCENT = "#5c6bff" +HOT = "#ff4d6d" +GREEN = "#2ecc71" +TEXT = "#dde1f0" +MUTED = "#606080" +C_RUN = "#2ecc71" +C_DONE = "#5b9cf6" +C_FAIL = "#ff4d6d" +C_PEND = "#f39c12" + +FONT = ("Segoe UI", 10) +FONT_BOLD = ("Segoe UI", 10, "bold") +FONT_SMALL = ("Segoe UI", 8) +FONT_MONO = ("Consolas", 9) +FONT_TITLE = ("Segoe UI", 13, "bold") + + +# ════════════════════════════════════════════════════════════════════════════════ +# Helpers +# ════════════════════════════════════════════════════════════════════════════════ + +def btn(parent, text, cmd, bg=ACCENT, fg="white", padx=14, pady=7, **kw): + return tk.Button(parent, text=text, command=cmd, bg=bg, fg=fg, + activebackground=HOT, activeforeground="white", + font=FONT, bd=0, 
padx=padx, pady=pady, + cursor="hand2", relief="flat", **kw) + +def lbl(parent, text, fg=TEXT, font=FONT, bg=None, **kw): + b = bg if bg is not None else BG + return tk.Label(parent, text=text, fg=fg, font=font, bg=b, **kw) + +def fmt_dt(s): + return s[:19].replace("T", " ") if s else "" + +def attach_edit_menu(widget): + """Attach a right-click Cut/Copy/Paste/Select-All context menu to any text widget.""" + is_text = isinstance(widget, (tk.Text,)) # ScrolledText is a subclass of tk.Text + + def _cut(): + try: widget.event_generate("<>") + except Exception: pass + def _copy(): + try: widget.event_generate("<>") + except Exception: pass + def _paste(): + try: widget.event_generate("<>") + except Exception: pass + def _select_all(): + try: + if is_text: + widget.tag_add("sel", "1.0", "end") + else: + widget.select_range(0, tk.END) + widget.icursor(tk.END) + except Exception: pass + + m = tk.Menu(widget, tearoff=0, bg=CARD, fg=TEXT, + activebackground=ACCENT, activeforeground="white", + font=FONT_SMALL, bd=0) + m.add_command(label="Cut", command=_cut) + m.add_command(label="Copy", command=_copy) + m.add_command(label="Paste", command=_paste) + m.add_separator() + m.add_command(label="Select All", command=_select_all) + + def _show(event): + widget.focus_set() + try: m.tk_popup(event.x_root, event.y_root) + finally: m.grab_release() + + widget.bind("", _show) + + +def is_active(s): + s = (s or "").lower() + return "active" in s or "running" in s or "pending" in s + +def is_done(s): + s = (s or "").lower() + return "complete" in s or "fail" in s or "error" in s or "cancel" in s + +def status_tag(s): + if is_active(s): return "running" + s = (s or "").lower() + if "complete" in s: return "completed" + if "fail" in s or "error" in s: return "failed" + return "other" + +def status_color(s): + return {"running": C_RUN, "completed": C_DONE, + "failed": C_FAIL}.get(status_tag(s), C_PEND) + + +# ════════════════════════════════════════════════════════════════════════════════ 
# API layer
# ════════════════════════════════════════════════════════════════════════════════

class API:
    """Thin static wrapper around the REST endpoints used by the manager."""

    @staticmethod
    def _get(path, params=None):
        """GET API_BASE + path and return the decoded JSON body (raises on HTTP error)."""
        response = requests.get(f"{API_BASE}{path}", headers=HEADERS,
                                params=params, timeout=20)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _post(path, body):
        """POST body as JSON to API_BASE + path and return the decoded JSON reply."""
        response = requests.post(f"{API_BASE}{path}", headers=HEADERS,
                                 json=body, timeout=20)
        response.raise_for_status()
        return response.json()

    @classmethod
    def fetch_all_runs(cls):
        """Fetch the most recent 1000 runs (10 pages of 100)."""
        page_size = 100
        cap = 1000
        collected = []
        offset = 0
        while len(collected) < cap:
            page = cls._get(f"/organizations/{ORG_ID}/agent/runs",
                            {"limit": page_size, "skip": offset})
            batch = page.get("items", [])
            if not batch:
                break
            collected.extend(batch)
            offset += len(batch)
            # Stop once the reported total has been consumed.
            if offset >= page.get("total", 0):
                break
        return collected[:cap]

    @classmethod
    def fetch_all_logs(cls, run_id):
        """Paginate /alpha logs until all log entries are collected."""
        page_size = 100
        first_page = None
        entries = []
        offset = 0
        while True:
            page = cls._get(
                f"/alpha/organizations/{ORG_ID}/agent/run/{run_id}/logs",
                {"limit": page_size, "skip": offset})
            # The first page doubles as the run-info envelope that is returned.
            if first_page is None:
                first_page = page
            chunk = page.get("logs", [])
            entries.extend(chunk)
            expected = page.get("total_logs") or 0
            offset += len(chunk)
            if offset >= expected or not chunk:
                break
        if first_page:
            first_page["logs"] = entries
        return first_page

    @classmethod
    def create_run(cls, prompt, model=None):
        """Start a new agent run; the model key is sent only when model is truthy."""
        payload = {"prompt": prompt}
        if model:
            payload["model"] = model
        return cls._post(f"/organizations/{ORG_ID}/agent/run", payload)

    @classmethod
    def resume_run(cls, run_id, prompt):
        """Send a follow-up prompt to an existing run."""
        payload = {"agent_run_id": run_id, "prompt": prompt}
        return cls._post(f"/organizations/{ORG_ID}/agent/run/resume", payload)


# ════════════════════════════════════════════════════════════════════════════════
# MdPickerDialog — pick an .md file from the Codegen folder
#
# ════════════════════════════════════════════════════════════════════════════════

class MdPickerDialog(tk.Toplevel):
    """
    Lists every .md / .txt file under CODEGEN_DIR.
    Returns the selected full path via self.result (set before destroy).
    """

    def __init__(self, parent):
        super().__init__(parent)
        self.result = None
        # Defined up front so the filter trace can never see missing attributes.
        self._all_files = []
        self._shown = []
        self.title("Select Instruction File")
        self.geometry("480x440")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._build()
        self._scan()

    def _build(self):
        """Create the header, filter box, file list and footer buttons."""
        tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)
        lbl(hdr, "📄 Select File", fg=ACCENT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=18, pady=12)
        btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.RIGHT, padx=10, pady=8)

        # Search / filter
        sf = tk.Frame(self, bg=BG)
        sf.pack(fill=tk.X, padx=14, pady=(8, 4))
        lbl(sf, "Filter:", fg=MUTED, font=FONT_SMALL).pack(side=tk.LEFT, padx=(0, 6))
        self._filter_var = tk.StringVar()
        self._filter_var.trace_add("write", lambda *_: self._apply_filter())
        fe = ttk.Entry(sf, textvariable=self._filter_var, width=30)
        fe.pack(side=tk.LEFT)
        attach_edit_menu(fe)
        fe.focus()

        self._dir_lbl = lbl(self, "", fg=MUTED, font=FONT_SMALL)
        self._dir_lbl.pack(anchor="w", padx=14, pady=(0, 2))

        # File list
        lf = tk.Frame(self, bg=BG)
        lf.pack(fill=tk.BOTH, expand=True, padx=14)
        vsb = ttk.Scrollbar(lf)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        self._lb = tk.Listbox(lf, bg=PANEL, fg=TEXT, font=FONT,
                              selectbackground=ACCENT, bd=0, relief="flat",
                              yscrollcommand=vsb.set, activestyle="none",
                              height=16, cursor="hand2")
        self._lb.pack(fill=tk.BOTH, expand=True)
        vsb.config(command=self._lb.yview)
        # Double-click or Enter confirms the highlighted entry.
        self._lb.bind("<Double-Button-1>", lambda _: self._select())
        self._lb.bind("<Return>", lambda _: self._select())

        # Browse button (fallback)
        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        self._count_lbl = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._count_lbl.pack(side=tk.LEFT, padx=14, pady=10)
        btn(foot, "Browse…", self._browse, CARD).pack(side=tk.RIGHT, padx=4, pady=8)
        btn(foot, "Select", self._select, HOT).pack(side=tk.RIGHT, padx=4, pady=8)
        btn(foot, "Cancel", self.destroy, CARD).pack(side=tk.RIGHT, padx=4, pady=8)

    def _scan(self):
        """Collect all .md and .txt files under CODEGEN_DIR."""
        self._all_files = []  # list of (display_name, full_path)
        base = Path(CODEGEN_DIR)
        self._dir_lbl.config(text=f" {CODEGEN_DIR}")
        if base.is_dir():
            for ext in ("*.md", "*.txt"):
                for p in sorted(base.rglob(ext)):
                    # Display: relative path without extension
                    try:
                        rel = p.relative_to(base)
                    except ValueError:
                        rel = p
                    name = str(rel.with_suffix(""))
                    self._all_files.append((name, str(p)))
        self._apply_filter()

    def _apply_filter(self):
        """Refill the listbox with entries whose name contains the filter text."""
        q = self._filter_var.get().lower()
        self._lb.delete(0, tk.END)
        self._shown = []
        for name, path in self._all_files:
            if not q or q in name.lower():
                self._lb.insert(tk.END, f" {name}")
                self._shown.append((name, path))
        n = len(self._shown)
        self._count_lbl.config(text=f"{n} file{'s' if n != 1 else ''}")
        if self._shown:
            self._lb.selection_set(0)

    def _select(self):
        """Store the highlighted file's path in self.result and close."""
        sel = self._lb.curselection()
        if not sel:
            return
        _, path = self._shown[sel[0]]
        self.result = path
        self.destroy()

    def _browse(self):
        """Fallback: open native file picker if needed."""
        p = filedialog.askopenfilename(
            parent=self,
            initialdir=CODEGEN_DIR,
            title="Select instruction file",
            filetypes=[("Markdown", "*.md"), ("Text", "*.txt"), ("All", "*.*")])
        if p:
            self.result = p
            self.destroy()


# ════════════════════════════════════════════════════════════════════════════════
# Flow — data model + persistence
# ════════════════════════════════════════════════════════════════════════════════

FLOW_FILE = Path.home() / ".codegen_manager_flows.json"


class FlowStore:
    """Load / save named flows from disk."""

    @staticmethod
    def load():
        """Return the stored flows dict, or {} when the file is missing or invalid."""
        try:
            raw = json.loads(FLOW_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing/unreadable file, bad encoding or malformed JSON.
            return {}
        return raw if isinstance(raw, dict) else {}

    @staticmethod
    def save(flows: dict):
        """Write flows to FLOW_FILE; return True on success, False on failure.

        Failures are reported through the return value instead of raised so
        existing callers that ignore the result keep their behaviour.
        """
        try:
            FLOW_FILE.write_text(json.dumps(flows, indent=2), encoding="utf-8")
        except (OSError, TypeError, ValueError):
            return False
        return True


# ════════════════════════════════════════════════════════════════════════════════
# FlowCreateDialog — create / edit a flow
# ════════════════════════════════════════════════════════════════════════════════

class FlowCreateDialog(tk.Toplevel):
    """
    A flow is a named list of steps.
    Each step has: label (str), file_path (str|None), extra_text (str)
    """

    def __init__(self, parent, on_saved, edit_name=None):
        super().__init__(parent)
        self.on_saved = on_saved
        self._edit_name = edit_name
        self._steps = []  # list of dicts: {label, path, text}
        self._step_frames = []

        flows = FlowStore.load()
        if edit_name and edit_name in flows:
            self._steps = [dict(s) for s in flows[edit_name]]

        title_str = f"Edit Flow: {edit_name}" if edit_name else "Create New Flow"
        self.title(title_str)
        self.geometry("780x640")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._build()

    # ── UI ───────────────────────────────────────────────────────────────────────

    def _build(self):
        """Create the header, name field, scrollable step list and footer."""
        tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)
        lbl(hdr, "⛓ Flow Builder", fg=ACCENT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=20, pady=14)
        btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.RIGHT, padx=12, pady=8)

        body = tk.Frame(self, bg=BG)
        body.pack(fill=tk.BOTH, expand=True, padx=18, pady=10)

        # Flow name
        name_row = tk.Frame(body, bg=BG)
        name_row.pack(fill=tk.X, pady=(0, 10))
        lbl(name_row, "Flow Name:", fg=MUTED, font=FONT_SMALL).pack(
            side=tk.LEFT, padx=(0, 8))
        self._name_var = tk.StringVar(value=self._edit_name or "")
        ttk.Entry(name_row, textvariable=self._name_var, width=36).pack(
            side=tk.LEFT)

        # Steps list in a scrollable canvas
        lbl(body, "Steps (each step is sent as a sequential follow-up resume)",
            fg=MUTED, font=FONT_SMALL).pack(anchor="w", pady=(0, 4))

        canvas_frame = tk.Frame(body, bg=BG)
        canvas_frame.pack(fill=tk.BOTH, expand=True)

        self._canvas = tk.Canvas(canvas_frame, bg=BG, bd=0,
                                 highlightthickness=0)
        vsb = ttk.Scrollbar(canvas_frame, orient="vertical",
                            command=self._canvas.yview)
        self._canvas.configure(yscrollcommand=vsb.set)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        self._canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        self._steps_frame = tk.Frame(self._canvas, bg=BG)
        self._cwin = self._canvas.create_window(
            (0, 0), window=self._steps_frame, anchor="nw")
        # Keep the embedded frame as wide as the canvas ...
        self._canvas.bind(
            "<Configure>",
            lambda e: self._canvas.itemconfig(self._cwin, width=e.width))
        # ... and grow the scroll region whenever the step list changes size.
        self._steps_frame.bind(
            "<Configure>",
            lambda e: self._canvas.configure(
                scrollregion=self._canvas.bbox("all")))

        # Render existing steps
        for step in self._steps:
            self._add_step_ui(step)

        # Footer
        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        btn(foot, "+ Add Step", self._add_step, CARD).pack(
            side=tk.LEFT, padx=(12, 4), pady=10)
        self._msg = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._msg.pack(side=tk.LEFT, padx=8)
        btn(foot, "Cancel", self.destroy, CARD).pack(
            side=tk.RIGHT, padx=8, pady=10)
        btn(foot, "💾 Save Flow", self._save, ACCENT).pack(
            side=tk.RIGHT, padx=4, pady=10)

    # ── Step management ──────────────────────────────────────────────────────────

    def _add_step(self):
        """Ask for a file, then append a new step (path is empty if cancelled)."""
        picker = MdPickerDialog(self)
        self.wait_window(picker)
        path = picker.result or ""
        self._add_step_ui({"label": "", "path": path, "text": ""})

    def _add_step_ui(self, step_data):
        """Build the editor card for one step and register it in _step_frames."""
        idx = len(self._step_frames)
        sf = tk.Frame(self._steps_frame, bg=CARD, pady=2)
        sf.pack(fill=tk.X, pady=4, padx=2)

        # Step header row
        hrow = tk.Frame(sf, bg=CARD)
        hrow.pack(fill=tk.X, padx=8, pady=(6, 2))
        step_num = lbl(hrow, f"Step {idx + 1}", fg=ACCENT,
                       font=FONT_BOLD, bg=CARD)
        step_num.pack(side=tk.LEFT, padx=(0, 10))

        label_var = tk.StringVar(value=step_data.get("label", ""))
        _label_entry = ttk.Entry(hrow, textvariable=label_var, width=28)
        _label_entry.pack(side=tk.LEFT, padx=(0, 6))
        attach_edit_menu(_label_entry)
        lbl(hrow, "label (optional)", fg=MUTED, font=FONT_SMALL, bg=CARD
            ).pack(side=tk.LEFT)

        # Delete button
        def _remove(f=sf):
            f.destroy()
            self._step_frames = [x for x in self._step_frames
                                 if x["frame"].winfo_exists()]
            self._renumber()
        btn(hrow, "✕", _remove, CARD, fg=MUTED, pady=2, padx=6).pack(
            side=tk.RIGHT)

        # Up / Down
        def _move_up(f=sf):
            self._move_step(f, -1)

        def _move_down(f=sf):
            self._move_step(f, +1)
        btn(hrow, "↑", _move_up, CARD, fg=MUTED, pady=2, padx=6).pack(side=tk.RIGHT)
        btn(hrow, "↓", _move_down, CARD, fg=MUTED, pady=2, padx=6).pack(side=tk.RIGHT)

        # ── File section ──────────────────────────────────────────────────────
        file_outer = tk.Frame(sf, bg=PANEL)
        file_outer.pack(fill=tk.X, padx=8, pady=(2, 0))

        frow = tk.Frame(file_outer, bg=PANEL)
        frow.pack(fill=tk.X, padx=6, pady=(6, 2))
        lbl(frow, "📄 File:", fg=MUTED, font=FONT_SMALL, bg=PANEL
            ).pack(side=tk.LEFT, padx=(0, 6))
        path_var = tk.StringVar(value=step_data.get("path", ""))
        path_entry = ttk.Entry(frow, textvariable=path_var, width=40)
        path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 4))
        attach_edit_menu(path_entry)

        file_status = lbl(frow, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        file_status.pack(side=tk.LEFT, padx=4)

        # preview widget (initially hidden)
        prev_frame = tk.Frame(file_outer, bg=PANEL)
        prev_frame.pack(fill=tk.X, padx=6, pady=(0, 4))
        file_prev = scrolledtext.ScrolledText(
            prev_frame, bg="#0e0e22", fg="#88ccff",
            insertbackground=TEXT, font=FONT_MONO,
            height=4, bd=0, wrap=tk.WORD, relief="flat",
            padx=6, pady=4)
        # don't pack yet — shown only after a file is loaded
        file_prev.config(state=tk.DISABLED)

        def _load_file(pv=path_var, fs=file_status, fp=file_prev, pf=prev_frame):
            p = pv.get().strip()
            if not p:
                return
            if not os.path.isfile(p):
                fs.config(text="File not found", fg=C_FAIL)
                pf.pack_forget()
                return
            try:
                # Context manager so the handle is closed even if read() fails.
                with open(p, encoding="utf-8") as fh:
                    content = fh.read()
                fs.config(
                    text=f"✓ {os.path.basename(p)} ({len(content):,} chars)",
                    fg=GREEN)
                fp.config(state=tk.NORMAL)
                fp.delete("1.0", tk.END)
                fp.insert("1.0",
                          content[:1200] + ("\n…(truncated)" if len(content) > 1200 else ""))
                fp.config(state=tk.DISABLED)
                pf.pack(fill=tk.X)
            except Exception as e:
                fs.config(text=f"Error: {e}", fg=C_FAIL)

        def _browse_step(pv=path_var, load=_load_file, dlg=self):
            p = filedialog.askopenfilename(
                parent=dlg,
                title="Select file for this step",
                filetypes=[("Markdown", "*.md"), ("Text", "*.txt"), ("All", "*.*")])
            if p:
                pv.set(p)
                load()

        btn(frow, "Browse", _browse_step, CARD).pack(side=tk.LEFT, padx=2)
        btn(frow, "Load Preview", _load_file, CARD).pack(side=tk.LEFT, padx=2)

        # Auto-load if path already set
        if step_data.get("path"):
            self.after(50, _load_file)

        # ── Additional text ───────────────────────────────────────────────────
        trow = tk.Frame(sf, bg=CARD)
        trow.pack(fill=tk.X, padx=8, pady=(4, 8))
        lbl(trow, "✏ Additional Text:", fg=MUTED, font=FONT_SMALL, bg=CARD
            ).pack(side=tk.LEFT, anchor="n", padx=(0, 6))
        text_box = scrolledtext.ScrolledText(
            trow, bg=PANEL, fg=TEXT, insertbackground=TEXT,
            font=FONT, height=3, bd=0, wrap=tk.WORD,
            relief="flat", padx=6, pady=4)
        text_box.pack(side=tk.LEFT, fill=tk.X, expand=True)
        attach_edit_menu(text_box)
        if step_data.get("text"):
            text_box.insert("1.0", step_data["text"])

        entry = {"frame": sf, "label": label_var,
                 "path": path_var, "text_box": text_box,
                 "num_lbl": step_num}
        self._step_frames.append(entry)

    def _move_step(self, frame_widget, direction):
        """Move a step card one position up (-1) or down (+1) and re-pack."""
        live = [e for e in self._step_frames if e["frame"].winfo_exists()]
        frames = [e["frame"] for e in live]
        try:
            idx = frames.index(frame_widget)
        except ValueError:
            return
        new_idx = idx + direction
        if new_idx < 0 or new_idx >= len(frames):
            return
        # Re-pack in new order
        live.insert(new_idx, live.pop(idx))
        for e in live:
            e["frame"].pack_forget()
        for e in live:
            e["frame"].pack(fill=tk.X, pady=4, padx=2)
        self._step_frames = live
        self._renumber()

    def _renumber(self):
        """Refresh the "Step N" captions to match the current order."""
        for i, e in enumerate(self._step_frames):
            if e["frame"].winfo_exists():
                e["num_lbl"].config(text=f"Step {i + 1}")

    def _collect_steps(self):
        """Return the current steps as a list of {label, path, text} dicts."""
        steps = []
        for e in self._step_frames:
            if not e["frame"].winfo_exists():
                continue
            steps.append({
                "label": e["label"].get().strip(),
                "path": e["path"].get().strip(),
                "text": e["text_box"].get("1.0", tk.END).strip(),
            })
        return steps

    def _save(self):
        """Validate the form, persist the flow and close on success."""
        name = self._name_var.get().strip()
        if not name:
            self._msg.config(text="⚠ Enter a flow name.", fg=C_PEND)
            return
        steps = self._collect_steps()
        if not steps:
            self._msg.config(text="⚠ Add at least one step.", fg=C_PEND)
            return
        for i, s in enumerate(steps):
            if not s["path"] and not s["text"]:
                self._msg.config(
                    text=f"⚠ Step {i+1} has no file or text.", fg=C_PEND)
                return
        flows = FlowStore.load()
        if self._edit_name and self._edit_name != name:
            # Renamed while editing: drop the entry stored under the old name.
            flows.pop(self._edit_name, None)
        flows[name] = steps
        if FlowStore.save(flows) is False:
            # Do not claim success or close the dialog when the write failed.
            self._msg.config(text=f"⚠ Could not write {FLOW_FILE}", fg=C_FAIL)
            return
        self._msg.config(text=f"✅ Saved '{name}'", fg=GREEN)
        self.on_saved()
        self.after(900, self.destroy)


# ════════════════════════════════════════════════════════════════════════════════
# FlowRunner — background sequencer
# ════════════════════════════════════════════════════════════════════════════════

class FlowRunner:
    """
    Monitors a run and, when it completes each step, sends the next resume.
    Runs entirely in a daemon thread; posts UI callbacks via root.after().
    """
    POLL = 12  # seconds between status checks

    def __init__(self, root, run_id, steps, on_status):
        self.root = root
        self.run_id = run_id
        self.steps = list(steps)  # remaining steps (index 0 is next)
        self.on_status = on_status  # callable(msg, colour)
        self._current_run_id = run_id
        self._stop = False
        threading.Thread(target=self._loop, daemon=True).start()

    def stop(self):
        """Ask the worker loop to exit at its next check."""
        self._stop = True

    @staticmethod
    def _step_prompt(step):
        """Build the prompt for a step: file content (if readable) plus extra text."""
        parts = []
        path = step.get("path", "")
        if path and os.path.isfile(path):
            try:
                with open(path, encoding="utf-8") as fh:
                    parts.append(fh.read())
            except (OSError, UnicodeError):
                # Best-effort: an unreadable file falls back to the text only.
                pass
        text = step.get("text", "").strip()
        if text:
            parts.append(text)
        return "\n\n".join(parts).strip()

    def _loop(self):
        """Worker: wait for the current run to finish, then send the next step."""
        total = len(self.steps)
        sent = 0
        failed = False
        self._post(f"Flow started — {total} step(s) queued", C_RUN)

        while not self._stop and self.steps:
            # Poll until current run is done
            while not self._stop:
                time.sleep(self.POLL)
                try:
                    data = API._get(
                        f"/organizations/{ORG_ID}/agent/run/{self._current_run_id}")
                    status = data.get("status") or ""
                    if is_done(status):
                        break
                    self._post(
                        f"Flow [{sent}/{total}] — waiting for #{self._current_run_id}"
                        f" ({status})", MUTED)
                except Exception as e:
                    self._post(f"Flow poll error: {e}", C_FAIL)
                    time.sleep(self.POLL)

            if self._stop:
                break

            # Send next step
            step = self.steps.pop(0)
            sent += 1
            prompt = self._step_prompt(step)
            label = step.get("label") or f"Step {sent}"
            if not prompt:
                self._post(f"Flow: skipping empty step {sent}", MUTED)
                continue

            self._post(f"Flow: sending {label} ({sent}/{total})…", C_PEND)
            try:
                result = API.resume_run(self._current_run_id, prompt)
                self._current_run_id = result.get("id", self._current_run_id)
                self._post(
                    f"Flow: {label} sent → run #{self._current_run_id}", C_RUN)
            except Exception as e:
                self._post(f"Flow error on {label}: {e}", C_FAIL)
                failed = True
                break

        # Only report completion when the flow neither stopped nor aborted;
        # otherwise the error message above would be overwritten.
        if not self._stop and not failed:
            self._post(f"✅ Flow complete — all {total} step(s) sent", GREEN)

    def _post(self, msg, colour):
        """Schedule on_status(msg, colour) via root.after()."""
        self.root.after(0, lambda m=msg, c=colour: self.on_status(m, c))


# ════════════════════════════════════════════════════════════════════════════════
# FlowViewDialog — read-only preview of a single flow's steps
# ════════════════════════════════════════════════════════════════════════════════

class FlowViewDialog(tk.Toplevel):
    """Shows a flow's steps in read-only form with file preview."""

    def __init__(self, parent, name, steps, on_edit):
        super().__init__(parent)
        self.name = name
        self.steps = steps
        self.on_edit = on_edit
        self.title(f"Flow: {name}")
        self.geometry("720x580")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._build()

    def _build(self):
        """Create the header, steps table, preview pane and footer."""
        tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)
        lbl(hdr, f"⛓ {self.name}", fg=ACCENT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=20, pady=14)
        btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.RIGHT, padx=12, pady=8)
        btn(hdr, "✏ Edit", self._edit, HOT).pack(
            side=tk.RIGHT, padx=4, pady=8)

        lbl(self, f" {len(self.steps)} step(s) — double-click a step to preview its file",
            fg=MUTED, font=FONT_SMALL).pack(anchor="w", padx=14, pady=(6, 2))

        # Steps treeview
        tree_f = tk.Frame(self, bg=BG)
        tree_f.pack(fill=tk.BOTH, expand=True, padx=14, pady=(0, 4))

        cols = ("#", "Label", "File", "Text Preview")
        self._tree = ttk.Treeview(tree_f, columns=cols,
                                  show="headings", selectmode="browse")
        ws = {"#": 36, "Label": 160, "File": 200, "Text Preview": 0}
        for c in cols:
            self._tree.heading(c, text=c)
            self._tree.column(c, width=ws.get(c, 120),
                              anchor="w", stretch=(c == "Text Preview"))
        vsb = ttk.Scrollbar(tree_f, orient=tk.VERTICAL,
                            command=self._tree.yview)
        self._tree.configure(yscrollcommand=vsb.set)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        self._tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self._tree.tag_configure("has_file", foreground=C_DONE)
        self._tree.tag_configure("text_only", foreground=TEXT)

        for i, s in enumerate(self.steps):
            path = s.get("path", "") or ""
            fname = os.path.basename(path) if path else "—"
            text = (s.get("text") or "").replace("\n", " ")[:80]
            label = s.get("label") or f"Step {i+1}"
            tag = "has_file" if path and os.path.isfile(path) else "text_only"
            # The row id is the step index so _preview_step can map back to it.
            self._tree.insert("", tk.END, iid=str(i),
                              values=(i + 1, label, fname, text), tags=(tag,))

        # Matches the on-screen hint: double-click previews the step.
        self._tree.bind("<Double-Button-1>", self._preview_step)

        # Preview pane
        lbl(self, " File Preview", fg=MUTED, font=FONT_SMALL
            ).pack(anchor="w", padx=14, pady=(2, 1))
        self._preview = scrolledtext.ScrolledText(
            self, bg=PANEL, fg="#88ccff", insertbackground=TEXT,
            font=FONT_MONO, height=8, bd=0, wrap=tk.WORD,
            relief="flat", padx=10, pady=6)
        self._preview.pack(fill=tk.X, padx=14, pady=(0, 4))
        self._preview.insert("1.0", "Select a step above to preview its file content.")
        self._preview.config(state=tk.DISABLED)

        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        btn(foot, "Close", self.destroy, CARD).pack(
            side=tk.RIGHT, padx=12, pady=8)

    def _preview_step(self, _event=None):
        """Show the selected step's file content (or its text) in the preview pane."""
        sel = self._tree.selection()
        if not sel:
            return
        idx = int(sel[0])
        step = self.steps[idx]
        path = step.get("path", "") or ""
        self._preview.config(state=tk.NORMAL)
        self._preview.delete("1.0", tk.END)
        if path and os.path.isfile(path):
            try:
                with open(path, encoding="utf-8") as fh:
                    content = fh.read()
                self._preview.insert("1.0", content[:3000] +
                                     ("\n…(truncated)" if len(content) > 3000 else ""))
            except Exception as e:
                self._preview.insert("1.0", f"Could not read file: {e}")
        elif path:
            self._preview.insert("1.0", f"File not found:\n{path}")
        else:
            text = step.get("text", "") or "(no text)"
            self._preview.insert("1.0", text[:3000])
        self._preview.config(state=tk.DISABLED)

    def _edit(self):
        """Close this dialog, then invoke the on_edit callback."""
        self.destroy()
        self.on_edit()


#
# ════════════════════════════════════════════════════════════════════════════════
# FlowManagerDialog — list / edit / delete flows
# ════════════════════════════════════════════════════════════════════════════════

class FlowManagerDialog(tk.Toplevel):
    """Modal window listing the flows returned by ``FlowStore.load()``.

    Offers new / edit / delete / preview actions.  ``on_changed`` (optional,
    called with no arguments) fires after the list has been reloaded following
    a save or a delete.
    """

    def __init__(self, parent, on_changed=None):
        super().__init__(parent)
        self.on_changed = on_changed
        self.title("Flows")
        self.geometry("620x500")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._flows = {}
        self._build()
        self._reload()

    def _build(self):
        """Create the header, toolbar, flow table, hint line and footer."""
        tk.Frame(self, bg=ACCENT, height=3).pack(fill=tk.X)

        # Header
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)
        lbl(hdr, "⛓ Flows", fg=ACCENT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=20, pady=14)
        btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.RIGHT, padx=12, pady=8)

        # Sub-toolbar
        tb = tk.Frame(self, bg=PANEL)
        tb.pack(fill=tk.X)
        btn(tb, "+ New Flow", self._new, HOT).pack(
            side=tk.LEFT, padx=(12, 4), pady=8)
        btn(tb, "✏ Edit", self._edit, CARD).pack(
            side=tk.LEFT, padx=4, pady=8)
        btn(tb, "🗑 Delete", self._delete, CARD).pack(
            side=tk.LEFT, padx=4, pady=8)
        self._tb_msg = lbl(tb, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._tb_msg.pack(side=tk.LEFT, padx=12)

        # Flow list treeview
        tree_f = tk.Frame(self, bg=BG)
        tree_f.pack(fill=tk.BOTH, expand=True, padx=14, pady=10)

        cols = ("Flow Name", "Steps", "Step Labels")
        self._tree = ttk.Treeview(tree_f, columns=cols,
                                  show="headings", selectmode="browse")
        self._tree.heading("Flow Name", text="Flow Name")
        self._tree.heading("Steps", text="Steps")
        self._tree.heading("Step Labels", text="Step Labels")
        self._tree.column("Flow Name", width=180, anchor="w")
        self._tree.column("Steps", width=52, anchor="center")
        self._tree.column("Step Labels", width=0, anchor="w", stretch=True)

        vsb = ttk.Scrollbar(tree_f, orient=tk.VERTICAL, command=self._tree.yview)
        self._tree.configure(yscrollcommand=vsb.set)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        self._tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # NOTE(review): the three event sequences below were blank in the
        # source as received (Tk rejects an empty sequence).  They are inferred
        # from the hint text underneath — confirm against the real file.
        self._tree.bind("<Double-1>", lambda _: self._view())
        self._tree.bind("<Return>", lambda _: self._view())
        self._tree.bind("<Button-3>", self._ctx)

        # Hint
        lbl(self, " Double-click to preview · Right-click for options",
            fg=MUTED, font=FONT_SMALL).pack(anchor="w", padx=14, pady=(0, 4))

        # Footer
        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        btn(foot, "Close", self.destroy, CARD).pack(
            side=tk.RIGHT, padx=12, pady=8)

    def _reload(self):
        """Re-read flows from ``FlowStore`` and rebuild the table rows."""
        for row in self._tree.get_children():
            self._tree.delete(row)
        self._flows = FlowStore.load()
        for name, steps in self._flows.items():
            labels = ", ".join(
                s.get("label") or f"Step {i+1}"
                for i, s in enumerate(steps))
            # The flow name doubles as the row id, so selection() yields names.
            self._tree.insert("", tk.END, iid=name,
                              values=(name, len(steps), labels))
        count = len(self._flows)
        # Reset the colour too: _edit/_delete may have switched it to C_PEND.
        self._tb_msg.config(
            text=f"{count} flow{'s' if count != 1 else ''}", fg=MUTED)

    def _selected_name(self):
        """Return the selected flow name, or ``None`` when nothing is selected."""
        sel = self._tree.selection()
        return sel[0] if sel else None

    def _view(self):
        """Open a ``FlowViewDialog`` for the selected flow, if any."""
        name = self._selected_name()
        if not name or name not in self._flows:
            return
        FlowViewDialog(self, name, self._flows[name],
                       on_edit=lambda n=name: self._edit_named(n))

    def _new(self):
        """Open the flow editor for a brand-new flow."""
        FlowCreateDialog(self, on_saved=self._on_saved)

    def _edit(self):
        """Edit the selected flow, or prompt the user to select one."""
        name = self._selected_name()
        if name:
            self._edit_named(name)
        else:
            self._tb_msg.config(text="Select a flow first", fg=C_PEND)

    def _edit_named(self, name):
        """Open the flow editor pre-loaded with flow ``name``."""
        FlowCreateDialog(self, on_saved=self._on_saved, edit_name=name)

    def _delete(self):
        """Delete the selected flow after a yes/no confirmation."""
        name = self._selected_name()
        if not name:
            self._tb_msg.config(text="Select a flow first", fg=C_PEND)
            return
        if messagebox.askyesno("Delete Flow",
                               f'Delete flow "{name}"?',
                               parent=self):
            # Reload before mutating so the save is based on the stored state.
            flows = FlowStore.load()
            flows.pop(name, None)
            FlowStore.save(flows)
            self._on_saved()

    def _ctx(self, event):
        """Select the row under the pointer and pop up the context menu."""
        row = self._tree.identify_row(event.y)
        if not row:
            return
        self._tree.selection_set(row)
        m = tk.Menu(self, tearoff=0, bg=CARD, fg=TEXT,
                    activebackground=ACCENT, activeforeground="white",
                    font=FONT, bd=0)
        m.add_command(label="🔍 Preview", command=self._view)
        m.add_command(label="✏ Edit", command=self._edit)
        m.add_separator()
        m.add_command(label="🗑 Delete", command=self._delete)
        m.post(event.x_root, event.y_root)

    def _on_saved(self):
        """Refresh the table and notify the owner that flows changed."""
        self._reload()
        if self.on_changed:
            self.on_changed()


# ════════════════════════════════════════════════════════════════════════════════
# Create Run Dialog
# ════════════════════════════════════════════════════════════════════════════════

class CreateRunDialog(tk.Toplevel):
    """Modal window that composes a prompt and creates a run via ``API``.

    ``on_created(res)`` receives the ``API.create_run`` result.
    ``on_flow_runner(run_id, steps)`` (optional) is called afterwards when a
    flow was selected in the dialog.
    """

    def __init__(self, parent, on_created, on_flow_runner=None):
        super().__init__(parent)
        self.on_created = on_created
        self.on_flow_runner = on_flow_runner  # callback(run_id, steps) when flow starts
        self._tpl_text = None
        self._selected_flow = None  # steps of the flow chosen at launch time
        self.title("New Agent Run")
        self.geometry("760x600")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._build()
        self.after(200, self._try_default_tpl)

    def _build(self):
        """Create the template row, prompt box, flow selector and footer."""
        tk.Frame(self, bg=HOT, height=3).pack(fill=tk.X)
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)
        lbl(hdr, "🚀 New Agent Run", fg=HOT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=20, pady=14)
        btn(hdr, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.RIGHT, padx=12, pady=8)

        body = tk.Frame(self, bg=BG)
        body.pack(fill=tk.BOTH, expand=True, padx=20, pady=10)

        lbl(body, "Template File (optional)", fg=MUTED, font=FONT_SMALL
            ).pack(anchor="w", pady=(0, 3))
        tr = tk.Frame(body, bg=BG)
        tr.pack(fill=tk.X)
        self._tpl_var = tk.StringVar(value=DEFAULT_TPL)
        ttk.Entry(tr, textvariable=self._tpl_var).pack(
            side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 6))
        btn(tr, "Browse", self._browse, CARD).pack(side=tk.LEFT, padx=2)
        btn(tr, "Load", self._load, ACCENT).pack(side=tk.LEFT, padx=2)

        self._tpl_info = lbl(body, "", fg=MUTED, font=FONT_SMALL)
        self._tpl_info.pack(anchor="w", pady=(4, 8))

        lbl(body, "Prompt / Instructions", fg=MUTED, font=FONT_SMALL
            ).pack(anchor="w", pady=(0, 3))
        self._prompt = scrolledtext.ScrolledText(
            body, bg=PANEL, fg=TEXT, insertbackground=TEXT,
            font=FONT, height=8, bd=0, wrap=tk.WORD, relief="flat",
            padx=10, pady=8)
        self._prompt.pack(fill=tk.BOTH, expand=True)
        self._prompt.focus()

        # ── Flow selector ────────────────────────────────────────────────────
        tk.Frame(body, bg=BORDER, height=1).pack(fill=tk.X, pady=(10, 6))
        flow_row = tk.Frame(body, bg=BG)
        flow_row.pack(fill=tk.X)
        lbl(flow_row, "⛓ Flow (optional):", fg=MUTED, font=FONT_SMALL
            ).pack(side=tk.LEFT, padx=(0, 8))
        self._flow_var = tk.StringVar(value="None")
        self._flow_combo = ttk.Combobox(
            flow_row, textvariable=self._flow_var,
            width=26, state="readonly")
        self._flow_combo.pack(side=tk.LEFT, padx=(0, 6))
        # NOTE(review): the source as received had "<>" here; restored to the
        # ttk Combobox selection virtual event — confirm.
        self._flow_combo.bind("<<ComboboxSelected>>", self._on_flow_selected)
        btn(flow_row, "⛓ Manage Flows", self._open_flow_manager,
            CARD).pack(side=tk.LEFT, padx=4)
        self._flow_info = lbl(flow_row, "", fg=MUTED, font=FONT_SMALL)
        self._flow_info.pack(side=tk.LEFT, padx=8)
        self._refresh_flow_combo()

        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        self._foot_msg = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._foot_msg.pack(side=tk.LEFT, padx=16, pady=12)
        btn(foot, "Cancel", self.destroy, CARD).pack(
            side=tk.RIGHT, padx=8, pady=10)
        btn(foot, "🚀 Launch Run", self._launch, HOT).pack(
            side=tk.RIGHT, padx=4, pady=10)

    def _browse(self):
        """Pick a template file with a file dialog and load it."""
        p = filedialog.askopenfilename(
            filetypes=[("Markdown", "*.md"), ("Text", "*.txt"), ("All", "*.*")])
        if p:
            self._tpl_var.set(p)
            self._load()

    def _refresh_flow_combo(self):
        """Repopulate the flow combobox from ``FlowStore``.

        Falls back to "None" when the current choice no longer exists.
        """
        flows = FlowStore.load()
        names = ["None"] + sorted(flows.keys())
        self._flow_combo["values"] = names
        if self._flow_var.get() not in names:
            self._flow_var.set("None")
        self._on_flow_selected()

    def _on_flow_selected(self, _event=None):
        """Show the step count of the chosen flow next to the combobox."""
        name = self._flow_var.get()
        if name == "None":
            self._flow_info.config(text="", fg=MUTED)
            return
        flows = FlowStore.load()
        steps = flows.get(name, [])
        self._flow_info.config(
            text=f"{len(steps)} step(s)", fg=ACCENT)

    def _open_flow_manager(self):
        """Open the flow manager; refresh the combobox when flows change."""
        FlowManagerDialog(self, on_changed=self._refresh_flow_combo)

    def _try_default_tpl(self):
        """Load ``DEFAULT_TPL`` automatically when that file exists."""
        if os.path.isfile(DEFAULT_TPL):
            self._load()

    def _load(self):
        """Read the template file named in the entry into ``_tpl_text``."""
        path = self._tpl_var.get()
        if not path or not os.path.isfile(path):
            self._tpl_info.config(text="File not found", fg=C_FAIL)
            return
        try:
            with open(path, encoding="utf-8") as f:
                self._tpl_text = f.read()
            self._tpl_info.config(
                text=f"✓ {os.path.basename(path)} ({len(self._tpl_text):,} chars)",
                fg=GREEN)
        except Exception as e:
            self._tpl_info.config(text=f"Error: {e}", fg=C_FAIL)

    def _launch(self):
        """Join template and typed prompt, then create the run on a worker thread."""
        extra = self._prompt.get("1.0", tk.END).strip()
        parts = [p for p in [self._tpl_text, extra] if p and p.strip()]
        prompt = "\n\n".join(parts).strip()
        if not prompt:
            self._foot_msg.config(text="⚠ Enter a prompt or load a template.",
                                  fg=C_PEND)
            return
        flow_name = self._flow_var.get()
        self._selected_flow = None
        if flow_name != "None":
            flows = FlowStore.load()
            self._selected_flow = flows.get(flow_name)
        self._foot_msg.config(text="Launching…", fg=C_PEND)

        def _bg():
            try:
                res = API.create_run(prompt, model="claude-opus-4-6")
                self.after(0, lambda r=res: self._done(r))
            except Exception as e:
                # Bind the message now: ``e`` is unbound once the except clause
                # ends, so a lambda that read ``e`` later would raise NameError.
                self.after(0, lambda msg=str(e): self._foot_msg.config(
                    text=f"Error: {msg}", fg=C_FAIL))

        threading.Thread(target=_bg, daemon=True).start()

    def _done(self, res):
        """Report the created run, fire the callbacks and close shortly after."""
        rid = res.get("id", "?")
        flow = self._selected_flow
        msg = f"✅ Run #{rid} created!"
        if flow:
            msg += f" ⛓ flow ({len(flow)} steps) queued"
        self._foot_msg.config(text=msg, fg=GREEN)
        self.on_created(res)
        if flow and self.on_flow_runner:
            self.on_flow_runner(rid, flow)
        self.after(1400, self.destroy)


# ════════════════════════════════════════════════════════════════════════════════
# Run Detail / Conversation Dialog
# ════════════════════════════════════════════════════════════════════════════════

class RunDialog(tk.Toplevel):
    """Modal window showing one run: header, summary, logs, resume controls.

    ``run`` is a dict that must contain ``"id"``.  ``on_refreshed()`` is called
    after a successful resume.  ``on_start_flow(run_id, steps)`` (optional) is
    called when the user starts a flow from this dialog.
    """

    def __init__(self, parent, run, on_refreshed, on_start_flow=None):
        super().__init__(parent)
        self.run = run
        self.on_refreshed = on_refreshed
        self.on_start_flow = on_start_flow
        rid = run["id"]
        status = run.get("status", "")
        self.title(f"Run #{rid} · {status}")
        self.geometry("900x700")
        self.configure(bg=BG)
        self.resizable(True, True)
        self.grab_set()
        self.lift()
        self._build(status)
        self._load_logs()

    def _build(self, status):
        """Create all widgets; the resume panel only exists when ``is_done(status)``."""
        sc = status_color(status)

        # Coloured accent bar
        tk.Frame(self, bg=sc, height=3).pack(fill=tk.X)

        # Header
        hdr = tk.Frame(self, bg=PANEL)
        hdr.pack(fill=tk.X)

        lh = tk.Frame(hdr, bg=PANEL)
        lh.pack(side=tk.LEFT, fill=tk.X, expand=True)
        lbl(lh, f"Run #{self.run['id']}", fg=TEXT, font=FONT_TITLE, bg=PANEL
            ).pack(side=tk.LEFT, padx=18, pady=(12, 4))
        lbl(lh, (status or "").upper(), fg=sc, font=FONT_BOLD, bg=PANEL
            ).pack(side=tk.LEFT, padx=6)

        rh = tk.Frame(hdr, bg=PANEL)
        rh.pack(side=tk.RIGHT)
        if self.run.get("web_url"):
            btn(rh, "🌐 Web", lambda: webbrowser.open(self.run["web_url"]),
                CARD).pack(side=tk.LEFT, padx=4, pady=8)
        btn(rh, "✕", self.destroy, CARD, fg=MUTED).pack(
            side=tk.LEFT, padx=10, pady=8)

        # Meta
        meta = tk.Frame(hdr, bg=PANEL)
        meta.pack(fill=tk.X, padx=18, pady=(0, 10))
        lbl(meta, fmt_dt(self.run.get("created_at")),
            fg=MUTED, font=FONT_SMALL, bg=PANEL).pack(side=tk.LEFT)
        for pr in (self.run.get("github_pull_requests") or [])[:4]:
            lk = tk.Label(meta, text=f" 🔗 PR #{pr['id']}",
                          fg=ACCENT, font=FONT_SMALL, bg=PANEL, cursor="hand2")
            lk.pack(side=tk.LEFT)
            # NOTE(review): event sequence was blank in the received source;
            # a left click is inferred from the "hand2" cursor — confirm.
            lk.bind("<Button-1>",
                    lambda e, u=pr.get("url", ""): webbrowser.open(u))

        # Summary / result strip
        summary = (self.run.get("summary") or self.run.get("result") or "").strip()
        if summary:
            sf = tk.Frame(self, bg=CARD)
            sf.pack(fill=tk.X, padx=14, pady=(4, 0))
            lbl(sf, "Summary", fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(anchor="w", padx=12, pady=(6, 1))
            st = tk.Text(sf, bg=CARD, fg=TEXT, font=FONT_SMALL,
                         height=3, bd=0, wrap=tk.WORD, relief="flat",
                         padx=10, pady=4)
            st.pack(fill=tk.X, padx=10, pady=(0, 8))
            st.insert("1.0", summary)
            st.config(state=tk.DISABLED)

        # Conversation view
        lbl(self, " Conversation Log", fg=MUTED, font=FONT_SMALL
            ).pack(anchor="w", padx=14, pady=(8, 2))

        self._conv = scrolledtext.ScrolledText(
            self, bg=PANEL, fg=TEXT, insertbackground=TEXT,
            font=FONT_MONO, bd=0, wrap=tk.WORD, relief="flat",
            padx=12, pady=10)
        self._conv.pack(fill=tk.BOTH, expand=True, padx=14, pady=(0, 4))
        self._conv.tag_configure("ts", foreground=MUTED, font=FONT_SMALL)
        self._conv.tag_configure("tool", foreground="#88aaff", font=("Consolas", 9, "bold"))
        self._conv.tag_configure("thought", foreground="#c0a0ff")
        self._conv.tag_configure("inp", foreground="#80d8c0")
        self._conv.tag_configure("out", foreground=TEXT)
        self._conv.tag_configure("div", foreground=BORDER)
        self._conv.insert(tk.END, "Loading logs…", "ts")
        self._conv.config(state=tk.DISABLED)

        # Resume panel — shown for all done runs
        if is_done(status):
            rf = tk.Frame(self, bg=CARD)
            rf.pack(fill=tk.X, padx=14, pady=(2, 4))
            tk.Frame(rf, bg=BORDER, height=1).pack(fill=tk.X)

            # --- Single prompt resume (existing) ---
            lbl(rf, " Follow‑up prompt (single message)",
                fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(anchor="w", padx=10, pady=(8, 3))
            row = tk.Frame(rf, bg=CARD)
            row.pack(fill=tk.X, padx=10, pady=(0, 10))
            self._resume_box = scrolledtext.ScrolledText(
                row, bg=PANEL, fg=TEXT, insertbackground=TEXT,
                font=FONT, height=4, bd=0, wrap=tk.WORD,
                relief="flat", padx=8, pady=6)
            self._resume_box.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            self._resume_box.focus()
            sb = tk.Frame(row, bg=CARD)
            sb.pack(side=tk.LEFT, padx=(8, 0), fill=tk.Y)
            btn(sb, "▶ Send", self._resume, HOT).pack(fill=tk.X, pady=2)
            self._res_msg = lbl(sb, "", fg=MUTED, font=FONT_SMALL, bg=CARD)
            self._res_msg.pack(pady=2)
            # NOTE(review): event sequence was blank in the received source.
            # Ctrl+Return is a guess (plain Return inserts a newline in a Text
            # widget) — confirm against the real file.
            self._resume_box.bind("<Control-Return>", lambda _: self._resume())

            # ⭐ NEW: Flow resume section
            tk.Frame(rf, bg=BORDER, height=1).pack(fill=tk.X, padx=10, pady=(8, 4))
            flow_row = tk.Frame(rf, bg=CARD)
            flow_row.pack(fill=tk.X, padx=10, pady=(0, 10))

            lbl(flow_row, "⛓ Run a flow instead:",
                fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(anchor="w", padx=0, pady=(0, 4))

            flow_sel_row = tk.Frame(flow_row, bg=CARD)
            flow_sel_row.pack(fill=tk.X)
            lbl(flow_sel_row, "Flow:", fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(side=tk.LEFT, padx=(0, 6))
            self._flow_var = tk.StringVar(value="None")
            self._flow_combo = ttk.Combobox(
                flow_sel_row, textvariable=self._flow_var,
                width=26, state="readonly")
            self._flow_combo.pack(side=tk.LEFT, padx=(0, 6))
            # NOTE(review): received as "<>"; restored to the ttk Combobox
            # selection virtual event — confirm.
            self._flow_combo.bind("<<ComboboxSelected>>", self._on_flow_selected)
            btn(flow_sel_row, "Manage Flows", self._open_flow_manager,
                CARD).pack(side=tk.LEFT, padx=2)
            self._flow_info = lbl(flow_sel_row, "", fg=MUTED, font=FONT_SMALL, bg=CARD)
            self._flow_info.pack(side=tk.LEFT, padx=8)

            run_flow_btn = btn(flow_sel_row, "▶ Run Flow", self._run_flow, ACCENT)
            run_flow_btn.pack(side=tk.LEFT, padx=4)

            self._refresh_flow_combo()
        else:
            self._resume_box = None
            self._flow_combo = None

        # Footer
        foot = tk.Frame(self, bg=PANEL)
        foot.pack(fill=tk.X)
        self._log_lbl = lbl(foot, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._log_lbl.pack(side=tk.LEFT, padx=16, pady=8)
        btn(foot, "Close", self.destroy, CARD).pack(
            side=tk.RIGHT, padx=12, pady=8)

    # ── Logs ────────────────────────────────────────────────────────────────────

    def _load_logs(self):
        """Fetch this run's logs on a worker thread and render them via ``after``."""
        rid = self.run["id"]

        def _bg():
            try:
                data = API.fetch_all_logs(rid)
                self.after(0, lambda d=data: self._render(d))
            except Exception as e:
                # Capture the text now; ``e`` is unbound after the except clause.
                self.after(0, lambda msg=str(e): self._render_err(msg))

        threading.Thread(target=_bg, daemon=True).start()

    def _render_err(self, msg):
        """Display an error message in the conversation pane."""
        if not self._conv.winfo_exists():
            return
        self._conv.config(state=tk.NORMAL)
        self._conv.delete("1.0", tk.END)
        self._conv.insert(tk.END, f"⚠ {msg}", "ts")
        self._conv.config(state=tk.DISABLED)

    @staticmethod
    def _clip(value, limit):
        """Return ``value`` as text (JSON for dict/list), cut to ``limit`` chars plus "…"."""
        raw = json.dumps(value, indent=2) if isinstance(value, (dict, list)) else str(value)
        return raw[:limit] + ("…" if len(raw) > limit else "")

    def _render(self, data):
        """Render the logs in the conversation text widget."""
        if not self._conv.winfo_exists():
            return
        logs = (data or {}).get("logs", [])
        self._conv.config(state=tk.NORMAL)
        self._conv.delete("1.0", tk.END)

        if not logs:
            self._conv.insert(tk.END, "(No log entries found)\n", "ts")
        else:
            for lg in logs:
                ts = fmt_dt(lg.get("created_at"))
                tool = lg.get("tool_name") or ""
                mtype = lg.get("message_type") or ""
                thought = (lg.get("thought") or "").strip()
                inp = lg.get("tool_input")
                out = lg.get("tool_output")
                obs = lg.get("observation")

                # timestamp + tool header
                self._conv.insert(tk.END, f"[{ts}] ", "ts")
                if tool:
                    self._conv.insert(tk.END, f"⚙ {tool}", "tool")
                if mtype:
                    self._conv.insert(tk.END, f" ({mtype})", "ts")
                self._conv.insert(tk.END, "\n")

                if thought:
                    self._conv.insert(
                        tk.END, f" 💭 {self._clip(thought, 400)}\n", "thought")
                if inp:
                    self._conv.insert(
                        tk.END, f" ▸ {self._clip(inp, 500)}\n", "inp")
                if out:
                    self._conv.insert(
                        tk.END, f" ◂ {self._clip(out, 500)}\n", "out")
                # Skip the observation when it merely repeats input or output.
                if obs and obs not in (inp, out):
                    self._conv.insert(
                        tk.END, f" 👁 {self._clip(obs, 200)}\n", "ts")

                self._conv.insert(tk.END, "─" * 66 + "\n", "div")

            self._conv.see(tk.END)

        self._conv.config(state=tk.DISABLED)
        self._log_lbl.config(text=f"{len(logs)} log entries")

    # ── Resume ──────────────────────────────────────────────────────────────────

    def _resume(self):
        """Send the follow-up prompt through ``API.resume_run`` on a worker thread."""
        if not self._resume_box:
            return
        prompt = self._resume_box.get("1.0", tk.END).strip()
        if not prompt:
            self._res_msg.config(text="Enter a prompt", fg=C_PEND)
            return
        self._res_msg.config(text="Sending…", fg=C_PEND)

        rid = self.run["id"]

        def _bg():
            try:
                res = API.resume_run(rid, prompt)
                new_id = res.get("id", rid)
                self.after(0, lambda n=new_id: self._resumed(n))
            except Exception as e:
                # Capture the text now; ``e`` is unbound after the except clause.
                self.after(0, lambda msg=str(e): self._res_msg.config(
                    text=f"Error: {msg}", fg=C_FAIL))

        threading.Thread(target=_bg, daemon=True).start()

    def _resumed(self, new_id):
        """Confirm the resume, ask the owner to refresh and close shortly after."""
        self._res_msg.config(text=f"✅ #{new_id} resumed!", fg=GREEN)
        self.on_refreshed()
        self.after(1500, self.destroy)

    def _refresh_flow_combo(self):
        """Repopulate the flow combobox from ``FlowStore``.

        Falls back to "None" when the current choice no longer exists.
        """
        flows = FlowStore.load()
        names = ["None"] + sorted(flows.keys())
        self._flow_combo["values"] = names
        if self._flow_var.get() not in names:
            self._flow_var.set("None")
        self._on_flow_selected()

    def _on_flow_selected(self, _event=None):
        """Show the step count of the chosen flow next to the combobox."""
        name = self._flow_var.get()
        if name == "None":
            self._flow_info.config(text="", fg=MUTED)
            return
        flows = FlowStore.load()
        steps = flows.get(name, [])
        self._flow_info.config(text=f"{len(steps)} step(s)", fg=ACCENT)

    def _open_flow_manager(self):
        """Open the flow manager; refresh the combobox when flows change."""
        FlowManagerDialog(self, on_changed=self._refresh_flow_combo)

    def _run_flow(self):
        """Start a flow runner for the selected flow."""
        if not self.on_start_flow:
            self._res_msg.config(text="Flow runner not available", fg=C_FAIL)
            return
        name = self._flow_var.get()
        if name == "None":
            self._res_msg.config(text="Select a flow", fg=C_PEND)
            return
        flows = FlowStore.load()
        steps = flows.get(name)
        if not steps:
            self._res_msg.config(text="Flow not found", fg=C_FAIL)
            return
        # Call the main app to start the flow runner
        self.on_start_flow(self.run["id"], steps)
        self._res_msg.config(text=f"✅ Flow '{name}' started", fg=GREEN)
        self.after(1200, self.destroy)


# ════════════════════════════════════════════════════════════════════════════════
# Main Application
# ════════════════════════════════════════════════════════════════════════════════

class CodegenManager:
    """Main window: run tables, toolbar, active-runs badge and status bars."""

    def __init__(self, root: tk.Tk):
        self.root = root
        self.root.title("Codegen Agent Manager")
        self.root.geometry("1240x760")
        self.root.minsize(900, 580)
        self.root.configure(bg=BG)

        self._runs = []
        self._active_runs = []       # refreshed by _update_active_badge
        self._prev_statuses = {}
        self._polling = True
        self._sort_col = "Created At"
        self._sort_rev = True
        self._star_file = Path.home() / ".codegen_manager_stars.json"
        self._starred = self._load_stars()
        self._flow_runners = {}      # run_id -> FlowRunner

        self._style()
        self._build()
        threading.Thread(target=self._poll_loop, daemon=True).start()
        self.root.after(300, self._refresh)

    # ── Styles ──────────────────────────────────────────────────────────────────

    def _style(self):
        """Configure the ttk "clam" theme with the application palette."""
        s = ttk.Style()
        s.theme_use("clam")
        s.configure(".", background=BG, foreground=TEXT, font=FONT, borderwidth=0)
        s.configure("TFrame", background=BG)
        s.configure("TScrollbar", background=CARD, troughcolor=BG, arrowcolor=MUTED)
        s.configure("Treeview", background=PANEL, foreground=TEXT,
                    fieldbackground=PANEL, rowheight=34)
        s.configure("Treeview.Heading", background=CARD, foreground=MUTED,
                    font=("Segoe UI", 9, "bold"), relief="flat")
        s.map("Treeview",
              background=[("selected", ACCENT)],
              foreground=[("selected", "white")])
        s.configure("TCombobox", fieldbackground=PANEL, background=PANEL,
                    foreground=TEXT, selectbackground=ACCENT, arrowcolor=MUTED)
        s.configure("TEntry", fieldbackground=PANEL, foreground=TEXT,
                    insertcolor=TEXT)

    # ── Build ────────────────────────────────────────────────────────────────────

    def _build(self):
        """Assemble the window top to bottom."""
        self._topbar()
        self._toolbar()
        self._split_tables()
        self._flow_statusbar()
        self._statusbar()

    def _topbar(self):
        """Create the title bar with the LIVE marker and active-runs badge."""
        bar = tk.Frame(self.root, bg=PANEL, height=56)
        bar.pack(fill=tk.X)
        bar.pack_propagate(False)
        tk.Frame(bar, bg=ACCENT, width=4).pack(side=tk.LEFT, fill=tk.Y)
        lbl(bar, "⚡ Codegen Agent Manager", fg=HOT, font=FONT_TITLE,
            bg=PANEL).pack(side=tk.LEFT, padx=18)

        # right side
        self._last_upd = lbl(bar, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._last_upd.pack(side=tk.RIGHT, padx=16)
        lbl(bar, "● LIVE", fg=GREEN, font=FONT_SMALL, bg=PANEL
            ).pack(side=tk.RIGHT, padx=4)

        # Active-runs badge ── hover → dropdown, click item → RunDialog
        tk.Frame(bar, bg=BORDER, width=1).pack(
            side=tk.RIGHT, fill=tk.Y, pady=10, padx=8)
        badge_frame = tk.Frame(bar, bg=PANEL)
        badge_frame.pack(side=tk.RIGHT, padx=4)
        lbl(badge_frame, "ACTIVE", fg=MUTED, font=FONT_SMALL, bg=PANEL
            ).pack(side=tk.LEFT, padx=(0, 4))
        self._active_badge = tk.Label(
            badge_frame, text="—", bg="#0d2a1a", fg=C_RUN,
            font=("Segoe UI", 13, "bold"), padx=10, pady=4,
            cursor="hand2", relief="flat")
        self._active_badge.pack(side=tk.LEFT)
        # NOTE(review): the three event sequences were blank in the received
        # source; inferred from the handler names — confirm.
        self._active_badge.bind("<Enter>", self._badge_hover)
        self._active_badge.bind("<Leave>", self._badge_leave)
        self._active_badge.bind("<Button-1>", self._badge_click)
        self._dropdown_win = None

    def _update_active_badge(self, runs):
        """Remember the active runs and show their count on the badge."""
        active_runs = [r for r in runs if is_active(r.get("status"))]
        self._active_runs = active_runs
        count = len(active_runs)
        self._active_badge.config(
            text=str(count),
            bg="#0d2a1a" if count else CARD,
            fg=C_RUN if count else MUTED)

    # ── Active-runs dropdown ─────────────────────────────────────────────────────

    def _badge_hover(self, event):
        """Show the dropdown when the pointer enters the badge."""
        self._dropdown_show()

    def _badge_leave(self, event):
        """Schedule a hide check once the pointer leaves the badge."""
        # Only hide if mouse didn't move into the dropdown window
        self.root.after(200, self._maybe_hide_dropdown)

    def _badge_click(self, event):
        """Toggle the dropdown."""
        if self._dropdown_win and self._dropdown_win.winfo_exists():
            self._dropdown_hide()
        else:
            self._dropdown_show()

    @staticmethod
    def _paint_row(row, colour):
        """Set the background of ``row``, its children and its grandchildren."""
        row.config(bg=colour)
        for child in row.winfo_children():
            child.config(bg=colour)
            for grandchild in child.winfo_children():
                grandchild.config(bg=colour)

    def _dropdown_show(self):
        """Open the borderless active-runs dropdown below the badge."""
        if self._dropdown_win and self._dropdown_win.winfo_exists():
            return
        active = self._active_runs

        win = tk.Toplevel(self.root)
        win.overrideredirect(True)
        win.attributes("-topmost", True)
        win.configure(bg=BORDER)
        self._dropdown_win = win

        # Position below badge
        self._active_badge.update_idletasks()
        bx = self._active_badge.winfo_rootx()
        by = self._active_badge.winfo_rooty() + self._active_badge.winfo_height() + 2
        win.geometry(f"+{bx}+{by}")

        inner = tk.Frame(win, bg=CARD, padx=1, pady=1)
        inner.pack(fill=tk.BOTH, expand=True)

        if not active:
            lbl(inner, " No active runs ", fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(pady=10, padx=10)
        else:
            lbl(inner, f" {len(active)} active run(s) — click to inspect",
                fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(anchor="w", padx=10, pady=(8, 4))
            tk.Frame(inner, bg=BORDER, height=1).pack(fill=tk.X, padx=8)
            for run in active:
                rid = run["id"]
                stat = run.get("status") or ""
                ts = fmt_dt(run.get("created_at"))
                summ = (run.get("summary") or run.get("result") or "(no summary)")
                summ = summ.replace("\n", " ")
                # Only mark the text as cut when it really was cut.
                if len(summ) > 60:
                    summ = summ[:60] + "…"
                row = tk.Frame(inner, bg=CARD, cursor="hand2")
                row.pack(fill=tk.X, padx=0)
                tk.Frame(row, bg=CARD, height=1).pack(fill=tk.X)
                ri = tk.Frame(row, bg=CARD)
                ri.pack(fill=tk.X, padx=12, pady=6)
                lbl(ri, f"#{rid}", fg=C_RUN, font=FONT_BOLD, bg=CARD
                    ).pack(side=tk.LEFT, padx=(0, 8))
                lbl(ri, stat, fg=C_RUN, font=FONT_SMALL, bg=CARD
                    ).pack(side=tk.LEFT, padx=(0, 10))
                lbl(ri, ts, fg=MUTED, font=FONT_SMALL, bg=CARD
                    ).pack(side=tk.LEFT, padx=(0, 10))
                lbl(ri, summ, fg=TEXT, font=FONT_SMALL, bg=CARD
                    ).pack(side=tk.LEFT)

                # Defaults pin the current row/run; a plain closure would see
                # only the last loop iteration.
                def _on_enter(e, r=row):
                    self._paint_row(r, "#1e2a3a")

                def _on_leave(e, r=row):
                    self._paint_row(r, CARD)

                def _on_click(e, run=run):
                    self._dropdown_hide()
                    self._open_run_by(run)

                # NOTE(review): event sequences were blank in the received
                # source; inferred from the handler names — confirm.
                for w in [row, ri] + ri.winfo_children():
                    w.bind("<Enter>", _on_enter)
                    w.bind("<Leave>", _on_leave)
                    w.bind("<Button-1>", _on_click)

            tk.Frame(inner, bg=BORDER, height=1).pack(fill=tk.X, padx=8)
            lbl(inner, " Click to open logs & resume", fg=MUTED, font=FONT_SMALL, bg=CARD
                ).pack(anchor="w", padx=10, pady=(4, 8))

        # NOTE(review): blank in the received source; "<Leave>" inferred from
        # the deferred hide check it schedules — confirm.
        win.bind("<Leave>", lambda e: self.root.after(250, self._maybe_hide_dropdown))
        win.update_idletasks()
        # Clamp to screen
        sw = self.root.winfo_screenwidth()
        ww = win.winfo_width()
        if bx + ww > sw:
            bx = sw - ww - 10
            win.geometry(f"+{bx}+{by}")

    def _dropdown_hide(self):
        """Destroy the dropdown window if it is open."""
        if self._dropdown_win and self._dropdown_win.winfo_exists():
            self._dropdown_win.destroy()
        self._dropdown_win = None

    def _maybe_hide_dropdown(self):
        """Hide the dropdown unless the pointer is over it or over the badge."""
        if not self._dropdown_win or not self._dropdown_win.winfo_exists():
            return
        # Check if mouse is over badge or dropdown
        x, y = self.root.winfo_pointerx(), self.root.winfo_pointery()
        try:
            wx = self._dropdown_win.winfo_rootx()
            wy = self._dropdown_win.winfo_rooty()
            ww = self._dropdown_win.winfo_width()
            wh = self._dropdown_win.winfo_height()
            bx = self._active_badge.winfo_rootx()
            by = self._active_badge.winfo_rooty()
            bw = self._active_badge.winfo_width()
            bh = self._active_badge.winfo_height()
            over_win = wx <= x <= wx + ww and wy <= y <= wy + wh
            over_badge = bx <= x <= bx + bw and by <= y <= by + bh
            if not over_win and not over_badge:
                self._dropdown_hide()
        except Exception:
            self._dropdown_hide()

    def _toolbar(self):
        """Create the action buttons, status filter, search box and counter."""
        tb = tk.Frame(self.root, bg=PANEL)
        tb.pack(fill=tk.X, padx=14, pady=(0, 6))
        btn(tb, "+ New Run", self._open_create, HOT).pack(
            side=tk.LEFT, padx=(8, 4), pady=8)
        btn(tb, "⛓ Flows", self._open_flows, CARD).pack(
            side=tk.LEFT, padx=4, pady=8)
        btn(tb, "⟳ Refresh", self._refresh, ACCENT).pack(
            side=tk.LEFT, padx=4, pady=8)

        tk.Frame(tb, bg=BORDER, width=1).pack(
            side=tk.LEFT, fill=tk.Y, pady=8, padx=10)

        lbl(tb, "Status:", fg=MUTED, font=FONT_SMALL, bg=PANEL).pack(
            side=tk.LEFT)
        self._filt = ttk.Combobox(
            tb, values=["All", "ACTIVE", "COMPLETE", "FAILED"],
            width=11, state="readonly")
        self._filt.set("All")
        self._filt.pack(side=tk.LEFT, padx=6)
        # NOTE(review): received as "<>"; restored to the ttk Combobox
        # selection virtual event — confirm.
        self._filt.bind("<<ComboboxSelected>>", lambda _: self._repopulate())

        lbl(tb, " Search:", fg=MUTED, font=FONT_SMALL, bg=PANEL).pack(
            side=tk.LEFT)
        self._svar = tk.StringVar()
        self._svar.trace_add("write", lambda *_: self._repopulate())
        ttk.Entry(tb, textvariable=self._svar, width=24).pack(
            side=tk.LEFT, padx=6)

        self._cnt_lbl = lbl(tb, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._cnt_lbl.pack(side=tk.RIGHT, padx=16)

    def _make_tree(self, parent):
        """Build a styled Treeview with scrollbars inside parent frame."""
        cols = ("★", "ID", "Status", "Created At", "Summary", "PRs", "Source")
        widths = {"★": 28, "ID": 68, "Status": 112, "Created At": 162,
                  "Summary": 0, "PRs": 38, "Source": 90}
        anchors = {"★": "center", "ID": "center", "Status": "center", "PRs": "center"}

        tree = ttk.Treeview(parent, columns=cols, show="headings",
                            selectmode="browse")
        for c in cols:
            tree.heading(c, text=c,
                         command=lambda cc=c: self._sort(cc))
            tree.column(c, width=widths.get(c, 110),
                        anchor=anchors.get(c, "w"),
                        stretch=(c == "Summary"),
                        minwidth=widths.get(c, 40))

        vsb = ttk.Scrollbar(parent, orient=tk.VERTICAL, command=tree.yview)
        hsb = ttk.Scrollbar(parent, orient=tk.HORIZONTAL, command=tree.xview)
        tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)
        vsb.pack(side=tk.RIGHT, fill=tk.Y)
        hsb.pack(side=tk.BOTTOM, fill=tk.X)
        tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        for tag, bg in (("running", "#0c2218"), ("completed", "#0b1a33"),
                        ("failed", "#280b0b"), ("other", PANEL),
                        ("starred", "#1e1a08"), ("star_run", "#0d2218")):
            tree.tag_configure(tag, background=bg)

        # NOTE(review): the three event sequences were blank in the received
        # source; inferred from the hint label in _split_tables — confirm.
        tree.bind("<Double-1>", lambda e, t=tree: self._open_from_tree(t))
        tree.bind("<Return>", lambda e, t=tree: self._open_from_tree(t))
        tree.bind("<Button-3>", self._ctx_menu)
        return tree

    def _split_tables(self):
        """Create the paned view: pinned/active runs on top, past runs below."""
        pw = tk.PanedWindow(self.root, orient=tk.VERTICAL, bg=BG,
                            sashwidth=6, sashrelief="flat", sashpad=2)
        pw.pack(fill=tk.BOTH, expand=True, padx=14, pady=(0, 2))

        # ── Top pane: Pinned & Active ────────────────────────────────────────
        top_pane = tk.Frame(pw, bg=BG)
        pw.add(top_pane, height=200, minsize=60)

        top_hdr = tk.Frame(top_pane, bg=PANEL, height=26)
        top_hdr.pack(fill=tk.X)
        top_hdr.pack_propagate(False)
        lbl(top_hdr, " ★ Pinned & Active", fg="#f0c040",
            font=FONT_BOLD, bg=PANEL).pack(side=tk.LEFT, padx=6)
        self._top_cnt = lbl(top_hdr, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._top_cnt.pack(side=tk.RIGHT, padx=10)

        top_tree_frame = tk.Frame(top_pane, bg=BG)
        top_tree_frame.pack(fill=tk.BOTH, expand=True)
        self._top_tree = self._make_tree(top_tree_frame)

        # ── Bottom pane: Past Runs ───────────────────────────────────────────
        bot_pane = tk.Frame(pw, bg=BG)
        pw.add(bot_pane, minsize=80)

        bot_hdr = tk.Frame(bot_pane, bg=PANEL, height=26)
        bot_hdr.pack(fill=tk.X)
        bot_hdr.pack_propagate(False)
        lbl(bot_hdr, " ☰ Past Runs", fg=MUTED,
            font=FONT_BOLD, bg=PANEL).pack(side=tk.LEFT, padx=6)
        self._bot_cnt = lbl(bot_hdr, "", fg=MUTED, font=FONT_SMALL, bg=PANEL)
        self._bot_cnt.pack(side=tk.RIGHT, padx=10)

        bot_tree_frame = tk.Frame(bot_pane, bg=BG)
        bot_tree_frame.pack(fill=tk.BOTH, expand=True)
        self._bot_tree = self._make_tree(bot_tree_frame)

        # Keep a ref so _open_run() still works for backward compat
        self._tree = self._bot_tree

        lbl(self.root, " Double-click to view logs & resume · Right-click to star/unstar",
            fg=MUTED, font=FONT_SMALL).pack(anchor="w", padx=14)

    def _flow_statusbar(self):
        """Create the flow status strip; it stays unpacked until a flow reports."""
        self._fsb = tk.Frame(self.root, bg="#0d1a0d", height=22)
        self._fsb.pack(fill=tk.X, side=tk.BOTTOM)
        self._fsb.pack_propagate(False)
        self._flow_sv = tk.StringVar(value="")
        self._flow_clr = C_RUN
        self._flow_msg_lbl = tk.Label(
            self._fsb, textvariable=self._flow_sv,
            fg=C_RUN, font=FONT_SMALL, bg="#0d1a0d")
        self._flow_msg_lbl.pack(side=tk.LEFT, padx=12)
        self._fsb.pack_forget()  # hidden until a flow is active

    def _statusbar(self):
        """Create the bottom status line backed by ``self._sv``."""
        sb = tk.Frame(self.root, bg=PANEL, height=22)
        sb.pack(fill=tk.X, side=tk.BOTTOM)
        sb.pack_propagate(False)
        self._sv = tk.StringVar(value="Initialising…")
        lbl(sb, "", fg=MUTED, font=FONT_SMALL, bg=PANEL,
            textvariable=self._sv).pack(side=tk.LEFT, padx=12)

    # ── Poll ─────────────────────────────────────────────────────────────────────

    def _poll_loop(self):
        """Fetch all runs every ``POLL_SEC`` seconds while ``_polling`` is true.

        Runs on the daemon thread started in ``__init__``; results and errors
        are handed to the Tk side through ``root.after``.
        """
        while self._polling:
            time.sleep(POLL_SEC)
            try:
                runs = API.fetch_all_runs()
                self.root.after(0, lambda r=runs: self._apply(r))
            except Exception as e:
                self.root.after(0, lambda msg=str(e): self._sv.set(f"Poll error: {msg}"))

    def _refresh(self):
        """Fetch all runs once on a worker thread and apply the result."""
        self._sv.set("Fetching all runs (paginating)…")

        def _bg():
            try:
                runs = API.fetch_all_runs()
                self.root.after(0, lambda r=runs: self._apply(r))
            except Exception as e:
                self.root.after(0, lambda msg=str(e): self._sv.set(f"Error: {msg}"))

        threading.Thread(target=_bg, daemon=True).start()

    def _apply(self, runs):
        """Store ``runs``, notify on active→done transitions and redraw."""
        for run in runs:
            rid = run.get("id")
            new = run.get("status") or ""
            old = self._prev_statuses.get(rid)
            if old and old != new and is_active(old) and is_done(new):
                self._notify(f"Run #{rid} finished", f"{old} → {new}")
            self._prev_statuses[rid] = new

        self._runs = runs
        self._update_active_badge(runs)
        self._repopulate()
        now = datetime.now().strftime("%H:%M:%S")
        self._last_upd.config(text=f"Updated {now}")
        self._sv.set(f"Loaded {len(runs)} run(s) · paginated")

    # ── Table ────────────────────────────────────────────────────────────────────

    def _row_values(self, run):
        """Build treeview value tuple for a run."""
        rid = run["id"]
        s = run.get("status") or ""
        summary = (run.get("summary") or run.get("result") or "").replace("\n", " ")
        prs = len(run.get("github_pull_requests") or [])
        star = "★" if rid in self._starred else ""
        return (star, rid, s, fmt_dt(run.get("created_at")),
                summary[:130], prs or "", run.get("source_type") or "")

    def _row_tag(self, run):
        """Return the row tag: star tags win over the status-derived tag."""
        rid = run["id"]
        s = run.get("status") or ""
        if rid in self._starred:
            return "star_run" if is_active(s) else "starred"
        return status_tag(s)

    def _repopulate(self):
        """Rebuild both tables from ``_runs`` using the filter and search text."""
        filt = self._filt.get()
        query = self._svar.get().lower()

        for t in (self._top_tree, self._bot_tree):
            for row in t.get_children():
                t.delete(row)

        top_n = bot_n = 0
        for run in self._runs:
            rid = run["id"]
            s = run.get("status") or ""
            summary = (run.get("summary") or run.get("result") or "").replace("\n", " ")

            # Apply filter & search (filter only applies to bottom pane)
            if query and query not in str(rid).lower() \
                    and query not in s.lower() \
                    and query not in summary.lower():
                continue

            starred = rid in self._starred
            active = is_active(s)
            filt_ok = (filt == "All" or filt.lower() in s.lower())

            if starred or active:
                # Always shown in top pane regardless of filter
                self._top_tree.insert("", tk.END, iid=f"t_{rid}",
                                      values=self._row_values(run),
                                      tags=(self._row_tag(run),))
                top_n += 1

            if not active and filt_ok:
                # Past runs go to bottom — starred ones still appear here too (dimmed)
                self._bot_tree.insert("", tk.END, iid=f"b_{rid}",
                                      values=self._row_values(run),
                                      tags=(self._row_tag(run),))
                bot_n += 1

        self._top_cnt.config(text=f"{top_n} shown")
        self._bot_cnt.config(text=f"{bot_n} shown")
        total = len(self._runs)
        self._cnt_lbl.config(text=f"{top_n + bot_n} / {total}")

    def _sort(self, col):
        """Sort ``_runs`` by ``col``; clicking the same column flips the order."""
        if self._sort_col == col:
            self._sort_rev = not self._sort_rev
        else:
            self._sort_col, self._sort_rev = col, False
        key_map = {
            # The star column previously had no key, so clicking it did nothing.
            "★": lambda r: r.get("id") in self._starred,
            "ID": lambda r: r.get("id", 0),
            "Status": lambda r: r.get("status") or "",
            "Created At": lambda r: r.get("created_at") or "",
            "Summary": lambda r: r.get("summary") or "",
            "PRs": lambda r: len(r.get("github_pull_requests") or []),
            "Source": lambda r: r.get("source_type") or "",
        }
        self._runs.sort(key=key_map.get(col, lambda r: ""),
                        reverse=self._sort_rev)
        self._repopulate()

    # ── Dialogs ──────────────────────────────────────────────────────────────────

    def _open_create(self):
        """Open the new-run dialog."""
        CreateRunDialog(
            self.root,
            on_created=lambda _: self._refresh(),
            on_flow_runner=self._start_flow_runner)

    def _open_flows(self):
        """Open the flow manager."""
        FlowManagerDialog(self.root)

    def _start_flow_runner(self, run_id, steps):
        """Create a ``FlowRunner`` for ``run_id`` and announce it in the flow bar."""
        runner = FlowRunner(
            self.root, run_id, steps,
            on_status=self._on_flow_status)
        self._flow_runners[run_id] = runner
        # _on_flow_status packs the bar and prepends "⛓ " itself, so neither is
        # repeated here (the message used to come out with a doubled prefix).
        self._on_flow_status(
            f"Flow attached to run #{run_id} — {len(steps)} steps", C_RUN)

    def _on_flow_status(self, msg, colour):
        """Show ``msg`` in the flow bar; completion messages auto-hide after 8 s."""
        self._flow_sv.set(f"⛓ {msg}")
        self._flow_msg_lbl.config(fg=colour)
        self._fsb.pack(fill=tk.X, side=tk.BOTTOM)
        # Auto-hide "complete" messages after 8s
        if "complete" in msg.lower() or "✅" in msg:
            self.root.after(8000, self._maybe_hide_flow_bar)

    def _maybe_hide_flow_bar(self):
        """Hide the flow bar if it still shows a completion message."""
        current = self._flow_sv.get()
        if "complete" in current.lower() or "✅" in current:
            self._fsb.pack_forget()

    def _iid_to_rid(self, iid):
        """Strip t_/b_ prefix and return int run id.

        Raises ``ValueError`` when the remainder is not an integer.
        """
        # Split on the first underscore only; lstrip("tb_") strips a character
        # set rather than a prefix.
        return int(str(iid).split("_", 1)[-1])

    def _open_from_tree(self, tree):
        """Open a ``RunDialog`` for the row selected in ``tree``, if any."""
        sel = tree.selection()
        if not sel:
            return
        try:
            rid = self._iid_to_rid(sel[0])
        except ValueError:
            return
        run = next((r for r in self._runs if r["id"] == rid), None)
        if run:
            self._open_run_by(run)

    def _open_run(self):
        """Open the selected run from whichever table has a selection."""
        # Try both trees
        for tree in (self._top_tree, self._bot_tree):
            sel = tree.selection()
            if sel:
                self._open_from_tree(tree)
                return

    def _open_run_by(self, run):
        """Open a ``RunDialog`` for ``run``."""
        RunDialog(self.root, run,
                  on_refreshed=self._refresh,
                  on_start_flow=self._start_flow_runner)

    def _toggle_star(self, rid):
        """Star or unstar ``rid``, persist the set and redraw."""
        if rid in self._starred:
            self._starred.discard(rid)
        else:
            self._starred.add(rid)
        self._save_stars()
        self._repopulate()

    def _load_stars(self):
        """Return the starred ids stored in ``_star_file``.

        Returns an empty set when the file is missing, unreadable or malformed.
        """
        try:
            data = json.loads(self._star_file.read_text(encoding="utf-8"))
            return set(data)
        except (OSError, ValueError, TypeError):
            # Called before the status bar exists, so there is nowhere to report.
            return set()

    def _save_stars(self):
        """Write the starred ids to ``_star_file``; report failure in the status bar."""
        try:
            self._star_file.write_text(
                json.dumps(list(self._starred)), encoding="utf-8")
        except (OSError, TypeError, ValueError) as err:
            # Best effort: stars stay in memory, but tell the user they were
            # not persisted instead of failing silently.
            self._sv.set(f"Could not save stars: {err}")

    def _ctx_menu(self, event):
        """Show the per-run context menu for the row under the pointer."""
        # Figure out which tree was right-clicked
        widget = event.widget
        row = widget.identify_row(event.y)
        if not row:
            return
        widget.selection_set(row)
        try:
            rid = self._iid_to_rid(row)
        except ValueError:
            return
        run = next((r for r in self._runs if r["id"] == rid), None)
        if not run:
            return
        starred = rid in self._starred
        star_label = "☆ Remove Star" if starred else "★ Star this Run"
        m = tk.Menu(self.root, tearoff=0, bg=CARD, fg=TEXT,
                    activebackground=ACCENT, activeforeground="white",
                    font=FONT, bd=0)
        m.add_command(label="🔍 View / Resume",
                      command=lambda: self._open_run_by(run))
        m.add_separator()
        m.add_command(label=star_label,
                      command=lambda: self._toggle_star(rid))
        m.add_separator()
        if run.get("web_url"):
            m.add_command(label="🌐 Open in Browser",
                          command=lambda: webbrowser.open(run["web_url"]))
        m.add_command(label="📋 Copy Run ID",
                      command=lambda: (self.root.clipboard_clear(),
                                       self.root.clipboard_append(str(rid)),
                                       self._sv.set(f"Copied #{rid}")))
        m.post(event.x_root, event.y_root)

    # ── Notifications
──────────────────────────────────────────────────────────── + + def _notify(self, title, message): + try: + from plyer import notification + notification.notify(title=title, message=message, + app_name="Codegen Manager", timeout=6) + except Exception: + pass + self.root.after(0, lambda: self._toast(title, message)) + + def _toast(self, title, msg): + t = tk.Toplevel(self.root) + t.overrideredirect(True) + t.attributes("-topmost", True) + t.configure(bg=ACCENT) + inner = tk.Frame(t, bg=CARD) + inner.pack(fill=tk.BOTH, expand=True, padx=2, pady=2) + lbl(inner, f"🔔 {title}", fg=HOT, font=FONT_BOLD, bg=CARD + ).pack(anchor="w", padx=14, pady=(10, 2)) + lbl(inner, msg, fg=TEXT, font=FONT, bg=CARD + ).pack(anchor="w", padx=14, pady=(0, 10)) + t.update_idletasks() + sw = self.root.winfo_screenwidth() + sh = self.root.winfo_screenheight() + t.geometry(f"340x74+{sw-356}+{sh-110}") + t.after(5000, t.destroy) + + +# ════════════════════════════════════════════════════════════════════════════════ +# Entry point +# ════════════════════════════════════════════════════════════════════════════════ + +if __name__ == "__main__": + import subprocess, sys + for pkg in ("requests", "plyer"): + try: + __import__(pkg) + except ImportError: + subprocess.check_call([sys.executable, "-m", "pip", + "install", pkg, "-q"]) + root = tk.Tk() + try: + root.iconbitmap(default="") + except Exception: + pass + CodegenManager(root) + try: + root.mainloop() + except KeyboardInterrupt: + pass \ No newline at end of file diff --git a/eversale/README.md b/eversale/README.md index 36bc3521..7286d3ef 100755 --- a/eversale/README.md +++ b/eversale/README.md @@ -165,9 +165,9 @@ Need help? 
Email support@eversale.io or visit https://eversale.io/desktop | Repo | Path | Description | |------|------|-------------| -| **ev29** | `/mnt/c/ev29/` | Monorepo (web app, CLI, agent backend) | -| **cli** | `/mnt/c/ev29/cli/` | This CLI package (published to npm) | -| **agent-backend** | `/mnt/c/ev29/agent-backend/` | Python servers/workflow runtime; the CLI engine is the single brain (agent-backend shims into `cli/engine/agent`) | +| **eversale** | `eversale/` | Python package (pip install -e .) | +| **engine** | `eversale/engine/` | AI engine core (agent, config, prompts) | +| **agent** | `eversale/engine/agent/` | Core agent modules (~450 files) | ## Infrastructure diff --git a/eversale/engine/__init__.py b/eversale/engine/__init__.py new file mode 100644 index 00000000..8170a4ba --- /dev/null +++ b/eversale/engine/__init__.py @@ -0,0 +1,14 @@ +""" +Eversale Engine - AI Browser Automation Runtime + +This package provides the core AI engine for the Eversale CLI. +""" + +from pathlib import Path + +# Engine directory (this package's location) +ENGINE_DIR = Path(__file__).resolve().parent + +# Version +__version__ = "2.1.218" + diff --git a/eversale/engine/agent/a11y_browser.py b/eversale/engine/agent/a11y_browser.py index bd7af003..cb8988be 100755 --- a/eversale/engine/agent/a11y_browser.py +++ b/eversale/engine/agent/a11y_browser.py @@ -27,6 +27,7 @@ from dataclasses import dataclass, field from playwright.async_api import async_playwright, Page, Browser, BrowserContext, Locator from loguru import logger +from agent.a11y_compat import compat_accessibility_snapshot try: from . 
import a11y_config as config @@ -1183,7 +1184,7 @@ async def snapshot( # Get accessibility tree from Playwright if config.ENABLE_A11Y_TREE: raw_tree = await asyncio.wait_for( - self.page.accessibility.snapshot(), + compat_accessibility_snapshot(self.page), timeout=config.DEFAULT_TIMEOUT / 1000 ) if raw_tree: diff --git a/eversale/engine/agent/a11y_compat.py b/eversale/engine/agent/a11y_compat.py new file mode 100644 index 00000000..8840e266 --- /dev/null +++ b/eversale/engine/agent/a11y_compat.py @@ -0,0 +1,274 @@ +""" +Accessibility Snapshot Compatibility Layer +========================================== + +Drop-in replacement for the deprecated ``page.accessibility.snapshot()`` API +that was removed in Playwright 1.47+. + +Uses Chrome DevTools Protocol (CDP) ``Accessibility.getFullAXTree`` to build +an identical ``{role, name, children, …}`` dict tree. Falls back to +``page.locator('body').aria_snapshot()`` when CDP is unavailable. + +Usage:: + + from agent.a11y_compat import compat_accessibility_snapshot + + # Replaces: tree = await page.accessibility.snapshot(interesting_only=True) + tree = await compat_accessibility_snapshot(page, interesting_only=True) +""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +async def compat_accessibility_snapshot( + page, + interesting_only: bool = True, +) -> Optional[Dict[str, Any]]: + """Return an accessibility tree dict compatible with the old Playwright API. + + Parameters + ---------- + page : playwright.async_api.Page + The Playwright page instance. + interesting_only : bool + When *True* (the default), skip nodes that the browser marks as + "ignored / uninteresting" — mirrors the old ``interesting_only`` + argument. 
+ + Returns + ------- + dict | None + A nested dict with at least ``role``, ``name``, and optionally + ``children``, ``value``, ``checked``, ``disabled``, ``expanded``, + ``selected``, ``level``, ``description``. + """ + # ---------- primary path: CDP ---------- + try: + return await _snapshot_via_cdp(page, interesting_only) + except Exception as exc: + logger.debug("CDP accessibility snapshot failed (%s), trying fallback", exc) + + # ---------- fallback: aria_snapshot (YAML text) ---------- + try: + yaml_str = await page.locator("body").aria_snapshot() + title = "" + try: + title = await page.title() + except Exception: + pass + return { + "role": "RootWebArea", + "name": title, + "children": _parse_aria_yaml(yaml_str), + } + except Exception as exc2: + logger.debug("aria_snapshot fallback failed: %s", exc2) + + # ---------- last-resort: JS DOM walk ---------- + try: + return await _snapshot_via_js(page) + except Exception as exc3: + logger.warning("All accessibility snapshot methods failed: %s", exc3) + return None + + +# ────────────────────────────────────────────────────────────────────── +# CDP-based implementation +# ────────────────────────────────────────────────────────────────────── + +async def _snapshot_via_cdp( + page, + interesting_only: bool, +) -> Optional[Dict[str, Any]]: + """Build the tree using ``Accessibility.getFullAXTree`` over CDP.""" + cdp = await page.context.new_cdp_session(page) + try: + result = await cdp.send("Accessibility.getFullAXTree") + finally: + try: + await cdp.detach() + except Exception: + pass + + nodes = result.get("nodes", []) + if not nodes: + return None + + node_map: Dict[str, Dict[str, Any]] = {} + children_map: Dict[str, List[str]] = {} + root_id: Optional[str] = None + + for n in nodes: + nid = n.get("nodeId") + if nid is None: + continue + + child_ids = n.get("childIds", []) + if child_ids: + children_map[nid] = child_ids + + if interesting_only and n.get("ignored", False): + continue + + role_obj = n.get("role", 
{}) + name_obj = n.get("name", {}) + + role_val = ( + role_obj.get("value", "none") + if isinstance(role_obj, dict) + else str(role_obj) + ) + name_val = ( + name_obj.get("value", "") + if isinstance(name_obj, dict) + else str(name_obj) + ) + + # Collect properties + props: Dict[str, Any] = {} + for prop in n.get("properties", []): + pname = prop.get("name", "") + pval = prop.get("value", {}) + props[pname] = pval.get("value") if isinstance(pval, dict) else pval + + entry: Dict[str, Any] = {"role": role_val, "name": name_val} + + if props.get("value"): + entry["value"] = props["value"] + if props.get("checked") is not None: + entry["checked"] = props["checked"] + if props.get("disabled"): + entry["disabled"] = True + if props.get("expanded") is not None: + entry["expanded"] = props["expanded"] + if props.get("selected") is not None: + entry["selected"] = props["selected"] + if props.get("level") is not None: + entry["level"] = props["level"] + desc = n.get("description", {}) + if isinstance(desc, dict) and desc.get("value"): + entry["description"] = desc["value"] + + node_map[nid] = entry + if root_id is None: + root_id = nid + + # If root was filtered out, pick first visible node + if root_id is None or root_id not in node_map: + for n in nodes: + nid = n.get("nodeId") + if nid and nid in node_map: + root_id = nid + break + if root_id is None: + return None + + def _build(nid: str, depth: int = 0): + if depth > 25: + return None + node = node_map.get(nid) + if node is None: + # Filtered-out node — pass through its children + cids = children_map.get(nid, []) + flat: list = [] + for cid in cids: + child = _build(cid, depth + 1) + if isinstance(child, list): + flat.extend(child) + elif child is not None: + flat.append(child) + return flat or None + + result = dict(node) + cids = children_map.get(nid, []) + if cids: + children: list = [] + for cid in cids: + child = _build(cid, depth + 1) + if isinstance(child, list): + children.extend(child) + elif child is not None: 
+ children.append(child) + if children: + result["children"] = children + return result + + return _build(root_id) + + +# ────────────────────────────────────────────────────────────────────── +# JS-based fallback +# ────────────────────────────────────────────────────────────────────── + +_JS_WALK = """() => { + const IMPLICIT = { + A: 'link', BUTTON: 'button', INPUT: 'textbox', TEXTAREA: 'textbox', + SELECT: 'combobox', IMG: 'img', H1: 'heading', H2: 'heading', + H3: 'heading', H4: 'heading', H5: 'heading', H6: 'heading', + NAV: 'navigation', MAIN: 'main', HEADER: 'banner', FOOTER: 'contentinfo', + ASIDE: 'complementary', FORM: 'form', TABLE: 'table', UL: 'list', + OL: 'list', LI: 'listitem', P: 'paragraph', + }; + function walk(el, depth) { + if (depth > 12) return null; + const tag = el.tagName; + const role = el.getAttribute('role') || IMPLICIT[tag] || tag.toLowerCase(); + const name = el.getAttribute('aria-label') + || el.getAttribute('alt') + || el.getAttribute('title') + || (el.innerText || '').trim().slice(0, 120) + || ''; + const node = { role, name }; + if (el.tagName === 'INPUT' && el.type === 'checkbox') node.role = 'checkbox'; + if (el.tagName === 'INPUT' && el.type === 'radio') node.role = 'radio'; + const children = []; + for (const c of el.children) { + const ch = walk(c, depth + 1); + if (ch) children.push(ch); + } + if (children.length) node.children = children; + return node; + } + return { role: 'RootWebArea', name: document.title, children: [walk(document.body, 0)] }; +}""" + + +async def _snapshot_via_js(page) -> Optional[Dict[str, Any]]: + """Walk the DOM with JavaScript and infer ARIA roles.""" + return await page.evaluate(_JS_WALK) + + +# ────────────────────────────────────────────────────────────────────── +# Minimal YAML-ish parser for aria_snapshot output +# ────────────────────────────────────────────────────────────────────── + +def _parse_aria_yaml(yaml_str: str) -> List[Dict[str, Any]]: + """Very lightweight parser for the YAML 
returned by aria_snapshot(). + + Only handles the flat ``- role "name"`` format emitted by Playwright. + This is intentionally minimal — CDP is the primary path. + """ + children: List[Dict[str, Any]] = [] + if not yaml_str: + return children + for line in yaml_str.strip().splitlines(): + line = line.strip() + if not line.startswith("- "): + continue + line = line[2:].strip() + # e.g. heading "Example Domain" [level=1] + parts = line.split('"', 2) + if len(parts) >= 2: + role = parts[0].strip() + name = parts[1].strip() + else: + role = line.split()[0] if line.split() else "unknown" + name = "" + children.append({"role": role.rstrip(":"), "name": name}) + return children + diff --git a/eversale/engine/agent/accessibility_element_finder.py b/eversale/engine/agent/accessibility_element_finder.py index 3d18ef13..cc256d95 100755 --- a/eversale/engine/agent/accessibility_element_finder.py +++ b/eversale/engine/agent/accessibility_element_finder.py @@ -46,6 +46,7 @@ from typing import Any, Dict, List, Optional, Tuple import re from loguru import logger +from agent.a11y_compat import compat_accessibility_snapshot # ============================================================================== @@ -428,9 +429,9 @@ async def _get_snapshot(self, page_or_mcp: Any) -> Any: return None # Otherwise assume it's a Playwright page - elif hasattr(page_or_mcp, 'accessibility'): + elif hasattr(page_or_mcp, 'goto'): try: - return await page_or_mcp.accessibility.snapshot() + return await compat_accessibility_snapshot(page_or_mcp) except Exception as e: logger.error(f"Playwright snapshot failed: {e}") return None diff --git a/eversale/engine/agent/action_templates.py b/eversale/engine/agent/action_templates.py index 20d6b1dd..9d9dd883 100755 --- a/eversale/engine/agent/action_templates.py +++ b/eversale/engine/agent/action_templates.py @@ -216,7 +216,7 @@ def extract_variables(self, prompt: str) -> Dict[str, str]: ActionStep("playwright_scroll", {"direction": "down", "amount": 400}, 
"Finding more...", wait_after=0.6), ActionStep("playwright_extract_list", {"limit": 100}, "Collecting prospects..."), ], - variables={"query": r'(?:output\s+\d+\s+([^;]+?)\s+(?:URL|prospect)|(?:search|find)\s+["\']?(.+?)["\']?(?:\s+on linkedin|\s*$))'} + variables={"query": r'(?:output\s+\d+\s+([^;]+?)\s+(?:URL|prospect)|(?:search|find)\s+(?:on\s+)?(?:linkedin\s+)?(?:for\s+)?["\']?(.+?)["\']?(?:\s+on linkedin|\s*$))'} ), # Reddit operations @@ -232,7 +232,7 @@ def extract_variables(self, prompt: str) -> Dict[str, str]: ActionStep("playwright_scroll", {"direction": "down", "amount": 800}, "Loading more...", wait_after=0.8), ActionStep("playwright_extract_list", {"limit": 30, "type": "reddit_users"}, "Collecting user profiles..."), ], - variables={"query": r'(?:from\s+([^.\n]+?)\s+talk|about\s+([^.\n]+?)(?:\s*\.|$)|discussing\s+([^.\n]+?)(?:\s*\.|$|,|\s+or\s+)|(?:search|find)\s+["\']?(.+?)["\']?(?:\s+on reddit|\s*$))', "_default": "lead generation"} + variables={"query": r'(?:from\s+([^.\n]+?)\s+talk|about\s+([^.\n]+?)(?:\s*\.|$)|discussing\s+([^.\n]+?)(?:\s*\.|$|,|\s+or\s+)|(?:search|find)\s+(?:on\s+)?(?:reddit\s+)?(?:for\s+)?["\']?(.+?)["\']?(?:\s+on reddit|\s*$))', "_default": "lead generation"} ), ActionTemplate( @@ -257,7 +257,7 @@ def extract_variables(self, prompt: str) -> Dict[str, str]: ActionStep("playwright_navigate", {"url": "https://www.google.com"}, "Navigate to Google"), ActionStep("playwright_snapshot", {}, "Capture search page"), ], - variables={"query": r'(?:search|google|find)\s+(?:for\s+)?["\']?(.+?)["\']?(?:\s+on google|\s*$)'} + variables={"query": r'(?:search|google|find)\s+(?:(?:google|search)\s+)?(?:for\s+)?(?:on\s+google\s+)?["\']?(.+?)["\']?(?:\s+on google|\s*$)'} ), # Zoho Mail @@ -282,7 +282,7 @@ def extract_variables(self, prompt: str) -> Dict[str, str]: ActionStep("playwright_navigate", {"url": "https://www.youtube.com"}, "Navigate to YouTube"), ActionStep("playwright_snapshot", {}, "Capture YouTube state"), ], - variables={"query": 
r'(?:search|find|watch)\s+["\']?(.+?)["\']?(?:\s+on youtube|\s+video|\s*$)'} + variables={"query": r'(?:search|find|watch)\s+(?:(?:on\s+)?(?:youtube|video)\s+)?(?:for\s+|about\s+)?["\']?(.+?)["\']?(?:\s+on youtube|\s+video|\s*$)'} ), # Twitter/X @@ -295,7 +295,7 @@ def extract_variables(self, prompt: str) -> Dict[str, str]: ActionStep("playwright_navigate", {"url": "https://twitter.com"}, "Navigate to Twitter"), ActionStep("playwright_snapshot", {}, "Capture Twitter state"), ], - variables={"query": r'(?:search|find)\s+["\']?(.+?)["\']?(?:\s+on twitter|\s+on x|\s*$)'} + variables={"query": r'(?:search|find)\s+(?:on\s+)?(?:twitter\s+|x\s+)?(?:for\s+)?["\']?(.+?)["\']?(?:\s+on twitter|\s+on x|\s*$)'} ), # GitHub @@ -308,7 +308,7 @@ def extract_variables(self, prompt: str) -> Dict[str, str]: ActionStep("playwright_navigate", {"url": "https://github.com"}, "Navigate to GitHub"), ActionStep("playwright_snapshot", {}, "Capture GitHub state"), ], - variables={"query": r'(?:search|find|repo)\s+["\']?(.+?)["\']?(?:\s+on github|\s*$)'} + variables={"query": r'(?:search|find|repo)\s+(?:on\s+)?(?:github\s+)?(?:for\s+)?["\']?(.+?)["\']?(?:\s+on github|\s*$)'} ), # Login template (generic) diff --git a/eversale/engine/agent/apply_incremental_changes.py b/eversale/engine/agent/apply_incremental_changes.py index 12144863..64bb9f5d 100755 --- a/eversale/engine/agent/apply_incremental_changes.py +++ b/eversale/engine/agent/apply_incremental_changes.py @@ -4,7 +4,7 @@ """ # Read the file -with open('/mnt/c/ev29/cli/engine/agent/agentic_browser.py', 'r') as f: +with open('engine/agent/agentic_browser.py', 'r') as f: lines = f.readlines() # Find the __init__ method and add snapshot state variables @@ -153,7 +153,7 @@ def _format_incremental_snapshot(self, diff: Dict, title: str, url: str) -> str: # Update docstring for k in range(j + 1, j + 10): if '"""' in lines[k] and 'Get page snapshot' in lines[k]: - lines.insert(k + 2, '\n Args:\n mode: \\'full\\' or \\'incremental\\' (default: 
uses self._snapshot_mode)\n') + lines.insert(k + 2, '\n Args:\n mode: "full" or "incremental" (default: uses self._snapshot_mode)\n') break # Add mode handling after docstring for k in range(j, j + 20): @@ -200,7 +200,7 @@ def _format_incremental_snapshot(self, diff: Dict, title: str, url: str) -> str: print("WARNING: Could not find snapshot section") # Write back -with open('/mnt/c/ev29/cli/engine/agent/agentic_browser.py', 'w') as f: +with open('engine/agent/agentic_browser.py', 'w') as f: f.writelines(lines) print("\n✓ All changes applied successfully!") diff --git a/eversale/engine/agent/apply_incremental_snapshot_fix.py b/eversale/engine/agent/apply_incremental_snapshot_fix.py index d517ff34..63fa1476 100755 --- a/eversale/engine/agent/apply_incremental_snapshot_fix.py +++ b/eversale/engine/agent/apply_incremental_snapshot_fix.py @@ -6,7 +6,7 @@ import re # Read the file -with open('/mnt/c/ev29/cli/engine/agent/agentic_browser.py', 'r') as f: +with open('engine/agent/agentic_browser.py', 'r') as f: content = f.read() # Find and replace the section @@ -59,7 +59,7 @@ if content != content_new: print("Replacement successful!") # Write back - with open('/mnt/c/ev29/cli/engine/agent/agentic_browser.py', 'w') as f: + with open('engine/agent/agentic_browser.py', 'w') as f: f.write(content_new) print("File updated.") else: diff --git a/eversale/engine/agent/cdp_browser_connector.py b/eversale/engine/agent/cdp_browser_connector.py index 19b81a11..d1bd7c92 100755 --- a/eversale/engine/agent/cdp_browser_connector.py +++ b/eversale/engine/agent/cdp_browser_connector.py @@ -41,6 +41,7 @@ import aiohttp from loguru import logger +from agent.a11y_compat import compat_accessibility_snapshot # Check for Playwright CDP support try: @@ -76,7 +77,7 @@ async def snapshot(self) -> Dict[str, Any]: """Get accessibility-focused snapshot of current page""" try: title = await self.page.title() - acc_tree = await self.page.accessibility.snapshot(interesting_only=True) + acc_tree = await 
compat_accessibility_snapshot(self.page, interesting_only=True) snapshot_text = self._format_accessibility_tree(acc_tree) return { diff --git a/eversale/engine/agent/config_loader.py b/eversale/engine/agent/config_loader.py index bafd95bc..612c9b98 100755 --- a/eversale/engine/agent/config_loader.py +++ b/eversale/engine/agent/config_loader.py @@ -108,7 +108,7 @@ def load_config() -> Dict[str, Any]: pass else: # Default to eversale.io proxy - config['llm']['base_url'] = os.environ.get('OPENAI_BASE_URL', os.environ.get('ANTHROPIC_BASE_URL', 'https://api.z.ai/api/anthropic')) + config['llm']['base_url'] = os.environ.get('OPENAI_BASE_URL', os.environ.get('ANTHROPIC_BASE_URL', 'https://api.z.ai/api/coding/paas/v4')) return config diff --git a/eversale/engine/agent/coordinate_targeting.py b/eversale/engine/agent/coordinate_targeting.py index 422f66e3..76411cc1 100755 --- a/eversale/engine/agent/coordinate_targeting.py +++ b/eversale/engine/agent/coordinate_targeting.py @@ -18,6 +18,7 @@ from loguru import logger import re import json +from agent.a11y_compat import compat_accessibility_snapshot @dataclass @@ -204,7 +205,7 @@ async def get_snapshot_with_coords(self, page) -> Tuple[str, List[TargetedElemen """Get accessibility snapshot and populate coordinates""" # Get snapshot try: - snapshot = await page.accessibility.snapshot() + snapshot = await compat_accessibility_snapshot(page) snapshot_text = self._serialize_snapshot(snapshot) except Exception as e: logger.warning(f"[COORD_TARGET] Snapshot failed: {e}") diff --git a/eversale/engine/agent/dom_first_browser.py b/eversale/engine/agent/dom_first_browser.py index c8b0134b..7a136b32 100755 --- a/eversale/engine/agent/dom_first_browser.py +++ b/eversale/engine/agent/dom_first_browser.py @@ -37,6 +37,7 @@ from dataclasses import dataclass, field from playwright.async_api import async_playwright, Page, Browser, BrowserContext from loguru import logger +from agent.a11y_compat import compat_accessibility_snapshot # Import 
stealth configuration if available STEALTH_AVAILABLE = False @@ -379,7 +380,7 @@ async def snapshot(self, force: bool = False) -> SnapshotResult: # Get accessibility tree try: raw_tree = await asyncio.wait_for( - self._page.accessibility.snapshot(), + compat_accessibility_snapshot(self._page), timeout=config.DEFAULT_TIMEOUT / 1000 ) diff --git a/eversale/engine/agent/example_recovery_usage.py b/eversale/engine/agent/example_recovery_usage.py index 9c99db36..18551fc8 100755 --- a/eversale/engine/agent/example_recovery_usage.py +++ b/eversale/engine/agent/example_recovery_usage.py @@ -239,10 +239,9 @@ async def main(): print("\n" + "="*70) print("Examples complete! Check the integration guide for more details:") - print(" /mnt/c/ev29/agent/RECOVERY_SYSTEM_INTEGRATION.md") + print(" See RECOVERY_SYSTEM_INTEGRATION.md in the agent directory") print("="*70 + "\n") if __name__ == "__main__": - asyncio.run(main()) - + asyncio.run(main()) diff --git a/eversale/engine/agent/fast_extract.py b/eversale/engine/agent/fast_extract.py index bf9bab5d..742b2df0 100755 --- a/eversale/engine/agent/fast_extract.py +++ b/eversale/engine/agent/fast_extract.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 """ Eversale Fast Extract - Playwright MCP equivalent. 
diff --git a/eversale/engine/agent/gpu_llm_client.py b/eversale/engine/agent/gpu_llm_client.py index e6b8a5c9..8c9d7498 100755 --- a/eversale/engine/agent/gpu_llm_client.py +++ b/eversale/engine/agent/gpu_llm_client.py @@ -30,7 +30,7 @@ 'ANTHROPIC_BASE_URL', os.environ.get( 'GPU_LLM_URL', - os.environ.get('SUPPORT_AGENT_LLM_CHAIN_REMOTE_ORIGIN', 'https://api.z.ai/api/anthropic'))) + os.environ.get('SUPPORT_AGENT_LLM_CHAIN_REMOTE_ORIGIN', 'https://api.z.ai/api/coding/paas/v4'))) ) GPU_LLM_TIMEOUT = int(os.environ.get('GPU_LLM_TIMEOUT_MS', '60000')) / 1000 # Convert to seconds @@ -38,12 +38,12 @@ MAX_RETRIES = 5 INITIAL_BACKOFF_SECONDS = 1.0 -# Available models on GPU server - 0000/ui-tars-1.5-7b:latest is best for tool calling +# Available models on GPU server GPU_MODELS = { 'fast': 'glm-5', # Fast and excellent at tool calling 'default': 'glm-5', # Best for tool calling (balanced) 'quality': 'glm-5', # High quality tool calling - 'vision': 'glm-5', # Vision tasks + 'vision': 'glm-4.7v', # Vision tasks } diff --git a/eversale/engine/agent/humanization/fast_track_safety.py b/eversale/engine/agent/humanization/fast_track_safety.py index 8a780738..59321540 100755 --- a/eversale/engine/agent/humanization/fast_track_safety.py +++ b/eversale/engine/agent/humanization/fast_track_safety.py @@ -187,7 +187,7 @@ def add_safe_pattern(self, pattern: str): Add a URL pattern to the safe list. Args: - pattern: Regex pattern (e.g., r'^https://.*\.internal\.company\.com') + pattern: Regex pattern (e.g., r'^https://.*\\.internal\\.company\\.com') """ self.config.safe_patterns.append(pattern) logger.info(f"Added pattern '{pattern}' to FAST_TRACK whitelist") @@ -262,4 +262,4 @@ def enforce_fast_track_safety(url: str, cursor_config, typer_config, scroller_co Automatically disables FAST_TRACK if URL is not whitelisted. 
""" checker = get_safety_checker() - checker.enforce(url, cursor_config, typer_config, scroller_config) + checker.enforce(url, cursor_config, typer_config, scroller_config) diff --git a/eversale/engine/agent/llm_client.py b/eversale/engine/agent/llm_client.py index 03e09007..e6f4d8d7 100755 --- a/eversale/engine/agent/llm_client.py +++ b/eversale/engine/agent/llm_client.py @@ -244,21 +244,21 @@ def __init__(self, config: Optional[Dict] = None): pass # ============================================================= - # MODEL CONFIGURATION (3 models only) + # MODEL CONFIGURATION # ============================================================= - # 1. 0000/ui-tars-1.5-7b:latest - Primary model for all tasks + function calling - self.main_model = os.getenv('OPENAI_MODEL', '') or os.getenv('EVERSALE_LLM_MODEL', llm_config.get('main_model', '0000/ui-tars-1.5-7b:latest')) + # Primary model for all tasks + function calling (default: glm-5) + self.main_model = os.getenv('OPENAI_MODEL', '') or os.getenv('EVERSALE_LLM_MODEL', llm_config.get('main_model', 'glm-5')) self.fast_model = self.main_model # Same as main - self.tool_calling_model = self.main_model # qwen3 has native function calling + self.tool_calling_model = self.main_model # Same for tool calling - # 2. UI-TARS - Vision only - self.vision_model = llm_config.get('vision_model', '0000/ui-tars-1.5-7b:latest') + # Vision model (default: glm-4.7v) + self.vision_model = os.getenv('OPENAI_MODEL_VISION', '') or llm_config.get('vision_model', 'glm-4.7v') self.web_vision_model = self.vision_model # Same for all vision - # 3. 
Kimi API - Complex reasoning (external) + # Complex reasoning fallback self.kimi_api_key = os.getenv('KIMI_API_KEY', '') - self.kimi_api_url = llm_config.get('kimi_api_url', 'https://api.moonshot.ai/v1') - self.complex_model = 'moonshot-v1-8k' # Kimi model name + self.kimi_api_url = llm_config.get('kimi_api_url', 'https://api.z.ai/api/coding/paas/v4') + self.complex_model = self.main_model # Settings self.temperature = llm_config.get('temperature', 0.1) diff --git a/eversale/engine/agent/mcp_client.py b/eversale/engine/agent/mcp_client.py index e180be6e..a13d3961 100755 --- a/eversale/engine/agent/mcp_client.py +++ b/eversale/engine/agent/mcp_client.py @@ -123,7 +123,9 @@ def __init__(self, working_dir: str = None): self.servers = {} self.tools = {} self.config = self._load_config() - self._headless_override = None # None = use config, True/False = override + # Check env var from CLI flag propagation, else None = use config + _env_headless = os.environ.get("EVERSALE_HEADLESS") + self._headless_override = True if _env_headless == "1" else None self.agent_network = AgentNetwork() self._mcp_server = None # Internal MCP server instance diff --git a/eversale/engine/agent/output_path.py b/eversale/engine/agent/output_path.py index 2332d485..f56ef8bc 100755 --- a/eversale/engine/agent/output_path.py +++ b/eversale/engine/agent/output_path.py @@ -24,9 +24,6 @@ def get_output_folder() -> Path: system = platform.system() home = Path.home() - # Check if running in WSL (can access Windows folders) - is_wsl = "microsoft" in platform.uname().release.lower() if hasattr(platform.uname(), 'release') else False - # Try Desktop first (most visible to user) desktop_paths = [] @@ -41,19 +38,11 @@ def get_output_folder() -> Path: desktop_paths = [ home / "Desktop", ] - else: # Linux / WSL + else: # Linux desktop_paths = [ home / "Desktop", home / "desktop", # Some distros use lowercase ] - # WSL: also check Windows user folders - if is_wsl: - # Try common Windows user paths via /mnt/c - 
for user in ["Owner", "User", os.environ.get("USER", "")]: - desktop_paths.extend([ - Path(f"/mnt/c/Users/{user}/Desktop"), - Path(f"/mnt/c/Users/{user}/OneDrive/Desktop"), - ]) # Check Desktop paths for desktop in desktop_paths: @@ -80,10 +69,6 @@ def get_output_folder() -> Path: home / "Downloads", home / "downloads", ] - # WSL: also check Windows Downloads - if is_wsl: - for user in ["Owner", "User", os.environ.get("USER", "")]: - downloads_paths.append(Path(f"/mnt/c/Users/{user}/Downloads")) for downloads in downloads_paths: if downloads.exists() and downloads.is_dir(): @@ -206,10 +191,10 @@ def get_file_location_message(path: Path, row_count: int = 0, appended: bool = F # Make path more readable for users path_str = str(path) - # Shorten WSL paths for readability - if "/mnt/c/Users/" in path_str: - # Convert /mnt/c/Users/Owner/... to C:\Users\Owner\... - path_str = path_str.replace("/mnt/c/", "C:\\").replace("/", "\\") + # Shorten home paths for readability + home_str = str(Path.home()) + if path_str.startswith(home_str): + path_str = "~" + path_str[len(home_str):] return f"{action}{count_msg} to: {path_str}" diff --git a/eversale/engine/agent/playwright_direct.py b/eversale/engine/agent/playwright_direct.py index 025cb53a..a2b5ac71 100755 --- a/eversale/engine/agent/playwright_direct.py +++ b/eversale/engine/agent/playwright_direct.py @@ -105,6 +105,7 @@ def _is_chrome_available() -> bool: from loguru import logger import re from functools import wraps +from agent.a11y_compat import compat_accessibility_snapshot # Import ToolResult for standardized return format try: @@ -3014,7 +3015,7 @@ async def get_accessibility_snapshot(self) -> Dict[str, Any]: await self._ensure_page() # Get accessibility tree snapshot - snapshot = await self.page.accessibility.snapshot() + snapshot = await compat_accessibility_snapshot(self.page) if not snapshot: return {"error": "No accessibility snapshot available"} @@ -3823,7 +3824,7 @@ async def snapshot(self) -> Dict[str, Any]: 
logger.debug(f"Cloudflare check failed: {cf_check_err}") title = await self.page.title() - acc_tree = await self.page.accessibility.snapshot(interesting_only=True) + acc_tree = await compat_accessibility_snapshot(self.page, interesting_only=True) snapshot_text = self._format_accessibility_tree(acc_tree) summary = await self._summarize_page() @@ -3965,7 +3966,7 @@ async def browser_snapshot(self) -> Dict[str, Any]: self._mmid_elements = {el['mmid']: el for el in elements} # Get accessibility tree - acc_tree = await self.page.accessibility.snapshot(interesting_only=True) + acc_tree = await compat_accessibility_snapshot(self.page, interesting_only=True) # Format for LLM consumption (compact view) formatted_lines = [] @@ -10106,7 +10107,7 @@ async def _try_accessibility_extraction(self, prompt_lower: str) -> Optional[Dic await self._ensure_page() # Get accessibility tree - much faster than full HTML - acc_tree = await self.page.accessibility.snapshot(interesting_only=True) + acc_tree = await compat_accessibility_snapshot(self.page, interesting_only=True) if not acc_tree: return None diff --git a/eversale/engine/agent/selector_fallbacks.py b/eversale/engine/agent/selector_fallbacks.py index 843afb2c..4f6cee31 100755 --- a/eversale/engine/agent/selector_fallbacks.py +++ b/eversale/engine/agent/selector_fallbacks.py @@ -18,8 +18,8 @@ 4. Refs survive page structure changes For details, see: -- /mnt/c/ev29/cli/engine/agent/accessibility_element_finder.py -- /mnt/c/ev29/cli/engine/agent/ACCESSIBILITY_ELEMENT_FINDER_README.md +- engine/agent/accessibility_element_finder.py +- engine/agent/ACCESSIBILITY_ELEMENT_FINDER_README.md """ import warnings diff --git a/eversale/engine/agent/self_healing_selectors.py b/eversale/engine/agent/self_healing_selectors.py index 9a45a8b9..2c72d6a1 100755 --- a/eversale/engine/agent/self_healing_selectors.py +++ b/eversale/engine/agent/self_healing_selectors.py @@ -18,8 +18,8 @@ 4. 
No healing needed - refs are stable across page changes For details, see: -- /mnt/c/ev29/cli/engine/agent/accessibility_element_finder.py -- /mnt/c/ev29/cli/engine/agent/ACCESSIBILITY_ELEMENT_FINDER_README.md +- engine/agent/accessibility_element_finder.py +- engine/agent/ACCESSIBILITY_ELEMENT_FINDER_README.md """ import warnings diff --git a/eversale/engine/agent/verify_uitars_integration.py b/eversale/engine/agent/verify_uitars_integration.py index 7b676008..e254b936 100755 --- a/eversale/engine/agent/verify_uitars_integration.py +++ b/eversale/engine/agent/verify_uitars_integration.py @@ -110,7 +110,7 @@ def verify_integration(): print("STATUS: WARNING - Integration mostly complete") print("\nReview warnings, but integration should work") print("\nNext steps:") - print(" 1. Test: cd /mnt/c/ev29/cli && node bin/eversale.js \"take a screenshot\"") + print(" 1. Test: eversale \"take a screenshot\"") print(" 2. Look for [UITARS] log messages") print(" 3. Verify screenshot retry on failures") return True @@ -122,8 +122,7 @@ def verify_integration(): print(" - Screenshot context limited to last 5 (auto-prune)") print(" - Enhanced reliability for browser automation") print("\nTest with:") - print(" cd /mnt/c/ev29/cli") - print(" node bin/eversale.js \"take a screenshot and describe it\"") + print(" eversale \"take a screenshot and describe it\"") print("\nMonitor logs for:") print(" [UITARS] Screenshot context management enabled (max 5)") print(" [UITARS] Enhanced browser automation with tiered retry enabled") diff --git a/eversale/engine/agent/verify_uitars_upgrade.py b/eversale/engine/agent/verify_uitars_upgrade.py index 58565a8c..3cceed42 100755 --- a/eversale/engine/agent/verify_uitars_upgrade.py +++ b/eversale/engine/agent/verify_uitars_upgrade.py @@ -68,7 +68,7 @@ def verify_upgrade(): if errors: print("STATUS: FAILED - Upgrade not complete") print("\nTo fix, run:") - print(" cd /mnt/c/ev29/cli/engine/agent") + print(" cd engine/agent") print(" cp 
captcha_solver.py.backup captcha_solver.py") print(" # Then manually apply changes from UI_TARS_CAPTCHA_UPGRADE.patch") return False diff --git a/eversale/engine/agent/workflow_dsl.py b/eversale/engine/agent/workflow_dsl.py index 5c535a6f..25f627d0 100755 --- a/eversale/engine/agent/workflow_dsl.py +++ b/eversale/engine/agent/workflow_dsl.py @@ -370,7 +370,7 @@ class WorkflowStore: """ Workflow storage manager for agent-backend integration. - Stores workflows in /mnt/c/ev29/agent-backend/memory/workflows/ + Stores workflows in the local memory/workflows/ directory. Compatible with existing memory structure. """ diff --git a/eversale/engine/agent/workspace_paths.py b/eversale/engine/agent/workspace_paths.py index 8fe391b3..101c7285 100755 --- a/eversale/engine/agent/workspace_paths.py +++ b/eversale/engine/agent/workspace_paths.py @@ -1,4 +1,8 @@ -"""Workspace path detection utilities for the Eversale monorepo.""" +"""Workspace path detection utilities for the Eversale package. + +Resolves paths dynamically based on the package installation location. +Works natively on Windows, macOS, and Linux — no WSL paths. +""" from __future__ import annotations @@ -7,44 +11,69 @@ from functools import lru_cache +@lru_cache(maxsize=1) +def get_engine_dir() -> Path: + """Return the absolute path to the engine directory. + + This is the parent of the agent/ directory (i.e. engine/). + """ + return Path(__file__).resolve().parent.parent + + @lru_cache(maxsize=1) def get_workspace_root() -> str: """ - Return the absolute path to the ev29 workspace root. + Return the absolute path to the workspace root. Priority: 1. EVERSALE_WORKSPACE_ROOT environment variable (allows overrides). - 2. Parent directories that contain both `agent-backend/` and `cli/`. - 3. Known defaults (/mnt/c/ev29 or C:/ev29). - 4. The highest parent of this file (repository root fallback). + 2. EVERSALE_ENGINE_DIR environment variable (set by the CLI entry point). + 3. 
Parent directories that contain both ``agent/`` and ``config/``. + 4. The engine directory (package root fallback). """ - + # 1. Explicit env var override env_root = os.environ.get("EVERSALE_WORKSPACE_ROOT") if env_root: return env_root.rstrip("/\\") - current = Path(__file__).resolve() - parents = [p for p in current.parents] + # 2. Engine dir from CLI entry point + env_engine = os.environ.get("EVERSALE_ENGINE_DIR") + if env_engine: + engine_path = Path(env_engine) + if engine_path.exists(): + return str(engine_path) - for parent in parents: + # 3. Walk up from this file to find the engine root + engine_dir = get_engine_dir() + if (engine_dir / "agent").exists() and (engine_dir / "config").exists(): + return str(engine_dir) + + # 4. Check parent directories for a workspace marker + current = Path(__file__).resolve() + for parent in current.parents: + # Check for the eversale engine layout + if (parent / "agent").exists() and (parent / "config").exists(): + return str(parent) + # Check for a monorepo layout (agent-backend + cli) if (parent / "agent-backend").exists() and (parent / "cli").exists(): return str(parent) - default_candidates = [Path("/mnt/c/ev29"), Path("C:/ev29")] - for candidate in default_candidates: - try: - if candidate.exists(): - return str(candidate) - except OSError: - continue - - # Fallback to the repository root (highest parent) - if parents: - return str(parents[-1]) - - return str(current) + # 5. Fallback to the engine directory + return str(engine_dir) def get_workspace_root_path() -> Path: """Return the workspace root as a Path object.""" return Path(get_workspace_root()).resolve() + + +@lru_cache(maxsize=1) +def get_eversale_home() -> Path: + """Return the eversale home directory (~/.eversale). + + This is where runtime data, logs, cache, and outputs are stored. 
+ """ + home = Path(os.environ.get("EVERSALE_HOME", Path.home() / ".eversale")) + home.mkdir(parents=True, exist_ok=True) + return home + diff --git a/eversale/engine/config/config.yaml b/eversale/engine/config/config.yaml index 61b82a40..8f8473e8 100755 --- a/eversale/engine/config/config.yaml +++ b/eversale/engine/config/config.yaml @@ -8,17 +8,17 @@ fast_mode: verbose: false llm: - mode: local - local_url: https://api.z.ai/api/anthropic - base_url: https://api.z.ai/api/anthropic - remote_url: https://api.z.ai/api/anthropic + mode: remote + local_url: http://localhost:11434 + base_url: https://api.z.ai/api/coding/paas/v4 + remote_url: https://api.z.ai/api/coding/paas/v4 main_model: glm-5 fast_model: glm-5 tool_calling_model: glm-5 - vision_model: glm-5 - web_vision_model: glm-5 + vision_model: glm-4.7v + web_vision_model: glm-4.7v complex_model: glm-5 - kimi_api_url: https://api.z.ai/api/anthropic + kimi_api_url: https://api.z.ai/api/coding/paas/v4 strategic_planner: enabled: true @@ -47,7 +47,7 @@ llm_strategy: visual_targeting: enabled: true mode: auto - model: glm-5 + model: glm-4.7v browser: headless_default: true @@ -97,3 +97,4 @@ safety: - permanently_delete_data - unsubscribe_all destructive_confirmation: true + diff --git a/eversale/engine/run_mcp.py b/eversale/engine/run_mcp.py index 9d26ab2d..5d1c85d7 100755 --- a/eversale/engine/run_mcp.py +++ b/eversale/engine/run_mcp.py @@ -217,29 +217,29 @@ async def get_action(task: str, url: str, title: str, elements: List[str], histo - DONE: [reason] - WAIT_HUMAN: [reason]""" - async with httpx.AsyncClient(timeout=60) as client: - r = await client.post( - "https://irrpfq5xoh5dto-4174.proxy.runpod.net/v1/chat/completions", - headers={"Authorization": f"Bearer {LICENSE_KEY}", "Content-Type": "application/json"}, - json={ - "model": "0000/ui-tars-1.5-7b:latest", - "messages": [{"role": "user", "content": prompt}], - "temperature": 0.1, - "max_tokens": 1000 # Need enough for reasoning + action - } - ) - msg = 
r.json().get("choices", [{}])[0].get("message", {}) - # qwen3 outputs to reasoning field in thinking mode - reasoning = msg.get("reasoning", "") or msg.get("content", "") - - # Check if we should prioritize DONE detection - check_done = len(history) > 1 and any("click" in h.lower() for h in history) - - # Check if we need to navigate (on blank page) - need_navigate = url == "about:blank" or not url.startswith("http") - - # Extract action from reasoning - return extract_action_from_reasoning(reasoning, has_captcha, check_done, need_navigate) + async with httpx.AsyncClient(timeout=60) as client: + r = await client.post( + "https://irrpfq5xoh5dto-4174.proxy.runpod.net/v1/chat/completions", + headers={"Authorization": f"Bearer {LICENSE_KEY}", "Content-Type": "application/json"}, + json={ + "model": "0000/ui-tars-1.5-7b:latest", + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.1, + "max_tokens": 1000 # Need enough for reasoning + action + } + ) + msg = r.json().get("choices", [{}])[0].get("message", {}) + # qwen3 outputs to reasoning field in thinking mode + reasoning = msg.get("reasoning", "") or msg.get("content", "") + + # Check if we should prioritize DONE detection + check_done = len(history) > 1 and any("click" in h.lower() for h in history) + + # Check if we need to navigate (on blank page) + need_navigate = url == "about:blank" or not url.startswith("http") + + # Extract action from reasoning + return extract_action_from_reasoning(reasoning, has_captcha, check_done, need_navigate) # ============================================================================= diff --git a/eversale/engine/run_simple.py b/eversale/engine/run_simple.py index 6a5a3802..79eedaa4 100755 --- a/eversale/engine/run_simple.py +++ b/eversale/engine/run_simple.py @@ -34,6 +34,7 @@ from agent.accessibility_element_finder import AccessibilityTreeParser, AccessibilityRef from agent.action_templates import find_template, TEMPLATES from loguru import logger +from 
agent.a11y_compat import compat_accessibility_snapshot # Try to import a11y template executor (graceful fallback if not yet created) try: @@ -92,6 +93,9 @@ def __init__( self.page = None self.browser = None + # Map ref_id -> (role, name) for element resolution + self._ref_map: Dict[str, Dict[str, str]] = {} + async def _init_browser(self): """Initialize Playwright browser.""" if self.browser is not None: @@ -119,23 +123,81 @@ async def _close_browser(self): await self.playwright.stop() async def _get_snapshot(self) -> str: - """Get accessibility snapshot of current page.""" + """Get accessibility snapshot of current page and build ref map. + + Uses Playwright's aria_snapshot() API (v1.49+) which returns a + YAML-like accessibility tree. Falls back to page.accessibility.snapshot() + for older versions. + """ try: - # Get accessibility tree - snapshot = await self.page.accessibility.snapshot() + # Wait briefly for SPA rendering to settle + await asyncio.sleep(0.3) + + # Try modern aria_snapshot() first (Playwright 1.49+) + aria_text = None + try: + aria_text = await self.page.locator('body').aria_snapshot() + except Exception: + pass + + if not aria_text: + # Retry after wait + await asyncio.sleep(1.5) + try: + aria_text = await self.page.locator('body').aria_snapshot() + except Exception: + pass + + if not aria_text: + # Last resort: try deprecated API + try: + snapshot = await compat_accessibility_snapshot(self.page) + refs = self.parser.parse_snapshot(snapshot) + self._ref_map.clear() + for ref in refs: + self._ref_map[ref.ref] = {"role": ref.role, "name": ref.name, "value": ref.value or ""} + lines = [f"- {r.role} \"{r.name}\" [ref={r.ref}]" for r in refs] + return "\n".join(lines) if lines else "(page is empty or still loading)" + except Exception: + return "(page is empty or still loading)" + + # Parse the aria_snapshot YAML text into ref map + self._ref_map.clear() + lines_out = [] + ref_counter = 0 + + import re as _re + for line in aria_text.split('\n'): + 
stripped = line.strip() + if not stripped or stripped.startswith('/url:') or stripped.startswith('#'): + continue + + # Match patterns like: - role "name" or - role: or - text: content + match = _re.match(r'^-\s+(\w+)(?:\s+"([^"]*)")?(?:\s*:\s*(.*))?', stripped) + if match: + role = match.group(1).lower() + name = match.group(2) or match.group(3) or '' + name = name.strip() - # Parse into refs - refs = self.parser.parse_snapshot(snapshot) + # Skip purely structural/decorative elements + if role in ('text',) and not name: + continue - # Format as markdown (like MCP) - lines = [] - for ref in refs: - lines.append(f"- {ref.role} \"{ref.name}\" [ref={ref.ref}]") + ref_id = f"e{ref_counter}" + ref_counter += 1 + + self._ref_map[ref_id] = { + "role": role, + "name": name, + "value": "", + } + + lines_out.append(f"- {role} \"{name}\" [ref={ref_id}]") - return "\n".join(lines) + return "\n".join(lines_out) if lines_out else "(page is empty or still loading)" except Exception as e: logger.error(f"Failed to get snapshot: {e}") - return "" + return "(snapshot failed)" async def _plan_next_action(self, goal: str, snapshot: str, history: List[str]) -> Dict[str, Any]: """ @@ -154,19 +216,30 @@ async def _plan_next_action(self, goal: str, snapshot: str, history: List[str]) return self._fallback_planner(goal, snapshot) # Build prompt - prompt = f"""You are a browser automation agent. Your goal: {goal} + prompt = f"""You are a fast browser automation agent. Goal: {goal} -Current page state (accessibility tree): +URL: {self.page.url} + +Page elements: {snapshot} -Actions taken so far: -{chr(10).join(history) if history else "None yet"} +History (last 8): +{chr(10).join(history[-8:]) if history else "None"} -What should I do next? 
Respond with JSON only: -{{"action": "navigate|click|type|wait|extract|done", "target": "ref or URL", "value": "text for type action", "reason": "why"}} +Actions: navigate(target=URL), click(target=ref), type(target=ref, value=text), press(value=key), scroll(value=down/up), wait(value=seconds), extract, done -If goal is complete, use action "done". -""" +CRITICAL RULES: +1. ALWAYS use the exact [ref=...] ID from "Page elements" as target (e.g. "s3e3", "s5e5") +2. NEVER use descriptive names like "email" or "password" as target — use the ref ID +3. Be EFFICIENT — avoid unnecessary wait/extract. Act directly on visible elements +4. For textbox elements: use "type" with the ref and value +5. For buttons/links: use "click" with the ref +6. After filling ALL form fields AND solving captcha, click the submit/sign-in button +7. Only use "done" when the original goal is fully complete (e.g. response retrieved) +8. If an element was "not found", the page may have changed — try a different ref from a fresh snapshot + +JSON only: +{{"action":"...","target":"ref","value":"text if needed","reason":"why"}}""" try: response = await self.llm_client.generate(prompt, temperature=0.1) @@ -231,15 +304,134 @@ def _fallback_planner(self, goal: str, snapshot: str) -> Dict[str, Any]: return {"action": "done", "reason": "No clear next action"} - async def _execute_action(self, action: Dict[str, Any]) -> str: + async def _resolve_element(self, target: str): """ - Execute a single action. + Resolve an element reference to a Playwright locator. - Args: - action: Action dict from planner + Uses the ref map built during _get_snapshot() to convert + synthetic ref IDs (e.g., 'e9') into actual Playwright locators + based on accessibility role + name. - Returns: - Status message + Falls back to: + 1. Role + name based locator (most reliable for SPAs) + 2. Text-based search + 3. CSS selector (if target looks like a selector) + + Returns the first visible, enabled locator or None. 
+ """ + # Check the ref map first + ref_info = self._ref_map.get(target) + if ref_info: + role = ref_info["role"] + name = ref_info["name"] + + # Map accessibility roles to Playwright get_by_role roles + role_map = { + "textbox": "textbox", + "text": "textbox", + "searchbox": "searchbox", + "button": "button", + "link": "link", + "checkbox": "checkbox", + "radio": "radio", + "combobox": "combobox", + "menuitem": "menuitem", + "tab": "tab", + "heading": "heading", + "img": "img", + "dialog": "dialog", + "navigation": "navigation", + "listitem": "listitem", + } + pw_role = role_map.get(role) + + if pw_role and name: + try: + loc = self.page.get_by_role(pw_role, name=name, exact=False) + count = await loc.count() + if count > 0: + logger.debug(f"Resolved {target} via role={pw_role} name='{name}' ({count} match)") + return loc.first + except Exception as e: + logger.debug(f"Role locator failed for {target}: {e}") + + # Fallback: match by name text + if name: + try: + loc = self.page.get_by_text(name, exact=False) + count = await loc.count() + if count > 0: + logger.debug(f"Resolved {target} via text='{name}' ({count} match)") + return loc.first + except Exception as e: + logger.debug(f"Text locator failed for {target}: {e}") + + # Fallback: match by placeholder/label (for inputs) + if role in ("textbox", "searchbox", "text") and name: + try: + loc = self.page.get_by_placeholder(name, exact=False) + count = await loc.count() + if count > 0: + logger.debug(f"Resolved {target} via placeholder='{name}'") + return loc.first + except Exception: + pass + try: + loc = self.page.get_by_label(name, exact=False) + count = await loc.count() + if count > 0: + logger.debug(f"Resolved {target} via label='{name}'") + return loc.first + except Exception: + pass + + # If target is NOT a ref ID, LLM may have sent a name like "email" or "Sign in" + # Search the ref map by name to find a matching element + if not ref_info and target: + target_lower = str(target).lower().strip() + for 
ref_id, info in self._ref_map.items(): + if info["name"].lower() == target_lower or target_lower in info["name"].lower(): + logger.debug(f"Matched target '{target}' to ref {ref_id} ({info['role']} '{info['name']}')") + return await self._resolve_element(ref_id) # Recurse with the actual ref + + # If target looks like a CSS selector, try it directly + if any(c in str(target) for c in ['#', '.', '[', '>', ' ']): + try: + element = await self.page.query_selector(str(target)) + if element: + logger.debug(f"Resolved {target} via CSS selector") + return element + except Exception: + pass + + # Last resort: try getting element by visible text + if target: + try: + loc = self.page.get_by_text(str(target), exact=False) + count = await loc.count() + if count > 0: + return loc.first + except Exception: + pass + + # Last-last resort: try by placeholder + if target: + try: + loc = self.page.get_by_placeholder(str(target), exact=False) + count = await loc.count() + if count > 0: + return loc.first + except Exception: + pass + + return None + + async def _execute_action(self, action: Dict[str, Any]) -> str: + """ + Execute a single action using accessibility-aware element resolution. + + Uses the ref map from the latest snapshot to find elements via + Playwright's role-based locators instead of fragile CSS selectors. 
""" action_type = action.get("action", "").lower() target = action.get("target") @@ -247,35 +439,64 @@ async def _execute_action(self, action: Dict[str, Any]) -> str: try: if action_type == "navigate": - await self.page.goto(target, wait_until="domcontentloaded", timeout=10000) + try: + await self.page.goto(target, wait_until="networkidle", timeout=15000) + except Exception: + # Fallback: some SPAs never reach networkidle + try: + await self.page.goto(target, wait_until="domcontentloaded", timeout=10000) + except Exception: + pass + # Extra wait for SPA hydration + await asyncio.sleep(1.0) return f"Navigated to {target}" elif action_type == "click": - # Find element by ref - element = await self.page.query_selector(f'[data-ref="{target}"]') + element = await self._resolve_element(target) if element: - await element.click() + await element.click(timeout=5000) + await asyncio.sleep(0.5) # Wait for response to click return f"Clicked {target}" else: return f"Element not found: {target}" elif action_type == "type": - element = await self.page.query_selector(f'[data-ref="{target}"]') + element = await self._resolve_element(target) if element: - await element.fill(value) + await element.click(timeout=3000) # Focus first + await asyncio.sleep(0.1) + await element.fill(value, timeout=5000) return f"Typed '{value}' into {target}" else: return f"Element not found: {target}" + elif action_type == "press": + key = value or target or "Enter" + await self.page.keyboard.press(key) + return f"Pressed {key}" + + elif action_type == "scroll": + direction = (value or "down").lower() + delta = 400 if direction == "down" else -400 + await self.page.mouse.wheel(0, delta) + await asyncio.sleep(0.3) + return f"Scrolled {direction}" + elif action_type == "wait": wait_time = float(value) if value else 2.0 await asyncio.sleep(wait_time) return f"Waited {wait_time}s" elif action_type == "extract": - # Get page text content content = await self.page.inner_text("body") - return f"Extracted page 
content ({len(content)} chars)" + # Truncate for history but include useful prefix + preview = content[:500].strip() + return f"Extracted content: {preview}" + + elif action_type == "screenshot": + path = value or "/tmp/eversale_screenshot.png" + await self.page.screenshot(path=path) + return f"Screenshot saved to {path}" elif action_type == "done": return "Task complete" @@ -298,6 +519,8 @@ async def run(self, goal: str) -> AgentResult: AgentResult with success status and details """ logger.info(f"Starting agent with goal: {goal}") + steps = 0 + history = [] try: await self._init_browser() @@ -338,6 +561,7 @@ async def run(self, goal: str) -> AgentResult: logger.info("Using LLM-based planning") history = [] steps = 0 + consecutive_passive = 0 # Track consecutive extract/wait actions while steps < self.max_steps: steps += 1 @@ -346,8 +570,19 @@ async def run(self, goal: str) -> AgentResult: snapshot = await self._get_snapshot() current_url = self.page.url + # Add hint if LLM is stuck in extract/wait loop + extra_hint = "" + if consecutive_passive >= 2: + extra_hint = "\n⚠️ You've done multiple extract/wait actions in a row. The page content is already visible in the elements list above. TAKE ACTION NOW — click a button or type in a field!" 
+ # Plan next action - action = await self._plan_next_action(goal, snapshot, history) + action = await self._plan_next_action(goal, snapshot + extra_hint, history) + + # Track passive vs active actions + if action.get("action") in ("extract", "wait"): + consecutive_passive += 1 + else: + consecutive_passive = 0 # Execute status = await self._execute_action(action) @@ -450,8 +685,8 @@ def parse_args(): def print_banner(): - """Print startup banner.""" - print(""" + r"""Print startup banner.""" + print(r""" _____ _ _ _____ ____ ____ _ _ _____ | ____| | | | ____| _ \/ ___| / \ | | | ____| | _| | | | | _| | |_) \___ \ / _ \ | | | _| @@ -526,12 +761,23 @@ async def main(): from local_server_launcher import ensure_local_server local_url = ensure_local_server() if local_url: - logging.getLogger(__name__).info(f"[run_simple] Local API server active at {local_url}") + logger.info(f"[run_simple] Local API server active at {local_url}") except ImportError: pass args = parse_args() + # Also respect env var from CLI flag propagation + if os.environ.get("EVERSALE_HEADLESS") == "1": + args.headless = True + if os.environ.get("EVERSALE_DEBUG") == "1": + args.verbose = True + if os.environ.get("EVERSALE_MAX_STEPS"): + try: + args.max_steps = int(os.environ["EVERSALE_MAX_STEPS"]) + except ValueError: + pass + # Interactive mode if no goal provided if not args.goal: await run_interactive() diff --git a/eversale/engine/run_ultimate.py b/eversale/engine/run_ultimate.py index e0dae721..2d9ab7c7 100755 --- a/eversale/engine/run_ultimate.py +++ b/eversale/engine/run_ultimate.py @@ -1429,7 +1429,7 @@ async def main(): print("Starting Eversale as external MCP Server (stdio mode)...", file=sys.stderr) print("For Claude Desktop, add to claude_desktop_config.json:", file=sys.stderr) print(' "eversale": {"command": "python", "args": ["run_ultimate.py", "mcp"]}', file=sys.stderr) - server = EversaleMCPServer(headless="--headless" in sys.argv) + server = EversaleMCPServer(headless="--headless" in 
sys.argv or os.environ.get("EVERSALE_HEADLESS") == "1") await server.run_stdio() return diff --git a/eversale/eversale_cli.py b/eversale/eversale_cli.py new file mode 100644 index 00000000..a615669f --- /dev/null +++ b/eversale/eversale_cli.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +""" +Eversale CLI - Your AI Employee + +Pure Python entry point for the eversale command. +Installed via: pip install -e . + +Usage: + eversale "Task description" # Simple task (run_simple.py) + eversale --ultimate "Complex task" # Ultimate mode (run_ultimate.py) + eversale --help # Show help + eversale --version # Show version +""" + +import sys +import os +import asyncio +from pathlib import Path + + +# ─── Resolve engine directory ───────────────────────────────────────── +# The engine dir is always relative to this file (eversale_cli.py sits +# next to engine/). Works for both `pip install -e .` and direct runs. +ENGINE_DIR = str(Path(__file__).resolve().parent / "engine") +AGENT_DIR = str(Path(ENGINE_DIR) / "agent") + +# Ensure engine is importable (mimics the old Node.js wrapper behavior) +if ENGINE_DIR not in sys.path: + sys.path.insert(0, ENGINE_DIR) + + +def _show_help(): + """Print usage help.""" + print(""" +Eversale CLI - Your AI Employee +================================ + +Usage: + eversale "Task description" Run a task (accessibility-first agent) + eversale --ultimate "Complex task" Run complex task (full orchestration engine) + eversale Interactive mode + +Options: + --ultimate Use the full orchestration engine for complex multi-step tasks + --headless Run browser in headless mode + --max-steps N Maximum steps before giving up (default: 20) + --debug, -d Enable debug output + --version Show version + --help, -h Show this help + +Examples: + eversale "Search Google for AI news" + eversale "Find 10 marketing agencies in Miami" + eversale --ultimate "Research Stripe competitors and create a report" + eversale --headless "Navigate to github.com" + +Environment Variables: 
+ OPENAI_API_KEY API key for LLM provider (e.g. Z.AI) + OPENAI_BASE_URL Base URL for OpenAI-compatible API + OPENAI_MODEL Model name (default: glm-5) + EVERSALE_HOME Home directory (default: ~/.eversale) +""") + + +def _show_version(): + """Print version.""" + print("Eversale CLI v2.1.218 (Python)") + + +def main(): + """Main entry point for the eversale command.""" + args = sys.argv[1:] + + # Quick flags that don't need engine imports + if "--help" in args or "-h" in args: + _show_help() + return + + if "--version" in args: + _show_version() + return + + # ─── Parse and strip known flags ─────────────────────────────── + use_ultimate = False + headless = False + debug = False + max_steps = None + + # Strip --ultimate + if "--ultimate" in args: + use_ultimate = True + args.remove("--ultimate") + + # Strip --headless → set env var so downstream scripts can read it + if "--headless" in args: + headless = True + args.remove("--headless") + + # Strip --debug / -d + if "--debug" in args: + debug = True + args.remove("--debug") + if "-d" in args: + debug = True + args.remove("-d") + + # Strip --max-steps N + if "--max-steps" in args: + idx = args.index("--max-steps") + if idx + 1 < len(args): + try: + max_steps = int(args[idx + 1]) + except ValueError: + pass + args.pop(idx + 1) + args.pop(idx) + + # ─── Propagate flags via environment variables ───────────────── + if headless: + os.environ["EVERSALE_HEADLESS"] = "1" + if debug: + os.environ["EVERSALE_DEBUG"] = "1" + if max_steps is not None: + os.environ["EVERSALE_MAX_STEPS"] = str(max_steps) + + # Reconstruct sys.argv: only the script name + remaining non-flag args + # (which should be the task string) + sys.argv = [sys.argv[0]] + args + + # Ensure EVERSALE_HOME exists + eversale_home = Path(os.environ.get("EVERSALE_HOME", Path.home() / ".eversale")) + eversale_home.mkdir(parents=True, exist_ok=True) + (eversale_home / "logs").mkdir(parents=True, exist_ok=True) + (eversale_home / "outputs").mkdir(parents=True, 
exist_ok=True) + + # Set ENGINE_DIR env var for runtime file resolution + os.environ["EVERSALE_ENGINE_DIR"] = ENGINE_DIR + os.environ.setdefault("EVERSALE_HOME", str(eversale_home)) + + # Change working directory to ENGINE_DIR so relative paths + # like "config/config.yaml" resolve correctly. + os.chdir(ENGINE_DIR) + + if use_ultimate: + _run_ultimate() + else: + _run_simple() + + +def _run_simple(): + """Delegate to run_simple.py (accessibility-first agent).""" + try: + # Import and run the simple agent + import run_simple + exit_code = asyncio.run(run_simple.main()) + sys.exit(exit_code or 0) + except KeyboardInterrupt: + print("\nInterrupted. Goodbye!") + sys.exit(0) + except Exception as e: + print(f"\nError: {e}") + print("Check ~/.eversale/logs/eversale.log for details") + sys.exit(1) + + +def _run_ultimate(): + """Delegate to run_ultimate.py (full orchestration engine).""" + try: + # Import run_ultimate — it has its own _run_with_clean_shutdown + import run_ultimate + run_ultimate._run_with_clean_shutdown() + except KeyboardInterrupt: + print("\nInterrupted. Goodbye!") + sys.exit(0) + except Exception as e: + print(f"\nError: {e}") + print("Check ~/.eversale/logs/eversale.log for details") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/eversale/pyproject.toml b/eversale/pyproject.toml new file mode 100644 index 00000000..d7e6755d --- /dev/null +++ b/eversale/pyproject.toml @@ -0,0 +1,74 @@ +[build-system] +requires = ["setuptools>=68.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "eversale" +version = "2.1.218" +description = "Agentic browser runtime. AI that controls your browser autonomously - give it any task, it figures out how." 
+readme = "README.md" +license = {text = "MIT"} +requires-python = ">=3.10" +keywords = ["ai", "agent", "automation", "browser", "cli", "desktop", "sales", "research", "playwright"] + +dependencies = [ + # LLM API client + "httpx>=0.24.0", + "aiohttp>=3.8.0", + # Data models + "pydantic>=2.0.0", + # Browser automation + "playwright>=1.40.0", + "patchright>=1.0.0", + "curl_cffi>=0.5.0", + # Humanization (Bezier curves, etc.) + "scipy>=1.10.0", + "numpy>=1.24.0", + # Scheduling + "apscheduler>=3.10.0", + # Database + "chromadb>=0.4.0", + # Utilities + "pyyaml>=6.0", + "python-dotenv>=1.0.0", + "rich>=13.0.0", + "loguru>=0.7.0", + "psutil>=5.9.0", + "html2text>=2020.1.16", +] + +[project.optional-dependencies] +local = [ + "ollama>=0.1.0", +] + +[project.scripts] +eversale = "eversale_cli:main" + +[tool.setuptools] +py-modules = ["eversale_cli"] + +[tool.setuptools.packages.find] +where = ["."] +include = ["engine*"] + +[tool.setuptools.package-data] +"engine" = [ + "config/*.yaml", + "prompts/*.txt", + "rust_wheels/*", + "*.sh", +] +"engine.agent" = [ + "config/*.yaml", + "*.md", + "*.txt", +] +"engine.ace" = [ + "*.yaml", + "*.md", +] +"engine.mcp_servers" = [ + "*.yaml", +] +