Commit c1fd3cb (1 parent: 6b05270)

feat: update example configurations and remove deprecated files for improved clarity and usability

8 files changed: 139 additions & 52 deletions


.env.example — 2 additions & 2 deletions

```diff
@@ -1,6 +1,6 @@
 # ── AutoPrompt Core ─────────────────────────────────────
 # This is the only required key for the optimizer loop to run.
-# AutoPrompt does NOT need access to your agent's actual database (Supabase/Postgres)
-# or Slack. It runs completely isolated using local SQLite for its own logging.
+# AutoPrompt does NOT need access to your agent's actual database
+# It runs completely isolated using local SQLite for its own logging.
 OPENROUTER_API_KEY=your_openrouter_api_key_here
 OPENROUTER_MODEL=deepseek/deepseek-v3.2
```

README.md — 27 additions & 15 deletions

````diff
@@ -39,23 +39,40 @@ Copy env example and fill out:
 cp .env.example .env
 ```
 
-Validate your config:
+The fastest way to try AutoPrompt is with the multi-turn example — it runs entirely offline, no API key required for the agent itself:
 
 ```bash
-autoprompt validate autoprompt.yaml
+autoprompt run examples/multi_turn/autoprompt.yaml --dry-run
+autoprompt run examples/multi_turn/autoprompt.yaml
 ```
 
-Run in dry-run mode:
+For a real LLM agent (requires `OPENROUTER_API_KEY`):
 
 ```bash
-autoprompt run autoprompt.yaml --dry-run
+autoprompt run examples/simple/autoprompt.yaml
 ```
 
-Execute the optimization loop:
+## Choosing an Adapter
 
-```bash
-autoprompt run autoprompt.yaml
-```
+AutoPrompt connects to your agent via one of three built-in adapters:
+
+| Adapter | When to use | Config field |
+|---|---|---|
+| `python_callable` | Your agent is a Python function in the same repo | `import_path: "my_module:my_function"` |
+| `http` | Your agent runs as an HTTP service | `endpoint: "http://localhost:8000/chat"` |
+| `cli` | Your agent is a CLI tool that reads from stdin | `command: "python my_agent.py"` |
+
+**You need a custom adapter** only when the built-in ones don't fit — for example, if your HTTP endpoint requires JWT authentication or a non-standard request format. Custom adapters subclass `AgentAdapter` from [`autoprompt/adapters/base.py`](autoprompt/adapters/base.py) and implement two methods: `send()` and `health_check()`.
+
+### What your callable needs to accept
+
+For `python_callable`, AutoPrompt calls your function with either:
+- `handle_message(message: str, context: dict)` — single-turn
+- `chat(messages: list[dict], context: dict)` — multi-turn
+
+Return `{"content": "..."}` or just a plain `str`.
+
+For `http`, AutoPrompt POSTs `{"message": "..."}` (or `{"messages": [...]}` for multi-turn) and expects `{"content": "..."}` in the response.
 
 ## Example Config
 
@@ -108,13 +125,8 @@ autoprompt diff autoprompt.yaml
 
 | Example | Description |
 |---|---|
-| [`examples/simple/`](examples/simple/) | Minimal setup with a Python callable adapter |
-| [`examples/multi_turn/`](examples/multi_turn/) | Multi-turn agent with offline mock (no API key needed) |
-| [`examples/ora/`](examples/ora/) | HTTP adapter with custom JWT authentication |
-
-## Creating Custom Adapters
-
-If your agent runs inside Python without an HTTP endpoint, use `python_callable` or write a custom class subclassing `AgentAdapter` (see [`autoprompt/adapters/base.py`](autoprompt/adapters/base.py) and [`examples/ora/adapter.py`](examples/ora/adapter.py) for a pattern).
+| [`examples/multi_turn/`](examples/multi_turn/) | Complete multi-turn agent — runs fully offline, no API key needed for the agent |
+| [`examples/simple/`](examples/simple/) | Minimal single-turn LLM agent using `python_callable` |
 
 ## Development
 
````
autoprompt.yaml — 29 additions & 31 deletions

```diff
@@ -1,51 +1,49 @@
+# AutoPrompt configuration template
+# Copy this file and adjust it for your agent.
+# All LLM calls go through OpenRouter — set OPENROUTER_API_KEY in .env
+
 agent:
-  adapter: "http"
+  adapter: "python_callable"  # "python_callable" | "http" | "cli"
   name: "My Agent"
   description: "Describe what this agent does and who it serves."
-  endpoint: "http://localhost:8000/chat"
+
+  # python_callable: import path to your handler function
+  import_path: "my_module:handle_message"
+
+  # http: uncomment if your agent runs as an HTTP service
+  # endpoint: "http://localhost:8000/chat"
+
+  # cli: uncomment if your agent is a CLI tool
+  # command: "python my_agent.py"
+
 optimizable:
   - type: system_prompt
-    path: "./prompts/system.md"
-  - type: memory_config
-    path: "./config/memory.yaml"
-  - type: tool_config
-    path: "./config/tools.yaml"
+    path: "./system.md"  # AutoPrompt will read, mutate, and roll back this file
 
 rubric:
-  path: "./rubrics/program.md"
-  scoring_model: "claude-sonnet-4-20250514"
+  path: "./rubric.md"  # Markdown file describing what a good response looks like
+  scoring_model: "${OPENROUTER_MODEL:-deepseek/deepseek-v3.2}"
   score_range: [1.0, 10.0]
   dimensions:
     - name: "helpfulness"
-      weight: 0.3
+      weight: 0.6
     - name: "accuracy"
-      weight: 0.3
-    - name: "tone"
-      weight: 0.2
-    - name: "conciseness"
-      weight: 0.2
+      weight: 0.4
 
 tests:
-  mode: "dynamic"
-  static_suite: "./tests/suite.yaml"
-  generator_model: "claude-sonnet-4-20250514"
-  tests_per_iteration: 10
+  mode: "mix"  # "static" | "dynamic" | "mix"
+  static_suite: "./tests.yaml"  # your hand-written test cases (required for "static" and "mix")
+  generator_model: "${OPENROUTER_MODEL:-deepseek/deepseek-v3.2}"
+  tests_per_iteration: 6
   categories:
-    - "general_knowledge"
-    - "personal_context"
-    - "task_execution"
+    - "general"
     - "edge_cases"
 
 loop:
   max_iterations: 10
-  budget_limit_usd: 5.00
-  improvement_threshold: 0.05
-  mutation_strategy: "targeted"
-  mutation_model: "claude-sonnet-4-20250514"
-  keep_top_n: 3
+  budget_limit_usd: 2.00
+  mutation_model: "${OPENROUTER_MODEL:-deepseek/deepseek-v3.2}"
+  improvement_threshold: 0.05  # minimum score gain to accept a mutation
 
 logging:
-  backend: "supabase"
-  supabase_url: "${SUPABASE_URL}"
-  supabase_key: "${SUPABASE_KEY}"
-
+  backend: "sqlite"  # "sqlite" | "jsonl" | "supabase"
```

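The template above leans on `${VAR:-default}` placeholders, e.g. for `OPENROUTER_MODEL`. This diff does not show how AutoPrompt expands them, so the snippet below is only a plausible sketch of that shell-style substitution for illustration; `expand_env` is a hypothetical helper, not the project's actual resolver.

```python
# Sketch: expand ${VAR} and ${VAR:-default} placeholders in a config string.
import os
import re

_PLACEHOLDER = re.compile(
    r"\$\{(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?::-(?P<default>[^}]*))?\}"
)


def expand_env(value: str) -> str:
    """Replace each placeholder with the environment value, else its default,
    else the empty string (mirroring POSIX shell ${VAR:-default} behavior)."""
    def repl(m: re.Match) -> str:
        return os.environ.get(m.group("name"), m.group("default") or "")

    return _PLACEHOLDER.sub(repl, value)
```

With `OPENROUTER_MODEL` unset, `expand_env("${OPENROUTER_MODEL:-deepseek/deepseek-v3.2}")` falls back to the default model string.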
examples/simple/agent.py — 44 additions & 0 deletions

```diff
@@ -0,0 +1,44 @@
+"""
+Minimal single-turn LLM agent for the AutoPrompt simple example.
+
+This is the agent AutoPrompt will optimize. It loads its system prompt
+from system.txt — that file is what gets mutated each iteration.
+
+Run the optimization loop with:
+    autoprompt run examples/simple/autoprompt.yaml
+"""
+
+import os
+from pathlib import Path
+import openai
+
+SYSTEM_PROMPT_PATH = Path(__file__).parent / "system.txt"
+
+_client: openai.OpenAI | None = None
+
+
+def _get_client() -> openai.OpenAI:
+    global _client
+    if _client is None:
+        _client = openai.OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=os.environ["OPENROUTER_API_KEY"],
+        )
+    return _client
+
+
+def health_check() -> bool:
+    return bool(os.getenv("OPENROUTER_API_KEY"))
+
+
+def handle_message(message: str, context: dict | None = None) -> dict:
+    system = SYSTEM_PROMPT_PATH.read_text(encoding="utf-8")
+    response = _get_client().chat.completions.create(
+        model=os.getenv("OPENROUTER_MODEL", "deepseek/deepseek-v3.2"),
+        messages=[
+            {"role": "system", "content": system},
+            {"role": "user", "content": message},
+        ],
+        max_tokens=512,
+    )
+    return {"content": response.choices[0].message.content}
```

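The example config points at this agent with `import_path: "examples.simple.agent:handle_message"`. How the `python_callable` adapter resolves that `module:function` string is not part of this commit; the `resolve_callable` helper below is a hypothetical sketch of one way to do it with `importlib`.

```python
# Sketch: resolve a "pkg.module:attr" import path to the named callable.
import importlib


def resolve_callable(import_path: str):
    """Split 'pkg.module:attr' on the colon, import the module, and return
    the named attribute (raises ValueError if the colon is missing)."""
    module_name, _, attr = import_path.partition(":")
    if not attr:
        raise ValueError(f"expected 'module:function', got {import_path!r}")
    return getattr(importlib.import_module(module_name), attr)
```

For example, `resolve_callable("posixpath:join")` returns the stdlib `join` function, and the adapter could call the resolved handler as `fn(message, context)`.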
examples/simple/autoprompt.yaml — 9 additions & 4 deletions

```diff
@@ -1,16 +1,21 @@
 agent:
   adapter: "python_callable"
   name: "Simple Example Agent"
-  description: "A minimal example agent for testing the AutoPrompt optimization loop."
-  import_path: "my_agent.core:handle_message"
+  description: "A minimal single-turn LLM agent. AutoPrompt will optimize its system prompt."
+  import_path: "examples.simple.agent:handle_message"
 optimizable:
   - type: system_prompt
     path: "./system.txt"
 
 rubric:
   path: "./simple-rubric.md"
-  scoring_model: "claude-sonnet-4-20250514"
+  scoring_model: "${OPENROUTER_MODEL:-deepseek/deepseek-v3.2}"
   score_range: [1.0, 10.0]
+  dimensions:
+    - name: "helpfulness"
+      weight: 0.6
+    - name: "clarity"
+      weight: 0.4
 
 tests:
   mode: "static"
@@ -19,7 +24,7 @@ tests:
 loop:
   max_iterations: 3
   budget_limit_usd: 1.0
-  mutation_model: "claude-sonnet-4-20250514"
+  mutation_model: "${OPENROUTER_MODEL:-deepseek/deepseek-v3.2}"
 
 logging:
   backend: "jsonl"
```

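The `dimensions` weights in this config (0.6 + 0.4 = 1.0) suggest that per-dimension rubric scores are combined as a weighted average. The actual scoring code is not in this diff; `weighted_score` below is an illustrative sketch under that assumption.

```python
# Sketch: combine per-dimension rubric scores using the config's weights.
def weighted_score(scores: dict[str, float], weights: dict[str, float]) -> float:
    """Weighted average of per-dimension scores; normalizes by the weight
    total so weights that don't sum to exactly 1.0 still behave sensibly."""
    total = sum(weights.values())
    return sum(scores[name] * w for name, w in weights.items()) / total
```

For instance, helpfulness 8.0 at weight 0.6 and clarity 6.0 at weight 0.4 combine to 8.0 × 0.6 + 6.0 × 0.4 = 7.2.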
examples/simple/simple-rubric.md — 7 additions & 0 deletions

```diff
@@ -0,0 +1,7 @@
+# Evaluation Rubric
+
+## Helpfulness
+Does the response actually answer the question? Is the information correct and useful to the user?
+
+## Clarity
+Is the response easy to understand? Is it appropriately concise — no unnecessary filler, no excessive length?
```

examples/simple/system.txt — 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+You are a helpful assistant. Answer questions clearly and concisely.
```

examples/simple/tests.yaml — 20 additions & 0 deletions

```diff
@@ -0,0 +1,20 @@
+tests:
+  - prompt: "What is the capital of France?"
+    category: "factual"
+    expected_behavior: "Correctly states Paris as the capital of France"
+
+  - prompt: "Explain what a neural network is in one or two sentences."
+    category: "explanation"
+    expected_behavior: "Clear, jargon-free explanation accessible to a non-technical reader"
+
+  - prompt: "How do I reverse a list in Python?"
+    category: "coding"
+    expected_behavior: "Shows a working Python example, e.g. list[::-1] or list.reverse()"
+
+  - prompt: "What are three tips for better sleep?"
+    category: "advice"
+    expected_behavior: "Three practical, concrete tips — not vague platitudes"
+
+  - prompt: "Summarize the water cycle in two sentences."
+    category: "explanation"
+    expected_behavior: "Accurate summary covering evaporation, condensation, and precipitation"
```

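Each entry in the static suite above carries three fields: `prompt`, `category`, and `expected_behavior`. AutoPrompt's own loader is not shown in this commit; the small validator below is a hypothetical sketch of a sanity check you could run over the parsed suite before an optimization run.

```python
# Sketch: verify every parsed test case has the three fields tests.yaml uses.
REQUIRED_KEYS = {"prompt", "category", "expected_behavior"}


def validate_tests(tests: list[dict]) -> list[str]:
    """Return a list of error messages; an empty list means well-formed."""
    errors = []
    for i, case in enumerate(tests):
        missing = REQUIRED_KEYS - case.keys()
        if missing:
            errors.append(f"test {i}: missing {sorted(missing)}")
    return errors
```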