|
3 | 3 | import json |
4 | 4 | import os |
5 | 5 | import typer |
| 6 | +import openai |
6 | 7 | from rich import print |
7 | 8 | from rich.table import Table |
8 | 9 | from rich.console import Console |
9 | 10 |
|
| 11 | +from autoprompt.adapters.base import AgentAdapter |
| 12 | +from autoprompt.adapters.callable import CallableAdapter |
| 13 | +from autoprompt.adapters.cli import CLIAdapter |
| 14 | +from autoprompt.adapters.http import HttpAdapter |
| 15 | +from autoprompt.core.budget import BudgetTracker |
10 | 16 | from autoprompt.core.config import load_config |
11 | 17 | from autoprompt.core.runner import Runner |
| 18 | +from autoprompt.pipeline.calibrator import Calibrator |
| 19 | +from autoprompt.pipeline.labeler import generate_labels |
12 | 20 |
|
13 | 21 | import dotenv |
14 | 22 | dotenv.load_dotenv() |
|
17 | 25 | console = Console() |
18 | 26 |
|
19 | 27 |
|
def _build_adapter(cfg) -> AgentAdapter:
    """Instantiate the agent adapter selected by ``cfg.agent.adapter``.

    Supported kinds are ``"http"``, ``"python_callable"`` and ``"cli"``;
    any other value raises ``ValueError``.
    """
    agent = cfg.agent
    # Dispatch table of lazy factories: only the selected adapter's config
    # attribute is ever read, matching the original if-chain's behavior.
    factories = {
        "http": lambda: HttpAdapter(agent.endpoint),
        "python_callable": lambda: CallableAdapter(agent.import_path),
        "cli": lambda: CLIAdapter(agent.command),
    }
    try:
        make = factories[agent.adapter]
    except KeyError:
        raise ValueError(f"Unknown adapter: {agent.adapter}") from None
    return make()
| 36 | + |
| 37 | + |
def _resolve_rubric_path(cfg, config_path: str) -> None:
    """Make ``cfg.rubric.path`` absolute, mutating *cfg* in place.

    A relative rubric path is interpreted relative to the directory that
    contains *config_path* (or the current working directory when
    *config_path* is empty). An already-absolute path is left untouched.
    """
    if os.path.isabs(cfg.rubric.path):
        return
    if config_path:
        base_dir = os.path.dirname(os.path.abspath(config_path))
    else:
        base_dir = os.getcwd()
    cfg.rubric.path = os.path.join(base_dir, cfg.rubric.path)
| 42 | + |
| 43 | + |
def _build_openrouter_client() -> openai.AsyncOpenAI:
    """Create an async OpenAI-compatible client pointed at OpenRouter.

    The key is read from the ``OPENROUTER_API_KEY`` environment variable.
    A missing key only produces a warning here — construction still
    succeeds and subsequent API calls fail instead.
    """
    key = os.getenv("OPENROUTER_API_KEY")
    if not key:
        print("[yellow]Warning: OPENROUTER_API_KEY not set. API calls will fail.[/yellow]")
    client = openai.AsyncOpenAI(base_url="https://openrouter.ai/api/v1", api_key=key)
    return client
| 52 | + |
| 53 | + |
20 | 54 | @app.command() |
21 | 55 | def run(config: str = typer.Argument("autoprompt.yaml", help="Path to config file"), dry_run: bool = False): |
22 | 56 | """Start the AutoPrompt optimization loop.""" |
@@ -169,6 +203,67 @@ def snapshot_lines(snapshot: dict) -> list[str]: |
169 | 203 | print(f"[bold red]Error:[/bold red] {e}") |
170 | 204 |
|
171 | 205 |
|
@app.command()
def label(
    config: str = typer.Argument("autoprompt.yaml", help="Path to config file"),
    count: int = typer.Option(10, "--count", "-n", help="Number of prompts to generate"),
    out: str = typer.Option("labels.yaml", "--out", "-o", help="Output labels file"),
):
    """Generate prompts, call the real agent, and collect manual scores interactively."""
    try:
        cfg = load_config(config)
        # Relative paths inside the config are resolved against its directory.
        config_dir = os.path.dirname(os.path.abspath(config)) if config else os.getcwd()
        adapter = _build_adapter(cfg)
        client = _build_openrouter_client()
        budget = BudgetTracker(cfg.loop.budget_limit_usd)

        async def _label_flow():
            # Refuse to start the labeling session if the agent is unreachable.
            if not await adapter.health_check():
                print("[bold red]Agent is not responding.[/bold red]")
                return
            await generate_labels(
                config=cfg,
                adapter=adapter,
                client=client,
                budget=budget,
                output_path=out,
                count=count,
                config_dir=config_dir,
            )

        asyncio.run(_label_flow())
    except Exception as e:
        # CLI boundary: surface the error (with traceback) instead of crashing typer.
        import traceback

        print(f"[bold red]Error:[/bold red] {e}")
        traceback.print_exc()
| 241 | + |
@app.command()
def calibrate(
    config: str = typer.Argument("autoprompt.yaml", help="Path to config file"),
    labels: str = typer.Option("labels.yaml", "--labels", "-l", help="Path to labels YAML"),
):
    """Compare LLM-judge scores against manual labels and print calibration report."""
    try:
        cfg = load_config(config)
        # The rubric path in the config may be relative to the config file.
        _resolve_rubric_path(cfg, config)
        budget = BudgetTracker(cfg.loop.budget_limit_usd)
        judge = Calibrator(_build_openrouter_client(), budget)

        async def _calibration_flow():
            # Score the labeled transcripts with the LLM judge, then summarize.
            outcome = await judge.run(labels, cfg.rubric)
            judge.report(outcome, cfg.rubric)
            print(f"[dim]Cost so far: ${budget.current_cost_usd:.4f}[/dim]")

        asyncio.run(_calibration_flow())
    except Exception as e:
        # CLI boundary: surface the error (with traceback) instead of crashing typer.
        import traceback

        print(f"[bold red]Error:[/bold red] {e}")
        traceback.print_exc()
| 266 | + |
172 | 267 |
|
# Script entry point: dispatch to the typer application.
if __name__ == "__main__":
    app()
0 commit comments