aibtcdev · whoabuddy · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/app/services/ai/simple_workflows/evaluation_openrouter_v2.py b/app/services/ai/simple_workflows/evaluation_openrouter_v2.py
@@ -527,7 +527,7 @@ async def evaluate_proposal_openrouter(
     model: Optional[str] = None,
     temperature: Optional[float] = None,
     reasoning: Optional[bool] = None,
-) -> Optional[EvaluationOutput]:
+) -> Optional[Dict[str, Any]]:
     """
     Evaluate a proposal using OpenRouter and Grok prompts.
 
@@ -537,7 +537,7 @@ async def evaluate_proposal_openrouter(
         temperature: Generation temperature.
 
     Returns:
-        Parsed EvaluationOutput or None if evaluation fails.
+        Dict with evaluation output and captured prompts, or None if evaluation fails.
     """
     try:
         # parse the uuid
@@ -706,7 +706,12 @@ async def evaluate_proposal_openrouter(
 
             logger.info(f"Successfully evaluated proposal {proposal_id}")
 
-            return evaluation_output
+            return {
+                "evaluation_output": evaluation_output.model_dump(),
+                "full_system_prompt": system_prompt,
+                "full_user_prompt": formatted_user_content,
+                "full_messages": messages,
+            }
 
         except json.JSONDecodeError as e:
             logger.error(f"JSON decode error: {e}")

diff --git a/eval_viewer_v2.html b/eval_viewer_v2.html
@@ -258,6 +258,9 @@ <h1 class="text-3xl font-bold text-gray-900">Eval Viewer V2</h1>
               expected_decision: item.expected_decision || null,
               mismatch: false,
               usage: { input_tokens: "0", output_tokens: "0", est_cost: "$0.000000" },
+              full_system_prompt: item.full_system_prompt || "N/A",
+              full_user_prompt: item.full_user_prompt || "N/A",
+              full_messages: item.full_messages || [],
             };
           }
 
@@ -285,6 +288,9 @@ <h1 class="text-3xl font-bold text-gray-900">Eval Viewer V2</h1>
               output_tokens: evalOut.usage_output_tokens || "0",
               est_cost: evalOut.usage_est_cost || "$0.000000",
             },
+            full_system_prompt: item.full_system_prompt || "N/A",
+            full_user_prompt: item.full_user_prompt || "N/A",
+            full_messages: item.full_messages || [],
           };
         });
 
@@ -692,6 +698,25 @@ <h3 class="font-bold mb-2">Category Averages</h3>
         `;
         details.appendChild(usageInfo);
 
+        // Prompt info (mirroring original viewer)
+        const promptInfo = document.createElement("div");
+        promptInfo.className = "space-y-2 mb-4";
+        promptInfo.innerHTML = `
+          <details class="bg-gray-50 p-3 rounded-md">
+            <summary class="font-medium text-gray-700 cursor-pointer">Full System Prompt</summary>
+            <pre class="mt-2 text-sm text-gray-600 whitespace-pre-wrap overflow-x-auto">${escapeHtml(prop.full_system_prompt)}</pre>
+          </details>
+          <details class="bg-gray-50 p-3 rounded-md">
+            <summary class="font-medium text-gray-700 cursor-pointer">Full User Prompt</summary>
+            <pre class="mt-2 text-sm text-gray-600 whitespace-pre-wrap overflow-x-auto">${escapeHtml(prop.full_user_prompt)}</pre>
+          </details>
+          <details class="bg-gray-50 p-3 rounded-md">
+            <summary class="font-medium text-gray-700 cursor-pointer">Full LLM Messages</summary>
+            <pre class="mt-2 text-sm text-gray-600 whitespace-pre-wrap overflow-x-auto">${escapeHtml(JSON.stringify(prop.full_messages, null, 2))}</pre>
+          </details>
+        `;
+        details.appendChild(promptInfo);
+
         // Categories table
         details.innerHTML += `
           <div class="border-t border-gray-200 pt-4 overflow-x-auto">

diff --git a/scripts/generate_evals_manifest.py b/scripts/generate_evals_manifest.py
@@ -1,29 +1,44 @@
 #!/usr/bin/env python3
+
+import os
+
 """
 Utility script to generate or update evals-manifest.json based on contents of ./evals/.
 Scans for files matching *_summary.json and creates a manifest with path and name (timestamp).
 """
 
 import json
-import os
 from datetime import datetime
 
+import re
+
 
 def generate_manifest(evals_dir="./evals", manifest_path="./evals/evals-manifest.json"):
-    """Generate manifest from JSON files in evals_dir."""
+    """Generate manifest from files in evals_dir named YYYYMMDD_HHMMSS_summary.json."""
+    ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    evals_dir = os.path.join(ROOT_DIR, evals_dir)
+    manifest_path = os.path.join(ROOT_DIR, manifest_path)
     manifest = []
+    timestamp_pattern = re.compile(
+        r"^(\d{8}_\d{6})_summary\.json$"
+    )  # Matches YYYYMMDD_HHMMSS_summary.json
+
     for filename in os.listdir(evals_dir):
-        if filename.endswith("_summary.json"):
-            timestamp_str = filename.split("_")[0]  # Extract YYYYMMDD_HHMMSS
+        match = timestamp_pattern.match(filename)
+        if match:
+            timestamp_str = match.group(1)  # e.g., 20251118_160840
             try:
-                timestamp = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
-                name = timestamp.strftime("%Y-%m-%d %H:%M:%S")
+                # Parse YYYYMMDD_HHMMSS
+                dt = datetime.strptime(timestamp_str.replace("_", ""), "%Y%m%d%H%M%S")
+                name = dt.strftime("%Y-%m-%d %H:%M:%S")  # Display format
             except ValueError:
                 name = filename
             manifest.append({"path": f"./evals/{filename}", "name": name})
 
-    # Sort by timestamp descending
-    manifest.sort(key=lambda x: x["name"], reverse=True)
+    # Sort by parsed datetime descending (NOTE(review): strptime raises ValueError if a name fell back to the raw filename above)
+    manifest.sort(
+        key=lambda x: datetime.strptime(x["name"], "%Y-%m-%d %H:%M:%S"), reverse=True
+    )
 
     os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
     with open(manifest_path, "w") as f: