diff --git a/app/api/tools/evaluation.py b/app/api/tools/evaluation.py
index 9987c835..7703570a 100644
--- a/app/api/tools/evaluation.py
+++ b/app/api/tools/evaluation.py
@@ -11,6 +11,9 @@
     DEFAULT_SYSTEM_PROMPT,
     DEFAULT_USER_PROMPT_TEMPLATE,
 )
+from app.services.ai.simple_workflows.network_school_evaluator import (
+    evaluate_user_posts,
+)
 import uuid
 
 # Configure logger
@@ -145,3 +148,66 @@ async def run_comprehensive_evaluation(
             status_code=500,
             detail=f"Failed to run comprehensive evaluation: {str(e)}",
         )
+
+
+@router.post("/network-school/{username}")
+async def evaluate_network_school_posts(
+    username: str,
+    request: Request,
+    profile: Profile = Depends(verify_profile),
+) -> JSONResponse:
+    """Evaluate Twitter/X posts for Network School alignment.
+
+    This endpoint evaluates a user's recent Twitter/X posts for alignment with
+    Network School and startup society ideals. It fetches the user's 50 most
+    recent posts, scores them using Grok's search capabilities, and returns the
+    top posts with payout recommendations.
+
+    Args:
+        username: Twitter/X username (with or without @ symbol)
+        request: The FastAPI request object
+        profile: The authenticated user's profile
+
+    Returns:
+        JSONResponse: Evaluation results including:
+            - username: Twitter username evaluated
+            - total_posts_analyzed: Number of posts analyzed
+            - top_posts: Top 3 posts with scores, reasons, and payouts
+            - usage_input_tokens: Input tokens used
+            - usage_output_tokens: Output tokens used
+            - citations: List of tweet URLs analyzed
+            - search_queries: Search queries used by Grok
+            - raw_response: Truncated raw model reply (first 1,000 characters)
+
+    Raises:
+        HTTPException: If there's an error during evaluation
+    """
+    try:
+        logger.info(
+            f"Network School evaluation request for @{username} from {request.client.host if request.client else 'unknown'} for profile {profile.id}"
+        )
+
+        # Run the evaluation
+        result = await evaluate_user_posts(username)
+
+        logger.info(
+            f"Network School evaluation completed for @{username}: "
+            f"{len(result.top_posts)} top posts, "
+            f"{result.total_posts_analyzed} total analyzed"
+        )
+
+        # Convert to dict; the bulky raw OpenRouter response is excluded from
+        # the frontend payload
+        response_data = result.model_dump(exclude={"raw_openrouter_response"})
+
+        return JSONResponse(content=response_data)
+
+    except Exception as e:
+        logger.error(
+            f"Failed to run Network School evaluation for @{username} (profile {profile.id})",
+            exc_info=e,
+        )
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to evaluate Network School posts: {str(e)}",
+        )
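For reference, a client-side sketch of calling the new endpoint. The host, router prefix, bearer-token handling, and `httpx` usage are illustrative assumptions; only the `/network-school/{username}` route itself comes from this diff:

    import asyncio
    import httpx  # assumed async HTTP client; any equivalent works

    async def fetch_network_school_eval(username: str, token: str) -> dict:
        # Hypothetical host and prefix; adjust to wherever the API is deployed.
        url = f"http://localhost:8000/tools/evaluation/network-school/{username}"
        async with httpx.AsyncClient(timeout=120.0) as client:
            # verify_profile is assumed here to accept a bearer token.
            resp = await client.post(url, headers={"Authorization": f"Bearer {token}"})
            resp.raise_for_status()
            return resp.json()

    # data = asyncio.run(fetch_network_school_eval("balajis", "<token>"))
    # for post in data["top_posts"]:
    #     print(post["score"], post["recommended_payout"], post["post_url"])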
diff --git a/app/services/ai/simple_workflows/__init__.py b/app/services/ai/simple_workflows/__init__.py
index 249c7b8b..9c0599e9 100644
--- a/app/services/ai/simple_workflows/__init__.py
+++ b/app/services/ai/simple_workflows/__init__.py
@@ -25,6 +25,11 @@
     generate_dao_tweet,
     analyze_tweet,
 )
+from .network_school_evaluator import (
+    evaluate_user_posts,
+    NetworkSchoolEvaluationResult,
+    PostEvaluation,
+)
 
 __all__ = [
     "evaluate_proposal_strict",
@@ -34,4 +39,7 @@
     "execute_workflow_stream",
     "generate_dao_tweet",
     "analyze_tweet",
+    "evaluate_user_posts",
+    "NetworkSchoolEvaluationResult",
+    "PostEvaluation",
 ]
diff --git a/app/services/ai/simple_workflows/network_school_evaluator.py b/app/services/ai/simple_workflows/network_school_evaluator.py
new file mode 100644
index 00000000..41cfee1b
--- /dev/null
+++ b/app/services/ai/simple_workflows/network_school_evaluator.py
@@ -0,0 +1,174 @@
+"""Network School post evaluator.
+
+Evaluates Twitter/X posts for Network School alignment and quality.
+Uses Grok's search capabilities to fetch and score posts.
+"""
+
+import json
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+from app.lib.logger import configure_logger
+from app.services.ai.simple_workflows.evaluation_openrouter_v2 import (
+    call_openrouter,
+)
+
+logger = configure_logger(__name__)
+
+
+class PostEvaluation(BaseModel):
+    """Evaluation result for a single post."""
+
+    post_url: str = Field(..., description="URL to the tweet")
+    score: int = Field(..., ge=0, le=100, description="Score from 0-100")
+    reason: str = Field(..., description="1-sentence explanation of the score")
+    recommended_payout: str = Field(
+        ..., description="Recommended payout in USD (e.g., '$100')"
+    )
+
+
+class NetworkSchoolEvaluationResult(BaseModel):
+    """Complete evaluation result."""
+
+    username: str
+    total_posts_analyzed: int
+    top_posts: List[PostEvaluation] = Field(default_factory=list)
+    usage_input_tokens: Optional[int] = None
+    usage_output_tokens: Optional[int] = None
+    raw_response: Optional[str] = None
+    citations: Optional[List[str]] = Field(default_factory=list)
+    search_queries: Optional[List[str]] = Field(default_factory=list)
+    raw_openrouter_response: Optional[Dict[str, Any]] = None
+
+
+EVALUATION_PROMPT = """You are evaluating Twitter/X posts for Network School alignment and quality.
+
+IMPORTANT: You must analyze ALL 50 posts and then select the top 3. Do not stop after finding 3 good posts.
+
+Task:
+1. Search for and fetch the 50 most recent posts from @{username}
+2. Score ALL 50 posts (not just the first few)
+3. Return ONLY the top 3 highest-scoring posts
+
+Scoring Criteria (0-100):
+- REJECT slop (low-effort, generic content)
+- REJECT generic positivity (empty cheerleading)
+- REWARD clarity (clear, well-articulated ideas)
+- REWARD depth (substantive, thoughtful analysis)
+- REWARD persuasion (compelling arguments)
+- REWARD real work (evidence of actual building/doing)
+
+Process:
+1. Use the search tool to find 50 recent posts from @{username}
+2. Evaluate and score each of the 50 posts individually
+3. Sort all 50 posts by score (highest to lowest)
+4. Return ONLY the top 3 posts
+
+Output Format (JSON):
+{{
+  "total_posts_analyzed": 50,
+  "top_posts": [
+    {{
+      "post_url": "Direct link to the tweet",
+      "score": 0-100,
+      "reason": "1-sentence explanation",
+      "recommended_payout": "$100 or $50 or $25 or $10 or $0"
+    }}
+  ]
+}}
+
+Payout Calculation:
+- 90-100: $100
+- 80-89: $50
+- 70-79: $25
+- 60-69: $10
+- Below 60: $0
+
+CRITICAL: You must set total_posts_analyzed to the actual number of posts you evaluated."""
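The two models above can be sanity-checked without any network call; a quick sketch, with all sample values invented for illustration:

    # Quick schema check for PostEvaluation / NetworkSchoolEvaluationResult.
    sample = {
        "username": "balajis",
        "total_posts_analyzed": 50,
        "top_posts": [
            {
                "post_url": "https://x.com/balajis/status/1",  # invented URL
                "score": 92,
                "reason": "Substantive thread with evidence of real work.",
                "recommended_payout": "$100",
            }
        ],
    }

    result = NetworkSchoolEvaluationResult(**sample)
    assert result.top_posts[0].score == 92
    assert result.citations == []  # default_factory fills unset list fields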
+
+async def evaluate_user_posts(
+    username: str,
+    evaluation_prompt: Optional[str] = None,
+) -> NetworkSchoolEvaluationResult:
+    """Evaluate recent posts from a Twitter/X user using Grok's search.
+
+    Args:
+        username: Twitter/X username (with or without @ symbol)
+        evaluation_prompt: Optional prompt template overriding
+            EVALUATION_PROMPT; must contain a {username} placeholder
+
+    Returns:
+        NetworkSchoolEvaluationResult with top posts and scores
+    """
+    # Remove @ if present
+    username = username.lstrip("@")
+
+    logger.info(f"Starting evaluation for @{username} using Grok search")
+
+    try:
+        prompt = (evaluation_prompt or EVALUATION_PROMPT).format(username=username)
+        messages = [{"role": "user", "content": prompt}]
+
+        logger.debug(f"Sending evaluation request to Grok for @{username}")
+
+        x_ai_tools = [{"type": "web_search"}, {"type": "x_search"}]
+        openrouter_response = await call_openrouter(
+            messages=messages,
+            model="x-ai/grok-4-fast",
+            temperature=0.3,
+            tools=x_ai_tools,
+        )
+
+        usage = openrouter_response.get("usage") or {}
+        usage_input_tokens = usage.get("prompt_tokens")
+        usage_output_tokens = usage.get("completion_tokens")
+
+        choices = openrouter_response.get("choices", [])
+        if not choices:
+            raise ValueError("No choices in OpenRouter response")
+
+        first_choice = choices[0]
+        choice_message = first_choice.get("message")
+        if not choice_message or not isinstance(choice_message.get("content"), str):
+            raise ValueError("Invalid message content in OpenRouter response")
+
+        content = choice_message["content"]
+        logger.debug(f"Raw LLM response for @{username}: {content[:500]}...")
+
+        # Collect citations and search queries from response annotations
+        annotations = choice_message.get("annotations") or []
+        citations_list: List[str] = []
+        search_queries_list: List[str] = []
+        for annotation in annotations:
+            annotation_type = annotation.get("type")
+            if annotation_type == "url_citation":
+                url = (annotation.get("url_citation") or {}).get("url")
+                if url:
+                    citations_list.append(url)
+            elif annotation_type == "search_query":
+                query = (annotation.get("search_query") or {}).get("query")
+                if query:
+                    search_queries_list.append(query)
+
+        # Tolerate a Markdown code fence around the JSON payload
+        cleaned = content.strip()
+        if cleaned.startswith("```"):
+            cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else cleaned
+            cleaned = cleaned.rsplit("```", 1)[0]
+
+        evaluation_data = json.loads(cleaned)
+        if not isinstance(evaluation_data, dict):
+            raise ValueError("Evaluation response must be a JSON object")
+
+        evaluation_payload = {
+            **evaluation_data,
+            "username": username,
+            "citations": citations_list,
+            "search_queries": search_queries_list,
+            "raw_openrouter_response": openrouter_response,
+            "usage_input_tokens": usage_input_tokens,
+            "usage_output_tokens": usage_output_tokens,
+            "raw_response": content[:1000],
+        }
+
+        result = NetworkSchoolEvaluationResult(**evaluation_payload)
+
+        logger.info(
+            f"Evaluation complete for @{username} - "
+            f"analyzed {result.total_posts_analyzed} posts, "
+            f"returned {len(result.top_posts)} top posts"
+        )
+
+        return result
+
+    except Exception as e:
+        logger.error(f"Error evaluating posts for @{username}: {str(e)}", exc_info=True)
+        raise
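A minimal usage sketch for the new workflow function, assuming OpenRouter credentials are already configured for `call_openrouter`; `demo` is just a hypothetical caller:

    import asyncio
    from app.services.ai.simple_workflows import evaluate_user_posts

    async def demo() -> None:
        result = await evaluate_user_posts("balajis")
        print(f"Analyzed {result.total_posts_analyzed} posts")
        for post in result.top_posts:
            print(post.score, post.recommended_payout, post.post_url)
        # A custom rubric can be supplied via
        # evaluate_user_posts(username, evaluation_prompt=...),
        # as the test script below does.

    # asyncio.run(demo())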
diff --git a/test_network_school_eval.py b/test_network_school_eval.py
new file mode 100755
index 00000000..b2f4fc86
--- /dev/null
+++ b/test_network_school_eval.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""Test script for Network School post evaluator.
+
+Usage:
+    python test_network_school_eval.py <username> <prompt_file>
+
+Example:
+    python test_network_school_eval.py balajis evaluation_prompt.txt
+"""
+
+import asyncio
+import sys
+import json
+
+from app.services.ai.simple_workflows.network_school_evaluator import (
+    evaluate_user_posts,
+)
+from app.lib.logger import configure_logger
+
+logger = configure_logger(__name__)
+
+
+def format_result(result):
+    """Format the evaluation result for display."""
+    output = []
+    output.append("=" * 80)
+    output.append(f"Network School Evaluation for @{result.username}")
+    output.append("=" * 80)
+    output.append(f"\nTotal posts analyzed: {result.total_posts_analyzed}")
+
+    # Display usage statistics if available
+    if result.usage_input_tokens or result.usage_output_tokens:
+        output.append("\nUsage Statistics:")
+        if result.usage_input_tokens:
+            output.append(f"  Input tokens: {result.usage_input_tokens:,}")
+        if result.usage_output_tokens:
+            output.append(f"  Output tokens: {result.usage_output_tokens:,}")
+
+    # Display citations and search queries
+    if result.citations or result.search_queries:
+        output.append("\nSearch Information:")
+        if result.search_queries:
+            output.append(f"  Search queries used: {len(result.search_queries)}")
+            for i, query in enumerate(result.search_queries[:5], 1):
+                output.append(f"    {i}. {query}")
+        if result.citations:
+            output.append(f"  Sources cited: {len(result.citations)}")
+            for i, citation in enumerate(result.citations[:10], 1):
+                output.append(f"    {i}. {citation}")
+
+    output.append("")
+
+    if not result.top_posts:
+        output.append("No posts found or evaluated.")
+        return "\n".join(output)
+
+    output.append(f"Top {len(result.top_posts)} Posts:\n")
+
+    for i, post in enumerate(result.top_posts, 1):
+        output.append(f"{'─' * 80}")
+        output.append(f"#{i} - Score: {post.score}/100")
+        output.append(f"Recommended Payout: {post.recommended_payout}")
+        output.append(f"URL: {post.post_url}")
+        output.append(f"\nReason: {post.reason}")
+        output.append("")
+
+    output.append("=" * 80)
+    return "\n".join(output)
+
+
+async def main():
+    """Main test function."""
+    if len(sys.argv) < 3:
+        print("Usage: python test_network_school_eval.py <username> <prompt_file>")
+        print(
+            "Example: python test_network_school_eval.py balajis evaluation_prompt.txt"
+        )
+        print("\nThe prompt file must include a {username} placeholder")
+        sys.exit(1)
+
+    username = sys.argv[1]
+    prompt_file = sys.argv[2]
+
+    # Load the evaluation prompt
+    try:
+        with open(prompt_file, "r") as f:
+            evaluation_prompt = f.read()
+        logger.info(f"Loaded evaluation prompt from {prompt_file}")
+    except FileNotFoundError:
+        logger.error(f"Prompt file not found: {prompt_file}")
+        sys.exit(1)
+
+    # Validate that the prompt has a {username} placeholder
+    if "{username}" not in evaluation_prompt:
+        logger.error("Prompt must contain a {username} placeholder")
+        sys.exit(1)
+
+    logger.info(f"Testing Network School evaluator with @{username}")
+
+    try:
+        # Run evaluation with the prompt from the file
+        result = await evaluate_user_posts(
+            username, evaluation_prompt=evaluation_prompt
+        )
+
+        # Display formatted result
+        print("\n" + format_result(result))
+
+        # Save evaluation result JSON
+        output_file = f"network_school_eval_{username}.json"
+        result_dict = result.model_dump()
+
+        # Extract and save the raw OpenRouter response separately
+        raw_response = result_dict.pop("raw_openrouter_response", None)
+
+        with open(output_file, "w") as f:
+            json.dump(result_dict, f, indent=2)
+
{output_file}") + + # Save raw OpenRouter response to separate file + if raw_response: + raw_output_file = f"network_school_eval_{username}_raw_openrouter.json" + with open(raw_output_file, "w") as f: + json.dump(raw_response, f, indent=2, default=str) + print(f"āœ… Raw OpenRouter response saved to: {raw_output_file}") + + # Print citations summary if available + if result.citations: + print(f"\nšŸ“š Found {len(result.citations)} citations (tweet sources)") + print("Check the logs above for full citation details!") + if result.search_queries: + print(f"šŸ” Used {len(result.search_queries)} search queries") + + except Exception as e: + logger.error(f"Error during evaluation: {str(e)}", exc_info=True) + sys.exit(1) + + +if __name__ == "__main__": + asyncio.run(main())