-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocessor.py
More file actions
170 lines (134 loc) · 6.54 KB
/
processor.py
File metadata and controls
170 lines (134 loc) · 6.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# processor.py — AI processing layer (OpenRouter/MiniMax)
# Single responsibility: given content (text or image bytes), return structured classification.
# WHY fallback values exist: if AI is down, the prompt is still stored. A record with
# category="Uncategorized" is infinitely better than a lost prompt.
import json
import base64
import httpx
from config import OPENROUTER_API_KEY, OPENROUTER_MODEL
from logger import logger
# Chat-completions endpoint that every request in this module is POSTed to.
OPENROUTER_URL = 'https://openrouter.ai/api/v1/chat/completions'
# Passed as the httpx client timeout for each request attempt.
API_TIMEOUT = 30.0 # seconds
# Extra attempts after the first one: the request loop runs MAX_RETRIES + 1 times.
MAX_RETRIES = 1
# Categories accepted from the model; any other value is replaced with
# 'Uncategorized' when the response is validated.
VALID_CATEGORIES = ['Marketing', 'Business Strategy', 'Coding', 'Writing', 'Research']
# System message sent first in every request (text and image alike). It asks the
# model for a single JSON object with the keys title, prompt_text, category, tags.
# NOTE: the category list in step 3 mirrors VALID_CATEGORIES — keep them in sync.
SYSTEM_PROMPT = """You are a prompt classification assistant. You will receive content that contains an AI prompt. Your job is to:
1. Extract the core prompt text — the actual prompt a user would paste into an AI tool. Strip any surrounding commentary, author attribution, engagement bait, or article filler. If the entire content IS the prompt, return it as-is.
2. Generate a concise, specific title (5-10 words) that describes what the prompt does. Be specific: "LinkedIn Cold Outreach Framework" not "Marketing Prompt".
3. Assign exactly ONE category from: Marketing, Business Strategy, Coding, Writing, Research
4. Generate 1-5 descriptive tags. Tags must be lowercase and hyphenated for multi-word tags. Capture: platform (linkedin, twitter), use case (cold-outreach, brainstorming), target AI model if mentioned (chatgpt, claude, midjourney), and other salient attributes.
Respond ONLY with valid JSON, no markdown code fences, no explanation:
{"title": "...", "prompt_text": "...", "category": "...", "tags": ["...", "..."]}"""
def _build_fallback(raw_content):
"""Fallback values when AI processing fails entirely."""
title = raw_content[:50].strip()
if len(raw_content) > 50:
title += '...'
return {
'title': title,
'prompt_text': raw_content,
'category': 'Uncategorized',
'tags': []
}
def _parse_response(raw_json, raw_content):
    """Parse and validate the model's JSON reply.

    Args:
        raw_json: The text returned by the model. Expected to be a JSON
            object, optionally wrapped in a markdown code fence.
        raw_content: The original content, used to fill in fields the model
            left out and to build the full fallback record.

    Returns:
        dict with keys ``title``, ``prompt_text``, ``category``, ``tags``.
        If the reply cannot be decoded into a JSON object, the result of
        ``_build_fallback(raw_content)`` is returned. Otherwise each field is
        validated on its own, so one bad field does not discard the rest.
    """
    try:
        # Strip markdown code fences if the model wraps its output.
        cleaned = raw_json.strip()
        if cleaned.startswith('```'):
            # Drop the whole opening fence line, which also covers "```json".
            cleaned = cleaned.split('\n', 1)[1] if '\n' in cleaned else cleaned[3:]
        if cleaned.endswith('```'):
            cleaned = cleaned[:-3]
        data = json.loads(cleaned.strip())
        # Valid JSON that is not an object (list, string, number) has no
        # fields to read; treat it like an undecodable reply.
        if not isinstance(data, dict):
            raise TypeError(f"expected a JSON object, got {type(data).__name__}")
    except (json.JSONDecodeError, TypeError, AttributeError, KeyError) as e:
        logger.warning(f"[processor] Failed to parse AI response: {e}")
        return _build_fallback(raw_content)

    # Title: must be a non-blank string, otherwise reuse the fallback title so
    # both code paths truncate (and add the ellipsis) the same way.
    title = data.get('title')
    title = title.strip() if isinstance(title, str) else ''
    if not title:
        title = _build_fallback(raw_content)['title']

    # Prompt text: must be a non-blank string, otherwise keep the raw content.
    prompt_text = data.get('prompt_text')
    if not isinstance(prompt_text, str) or not prompt_text.strip():
        prompt_text = raw_content

    # Category: only values from the allowed list are kept.
    category = data.get('category')
    category = category.strip() if isinstance(category, str) else ''
    if category not in VALID_CATEGORIES:
        category = 'Uncategorized'

    # Tags: accept a list/tuple; a lone string is split on commas rather than
    # being iterated character by character; anything else (including null)
    # becomes "no tags" instead of invalidating the whole reply.
    raw_tags = data.get('tags')
    if isinstance(raw_tags, str):
        raw_tags = raw_tags.split(',')
    elif not isinstance(raw_tags, (list, tuple)):
        raw_tags = []
    normalized = (str(t).lower().strip() for t in raw_tags if t)
    # Filter again after normalizing so whitespace-only tags are dropped.
    tags = [t for t in normalized if t][:5]

    return {
        'title': title,
        'prompt_text': prompt_text,
        'category': category,
        'tags': tags,
    }
async def process_text(content):
    """Classify a piece of text via the OpenRouter model.

    Args:
        content: Text to classify (a raw prompt or text extracted from a URL).

    Returns:
        dict with keys: title, prompt_text, category, tags. The same text is
        handed to the request helper as the fallback content, so a failed
        call still yields a record built from ``content``.
    """
    system_turn = {'role': 'system', 'content': SYSTEM_PROMPT}
    user_turn = {'role': 'user', 'content': content}
    result = await _call_openrouter([system_turn, user_turn], content)
    return result
async def process_image(image_bytes, content_type='image/png'):
    """Run OCR plus classification on an image via the OpenRouter model.

    Args:
        image_bytes: Raw bytes of the image.
        content_type: MIME type placed in the data URL (default 'image/png').

    Returns:
        dict with keys: title, prompt_text, category, tags
    """
    # The image travels inline as a base64 data URL.
    encoded = base64.b64encode(image_bytes).decode('utf-8')
    data_url = f'data:{content_type};base64,{encoded}'

    image_part = {
        'type': 'image_url',
        'image_url': {'url': data_url},
    }
    instruction_part = {
        'type': 'text',
        'text': 'Extract all visible text from this screenshot, then classify the AI prompt contained within it.',
    }
    conversation = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': [image_part, instruction_part]},
    ]

    # There is no raw text to fall back on for an image, so a placeholder
    # string stands in as the fallback content.
    return await _call_openrouter(conversation, '[image content — OCR failed]')
async def _call_openrouter(messages, raw_content_for_fallback):
    """POST a chat-completion request to OpenRouter and parse the reply.

    Up to ``MAX_RETRIES + 1`` attempts are made. Timeouts, retryable HTTP
    statuses (408, 429, 5xx) and unexpected errors move on to the next
    attempt; other 4xx statuses stop immediately because resending the
    identical request cannot succeed.

    Args:
        messages: Chat messages to send as the ``messages`` payload field.
        raw_content_for_fallback: Content used to build the fallback record
            when no usable reply is obtained.

    Returns:
        dict with keys title, prompt_text, category, tags — either the parsed
        model reply or the result of ``_build_fallback``. Request and parsing
        failures are logged and absorbed rather than propagated.
    """
    headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'Content-Type': 'application/json',
        'HTTP-Referer': 'https://promptcorder.local',
        'X-Title': 'PromptCorder',
    }
    payload = {
        'model': OPENROUTER_MODEL,
        'messages': messages,
        'temperature': 0.3,  # Low temp for consistent classification
        'max_tokens': 2000,
    }
    total_attempts = MAX_RETRIES + 1

    try:
        # One client for all attempts so a retry can reuse the pooled
        # connection instead of paying for a new handshake.
        async with httpx.AsyncClient(timeout=API_TIMEOUT) as client:
            for attempt in range(total_attempts):
                try:
                    response = await client.post(
                        OPENROUTER_URL,
                        headers=headers,
                        json=payload,
                    )
                    response.raise_for_status()
                    data = response.json()
                    ai_content = data['choices'][0]['message']['content']
                    # A null/non-text content cannot be parsed; report it
                    # clearly and let the generic handler move to a retry.
                    if not isinstance(ai_content, str):
                        raise TypeError(
                            f"message content is {type(ai_content).__name__}, expected str"
                        )
                    logger.info(f"[processor] AI response received ({len(ai_content)} chars), attempt {attempt + 1}")
                    return _parse_response(ai_content, raw_content_for_fallback)
                except httpx.TimeoutException:
                    logger.warning(f"[processor] OpenRouter timeout (attempt {attempt + 1}/{MAX_RETRIES + 1})")
                except httpx.HTTPStatusError as e:
                    status = e.response.status_code
                    logger.warning(f"[processor] OpenRouter HTTP {status} (attempt {attempt + 1})")
                    # Client errors other than timeout (408) and rate limit
                    # (429) are permanent for this request: do not retry.
                    if 400 <= status < 500 and status not in (408, 429):
                        logger.error(f"[processor] Non-retryable HTTP {status}. Using fallback values.")
                        return _build_fallback(raw_content_for_fallback)
                except Exception as e:
                    logger.warning(f"[processor] OpenRouter error: {e} (attempt {attempt + 1})")
    except Exception as e:
        # Failure while creating or closing the client itself.
        logger.warning(f"[processor] OpenRouter client error: {e}")

    # All retries exhausted — return fallback
    logger.error("[processor] All retries exhausted. Using fallback values.")
    return _build_fallback(raw_content_for_fallback)