-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
113 lines (94 loc) · 3.43 KB
/
config.py
File metadata and controls
113 lines (94 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import logging
from pathlib import Path
from dotenv import load_dotenv
# Load variables from a local .env file (if one is present) into the process
# environment. This must run before the os.getenv() lookups further down so
# that values defined in .env are visible to them.
load_dotenv()
# Paths
# Every location is anchored on the directory that contains this module.
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
RAW_DIR = DATA_DIR / "raw"
NORMALIZED_DIR = DATA_DIR / "normalized"
REPORTS_DIR = DATA_DIR / "reports"
LOG_DIR = BASE_DIR / "logs"

# Ensure directories exist
# This runs as an import-time side effect. parents=True creates any missing
# ancestors and exist_ok=True makes the call a no-op when the directory is
# already there, so importing the module repeatedly is safe.
for d in (DATA_DIR, RAW_DIR, NORMALIZED_DIR, REPORTS_DIR, LOG_DIR):
    d.mkdir(parents=True, exist_ok=True)
# DeepSeek API
# The key is optional: when it is absent the module still imports and only a
# warning is emitted.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
if not DEEPSEEK_API_KEY:
    # Use a named logger rather than the module-level logging.warning():
    # the latter implicitly calls logging.basicConfig() when the root logger
    # has no handlers, which would turn a later basicConfig(...) call by the
    # application into a no-op.
    logging.getLogger(__name__).warning(
        "DEEPSEEK_API_KEY not set. AI analysis will be disabled."
    )

# Tunables, each overridable through an environment variable of the same name.
# int()/float() raise ValueError at import time if an override is not numeric.
DEEPSEEK_MODEL = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
DEEPSEEK_MAX_TOKENS = int(os.getenv("DEEPSEEK_MAX_TOKENS", "4096"))
DEEPSEEK_TEMPERATURE = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.2"))

# Despite the name, this is the full chat-completions endpoint, not a bare
# base URL. It is not overridable from the environment.
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1/chat/completions"
# HTTP settings
# Each value can be overridden via an environment variable of the same name.
# NOTE(review): HTTP_TIMEOUT is presumably in seconds - confirm at the call site.
HTTP_TIMEOUT = int(os.environ.get("HTTP_TIMEOUT", "30"))
MAX_RETRIES = int(os.environ.get("MAX_RETRIES", "3"))

# Default User-Agent header value: a desktop Chrome 120 on Windows string.
USER_AGENT = os.environ.get(
    "USER_AGENT",
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
)
# Database
# Stored as a plain string; defaults to a file inside DATA_DIR.
DB_PATH = os.environ.get("DB_PATH", str(DATA_DIR.joinpath("hackathons.db")))

# Logging
# The level name is upper-cased, so an override such as "debug" becomes "DEBUG".
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
# Stored as a plain string; defaults to a file inside LOG_DIR.
LOG_FILE = os.environ.get("LOG_FILE", str(LOG_DIR.joinpath("hakethon_hunter.log")))
# Scheduler
# Hour and minute for the scheduled run, defaulting to 9:00.
# NOTE(review): time zone and recurrence are decided by the scheduler code,
# which is not part of this module - confirm there.
SCHEDULER_HOUR = int(os.environ.get("SCHEDULER_HOUR", "9"))
SCHEDULER_MINUTE = int(os.environ.get("SCHEDULER_MINUTE", "0"))
# Platforms
# Comma-separated list of platform names taken from ENABLED_PLATFORMS.
# Each entry is stripped of surrounding whitespace, and blank entries are
# dropped so that an empty value, a trailing comma or a doubled comma does
# not produce a "" platform name. An empty variable therefore yields [].
ENABLED_PLATFORMS = [
    name
    for name in (
        part.strip()
        for part in os.getenv(
            "ENABLED_PLATFORMS", "dorahacks,ethglobal,devpost"
        ).split(",")
    )
    if name
]
# Playwright
# Headless mode is on by default. The flag is parsed leniently: surrounding
# whitespace and letter case are ignored and the common truthy spellings are
# accepted, so PLAYWRIGHT_HEADLESS=1 does not silently select a headed
# browser. Any other value (e.g. "false", "0", "no", "") disables headless.
PLAYWRIGHT_HEADLESS = os.getenv("PLAYWRIGHT_HEADLESS", "true").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}
# Content detection thresholds
# Fixed values (no environment override). How they are applied is decided by
# the detection code, which is not part of this module.
SECTION_KEYWORD_THRESHOLD: float = 0.6
# Measured in characters.
MIN_SECTION_LENGTH: int = 50

# Output formats
# strftime-style patterns: ISO-like date, and date plus 24-hour time.
DATE_FORMAT: str = "%Y-%m-%d"
DATETIME_FORMAT: str = "%Y-%m-%d %H:%M:%S"
# Unified schema fields
# Ordered list of field names in the unified record schema.
UNIFIED_FIELDS = [
    # Identification
    "name", "url", "platform",
    # General description
    "description",
    # Categorised sections; these eight names are also the FIELD_KEYWORDS keys
    "requirements", "rules", "prizes", "timeline",
    "technologies", "eligibility", "judging_criteria", "sponsors",
    # Raw section data
    "raw_sections",
]
# Mapping keywords to fields
# Keys are schema field names; each value lists the keywords or short phrases
# associated with that field. The matching logic itself is not in this module.
FIELD_KEYWORDS = {
    "requirements": [
        "build", "must", "submission",
        "requirement", "submit", "deliverable",
    ],
    "rules": [
        "rule", "guideline", "term",
        "condition", "policy", "prohibited",
    ],
    "prizes": [
        "$", "prize", "award", "bounty",
        "reward", "winning", "cash",
    ],
    "timeline": [
        "deadline", "date", "starts", "ends",
        "schedule", "timeline", "phase",
    ],
    "technologies": [
        "stack", "technology", "must use", "track",
        "framework", "language", "tool",
    ],
    "eligibility": [
        "eligible", "eligibility", "who can apply",
        "qualify", "participant",
    ],
    "judging_criteria": [
        "judging", "criteria", "evaluation",
        "score", "metric",
    ],
    "sponsors": [
        "sponsor", "partner", "supported by", "backed by",
    ],
}
# DeepSeek system prompt
# Built line by line and joined with "\n"; there is no trailing newline.
# The text asks for a JSON object with ten named fields and tells the model
# to answer "unknown" when data is missing or unclear.
DEEPSEEK_SYSTEM_PROMPT = "\n".join(
    (
        "You are a strategic hackathon analyst.",
        "Analyze the structured hackathon data below.",
        "Do not hallucinate.",
        'If data is missing or unclear, return "unknown".',
        "Return a JSON object with these exact fields:",
        "- summary (string)",
        "- real_requirements (list of strings)",
        "- hidden_constraints (list of strings)",
        '- difficulty (string: "easy", "medium", "hard")',
        "- opportunity_score (integer 0-10)",
        "- best_strategy (string)",
        "- idea_to_win (string)",
        "- why_people_fail (string)",
        "- relevance_to_ai_agents (integer 0-10)",
        "- risks (string)",
        "Keep responses concise and data-driven.",
    )
)