-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathstart_bots.py
More file actions
315 lines (259 loc) · 9.6 KB
/
start_bots.py
File metadata and controls
315 lines (259 loc) · 9.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
#!/usr/bin/env python3
"""
Bot launcher with built-in monitoring.
Starts both Sarah and Atlas, monitors them, and restarts on request.
Usage:
    python start_bots.py          # Start and monitor both bots
    python start_bots.py sarah    # Start and monitor only Sarah
    python start_bots.py atlas    # Start and monitor only Atlas
"""
import os
import sys
import subprocess
import time
import signal
import atexit
from pathlib import Path
from datetime import datetime
# --- Filesystem layout: everything lives next to this script ---
SIBLINGS_DIR = Path(__file__).parent

# Per-bot PID files and the launcher lock file are kept here.
PID_DIR = SIBLINGS_DIR.joinpath(".pids")
PID_DIR.mkdir(exist_ok=True)

# Directory polled for restart request files.
RESTART_REQUEST_DIR = SIBLINGS_DIR.joinpath(".restart_requests")
RESTART_REQUEST_DIR.mkdir(exist_ok=True)

# File locked by the launcher to prevent double launches.
LAUNCHER_LOCK = PID_DIR.joinpath("launcher.lock")

# --- Safety limits ---
MAX_RESTARTS_PER_HOUR = 5  # restart cap per bot within the last hour
MONITOR_INTERVAL = 3  # seconds between checks

# Global lock file handle: the open file while the lock is held, else None.
_lock_fd = None
def acquire_lock() -> bool:
    """Acquire the launcher lock to prevent double launches.

    On Windows a non-blocking ``msvcrt`` lock is taken on the first 10 bytes
    of ``LAUNCHER_LOCK`` and the file is rewritten with this process's PID
    and the current timestamp. On every other platform no locking is
    attempted and the function simply reports success.

    Returns:
        True if the lock was acquired (or locking is not attempted on this
        platform), False if the lock file could not be opened, locked or
        written.
    """
    global _lock_fd
    if sys.platform != "win32":
        return True

    import msvcrt

    handle = None
    try:
        if not LAUNCHER_LOCK.exists():
            LAUNCHER_LOCK.write_text("init")
        handle = open(LAUNCHER_LOCK, "r+")
        # Freshly opened, so the file position is 0: the lock covers bytes 0-9.
        msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 10)
        handle.seek(0)
        handle.write(f"{os.getpid()}\n{time.time()}")
        # Drop leftover bytes from a previous, longer PID/timestamp record.
        handle.truncate()
        handle.flush()
    except OSError:
        # Do not leak the handle (or leave a stale global) when locking fails.
        if handle is not None:
            handle.close()
        return False

    # Keep the handle open for the lifetime of the launcher; closing it
    # would give up the lock.
    _lock_fd = handle
    return True
def release_lock():
    """Release the launcher lock and close the lock file handle.

    Does nothing when no lock handle is held. All errors are suppressed:
    this is best-effort cleanup, and closing the handle gives up the lock
    even if the explicit unlock fails.
    """
    global _lock_fd
    if _lock_fd is None:
        return

    handle = _lock_fd
    # Forget the handle first so a failure below cannot leave a stale global.
    _lock_fd = None
    try:
        if sys.platform == "win32":
            import msvcrt
            try:
                # msvcrt.locking() operates from the current file position,
                # which acquire_lock() advanced by writing the PID record.
                # Rewind so the unlock targets the bytes that were locked.
                handle.seek(0)
                msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 10)
            except Exception:
                pass
        handle.close()
    except Exception:
        pass
def kill_process(pid: int):
    """Force-terminate the process with the given PID via ``taskkill /F``.

    Best effort: the command's exit status is ignored and any exception
    raised while running it is swallowed.
    """
    try:
        command = ["taskkill", "/F", "/PID", str(pid)]
        subprocess.run(command, check=False, capture_output=True)
    except Exception:
        return
def is_process_running(pid: int) -> bool:
    """Report whether ``tasklist`` lists a process with the given PID.

    ``tasklist`` is run with a ``PID eq <pid>`` filter and the PID is then
    looked for as a substring of its output. Any failure to run the command
    is treated as "not running".
    """
    needle = str(pid)
    try:
        completed = subprocess.run(
            ["tasklist", "/FI", f"PID eq {pid}"],
            text=True,
            capture_output=True,
        )
        found = needle in completed.stdout
    except Exception:
        found = False
    return found
def kill_existing_bots():
    """Terminate bot processes left over from a previous launch.

    Two passes are made:

    1. For each known bot, the PID recorded in ``PID_DIR/<name>.pid`` is
       killed if that process is still running, and the PID file is removed.
    2. ``wmic`` is asked for every ``python.exe`` process; any whose listing
       line mentions ``bot.main`` is killed as an orphan.

    The function then sleeps for two seconds before returning.
    """
    # Pass 1: processes recorded in PID files.
    for bot in ("sarah", "atlas"):
        record = PID_DIR / f"{bot}.pid"
        if not record.exists():
            continue
        try:
            recorded_pid = int(record.read_text().strip())
            if is_process_running(recorded_pid):
                print(f" Killing existing {bot} (PID {recorded_pid})...")
                kill_process(recorded_pid)
        except (ValueError, FileNotFoundError):
            # Unparseable or vanished PID file: nothing to kill.
            pass
        record.unlink(missing_ok=True)

    # Pass 2: orphan bot.main processes that have no PID file.
    query = [
        "wmic", "process", "where", "name='python.exe'",
        "get", "processid,commandline",
    ]
    try:
        listing = subprocess.run(query, capture_output=True, text=True)
        for row in listing.stdout.split('\n'):
            if 'bot.main' not in row:
                continue
            fields = row.strip().split()
            if not fields:
                continue
            try:
                # The last whitespace-separated token is taken as the PID.
                orphan_pid = int(fields[-1])
                print(f" Killing orphan bot (PID {orphan_pid})...")
                kill_process(orphan_pid)
            except ValueError:
                pass
    except Exception:
        pass

    time.sleep(2)
def start_bot(name: str) -> subprocess.Popen:
    """Launch ``python -m bot.main`` for one bot and record its PID.

    The bot runs with the virtual environment's interpreter, with
    ``SIBLINGS_DIR/<name>`` as its working directory, in a new process
    group, and with stdout/stderr discarded. Its PID is written to
    ``PID_DIR/<name>.pid``.

    Args:
        name: Bot name; also the name of its directory under SIBLINGS_DIR.

    Returns:
        The ``subprocess.Popen`` object of the started bot.
    """
    # Child environment: a copy of ours minus the variables the bot must
    # not inherit (no inherited token, no CLAUDECODE).
    child_env = os.environ.copy()
    for blocked in ('TELEGRAM_BOT_TOKEN', 'CLAUDECODE'):
        child_env.pop(blocked, None)

    interpreter = SIBLINGS_DIR / ".venv" / "Scripts" / "python.exe"
    working_dir = SIBLINGS_DIR / name
    child = subprocess.Popen(
        [str(interpreter), "-m", "bot.main"],
        cwd=str(working_dir),
        env=child_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
    )

    # Save the PID so a later launch can find this process.
    (PID_DIR / f"{name}.pid").write_text(str(child.pid))
    return child
def check_restart_request(name: str) -> bool:
    """Consume any pending restart request for this bot.

    A request is a file named ``<name>.request`` or ``<name>_restart.json``
    in ``RESTART_REQUEST_DIR``. Every request file that exists is logged
    (first 100 characters of its content) and deleted, so a request that was
    written in both forms is consumed in one go instead of leaving the
    second file behind to trigger another restart on the next check.

    Args:
        name: Bot name the request files are keyed on.

    Returns:
        True if at least one request file was found, otherwise False.
    """
    candidates = (
        RESTART_REQUEST_DIR / f"{name}.request",
        RESTART_REQUEST_DIR / f"{name}_restart.json",
    )
    requested = False
    for request_file in candidates:
        if not request_file.exists():
            continue
        try:
            content = request_file.read_text(encoding='utf-8').strip()
            print(f" Restart request found: {content[:100]}")
        except Exception:
            # The content is informational only; an unreadable file still
            # counts as a request.
            pass
        request_file.unlink(missing_ok=True)
        requested = True
    return requested
def clear_restart_request(name: str):
    """Delete every file in RESTART_REQUEST_DIR whose name starts with ``name``."""
    pattern = f"{name}*"
    for leftover in RESTART_REQUEST_DIR.glob(pattern):
        leftover.unlink(missing_ok=True)
def _recent_restarts(history: list, now: float) -> list:
    """Return the restart timestamps from ``history`` within the last hour."""
    hour_ago = now - 3600
    return [t for t in history if t > hour_ago]


def _stop_for_restart(proc: subprocess.Popen) -> bool:
    """Kill a still-running bot and report whether it exited within 10s."""
    kill_process(proc.pid)
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        return False
    return True


def monitor_bots(bots: dict):
    """
    Monitor running bots and restart them when requested.

    Every ``MONITOR_INTERVAL`` seconds each bot is polled and its restart
    request files are consumed:

    * exited + request found: the bot is started again;
    * running + request found (e.g. written by another bot): the bot is
      killed and started again; if it does not exit within 10 seconds the
      restart is skipped so two copies never run side by side;
    * exited + no request: the exit is reported once and the bot is left
      stopped.

    Restarts are capped at ``MAX_RESTARTS_PER_HOUR`` per bot; a request that
    arrives while the cap is reached is dropped. The loop runs until
    interrupted with Ctrl+C.

    bots: dict of {name: subprocess.Popen}; updated in place with the new
        process whenever a bot is restarted.
    """
    restart_counts = {name: [] for name in bots}  # timestamps of restarts
    reported_exits = set()  # bots whose unrequested exit was already logged
    print(f"\n[Monitor] Watching {', '.join(bots.keys())}... (Ctrl+C to stop)")
    print(f"[Monitor] Checking every {MONITOR_INTERVAL}s")
    try:
        while True:
            time.sleep(MONITOR_INTERVAL)
            for name in list(bots):
                proc = bots[name]
                now = time.time()
                ts = datetime.now().strftime("%H:%M:%S")
                ret = proc.poll()
                exited = ret is not None

                if not check_restart_request(name):
                    # Report an unrequested exit once, not on every poll.
                    if exited and name not in reported_exits:
                        print(f"[{ts}] ❌ {name} exited unexpectedly (code={ret})")
                        print(f"[{ts}] Not restarting (no request file found)")
                        reported_exits.add(name)
                    continue

                # A restart was requested: enforce the hourly cap first.
                restart_counts[name] = _recent_restarts(restart_counts[name], now)
                if len(restart_counts[name]) >= MAX_RESTARTS_PER_HOUR:
                    if exited:
                        print(f"[{ts}] ⚠ {name} hit restart limit ({MAX_RESTARTS_PER_HOUR}/hour). Skipping.")
                    else:
                        print(f"[{ts}] ⚠ {name} hit restart limit. Skipping.")
                    continue

                if exited:
                    print(f"[{ts}] 🔄 {name} exited (restart requested). Restarting...")
                else:
                    # Cross-bot request, e.g. Sarah wrote atlas.request while
                    # Atlas is still alive.
                    print(f"[{ts}] 🔄 {name} restart requested (cross-bot). Killing and restarting...")
                    if not _stop_for_restart(proc):
                        print(f"[{ts}] ⚠ {name} (PID {proc.pid}) did not exit within 10s. Skipping restart.")
                        continue

                time.sleep(2)
                new_proc = start_bot(name)
                bots[name] = new_proc
                restart_counts[name].append(now)
                reported_exits.discard(name)
                print(f"[{ts}] ✅ {name} restarted (PID {new_proc.pid})")
    except KeyboardInterrupt:
        print("\n[Monitor] Stopping...")
def main():
    """Entry point: clean up old bots, start the requested ones, then monitor.

    An optional first command-line argument (``sarah`` or ``atlas``) limits
    the launch to that bot; without it both are started. The process exits
    with status 1 on an unknown bot name, when the launcher lock cannot be
    acquired, or when the virtual environment's interpreter is missing.
    """
    known_bots = ("sarah", "atlas")

    # Parse args
    target = sys.argv[1].lower() if len(sys.argv) > 1 else None
    if target is not None and target not in known_bots:
        print(f"Unknown bot: {target}")
        print("Usage: python start_bots.py [sarah|atlas]")
        sys.exit(1)

    # Acquire lock; it is released automatically at interpreter exit.
    if not acquire_lock():
        print("ERROR: Another launcher is already running!")
        sys.exit(1)
    atexit.register(release_lock)

    # Find venv
    venv_python = SIBLINGS_DIR / ".venv" / "Scripts" / "python.exe"
    if not venv_python.exists():
        print(f"ERROR: Virtual environment not found at {venv_python}")
        sys.exit(1)

    banner = "=" * 50
    print(banner)
    print("Siblings Bot Launcher + Monitor")
    print(banner)

    # Kill existing
    print("\nCleaning up existing processes...")
    kill_existing_bots()

    # Clear old restart requests for every bot about to be launched
    selected = known_bots if target is None else (target,)
    for bot in selected:
        clear_restart_request(bot)

    # Start bots
    print("\nStarting bots...")
    bots = {}
    if "sarah" in selected:
        proc = start_bot("sarah")
        bots["sarah"] = proc
        print(f"Sarah started (PID {proc.pid})")
    if "atlas" in selected:
        time.sleep(1)
        proc = start_bot("atlas")
        bots["atlas"] = proc
        print(f"Atlas started (PID {proc.pid})")

    print(f"\n{banner}")
    for name, proc in bots.items():
        print(f"{name.capitalize()} PID: {proc.pid}")
    print(banner)
    print("\nMonitoring bots (Ctrl+C to stop)...")
    print("To stop bots: python stop_bots.py\n")

    # Enter monitor loop
    monitor_bots(bots)


if __name__ == "__main__":
    main()