diff --git a/scripts/restart.sh b/scripts/restart.sh index 62715a4f..966d0d88 100755 --- a/scripts/restart.sh +++ b/scripts/restart.sh @@ -58,6 +58,29 @@ else echo "No ccbot process running in $TARGET" fi +# Sweep any orphan ccbot processes elsewhere on the host (e.g. another tmux +# server). Multiple instances polling the same bot token compete for updates +# and silently break command routing. +ORPHAN_PIDS=$(pgrep -f '\.venv/bin/ccbot' | grep -vxF "$$" || true) +# Exclude pids inside our target pane's process tree (already handled above) +if [ -n "$ORPHAN_PIDS" ]; then + OWN_TREE=$(pstree -p "$PANE_PID" 2>/dev/null | grep -oE '\([0-9]+\)' | tr -d '()' || true) + for pid in $ORPHAN_PIDS; do + if ! echo "$OWN_TREE" | grep -qx "$pid"; then + echo "Killing orphan ccbot process $pid (outside $TARGET)..." + kill "$pid" 2>/dev/null || true + fi + done + sleep 1 + # Anything still alive — SIGKILL + for pid in $ORPHAN_PIDS; do + if kill -0 "$pid" 2>/dev/null; then + echo "Orphan $pid still alive, SIGKILL..." + kill -9 "$pid" 2>/dev/null || true + fi + done +fi + # Brief pause to let the shell settle sleep 1 diff --git a/src/ccbot/bot.py b/src/ccbot/bot.py index 87ca30b7..3c89dc20 100644 --- a/src/ccbot/bot.py +++ b/src/ccbot/bot.py @@ -277,6 +277,53 @@ async def unbind_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> ) +async def kill_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Kill the tmux window for this topic and delete the topic itself.""" + user = update.effective_user + if not user or not is_user_allowed(user.id): + return + if not update.message: + return + + thread_id = _get_thread_id(update) + if thread_id is None: + await safe_reply(update.message, "❌ This command only works in a topic.") + return + + wid = session_manager.get_window_for_thread(user.id, thread_id) + if wid: + display = session_manager.get_display_name(wid) + w = await tmux_manager.find_window_by_id(wid) + if w: + await tmux_manager.kill_window(w.window_id) + logger.info( + "/kill: killed window %s (user=%d, thread=%d)", + display, + user.id, + thread_id, + ) + session_manager.unbind_thread(user.id, thread_id) + session_manager.purge_window(wid) + await session_manager.remove_session_map_entry(wid) + await clear_topic_state(user.id, thread_id, context.bot, context.user_data) + + chat = update.effective_chat + if chat is None: + await safe_reply(update.message, "✅ Session killed.") + return + + try: + await context.bot.delete_forum_topic( + chat_id=chat.id, message_thread_id=thread_id + ) + except Exception as e: + logger.warning("delete_forum_topic failed: %s", e) + await safe_reply( + update.message, + "✅ Session killed. Couldn't delete topic — close or delete it manually.", + ) + + async def esc_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: """Send Escape key to interrupt Claude.""" user = update.effective_user @@ -434,6 +481,8 @@ async def topic_closed_handler( thread_id, ) session_manager.unbind_thread(user.id, thread_id) + session_manager.purge_window(wid) + await session_manager.remove_session_map_entry(wid) # Clean up all memory state for this topic await clear_topic_state(user.id, thread_id, context.bot, context.user_data) else: @@ -1746,7 +1795,10 @@ async def handle_new_message(msg: NewMessage, bot: Bot) -> None: await clear_interactive_msg(user_id, bot, thread_id) # Skip tool call notifications when CCBOT_SHOW_TOOL_CALLS=false - if not config.show_tool_calls and msg.content_type in ("tool_use", "tool_result"): + if not config.show_tool_calls and msg.content_type in ( + "tool_use", + "tool_result", + ): continue parts = build_response_parts( @@ -1872,6 +1924,7 @@ def create_bot() -> Application: application.add_handler(CommandHandler("history", history_command)) application.add_handler(CommandHandler("screenshot", screenshot_command)) application.add_handler(CommandHandler("esc", esc_command)) + application.add_handler(CommandHandler("kill", kill_command)) application.add_handler(CommandHandler("unbind", unbind_command)) application.add_handler(CommandHandler("usage", usage_command)) application.add_handler(CallbackQueryHandler(callback_handler)) diff --git a/src/ccbot/handlers/message_sender.py b/src/ccbot/handlers/message_sender.py index d54cd139..3c053e2a 100644 --- a/src/ccbot/handlers/message_sender.py +++ b/src/ccbot/handlers/message_sender.py @@ -51,6 +51,29 @@ def _ensure_formatted(text: str) -> str: NO_LINK_PREVIEW = LinkPreviewOptions(is_disabled=True) +# Substrings in Telegram errors that mean the topic/thread is gone +_TOPIC_GONE_MARKERS = ("Topic_id_invalid", "Message thread not found") + + +async def _maybe_cleanup_dead_topic( + chat_id: int, kwargs: dict[str, Any], err: BaseException +) -> None: + """If the error indicates a dead forum topic, tear down the binding. + + Called from send fallbacks after both attempts have failed. Lazy-imports + session_manager to avoid a circular import at module load. + """ + msg = str(err) + if not any(m in msg for m in _TOPIC_GONE_MARKERS): + return + thread_id = kwargs.get("message_thread_id") + if thread_id is None: + return + from ..session import session_manager # lazy: avoid circular import + + await session_manager.cleanup_dead_topic(int(chat_id), int(thread_id)) + + async def send_with_fallback( bot: Bot, chat_id: int, @@ -81,6 +104,7 @@ async def send_with_fallback( raise except Exception as e: logger.error(f"Failed to send message to {chat_id}: {e}") + await _maybe_cleanup_dead_topic(chat_id, kwargs, e) return None @@ -196,3 +220,4 @@ async def safe_send( raise except Exception as e: logger.error(f"Failed to send message to {chat_id}: {e}") + await _maybe_cleanup_dead_topic(chat_id, kwargs, e) diff --git a/src/ccbot/handlers/status_polling.py b/src/ccbot/handlers/status_polling.py index c4de1c6e..1efcce02 100644 --- a/src/ccbot/handlers/status_polling.py +++ b/src/ccbot/handlers/status_polling.py @@ -40,7 +40,10 @@ STATUS_POLL_INTERVAL = 1.0 # seconds - faster response (rate limiting at send layer) # Topic existence probe interval -TOPIC_CHECK_INTERVAL = 60.0 # seconds +TOPIC_CHECK_INTERVAL = 5.0 # seconds + +# Substrings in BadRequest messages that indicate the topic is gone +_TOPIC_GONE_MARKERS = ("Topic_id_invalid", "Message thread not found") async def update_status_message( @@ -138,12 +141,14 @@ async def status_poll_loop(bot: Bot) -> None: message_thread_id=thread_id, ) except BadRequest as e: - if "Topic_id_invalid" in str(e): + if any(m in str(e) for m in _TOPIC_GONE_MARKERS): # Topic deleted — kill window, unbind, and clean up state w = await tmux_manager.find_window_by_id(wid) if w: await tmux_manager.kill_window(w.window_id) session_manager.unbind_thread(user_id, thread_id) + session_manager.purge_window(wid) + await session_manager.remove_session_map_entry(wid) await clear_topic_state(user_id, thread_id, bot) logger.info( "Topic deleted: killed window_id '%s' and " @@ -171,6 +176,8 @@ async def status_poll_loop(bot: Bot) -> None: w = await tmux_manager.find_window_by_id(wid) if not w: session_manager.unbind_thread(user_id, thread_id) + session_manager.purge_window(wid) + await session_manager.remove_session_map_entry(wid) await clear_topic_state(user_id, thread_id, bot) logger.info( "Cleaned up stale binding: user=%d thread=%d window_id=%s", diff --git a/src/ccbot/session.py b/src/ccbot/session.py index 173293b1..19b203b2 100644 --- a/src/ccbot/session.py +++ b/src/ccbot/session.py @@ -708,6 +708,81 @@ async def resolve_session_for_window(self, window_id: str) -> ClaudeSession | No self._save_state() return None + def purge_window(self, window_id: str) -> None: + """Remove all in-memory state for a tmux window after it is killed. + + Drops the window from window_states, window_display_names, and every + user's user_window_offsets, then persists state.json. Thread bindings + are left to the caller (unbind_thread is the inverse op there). + """ + changed = False + if window_id in self.window_states: + del self.window_states[window_id] + changed = True + if window_id in self.window_display_names: + del self.window_display_names[window_id] + changed = True + for uid, offsets in list(self.user_window_offsets.items()): + if window_id in offsets: + del offsets[window_id] + changed = True + if not offsets: + del self.user_window_offsets[uid] + if changed: + self._save_state() + logger.info("Purged window state for %s", window_id) + + async def cleanup_dead_topic(self, chat_id: int, thread_id: int) -> bool: + """Tear down any binding whose chat_id+thread_id matches a dead topic. + + Called from send-failure paths when Telegram returns "Message thread not + found" / "Topic_id_invalid". The 60s topic-existence probe can miss this + in private chats (Telegram returns ok=true for unpinAllForumTopicMessages + on private chats with arbitrary thread_ids). Returns True if any binding + was cleaned up. + """ + cleaned = False + for user_id, t_id, wid in list(self.iter_thread_bindings()): + if t_id != thread_id: + continue + if self.resolve_chat_id(user_id, t_id) != chat_id: + continue + self.unbind_thread(user_id, t_id) + self.purge_window(wid) + await self.remove_session_map_entry(wid) + cleaned = True + logger.info( + "Auto-cleaned dead topic via send error: " + "user=%d thread=%d window_id=%s", + user_id, + t_id, + wid, + ) + return cleaned + + async def remove_session_map_entry(self, window_id: str) -> None: + """Delete the session_map.json entry for a window. + + session_map.json is owned by the Claude Code SessionStart hook, but + the hook does not emit a "session ended" event, so killed windows + leave orphan entries. We rewrite the file atomically. + """ + if not config.session_map_file.exists(): + return + try: + async with aiofiles.open(config.session_map_file, "r") as f: + content = await f.read() + session_map = json.loads(content) + except (json.JSONDecodeError, OSError): + return + + key = f"{config.tmux_session_name}:{window_id}" + if key not in session_map: + return + del session_map[key] + atomic_write_json(config.session_map_file, session_map) + logger.info("Removed session_map entry: %s", key) + # --- User window offset management --- def update_user_window_offset( diff --git a/tests/ccbot/test_kill_command.py b/tests/ccbot/test_kill_command.py new file mode 100644 index 00000000..ed74df8b --- /dev/null +++ b/tests/ccbot/test_kill_command.py @@ -0,0 +1,158 @@ +"""Tests for kill_command — kill tmux window and delete forum topic.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +def _make_update(user_id: int = 1, thread_id: int = 42) -> MagicMock: + update = MagicMock() + update.effective_user = MagicMock() + update.effective_user.id = user_id + update.message = MagicMock() + update.message.message_thread_id = thread_id + update.effective_chat = MagicMock() + update.effective_chat.id = 100 + update.effective_chat.type = "supergroup" + return update + + +def _make_context() -> MagicMock: + context = MagicMock() + context.bot = AsyncMock() + context.user_data = {} + return context + + +class TestKillCommand: + @pytest.mark.asyncio + async def test_kill_in_bound_topic_kills_window_and_deletes_topic(self): + update = _make_update() + context = _make_context() + + with ( + patch("ccbot.bot.is_user_allowed", return_value=True), + patch("ccbot.bot._get_thread_id", return_value=42), + patch("ccbot.bot.session_manager") as mock_sm, + patch("ccbot.bot.tmux_manager") as mock_tmux, + patch("ccbot.bot.clear_topic_state", new_callable=AsyncMock), + patch("ccbot.bot.safe_reply", new_callable=AsyncMock) as mock_reply, + ): + mock_sm.get_window_for_thread.return_value = "@5" + mock_sm.get_display_name.return_value = "project" + mock_sm.remove_session_map_entry = AsyncMock() + mock_window = MagicMock() + mock_window.window_id = "@5" + mock_tmux.find_window_by_id = AsyncMock(return_value=mock_window) + mock_tmux.kill_window = AsyncMock() + + from ccbot.bot import kill_command + + await kill_command(update, context) + + mock_tmux.kill_window.assert_called_once_with("@5") + mock_sm.unbind_thread.assert_called_once_with(1, 42) + mock_sm.purge_window.assert_called_once_with("@5") + mock_sm.remove_session_map_entry.assert_awaited_once_with("@5") + context.bot.delete_forum_topic.assert_called_once_with( + chat_id=100, message_thread_id=42 + ) + mock_reply.assert_not_called() + + @pytest.mark.asyncio + async def test_kill_in_unbound_topic_still_deletes_topic(self): + update = _make_update() + context = _make_context() + + with ( + patch("ccbot.bot.is_user_allowed", return_value=True), + patch("ccbot.bot._get_thread_id", return_value=42), + patch("ccbot.bot.session_manager") as mock_sm, + patch("ccbot.bot.tmux_manager") as mock_tmux, + patch("ccbot.bot.clear_topic_state", new_callable=AsyncMock), + patch("ccbot.bot.safe_reply", new_callable=AsyncMock), + ): + mock_sm.get_window_for_thread.return_value = None + mock_tmux.kill_window = AsyncMock() + + from ccbot.bot import kill_command + + await kill_command(update, context) + + mock_tmux.kill_window.assert_not_called() + mock_sm.unbind_thread.assert_not_called() + context.bot.delete_forum_topic.assert_called_once_with( + chat_id=100, message_thread_id=42 + ) + + @pytest.mark.asyncio + async def test_kill_outside_topic_replies_with_error(self): + update = _make_update() + context = _make_context() + + with ( + patch("ccbot.bot.is_user_allowed", return_value=True), + patch("ccbot.bot._get_thread_id", return_value=None), + patch("ccbot.bot.session_manager") as mock_sm, + patch("ccbot.bot.tmux_manager") as mock_tmux, + patch("ccbot.bot.safe_reply", new_callable=AsyncMock) as mock_reply, + ): + mock_tmux.kill_window = AsyncMock() + + from ccbot.bot import kill_command + + await kill_command(update, context) + + mock_reply.assert_called_once() + mock_tmux.kill_window.assert_not_called() + mock_sm.unbind_thread.assert_not_called() + context.bot.delete_forum_topic.assert_not_called() + + @pytest.mark.asyncio + async def test_kill_falls_back_to_reply_when_delete_fails(self): + update = _make_update() + context = _make_context() + context.bot.delete_forum_topic = AsyncMock(side_effect=RuntimeError("nope")) + + with ( + patch("ccbot.bot.is_user_allowed", return_value=True), + patch("ccbot.bot._get_thread_id", return_value=42), + patch("ccbot.bot.session_manager") as mock_sm, + patch("ccbot.bot.tmux_manager") as mock_tmux, + patch("ccbot.bot.clear_topic_state", new_callable=AsyncMock), + patch("ccbot.bot.safe_reply", new_callable=AsyncMock) as mock_reply, + ): + mock_sm.get_window_for_thread.return_value = "@5" + mock_sm.get_display_name.return_value = "project" + mock_sm.remove_session_map_entry = AsyncMock() + mock_window = MagicMock() + mock_window.window_id = "@5" + mock_tmux.find_window_by_id = AsyncMock(return_value=mock_window) + mock_tmux.kill_window = AsyncMock() + + from ccbot.bot import kill_command + + await kill_command(update, context) + + mock_tmux.kill_window.assert_called_once_with("@5") + mock_reply.assert_called_once() + + @pytest.mark.asyncio + async def test_kill_rejects_unauthorized_user(self): + update = _make_update() + context = _make_context() + + with ( + patch("ccbot.bot.is_user_allowed", return_value=False), + patch("ccbot.bot.session_manager") as mock_sm, + patch("ccbot.bot.tmux_manager") as mock_tmux, + ): + mock_tmux.kill_window = AsyncMock() + + from ccbot.bot import kill_command + + await kill_command(update, context) + + mock_tmux.kill_window.assert_not_called() + mock_sm.unbind_thread.assert_not_called() + context.bot.delete_forum_topic.assert_not_called()