From a27fbb80e58b505775a9be284d052392b4e3e4e6 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 14 Jun 2026 09:11:15 +0200 Subject: [PATCH 1/5] refac --- README.md | 61 ++++++++++++++++++++++++++++++++-- cptr/app.py | 12 +++++++ cptr/env.py | 10 ++++++ cptr/socket/main.py | 3 +- cptr/utils/proxy_middleware.py | 30 +++++++++++++++++ 5 files changed, 113 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 98ade4c..d60a270 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,67 @@ Push a hotfix from the train. Check on a deploy from bed. Ship a side project fr Close the tab. Come back tomorrow on any device. Everything is where you left it. Sessions survive disconnects. Your work doesn't care which screen you're on. -AI is there if you want it. Bring your own key. Works fine without it. - Life is short. Touch grass. +## What you get + +| | | +|---|---| +| ๐Ÿ“ **File browser** | Navigate, create, rename, upload, drag and drop. Icons by type, sizes at a glance. | +| โŒจ๏ธ **Terminal** | Full PTY-backed shell in the browser. Anything you'd run at your desk. | +| ๐Ÿ”€ **Git** | Stage, commit, diff, branch, push. Visual changes view. No command line required. | +| โœ๏ธ **Editor** | Syntax-highlighted editing with tabs. Open multiple files side by side. | +| ๐Ÿ“‚ **Workspaces** | Multiple projects, one instance. Switch without losing your place. | +| ๐Ÿ” **Search** | Find files by name, search across file contents and chat history. โŒ˜K to find anything. | +| ๐Ÿ“ฑ **Mobile-first** | Not a desktop UI made smaller. Built for the screen in your pocket. | +| ๐Ÿ”„ **Sessions persist** | Terminal keeps running when you close the tab. Come back on any device. | + +## AI agent + +Bring your own API key. Works with OpenAI, Anthropic, Ollama, or any OpenAI-compatible endpoint. + +| | | +|---|---| +| ๐Ÿ’ฌ **Chat** | Built-in AI with streaming responses and tool calling. Not just conversation: it can act. | +| ๐Ÿ”ง **File tools** | AI reads, writes, edits, and searches your codebase directly. | +| โ–ถ๏ธ **Run commands** | AI executes shell commands and reads the output. Foreground or background. | +| ๐ŸŒ **Web browsing** | Navigate pages, click elements, fill forms, take screenshots. | +| ๐Ÿ” **Web search** | Brave, DuckDuckGo, Exa, Tavily, Perplexity, or any chat completions endpoint. | +| ๐Ÿ–ผ๏ธ **Image understanding** | AI reads and describes images and screenshots from your workspace. | +| ๐Ÿ“‹ **Plan mode** | Request an implementation plan before the AI writes a single line. | +| โœ๏ธ **Output editing** | Review and edit AI-generated changes before applying. | +| ๐Ÿ“Ž **File mentions** | Type `@` to give the AI context about specific files. | +| ๐Ÿงฉ **Skills** | Reusable instruction sets (SKILL.md files). Type `$` to mention one. | +| โฑ๏ธ **Automations** | Schedule recurring AI tasks. "Run tests every morning." "Deploy every Friday." | +| ๐Ÿค– **Sub-agents** | AI spins up parallel workers for complex tasks. Each gets full tool access. | +| ๐Ÿ”Œ **Tool servers** | Connect external tools via MCP or OpenAPI. | +| ๐Ÿง  **Context compaction** | Long conversations are automatically summarised to stay fast. | + +## Messaging bots + +Connect the AI to your chat apps. Full tool access, streaming responses, conversations synced back to the web UI. + +**Telegram** ยท **Discord** ยท **Slack** ยท **WhatsApp** ยท **Signal** + +Message your computer from wherever you are. Ask it to check a build, push a fix, or explain a file. Switch workspaces with `/workspace`, start fresh with `/new`. + +## Gateway API + +cptr exposes an OpenAI-compatible API (`/v1/chat/completions`). Any client that speaks OpenAI, including [Open WebUI](https://github.com/open-webui/open-webui), can use each cptr workspace as a model with full agent capabilities: file access, terminal, web search, tools. + +## More + +| | | +|---|---| +| ๐ŸŽ™๏ธ **Voice memos** | Record audio, auto-transcribe to markdown. | +| ๐Ÿ’ฌ **Message queue** | Queue follow-up messages while the AI is responding. | +| ๐Ÿ”” **Notifications** | Browser notifications and webhooks (Slack, Discord, Teams) when tasks finish. | +| ๐Ÿ“Š **Usage** | Token counts and timing on every response. | +| ๐Ÿ“„ **System prompts** | Per-model, per-workspace, or global. Template variables included. | +| โŒจ๏ธ **Keyboard shortcuts** | Customisable keybindings with a settings panel. | +| ๐ŸŒ **10 languages** | EN, DE, ES, FR, JA, KO, PT-BR, RU, ZH-CN, ZH-TW. | +| ๐Ÿ” **Auth** | Username/password with JWT sessions. Signup toggle for admins. | + ## Design principles **Mobile is first-class.** The interface is built for the phone. Touch-native, portrait-native, designed for the screen people carry. Sessions survive disconnects because on a phone, they will. If a feature only works at a desk, it's not done. diff --git a/cptr/app.py b/cptr/app.py index 7ee442f..4c09e0c 100644 --- a/cptr/app.py +++ b/cptr/app.py @@ -128,6 +128,18 @@ async def auth_middleware(request: Request, call_next): app.add_middleware(ProxyFallbackMiddleware) +# CORS middleware: uses CPTR_CORS_ALLOWED_ORIGINS env var (default "*"). +from fastapi.middleware.cors import CORSMiddleware +from cptr.env import CORS_ALLOWED_ORIGINS + +app.add_middleware( + CORSMiddleware, + allow_origins=CORS_ALLOWED_ORIGINS if isinstance(CORS_ALLOWED_ORIGINS, list) else ["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + # Path normalization middleware (Windows: \ โ†’ / in JSON responses) import platform diff --git a/cptr/env.py b/cptr/env.py index 6d50a38..dc178f3 100644 --- a/cptr/env.py +++ b/cptr/env.py @@ -37,3 +37,13 @@ # โ”€โ”€ Automation scheduler โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ AUTOMATION_POLL_INTERVAL = int(os.environ.get("AUTOMATION_POLL_INTERVAL", "10")) +# โ”€โ”€ CORS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Socket.IO CORS allowed origins. +# Default โ†’ "*" (allow all origins) +# Comma-separated list โ†’ allow specific origins only +# e.g. "https://example.com,https://app.example.com" +_cors_raw = os.environ.get("CPTR_CORS_ALLOWED_ORIGINS", "*") +if _cors_raw.strip() == "*": + CORS_ALLOWED_ORIGINS = "*" +else: + CORS_ALLOWED_ORIGINS = [o.strip() for o in _cors_raw.split(",") if o.strip()] or "*" diff --git a/cptr/socket/main.py b/cptr/socket/main.py index 0f20e92..b46c229 100644 --- a/cptr/socket/main.py +++ b/cptr/socket/main.py @@ -10,8 +10,9 @@ import socketio from cptr.utils.config import check_access +from cptr.env import CORS_ALLOWED_ORIGINS -sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins="*") +sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins=CORS_ALLOWED_ORIGINS) # user_id โ†’ set of connected sids _user_sids: dict[str, set[str]] = {} diff --git a/cptr/utils/proxy_middleware.py b/cptr/utils/proxy_middleware.py index 616093b..8cc2a7d 100644 --- a/cptr/utils/proxy_middleware.py +++ b/cptr/utils/proxy_middleware.py @@ -70,11 +70,41 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None: await self.app(scope, receive, send) return + # Auth check: this middleware runs before the auth middleware (outermost), + # so we must verify the session ourselves to prevent unauthenticated SSRF. + if not self._is_authenticated(scope): + # Not authenticated โ€” fall through to the normal app (which will + # either serve a page or return 401 via the auth middleware). + await self.app(scope, receive, send) + return + if scope["type"] == "http": await self._proxy_http(scope, receive, send, port, path) elif scope["type"] == "websocket": await self._proxy_websocket(scope, receive, send, port, path) + @staticmethod + def _is_authenticated(scope: Scope) -> bool: + """Check if the request carries a valid cptr session cookie.""" + from cptr.utils.config import check_access + + headers = dict(scope.get("headers", [])) + cookie_header = (headers.get(b"cookie") or b"").decode("latin-1", errors="replace") + + token = None + for part in cookie_header.split(";"): + part = part.strip() + if part.startswith("cptr_session="): + token = part.split("=", 1)[1] + break + + if not token: + return False + + client = scope.get("client") + client_host = client[0] if client else "127.0.0.1" + return check_access(client_host=client_host, jwt_token=token) is not None + def _resolve_port(self, scope: Scope) -> int | None: """Determine which proxy port this request belongs to.""" from cptr.routers.proxy import resolve_cached_port From 5761f804fcbac23804df569decf8b24c08323ec4 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 14 Jun 2026 09:19:24 +0200 Subject: [PATCH 2/5] refac --- README.md | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d60a270..0fe7efc 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,26 @@ Close the tab. Come back tomorrow on any device. Everything is where you left it Life is short. Touch grass. +## Install + +```bash +pip install cptr +``` + +Or with [uv](https://docs.astral.sh/uv/): `uvx cptr@latest run` + +## Run + +```bash +cptr run +``` + +Opens in your browser. From other devices: + +```bash +cptr run --host 0.0.0.0 +``` + ## What you get | | | @@ -83,25 +103,7 @@ cptr exposes an OpenAI-compatible API (`/v1/chat/completions`). Any client that Read our [Manifesto](MANIFESTO.md). -## Install - -```bash -pip install cptr -``` - -Or with [uv](https://docs.astral.sh/uv/): `uvx cptr@latest run` - -## Run -```bash -cptr run -``` - -Opens in your browser. From other devices: - -```bash -cptr run --host 0.0.0.0 -``` ## Docker From b0aae29e18cdb17da97a87afe93a36270be7cda7 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 14 Jun 2026 09:20:06 +0200 Subject: [PATCH 3/5] refac --- README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index 0fe7efc..4dfd91a 100644 --- a/README.md +++ b/README.md @@ -18,16 +18,11 @@ Life is short. Touch grass. ```bash pip install cptr +cptr run ``` Or with [uv](https://docs.astral.sh/uv/): `uvx cptr@latest run` -## Run - -```bash -cptr run -``` - Opens in your browser. From other devices: ```bash From 0b3eaea5969a5a5d85fece1d75f1af17defbdc35 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 14 Jun 2026 13:13:02 +0200 Subject: [PATCH 4/5] refac: telegram rich messages (InputRichMessage) + streaming fix --- cptr/utils/adapters/telegram.py | 103 ++++++++++++++++++++------------ cptr/utils/bridge.py | 2 +- 2 files changed, 67 insertions(+), 38 deletions(-) diff --git a/cptr/utils/adapters/telegram.py b/cptr/utils/adapters/telegram.py index 6b6246c..5443e02 100644 --- a/cptr/utils/adapters/telegram.py +++ b/cptr/utils/adapters/telegram.py @@ -1,10 +1,15 @@ """Telegram adapter โ€” zero SDK dependencies. Uses raw httpx calls to the Telegram Bot API: -- ``sendMessageDraft`` for native streaming (Bot API 9.5+) -- ``editMessageText`` as fallback for older clients +- ``sendRichMessageDraft`` for native rich streaming (Bot API 10.1+) +- ``sendRichMessage`` to persist the final response +- ``editMessageText`` with rich_message for edit-based fallback - ``getUpdates`` long-polling for inbound messages - ``sendChatAction(typing)`` for typing indicators + +Rich messages use InputRichMessage which is just {markdown: str} โ€” Telegram +parses the markdown server-side into headings, tables, code blocks, math, etc. +Since AI responses are already markdown, we pass them straight through. """ from __future__ import annotations @@ -21,13 +26,16 @@ API_BASE = "https://api.telegram.org/bot{token}" POLL_TIMEOUT = 30 # seconds (Telegram long-poll) -MAX_MESSAGE_LEN = 4096 +MAX_MESSAGE_LEN = 32_768 # Rich message limit +PLAIN_MESSAGE_LEN = 4096 # Plain message limit RECONNECT_BASE_DELAY = 2.0 RECONNECT_MAX_DELAY = 60.0 + + class TelegramAdapter(BaseAdapter): - """Telegram bot via raw HTTP โ€” getUpdates long-polling + native streaming.""" + """Telegram bot via raw HTTP โ€” getUpdates long-polling + rich streaming.""" platform = "telegram" @@ -40,7 +48,7 @@ def __init__(self, token: str) -> None: self._poll_task: Optional[asyncio.Task] = None self._running = False self._bot_info: dict = {} - self._supports_draft: bool = True # Assume 9.5+, fallback on error + self._supports_draft: bool = True # โ”€โ”€ Lifecycle โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ @@ -79,8 +87,21 @@ async def disconnect(self) -> None: # โ”€โ”€ Sending โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ async def send(self, chat_id: str, text: str) -> str | None: - """Send a message. Returns the message ID for later editing.""" - chunks = chunk_message(text, MAX_MESSAGE_LEN) + """Send a message. Tries rich, falls back to plain.""" + if text and text.strip(): + try: + result = await self._api( + "sendRichMessage", + chat_id=chat_id, + rich_message={"markdown": text.strip()[:MAX_MESSAGE_LEN]}, + ) + if result: + return str(result.get("message_id", "")) + except Exception: + logger.debug("sendRichMessage failed, falling back to sendMessage", exc_info=True) + + # Fallback: plain text + chunks = chunk_message(text, PLAIN_MESSAGE_LEN) msg_id = None for chunk in chunks: result = await self._api( @@ -95,64 +116,79 @@ async def send(self, chat_id: str, text: str) -> str | None: return msg_id async def edit(self, chat_id: str, message_id: str, text: str) -> None: - """Edit a previously sent message.""" + """Edit a message. Tries rich, falls back to plain.""" + if text and text.strip(): + try: + await self._api( + "editMessageText", + chat_id=chat_id, + message_id=int(message_id), + rich_message={"markdown": text.strip()[:MAX_MESSAGE_LEN]}, + ) + return + except TelegramAPIError as e: + if "message is not modified" in e.description.lower(): + return + logger.debug("Rich edit failed, falling back to plain", exc_info=True) + except Exception: + logger.debug("Rich edit failed, falling back to plain", exc_info=True) + + # Fallback: plain text edit try: await self._api( "editMessageText", chat_id=chat_id, message_id=int(message_id), - text=text[:MAX_MESSAGE_LEN], + text=text[:PLAIN_MESSAGE_LEN], parse_mode="Markdown", disable_web_page_preview=True, ) except TelegramAPIError as e: if "message is not modified" in e.description.lower(): - pass # Content unchanged, ignore + pass elif "parse" in e.description.lower(): - # Markdown parse error โ€” retry without parse_mode await self._api( "editMessageText", chat_id=chat_id, message_id=int(message_id), - text=text[:MAX_MESSAGE_LEN], + text=text[:PLAIN_MESSAGE_LEN], disable_web_page_preview=True, ) else: raise async def send_draft(self, chat_id: str, text: str, draft_id: str | None = None) -> str: - """Send a streaming draft using sendMessageDraft (Bot API 9.5+). - - Returns the draft_id for subsequent updates. - Falls back to edit-based streaming if the API doesn't support drafts. - """ + """Stream a draft. Uses rich drafts, falls back to plain drafts.""" if not self._supports_draft: raise TelegramAPIError(400, "Draft not supported") + if not text or not text.strip(): + text = "_Thinking..._" + + # Try rich draft try: params: dict = { "chat_id": chat_id, - "text": text[:MAX_MESSAGE_LEN], - "parse_mode": "Markdown", + "rich_message": {"markdown": text.strip()[:MAX_MESSAGE_LEN]}, } if draft_id: params["draft_id"] = draft_id - - result = await self._api("sendMessageDraft", **params) + result = await self._api("sendRichMessageDraft", **params) return str(result.get("draft_id", result.get("message_id", ""))) - except TelegramAPIError as e: if e.code == 404 or "unknown method" in e.description.lower(): - # API doesn't support sendMessageDraft โ€” fall back self._supports_draft = False - logger.info("Telegram sendMessageDraft not available, falling back to editMessageText") raise - # Markdown parse failure โ€” retry without parse_mode - if "parse" in e.description.lower(): - params.pop("parse_mode", None) - result = await self._api("sendMessageDraft", **params) - return str(result.get("draft_id", result.get("message_id", ""))) - raise + logger.debug("sendRichMessageDraft failed, trying plain", exc_info=True) + except Exception: + logger.debug("sendRichMessageDraft failed, trying plain", exc_info=True) + + # Fallback: plain draft (no parse_mode โ€” guaranteed to work) + params = {"chat_id": chat_id, "text": text[:PLAIN_MESSAGE_LEN]} + if draft_id: + params["draft_id"] = draft_id + result = await self._api("sendMessageDraft", **params) + return str(result.get("draft_id", result.get("message_id", ""))) async def send_typing(self, chat_id: str) -> None: try: @@ -237,13 +273,6 @@ async def _api(self, method: str, **params) -> any: if not body.get("ok"): error_code = body.get("error_code", resp.status_code) description = body.get("description", "Unknown error") - # Markdown parse failure โ€” retry without parse_mode - if error_code == 400 and "parse" in description.lower() and "text" in data: - data.pop("parse_mode", None) - resp = await self._client.post(url, json=data) - body = resp.json() - if body.get("ok"): - return body.get("result") raise TelegramAPIError(error_code, description) return body.get("result") diff --git a/cptr/utils/bridge.py b/cptr/utils/bridge.py index 32b1772..cef364a 100644 --- a/cptr/utils/bridge.py +++ b/cptr/utils/bridge.py @@ -572,7 +572,7 @@ async def _stream_loop( if not adapter: return - max_len = 4096 if bot["platform"] == "telegram" else 2000 + max_len = 32_768 if bot["platform"] == "telegram" else 2000 last_sent = "" is_telegram = bot["platform"] == "telegram" From 5fcd0a6411bd205c86f1caaf2ce7f10276600126 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 14 Jun 2026 14:14:08 +0200 Subject: [PATCH 5/5] refac --- CHANGELOG.md | 9 ++ cptr/utils/adapters/discord.py | 68 +++++++++- cptr/utils/adapters/signal.py | 85 +++++++++++- cptr/utils/adapters/slack.py | 83 ++++++++++- cptr/utils/adapters/telegram.py | 116 +++++++++++++++- cptr/utils/adapters/whatsapp.py | 151 +++++++++++++++++++-- cptr/utils/bridge.py | 234 +++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 8 files changed, 725 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f72734..7a10d64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.3] - 2026-06-14 + +### Changed + +- ๐Ÿ’ฌ **Telegram rich message formatting.** Telegram messages now use `sendRichMessage` / `sendRichMessageDraft` (Bot API `InputRichMessage`) for properly rendered Markdown with bold, italic, code blocks, and links. Falls back gracefully to plain text when the API is unavailable. +- ๐Ÿ“ **Increased Telegram streaming buffer.** The streaming buffer limit for Telegram was raised from 4 096 to 32 768 characters, allowing longer messages to stream without premature chunking. +- ๐Ÿ”’ **Proxy middleware authentication.** The reverse-proxy middleware now verifies the `cptr_session` cookie before forwarding requests, preventing unauthenticated access to proxied local services. +- ๐Ÿ“– **README refresh.** Updated the README with revised feature descriptions and setup instructions. + ## [0.4.2] - 2026-06-14 ### Fixed diff --git a/cptr/utils/adapters/discord.py b/cptr/utils/adapters/discord.py index 056755b..92392b5 100644 --- a/cptr/utils/adapters/discord.py +++ b/cptr/utils/adapters/discord.py @@ -17,7 +17,7 @@ import httpx -from cptr.utils.bridge import BaseAdapter, MessageEvent, chunk_message +from cptr.utils.bridge import Attachment, BaseAdapter, MessageEvent, chunk_message logger = logging.getLogger(__name__) @@ -232,7 +232,29 @@ async def _handle_message_create(self, data: dict) -> None: return content = data.get("content", "").strip() - if not content: + + # Process Discord attachments + attachments: list[Attachment] = [] + for att in data.get("attachments", []): + url = att.get("url") + if not url: + continue + file_data = await self._download_url(url) + if not file_data: + continue + fname = att.get("filename", "file") + ctype = att.get("content_type", "application/octet-stream") + if ctype.startswith("image/"): + att_type = "image" + elif ctype.startswith("audio/"): + att_type = "audio" + else: + att_type = "document" + attachments.append(Attachment( + type=att_type, filename=fname, data=file_data, mime_type=ctype, + )) + + if not content and not attachments: return event = MessageEvent( @@ -241,11 +263,53 @@ async def _handle_message_create(self, data: dict) -> None: sender_id=author.get("id", ""), sender_name=author.get("global_name") or author.get("username", "User"), text=content, + attachments=attachments, ) if self.on_message: await self.on_message(event) + async def _download_url(self, url: str) -> bytes | None: + """Download a file from a URL (Discord CDN).""" + if not self._http: + return None + try: + resp = await self._http.get(url) + if resp.status_code == 200: + return resp.content + logger.warning("[discord] File download failed: HTTP %d", resp.status_code) + except Exception: + logger.exception("[discord] Failed to download %s", url) + return None + + async def send_photo(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a photo as a file attachment.""" + return await self._send_file(chat_id, data, filename, caption) + + async def send_document(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a document as a file attachment.""" + return await self._send_file(chat_id, data, filename, caption) + + async def _send_file(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a file via Discord multipart upload.""" + if not self._http: + return None + try: + files = {"files[0]": (filename, data, "application/octet-stream")} + form_data = {} + if caption: + form_data["content"] = caption[:MAX_MESSAGE_LEN] + resp = await self._http.post( + f"{API_BASE}/channels/{chat_id}/messages", + files=files, + data=form_data, + ) + if resp.status_code == 200: + return resp.json().get("id") + except Exception: + logger.exception("[discord] Failed to send file") + return None + async def verify_token(token: str) -> dict: async with httpx.AsyncClient(timeout=10) as client: diff --git a/cptr/utils/adapters/signal.py b/cptr/utils/adapters/signal.py index 50385c8..3302372 100644 --- a/cptr/utils/adapters/signal.py +++ b/cptr/utils/adapters/signal.py @@ -19,7 +19,7 @@ import httpx -from cptr.utils.bridge import BaseAdapter, MessageEvent, chunk_message +from cptr.utils.bridge import Attachment, BaseAdapter, MessageEvent, chunk_message logger = logging.getLogger(__name__) @@ -29,6 +29,16 @@ RECONNECT_MAX_DELAY = 60.0 +def _ext_for_mime(mime: str) -> str: + """Map common MIME types to file extensions.""" + mapping = { + "image/jpeg": ".jpg", "image/png": ".png", "image/gif": ".gif", + "image/webp": ".webp", "audio/ogg": ".ogg", "audio/mpeg": ".mp3", + "audio/aac": ".aac", "video/mp4": ".mp4", "application/pdf": ".pdf", + } + return mapping.get(mime, "") + + def _parse_token(token: str) -> tuple[str, str]: """Parse 'base_url|phone_number' format.""" if "|" in token: @@ -171,7 +181,29 @@ async def _process_message(self, msg: dict) -> None: return text = (data_message.get("message") or "").strip() - if not text: + + # Process attachments from signal-cli + attachments: list[Attachment] = [] + for att in data_message.get("attachments", []): + att_id = att.get("id") + if not att_id: + continue + file_data = await self._download_attachment(att_id) + if not file_data: + continue + content_type = att.get("contentType", "application/octet-stream") + fname = att.get("filename") or f"attachment{_ext_for_mime(content_type)}" + if content_type.startswith("image/"): + att_type = "image" + elif content_type.startswith("audio/"): + att_type = "audio" + else: + att_type = "document" + attachments.append(Attachment( + type=att_type, filename=fname, data=file_data, mime_type=content_type, + )) + + if not text and not attachments: return source = envelope.get("source", "") @@ -187,11 +219,60 @@ async def _process_message(self, msg: dict) -> None: sender_id=source, sender_name=source_name, text=text, + attachments=attachments, ) if self.on_message: await self.on_message(event) + async def _download_attachment(self, attachment_id: str) -> bytes | None: + """Download an attachment from signal-cli REST API.""" + if not self._http: + return None + try: + resp = await self._http.get( + f"{self._base_url}/v1/attachments/{attachment_id}", + ) + if resp.status_code == 200: + return resp.content + logger.warning("[signal] Attachment download failed: HTTP %d", resp.status_code) + except Exception: + logger.exception("[signal] Failed to download attachment %s", attachment_id) + return None + + async def send_photo(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a photo as a base64 attachment.""" + return await self._send_with_attachment(chat_id, data, filename, caption) + + async def send_document(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a document as a base64 attachment.""" + return await self._send_with_attachment(chat_id, data, filename, caption) + + async def _send_with_attachment( + self, chat_id: str, data: bytes, filename: str, caption: str = "", + ) -> str | None: + """Send a message with a base64-encoded attachment via signal-cli.""" + if not self._http: + return None + import base64 + try: + resp = await self._http.post( + f"{self._base_url}/v2/send", + json={ + "message": caption or "", + "number": self._phone, + "recipients": [chat_id], + "base64_attachments": [ + base64.b64encode(data).decode("ascii") + ], + }, + ) + result = resp.json() + return str(result.get("timestamp", "")) + except Exception: + logger.exception("[signal] Failed to send attachment") + return None + async def verify_token(token: str) -> dict: """Verify signal-cli REST API connection and phone number.""" diff --git a/cptr/utils/adapters/slack.py b/cptr/utils/adapters/slack.py index 166b1fd..21dcb40 100644 --- a/cptr/utils/adapters/slack.py +++ b/cptr/utils/adapters/slack.py @@ -16,7 +16,7 @@ import httpx -from cptr.utils.bridge import BaseAdapter, MessageEvent, chunk_message +from cptr.utils.bridge import Attachment, BaseAdapter, MessageEvent, chunk_message logger = logging.getLogger(__name__) @@ -192,7 +192,29 @@ async def _handle_event(self, event: dict) -> None: return text = (event.get("text") or "").strip() - if not text: + + # Process file attachments + attachments: list[Attachment] = [] + for f in event.get("files", []): + url = f.get("url_private") + if not url: + continue + file_data = await self._download_file(url) + if not file_data: + continue + fname = f.get("name") or f.get("title") or "file" + mime = f.get("mimetype", "application/octet-stream") + if mime.startswith("image/"): + att_type = "image" + elif mime.startswith("audio/"): + att_type = "audio" + else: + att_type = "document" + attachments.append(Attachment( + type=att_type, filename=fname, data=file_data, mime_type=mime, + )) + + if not text and not attachments: return # Resolve display name @@ -205,6 +227,7 @@ async def _handle_event(self, event: dict) -> None: sender_id=user_id, sender_name=sender_name, text=text, + attachments=attachments, ) if self.on_message: @@ -223,6 +246,62 @@ async def _resolve_name(self, user_id: str) -> str: pass return "User" + async def _download_file(self, url: str) -> bytes | None: + """Download a file from Slack (requires bot token auth).""" + if not self._http: + return None + try: + resp = await self._http.get(url) + if resp.status_code == 200: + return resp.content + logger.warning("[slack] File download failed: HTTP %d", resp.status_code) + except Exception: + logger.exception("[slack] Failed to download %s", url) + return None + + async def send_photo(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a photo via Slack file upload.""" + return await self._upload_file(chat_id, data, filename, caption) + + async def send_document(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a document via Slack file upload.""" + return await self._upload_file(chat_id, data, filename, caption) + + async def _upload_file(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Upload a file to Slack via files.uploadV2.""" + if not self._http: + return None + try: + # Step 1: Get upload URL + resp = await self._http.get( + f"{API_BASE}/files.getUploadURLExternal", + params={"filename": filename, "length": len(data)}, + ) + url_data = resp.json() + if not url_data.get("ok"): + logger.warning("[slack] files.getUploadURLExternal failed: %s", url_data.get("error")) + return None + + # Step 2: Upload to the provided URL + upload_url = url_data["upload_url"] + file_id = url_data["file_id"] + await self._http.post(upload_url, content=data) + + # Step 3: Complete the upload + await self._http.post( + f"{API_BASE}/files.completeUploadExternal", + json={ + "files": [{"id": file_id, "title": filename}], + "channel_id": chat_id, + "initial_comment": caption or "", + }, + ) + return file_id + except Exception: + logger.exception("[slack] Failed to upload file") + return None + + async def verify_token(token: str) -> dict: """Verify a Slack bot token by calling auth.test.""" diff --git a/cptr/utils/adapters/telegram.py b/cptr/utils/adapters/telegram.py index 5443e02..03ab97b 100644 --- a/cptr/utils/adapters/telegram.py +++ b/cptr/utils/adapters/telegram.py @@ -20,7 +20,7 @@ import httpx -from cptr.utils.bridge import BaseAdapter, MessageEvent, chunk_message +from cptr.utils.bridge import Attachment, BaseAdapter, MessageEvent, chunk_message logger = logging.getLogger(__name__) @@ -238,7 +238,65 @@ async def _process_update(self, update: dict) -> None: return text = message.get("text") or message.get("caption") or "" - if not text.strip(): + + # Collect attachments from media types + attachments: list = [] + + # Photos โ€” Telegram sends multiple sizes, pick the largest + if message.get("photo"): + photo = message["photo"][-1] # largest size + file_data = await self._download_file(photo["file_id"]) + if file_data: + attachments.append(Attachment( + type="image", + filename="photo.jpg", + data=file_data, + mime_type="image/jpeg", + )) + + # Documents (PDF, spreadsheets, etc.) + if message.get("document"): + doc = message["document"] + file_data = await self._download_file(doc["file_id"]) + if file_data: + fname = doc.get("file_name", "document") + mime = doc.get("mime_type", "application/octet-stream") + att_type = "image" if mime.startswith("image/") else "document" + attachments.append(Attachment( + type=att_type, + filename=fname, + data=file_data, + mime_type=mime, + )) + + # Voice messages (OGG/Opus) + if message.get("voice"): + voice = message["voice"] + file_data = await self._download_file(voice["file_id"]) + if file_data: + attachments.append(Attachment( + type="audio", + filename="voice.ogg", + data=file_data, + mime_type=voice.get("mime_type", "audio/ogg"), + )) + + # Audio files (music, audio messages sent as files) + if message.get("audio"): + audio = message["audio"] + file_data = await self._download_file(audio["file_id"]) + if file_data: + from cptr.utils.bridge import Attachment + fname = audio.get("file_name", "audio.mp3") + attachments.append(Attachment( + type="audio", + filename=fname, + data=file_data, + mime_type=audio.get("mime_type", "audio/mpeg"), + )) + + # Skip if no text AND no attachments + if not text.strip() and not attachments: return chat = message.get("chat", {}) @@ -253,11 +311,65 @@ async def _process_update(self, update: dict) -> None: + (" " + sender.get("last_name", "") if sender.get("last_name") else "") ).strip() or "User", text=text, + attachments=attachments, ) if self.on_message: await self.on_message(event) + async def _download_file(self, file_id: str) -> bytes | None: + """Download a file from Telegram servers via getFile API.""" + try: + file_info = await self._api("getFile", file_id=file_id) + file_path = file_info.get("file_path") + if not file_path: + return None + url = f"https://api.telegram.org/file/bot{self._token}/{file_path}" + resp = await self._client.get(url) + if resp.status_code == 200: + return resp.content + logger.warning("[telegram] File download failed: HTTP %d", resp.status_code) + return None + except Exception: + logger.exception("[telegram] Failed to download file %s", file_id) + return None + + async def send_photo(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a photo via multipart upload.""" + if not self._client: + return None + url = f"{self._base}/sendPhoto" + files = {"photo": (filename, data, "image/jpeg")} + form_data = {"chat_id": chat_id} + if caption: + form_data["caption"] = caption[:1024] + try: + resp = await self._client.post(url, files=files, data=form_data) + body = resp.json() + if body.get("ok"): + return str(body.get("result", {}).get("message_id", "")) + except Exception: + logger.exception("[telegram] Failed to send photo") + return None + + async def send_document(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a document via multipart upload.""" + if not self._client: + return None + url = f"{self._base}/sendDocument" + files = {"document": (filename, data, "application/octet-stream")} + form_data = {"chat_id": chat_id} + if caption: + form_data["caption"] = caption[:1024] + try: + resp = await self._client.post(url, files=files, data=form_data) + body = resp.json() + if body.get("ok"): + return str(body.get("result", {}).get("message_id", "")) + except Exception: + logger.exception("[telegram] Failed to send document") + return None + # โ”€โ”€ API helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ async def _api(self, method: str, **params) -> any: diff --git a/cptr/utils/adapters/whatsapp.py b/cptr/utils/adapters/whatsapp.py index fa9ad48..414c74d 100644 --- a/cptr/utils/adapters/whatsapp.py +++ b/cptr/utils/adapters/whatsapp.py @@ -20,7 +20,7 @@ import httpx -from cptr.utils.bridge import BaseAdapter, MessageEvent, chunk_message +from cptr.utils.bridge import Attachment, BaseAdapter, MessageEvent, chunk_message logger = logging.getLogger(__name__) @@ -96,14 +96,65 @@ async def handle_webhook(self, payload: dict) -> None: for change in entry.get("changes", []): value = change.get("value", {}) for message in value.get("messages", []): - if message.get("type") != "text": + msg_type = message.get("type", "") + + text = "" + attachments: list[Attachment] = [] + + if msg_type == "text": + text = message.get("text", {}).get("body", "").strip() + elif msg_type == "image": + image = message.get("image", {}) + text = image.get("caption", "").strip() + media_data = await self._download_media(image.get("id", "")) + if media_data: + attachments.append(Attachment( + type="image", + filename="photo.jpg", + data=media_data, + mime_type=image.get("mime_type", "image/jpeg"), + )) + elif msg_type == "audio": + audio = message.get("audio", {}) + media_data = await self._download_media(audio.get("id", "")) + if media_data: + attachments.append(Attachment( + type="audio", + filename="voice.ogg", + data=media_data, + mime_type=audio.get("mime_type", "audio/ogg"), + )) + elif msg_type == "document": + doc = message.get("document", {}) + text = doc.get("caption", "").strip() + media_data = await self._download_media(doc.get("id", "")) + if media_data: + fname = doc.get("filename", "document") + attachments.append(Attachment( + type="document", + filename=fname, + data=media_data, + mime_type=doc.get("mime_type", "application/octet-stream"), + )) + elif msg_type == "video": + video = message.get("video", {}) + text = video.get("caption", "").strip() + media_data = await self._download_media(video.get("id", "")) + if media_data: + attachments.append(Attachment( + type="document", + filename="video.mp4", + data=media_data, + mime_type=video.get("mime_type", "video/mp4"), + )) + else: continue - sender = message.get("from", "") - text = message.get("text", {}).get("body", "").strip() - if not text: + if not text and not attachments: continue + sender = message.get("from", "") + # Resolve sender name from contacts contacts = value.get("contacts", []) sender_name = "User" @@ -119,6 +170,7 @@ async def handle_webhook(self, payload: dict) -> None: sender_id=sender, sender_name=sender_name, text=text, + attachments=attachments, ) await self._message_queue.put(event) @@ -162,8 +214,6 @@ async def send(self, chat_id: str, text: str) -> str | None: async def edit(self, chat_id: str, message_id: str, text: str) -> None: """WhatsApp doesn't support message editing. Send a new message instead.""" - # WhatsApp has no edit API โ€” for streaming we just send new messages - # The stream loop will handle this gracefully pass async def send_typing(self, chat_id: str) -> None: @@ -171,18 +221,101 @@ async def send_typing(self, chat_id: str) -> None: if not self._http: return try: - # Mark as read (closest to typing indicator) await self._http.post( f"{API_BASE}/{self._phone_number_id}/messages", json={ "messaging_product": "whatsapp", "status": "read", - "message_id": "placeholder", # Not ideal but API requires it + "message_id": "placeholder", }, ) except Exception: pass + # โ”€โ”€ Media โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + async def _download_media(self, media_id: str) -> bytes | None: + """Download media from WhatsApp Cloud API. + + Two-step: GET /{media_id} for URL, then GET the URL for the bytes. + """ + if not self._http or not media_id: + return None + try: + # Step 1: Get the media URL + resp = await self._http.get(f"{API_BASE}/{media_id}") + data = resp.json() + url = data.get("url") + if not url: + return None + # Step 2: Download the actual file (requires auth header) + resp2 = await self._http.get(url) + if resp2.status_code == 200: + return resp2.content + logger.warning("[whatsapp] Media download failed: HTTP %d", resp2.status_code) + except Exception: + logger.exception("[whatsapp] Failed to download media %s", media_id) + return None + + async def send_photo(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a photo via WhatsApp media upload.""" + media_id = await self._upload_media(data, filename, "image/jpeg") + if not media_id: + return await self.send(chat_id, caption) if caption else None + return await self._send_media_message(chat_id, "image", media_id, caption) + + async def send_document(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a document via WhatsApp media upload.""" + media_id = await self._upload_media(data, filename, "application/octet-stream") + if not media_id: + return await self.send(chat_id, caption) if caption else None + return await self._send_media_message(chat_id, "document", media_id, caption, filename) + + async def _upload_media(self, data: bytes, filename: str, mime_type: str) -> str | None: + """Upload media to WhatsApp Cloud API, return media_id.""" + if not self._http: + return None + try: + resp = await self._http.post( + f"{API_BASE}/{self._phone_number_id}/media", + files={"file": (filename, data, mime_type)}, + data={"messaging_product": "whatsapp", "type": mime_type}, + ) + result = resp.json() + return result.get("id") + except Exception: + logger.exception("[whatsapp] Failed to upload media") + return None + + async def _send_media_message( + self, chat_id: str, media_type: str, media_id: str, caption: str = "", filename: str = "", + ) -> str | None: + """Send a media message using an uploaded media_id.""" + if not self._http: + return None + media_obj: dict = {"id": media_id} + if caption: + media_obj["caption"] = caption[:1024] + if filename and media_type == "document": + media_obj["filename"] = filename + try: + resp = await self._http.post( + f"{API_BASE}/{self._phone_number_id}/messages", + json={ + "messaging_product": "whatsapp", + "to": chat_id, + "type": media_type, + media_type: media_obj, + }, + ) + data = resp.json() + messages = data.get("messages", []) + return messages[0].get("id") if messages else None + except Exception: + logger.exception("[whatsapp] Failed to send %s message", media_type) + return None + + async def verify_token(token: str) -> dict: """Verify WhatsApp Cloud API credentials.""" diff --git a/cptr/utils/bridge.py b/cptr/utils/bridge.py index cef364a..287535d 100644 --- a/cptr/utils/bridge.py +++ b/cptr/utils/bridge.py @@ -16,7 +16,7 @@ import time import uuid from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Awaitable, Callable, Optional logger = logging.getLogger(__name__) @@ -31,6 +31,16 @@ # โ”€โ”€ Adapter interface โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +@dataclass +class Attachment: + """A file attached to a message.""" + + type: str # "image", "audio", "document" + filename: str # Original filename or generated one + data: bytes # Raw file content + mime_type: str # e.g. "image/jpeg", "audio/ogg" + + @dataclass class MessageEvent: """Normalized inbound message from any platform.""" @@ -40,6 +50,7 @@ class MessageEvent: sender_id: str # Platform's user ID sender_name: str # Display name text: str # Message content + attachments: list[Attachment] = field(default_factory=list) class BaseAdapter(ABC): @@ -80,6 +91,18 @@ async def send_typing(self, chat_id: str) -> None: """Show a typing indicator in the chat.""" ... + async def send_photo(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a photo. Default: falls back to send() with caption only.""" + if caption: + return await self.send(chat_id, caption) + return None + + async def send_document(self, chat_id: str, data: bytes, filename: str, caption: str = "") -> str | None: + """Send a file. Default: falls back to send() with caption only.""" + if caption: + return await self.send(chat_id, caption) + return None + # โ”€โ”€ Message chunking โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ @@ -353,9 +376,94 @@ async def _handle_message(self, event: MessageEvent, bot: dict) -> None: pass return + if cmd == "/stop": + chat_id = await find_chat_for_thread(bot["id"], event.chat_id) + cancelled = False + if chat_id: + from cptr.utils.chat_task import _tasks, _task_chat, cancel_task + for mid, cid in list(_task_chat.items()): + if cid == chat_id and mid in _tasks and not _tasks[mid].done(): + await cancel_task(mid) + cancelled = True + break + if adapter: + try: + msg = "โน๏ธ Stopped." if cancelled else "Nothing running." + await adapter.send(event.chat_id, msg) + except Exception: + pass + return + + if cmd == "/retry": + from cptr.models import ChatMessage + chat_id = await find_chat_for_thread(bot["id"], event.chat_id) + if not chat_id: + if adapter: + try: + await adapter.send(event.chat_id, "No active conversation. Send a message first.") + except Exception: + pass + return + # Check if a task is already running + from cptr.utils.chat_task import get_active_chat_ids + if chat_id in get_active_chat_ids(): + if adapter: + try: + await adapter.send(event.chat_id, "โณ A task is already running. Use /stop first.") + except Exception: + pass + return + # Find the last user message + msgs = await ChatMessage.get_all_by_chat(chat_id) + last_user = None + for m in reversed(msgs): + if m.role == "user": + last_user = m + break + if not last_user: + if adapter: + try: + await adapter.send(event.chat_id, "Nothing to retry.") + except Exception: + pass + return + # Re-dispatch with the same text as a new branch + event.text = last_user.content + await self._dispatch_task(chat_id, event, bot, adapter) + return + + if cmd == "/model": + parts = clean.split(None, 1) + new_model = parts[1].strip() if len(parts) > 1 else "" + if not new_model: + if adapter: + try: + await adapter.send(event.chat_id, f"Current model: `{bot['model_id']}`") + except Exception: + pass + return + # Update bot config + from cptr.models import Config + bots_raw = await Config.get("bots") or [] + for b in bots_raw: + if b.get("id") == bot["id"]: + b["model_id"] = new_model + break + await Config.upsert({"bots": bots_raw}) + bot["model_id"] = new_model + if adapter: + try: + await adapter.send(event.chat_id, f"โœ… Model switched to: `{new_model}`") + except Exception: + pass + return + if cmd == "/help": help_text = ( "/new โ€” Start a new conversation\n" + "/stop โ€” Stop the running agent\n" + "/retry โ€” Retry the last message\n" + "/model [id] โ€” Show or switch model\n" "/workspace โ€” Switch workspace (starts new chat)\n" "/workspaces โ€” List available workspaces\n" "/help โ€” Show this message" @@ -434,7 +542,7 @@ async def _handle_message(self, event: MessageEvent, bot: dict) -> None: pass return - if not clean: + if not clean and not event.attachments: return # 3. Thread mapping โ€” find or create a cptr chat @@ -474,14 +582,130 @@ async def _create_chat(self, event: MessageEvent, bot: dict) -> str: return chat.id + async def _process_attachments( + self, + attachments: list[Attachment], + adapter: BaseAdapter | None, + platform_chat_id: str, + ) -> tuple[list[dict], str]: + """Process inbound attachments: save to storage, transcribe voice. + + Returns (file_entries_for_meta, text_to_prepend). + file_entries match the web UI format so _load_message_history handles + them automatically (base64 for images, file:// refs for documents). + """ + from cptr.models import Config + from cptr.utils.config import _get_jwt_secret + from cptr.utils.crypto import decrypt_key + from cptr.utils.storage import get_storage + + file_entries: list[dict] = [] + text_parts: list[str] = [] + + for att in attachments: + if att.type == "audio": + # Voice message โ†’ transcribe via Whisper + transcript = await self._transcribe_voice(att, adapter, platform_chat_id) + if transcript: + text_parts.append(transcript) + continue + + # Images and documents โ†’ save to blob storage + file_id = str(uuid.uuid4()) + try: + await get_storage().put(file_id, att.data) + except Exception: + logger.exception("[bridge] Failed to save attachment %s", att.filename) + continue + + entry: dict = { + "id": file_id, + "name": att.filename, + "content_type": att.mime_type, + } + if att.type == "image": + entry["type"] = "image" + else: + entry["type"] = "file" + + file_entries.append(entry) + logger.info("[bridge] Saved attachment %s (%s, %d bytes)", att.filename, att.type, len(att.data)) + + return file_entries, "\n".join(text_parts) + + async def _transcribe_voice( + self, + att: Attachment, + adapter: BaseAdapter | None, + platform_chat_id: str, + ) -> str: + """Transcribe a voice attachment using the configured STT API. + + Returns the transcript text, or empty string on failure/not configured. + """ + from cptr.models import Config + from cptr.utils.config import _get_jwt_secret + from cptr.utils.crypto import decrypt_key + + api_key_encrypted = await Config.get("audio.stt_api_key") + if not api_key_encrypted: + # STT not configured โ€” warn the user + if adapter: + try: + await adapter.send( + platform_chat_id, + "โš ๏ธ Voice messages require speech-to-text to be configured in Settings โ†’ Audio.", + ) + except Exception: + pass + return "" + + api_key = decrypt_key(api_key_encrypted, _get_jwt_secret()) + base_url = (await Config.get("audio.stt_base_url")) or "https://api.openai.com/v1" + model = (await Config.get("audio.stt_model")) or "whisper-1" + + try: + from cptr.routers.audio import _transcribe_chunk + + transcript = await _transcribe_chunk( + data=att.data, + filename=att.filename, + content_type=att.mime_type, + base_url=base_url, + api_key=api_key, + model=model, + ) + if transcript: + logger.info("[bridge] Transcribed voice message: %d chars", len(transcript)) + return transcript + except Exception: + logger.exception("[bridge] Voice transcription failed") + if adapter: + try: + await adapter.send(platform_chat_id, "โš ๏ธ Failed to transcribe voice message.") + except Exception: + pass + return "" + async def _dispatch_task( self, chat_id: str, event: MessageEvent, bot: dict, adapter: BaseAdapter | None, ) -> None: - from cptr.models import Chat, ChatMessage + from cptr.models import Chat, ChatMessage, Config from cptr.routers.chat import _resolve_connection from cptr.utils.chat_task import start_task, _task_chat from cptr.utils.config import now_ms + # โ”€โ”€ Process attachments โ”€โ”€ + user_meta: dict | None = None + if event.attachments: + file_entries, text_prepend = await self._process_attachments( + event.attachments, adapter, event.chat_id, + ) + if file_entries: + user_meta = {"files": file_entries} + if text_prepend: + event.text = f"{text_prepend}\n{event.text}".strip() if event.text else text_prepend + # Get current chat state for parent_id threading chat = await Chat.get_by_id(chat_id) parent_id = chat.current_message_id if chat else None @@ -492,7 +716,7 @@ async def _dispatch_task( await ChatMessage.create( chat_id=chat_id, role="user", content=event.text, parent_id=parent_id, - meta={"queued": True}, created_at=now_ms(), + meta={"queued": True, **(user_meta or {})}, created_at=now_ms(), ) logger.info("[bridge] Queued message for busy chat %s", chat_id[:8]) return @@ -500,7 +724,7 @@ async def _dispatch_task( # Create user message โ€” parent_id chains to previous assistant reply user_msg = await ChatMessage.create( chat_id=chat_id, role="user", content=event.text, - parent_id=parent_id, created_at=now_ms(), + parent_id=parent_id, meta=user_meta, created_at=now_ms(), ) # Resolve model connection diff --git a/pyproject.toml b/pyproject.toml index 425683d..9c623fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cptr" -version = "0.4.2" +version = "0.4.3" description = "Your computer, from anywhere. Code, manage, and control your machine from the web." license = {file = "LICENSE"} readme = "README.md"