diff --git a/README.md b/README.md index 739a471..f48ae51 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Store your thoughts. Save your images and links. Ask anything, anytime. [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) [![Anthropic API](https://img.shields.io/badge/Anthropic-API-blueviolet.svg)](https://docs.anthropic.com/) [![Telegram Bot](https://img.shields.io/badge/Telegram-Bot-26A5E4.svg)](https://core.telegram.org/bots) -[![WhatsApp](https://img.shields.io/badge/WhatsApp-coming_soon-lightgrey.svg)]() +[![WhatsApp](https://img.shields.io/badge/WhatsApp-Bot-25D366.svg)](https://github.com/krypton-byte/neonize) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md) @@ -46,32 +46,54 @@ memclaw On first run, Memclaw will prompt you for your API keys and save them to `~/.memclaw/.env`. You can update them anytime with `memclaw configure`. -## Telegram Bot +## Messaging Platforms The main way to use Memclaw. Just talk to it naturally — no commands needed. Send text, photos, voice messages, or links. The agent figures out what to do: store it, search your memories, retrieve images, or just chat. -> **WhatsApp support is coming soon.** Telegram is the only supported messaging platform for now. +Both platforms share the same agent, memories, and search index — your data is unified regardless of how you interact. -The bot shows a **typing indicator** while processing so you know it's working on your request. +### Telegram Bot -### Setup +The Telegram bot shows a **typing indicator** while processing so you know it's working on your request. + +#### Setup 1. Create a bot via [@BotFather](https://t.me/BotFather) and copy the token. 2. Get your Telegram user ID (e.g. via [@userinfobot](https://t.me/userinfobot)). 3. 
Start the bot — on first run you'll be prompted for all keys: ```bash -memclaw bot +memclaw telegram +``` + +### WhatsApp Bot + +Uses your **personal WhatsApp account** via WhatsApp Web pairing — no Meta Business account, no webhooks, no public server. Powered by [`neonize`](https://github.com/krypton-byte/neonize) (whatsmeow under the hood). + +#### Prerequisite: libmagic + +neonize depends on `python-magic`, which needs the `libmagic` system library: + +- macOS: `brew install libmagic` +- Debian/Ubuntu: `sudo apt install libmagic1` +- Fedora/RHEL: `sudo dnf install file-libs` + +#### Setup + +```bash +memclaw whatsapp ``` -To update keys later: `memclaw configure`. +On first run a QR code is printed to your terminal. On your phone: **Settings → Linked Devices → Link a Device**, and scan it. The session persists under `~/.memclaw/whatsapp/` so you only pair once. + +Only messages you send to yourself (via WhatsApp's "Message Yourself" chat) are processed. DMs from other people and group messages are ignored. ### What it handles | Message type | What happens | |-------------|-------------| | **Text** | Agent decides: store as memory, search existing memories, or both. Links are extracted, fetched, and summarized automatically. | -| **Photo** | AI-described via vision model, stored and indexed. Agent acknowledges and responds. File ID saved for later retrieval. | +| **Photo** | AI-described via vision model, stored and indexed. Agent acknowledges and responds. Saved for later retrieval. | | **Voice** | Transcribed via Whisper, stored as text. Agent responds to the content. Links extracted. | ### Examples @@ -93,7 +115,8 @@ Here's the sprint planning whiteboard you saved last week. ```mermaid flowchart LR - You -->|text / images / links| Agent[Memclaw Agent] + TG[Telegram] <-->|text / images / links
response + images| Agent[Memclaw Agent] + WA[WhatsApp] <-->|text / images / links
response + images| Agent subgraph sandbox ["~/.memclaw/"] Agent -->|save| Tools1["memory_save
image_save
file_write"] @@ -102,8 +125,6 @@ flowchart LR DB --> Tools2["memory_search
image_search"] Tools2 -->|results| Agent end - - Agent -->|response + images| You ``` Memclaw draws inspiration from [OpenClaw](https://github.com/openclaw/openclaw)'s memory architecture and uses the [Anthropic API](https://docs.anthropic.com/) directly with a lightweight agentic loop. @@ -196,7 +217,7 @@ The image is described by an AI vision model and the description is stored and i ## Configuration -On first run, `memclaw` or `memclaw bot` will launch an interactive setup wizard that saves your keys to `~/.memclaw/.env`. Run `memclaw configure` anytime to update them. +On first run, `memclaw`, `memclaw telegram`, or `memclaw whatsapp` will launch an interactive setup wizard that saves your keys to `~/.memclaw/.env`. The wizard only prompts for keys relevant to the command you ran — run `memclaw configure` anytime to update all keys. You can also set keys via environment variables or a `.env` in the current directory — these take the usual precedence over the saved config. diff --git a/memclaw/agent.py b/memclaw/agent.py index 04657e4..9974b9a 100644 --- a/memclaw/agent.py +++ b/memclaw/agent.py @@ -89,8 +89,9 @@ class MemclawAgent: Uses the raw Anthropic Messages API with a hand-rolled agentic loop. 
""" - def __init__(self, config: MemclawConfig): + def __init__(self, config: MemclawConfig, platform: str | None = None): self.config = config + self.platform = platform self.store = MemoryStore(config) self.index = MemoryIndex(config) self.search = HybridSearch(config, self.index) @@ -103,6 +104,7 @@ def __init__(self, config: MemclawConfig): index=self.index, search=self.search, found_images=self._found_images, + platform=platform, ) # ── Startup / sync ─────────────────────────────────────────────── diff --git a/memclaw/bot/handlers.py b/memclaw/bot/handlers.py index 4a147ad..c1b9407 100644 --- a/memclaw/bot/handlers.py +++ b/memclaw/bot/handlers.py @@ -39,7 +39,7 @@ class MessageHandlers: def __init__(self, config: MemclawConfig, openai_client: AsyncOpenAI): self.config = config self.openai_client = openai_client - self.agent = MemclawAgent(config) + self.agent = MemclawAgent(config, platform="telegram") self.link_processor = LinkProcessor(openai_client) def _check_user(self, user_id: int) -> bool: @@ -151,7 +151,7 @@ async def handle_photo(self, update: Update, context: ContextTypes.DEFAULT_TYPE) "\nThis summary has NOT been saved yet. Save it if the content is worth remembering." ) - prompt_text = f"User sent a photo. file_id={photo.file_id}" + prompt_text = f"User sent a photo. media_ref={photo.file_id}" if caption: prompt_text += f"\nCaption: {caption}" if link_info: diff --git a/memclaw/bot/whatsapp_handlers.py b/memclaw/bot/whatsapp_handlers.py new file mode 100644 index 0000000..dd31245 --- /dev/null +++ b/memclaw/bot/whatsapp_handlers.py @@ -0,0 +1,272 @@ +"""WhatsApp bot handlers for Memclaw (personal account via WhatsApp Web). + +Uses `neonize` (whatsmeow Go library under the hood) to connect to the user's +personal WhatsApp account via QR code pairing — no Meta Business account, +no webhooks, no public server. 
+ +Every incoming message (text, image, voice) goes through the unified +MemclawAgent, which decides whether to store, search, or just respond. +""" + +from __future__ import annotations + +import base64 +import uuid +from pathlib import Path + +from loguru import logger +from openai import AsyncOpenAI + +from neonize.aioze.client import NewAClient +from neonize.aioze.events import ConnectedEv, MessageEv, PairStatusEv + +from ..agent import MemclawAgent +from ..config import MemclawConfig +from .link_processor import LinkProcessor + + +class WhatsAppBot: + """Personal WhatsApp bot backed by neonize + MemclawAgent.""" + + def __init__(self, config: MemclawConfig, openai_client: AsyncOpenAI): + self.config = config + self.openai_client = openai_client + self.agent = MemclawAgent(config, platform="whatsapp") + self.link_processor = LinkProcessor(openai_client) + + self.client = NewAClient(str(config.whatsapp_session_db)) + self._register_handlers() + + # ------------------------------------------------------------------ + # Event registration + # ------------------------------------------------------------------ + + def _register_handlers(self): + @self.client.event(ConnectedEv) + async def _on_connected(_cli: NewAClient, _ev: ConnectedEv): + logger.info("WhatsApp connected") + + @self.client.event(PairStatusEv) + async def _on_pair(_cli: NewAClient, ev: PairStatusEv): + logger.info("WhatsApp paired as +{jid}", jid=ev.ID.User) + + @self.client.event(MessageEv) + async def _on_message(cli: NewAClient, ev: MessageEv): + try: + await self._route_message(cli, ev) + except Exception as exc: + logger.exception("Error handling WhatsApp message: {exc}", exc=exc) + + # ------------------------------------------------------------------ + # Access control + # ------------------------------------------------------------------ + + def _check_sender(self, ev: MessageEv) -> bool: + """Only process self-notes (your own messages to yourself).""" + source = ev.Info.MessageSource + if 
source.IsGroup: + return False + return source.IsFromMe and source.Chat.User == source.Sender.User  # self-chat only: IsFromMe alone also matches DMs you send to other people, and replies would go to their chat + + # ------------------------------------------------------------------ + # Message routing + # ------------------------------------------------------------------ + + async def _route_message(self, cli: NewAClient, ev: MessageEv): + if not self._check_sender(ev): + return + + msg = ev.Message + + if msg.imageMessage.ListFields(): + await self._handle_image(cli, ev) + return + if msg.audioMessage.ListFields(): + await self._handle_audio(cli, ev) + return + + text = msg.conversation or msg.extendedTextMessage.text + if text: + await self._handle_text(cli, ev, text) + else: + logger.debug("Ignoring unsupported WhatsApp message type") + + # ------------------------------------------------------------------ + # Text messages + # ------------------------------------------------------------------ + + async def _handle_text(self, cli: NewAClient, ev: MessageEv, text: str): + sender = ev.Info.MessageSource.Sender.User + logger.info("WhatsApp text from {s}: {t}", s=sender, t=text[:100]) + + prompt_parts = [text] + links = await self.link_processor.process_links(text) + for link in links: + if link.get("summary"): + prompt_parts.append( + f"\n[Link summary] {link['url']}: {link['summary']}" + "\nThis summary has NOT been saved yet. Save it if the content is worth remembering." 
+ ) + + prompt = "\n".join(prompt_parts) + response_text, found_images = await self.agent.handle(prompt) + await self._send_response(cli, ev, response_text, found_images) + + # ------------------------------------------------------------------ + # Image messages + # ------------------------------------------------------------------ + + async def _handle_image(self, cli: NewAClient, ev: MessageEv): + img_msg = ev.Message.imageMessage + caption = img_msg.caption or "" + sender = ev.Info.MessageSource.Sender.User + logger.info("WhatsApp image from {s}, caption={c!r}", s=sender, c=caption) + + try: + image_bytes = await cli.download_any(ev.Message) + except Exception as exc: + logger.error("Failed to download WhatsApp image: {exc}", exc=exc) + await cli.reply_message("Sorry, I couldn't download that image.", ev) + return + + mime_type = img_msg.mimetype or "image/jpeg" + ext = _mime_to_ext(mime_type) or ".jpg" + local_path = self.config.whatsapp_media_dir / f"{uuid.uuid4().hex}{ext}" + local_path.write_bytes(image_bytes) + + base64_image = base64.b64encode(image_bytes).decode("utf-8") + + link_info = "" + if caption: + links = await self.link_processor.process_links(caption) + for link in links: + if link.get("summary"): + link_info += ( + f"\n[Link summary] {link['url']}: {link['summary']}" + "\nThis summary has NOT been saved yet. Save it if the content is worth remembering." + ) + + prompt_text = f"User sent a photo. 
media_ref={local_path}" + if caption: + prompt_text += f"\nCaption: {caption}" + if link_info: + prompt_text += link_info + + media_type = mime_type if mime_type.startswith("image/") else "image/jpeg" + response_text, found_images = await self.agent.handle( + prompt_text, image_b64=base64_image, image_media_type=media_type, + ) + await self._send_response(cli, ev, response_text, found_images) + + # ------------------------------------------------------------------ + # Audio / voice messages + # ------------------------------------------------------------------ + + async def _handle_audio(self, cli: NewAClient, ev: MessageEv): + audio_msg = ev.Message.audioMessage + sender = ev.Info.MessageSource.Sender.User + logger.info("WhatsApp voice/audio from {s}", s=sender) + + try: + audio_bytes = await cli.download_any(ev.Message) + except Exception as exc: + logger.error("Failed to download WhatsApp audio: {exc}", exc=exc) + await cli.reply_message("Sorry, I couldn't download that audio message.", ev) + return + + mime_type = audio_msg.mimetype or "audio/ogg" + ext = _mime_to_ext(mime_type) or ".ogg" + + transcription = await self.openai_client.audio.transcriptions.create( + model="whisper-1", + file=(f"voice{ext}", audio_bytes, mime_type), + ) + text = transcription.text + logger.debug("Transcribed WhatsApp voice: {t}", t=text[:100]) + + link_info = "" + links = await self.link_processor.process_links(text) + for link in links: + if link.get("summary"): + link_info += ( + f"\n[Link summary] {link['url']}: {link['summary']}" + "\nThis summary has NOT been saved yet. Save it if the content is worth remembering." + ) + + prompt = ( + f"[Voice message] {text}" + "\nThis transcription has NOT been saved yet. Save it if the content is worth remembering." 
+ f"{link_info}" + ) + response_text, found_images = await self.agent.handle(prompt) + await self._send_response(cli, ev, response_text, found_images) + + # ------------------------------------------------------------------ + # Sending replies + # ------------------------------------------------------------------ + + async def _send_response( + self, + cli: NewAClient, + ev: MessageEv, + response_text: str, + found_images: list[dict], + ): + """Send agent response: images first, then text.""" + chat = ev.Info.MessageSource.Chat + + for img in found_images: + platform = img.get("platform", "telegram") + media_ref = img.get("media_ref") or img.get("file_id", "") + caption = img.get("caption") or None + + if platform == "whatsapp" and media_ref and Path(media_ref).exists(): + try: + await cli.send_image(chat, media_ref, caption=caption) + except Exception as exc: + logger.error("Failed to send WhatsApp image: {exc}", exc=exc) + else: + desc = img.get("description", "an image") + note = f"(Found image: {desc} — originally saved via {platform})" + response_text = f"{response_text}\n\n{note}" if response_text else note + + if response_text: + try: + await cli.send_message(chat, response_text) + except Exception as exc: + logger.error("Failed to send WhatsApp message: {exc}", exc=exc) + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + async def start(self): + """Connect to WhatsApp. 
On first run, a QR code is printed to stdout.""" + await self.agent.start() + await self.agent.start_background_sync(interval=60) + await self.client.connect() + await self.client.idle() + + def close(self): + self.agent.close() + + +# ------------------------------------------------------------------ +# Utilities +# ------------------------------------------------------------------ + +_MIME_TO_EXT = { + "image/jpeg": ".jpg", + "image/png": ".png", + "image/webp": ".webp", + "image/gif": ".gif", + "audio/ogg": ".ogg", + "audio/ogg; codecs=opus": ".ogg", + "audio/mpeg": ".mp3", + "audio/mp4": ".m4a", + "audio/aac": ".aac", +} + + +def _mime_to_ext(mime_type: str) -> str: + return _MIME_TO_EXT.get(mime_type, _MIME_TO_EXT.get(mime_type.split(";")[0].strip(), "")) diff --git a/memclaw/cli.py b/memclaw/cli.py index 499c7bd..e466148 100644 --- a/memclaw/cli.py +++ b/memclaw/cli.py @@ -17,10 +17,13 @@ console = Console() -def _ensure_setup(ctx): - """Run first-time setup if ~/.memclaw/.env doesn't exist, then reload config.""" +def _ensure_setup(ctx, channel: str | None = None): + """Run first-time setup if ~/.memclaw/.env doesn't exist, then reload config. + + `channel` scopes which optional keys are prompted for (e.g. "telegram"). 
+ """ if needs_setup(): - run_setup() + run_setup(channel=channel) # Reload .env so newly saved keys are picked up from dotenv import load_dotenv load_dotenv(Path.home() / ".memclaw" / ".env", override=True) @@ -267,7 +270,7 @@ def configure(ctx): @cli.command() @click.pass_context -def bot(ctx): +def telegram(ctx): """Start the Memclaw Telegram bot.""" import sys @@ -277,7 +280,7 @@ def bot(ctx): from .bot.handlers import MessageHandlers - _ensure_setup(ctx) + _ensure_setup(ctx, channel="telegram") config: MemclawConfig = ctx.obj["config"] if not config.telegram_bot_token: @@ -349,3 +352,67 @@ async def _voice(update, context): f"(allowed users: {config.allowed_user_ids_list or 'all'})" ) app.run_polling(allowed_updates=["message"]) + + +# ------------------------------------------------------------------ +# WhatsApp bot (personal account via WhatsApp Web — neonize) +# ------------------------------------------------------------------ + +@cli.command() +@click.pass_context +def whatsapp(ctx): + """Start the Memclaw WhatsApp bot using your personal account. + + On first run, a QR code is printed to the terminal — open WhatsApp on + your phone → Settings → Linked Devices → Link a Device, and scan it. + Session credentials are stored under ~/.memclaw/whatsapp/. 
+ """ + import sys + + from loguru import logger + from openai import AsyncOpenAI + + from .bot.whatsapp_handlers import WhatsAppBot + + _ensure_setup(ctx, channel="whatsapp") + config: MemclawConfig = ctx.obj["config"] + + if not config.openai_api_key: + console.print("[red]Error:[/red] OPENAI_API_KEY is not set.") + console.print("Run [bold]memclaw configure[/bold] to set it.") + raise SystemExit(1) + + if not config.anthropic_api_key: + console.print("[red]Error:[/red] ANTHROPIC_API_KEY is not set.") + console.print("Run [bold]memclaw configure[/bold] to set it.") + raise SystemExit(1) + + # Logging + logger.remove() + logger.add(sys.stderr, level="INFO", + format="{time:HH:mm:ss} | {level:<8} | {message}") + logger.add( + str(config.memory_dir / "whatsapp.log"), + rotation="10 MB", + retention="7 days", + level="DEBUG", + ) + + openai_client = AsyncOpenAI(api_key=config.openai_api_key) + bot_ = WhatsAppBot(config, openai_client) + + console.print( + "[green]Starting Memclaw WhatsApp bot[/green] (self-notes only)" + ) + if not config.whatsapp_session_db.exists(): + console.print( + "[cyan]First run:[/cyan] a QR code will appear below. " + "Open WhatsApp → Settings → Linked Devices → Link a Device, and scan it." 
+ ) + + try: + asyncio.run(bot_.start()) + except KeyboardInterrupt: + pass + finally: + bot_.close() diff --git a/memclaw/config.py b/memclaw/config.py index 26b46ea..0bfe355 100644 --- a/memclaw/config.py +++ b/memclaw/config.py @@ -85,6 +85,28 @@ def daily_file(self, dt: date | None = None) -> Path: dt = dt or date.today() return self.memory_subdir / f"{dt.isoformat()}.md" + @property + def images_dir(self) -> Path: + d = self.memory_dir / "images" + d.mkdir(exist_ok=True) + return d + + @property + def whatsapp_dir(self) -> Path: + d = self.memory_dir / "whatsapp" + d.mkdir(exist_ok=True) + return d + + @property + def whatsapp_session_db(self) -> Path: + return self.whatsapp_dir / "session.db" + + @property + def whatsapp_media_dir(self) -> Path: + d = self.whatsapp_dir / "media" + d.mkdir(exist_ok=True) + return d + @property def allowed_user_ids_list(self) -> list[int]: if not self.allowed_user_ids: @@ -94,3 +116,4 @@ def allowed_user_ids_list(self) -> list[int]: for uid in self.allowed_user_ids.split(",") if uid.strip() ] + diff --git a/memclaw/defaults/AGENTS.md b/memclaw/defaults/AGENTS.md index a14a29c..6898243 100644 --- a/memclaw/defaults/AGENTS.md +++ b/memclaw/defaults/AGENTS.md @@ -6,10 +6,9 @@ You are Memclaw, a personal memory assistant. You help users store and retrieve 1. **Store**: When the user shares information worth remembering — save it using memory_save. Briefly confirm what you saved. 2. **Search**: When the user asks a question or wants to recall something — search using memory_search. Present results clearly with dates. -3. **Images (local file)**: When the user provides a local image file path, describe and save it with image_save. -4. **Images (Telegram)**: When you see an image with a file_id, describe what you see in detail and save using telegram_image_save. -5. **Image retrieval**: When the user asks to find an image — use image_search. The image will be sent automatically. -6. 
**Conversation**: Sometimes the user just wants to chat. Respond naturally. If they mention something worth remembering, save it too. +3. **Images**: When you see an image with a media_ref (from a messaging channel or a local path), describe what you see in detail and save using image_save. Pass the media_ref verbatim. +4. **Image retrieval**: When the user asks to find an image — use image_search. The image will be sent automatically. +5. **Conversation**: Sometimes the user just wants to chat. Respond naturally. If they mention something worth remembering, save it too. ## Storage guidelines diff --git a/memclaw/index.py b/memclaw/index.py index 80b83e3..62fbb35 100644 --- a/memclaw/index.py +++ b/memclaw/index.py @@ -106,8 +106,39 @@ def _init_db(self): created_at TEXT DEFAULT (datetime('now')) ) """) + + # Platform-agnostic image registry (Telegram, WhatsApp, etc.) + self.db.execute(""" + CREATE TABLE IF NOT EXISTS platform_images ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + platform TEXT NOT NULL, + media_ref TEXT NOT NULL, + description TEXT NOT NULL, + caption TEXT, + embedding BLOB, + created_at TEXT DEFAULT (datetime('now')) + ) + """) + + self._migrate_telegram_images() self.db.commit() + def _migrate_telegram_images(self): + """One-time migration: copy telegram_images rows into platform_images.""" + existing = self.db.execute( + "SELECT COUNT(*) FROM platform_images WHERE platform = 'telegram'" + ).fetchone()[0] + if existing > 0: + return + telegram_count = self.db.execute("SELECT COUNT(*) FROM telegram_images").fetchone()[0] + if telegram_count == 0: + return + self.db.execute(""" + INSERT INTO platform_images (platform, media_ref, description, caption, embedding, created_at) + SELECT 'telegram', file_id, description, caption, embedding, created_at + FROM telegram_images + """) + @property def openai(self) -> AsyncOpenAI: if self._openai is None: @@ -308,7 +339,7 @@ async def sync(self) -> bool: return changed # 
------------------------------------------------------------------ - # Telegram image registry + # Telegram image registry (backward-compatible wrappers) # ------------------------------------------------------------------ async def store_telegram_image( @@ -324,33 +355,74 @@ async def store_telegram_image( "VALUES (?, ?, ?, ?)", (file_id, description, caption, self.serialize_embedding(embedding)), ) - self.db.commit() + await self.store_platform_image("telegram", file_id, description, caption, _skip_embed=True, _embedding=embedding) def search_telegram_images( self, query_embedding: np.ndarray, limit: int = 5 ) -> list[dict]: """Vector search over stored Telegram images. Returns dicts with file_id.""" - rows = self.db.execute( - "SELECT id, file_id, description, caption, embedding, created_at " - "FROM telegram_images WHERE embedding IS NOT NULL" - ).fetchall() + return self.search_platform_images(query_embedding, limit=limit, platform="telegram") + + # ------------------------------------------------------------------ + # Platform-agnostic image registry + # ------------------------------------------------------------------ + + async def store_platform_image( + self, + platform: str, + media_ref: str, + description: str, + caption: str | None = None, + *, + _skip_embed: bool = False, + _embedding: np.ndarray | None = None, + ): + """Store an image reference for any platform (telegram, whatsapp, etc.).""" + embedding = _embedding if _skip_embed else await self.get_embedding(description) + self.db.execute( + "INSERT INTO platform_images (platform, media_ref, description, caption, embedding) " + "VALUES (?, ?, ?, ?, ?)", + (platform, media_ref, description, caption, self.serialize_embedding(embedding)), + ) + self.db.commit() + + def search_platform_images( + self, + query_embedding: np.ndarray, + limit: int = 5, + platform: str | None = None, + ) -> list[dict]: + """Vector search over stored images. 
Optionally filter by platform.""" + if platform: + rows = self.db.execute( + "SELECT id, platform, media_ref, description, caption, embedding, created_at " + "FROM platform_images WHERE embedding IS NOT NULL AND platform = ?", + (platform,), + ).fetchall() + else: + rows = self.db.execute( + "SELECT id, platform, media_ref, description, caption, embedding, created_at " + "FROM platform_images WHERE embedding IS NOT NULL" + ).fetchall() if not rows: return [] results = [] for row in rows: - stored_emb = self.deserialize_embedding(row[4]) + stored_emb = self.deserialize_embedding(row[5]) similarity = float( np.dot(query_embedding, stored_emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(stored_emb) + 1e-8) ) results.append({ "id": row[0], - "file_id": row[1], - "description": row[2], - "caption": row[3], - "created_at": row[5], + "platform": row[1], + "media_ref": row[2], + "file_id": row[2], # backward compat alias + "description": row[3], + "caption": row[4], + "created_at": row[6], "score": similarity, }) diff --git a/memclaw/setup.py b/memclaw/setup.py index e876cce..003f172 100644 --- a/memclaw/setup.py +++ b/memclaw/setup.py @@ -12,12 +12,14 @@ ENV_FILE = Path.home() / ".memclaw" / ".env" -# Keys in the order they are prompted -KEYS = [ - ("OPENAI_API_KEY", "OpenAI API key", True), - ("ANTHROPIC_API_KEY", "Anthropic API key", True), - ("TELEGRAM_BOT_TOKEN", "Telegram bot token", False), - ("ALLOWED_USER_IDS", "Allowed Telegram user IDs (comma-separated)", False), +# Keys in the order they are prompted. +# `channel` is None for always-asked keys, or a channel name (e.g. "telegram") +# for keys that are only relevant to that bot command. 
+KEYS: list[tuple[str, str, bool, str | None]] = [ + ("OPENAI_API_KEY", "OpenAI API key", True, None), + ("ANTHROPIC_API_KEY", "Anthropic API key", True, None), + ("TELEGRAM_BOT_TOKEN", "Telegram bot token", False, "telegram"), + ("ALLOWED_USER_IDS", "Allowed Telegram user IDs (comma-separated)", False, "telegram"), ] @@ -48,11 +50,13 @@ def needs_setup() -> bool: return not ENV_FILE.exists() -def run_setup(*, reconfigure: bool = False) -> None: +def run_setup(*, reconfigure: bool = False, channel: str | None = None) -> None: """Run the interactive setup wizard. Args: - reconfigure: If True, show existing values and allow updating. + reconfigure: If True, show existing values and allow updating all keys. + channel: If set (e.g. "telegram"), only prompt for always-asked keys + plus keys scoped to that channel. Ignored when reconfiguring. """ existing = _load_existing() @@ -77,15 +81,21 @@ def run_setup(*, reconfigure: bool = False) -> None: ) ) - values: dict[str, str] = {} + # Start from any previously-saved values so channel-scoped keys that we + # skip this round are preserved. + values: dict[str, str] = dict(existing) + + for env_key, label, required, key_channel in KEYS: + # Skip channel-scoped keys that don't match this invocation (unless + # the user is explicitly reconfiguring, in which case show all). + if not reconfigure and key_channel is not None and key_channel != channel: + continue - for env_key, label, required in KEYS: current = existing.get(env_key, "") masked = _mask(current) if reconfigure and current: - default_display = masked - prompt_text = f"{label} [{default_display}]" + prompt_text = f"{label} [{masked}]" elif required: prompt_text = f"{label} (required)" else: diff --git a/memclaw/tools.py b/memclaw/tools.py index 631fcf6..4c36dce 100644 --- a/memclaw/tools.py +++ b/memclaw/tools.py @@ -57,34 +57,22 @@ { "name": "image_save", "description": ( - "Save a local image by generating an AI description and storing it as " - "a memory. 
You can see the image — describe it yourself and pass your " - "description as content." - ), - "input_schema": { - "type": "object", - "properties": { - "image_path": {"type": "string", "description": "Path to the image file"}, - "caption": {"type": "string", "description": "Optional caption"}, - }, - "required": ["image_path"], - }, - }, - { - "name": "telegram_image_save", - "description": ( - "Save a Telegram image with your description for later retrieval. " - "You MUST call this when you receive an image with a file_id. Describe " - "the image in detail and pass the description along with the file_id." + "Save an image with your description so it can be retrieved later. " + "You MUST call this when you receive an image with a media_ref in " + "the prompt, or when the user shares a local image path. Describe " + "the image in detail and pass the media_ref verbatim." ), "input_schema": { "type": "object", "properties": { "description": {"type": "string", "description": "Detailed image description"}, - "file_id": {"type": "string", "description": "Telegram file_id"}, + "media_ref": { + "type": "string", + "description": "Opaque reference (file_id, local path, etc.) 
from the prompt", + }, "caption": {"type": "string", "description": "Optional caption"}, }, - "required": ["description", "file_id"], + "required": ["description", "media_ref"], }, }, { @@ -195,18 +183,19 @@ def __init__( index: MemoryIndex, search: HybridSearch, found_images: list[dict], + platform: str | None = None, ): self.config = config self.store = store self.index = index self.search = search self.found_images = found_images + self.platform = platform self._dispatch: dict[str, Any] = { "memory_save": self._memory_save, "memory_search": self._memory_search, "image_save": self._image_save, - "telegram_image_save": self._telegram_image_save, "image_search": self._image_search, "update_instructions": self._update_instructions, "file_write": self._file_write, @@ -247,38 +236,31 @@ async def _memory_search(self, args: dict) -> str: return formatted async def _image_save(self, args: dict) -> str: - image_path = Path(args["image_path"]).expanduser().resolve() - caption: str = args.get("caption", "") - if not image_path.exists(): - logger.info(" → image_save result: not found {path}", path=image_path) - return f"Image not found: {image_path}" - memory_content = f"**Image:** {image_path.name}\n" - if caption: - memory_content += f"**Caption:** {caption}\n" - memory_content += f"**Path:** {image_path}\n" - file_path = self.store.save(memory_content, entry_type="image") - await self.index.index_file(file_path) - logger.info(" → image_save result: saved {name}", name=image_path.name) - return f"Image saved from {image_path.name}" - - async def _telegram_image_save(self, args: dict) -> str: description: str = args["description"] - file_id: str = args["file_id"] + media_ref: str = args["media_ref"] caption: str = args.get("caption", "") + platform = self.platform or "local" + combined = f"Image: {description}" if caption: combined += f" Caption: {caption}" - file_path = self.store.save(combined, entry_type="image") - await self.index.index_file(file_path) - await 
self.index.store_telegram_image( - file_id=file_id, description=combined, caption=caption, + store_path = self.store.save(combined, entry_type="image") + await self.index.index_file(store_path) + await self.index.store_platform_image( + platform=platform, + media_ref=media_ref, + description=combined, + caption=caption, + ) + logger.info( + " → image_save({platform}) result: {desc}", + platform=platform, desc=description[:100], ) - logger.info(" → telegram_image_save result: {desc}", desc=description[:100]) return f"Image saved: {description[:100]}" async def _image_search(self, args: dict) -> str: query_emb = await self.index.get_embedding(args["query"]) - candidates = self.index.search_telegram_images(query_emb, limit=5) + candidates = self.index.search_platform_images(query_emb, limit=5) if candidates: best_score = candidates[0]["score"] threshold = best_score * 0.9 diff --git a/pyproject.toml b/pyproject.toml index 87fc23a..d70c16e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "beautifulsoup4>=4.12.0", "loguru>=0.7.0", "python-dotenv>=1.0.0", + "neonize>=0.3.16", ] [project.scripts] diff --git a/tests/test_agent.py b/tests/test_agent.py index dce4adc..a111ad6 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -476,14 +476,14 @@ def test_tool_definitions_contain_file_tools(self): assert "file_read" in names def test_tool_definitions_contain_all_tools(self): - """TOOL_DEFINITIONS should have all 8 tools.""" + """TOOL_DEFINITIONS should expose the full catalog.""" from memclaw.tools import TOOL_DEFINITIONS names = [t["name"] for t in TOOL_DEFINITIONS] - assert len(names) == 8 expected = { - "memory_save", "memory_search", "image_save", - "telegram_image_save", "image_search", + "memory_save", "memory_search", + "image_save", "image_search", "update_instructions", "file_write", "file_read", } assert set(names) == expected + assert len(names) == len(expected)