diff --git a/config.example.yaml b/config.example.yaml index 3fe3da2..34bb5c8 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -26,8 +26,10 @@ outies: # 5. Click "Copy ID" - outie_id: outie_user_id topics: - - name: "Name of the outie" - role: "Role of the outie" + - name: "Name of the topic" + role: | + This is the full description of the role for the bot to play during the conversation. + This is the "system prompt" that will be used to set the context for the bot. # Path to the directory containing the documents to be processed docs_dir: "./data/documents" channels: @@ -45,8 +47,9 @@ outies: # 3. Click "Copy ID" # 4. Paste the ID below channel_id: discord_channel_id - - name: "Name of the outie" - role: "Role of the outie" + - name: "Name of the topic" + role: | + System prompt for the bot to play during the conversation. docs_dir: "./data/documents2" channels: - guild_id: discord_server_id diff --git a/src/innieme/discord_bot.py b/src/innieme/discord_bot.py index 2f6fed7..40cd3af 100644 --- a/src/innieme/discord_bot.py +++ b/src/innieme/discord_bot.py @@ -56,7 +56,7 @@ async def quit(ctx): if ctx.author.id != topic_outie: outie_name = getattr(ctx.guild.get_member(topic_outie), 'display_name', 'unknown') await ctx.send(f"This command is only available to the outie ({outie_name}).") - return + return await ctx.send("Goodbye! Bot shutting down...") await self.bot.close() @@ -151,38 +151,41 @@ async def on_ready(self): for guild in self.bot.guilds: print(f"- {guild.name} (ID: {guild.id})") - if not self.channels: - print("No channels configured for this bot.") - return - innie = self.innies[0] - outie_id = innie.outie_config.outie_id - topic = innie.topics[0] - channel = topic.config.channels[0] - guild_id = channel.guild_id - channel_id = channel.channel_id - # Connect to specific guild/server - guild = self.bot.get_guild(guild_id) - if not guild: - print(f"Could not connect to server with ID: {guild_id}") - print("Please make sure the bot has been invited to this server.") - print("Invite URL: https://discord.com/api/oauth2/authorize?client_id=1356846600692957315&permissions=377957210176&scope=bot") - return - # Get channel within the guild - channel = guild.get_channel(channel_id) - if not isinstance(channel, discord.TextChannel): - print(f"Channel with ID: {channel_id} is not a text channel.") - channel = None - outie_member = guild.get_member(outie_id) - if not channel: - if outie_member: - await outie_member.send(f"Bot {self.bot.user} is now online but could not find text channel with ID: {channel_id}") + for innie in self.innies: + for topic in innie.topics: + await self.connect_and_prepare(topic) + + async def connect_and_prepare(self, topic: Topic): + outie_id = topic.outie_config.outie_id + channels = [] + for channel in topic.config.channels: + guild_id = channel.guild_id + channel_id = channel.channel_id + # Connect to specific guild/server + guild = self.bot.get_guild(guild_id) + if not guild: + print(f"Could not connect to server with ID: {guild_id}") + print("Please make sure the bot has been invited to this server.") + print("Invite URL: https://discord.com/api/oauth2/authorize?client_id=1356846600692957315&permissions=377957210176&scope=bot") + return + # Get channel within the guild + channel = guild.get_channel(channel_id) + if not isinstance(channel, discord.TextChannel): + print(f"Channel with ID: {channel_id} is not a text channel.") + channel = None + outie_member = guild.get_member(outie_id) + if not channel: + if outie_member: + await outie_member.send(f"Bot {self.bot.user} is now online but could not find text channel with ID: {channel_id}") + else: + print(f"Could not find channel with ID: {channel_id} in server {guild.name} or outie user {outie_id}.") else: - print(f"Could not find channel with ID: {channel_id} in server {guild.name} or outie user {outie_id}.") - return - await channel.send(f"Bot {self.bot.user} is connected, preparing documents...") + channels.append((channel, outie_member)) + await channel.send(f"Bot {self.bot.user} is connected, preparing documents for {topic.config.name}...") scanning_result = await topic.scan_and_vectorize() - mention = f"(fyi <@{outie_id}>)" if outie_member else f"(no outie user {outie_id})" - await channel.send(f"{scanning_result} {mention}") + for channel, outie_member in channels: + mention = f"(fyi <@{outie_id}>)" if outie_member else f"(no outie user {outie_id})" + await channel.send(f"{scanning_result} {mention}") async def on_message(self, message): """Event handler for when a message is received""" diff --git a/src/innieme/document_processor.py b/src/innieme/document_processor.py index 5e1b1c3..8f5294e 100644 --- a/src/innieme/document_processor.py +++ b/src/innieme/document_processor.py @@ -64,7 +64,7 @@ def _create_empty_store(self): ) - async def scan_and_vectorize(self): + async def scan_and_vectorize(self, topic_name:str) -> str: """Scan all documents in the specified directory and create vector embeddings""" document_texts = [] @@ -73,7 +73,7 @@ async def scan_and_vectorize(self): for ext in ['*.pdf', '*.docx', '*.txt', '*.md']: files.extend(glob.glob(os.path.join(self.docs_dir, '**', ext), recursive=True)) - print(f"Found {len(files)} documents to process under {self.docs_dir}...") + print(f"For {topic_name}: Found {len(files)} documents to process under {self.docs_dir}...") # Process each file based on its type count = 0 for file_path in files: @@ -98,11 +98,11 @@ async def scan_and_vectorize(self): response = "" if not texts: self.vectorstore = self._create_empty_store() - response = "no documents found to process" + response = f"On topic '{topic_name}': no documents found to process" else: metadatas = [{"source": chunk["source"]} for chunk in all_chunks] self.vectorstore = FAISS.from_texts(texts, self.embeddings, metadatas=metadatas) - response = f"{len(all_chunks)} chunks created from {count} out of {len(files)} references" + response = f"On topic '{topic_name}': {len(all_chunks)} chunks created from {count} out of {len(files)} references" return response async def _extract_text(self, file_path): diff --git a/src/innieme/innie.py b/src/innieme/innie.py index a9f201e..6cb242c 100644 --- a/src/innieme/innie.py +++ b/src/innieme/innie.py @@ -32,7 +32,7 @@ async def process_query(self, query:str, thread_id:int, context_messages:list[di return await self.conversation_engine.process_query(query, thread_id, context_messages) async def scan_and_vectorize(self) -> str: - return await self.document_processor.scan_and_vectorize() + return await self.document_processor.scan_and_vectorize(self.config.name) async def generate_summary(self, thread_id) -> str: return await self.knowledge_manager.generate_summary(thread_id) diff --git a/tests/test_document_processor.py b/tests/test_document_processor.py index f7e9f7f..df8b21e 100644 --- a/tests/test_document_processor.py +++ b/tests/test_document_processor.py @@ -1,6 +1,4 @@ -import os import pytest -import asyncio from innieme.document_processor import DocumentProcessor # Test data directory @@ -21,7 +19,7 @@ def sample_txt_file(test_docs_dir): return file_path @pytest.fixture -def document_processor(test_docs_dir): +def document_processor(test_docs_dir) -> DocumentProcessor: """Create a DocumentProcessor instance for testing""" return DocumentProcessor(str(test_docs_dir)) @@ -36,8 +34,8 @@ async def test_extract_from_txt(document_processor, sample_txt_file): @pytest.mark.asyncio async def test_scan_and_vectorize_empty_dir(document_processor): """Test scanning an empty directory""" - result = await document_processor.scan_and_vectorize() - assert result == "no documents found to process" + result = await document_processor.scan_and_vectorize(topic_name="test_topic") + assert result == "On topic 'test_topic': no documents found to process" assert document_processor.vectorstore is not None @pytest.mark.asyncio @@ -50,7 +48,7 @@ async def test_search_documents_empty_vectorstore(document_processor): async def test_search_documents_with_data(document_processor, sample_txt_file): """Test searching after processing documents""" # First scan and vectorize - await document_processor.scan_and_vectorize() + await document_processor.scan_and_vectorize(topic_name="test_topic") # Then search results = await document_processor.search_documents("test document")