Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ outies:
# 5. Click "Copy ID"
- outie_id: outie_user_id
topics:
- name: "Name of the outie"
role: "Role of the outie"
- name: "Name of the topic"
role: |
This is the full description of the role for the bot to play during the conversation.
This is the "system prompt" that will be used to set the context for the bot.
# Path to the directory containing the documents to be processed
docs_dir: "./data/documents"
channels:
Expand All @@ -45,8 +47,9 @@ outies:
# 3. Click "Copy ID"
# 4. Paste the ID below
channel_id: discord_channel_id
- name: "Name of the outie"
role: "Role of the outie"
- name: "Name of the topic"
role: |
System prompt describing the role the bot plays during the conversation.
docs_dir: "./data/documents2"
channels:
- guild_id: discord_server_id
Expand Down
65 changes: 34 additions & 31 deletions src/innieme/discord_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ async def quit(ctx):
if ctx.author.id != topic_outie:
outie_name = getattr(ctx.guild.get_member(topic_outie), 'display_name', 'unknown')
await ctx.send(f"This command is only available to the outie ({outie_name}).")
return
return
await ctx.send("Goodbye! Bot shutting down...")
await self.bot.close()

Expand Down Expand Up @@ -151,38 +151,41 @@ async def on_ready(self):
for guild in self.bot.guilds:
print(f"- {guild.name} (ID: {guild.id})")

if not self.channels:
print("No channels configured for this bot.")
return
innie = self.innies[0]
outie_id = innie.outie_config.outie_id
topic = innie.topics[0]
channel = topic.config.channels[0]
guild_id = channel.guild_id
channel_id = channel.channel_id
# Connect to specific guild/server
guild = self.bot.get_guild(guild_id)
if not guild:
print(f"Could not connect to server with ID: {guild_id}")
print("Please make sure the bot has been invited to this server.")
print("Invite URL: https://discord.com/api/oauth2/authorize?client_id=1356846600692957315&permissions=377957210176&scope=bot")
return
# Get channel within the guild
channel = guild.get_channel(channel_id)
if not isinstance(channel, discord.TextChannel):
print(f"Channel with ID: {channel_id} is not a text channel.")
channel = None
outie_member = guild.get_member(outie_id)
if not channel:
if outie_member:
await outie_member.send(f"Bot {self.bot.user} is now online but could not find text channel with ID: {channel_id}")
for innie in self.innies:
for topic in innie.topics:
await self.connect_and_prepare(topic)

async def connect_and_prepare(self, topic: Topic):
outie_id = topic.outie_config.outie_id
channels = []
for channel in topic.config.channels:
guild_id = channel.guild_id
channel_id = channel.channel_id
# Connect to specific guild/server
guild = self.bot.get_guild(guild_id)
if not guild:
print(f"Could not connect to server with ID: {guild_id}")
print("Please make sure the bot has been invited to this server.")
print("Invite URL: https://discord.com/api/oauth2/authorize?client_id=1356846600692957315&permissions=377957210176&scope=bot")
return
# Get channel within the guild
channel = guild.get_channel(channel_id)
if not isinstance(channel, discord.TextChannel):
print(f"Channel with ID: {channel_id} is not a text channel.")
channel = None
outie_member = guild.get_member(outie_id)
if not channel:
if outie_member:
await outie_member.send(f"Bot {self.bot.user} is now online but could not find text channel with ID: {channel_id}")
else:
print(f"Could not find channel with ID: {channel_id} in server {guild.name} or outie user {outie_id}.")
else:
print(f"Could not find channel with ID: {channel_id} in server {guild.name} or outie user {outie_id}.")
return
await channel.send(f"Bot {self.bot.user} is connected, preparing documents...")
channels.append((channel, outie_member))
await channel.send(f"Bot {self.bot.user} is connected, preparing documents for {topic.config.name}...")
scanning_result = await topic.scan_and_vectorize()
mention = f"(fyi <@{outie_id}>)" if outie_member else f"(no outie user {outie_id})"
await channel.send(f"{scanning_result} {mention}")
for channel, outie_member in channels:
mention = f"(fyi <@{outie_id}>)" if outie_member else f"(no outie user {outie_id})"
await channel.send(f"{scanning_result} {mention}")

async def on_message(self, message):
"""Event handler for when a message is received"""
Expand Down
8 changes: 4 additions & 4 deletions src/innieme/document_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def _create_empty_store(self):
)


async def scan_and_vectorize(self):
async def scan_and_vectorize(self, topic_name:str) -> str:
"""Scan all documents in the specified directory and create vector embeddings"""
document_texts = []

Expand All @@ -73,7 +73,7 @@ async def scan_and_vectorize(self):
for ext in ['*.pdf', '*.docx', '*.txt', '*.md']:
files.extend(glob.glob(os.path.join(self.docs_dir, '**', ext), recursive=True))

print(f"Found {len(files)} documents to process under {self.docs_dir}...")
print(f"For {topic_name}: Found {len(files)} documents to process under {self.docs_dir}...")
# Process each file based on its type
count = 0
for file_path in files:
Expand All @@ -98,11 +98,11 @@ async def scan_and_vectorize(self):
response = ""
if not texts:
self.vectorstore = self._create_empty_store()
response = "no documents found to process"
response = f"On topic '{topic_name}': no documents found to process"
else:
metadatas = [{"source": chunk["source"]} for chunk in all_chunks]
self.vectorstore = FAISS.from_texts(texts, self.embeddings, metadatas=metadatas)
response = f"{len(all_chunks)} chunks created from {count} out of {len(files)} references"
response = f"On topic '{topic_name}': {len(all_chunks)} chunks created from {count} out of {len(files)} references"
return response

async def _extract_text(self, file_path):
Expand Down
2 changes: 1 addition & 1 deletion src/innieme/innie.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def process_query(self, query:str, thread_id:int, context_messages:list[di
return await self.conversation_engine.process_query(query, thread_id, context_messages)

async def scan_and_vectorize(self) -> str:
return await self.document_processor.scan_and_vectorize()
return await self.document_processor.scan_and_vectorize(self.config.name)

async def generate_summary(self, thread_id) -> str:
return await self.knowledge_manager.generate_summary(thread_id)
Expand Down
10 changes: 4 additions & 6 deletions tests/test_document_processor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
import pytest
import asyncio
from innieme.document_processor import DocumentProcessor

# Test data directory
Expand All @@ -21,7 +19,7 @@ def sample_txt_file(test_docs_dir):
return file_path

@pytest.fixture
def document_processor(test_docs_dir):
def document_processor(test_docs_dir) -> DocumentProcessor:
"""Create a DocumentProcessor instance for testing"""
return DocumentProcessor(str(test_docs_dir))

Expand All @@ -36,8 +34,8 @@ async def test_extract_from_txt(document_processor, sample_txt_file):
@pytest.mark.asyncio
async def test_scan_and_vectorize_empty_dir(document_processor):
"""Test scanning an empty directory"""
result = await document_processor.scan_and_vectorize()
assert result == "no documents found to process"
result = await document_processor.scan_and_vectorize(topic_name="test_topic")
assert result == "On topic 'test_topic': no documents found to process"
assert document_processor.vectorstore is not None

@pytest.mark.asyncio
Expand All @@ -50,7 +48,7 @@ async def test_search_documents_empty_vectorstore(document_processor):
async def test_search_documents_with_data(document_processor, sample_txt_file):
"""Test searching after processing documents"""
# First scan and vectorize
await document_processor.scan_and_vectorize()
await document_processor.scan_and_vectorize(topic_name="test_topic")

# Then search
results = await document_processor.search_documents("test document")
Expand Down