From e24e853ab2190500b5d3a422a6bb27ef6e3fa793 Mon Sep 17 00:00:00 2001 From: Shane Date: Sat, 5 Apr 2025 15:53:25 -0700 Subject: [PATCH] support the configuration by picking the first one for now. --- src/innieme/cli/run_bot.py | 14 +-- src/innieme/conversation_engine.py | 34 ++---- src/innieme/discord_bot.py | 188 ++++++++++++++++------------- src/innieme/discord_bot_config.py | 36 +++++- src/innieme/innie.py | 47 ++++++++ tests/test_discord_bot.py | 52 ++++---- tests/test_discord_bot_config.py | 76 ++++++++++++ 7 files changed, 307 insertions(+), 140 deletions(-) create mode 100644 src/innieme/innie.py create mode 100644 tests/test_discord_bot_config.py diff --git a/src/innieme/cli/run_bot.py b/src/innieme/cli/run_bot.py index d199cdf..553774e 100644 --- a/src/innieme/cli/run_bot.py +++ b/src/innieme/cli/run_bot.py @@ -1,18 +1,14 @@ -import os, yaml -from dotenv import load_dotenv +import os from innieme.discord_bot import DiscordBot from innieme.discord_bot_config import DiscordBotConfig -def load_config_from_yaml(file_path: str) -> DiscordBotConfig: - with open(file_path, "r") as yaml_file: - yaml_data = yaml.safe_load(yaml_file) - return DiscordBotConfig(**yaml_data) - # Load environment variables current_dir = os.getcwd() yaml_path = os.path.join(current_dir, 'config.yaml') -config = load_config_from_yaml(yaml_path) -print(f"Loaded config: {config}") +with open(yaml_path, "r") as yaml_file: + yaml_content = yaml_file.read() +config = DiscordBotConfig.from_yaml(yaml_content) +print(f"Loaded config from {yaml_path}") def main(): # Create and run the bot diff --git a/src/innieme/conversation_engine.py b/src/innieme/conversation_engine.py index 0e61a86..a5b643a 100644 --- a/src/innieme/conversation_engine.py +++ b/src/innieme/conversation_engine.py @@ -1,14 +1,15 @@ -import asyncio -import uuid from datetime import datetime from .document_processor import DocumentProcessor from .knowledge_manager import KnowledgeManager +from .discord_bot_config import TopicConfig from openai import AsyncOpenAI import os class ConversationEngine: - def __init__(self, document_processor:DocumentProcessor, knowledge_manager:KnowledgeManager, admin_id:int): + def __init__(self, api_key:str, topic:TopicConfig, document_processor:DocumentProcessor, knowledge_manager:KnowledgeManager, admin_id:int): + self.api_key = api_key + self.topic = topic self.document_processor = document_processor self.knowledge_manager = knowledge_manager self.admin_id = admin_id @@ -44,30 +45,21 @@ async def _generate_response(self, relevant_docs, history) -> str: relevant_docs: List of relevant document chunks from document processor history: List of previous conversation messages """ - client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) - - # Generate context from relevant documents - context = "\n\n".join([doc.page_content for doc in relevant_docs]) + client = AsyncOpenAI(api_key=self.api_key) # Format conversation history into OpenAI messages format messages = [] # Add system message with context - system_msg = ( - "You are an experienced Assistant Scoutmaster for Scouting America, " - "formerly known as BSA. You work as a caring coach with the scouts " - "who are asking questions and need quick answers. Please make your answer clear, short and " - "easy to understand, and provide official references whenever possible." - "When you need additional information, please ask at most three times before providing your best educated answer." - ) + system_msg = self.topic.role print("--------- Sent to LLM ---------") print(f"System message: {system_msg}") - if context: - system_msg += ( - f"\n\nHere is some relevant information to help answer " - f"the query:\n\n{context}" - ) - + # Generate context from relevant documents + context = "\n\n".join([doc.page_content for doc in relevant_docs]) + system_msg += ( + f"\n\nHere is some relevant information to help answer the query:" + f"\n\n{context}" + ) messages.append({"role": "system", "content": system_msg}) print(f"...(matched {len(relevant_docs)} as context)...") @@ -98,7 +90,7 @@ async def _generate_response(self, relevant_docs, history) -> str: print("------------------------------") return response - def is_following_thread(self, thread): + def is_following_thread(self, thread) -> bool: """Check if this is a thread we should be following""" return thread.id in self.active_threads diff --git a/src/innieme/discord_bot.py b/src/innieme/discord_bot.py index a53d723..2f6fed7 100644 --- a/src/innieme/discord_bot.py +++ b/src/innieme/discord_bot.py @@ -1,56 +1,45 @@ -import os +from .discord_bot_config import DiscordBotConfig +from .innie import Innie, Topic + import discord from discord.ext import commands -from .discord_bot_config import DiscordBotConfig -from .document_processor import DocumentProcessor -from .conversation_engine import ConversationEngine -from .knowledge_manager import KnowledgeManager -class DiscordBot: +from collections import defaultdict +from typing import Optional, List +import io + +class DiscordBot: def __init__(self, config: DiscordBotConfig): - """Initialize the Discord bot with the necessary components""" - self.token = config.discord_token - first_outtie = config.outies[0] - self.outie_id = first_outtie.outie_id - first_topic = first_outtie.topics[0] - self.docs_dir = first_topic.docs_dir - first_channel = first_topic.channels[0] - self.guild_id = first_channel.guild_id - self.channel_id = first_channel.channel_id - # Bot setup with required intents - self.intents = discord.Intents.default() - self.intents.message_content = True - self.intents.members = True - self.intents.guilds = True - self.bot = commands.Bot(command_prefix='!', intents=self.intents) - - # Initialize components - self.document_processor = DocumentProcessor( - self.docs_dir, - embedding_type="openai", - embedding_config={"api_key": os.getenv("OPENAI_API_KEY")} - ) - - self.knowledge_manager = KnowledgeManager() - self.conversation_engine = ConversationEngine( - self.document_processor, - self.knowledge_manager, - self.outie_id - ) - - # Store original modules for reloading - self.original_modules = {} + self.token = config.discord_token + self.bot = commands.Bot(command_prefix='!', intents=self._create_intents()) + + # Innies setup + self.innies = [Innie(config.openai_api_key, outie_config) for outie_config in config.outies] + # Channel->Topic mapping + self.channels: defaultdict[int, List[Topic]] = defaultdict(list) + for innie in self.innies: + for topic in innie.topics: + for channel_config in topic.config.channels: + self.channels[channel_config.channel_id].append(topic) # Register event handlers and commands self._register_events() self._register_commands() - + + def _create_intents(self) -> discord.Intents: + """Set up Discord intents""" + intents = discord.Intents.default() + intents.message_content = True + intents.members = True + intents.guilds = True + return intents + def _register_events(self): """Register all event handlers""" self.bot.event(self.on_ready) self.bot.event(self.on_message) - + def _register_commands(self): """Register all commands""" @self.bot.command(name='approve') @@ -59,27 +48,27 @@ async def approve(ctx): @self.bot.command(name='quit') async def quit(ctx): - if ctx.author.id != self.outie_id: - outie_name = getattr(ctx.guild.get_member(self.outie_id), 'display_name', 'unknown') - await ctx.send(f"This command is only available to the outie ({outie_name}).") + topic = self._identify_topic(ctx.channel.id) + if not topic: + await ctx.send(f"'quit' command ignored as there is no topic in this channel to support.") return - - if ctx.channel.id != self.channel_id: - channel_name = getattr(ctx.guild.get_channel(self.channel_id), 'name', 'unknown') - await ctx.send(f"This command can only be used in the bot's primary channel (#{channel_name}).") - return - + topic_outie = topic.outie_config.outie_id + if ctx.author.id != topic_outie: + outie_name = getattr(ctx.guild.get_member(topic_outie), 'display_name', 'unknown') + await ctx.send(f"This command is only available to the outie ({outie_name}).") + return await ctx.send("Goodbye! Bot shutting down...") await self.bot.close() + def _identify_topic(self, channel_id) -> Optional[Topic]: + topics = self.channels.get(channel_id, []) + return topics[0] if topics else None + + def _identify_topic_by_message(self, message) -> Optional[Topic]: + channel_id = message.channel.parent.id if message.channel.type == discord.ChannelType.public_thread else message.channel.id + return self._identify_topic(channel_id) async def _should_follow_thread(self, thread, user): - """Check if this is a thread we should be following""" - print(f"Checking thread with name: {thread.name}") - # First check the cache - if self.conversation_engine.is_following_thread(thread): - return True - print(f"Checking if thread {thread.id} should be followed") try: # Get the starter message that created the thread @@ -126,22 +115,32 @@ async def get_thread_context(self, thread, limit=10): return list(reversed(messages)) # Return in chronological order - async def process_and_respond(self, channel, query, thread_id, context_channel): + async def process_and_respond(self, topic, channel, query, thread_id, context_channel): """Process a query and respond in the channel""" context_messages = await self.get_thread_context(context_channel) if context_channel else [{ "role": "user", "content": query, }] - # Add typing indicator while processing async with channel.typing(): - response = await self.conversation_engine.process_query( - query, - thread_id, - context_messages=context_messages - ) + try: + response = await topic.process_query( + query, + thread_id, + context_messages=context_messages + ) + if len(response) > 2000: + # Create a file object with the response + file = discord.File(io.BytesIO(response.encode()), filename="response.txt") + await channel.send("Response is too long, sending as a file:", file=file) + else: + # Send as normal message if under limit + await channel.send(response) + except Exception as e: + error_message = f"Sorry, I encountered an error while processing your request: {str(e)}" + await channel.send(error_message) + raise # Re-raise the exception for logging/debugging - await channel.send(response) async def on_ready(self): """Event handler for when the bot is ready""" @@ -152,28 +151,37 @@ async def on_ready(self): for guild in self.bot.guilds: print(f"- {guild.name} (ID: {guild.id})") + if not self.channels: + print("No channels configured for this bot.") + return + innie = self.innies[0] + outie_id = innie.outie_config.outie_id + topic = innie.topics[0] + channel = topic.config.channels[0] + guild_id = channel.guild_id + channel_id = channel.channel_id # Connect to specific guild/server - guild = self.bot.get_guild(self.guild_id) + guild = self.bot.get_guild(guild_id) if not guild: - print(f"Could not connect to server with ID: {self.guild_id}") + print(f"Could not connect to server with ID: {guild_id}") print("Please make sure the bot has been invited to this server.") print("Invite URL: https://discord.com/api/oauth2/authorize?client_id=1356846600692957315&permissions=377957210176&scope=bot") return # Get channel within the guild - channel = guild.get_channel(self.channel_id) + channel = guild.get_channel(channel_id) if not isinstance(channel, discord.TextChannel): - print(f"Channel with ID: {self.channel_id} is not a text channel.") + print(f"Channel with ID: {channel_id} is not a text channel.") channel = None - outie_member = guild.get_member(self.outie_id) + outie_member = guild.get_member(outie_id) if not channel: if outie_member: - await outie_member.send(f"Bot {self.bot.user} is now online but could not find text channel with ID: {self.channel_id}") + await outie_member.send(f"Bot {self.bot.user} is now online but could not find text channel with ID: {channel_id}") else: - print(f"Could not find channel with ID: {self.channel_id} in server {guild.name} or outie user {self.outie_id}.") + print(f"Could not find channel with ID: {channel_id} in server {guild.name} or outie user {outie_id}.") return await channel.send(f"Bot {self.bot.user} is connected, preparing documents...") - scanning_result = await self.document_processor.scan_and_vectorize() - mention = f"(fyi <@{self.outie_id}>)" if outie_member else f"(no outie user {self.outie_id})" + scanning_result = await topic.scan_and_vectorize() + mention = f"(fyi <@{outie_id}>)" if outie_member else f"(no outie user {outie_id})" await channel.send(f"{scanning_result} {mention}") async def on_message(self, message): @@ -182,27 +190,40 @@ async def on_message(self, message): if not self.bot.user or message.author == self.bot.user: return + topic = self._identify_topic_by_message(message) + if not topic: + return + print(f"On message, located topic: {topic.config.name}") + outie_id = topic.outie_config.outie_id + # Check if message is in a thread if message.channel.type == discord.ChannelType.public_thread: # Check if this is a thread we should be following starter_message = await message.channel.parent.fetch_message(message.channel.id) - print(f"Starter message: [{starter_message.author}]: '{starter_message.content[:50]}...'") - if self.bot.user.mentioned_in(message) or await self._should_follow_thread(message.channel, self.bot.user): + if ( + self.bot.user.mentioned_in(message) + or topic.is_following_thread(message.channel) + or await self._should_follow_thread(message.channel, self.bot.user) + ): # Get recent context from the thread await self.process_and_respond( + topic, message.channel, message.content, message.channel.id, message.channel ) return - + else: + print(f"Not responding to thread") + # Check if bot is mentioned (for starting new threads) if self.bot.user.mentioned_in(message): # Create a new thread thread = await message.create_thread(name=f"Chat with {message.author.display_name}") # Process the query and respond await self.process_and_respond( + topic, thread, message.content.replace(f'<@{self.bot.user.id}>', '').strip(), thread.id, @@ -211,24 +232,27 @@ async def on_message(self, message): return # Check for outie commands - elif message.author.id == self.outie_id and "summary and file" in message.content.lower(): + elif message.author.id == outie_id and "summary and file" in message.content.lower(): # This command should be used in a thread if message.channel.type == discord.ChannelType.public_thread: - summary = await self.knowledge_manager.generate_summary(message.channel.id) + summary = await topic.generate_summary(message.channel.id) await message.channel.send(f"Summary generated:\n\n{summary}\n\nApprove to add to knowledge base? (yes/no)") # Check for consultation requests elif "please consult outie" in message.content.lower(): if message.channel.type == discord.ChannelType.public_thread: - outie_user = self.bot.get_user(self.outie_id) - await message.channel.send(f"<@{self.outie_id}> Your consultation has been requested in this thread.") + outie_user = self.bot.get_user(outie_id) + await message.channel.send(f"<@{outie_id}> Your consultation has been requested in this thread.") await self.bot.process_commands(message) async def approve_summary(self, ctx): - """Command to approve a summary and add it to the knowledge base""" - if ctx.author.id == self.outie_id and ctx.channel.type == discord.ChannelType.public_thread: - await self.knowledge_manager.store_summary(ctx.channel.id) + topic = self._identify_topic_by_message(ctx.message) + if not topic: + return + outie_id = topic.outie_config.outie_id + if ctx.author.id == outie_id and ctx.channel.type == discord.ChannelType.public_thread: + await topic.store_summary(ctx.channel.id) await ctx.send("Summary approved and added to knowledge base.") def run(self): diff --git a/src/innieme/discord_bot_config.py b/src/innieme/discord_bot_config.py index 6c17b74..d009ed4 100644 --- a/src/innieme/discord_bot_config.py +++ b/src/innieme/discord_bot_config.py @@ -1,32 +1,51 @@ -import os -from typing import List, Dict -from pydantic import BaseModel, field_validator +import os, yaml +from typing import List, TYPE_CHECKING + +if TYPE_CHECKING: + from .discord_bot_config import TopicConfig, OutieConfig, DiscordBotConfig + +from pydantic import BaseModel, field_validator, model_validator class ChannelConfig(BaseModel): guild_id: int channel_id: int + topic: 'TopicConfig' = None # type: ignore class TopicConfig(BaseModel): name: str role: str docs_dir: str channels: List[ChannelConfig] + outie: 'OutieConfig' = None # type: ignore @field_validator('docs_dir') def docs_dir_must_exist(cls, v): if not os.path.exists(v): raise ValueError(f'Document directory does not exist: {v}') return v + + @model_validator(mode='after') + def set_back_references(self): + for channel in self.channels: + channel.topic = self + return self class OutieConfig(BaseModel): outie_id: int topics: List[TopicConfig] + bot: 'DiscordBotConfig' = None # type: ignore @field_validator('outie_id') def id_must_be_positive(cls, v): if v <= 0: raise ValueError(f'ID value must be positive, got: {v}') return v + + @model_validator(mode='after') + def set_back_references(self): + for topic in self.topics: + topic.outie = self + return self class DiscordBotConfig(BaseModel): discord_token: str @@ -38,3 +57,14 @@ def token_must_not_be_empty(cls, v): if not v: raise ValueError('Discord token cannot be empty') return v + + @model_validator(mode='after') + def set_back_references(self): + for outie in self.outies: + outie.bot = self + return self + + @classmethod + def from_yaml(cls, yaml_content: str) -> "DiscordBotConfig": + config_data = yaml.safe_load(yaml_content) + return cls(**config_data) diff --git a/src/innieme/innie.py b/src/innieme/innie.py new file mode 100644 index 0000000..a9f201e --- /dev/null +++ b/src/innieme/innie.py @@ -0,0 +1,47 @@ +from dataclasses import dataclass +from typing import List +from functools import wraps +from .document_processor import DocumentProcessor +from .knowledge_manager import KnowledgeManager +from .conversation_engine import ConversationEngine +from .discord_bot_config import OutieConfig, TopicConfig + +class Topic: + def __init__(self, outie_config:OutieConfig, api_key:str, config: TopicConfig): + self.config = config + self.outie_config = outie_config + # Initialize components + self.document_processor = DocumentProcessor( + config.docs_dir, + embedding_type="openai", + embedding_config={"api_key": api_key} + ) + self.knowledge_manager = KnowledgeManager() + self.conversation_engine = ConversationEngine( + api_key, + config, + self.document_processor, + self.knowledge_manager, + outie_config.outie_id + ) + + def is_following_thread(self, thread) -> bool: + return self.conversation_engine.is_following_thread(thread) + + async def process_query(self, query:str, thread_id:int, context_messages:list[dict[str,str]]) -> str: + return await self.conversation_engine.process_query(query, thread_id, context_messages) + + async def scan_and_vectorize(self) -> str: + return await self.document_processor.scan_and_vectorize() + + async def generate_summary(self, thread_id) -> str: + return await self.knowledge_manager.generate_summary(thread_id) + + async def store_summary(self, thread_id) -> bool: + return await self.knowledge_manager.store_summary(thread_id) + +class Innie: + def __init__(self, api_key:str, outie_config: OutieConfig): + """Initialize an Innie instance with configuration""" + self.outie_config = outie_config + self.topics = [Topic(outie_config, api_key, topic_config) for topic_config in outie_config.topics] diff --git a/tests/test_discord_bot.py b/tests/test_discord_bot.py index 004ed30..009e4ea 100644 --- a/tests/test_discord_bot.py +++ b/tests/test_discord_bot.py @@ -1,28 +1,37 @@ -import pytest -import os -import sys - from innieme.discord_bot_config import DiscordBotConfig, OutieConfig, TopicConfig, ChannelConfig from innieme.discord_bot import DiscordBot +import os + os.environ['OPENAI_API_KEY'] = 'test_openai_key' # Create test documents directory if it doesn't exist test_docs_dir = 'data/test-documents' os.makedirs(test_docs_dir, exist_ok=True) -config = DiscordBotConfig( - discord_token='test_token', - openai_api_key='test_openai_key', - outies=[OutieConfig(outie_id=123456789, topics=[TopicConfig( - name='test_topic', - role='test_role', - docs_dir=test_docs_dir, - channels=[ChannelConfig(guild_id=123456789, channel_id=987654321)] - )])] +bot_config = DiscordBotConfig( + discord_token="test_token", + openai_api_key="test_key", + outies=[] +) +outie_config = OutieConfig( + outie_id=123, + topics=[], + bot=bot_config ) +bot_config.outies.append(outie_config) +topic_config = TopicConfig( + name="test_topic", + role="test_role", + docs_dir=test_docs_dir, + channels=[], + outie=outie_config +) +outie_config.topics.append(topic_config) +channel_config = ChannelConfig(guild_id=123456789, channel_id=987654321, topic=topic_config) +topic_config.channels.append(channel_config) -bot = DiscordBot(config=config) +bot = DiscordBot(config=bot_config) def test_bot_initialization(): """Test that the bot and its components are initialized correctly""" @@ -30,17 +39,10 @@ def test_bot_initialization(): assert bot.bot.command_prefix == '!' # Check if document processor is initialized - assert bot.document_processor is not None - assert bot.document_processor.docs_dir.endswith(test_docs_dir) - - # Check if knowledge manager is initialized - assert bot.knowledge_manager is not None - - # Check if conversation engine is initialized - assert bot.conversation_engine is not None - assert bot.conversation_engine.admin_id == 123456789 + assert bot._identify_topic(987654321) is not None + def test_bot_intents(): """Test that the bot has the required intents""" - assert bot.intents.message_content is True - assert bot.intents.members is True \ No newline at end of file + assert bot.bot.intents.message_content is True + assert bot.bot.intents.members is True \ No newline at end of file diff --git a/tests/test_discord_bot_config.py b/tests/test_discord_bot_config.py new file mode 100644 index 0000000..826de74 --- /dev/null +++ b/tests/test_discord_bot_config.py @@ -0,0 +1,76 @@ +from innieme.discord_bot_config import OutieConfig, DiscordBotConfig + +import pytest + +import os + +def test_valid_outie_id(): + """Test that a positive outie_id is accepted""" + # Create a bot config first + bot = DiscordBotConfig(discord_token="test_token", openai_api_key="key", outies=[]) # Add minimal bot config + outie = OutieConfig(outie_id=1, topics=[], bot=bot) # Add bot reference + assert outie.outie_id == 1 + +@pytest.mark.parametrize("invalid_id,expected_message", [ + (0, "ID value must be positive, got: 0"), + (-1, "ID value must be positive, got: -1"), + (-100, "ID value must be positive, got: -100") +]) + +def test_invalid_outie_id(invalid_id, expected_message): + """Test that non-positive outie_ids raise ValueError with correct message""" + with pytest.raises(ValueError) as exc_info: + OutieConfig(outie_id=invalid_id, topics=[]) + + assert expected_message in str(exc_info.value) + +def test_config_from_yaml(): + math_docs_dir = 'data/math' + scouting_docs_dir = 'data/scouting' + innieme_docs_dir = 'data/innieme' + for dir in [math_docs_dir, scouting_docs_dir, innieme_docs_dir]: + os.makedirs(dir, exist_ok=True) + + """Test creating config from multi-line YAML content""" + yaml_content = f""" + discord_token: "test_discord_token" + openai_api_key: "test_openai_key" + outies: + - outie_id: 1 + topics: + - name: "math" + role: "Math Teacher" + docs_dir: "{math_docs_dir}" + channels: + - guild_id: "11111111" + channel_id: "22222222" + - name: "scouting" + role: "ASM" + docs_dir: "{scouting_docs_dir}" + channels: + - guild_id: "33333333" + channel_id: "44444444" + - outie_id: 2 + topics: + - name: "innieme" + role: "Support" + docs_dir: "{innieme_docs_dir}" + channels: + - guild_id: "55555555" + channel_id: "66666666" + """ + + config = DiscordBotConfig.from_yaml(yaml_content) + + assert config.discord_token == "test_discord_token" + assert config.openai_api_key == "test_openai_key" + assert len(config.outies) == 2 + + # Verify first outie + assert config.outies[0].outie_id == 1 + assert config.outies[0].topics[0].name == "math" + + # Verify second outie + assert config.outies[1].outie_id == 2 + assert config.outies[1].topics[0].name == "innieme" +