
Commit d231499

feat: add advanced analysis, conversation, and synthesis capabilities
This commit introduces sophisticated analysis modules, conversation management, an exploration engine, vision/document processors, QA validation, and synthesis capabilities for comprehensive document intelligence.

## Analysis Components (src/analyzers/)

- semantic_analyzer.py:
  * Semantic similarity analysis
  * Vector-based document comparison
  * Clustering and topic modeling
  * FAISS integration for efficient search
- dependency_analyzer.py:
  * Requirement dependency detection
  * Dependency graph construction
  * Circular dependency detection
  * Impact analysis
- consistency_checker.py:
  * Cross-document consistency validation
  * Contradiction detection
  * Terminology alignment
  * Quality scoring

## Conversation Management (src/conversation/)

- conversation_manager.py:
  * Multi-turn conversation handling
  * Context preservation across sessions
  * Provider-agnostic conversation API
  * Message history management
- context_tracker.py:
  * Conversation context tracking
  * Relevance scoring
  * Context window management
  * Smart context pruning

## Exploration Engine (src/exploration/)

- exploration_engine.py:
  * Interactive document exploration
  * Query-based navigation
  * Related content discovery
  * Insight generation

## Document Processors (src/processors/)

- vision_processor.py:
  * Image and diagram analysis
  * OCR integration
  * Visual element extraction
  * Layout understanding
- ai_document_processor.py:
  * AI-powered document enhancement
  * Smart content extraction
  * Multi-modal processing
  * Quality improvement

## QA and Validation (src/qa/)

- qa_validator.py:
  * Automated quality assurance
  * Requirement completeness checking
  * Validation rule engine
  * Quality metrics calculation
- test_generator.py:
  * Automatic test case generation
  * Requirement-to-test mapping
  * Coverage analysis
  * Test suite optimization

## Synthesis Capabilities (src/synthesis/)

- requirement_synthesizer.py:
  * Multi-document requirement synthesis
  * Duplicate detection and merging
  * Hierarchical organization
  * Consolidated output generation
- summary_generator.py:
  * Intelligent document summarization
  * Key point extraction
  * Executive summary creation
  * Configurable summary levels

## Key Features

1. **Semantic Analysis**: Vector-based similarity and clustering
2. **Dependency Tracking**: Automatic dependency graph construction
3. **Conversation AI**: Multi-turn context-aware interactions
4. **Vision Processing**: Image and diagram understanding
5. **Quality Assurance**: Automated validation and testing
6. **Smart Synthesis**: Multi-source requirement consolidation
7. **Exploration**: Interactive document navigation

## Integration Points

These components provide advanced capabilities for:
- Document understanding (analyzers + processors)
- Interactive workflows (conversation + exploration)
- Quality improvement (QA + validation)
- Content synthesis (synthesizers + summarizers)

Implements Phase 2 advanced intelligence and interaction capabilities.
1 parent 40dbf68 commit d231499

File tree

14 files changed (+4370 −0 lines)


src/analyzers/semantic_analyzer.py

Lines changed: 484 additions & 0 deletions
Large diffs are not rendered by default.
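Since the 484-line semantic_analyzer.py diff is not rendered, here is a minimal sketch of the cosine-similarity measure conventionally used for the "vector-based document comparison" the commit message describes. This is illustrative only; per the message, the real module works over FAISS indexes, which are not shown here:

```python
import math

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine of the angle between two embedding vectors, in [-1, 1].

    Returns 0.0 if either vector has zero magnitude.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)
```

Identical directions score 1.0, orthogonal vectors 0.0; document embeddings from the same topic cluster typically land close to 1.0.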

src/conversation/__init__.py

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
"""
Conversation module for handling multi-turn document conversations.

This module provides classes for managing conversational interactions about documents,
including conversation management, dialogue generation, and context tracking.
"""

from .context_tracker import ContextTracker
from .context_tracker import ConversationTopic
from .context_tracker import DocumentReference
from .conversation_manager import ConversationManager
from .conversation_manager import ConversationSession
from .dialogue_agent import DialogueAgent
from .dialogue_agent import DialogueContext
from .dialogue_agent import IntentClassifier

__all__ = [
    'ConversationManager',
    'ConversationSession',
    'DialogueAgent',
    'DialogueContext',
    'IntentClassifier',
    'ContextTracker',
    'DocumentReference',
    'ConversationTopic'
]

src/conversation/context_tracker.py
Lines changed: 364 additions & 0 deletions
@@ -0,0 +1,364 @@
"""
Context Tracker for maintaining conversation state and document references.

This module provides the ContextTracker class that manages conversation context,
tracking document relationships and maintaining coherent dialogue state.
"""

from dataclasses import dataclass
from dataclasses import field
from datetime import datetime
from datetime import timedelta
import logging
from typing import Any

logger = logging.getLogger(__name__)


@dataclass
class DocumentReference:
    """Represents a reference to a document in conversation."""
    document_id: str
    file_path: str
    title: str
    content_hash: str
    last_accessed: datetime
    access_count: int = 0
    relevant_sections: list[str] = field(default_factory=list)
    topics: set[str] = field(default_factory=set)

    def to_dict(self) -> dict[str, Any]:
        return {
            "document_id": self.document_id,
            "file_path": self.file_path,
            "title": self.title,
            "content_hash": self.content_hash,
            "last_accessed": self.last_accessed.isoformat(),
            "access_count": self.access_count,
            "relevant_sections": self.relevant_sections,
            "topics": list(self.topics)
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> 'DocumentReference':
        return cls(
            document_id=data["document_id"],
            file_path=data["file_path"],
            title=data["title"],
            content_hash=data["content_hash"],
            last_accessed=datetime.fromisoformat(data["last_accessed"]),
            access_count=data.get("access_count", 0),
            relevant_sections=data.get("relevant_sections", []),
            topics=set(data.get("topics", []))
        )


@dataclass
class ConversationTopic:
    """Represents a topic of conversation."""
    name: str
    keywords: set[str] = field(default_factory=set)
    documents: set[str] = field(default_factory=set)
    first_mentioned: datetime = field(default_factory=datetime.now)
    last_mentioned: datetime = field(default_factory=datetime.now)
    mention_count: int = 0


class ContextTracker:
    """Tracks conversation context and document relationships."""

    def __init__(self, config: dict[str, Any] | None = None):
        self.config = config or {}
        self.documents: dict[str, DocumentReference] = {}
        self.topics: dict[str, ConversationTopic] = {}
        self.current_focus: str | None = None  # Current document focus
        self.conversation_thread: list[str] = []  # Topic progression

        # Configuration
        self.max_context_documents = self.config.get("max_context_documents", 5)
        self.topic_decay_hours = self.config.get("topic_decay_hours", 24)
        self.similarity_threshold = self.config.get("similarity_threshold", 0.3)

    def add_document_reference(self, document_id: str, file_path: str,
                               title: str, content_hash: str,
                               topics: set[str] | None = None) -> DocumentReference:
        """Add or update a document reference."""
        if document_id in self.documents:
            doc_ref = self.documents[document_id]
            doc_ref.access_count += 1
            doc_ref.last_accessed = datetime.now()
        else:
            doc_ref = DocumentReference(
                document_id=document_id,
                file_path=file_path,
                title=title,
                content_hash=content_hash,
                last_accessed=datetime.now(),
                access_count=1,
                topics=topics or set()
            )
            self.documents[document_id] = doc_ref

        # Update topics
        if topics:
            for topic in topics:
                self.add_topic(topic, document_id)

        logger.debug(f"Added/updated document reference: {document_id}")
        return doc_ref

    def add_topic(self, topic_name: str, document_id: str | None = None):
        """Add or update a conversation topic."""
        topic_name = topic_name.lower().strip()

        if topic_name in self.topics:
            topic = self.topics[topic_name]
            topic.mention_count += 1
            topic.last_mentioned = datetime.now()
        else:
            topic = ConversationTopic(
                name=topic_name,
                keywords=self._extract_keywords(topic_name)
            )
            self.topics[topic_name] = topic

        if document_id:
            topic.documents.add(document_id)

        # Update conversation thread
        if not self.conversation_thread or self.conversation_thread[-1] != topic_name:
            self.conversation_thread.append(topic_name)

        logger.debug(f"Added/updated topic: {topic_name}")

    def set_document_focus(self, document_id: str):
        """Set the current document focus for conversation."""
        if document_id in self.documents:
            self.current_focus = document_id
            self.documents[document_id].access_count += 1
            self.documents[document_id].last_accessed = datetime.now()
            logger.debug(f"Set document focus: {document_id}")
        else:
            logger.warning(f"Cannot set focus to unknown document: {document_id}")

    def get_current_context(self) -> dict[str, Any]:
        """Get the current conversation context."""
        context = {
            "current_focus": self.current_focus,
            "active_documents": [],
            "recent_topics": [],
            "conversation_thread": self.conversation_thread[-10:],  # Last 10 topics
            "suggested_documents": []
        }

        # Get active documents (recently accessed)
        cutoff_time = datetime.now() - timedelta(hours=1)
        active_docs = [
            doc for doc in self.documents.values()
            if doc.last_accessed > cutoff_time
        ]
        active_docs.sort(key=lambda x: x.last_accessed, reverse=True)
        context["active_documents"] = [
            doc.to_dict() for doc in active_docs[:self.max_context_documents]
        ]

        # Get recent topics
        cutoff_time = datetime.now() - timedelta(hours=self.topic_decay_hours)
        recent_topics = [
            topic for topic in self.topics.values()
            if topic.last_mentioned > cutoff_time
        ]
        recent_topics.sort(key=lambda x: x.last_mentioned, reverse=True)
        context["recent_topics"] = [
            {
                "name": topic.name,
                "mention_count": topic.mention_count,
                "documents": list(topic.documents)
            }
            for topic in recent_topics[:10]
        ]

        # Suggest related documents
        if self.current_focus and self.current_focus in self.documents:
            current_doc = self.documents[self.current_focus]
            suggestions = self._find_related_documents(current_doc)
            context["suggested_documents"] = [
                {"document_id": doc_id, "relevance_score": score}
                for doc_id, score in suggestions[:3]
            ]

        return context

    def get_document_context(self, document_id: str) -> dict[str, Any] | None:
        """Get context for a specific document."""
        if document_id not in self.documents:
            return None

        doc_ref = self.documents[document_id]
        return {
            "document": doc_ref.to_dict(),
            "related_topics": [
                topic.name for topic in self.topics.values()
                if document_id in topic.documents
            ],
            "related_documents": [
                {"document_id": doc_id, "relevance_score": score}
                for doc_id, score in self._find_related_documents(doc_ref)[:5]
            ],
            "access_history": {
                "total_accesses": doc_ref.access_count,
                "last_accessed": doc_ref.last_accessed.isoformat()
            }
        }

    def track_topic_shift(self, new_topic: str, context_clues: list[str] | None = None) -> bool:
        """Track when conversation shifts to a new topic."""
        new_topic = new_topic.lower().strip()

        # Check if this is actually a new topic
        if self.conversation_thread and self.conversation_thread[-1] == new_topic:
            return False

        # Add the topic
        self.add_topic(new_topic)

        # Add context clues as keywords
        if context_clues and new_topic in self.topics:
            for clue in context_clues:
                self.topics[new_topic].keywords.add(clue.lower())

        logger.info(f"Topic shift detected: {new_topic}")
        return True

    def suggest_related_content(self, query: str) -> list[dict[str, Any]]:
        """Suggest related content based on a query."""
        suggestions = []
        query_lower = query.lower()

        # Find matching documents
        for doc_ref in self.documents.values():
            score = 0.0

            # Title match
            if any(word in doc_ref.title.lower() for word in query_lower.split()):
                score += 0.5

            # Topic match
            for topic_name in doc_ref.topics:
                if topic_name in query_lower or query_lower in topic_name:
                    score += 0.3

            # Keyword match in topics
            for topic_name in doc_ref.topics:
                if topic_name in self.topics:
                    topic = self.topics[topic_name]
                    for keyword in topic.keywords:
                        if keyword in query_lower:
                            score += 0.2

            if score > self.similarity_threshold:
                suggestions.append({
                    "type": "document",
                    "document_id": doc_ref.document_id,
                    "title": doc_ref.title,
                    "relevance_score": score,
                    "access_count": doc_ref.access_count
                })

        # Find matching topics
        for topic in self.topics.values():
            score = 0.0

            # Direct topic name match
            if topic.name in query_lower or query_lower in topic.name:
                score += 0.7

            # Keyword match
            for keyword in topic.keywords:
                if keyword in query_lower:
                    score += 0.3

            if score > self.similarity_threshold:
                suggestions.append({
                    "type": "topic",
                    "name": topic.name,
                    "relevance_score": score,
                    "mention_count": topic.mention_count,
                    "related_documents": list(topic.documents)
                })

        # Sort by relevance score
        suggestions.sort(key=lambda x: x["relevance_score"], reverse=True)
        return suggestions[:10]

    def _extract_keywords(self, text: str) -> set[str]:
        """Extract keywords from text."""
        # Simple keyword extraction
        words = text.lower().split()
        # Filter out common stop words
        stop_words = {"the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with"}
        keywords = {word for word in words if len(word) > 2 and word not in stop_words}
        return keywords

    def _find_related_documents(self, doc_ref: DocumentReference) -> list[tuple[str, float]]:
        """Find documents related to the given document."""
        related = []

        for other_doc_id, other_doc in self.documents.items():
            if other_doc_id == doc_ref.document_id:
                continue

            # Calculate similarity based on shared topics
            shared_topics = doc_ref.topics.intersection(other_doc.topics)
            if shared_topics:
                similarity_score = len(shared_topics) / max(len(doc_ref.topics), len(other_doc.topics), 1)
                related.append((other_doc_id, similarity_score))

        # Sort by similarity score
        related.sort(key=lambda x: x[1], reverse=True)
        return related

    def cleanup_old_context(self):
        """Clean up old context data to prevent memory bloat."""
        cutoff_time = datetime.now() - timedelta(hours=self.topic_decay_hours * 2)

        # Remove old topics with low mention count
        topics_to_remove = [
            topic_name for topic_name, topic in self.topics.items()
            if topic.last_mentioned < cutoff_time and topic.mention_count < 2
        ]

        for topic_name in topics_to_remove:
            del self.topics[topic_name]
            if topic_name in self.conversation_thread:
                self.conversation_thread.remove(topic_name)

        # Limit conversation thread length
        if len(self.conversation_thread) > 50:
            self.conversation_thread = self.conversation_thread[-30:]

        logger.info(f"Cleaned up {len(topics_to_remove)} old topics")

    def get_context_summary(self) -> dict[str, Any]:
        """Get a summary of the current context state."""
        return {
            "total_documents": len(self.documents),
            "total_topics": len(self.topics),
            "current_focus": self.current_focus,
            "conversation_length": len(self.conversation_thread),
            "most_accessed_documents": [
                {
                    "document_id": doc.document_id,
                    "title": doc.title,
                    "access_count": doc.access_count
                }
                for doc in sorted(self.documents.values(),
                                  key=lambda x: x.access_count, reverse=True)[:5]
            ],
            "active_topics": [
                {
                    "name": topic.name,
                    "mention_count": topic.mention_count,
                    "document_count": len(topic.documents)
                }
                for topic in sorted(self.topics.values(),
                                    key=lambda x: x.mention_count, reverse=True)[:5]
            ]
        }
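The `_find_related_documents` method above scores each document pair by topic overlap: the number of shared topics divided by the size of the larger topic set, with a guard against empty sets. Isolated as a standalone helper for illustration (`topic_similarity` is a hypothetical name, not part of the module):

```python
def topic_similarity(topics_a: set[str], topics_b: set[str]) -> float:
    """Overlap score: |shared| / max(|a|, |b|, 1).

    Mirrors the formula in ContextTracker._find_related_documents; the
    trailing 1 in max() avoids division by zero when both sets are empty.
    """
    shared = topics_a & topics_b
    return len(shared) / max(len(topics_a), len(topics_b), 1)
```

Two documents tagged {"auth", "security"} and {"auth", "api"} share one of two topics and score 0.5, which clears the default `similarity_threshold` of 0.3.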
