NikanEidi
diff --git a/main.py b/main.py
Lines changed: 181 additions & 109 deletions
diff --git a/src/formatter.py b/src/formatter.py
Lines changed: 2 additions & 1 deletion
diff --git a/src/model_engine.py b/src/model_engine.py
Lines changed: 139 additions & 63 deletions
diff --git a/src/postprocessor.py b/src/postprocessor.py
Lines changed: 15 additions & 7 deletions
@@ -17,6 +17,7 @@
     - IOError on write → logs error and re-raises
     - IOError on validate → logs error and returns invalid result
 """
+
 from __future__ import annotations
 
 import os
@@ -146,4 +147,4 @@ def validate_guide(self, file_path: str) -> ValidationResult:
                 warnings=[f"Failed to read file: {exc}"],
             )
 
-        return OutputValidator.validate(content)
+        return OutputValidator.validate(content)
@@ -18,6 +18,7 @@
     • Content capping prevents context window overflow
     • Encoding fallback chain: UTF-8 → Latin-1 → error-replace (never crashes)
 """
+
 from __future__ import annotations
 
 import os
@@ -86,68 +87,130 @@ class MimeClassifier:
     """
 
     # ── Image formats ─────────────────────────────────────────────────
-    IMAGE_TYPES: frozenset = frozenset({
-        "image/jpeg", "image/png", "image/gif", "image/bmp",
-        "image/tiff", "image/webp", "image/svg+xml", "image/heic",
-        "image/heif", "image/x-icon", "image/vnd.microsoft.icon",
-    })
+    IMAGE_TYPES: frozenset = frozenset(
+        {
+            "image/jpeg",
+            "image/png",
+            "image/gif",
+            "image/bmp",
+            "image/tiff",
+            "image/webp",
+            "image/svg+xml",
+            "image/heic",
+            "image/heif",
+            "image/x-icon",
+            "image/vnd.microsoft.icon",
+        }
+    )
 
     # ── PDF ────────────────────────────────────────────────────────────
     PDF_TYPES: frozenset = frozenset({"application/pdf"})
 
     # ── Office documents (ZIP archives with XML content) ──────────────
-    OFFICE_TYPES: frozenset = frozenset({
-        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",    # .docx
-        "application/vnd.openxmlformats-officedocument.presentationml.presentation",  # .pptx
-        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",          # .xlsx
-        "application/vnd.oasis.opendocument.text",                                    # .odt
-        "application/vnd.oasis.opendocument.spreadsheet",                             # .ods
-        "application/vnd.oasis.opendocument.presentation",                            # .odp
-        "application/msword",                                                         # .doc
-        "application/vnd.ms-excel",                                                   # .xls
-        "application/vnd.ms-powerpoint",                                              # .ppt
-    })
+    OFFICE_TYPES: frozenset = frozenset(
+        {
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",  # .docx
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",  # .pptx
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",  # .xlsx
+            "application/vnd.oasis.opendocument.text",  # .odt
+            "application/vnd.oasis.opendocument.spreadsheet",  # .ods
+            "application/vnd.oasis.opendocument.presentation",  # .odp
+            "application/msword",  # .doc
+            "application/vnd.ms-excel",  # .xls
+            "application/vnd.ms-powerpoint",  # .ppt
+        }
+    )
 
     # ── Structured data ───────────────────────────────────────────────
-    STRUCTURED_TYPES: frozenset = frozenset({
-        "application/json", "text/csv", "text/xml", "application/xml",
-        "text/yaml", "text/x-yaml", "application/x-yaml",
-        "text/tab-separated-values",
-    })
+    STRUCTURED_TYPES: frozenset = frozenset(
+        {
+            "application/json",
+            "text/csv",
+            "text/xml",
+            "application/xml",
+            "text/yaml",
+            "text/x-yaml",
+            "application/x-yaml",
+            "text/tab-separated-values",
+        }
+    )
 
     # ── Text-readable (code, markup, config, etc.) ────────────────────
-    TEXT_TYPES: frozenset = frozenset({
-        "text/plain", "text/html", "text/css", "text/javascript",
-        "text/x-python", "text/x-java", "text/x-c", "text/x-c++",
-        "text/x-go", "text/x-rust", "text/x-ruby", "text/x-perl",
-        "text/x-shellscript", "text/x-sh", "text/x-script.python",
-        "text/markdown", "text/x-markdown", "text/x-rst",
-        "text/x-tex", "text/x-latex",
-        "text/x-diff", "text/x-patch",
-        "text/x-log", "text/x-config",
-        "application/javascript", "application/typescript",
-        "application/x-httpd-php", "application/x-sh",
-        "application/x-python-code",
-    })
+    TEXT_TYPES: frozenset = frozenset(
+        {
+            "text/plain",
+            "text/html",
+            "text/css",
+            "text/javascript",
+            "text/x-python",
+            "text/x-java",
+            "text/x-c",
+            "text/x-c++",
+            "text/x-go",
+            "text/x-rust",
+            "text/x-ruby",
+            "text/x-perl",
+            "text/x-shellscript",
+            "text/x-sh",
+            "text/x-script.python",
+            "text/markdown",
+            "text/x-markdown",
+            "text/x-rst",
+            "text/x-tex",
+            "text/x-latex",
+            "text/x-diff",
+            "text/x-patch",
+            "text/x-log",
+            "text/x-config",
+            "application/javascript",
+            "application/typescript",
+            "application/x-httpd-php",
+            "application/x-sh",
+            "application/x-python-code",
+        }
+    )
 
     # ── Binary (not text-readable) ────────────────────────────────────
-    BINARY_TYPES: frozenset = frozenset({
-        "application/octet-stream", "application/zip", "application/gzip",
-        "application/x-tar", "application/x-7z-compressed",
-        "application/x-rar-compressed", "application/java-archive",
-        "application/x-executable", "application/x-mach-binary",
-        "application/x-sharedlib", "application/x-object",
-        "application/wasm", "application/x-sqlite3",
-        "audio/mpeg", "audio/wav", "audio/ogg", "audio/flac",
-        "video/mp4", "video/x-matroska", "video/quicktime",
-        "font/ttf", "font/otf", "font/woff", "font/woff2",
-    })
+    BINARY_TYPES: frozenset = frozenset(
+        {
+            "application/octet-stream",
+            "application/zip",
+            "application/gzip",
+            "application/x-tar",
+            "application/x-7z-compressed",
+            "application/x-rar-compressed",
+            "application/java-archive",
+            "application/x-executable",
+            "application/x-mach-binary",
+            "application/x-sharedlib",
+            "application/x-object",
+            "application/wasm",
+            "application/x-sqlite3",
+            "audio/mpeg",
+            "audio/wav",
+            "audio/ogg",
+            "audio/flac",
+            "video/mp4",
+            "video/x-matroska",
+            "video/quicktime",
+            "font/ttf",
+            "font/otf",
+            "font/woff",
+            "font/woff2",
+        }
+    )
 
     # ── Binary MIME prefixes for heuristic fallback ───────────────────
     _BINARY_PREFIXES: tuple = ("audio/", "video/", "font/")
     _BINARY_KEYWORDS: tuple = (
-        "octet-stream", "executable", "archive",
-        "compressed", "x-mach", "sqlite", "x-object", "x-sharedlib",
+        "octet-stream",
+        "executable",
+        "archive",
+        "compressed",
+        "x-mach",
+        "sqlite",
+        "x-object",
+        "x-sharedlib",
     )
 
     @classmethod
@@ -462,7 +525,8 @@ def __init__(self, model_path: str = "google/gemma-3-4b-it") -> None:
 
         log.info(
             "Engine operational — template: %d → %d chars",
-            len(self.master_template), len(self._prompt_template),
+            len(self.master_template),
+            len(self._prompt_template),
         )
 
     # ── Backward-compatible class methods (used by existing tests) ────
@@ -556,11 +620,11 @@ def _format_and_stream(
         Returns:
             Post-processed study guide markdown.
         """
-        prompt_text = self._build_system_prompt(
-            raw_content=content, is_image=is_image
-        )
+        prompt_text = self._build_system_prompt(raw_content=content, is_image=is_image)
 
-        messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}]}]
+        messages = [
+            {"role": "user", "content": [{"type": "text", "text": prompt_text}]}
+        ]
         formatted_prompt = self.tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
@@ -592,24 +656,32 @@ def process_resource(
         strategy = MimeClassifier.classify(resource.mime_type)
         log.info(
             "Processing %s → strategy=%s (mime=%s)",
-            os.path.basename(resource.file_path), strategy, resource.mime_type,
+            os.path.basename(resource.file_path),
+            strategy,
+            resource.mime_type,
         )
 
         # Route to the correct analyzer
         dispatch = {
-            "image":      lambda: self._analyze_image(resource.file_path, on_token),
-            "pdf":        lambda: self._analyze_pdf(resource.file_path, on_token),
-            "office":     lambda: self._analyze_office(resource.file_path, on_token),
-            "structured": lambda: self._analyze_structured(resource.file_path, resource.mime_type, on_token),
-            "binary":     lambda: self._analyze_binary(resource.file_path, on_token),
+            "image": lambda: self._analyze_image(resource.file_path, on_token),
+            "pdf": lambda: self._analyze_pdf(resource.file_path, on_token),
+            "office": lambda: self._analyze_office(resource.file_path, on_token),
+            "structured": lambda: self._analyze_structured(
+                resource.file_path, resource.mime_type, on_token
+            ),
+            "binary": lambda: self._analyze_binary(resource.file_path, on_token),
         }
 
-        analyzer = dispatch.get(strategy, lambda: self._analyze_text(resource.file_path, on_token))
+        analyzer = dispatch.get(
+            strategy, lambda: self._analyze_text(resource.file_path, on_token)
+        )
         return analyzer()
 
     # ── Private Analyzers ─────────────────────────────────────────────
 
-    def _analyze_image(self, image_path: str, on_token: Optional[Callable] = None) -> str:
+    def _analyze_image(
+        self, image_path: str, on_token: Optional[Callable] = None
+    ) -> str:
         """
         Multimodal analysis for screenshots, diagrams, and photos.
 
@@ -685,7 +757,9 @@ def _analyze_pdf(self, file_path: str, on_token: Optional[Callable] = None) -> s
             log.error("PDF analysis failed for %s: %s", file_path, exc)
             return f"S T A R R Y N O T E PDF Error: {exc}"
 
-    def _analyze_office(self, file_path: str, on_token: Optional[Callable] = None) -> str:
+    def _analyze_office(
+        self, file_path: str, on_token: Optional[Callable] = None
+    ) -> str:
         """
         Office document analysis (.docx, .pptx, .xlsx, .odt).
 
@@ -744,7 +818,9 @@ def _analyze_structured(
             log.error("Structured data analysis failed for %s: %s", file_path, exc)
             return f"S T A R R Y N O T E Structured Data Error: {exc}"
 
-    def _analyze_binary(self, file_path: str, on_token: Optional[Callable] = None) -> str:
+    def _analyze_binary(
+        self, file_path: str, on_token: Optional[Callable] = None
+    ) -> str:
         """
         Binary file analysis via metadata summarization.
 
@@ -788,4 +864,4 @@ def _analyze_text(self, file_path: str, on_token: Optional[Callable] = None) ->
             return self._format_and_stream(content=content, on_token=on_token)
         except Exception as exc:
             log.error("Text analysis failed for %s: %s", file_path, exc)
-            return f"S T A R R Y N O T E Text Error: {exc}"
+            return f"S T A R R Y N O T E Text Error: {exc}"
@@ -18,6 +18,7 @@
     This avoids recompilation on every call — critical when
     processing batches of files.
 """
+
 from __future__ import annotations
 
 import re
@@ -69,10 +70,16 @@ class MermaidFixer:
     _RE_TRAILING_SEMI = re.compile(r";(\s*)$", re.MULTILINE)
 
     # Valid diagram type declarations that support classDef
-    _VALID_TYPES = frozenset({
-        "graph TD", "graph LR", "graph TB",
-        "flowchart TD", "flowchart LR", "flowchart TB",
-    })
+    _VALID_TYPES = frozenset(
+        {
+            "graph TD",
+            "graph LR",
+            "graph TB",
+            "flowchart TD",
+            "flowchart LR",
+            "flowchart TB",
+        }
+    )
 
     @classmethod
     def fix(cls, text: str) -> str:
@@ -109,6 +116,7 @@ def _inject_classdef(cls, text: str) -> str:
         type line (graph TD, flowchart LR, etc.) if they are not
         already present in the block.
         """
+
         def _ensure_classdef(match: re.Match) -> str:
             block = match.group(0)
 
@@ -137,6 +145,7 @@ def _remove_inline_styles(cls, text: str) -> str:
         The LLM sometimes generates `style NodeID fill:red` directives
         that conflict with the classDef-based styling system.
         """
+
         def _clean_block(match: re.Match) -> str:
             return cls._RE_INLINE_STYLE.sub("", match.group(0))
 
@@ -150,6 +159,7 @@ def _remove_semicolons(cls, text: str) -> str:
         Mermaid.js v10+ does not use semicolons, but the LLM
         sometimes generates them from JavaScript/Java training data.
         """
+
         def _clean_block(match: re.Match) -> str:
             return cls._RE_TRAILING_SEMI.sub(r"\1", match.group(0))
 
@@ -304,9 +314,7 @@ def validate(cls, text: str) -> ValidationResult:
             result.warnings.append("No Mermaid diagram found")
 
         # ── Exam question check ───────────────────────────────────
-        result.has_exam_questions = (
-            "QUESTION 01" in text or "QUESTION 1" in text
-        )
+        result.has_exam_questions = "QUESTION 01" in text or "QUESTION 1" in text
         if not result.has_exam_questions:
             result.warnings.append("No exam questions found")