diff --git a/.gitignore b/.gitignore index acea1fa..4d3dc97 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,59 @@ -# Python +# ═══════════════════════════════════════════════════════════════ +# S T A R R Y N O T E · .gitignore +# ═══════════════════════════════════════════════════════════════ + +# ── Python ──────────────────────────────────────────────────── __pycache__/ *.py[cod] -*.class -vwen/ +*$py.class +*.so +*.egg-info/ +dist/ +build/ +*.egg + +# ── Virtual Environments ───────────────────────────────────── +.venv/ +venv/ env/ .env -# JavaScript / Node -node_modules/ -npm-debug.log* -# OS files + +# ── MLX / Model Weights ───────────────────────────────────── +models/ +*.mlx +*.safetensors +*.bin +*.gguf + +# ── Generated Output ───────────────────────────────────────── +output/ +Instructions/ + +# ── OS Files ────────────────────────────────────────────────── .DS_Store Thumbs.db -# IDEs -.vscode/ +Desktop.ini + +# ── IDEs & Editors ─────────────────────────────────────────── .idea/ +.vscode/ *.swp -# Logs +*.swo +*~ +.project +.classpath +.settings/ + +# ── Testing & Coverage ────────────────────────────────────── +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.mypy_cache/ + +# ── Logs ───────────────────────────────────────────────────── *.log +npm-debug.log* + +# ── Node (if applicable) ──────────────────────────────────── +node_modules/ diff --git a/Assets/UI-Demo/UI-Demo.png b/Assets/UI-Demo/UI-Demo.png new file mode 100644 index 0000000..e691f28 Binary files /dev/null and b/Assets/UI-Demo/UI-Demo.png differ diff --git a/README.md b/README.md index 7fa1443..a61494e 100644 --- a/README.md +++ b/README.md @@ -1 +1,583 @@ -StarryNote project started! \ No newline at end of file +
+ +``` + ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ + ░ ░ + ░ ███████╗████████╗ █████╗ ██████╗ ██████╗ ██╗ ██╗ ░ + ░ ██╔════╝╚══██╔══╝██╔══██╗██╔══██╗██╔══██╗╚██╗ ██╔╝ ░ + ░ ███████╗ ██║ ███████║██████╔╝██████╔╝ ╚████╔╝ ░ + ░ ╚════██║ ██║ ██╔══██║██╔══██╗██╔══██╗ ╚██╔╝ ░ + ░ ███████║ ██║ ██║ ██║██║ ██║██║ ██║ ██║ ░ + ░ ╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ░ + ░ N O T E ░ + ░ ░ + ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ +``` + +**Cybernetic Knowledge Architecture System** + +*Transform raw academic chaos into structured, exam-ready study guides — powered by Gemma 3 running locally on Apple Silicon.* + +[![Python](https://img.shields.io/badge/Python-3.11+-3776AB?style=for-the-badge&logo=python&logoColor=white)](https://python.org) +[![MLX](https://img.shields.io/badge/Apple_MLX-Metal_GPU-000000?style=for-the-badge&logo=apple&logoColor=white)](https://github.com/ml-explore/mlx) +[![Gemma 3](https://img.shields.io/badge/Gemma_3-4B_IT-4285F4?style=for-the-badge&logo=google&logoColor=white)](https://huggingface.co/google/gemma-3-4b-it) +[![Rich TUI](https://img.shields.io/badge/Rich-Terminal_UI-bc13fe?style=for-the-badge)](https://github.com/Textualize/rich) +[![License](https://img.shields.io/badge/License-MIT-00f3ff?style=for-the-badge)](LICENSE) + +
+ +--- + +## 📋 Table of Contents + +- [What Is StarryNote?](#-what-is-starrynote) +- [Key Features](#-key-features) +- [System Architecture](#-system-architecture) +- [Project Structure](#-project-structure) +- [Prerequisites](#-prerequisites) +- [Installation](#-installation) +- [Usage](#-usage) +- [Pipeline Deep Dive](#-pipeline-deep-dive) +- [The Master Template](#-the-master-template) +- [Knowledge Architect Prompt](#-knowledge-architect-prompt) +- [Terminal UI](#-terminal-ui) +- [Testing](#-testing) +- [Configuration](#-configuration) +- [Contributing](#-contributing) + +--- + +## 🌟 What Is StarryNote? + +**StarryNote** is a local-first, AI-powered knowledge synthesis engine that transforms raw study materials — lecture notes, code files, PDFs, screenshots — into **professional-grade, structured study guides** with zero cloud dependency. + +Unlike generic summarizers, StarryNote acts as a **Knowledge Architect**: it doesn't just restate your input — it *synthesizes* original code examples, mathematical proofs, Mermaid diagrams, and exam questions that explain the source material at a deeper level. + +> **The Philosophy:** *Your notes are fragments. StarryNote turns them into architecture.* + +### Why StarryNote? + +| Problem | StarryNote's Solution | +|:--------|:---------------------| +| Notes are scattered across formats | Universal MIME scanner processes **any file type** | +| AI summaries are surface-level | Knowledge Architect prompt forces **synthesis > summary** | +| Cloud AI raises privacy concerns | Runs **100% locally** on Apple Silicon via MLX | +| Output varies wildly | 545-line **Master Template** enforces consistent, exam-ready output | +| No way to self-assess | **Metacognitive Calibration** with confidence meters | + +--- + +## ✨ Key Features + + + + + + + + + + +
+ +### 🧠 AI Engine +- **Gemma 3 4B-IT** running natively on Metal GPU +- Multimodal: processes text, images, and PDFs +- OCR fallback for scanned/image-based PDFs +- Knowledge Architect prompt with 5 core directives + + + +### 🖥️ Cyberpunk Terminal UI +- Large ASCII hero banner in neon purple +- 4-phase pipeline with animated spinners +- Resource discovery table with MIME icons +- **Knowledge Density** star rating (✦ to ✦✦✦✦✦) + +
+ +### 📜 10-Section Master Template +- Executive Summary · Concept Register +- Cyberpunk Mermaid diagrams +- 3-tier exam questions (Apply → Analyze → Synthesize) +- Quick Reference Card · Metacognitive Calibration + + + +### 🔍 Universal Scanner +- DFS directory traversal +- MIME-based detection (not file extensions) +- Auto-skips `.venv`, `__pycache__`, `.git`, etc. +- Packages every file as a `UniversalResource` + +
+
+---
+
+## 🏗️ System Architecture
+
+```mermaid
+graph TD
+    classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff
+    classDef input fill:#1a1a1a,stroke:#ff6ec7,stroke-width:2px,color:#ff6ec7
+    classDef output fill:#1a1a1a,stroke:#39ff14,stroke-width:2px,color:#39ff14
+
+    A["📂 Raw Study Materials"]:::input --> B["🔍 StarryScanner<br/>MIME Detection · DFS Walk"]
+    B --> C{"File Type Router"}
+    C -->|"image/*"| D["🖼️ Image Analyzer<br/>PIL · Multimodal Prompt"]
+    C -->|"application/pdf"| E["📄 PDF Analyzer<br/>PyMuPDF · OCR Fallback"]
+    C -->|"text/*"| F["📝 Text Analyzer<br/>Raw Content Injection"]
+    D --> G["🧠 Gemma 3 Engine<br/>MLX · Metal GPU · 4B-IT"]
+    E --> G
+    F --> G
+    G --> H["📐 Master Template<br/>545-line · 10 Sections"]
+    H --> I["💾 StarryFormatter<br/>Instructions/ Output"]
+    I --> J["📘 Study Guides"]:::output
+
+    style A fill:#1a1a1a,stroke:#ff6ec7
+    style J fill:#1a1a1a,stroke:#39ff14
+```
+
+### Data Flow
+
+```mermaid
+sequenceDiagram
+    classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff
+
+    participant U as 👤 User
+    participant M as main.py
TUI Hub + participant S as StarryScanner + participant E as StarryEngine + participant G as Gemma 3
MLX Metal + participant F as StarryFormatter + + U->>M: python main.py + M->>E: Initialize (load model) + E->>G: Load weights into Unified Memory + G-->>E: Model ready + M->>S: scan_directory(cwd) + S-->>M: List[UniversalResource] + + loop For each resource + M->>E: process_resource(resource) + E->>E: _build_system_prompt() + E->>G: generate(prompt, max_tokens=3000) + G-->>E: Synthesized Markdown + E-->>M: guide_content + M->>F: save_guide(file_path, content) + F-->>M: output_path + end + + M-->>U: Mission Report + Constellation +``` + +--- + +## 📁 Project Structure + +``` +StarryNote/ +├── main.py # 🖥️ TUI entry point (4-phase pipeline) +├── requirements.txt # 📦 Python dependencies +├── README.md # 📖 You are here +├── .gitignore # 🚫 Git exclusion rules +│ +├── src/ # ⚙️ Core engine modules +│ ├── __init__.py # Package initializer +│ ├── model_engine.py # 🧠 Gemma 3 inference (Knowledge Architect) +│ ├── scanner.py # 🔍 Universal MIME-based file scanner +│ └── formatter.py # 💾 Output formatter (Instructions/ writer) +│ +├── templates/ # 📐 AI output templates +│ └── master_template.md # 📜 545-line, 10-section study guide template +│ +├── tests/ # 🧪 Test suite +│ ├── __init__.py # Package initializer +│ ├── test_model.py # 🔬 GPU + model inference validation +│ ├── test_scanner.py # 🔬 Scanner logic tests (legacy) +│ ├── test_universal_scanner.py # 🔬 Multimodal MIME scanner tests +│ └── sample_note.txt # 📝 Test fixture with regex markers +│ +├── .github/ # 🤖 CI/CD +│ └── workflows/ +│ └── main.yml # ▶️ GitHub Actions: pytest on push/PR +│ +├── models/ # 🗄️ MLX model weights (auto-downloaded, gitignored) +├── output/ # 📂 Legacy output directory (gitignored) +└── Instructions/ # 📘 Generated study guides (created at runtime) +``` + +--- + +## ⚡ Prerequisites + +| Requirement | Minimum | Recommended | +|:------------|:--------|:------------| +| **macOS** | 13.0 (Ventura) | 14.0+ (Sonoma) | +| **Chip** | Apple M1 | Apple M3 / M4 | +| **RAM** | 8 GB Unified | 16 GB+ Unified | 
+| **Python** | 3.11 | 3.12+ | +| **Disk** | ~5 GB (model weights) | 10 GB+ | +| **libmagic** | Required | `brew install libmagic` | + +> ⚠️ **Apple Silicon Required.** StarryNote uses [MLX](https://github.com/ml-explore/mlx), Apple's Metal-optimized ML framework. It will **not** run on Intel Macs or Linux/Windows without modifying the engine. + +--- + +## 🚀 Installation + +### 1. Clone the Repository + +```bash +git clone https://github.com/NikanEidi/StarryNote.git +cd StarryNote +``` + +### 2. Install System Dependencies + +```bash +# libmagic is required for MIME type detection +brew install libmagic +``` + +### 3. Create & Activate Virtual Environment + +```bash +python3 -m venv .venv +source .venv/bin/activate +``` + +### 4. Install Python Dependencies + +```bash +pip install -r requirements.txt +``` + +### 5. Verify GPU Access + +```bash +python -c "import mlx.core as mx; print(f'Metal GPU: {mx.metal.is_available()}')" +# Expected output: Metal GPU: True +``` + +> 💡 **First Run Note:** Gemma 3 weights (~5 GB) are downloaded automatically from Hugging Face on the first execution. Subsequent runs load from cache. + +--- + +## 🎯 Usage + +### Basic Usage + +Navigate to any directory containing study materials, then run: + +```bash +cd /path/to/your/study/materials +python /path/to/StarryNote/main.py +``` + +Or from the StarryNote directory itself: + +```bash +python main.py +``` + +### What Happens + +1. **⚡ Phase 1 — Neural Initialization:** Loads Gemma 3 into Apple Silicon's unified memory +2. **🔍 Phase 2 — Deep Scan:** DFS traversal discovering all files via MIME detection +3. **🧠 Phase 3 — Knowledge Synthesis:** Processes each file through the Knowledge Architect pipeline +4. 
**📊 Phase 4 — Mission Report:** Displays results table with timing and density ratings + +### Output + +Study guides are saved to an `Instructions/` folder in the current working directory: + +``` +Instructions/ +├── lecture_notes_StudyGuide.md +├── algorithm_code_StudyGuide.md +└── exam_review_StudyGuide.md +``` + +--- + +## 🔬 Pipeline Deep Dive + +### The Scanner (`src/scanner.py`) + +```mermaid +graph LR + classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff + + A["os.walk()"] --> B["python-magic
MIME detection"] + B --> C{"Classify"} + C -->|"image/jpeg"| D["🖼️ UniversalResource"] + C -->|"application/pdf"| E["📄 UniversalResource"] + C -->|"text/x-python"| F["🐍 UniversalResource"] + C -->|"text/plain"| G["📝 UniversalResource"] +``` + +The `StarryScanner` doesn't rely on file extensions. It uses **libmagic** to read binary headers and determine the true MIME type of every file. Each file is packaged into a `UniversalResource` dataclass: + +```python +@dataclass +class UniversalResource: + file_path: str # Absolute path to the file + mime_type: str # e.g., 'image/jpeg', 'application/pdf' + raw_data: Any # Path reference for downstream processing +``` + +### The Engine (`src/model_engine.py`) + +The engine routes each `UniversalResource` through the appropriate analyzer: + +| MIME Type | Analyzer | Strategy | +|:----------|:---------|:---------| +| `image/*` | `_analyze_image()` | PIL → RGB conversion → multimodal prompt | +| `application/pdf` | `_analyze_pdf()` | PyMuPDF text extraction → OCR fallback if <100 chars | +| `text/*` | `_analyze_text()` | Direct content injection into prompt | + +All three analyzers feed into the same `_build_system_prompt()` method, which constructs the **Knowledge Architect** prompt with the 545-line Master Template embedded. + +### The Formatter (`src/formatter.py`) + +Handles output persistence: +- Creates `Instructions/` directory at the current working directory +- Generates filenames: `{original_name}_StudyGuide.md` +- Writes UTF-8 encoded Markdown + +--- + +## 📜 The Master Template + +The heart of StarryNote is its **545-line Master Template** (`templates/master_template.md`). Every generated study guide follows this exact structure: + +```mermaid +graph TD + classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff + classDef unique fill:#1a1a1a,stroke:#39ff14,stroke-width:2px,color:#39ff14 + + A["I. Executive Summary"] --> B["II. Core Concepts"] + B --> C["III. Visual Knowledge Graph"] + C --> D["IV. 
Technical Deep Dive"] + D --> E["V. Annotated Glossary"] + E --> F["VI. Exam Preparation"] + F --> G["VII. Knowledge Connections"] + G --> H["VIII. Quick Reference Card"]:::unique + H --> I["IX. Metacognitive Calibration"]:::unique + I --> J["X. Source Archive"] +``` + +### Section Breakdown + +| # | Section | Purpose | Unique Feature | +|:-:|:--------|:--------|:---------------| +| I | **Executive Summary** | Abstract + Central Thesis + Applied Context | Forces non-obvious insight extraction | +| II | **Core Concepts** | Concept Register table + Comparative Analysis | Requires specific "Common Pitfall" per concept | +| III | **Visual Knowledge Graph** | Auto-selected Mermaid diagram | Cyberpunk styling: `#bc13fe` stroke, `#00f3ff` text | +| IV | **Technical Deep Dive** | Code (CS) / LaTeX (Math) / Source Analysis (Humanities) | Auto-selects block type by subject classification | +| V | **Annotated Glossary** | Domain terms with etymology & related terms | Requires linguistic root for scientific terms | +| VI | **Exam Preparation** | 3-tier questions: Application → Analysis → Synthesis | Collapsible answers with reasoning chains | +| VII | **Knowledge Connections** | Dependencies, next topics, cross-domain links | Maps learning pathways | +| VIII | **Quick Reference Card** | Condensed cheat sheet: takeaways + formulas + traps | 🆕 Pre-exam checklist | +| IX | **Metacognitive Calibration** | Confidence Meter (🔴🟡🟢🔵) per concept | 🆕 Personalized study prescriptions | +| X | **Source Archive** | Verbatim original input (read-only) | Audit trail for review | + +### Mermaid Cyberpunk Styling + +Every generated diagram uses this class definition: + +``` +classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff; +``` + +This produces diagrams with a dark background, neon purple borders, and cyan text — consistent across all outputs. + +--- + +## 🤖 Knowledge Architect Prompt + +The AI doesn't just "summarize." 
It follows 5 **Core Directives**: + +```mermaid +mindmap + root((Knowledge
Architect)) + 1. AUTHORSHIP + Set Author to S T A R R Y N O T E + 2. SYNTHESIS > SUMMARY + Original code examples + Mathematical proofs + Beyond the source material + 3. FORMATTING + Strict Master Template adherence + No skipped sections + 4. VISUAL REASONING + Auto-select Mermaid type + Cyberpunk Neon Purple/Cyan + 5. ACADEMIC TONE + Scholarly and precise + No conversational filler +``` + +--- + +## 🖥️ Terminal UI + +StarryNote's TUI is built with [Rich](https://github.com/Textualize/rich) and follows a **4-phase pipeline** design: + +### Phase Layout + +| Phase | Name | Visual Elements | +|:-----:|:-----|:----------------| +| ⚡ 1 | **Neural Initialization** | Animated spinner while loading Gemma 3 into unified memory | +| 🔍 2 | **Deep Scan** | Resource table with MIME icons (🐍🖼📄📝📦), file sizes | +| 🧠 3 | **Knowledge Synthesis** | Progress bar per file + overall, elapsed time, density rating | +| 📊 4 | **Mission Report** | Results table, summary panel, constellation footer | + +### Knowledge Density Rating + +A unique feature that measures **AI amplification** — how much original content the AI generated relative to the input size: + +| Rating | Ratio | Meaning | +|:------:|:-----:|:--------| +| ✦ | < 1× | Minimal expansion | +| ✦✦ | 1–2× | Moderate synthesis | +| ✦✦✦ | 3–4× | Strong synthesis | +| ✦✦✦✦ | 5–7× | Deep synthesis | +| ✦✦✦✦✦ | 8×+ | Maximum amplification | + +### Constellation Footer + +Instead of a static message, the TUI renders one ✦ star per processed file in a cosmic field: + +``` + · ˚ ✧ · ˚ · ✧ · ˚ + ✦ ✦ ✦ ✦ ✦ + ✧ · ˚ · ✦ · ˚ ✧ · + + Knowledge Archived · Stars Aligned +``` + +--- + +## 🧪 Testing + +### Run All Tests + +```bash +# Activate virtual environment first +source .venv/bin/activate + +# Run test suite +pytest tests/ -v +``` + +### Test Files + +| File | Tests | Requires GPU | +|:-----|:------|:------------:| +| `test_model.py` | Metal GPU detection, model loading, inference pipeline | ✅ Yes | +| `test_scanner.py` | Extension-based 
scanning logic (legacy) | ❌ No | +| `test_universal_scanner.py` | MIME-based multimodal detection | ❌ No | + +### CI/CD + +GitHub Actions runs `pytest tests/` on every push to `main`/`master` and on pull requests. See `.github/workflows/main.yml`. + +> ⚠️ **Note:** `test_model.py` requires Apple Silicon with Metal GPU — it will skip/fail in CI (Ubuntu runner). Scanner tests run on any platform. + +--- + +## ⚙️ Configuration + +### Model Selection + +Change the model in `src/model_engine.py`: + +```python +engine = StarryEngine(model_path="google/gemma-3-4b-it") # Default +engine = StarryEngine(model_path="google/gemma-3-12b-it") # Larger (needs 16GB+ RAM) +``` + +### Max Token Output + +Adjust `max_tokens` in the `generate()` calls within `model_engine.py`: + +```python +max_tokens=3000 # Default — ~2,000 words +max_tokens=5000 # Longer, more detailed guides +``` + +### Skip Patterns + +Customize which directories/files to skip in `main.py`: + +```python +SKIP = { + "Instructions", ".venv", "__pycache__", ".git", + ".DS_Store", ".idea", ".pytest_cache", "node_modules", ".github", +} +``` + +--- + +## 🤝 Contributing + +1. **Fork** the repository +2. **Create** a feature branch: `git checkout -b feature/my-feature` +3. **Commit** with clear messages: `git commit -m "feat: add X"` +4. **Push** to your fork: `git push origin feature/my-feature` +5. 
**Open** a Pull Request + +### Code Style + +```bash +# Format code with Black +black src/ main.py tests/ +``` + +--- + +## 📊 Tech Stack + +```mermaid +graph LR + classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff + + subgraph "AI Layer" + A["Gemma 3 4B-IT"] --> B["MLX Framework"] + B --> C["Metal GPU"] + end + + subgraph "Processing Layer" + D["python-magic"] --> E["StarryScanner"] + F["PyMuPDF"] --> G["PDF Analyzer"] + H["Pillow"] --> I["Image Analyzer"] + end + + subgraph "Presentation Layer" + J["Rich"] --> K["Cyberpunk TUI"] + L["Master Template"] --> M["Markdown Output"] + end + + E --> A + G --> A + I --> A + A --> L +``` + +--- + +
+ +``` + ───────────────────────────────────────────────────────────────────────────── + S T A R R Y N O T E · Knowledge Architecture System · v2.0 + Gemma 3 · Apple Silicon · MLX + Structured for clarity. Engineered for mastery. Calibrated for you. + ───────────────────────────────────────────────────────────────────────────── +``` + +**Made with ✦ by [Nikan Eidi](https://github.com/NikanEidi)** + +
\ No newline at end of file diff --git a/main.py b/main.py index 8f5bb8d..fabc5e1 100644 --- a/main.py +++ b/main.py @@ -1,2 +1,262 @@ -# Entry point for the StarryNote CLI. -# Handles user arguments (input path, output format) and orchestrates the workflow. \ No newline at end of file +""" +S T A R R Y N O T E · Cybernetic Knowledge Architecture v2.0 +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Transforms raw academic materials into structured study guides +using Gemma 3 on Apple Silicon. +""" + +import os +import sys +import time +from datetime import datetime + +from rich.console import Console +from rich.panel import Panel +from rich.progress import ( + Progress, + SpinnerColumn, + TextColumn, + BarColumn, + TimeElapsedColumn, +) +from rich.table import Table +from rich.align import Align +from rich.rule import Rule + +from src.model_engine import StarryEngine, MAX_TOKENS +from src.scanner import StarryScanner +from src.formatter import StarryFormatter + +# ═══════════════════════════════════════════════════════════════════════════ +# Design System +# ═══════════════════════════════════════════════════════════════════════════ + +PURPLE = "#bc13fe" +CYAN = "#00f3ff" +GREEN = "#39ff14" +AMBER = "#ffbf00" +DIM = "#555555" + +console = Console() + +HERO = ( + f"[bold {PURPLE}]" + " ███████╗████████╗ █████╗ ██████╗ ██████╗ ██╗ ██╗\n" + " ██╔════╝╚══██╔══╝██╔══██╗██╔══██╗██╔══██╗╚██╗ ██╔╝\n" + " ███████╗ ██║ ███████║██████╔╝██████╔╝ ╚████╔╝ \n" + " ╚════██║ ██║ ██╔══██║██╔══██╗██╔══██╗ ╚██╔╝ \n" + " ███████║ ██║ ██║ ██║██║ ██║██║ ██║ ██║ \n" + " ╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ \n" + f"[/bold {PURPLE}]" + f"[bold {CYAN}] N O T E[/bold {CYAN}]\n" + f"[dim] ╌╌╌ Cybernetic Knowledge Architecture v2.0 ╌╌╌[/dim]" +) + +SKIP = { + "Instructions", ".venv", "__pycache__", ".git", + ".DS_Store", ".idea", ".pytest_cache", "node_modules", ".github", +} + +MIME_ICONS = { + "image": "🖼 ", "pdf": "📄", "python": "🐍", "javascript": "⚡", + "markdown": "📘", 
"json": "🔧", "csv": "📊", + "html": "🌐", "css": "🎨", "xml": "📋", + "text": "📝", +} + + +# ═══════════════════════════════════════════════════════════════════════════ +# Utilities +# ═══════════════════════════════════════════════════════════════════════════ + +def _icon(mime: str) -> str: + for k, v in MIME_ICONS.items(): + if k in mime: + return v + return "📦" + + +def _sz(n: int) -> str: + for u in ("B", "KB", "MB", "GB"): + if n < 1024: + return f"{n:.0f} {u}" if u == "B" else f"{n:.1f} {u}" + n /= 1024 + return f"{n:.1f} TB" + + +def _density(input_bytes: int, output_len: int) -> str: + """Star-rate knowledge amplification: how much the AI expanded the input.""" + ratio = output_len / max(input_bytes, 1) + stars = min(5, max(1, int(ratio) + 1)) + colors = [DIM, AMBER, CYAN, PURPLE, GREEN] + c = colors[min(stars - 1, len(colors) - 1)] + return f"[{c}]{'✦' * stars}[/{c}]" + + +def _phase(n: int, title: str, glyph: str): + console.print(f"\n[bold {CYAN}]{glyph} PHASE {n} · {title}[/bold {CYAN}]") + console.print(Rule(style=DIM)) + + +def _should_skip(path: str) -> bool: + return any(s in path for s in SKIP) + + +# ═══════════════════════════════════════════════════════════════════════════ +# Pipeline +# ═══════════════════════════════════════════════════════════════════════════ + +def run(): + t0 = time.time() + console.clear() + + # ── HERO ────────────────────────────────────────────────────────────── + console.print(Panel(Align.center(HERO), border_style=PURPLE, padding=(1, 4))) + ts = datetime.now().strftime("%Y-%m-%d · %H:%M:%S") + console.print(Align.center(f"[dim]Session {ts} · Apple Silicon · Gemma 3[/dim]\n")) + + # ── PHASE 1 : NEURAL INITIALIZATION ────────────────────────────────── + _phase(1, "NEURAL INITIALIZATION", "⚡") + + with console.status(f"[bold {CYAN}]Loading Gemma 3 into Unified Memory…[/bold {CYAN}]", spinner="dots12"): + engine = StarryEngine() + console.print(f" [{GREEN}]✦[/{GREEN}] Gemma 3 locked & loaded") + + scanner = StarryScanner() + 
console.print(f" [{GREEN}]✦[/{GREEN}] MIME scanner initialized") + + cwd = os.getcwd() + formatter = StarryFormatter(cwd) + console.print(f" [{GREEN}]✦[/{GREEN}] Output → [dim]{formatter.output_dir}[/dim]") + + # ── PHASE 2 : DEEP SCAN ────────────────────────────────────────────── + _phase(2, "DEEP SCAN", "🔍") + + with console.status(f"[bold {CYAN}]Traversing directory tree…[/bold {CYAN}]", spinner="dots12"): + raw = scanner.scan_directory(cwd) + resources = [r for r in raw if not _should_skip(r.file_path)] + + tbl = Table( + border_style=PURPLE, show_lines=False, padding=(0, 1), + title=f"[bold {CYAN}]Discovered Resources[/bold {CYAN}]", + ) + tbl.add_column("#", style=f"bold {PURPLE}", justify="right", width=4) + tbl.add_column("", width=3) + tbl.add_column("File", style="white", max_width=55, no_wrap=True) + tbl.add_column("Type", style=CYAN, justify="center") + tbl.add_column("Size", style="dim", justify="right") + + total_bytes = 0 + for i, r in enumerate(resources, 1): + try: + sz = os.path.getsize(r.file_path) + except OSError: + sz = 0 + total_bytes += sz + tbl.add_row( + str(i), _icon(r.mime_type), os.path.basename(r.file_path), + r.mime_type.split("/")[-1].upper(), _sz(sz), + ) + + console.print(tbl) + console.print(f" [dim]{len(resources)} files · {_sz(total_bytes)}[/dim]\n") + + if not resources: + console.print(Panel( + "[yellow]No processable files detected in this directory.[/yellow]", + border_style="yellow", title="⚠ Warning", + )) + return + + # ── PHASE 3 : KNOWLEDGE SYNTHESIS ──────────────────────────────────── + _phase(3, "KNOWLEDGE SYNTHESIS", "🧠") + console.print(f" [dim {CYAN}]Generating ~{MAX_TOKENS} tokens per file · progress updates live[/dim {CYAN}]\n") + + results = [] # (name, path, seconds, input_bytes, output_len) + errors = [] + + with Progress( + SpinnerColumn(style=PURPLE), + TextColumn(f"[{CYAN}]{{task.description}}[/{CYAN}]"), + BarColumn(bar_width=30, style=DIM, complete_style=PURPLE, finished_style=GREEN), + 
TextColumn("[dim]{task.percentage:>3.0f}%[/dim]"), + TimeElapsedColumn(), + console=console, + ) as prog: + master = prog.add_task("Overall", total=len(resources)) + + for r in resources: + name = os.path.basename(r.file_path) + sub = prog.add_task(f" {name}", total=MAX_TOKENS) + t1 = time.time() + + # Live progress callback — updates the bar every token + def _tick(tokens_so_far, _sub=sub): + prog.update(_sub, completed=tokens_so_far) + + try: + in_sz = os.path.getsize(r.file_path) + content = engine.process_resource(r, on_token=_tick) + prog.update(sub, completed=MAX_TOKENS) # Ensure 100% + path = formatter.save_guide(r.file_path, content) + dt = time.time() - t1 + results.append((name, path, dt, in_sz, len(content))) + console.print( + f" [{GREEN}]✦[/{GREEN}] {name} → " + f"[dim]{os.path.basename(path)}[/dim] " + f"[{CYAN}]{dt:.1f}s[/{CYAN}] " + f"{_density(in_sz, len(content))}" + ) + except Exception as exc: + errors.append((name, str(exc))) + console.print(f" [red]✗[/red] {name} — {exc}") + + prog.update(sub, completed=MAX_TOKENS) + prog.update(master, advance=1) + + # ── PHASE 4 : MISSION REPORT ───────────────────────────────────────── + _phase(4, "MISSION REPORT", "📊") + elapsed = time.time() - t0 + + # Detailed results + if results: + det = Table( + border_style=PURPLE, show_lines=False, padding=(0, 1), + title=f"[bold {CYAN}]Synthesis Results[/bold {CYAN}]", + ) + det.add_column("#", style=f"bold {PURPLE}", justify="right", width=4) + det.add_column("Source", style="white", no_wrap=True) + det.add_column("Guide", style="dim", no_wrap=True) + det.add_column("Time", style=CYAN, justify="right") + det.add_column("Density", justify="center") + + for i, (name, path, dt, isz, olen) in enumerate(results, 1): + det.add_row( + str(i), name, os.path.basename(path), + f"{dt:.1f}s", _density(isz, olen), + ) + console.print(det) + + # Summary panel + stats = Table(show_header=False, border_style=PURPLE, padding=(0, 2)) + stats.add_column(style=f"bold {CYAN}") + 
stats.add_column(style="white") + stats.add_row("Processed", str(len(results))) + stats.add_row("Errors", f"[red]{len(errors)}[/red]" if errors else f"[{GREEN}]0[/{GREEN}]") + stats.add_row("Session Time", f"{elapsed:.1f}s") + stats.add_row("Avg / File", f"{elapsed / max(len(results), 1):.1f}s") + stats.add_row("Output Dir", formatter.output_dir) + console.print(Panel(stats, title=f"[bold {CYAN}]Mission Summary[/bold {CYAN}]", border_style=PURPLE)) + + # ── Constellation Footer ───────────────────────────────────────────── + stars = " ".join(f"[{PURPLE}]✦[/{PURPLE}]" for _ in results) + console.print(Align.center( + f"\n[dim {PURPLE}]· ˚ ✧ · ˚ · ✧ · ˚[/dim {PURPLE}]\n" + f" {stars}\n" + f"[dim {PURPLE}]✧ · ˚ · ✦ · ˚ ✧ ·[/dim {PURPLE}]\n" + f"\n[bold {CYAN}]Knowledge Archived · Stars Aligned[/bold {CYAN}]\n" + )) + + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bb8c97d..8383b44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,20 @@ +# ═══════════════════════════════════════════════════════════════ +# S T A R R Y N O T E · Requirements +# Python 3.11+ · Apple Silicon (M1/M2/M3/M4) +# ═══════════════════════════════════════════════════════════════ + # --- Core AI & ML (Apple Silicon Optimized) --- -mlx-lm # Apple's framework for running LLMs on M3/M4 GPU -huggingface_hub # To download Gemma 3 weights from Hugging Face +mlx-lm # Apple's MLX framework for running LLMs on Metal GPU +huggingface_hub # Download Gemma 3 weights from Hugging Face -# --- CLI & Interface --- -click # For creating professional command-line interfaces +# --- Terminal UI --- +rich # Cyberpunk TUI: neon panels, tables, progress bars -# --- File Processing & Regex --- -pathlib # Modern object-oriented filesystem paths -python-magic # To detect file types (prevents scanning binaries) +# --- File Processing --- +python-magic # MIME type detection via binary headers (libmagic) +pymupdf # High-performance PDF text 
& image extraction (fitz) +pillow # Image processing (PIL) for multimodal analysis # --- Development & Testing --- -pytest # Our unit testing framework -black # Code formatter to keep your src/ folder clean -ipython # Enhanced interactive shell for debugging logic \ No newline at end of file +pytest # Unit testing framework +black # Code formatter (PEP 8 compliance) \ No newline at end of file diff --git a/src/formatter.py b/src/formatter.py index e223258..164ad8a 100644 --- a/src/formatter.py +++ b/src/formatter.py @@ -1,2 +1,27 @@ -# Converts the raw AI responses into standardized Markdown (.md) files. -# Ensures syntax highlighting for code blocks and creates exam-style Q&A sections. \ No newline at end of file +# src/formatter.py +import os + + +class StarryFormatter: + def __init__(self, current_execution_dir: str): + """ + Creates an 'Instructions' folder dynamically in the CURRENT directory + where the user ran the command. + """ + self.output_dir = os.path.join(current_execution_dir, 'Instructions') + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + + def save_guide(self, original_filepath: str, content: str) -> str: + """Saves the Markdown file inside the dynamically created Instructions folder.""" + base_name = os.path.basename(original_filepath) + clean_name = os.path.splitext(base_name)[0] + safe_name = f"{clean_name}_StudyGuide.md".replace(" ", "_") + + file_path = os.path.join(self.output_dir, safe_name) + + with open(file_path, "w", encoding="utf-8") as f: + f.write(content) + + return file_path \ No newline at end of file diff --git a/src/model_engine.py b/src/model_engine.py index 79110d1..582d03a 100644 --- a/src/model_engine.py +++ b/src/model_engine.py @@ -1,2 +1,243 @@ -# Manages Gemma 3 lifecycle using the MLX framework for Apple Silicon. -# Handles quantization, inference, and prompt calibration for structured output. 
+# src/model_engine.py - The Multimodal Brain of StarryNote +import os +import io +import re +import logging +import time + +import fitz +from PIL import Image +from mlx_lm import load +from mlx_lm.generate import stream_generate + +from src.scanner import UniversalResource + +log = logging.getLogger("starry.engine") + +# ── Token budget ────────────────────────────────────────────────────────── +MAX_TOKENS = 4096 # ~300 lines of dense Markdown output (halved for speed) + + +class StarryEngine: + def __init__(self, model_path="google/gemma-3-4b-it"): + """ + Initializes the S T A R R Y N O T E Knowledge Engine on M3 Unified Memory. + """ + log.info("Initializing S T A R R Y N O T E Core: %s", model_path) + self.model, self.tokenizer = load(model_path) + + # Resolve path to the master template + base_dir = os.path.dirname(__file__) + template_path = os.path.abspath(os.path.join(base_dir, '..', 'templates', 'master_template.md')) + + try: + with open(template_path, 'r', encoding='utf-8') as f: + self.master_template = f.read() + log.info("Knowledge Architecture Template synchronized.") + except FileNotFoundError: + log.warning("Master template not found — using recovery format.") + self.master_template = "# S T A R R Y N O T E \n\n[Recovery Mode Active]" + + # Pre-clean template: strip HTML comments to reduce prompt tokens + self._prompt_template = self._compact_template(self.master_template) + log.info("S T A R R Y N O T E Engine is fully operational (template: %d → %d chars).", + len(self.master_template), len(self._prompt_template)) + + @staticmethod + def _clean_template(template: str) -> str: + """Strip HTML comments and excessive whitespace from the template. 
+ This reduces prompt token count by ~40% without losing structure.""" + cleaned = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL) + cleaned = re.sub(r'\n{3,}', '\n\n', cleaned) + return cleaned.strip() + + @classmethod + def _compact_template(cls, template: str) -> str: + """Build a minimal prompt-ready template that preserves section structure + but strips all placeholder repetition. Cuts input tokens by ~60%.""" + cleaned = cls._clean_template(template) + # Remove duplicate placeholder table rows (keep first example row only) + cleaned = re.sub( + r'(\|\s*\*\*\{\{\w+\}\}\*\*.*\|\n)(?:\|\s*\*\*\{\{\w+\}\}\*\*.*\|\n)+', + r'\1', + cleaned, + ) + # Remove variable-definition table rows after the first + cleaned = re.sub( + r'(\|\s*\$\{\{\w+\}\}\$.*\|\n)(?:\|\s*\$\{\{\w+\}\}\$.*\|\n)+', + r'\1', + cleaned, + ) + # Remove redundant code placeholders after the first + cleaned = re.sub( + r'(\{\{CODE_LINE_\d+\}\}.*\n)(?:\{\{CODE_LINE_\d+\}\}.*\n)+', + r'\1', + cleaned, + ) + # Remove redundant Mermaid content lines after the first + cleaned = re.sub( + r'(\{\{MERMAID_CONTENT_LINE_\d+\}\}\n)(?:\s*\{\{MERMAID_CONTENT_LINE_\d+\}\}\n)+', + r'\1', + cleaned, + ) + # Collapse excessive whitespace again + cleaned = re.sub(r'\n{3,}', '\n\n', cleaned) + return cleaned.strip() + + # ── Streaming generate wrapper ──────────────────────────────────────── + + def _stream(self, prompt, on_token=None, images=None): + """ + Stream tokens from the model. Calls on_token(tokens_so_far) after + every token so the TUI can render live progress. 
+ """ + kwargs = {"max_tokens": MAX_TOKENS} + if images: + kwargs["images"] = images + + text = "" + for i, response in enumerate(stream_generate( + self.model, self.tokenizer, prompt=prompt, **kwargs + )): + text = response.text + if on_token: + on_token(i + 1) + + return text + + # ── Public API ──────────────────────────────────────────────────────── + + def process_resource(self, resource: UniversalResource, on_token=None) -> str: + """Determines the processing pipeline based on the detected MIME type.""" + if "image" in resource.mime_type: + return self._analyze_image(resource.file_path, on_token) + elif "pdf" in resource.mime_type: + return self._analyze_pdf(resource.file_path, on_token) + else: + return self._analyze_text(resource.file_path, on_token) + + def _build_system_prompt(self, raw_content: str, is_image: bool = False) -> str: + """ + Constructs the high-fidelity Knowledge Architect prompt for S T A R R Y N O T E v2.0. + Forces synthesis over summary, visual reasoning via Mermaid, and strict authorship. + """ + context_label = "visual architecture" if is_image else "structured data" + + knowledge_architect_prompt = ( + f"Act as the S T A R R Y N O T E Knowledge Architect. Your purpose is to ingest " + f"raw, fragmented academic data ({context_label}) and synthesize it into a " + f"high-density, structured study guide.\n\n" + f"CORE DIRECTIVES:\n" + f"1. AUTHORSHIP: Set the Author field to 'S T A R R Y N O T E' for every document generated.\n" + f"2. SYNTHESIS > SUMMARY: Do not repeat the input. Identify the underlying logic. " + f"Create original, advanced coding examples and mathematical proofs that aren't in " + f"the source but explain the source perfectly.\n" + f"3. FORMATTING: Use the provided MASTER TEMPLATE exactly. Do not skip sections. " + f"If a section is irrelevant, mark it with \"—\".\n" + f"4. VISUAL REASONING: Select the most logical Mermaid diagram type " + f"(Flowchart for logic, Mindmap for concepts, Sequence for protocols). 
" + f"Apply cyberpunk styling (Neon Purple/Cyan) via class definitions.\n" + f"5. ACADEMIC TONE: Use a scholarly, precise, and human-centric tone. " + f"No conversational filler.\n\n" + f"OUTPUT STRUCTURE:\n" + f"- metadata block (Title, Date, Topic, Difficulty)\n" + f"- Executive Abstract (Intellectual core)\n" + f"- Concept Register (Definitions + Common Pitfalls)\n" + f"- Technical Deep Dive (Code Trace or LaTeX Formulation)\n" + f"- Exam Prep (3-tier questions: Application, Analysis, Synthesis)\n\n" + f"Strictly avoid HTML comments or instruction markers in the final Markdown output.\n\n" + ) + + return ( + f"{knowledge_architect_prompt}" + f"--- MASTER TEMPLATE START ---\n" + f"{self._prompt_template}\n" + f"--- MASTER TEMPLATE END ---\n\n" + f"SOURCE INPUT TO SYNTHESIZE:\n" + f"{raw_content}" + ) + + # ── Analyzers ───────────────────────────────────────────────────────── + + def _analyze_image(self, image_path: str, on_token=None) -> str: + """Multimodal analysis for screenshots and diagrams.""" + log.info("Scanning visual: %s", os.path.basename(image_path)) + + try: + img = Image.open(image_path).convert("RGB") + prompt_text = self._build_system_prompt( + raw_content="[Attached Image Resource: Extract logic, diagrams, and handwriting.]", + is_image=True + ) + + messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}]}] + formatted_prompt = self.tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + return self._stream(formatted_prompt, on_token=on_token, images=[img]) + except Exception as e: + return f"S T A R R Y N O T E Visual Error: {str(e)}" + + def _analyze_pdf(self, file_path: str, on_token=None) -> str: + """Handles PDF documents with automated OCR fallback for scanned slides.""" + log.info("Analyzing document: %s", os.path.basename(file_path)) + + try: + doc = fitz.open(file_path) + text_buffer = "" + + for page in doc: + text_buffer += page.get_text() + "\n" + + content = 
text_buffer.strip()[:8000] + + if len(content) < 100: + log.info("Image-based PDF detected — initializing Vision OCR…") + + captured_pages = [] + for i in range(min(2, len(doc))): + pix = doc.load_page(i).get_pixmap(dpi=150) + img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB") + captured_pages.append(img) + + prompt_text = self._build_system_prompt( + raw_content="[Scanned PDF Resource: Execute OCR and extract technical data.]", + is_image=True + ) + + messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}]}] + formatted_prompt = self.tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + return self._stream(formatted_prompt, on_token=on_token, images=captured_pages) + + prompt_text = self._build_system_prompt(raw_content=content, is_image=False) + messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}]}] + formatted_prompt = self.tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + return self._stream(formatted_prompt, on_token=on_token) + + except Exception as e: + return f"S T A R R Y N O T E PDF Error: {str(e)}" + + def _analyze_text(self, file_path: str, on_token=None) -> str: + """Deep semantic analysis for code scripts and text notes.""" + log.info("Reading text: %s", os.path.basename(file_path)) + + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + prompt_text = self._build_system_prompt(raw_content=content, is_image=False) + messages = [{"role": "user", "content": [{"type": "text", "text": prompt_text}]}] + formatted_prompt = self.tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + return self._stream(formatted_prompt, on_token=on_token) + except Exception as e: + return f"S T A R R Y N O T E Text Error: {str(e)}" \ No newline at end of file diff --git a/src/scanner.py b/src/scanner.py index a4c60c0..795c218 100644 --- a/src/scanner.py +++ 
b/src/scanner.py @@ -2,58 +2,39 @@ # Uses Regex to tokenize notes based on custom syntax like [Time: O(n)]. import os -import re +import magic # Library to detect file types based on binary headers from dataclasses import dataclass -from typing import List, Optional +from typing import List, Any @dataclass -class NoteBlock: - """Structure to hold a single piece of learned concept.""" +class UniversalResource: + """A container for any type of study material (Text, Image, PDF).""" file_path: str - content: str - time_complexity: Optional[str] = None - important: bool = False + mime_type: str # e.g., 'image/jpeg' or 'application/pdf' + raw_data: Any # Holds the actual content or path for the AI to process class StarryScanner: - def __init__(self, target_exts=(".md", ".txt")): - self.target_exts = target_exts - # Regex to capture [Time: Value] - self.time_pattern = re.compile(r"\[Time:\s*(.*?)\]") - # Regex to capture @important tag - self.imp_pattern = re.compile(r"@important") - - def scan_directory(self, root_path: str) -> List[NoteBlock]: - """DFS traversal to find and parse study notes.""" - found_notes = [] - - for root, dirs, files in os.walk(root_path): + def __init__(self): + # Initialize the magic engine to detect file types accurately + self.mime = magic.Magic(mime=True) + + def scan_directory(self, root_path: str) -> List[UniversalResource]: + """ + DFS Traversal that identifies EVERY file type. + Logic: Instead of filtering by extension, we classify by MIME type. 
+ """ + resources = [] + for root, _, files in os.walk(root_path): for file in files: - if file.endswith(self.target_exts): - full_path = os.path.join(root, file) - found_notes.extend(self._parse_file(full_path)) - - return found_notes - - def _parse_file(self, file_path: str) -> List[NoteBlock]: - """Reads a file and extracts metadata using Regex.""" - blocks = [] - with open(file_path, 'r', encoding='utf-8') as f: - data = f.read() - - # Logic: Extract metadata before cleaning the content - time_match = self.time_pattern.search(data) - is_imp = bool(self.imp_pattern.search(data)) - - # Clean the tags out to leave only pure study content - clean_content = self.time_pattern.sub("", data) - clean_content = self.imp_pattern.sub("", clean_content).strip() - - blocks.append(NoteBlock( - file_path=file_path, - content=clean_content, - time_complexity=time_match.group(1) if time_match else "N/A", - important=is_imp - )) - return blocks \ No newline at end of file + full_path = os.path.join(root, file) + mime_type = self.mime.from_file(full_path) + + # Logic: We package everything. The AI Engine will decide how to 'read' it. + resources.append(UniversalResource( + file_path=full_path, + mime_type=mime_type, + raw_data=full_path # Passing the path for heavy-duty processing + )) + return resources \ No newline at end of file diff --git a/templates/master_template.md b/templates/master_template.md new file mode 100644 index 0000000..241d5c2 --- /dev/null +++ b/templates/master_template.md @@ -0,0 +1,546 @@ + + +
+ +``` + ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ + ░ ░ + ░ S T A R R Y N O T E Knowledge Architecture System ░ + ░ ░ + ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ +``` + +
+ +--- + +# {{NOTE_TITLE}} + +
+ +*{{SUBJECT_AREA}} · {{SPECIFIC_TOPIC}} · {{DATE_YYYY-MM-DD}}* + +
+ +--- + +## DOCUMENT RECORD + +``` +┌─────────────────────┬──────────────────────────────────────────────────────┐ +│ Title │ {{NOTE_TITLE}} │ +│ Subject │ {{SUBJECT_AREA}} │ +│ Topic │ {{SPECIFIC_TOPIC}} │ +│ Date │ {{DATE_YYYY-MM-DD}} │ +│ Source / Author │ {{AUTHOR_OR_SOURCE}} │ +│ Difficulty │ {{DIFFICULTY_LEVEL}} │ +│ Classification │ {{SUBJECT_CLASS}} │ +│ Keywords │ {{KEYWORD_1}} / {{KEYWORD_2}} / {{KEYWORD_3}} │ +└─────────────────────┴──────────────────────────────────────────────────────┘ +``` + + + +--- + +## I. EXECUTIVE SUMMARY + +> **ABSTRACT** +> +> {{ONE_PARAGRAPH_SUMMARY — 3 to 5 sentences synthesizing the material in +> scholarly language. Do not copy-paste from the source. Distill the +> intellectual core of the topic.}} + +> **CENTRAL THESIS** +> +> {{SINGLE_MOST_IMPORTANT_INSIGHT — The non-obvious truth or governing +> principle this topic rests on. One sentence, precise and arguable.}} + +> **APPLIED CONTEXT** +> +> {{REAL_WORLD_RELEVANCE — Where does this knowledge live outside the +> classroom? One to two sentences connecting theory to tangible consequence.}} + +--- + +## II. 
CORE CONCEPTS + + + +### Concept Register + +| Concept | Definition | Key Property | Common Pitfall | +|:--------|:-----------|:-------------|:---------------| +| **{{CONCEPT_1}}** | {{DEFINITION_1}} | {{KEY_PROPERTY_1}} | {{PITFALL_1}} | +| **{{CONCEPT_2}}** | {{DEFINITION_2}} | {{KEY_PROPERTY_2}} | {{PITFALL_2}} | +| **{{CONCEPT_3}}** | {{DEFINITION_3}} | {{KEY_PROPERTY_3}} | {{PITFALL_3}} | +| **{{CONCEPT_4}}** | {{DEFINITION_4}} | {{KEY_PROPERTY_4}} | {{PITFALL_4}} | +| **{{CONCEPT_N}}** | {{DEFINITION_N}} | {{KEY_PROPERTY_N}} | {{PITFALL_N}} | + +--- + +### Comparative Analysis + + + +| Dimension | {{OPTION_A}} | {{OPTION_B}} | {{OPTION_C}} | +|:----------|:------------|:------------|:------------| +| **{{DIMENSION_1}}** | {{A1}} | {{B1}} | {{C1}} | +| **{{DIMENSION_2}}** | {{A2}} | {{B2}} | {{C2}} | +| **{{DIMENSION_3}}** | {{A3}} | {{B3}} | {{C3}} | +| **{{DIMENSION_4}}** | {{A4}} | {{B4}} | {{C4}} | +| **Optimal When** | {{SCENARIO_A}} | {{SCENARIO_B}} | {{SCENARIO_C}} | + +--- + +## III. VISUAL KNOWLEDGE GRAPH + + + +### {{GRAPH_TITLE}} + +```mermaid +{{MERMAID_DIAGRAM_TYPE}} + + %% ── Cyberpunk Style ────────────────────────────────────────────────── + classDef default fill:#1a1a1a,stroke:#bc13fe,stroke-width:2px,color:#00f3ff; + + %% ── {{GRAPH_TITLE}} ───────────────────────────────────────────────── + %% Subject : {{SUBJECT_AREA}} + %% Generated : StarryNote + + {{MERMAID_CONTENT_LINE_1}} + {{MERMAID_CONTENT_LINE_2}} + {{MERMAID_CONTENT_LINE_3}} + {{MERMAID_CONTENT_LINE_4}} + {{MERMAID_CONTENT_LINE_5}} + {{MERMAID_CONTENT_LINE_N}} +``` + +**Diagram key:** {{ONE_SENTENCE_EXPLAINING_THE_DIAGRAM_LOGIC_AND_HOW_TO_READ_IT}} + +--- + +## IV. 
TECHNICAL DEEP DIVE + + + +### {{DEEP_DIVE_SECTION_TITLE}} + +{{DEEP_DIVE_INTRODUCTORY_SENTENCE}} + + + +```{{LANGUAGE_TAG}} +# ════════════════════════════════════════════════════════════════════════ +# {{CODE_BLOCK_TITLE}} +# Purpose : {{CODE_PURPOSE}} +# Complexity : Time O({{TIME_COMPLEXITY}}) +# Space O({{SPACE_COMPLEXITY}}) +# Notes : {{IMPORTANT_IMPLEMENTATION_NOTE}} +# ════════════════════════════════════════════════════════════════════════ + +{{CODE_LINE_1}} # {{INLINE_COMMENT_1}} +{{CODE_LINE_2}} # {{INLINE_COMMENT_2}} +{{CODE_LINE_3}} +{{CODE_LINE_4}} # {{INLINE_COMMENT_4}} +{{CODE_LINE_N}} +``` + +**Trace walkthrough:** {{ONE_PARAGRAPH_DESCRIBING_EXECUTION_FLOW_OF_THE_CODE}} + +--- + + + +**Core Formula** + +$$ +{{LATEX_FORMULA_BLOCK}} +$$ + +**Variable Definitions** + +| Symbol | Meaning | Unit / Domain | +|:------:|:--------|:-------------| +| ${{VAR_1}}$ | {{VAR_1_DEFINITION}} | {{VAR_1_UNIT}} | +| ${{VAR_2}}$ | {{VAR_2_DEFINITION}} | {{VAR_2_UNIT}} | +| ${{VAR_3}}$ | {{VAR_3_DEFINITION}} | {{VAR_3_UNIT}} | +| ${{VAR_N}}$ | {{VAR_N_DEFINITION}} | {{VAR_N_UNIT}} | + +**Worked Example** + +Given ${{EXAMPLE_INPUT_VALUES}}$: + +$${{STEP_1_SUBSTITUTION}}$$ + +$${{STEP_2_SIMPLIFICATION}}$$ + +$${{STEP_3_RESULT}} \quad \therefore \; {{FINAL_ANSWER_STATEMENT}}$$ + +**Proof Sketch** *(for theorems and derivations — omit if not applicable)* + +> {{PROOF_OR_DERIVATION_SUMMARY — 2 to 4 sentences outlining the logical +> steps from hypothesis to conclusion.}} + +--- + + + +**Primary Source** + +> *"{{PRIMARY_SOURCE_QUOTE_VERBATIM}}"* +> +> — {{SOURCE_AUTHOR}}, *{{SOURCE_TITLE}}*, {{SOURCE_DATE}} + +**Textual Analysis** + +{{SCHOLARLY_ANNOTATION — 3 to 5 sentences interpreting the source. Address: +(1) what the author asserts, (2) the historical or intellectual context, +(3) the significance for the broader topic. 
Do not merely paraphrase.}} + +**Historiographical or Critical Note** + +> {{COUNTERPOINT_OR_SCHOLARLY_DEBATE — What do other scholars argue against +> or in tension with this source? One to two sentences. Write "—" if none.}} + +--- + +## V. ANNOTATED GLOSSARY + + + +| Term | Precise Definition | Etymology / Origin | Related Term | +|:-----|:------------------|:------------------|:-------------| +| **{{TERM_1}}** | {{TERM_1_DEFINITION}} | {{TERM_1_ETYMOLOGY}} | {{TERM_1_RELATED}} | +| **{{TERM_2}}** | {{TERM_2_DEFINITION}} | {{TERM_2_ETYMOLOGY}} | {{TERM_2_RELATED}} | +| **{{TERM_3}}** | {{TERM_3_DEFINITION}} | {{TERM_3_ETYMOLOGY}} | {{TERM_3_RELATED}} | +| **{{TERM_4}}** | {{TERM_4_DEFINITION}} | {{TERM_4_ETYMOLOGY}} | {{TERM_4_RELATED}} | +| **{{TERM_N}}** | {{TERM_N_DEFINITION}} | {{TERM_N_ETYMOLOGY}} | {{TERM_N_RELATED}} | + +--- + +## VI. EXAM PREPARATION + + + +``` +────────────────────────────────────────────────────────────────────────────── + QUESTION 01 · TIER: APPLICATION +────────────────────────────────────────────────────────────────────────────── +``` + +{{EXAM_QUESTION_1 — Require the student to apply a concept from the notes +to a new, specific, concrete scenario. Not a definition question.}} + +
+Reveal Answer and Reasoning + +**Answer** + +{{EXAM_ANSWER_1 — A direct, substantive answer of 3 or more sentences. +Explain not just what the answer is but why it is correct.}} + +**Reasoning Chain** + +1. {{STEP_1A — First logical step establishing the foundation}} +2. {{STEP_1B — Second step applying the relevant concept}} +3. {{STEP_1C — Third step arriving at and justifying the conclusion}} + +**Core Principle Tested:** {{PRINCIPLE_TESTED_1}} + +
+ +--- + +``` +────────────────────────────────────────────────────────────────────────────── + QUESTION 02 · TIER: ANALYSIS +────────────────────────────────────────────────────────────────────────────── +``` + +{{EXAM_QUESTION_2 — Require the student to break down, compare, or critically +evaluate two or more elements from the material.}} + +
+Reveal Answer and Reasoning + +**Answer** + +{{EXAM_ANSWER_2 — A direct, substantive answer of 3 or more sentences. +Draw on comparative or structural knowledge from the notes.}} + +**Reasoning Chain** + +1. {{STEP_2A — Establish the analytical framework or evaluative criteria}} +2. {{STEP_2B — Apply the framework to the material}} +3. {{STEP_2C — Deliver the evaluative conclusion with justification}} + +**Core Principle Tested:** {{PRINCIPLE_TESTED_2}} + +
+ +--- + +``` +────────────────────────────────────────────────────────────────────────────── + QUESTION 03 · TIER: SYNTHESIS +────────────────────────────────────────────────────────────────────────────── +``` + +{{EXAM_QUESTION_3 — Require the student to construct an argument, design a +solution, or evaluate tradeoffs across multiple concepts simultaneously.}} + +
+Reveal Answer and Reasoning + +**Answer** + +{{EXAM_ANSWER_3 — A substantive answer of 3 or more sentences that integrates +multiple concepts from the material. Show the synthesis explicitly.}} + +**Reasoning Chain** + +1. {{STEP_3A — Identify the relevant concepts that must be combined}} +2. {{STEP_3B — Articulate the relationship or tension between them}} +3. {{STEP_3C — Construct and defend the synthesized position or solution}} + +**Core Principle Tested:** {{PRINCIPLE_TESTED_3}} + +
+ +--- + +## VII. KNOWLEDGE CONNECTIONS + +### Conceptual Dependencies + +| Relationship | Concept | +|:------------|:--------| +| **Builds upon** | {{PREREQUISITE_1}} · {{PREREQUISITE_2}} | +| **Leads toward** | {{NEXT_TOPIC_1}} · {{NEXT_TOPIC_2}} | +| **Cross-domain link** | {{INTERDISCIPLINARY_CONNECTION}} | +| **Commonly confused with** | {{COMMONLY_CONFLATED_CONCEPT}} | + +--- + +### Curated Further Study + + + +| # | Resource | Type | Why It Matters | +|:-:|:---------|:-----|:---------------| +| 1 | **{{RESOURCE_1_TITLE}}** | {{RESOURCE_1_TYPE}} | {{RESOURCE_1_REASON}} | +| 2 | **{{RESOURCE_2_TITLE}}** | {{RESOURCE_2_TYPE}} | {{RESOURCE_2_REASON}} | +| 3 | **{{RESOURCE_3_TITLE}}** | {{RESOURCE_3_TYPE}} | {{RESOURCE_3_REASON}} | + +--- + +## VIII. QUICK REFERENCE CARD + + + +### 🔑 Core Takeaways + +| # | Takeaway | +|:-:|:---------| +| 1 | {{TAKEAWAY_1 — Single sentence capturing a complete, testable fact}} | +| 2 | {{TAKEAWAY_2}} | +| 3 | {{TAKEAWAY_3}} | +| 4 | {{TAKEAWAY_4}} | +| 5 | {{TAKEAWAY_5}} | + +### ⚡ Critical Formulas / Patterns + +``` +{{FORMULA_OR_PATTERN_1}} +{{FORMULA_OR_PATTERN_2}} +{{FORMULA_OR_PATTERN_3}} +``` + +### ⚠️ Exam Traps + +> **Trap 1:** {{EXAM_TRAP_1 — A specific misconception examiners exploit}} +> +> **Trap 2:** {{EXAM_TRAP_2}} +> +> **Trap 3:** {{EXAM_TRAP_3}} + +### ✅ Pre-Exam Checklist + +- [ ] I can explain {{KEY_CONCEPT_1}} without notes +- [ ] I can solve a problem involving {{KEY_CONCEPT_2}} +- [ ] I understand the difference between {{CONCEPT_A}} and {{CONCEPT_B}} +- [ ] I can draw the {{DIAGRAM_TYPE}} from memory +- [ ] I can answer all three exam-prep questions above from memory + +--- + +## IX. 
METACOGNITIVE CALIBRATION + + + +### Confidence Meter + +*Rate your understanding after studying this guide:* + +| Concept | 🔴 Lost | 🟡 Shaky | 🟢 Solid | 🔵 Can Teach | +|:--------|:-------:|:-------:|:-------:|:-----------:| +| {{CONCEPT_1}} | ○ | ○ | ○ | ○ | +| {{CONCEPT_2}} | ○ | ○ | ○ | ○ | +| {{CONCEPT_3}} | ○ | ○ | ○ | ○ | +| {{CONCEPT_4}} | ○ | ○ | ○ | ○ | + +### Study Prescriptions + +> **If mostly 🔴 (Lost):** {{RED_PRESCRIPTION — e.g., "Re-read Section IV and re-attempt the worked example with different inputs."}} +> +> **If mostly 🟡 (Shaky):** {{YELLOW_PRESCRIPTION — e.g., "Focus on the Exam Traps in Section VIII and re-do Tier 2 questions."}} +> +> **If mostly 🟢 (Solid):** {{GREEN_PRESCRIPTION — e.g., "Attempt the Synthesis question without hints, then explain it aloud."}} +> +> **If mostly 🔵 (Can Teach):** {{BLUE_PRESCRIPTION — e.g., "Create a novel problem that combines at least two concepts from the register."}} + +--- + +## X. SOURCE ARCHIVE + +*The original student input is preserved verbatim below for audit and review. +This section is read-only. No transformations are applied to this content.* + +
+View Original Source Notes + +``` +{{RAW_STUDENT_INPUT_VERBATIM}} +``` + +
+ +--- + +
+ +``` + ───────────────────────────────────────────────────────────────────────────── + S T A R R Y N O T E · Knowledge Architecture System · v2.0 + Generated {{DATE_YYYY-MM-DD}} · Gemma 3 · Apple Silicon + Structured for clarity. Engineered for mastery. Calibrated for you. + ───────────────────────────────────────────────────────────────────────────── +``` + +
\ No newline at end of file diff --git a/tests/test_engine.py b/tests/test_engine.py new file mode 100644 index 0000000..c4acb67 --- /dev/null +++ b/tests/test_engine.py @@ -0,0 +1,215 @@ +""" +Tests for StarryEngine — AI inference and prompt construction. +Uses mocks for the ML model so tests run instantly without GPU. +""" +import os +import re +import pytest +from unittest.mock import patch, MagicMock +from src.scanner import UniversalResource + + +class TestCleanTemplate: + """Validate the HTML comment stripping logic.""" + + def test_strips_html_comments(self): + from src.model_engine import StarryEngine + + template = "Header\n<!-- hidden instruction -->\nContent" + result = StarryEngine._clean_template(template) + assert "<!--" not in result + assert "Header" in result + assert "Content" in result + + def test_strips_multiline_comments(self): + from src.model_engine import StarryEngine + + template = "Before\n<!--\nLine 1\nLine 2\n-->\nAfter" + result = StarryEngine._clean_template(template) + assert "Line 1" not in result + assert "Before" in result + assert "After" in result + + def test_preserves_markdown_structure(self): + from src.model_engine import StarryEngine + + template = "# Title\n\n## Section\n\n<!-- hidden -->\n\n| Col |\n|-----|\n| Val |" + result = StarryEngine._clean_template(template) + assert "# Title" in result + assert "## Section" in result + assert "| Col |" in result + + def test_collapses_excessive_newlines(self): + from src.model_engine import StarryEngine + + template = "A\n\n\n\n\nB" + result = StarryEngine._clean_template(template) + assert "\n\n\n" not in result + assert "A" in result + assert "B" in result + + def test_clean_reduces_template_size(self): + """The real master template should be significantly reduced.""" + from src.model_engine import StarryEngine + + base_dir = os.path.dirname(os.path.dirname(__file__)) + template_path = os.path.join(base_dir, "templates", "master_template.md") + + with open(template_path, "r", encoding="utf-8") as f: + raw = f.read() + + cleaned = 
StarryEngine._clean_template(raw) + reduction = 1 - len(cleaned) / len(raw) + assert reduction > 0.3, f"Expected >30% reduction, got {reduction:.0%}" + + def test_empty_template(self): + from src.model_engine import StarryEngine + + result = StarryEngine._clean_template("") + assert result == "" + + def test_template_with_no_comments(self): + from src.model_engine import StarryEngine + + template = "# Pure Markdown\n\nNo comments here." + result = StarryEngine._clean_template(template) + assert result == template + + +class TestPromptBuilding: + """Validate the Knowledge Architect prompt construction (without loading the model).""" + + @patch("src.model_engine.load") + def test_prompt_contains_knowledge_architect(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.master_template = "# Template" + engine._prompt_template = "# Template" + + prompt = engine._build_system_prompt("test content", is_image=False) + assert "Knowledge Architect" in prompt + assert "S T A R R Y N O T E" in prompt + + @patch("src.model_engine.load") + def test_prompt_contains_directives(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.master_template = "# Template" + engine._prompt_template = "# Template" + + prompt = engine._build_system_prompt("content", is_image=False) + assert "AUTHORSHIP" in prompt + assert "SYNTHESIS" in prompt + assert "FORMATTING" in prompt + assert "VISUAL REASONING" in prompt + assert "ACADEMIC TONE" in prompt + + @patch("src.model_engine.load") + def test_prompt_contains_template(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.master_template = "# My Template Content" + engine._prompt_template = "# My 
Template Content" + + prompt = engine._build_system_prompt("input data") + assert "MASTER TEMPLATE START" in prompt + assert "# My Template Content" in prompt + assert "MASTER TEMPLATE END" in prompt + + @patch("src.model_engine.load") + def test_prompt_contains_source_input(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.master_template = "# T" + engine._prompt_template = "# T" + + prompt = engine._build_system_prompt("my raw lecture notes here") + assert "my raw lecture notes here" in prompt + + @patch("src.model_engine.load") + def test_image_prompt_uses_visual_label(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.master_template = "# T" + engine._prompt_template = "# T" + + prompt = engine._build_system_prompt("image data", is_image=True) + assert "visual architecture" in prompt + + @patch("src.model_engine.load") + def test_text_prompt_uses_structured_data_label(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.master_template = "# T" + engine._prompt_template = "# T" + + prompt = engine._build_system_prompt("text data", is_image=False) + assert "structured data" in prompt + + +class TestProcessRouting: + """Validate that process_resource routes to the correct analyzer.""" + + @patch("src.model_engine.load") + def test_routes_image_to_image_analyzer(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.model = MagicMock() + engine.tokenizer = MagicMock() + engine.master_template = "# T" + engine._prompt_template = "# T" + + with patch.object(engine, "_analyze_image", 
return_value="image result") as mock: + res = UniversalResource("test.jpg", "image/jpeg", "test.jpg") + result = engine.process_resource(res) + mock.assert_called_once_with("test.jpg", None) + assert result == "image result" + + @patch("src.model_engine.load") + def test_routes_pdf_to_pdf_analyzer(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.model = MagicMock() + engine.tokenizer = MagicMock() + engine.master_template = "# T" + engine._prompt_template = "# T" + + with patch.object(engine, "_analyze_pdf", return_value="pdf result") as mock: + res = UniversalResource("doc.pdf", "application/pdf", "doc.pdf") + result = engine.process_resource(res) + mock.assert_called_once_with("doc.pdf", None) + assert result == "pdf result" + + @patch("src.model_engine.load") + def test_routes_text_to_text_analyzer(self, mock_load): + mock_load.return_value = (MagicMock(), MagicMock()) + from src.model_engine import StarryEngine + + engine = StarryEngine.__new__(StarryEngine) + engine.model = MagicMock() + engine.tokenizer = MagicMock() + engine.master_template = "# T" + engine._prompt_template = "# T" + + with patch.object(engine, "_analyze_text", return_value="text result") as mock: + res = UniversalResource("code.py", "text/x-python", "code.py") + result = engine.process_resource(res) + mock.assert_called_once_with("code.py", None) + assert result == "text result" diff --git a/tests/test_formatter.py b/tests/test_formatter.py new file mode 100644 index 0000000..9b30a2b --- /dev/null +++ b/tests/test_formatter.py @@ -0,0 +1,102 @@ +""" +Tests for StarryFormatter — output persistence engine. +Validates directory creation, file naming, and content writing. 
+""" +import os +import tempfile +import pytest +from src.formatter import StarryFormatter + + +class TestFormatterInit: + """Validate formatter initialization and directory creation.""" + + def test_creates_instructions_directory(self): + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + assert os.path.exists(formatter.output_dir) + assert formatter.output_dir == os.path.join(tmpdir, "Instructions") + + def test_does_not_fail_if_dir_exists(self): + """Initializing twice should not raise an error.""" + with tempfile.TemporaryDirectory() as tmpdir: + StarryFormatter(tmpdir) + StarryFormatter(tmpdir) # Should not raise + + def test_output_dir_is_inside_target(self): + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + assert tmpdir in formatter.output_dir + + +class TestSaveGuide: + """Validate the study guide saving logic.""" + + def test_save_creates_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + path = formatter.save_guide("/source/notes.txt", "# Study Guide Content") + assert os.path.exists(path) + + def test_save_correct_filename(self): + """Output filename should be {original_name}_StudyGuide.md.""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + path = formatter.save_guide("/source/lecture.pdf", "content") + assert os.path.basename(path) == "lecture_StudyGuide.md" + + def test_save_replaces_spaces(self): + """Spaces in filenames should be replaced with underscores.""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + path = formatter.save_guide("/source/my notes file.txt", "content") + assert " " not in os.path.basename(path) + assert "my_notes_file_StudyGuide.md" == os.path.basename(path) + + def test_save_content_integrity(self): + """Saved file should contain exactly the content provided.""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = 
StarryFormatter(tmpdir) + content = "# Test Guide\n\nThis is a **test** study guide." + path = formatter.save_guide("/source/test.txt", content) + + with open(path, "r", encoding="utf-8") as f: + saved = f.read() + assert saved == content + + def test_save_utf8_content(self): + """Should handle Unicode content (math symbols, emojis, etc.).""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + content = "∑ σ² = E[(X − μ)²] 🧠 ✦✦✦" + path = formatter.save_guide("/source/math.txt", content) + + with open(path, "r", encoding="utf-8") as f: + saved = f.read() + assert saved == content + + def test_save_empty_content(self): + """Should handle empty string content gracefully.""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + path = formatter.save_guide("/source/empty.txt", "") + assert os.path.exists(path) + assert os.path.getsize(path) == 0 + + def test_save_strips_extension(self): + """Should strip the original extension before adding _StudyGuide.md.""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + path = formatter.save_guide("/source/code.py", "content") + assert os.path.basename(path) == "code_StudyGuide.md" + assert ".py" not in os.path.basename(path) + + def test_save_multiple_files(self): + """Multiple saves should create separate files.""" + with tempfile.TemporaryDirectory() as tmpdir: + formatter = StarryFormatter(tmpdir) + p1 = formatter.save_guide("/source/a.txt", "content a") + p2 = formatter.save_guide("/source/b.txt", "content b") + assert p1 != p2 + assert os.path.exists(p1) + assert os.path.exists(p2) diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 0000000..da02369 --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,54 @@ +import sys +import os + +# --- Path Configuration --- +# Dynamically add the project root to sys.path to resolve 'src' as a module. 
+# This ensures the script is portable across different execution environments. +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from src.model_engine import StarryEngine +import mlx.core as mx + + +def test_gpu_and_model(): + """ + Validates Hardware Acceleration (Metal) and Gemma 3 Model Inference. + + This test serves as a Sanity Check for the Apple Silicon M3 environment. + It verifies: + 1. Metal GPU availability via MLX. + 2. Successful model weights loading from Hugging Face. + 3. Tokenization and generation pipeline integrity. + """ + print("--- Environment Validation: Apple Silicon GPU ---") + + # Check if the MLX framework has access to the Metal backend (GPU) + gpu_available = mx.metal.is_available() + print(f"Metal GPU Backend Active: {gpu_available}") + + if not gpu_available: + print("CRITICAL WARNING: GPU not detected. Performance will be degraded on CPU.") + + print("\n--- Model Lifecycle: Initializing Gemma 3 ---") + try: + # Initialize the AI Engine. + # Note: Triggers a ~5-8GB download on the first execution. + engine = StarryEngine() + + # Define a mock study note to verify inference logic + test_note = "A Linked List is a linear collection of data elements." + + print("Executing Inference (Generating Study Guide)...") + response = engine.generate_study_guide(test_note, time_complexity="O(n)") + + print("\n--- Generated Output (Markdown) ---") + print(response) + print("\nUnit Test Status: PASSED") + + except Exception as e: + print(f"\nUnit Test Status: FAILED") + print(f"Error Diagnostic: {e}") + + +if __name__ == "__main__": + test_gpu_and_model() \ No newline at end of file diff --git a/tests/test_scanner.py b/tests/test_scanner.py index 62da616..047e64d 100644 --- a/tests/test_scanner.py +++ b/tests/test_scanner.py @@ -1,28 +1,126 @@ -# Unit test for StarryScanner logic -from src.scanner import StarryScanner +""" +Tests for StarryScanner — MIME-based file discovery engine. 
+Validates directory traversal, MIME detection, and UniversalResource packaging. +""" import os +import tempfile +import pytest +from src.scanner import StarryScanner, UniversalResource -def test_basic_scanning(): - # Setup: Point to our tests folder - scanner = StarryScanner(target_exts=(".txt", ".md")) - test_dir = os.path.dirname(__file__) +class TestUniversalResource: + """Validate the UniversalResource dataclass.""" - # Execution: Run the scan - results = scanner.scan_directory(test_dir) + def test_resource_creation(self): + res = UniversalResource( + file_path="/test/file.py", + mime_type="text/x-python", + raw_data="/test/file.py", + ) + assert res.file_path == "/test/file.py" + assert res.mime_type == "text/x-python" + assert res.raw_data == "/test/file.py" - # Logic Validation (Assertions) - assert len(results) > 0, "Scanner should find at least one file" + def test_resource_fields_are_strings(self): + res = UniversalResource(file_path="a", mime_type="b", raw_data="c") + assert isinstance(res.file_path, str) + assert isinstance(res.mime_type, str) - note = results[0] - print(f"\n--- Test Results for: {note.file_path} ---") - print(f"Captured Time: {note.time_complexity}") - print(f"Is Important: {note.important}") - # Check if Regex worked - assert note.time_complexity == "O(n)", f"Expected O(n), got {note.time_complexity}" - assert note.important is True, "Important tag was missed!" 
class TestStarryScanner:
    """Behavioural checks for MIME-based directory scanning."""

    def test_scanner_initializes(self):
        assert StarryScanner().mime is not None

    def test_scan_finds_files(self):
        """A directory containing one file yields at least one result."""
        with tempfile.TemporaryDirectory() as root:
            with open(os.path.join(root, "test.txt"), "w") as handle:
                handle.write("Hello StarryNote")

            found = StarryScanner().scan_directory(root)

            assert len(found) >= 1
            assert any("test.txt" in item.file_path for item in found)

    def test_scan_returns_universal_resources(self):
        """Every result is a fully-populated UniversalResource."""
        with tempfile.TemporaryDirectory() as root:
            with open(os.path.join(root, "note.txt"), "w") as handle:
                handle.write("Study material")

            for item in StarryScanner().scan_directory(root):
                assert isinstance(item, UniversalResource)
                assert item.file_path != ""
                assert item.mime_type != ""

    def test_scan_detects_text_mime(self):
        """A plain .txt file is reported with a text/* MIME type."""
        with tempfile.TemporaryDirectory() as root:
            with open(os.path.join(root, "plain.txt"), "w") as handle:
                handle.write("This is plain text content for testing.")

            matches = [
                item
                for item in StarryScanner().scan_directory(root)
                if "plain.txt" in item.file_path
            ]
            assert len(matches) == 1
            assert "text" in matches[0].mime_type

    def test_scan_empty_directory(self):
        """No files present → empty result list."""
        with tempfile.TemporaryDirectory() as root:
            assert StarryScanner().scan_directory(root) == []

    def test_scan_recursive(self):
        """Files buried in nested subdirectories are still discovered (DFS)."""
        with tempfile.TemporaryDirectory() as root:
            nested = os.path.join(root, "nested", "deep")
            os.makedirs(nested)
            with open(os.path.join(nested, "deep_file.txt"), "w") as handle:
                handle.write("Found in the depths")

            found = StarryScanner().scan_directory(root)

            assert any("deep_file.txt" in item.file_path for item in found)

    def test_scan_multiple_file_types(self):
        """Mixed file types in one directory are all picked up."""
        samples = {
            "notes.txt": "Study notes here",
            "code.py": "print('hello')",
            "readme.md": "# Title\nContent",
        }
        with tempfile.TemporaryDirectory() as root:
            for name, body in samples.items():
                with open(os.path.join(root, name), "w") as handle:
                    handle.write(body)

            assert len(StarryScanner().scan_directory(root)) == 3

    def test_raw_data_equals_file_path(self):
        """raw_data mirrors file_path for downstream processing."""
        with tempfile.TemporaryDirectory() as root:
            with open(os.path.join(root, "test.txt"), "w") as handle:
                handle.write("data")

            for item in StarryScanner().scan_directory(root):
                assert item.raw_data == item.file_path
+""" +import os +import pytest + + +@pytest.fixture +def template_content(): + """Load the master template content.""" + base_dir = os.path.dirname(os.path.dirname(__file__)) + path = os.path.join(base_dir, "templates", "master_template.md") + with open(path, "r", encoding="utf-8") as f: + return f.read() + + +class TestTemplateSections: + """Ensure all 10 required sections exist.""" + + REQUIRED_SECTIONS = [ + "EXECUTIVE SUMMARY", + "CORE CONCEPTS", + "VISUAL KNOWLEDGE GRAPH", + "TECHNICAL DEEP DIVE", + "ANNOTATED GLOSSARY", + "EXAM PREPARATION", + "KNOWLEDGE CONNECTIONS", + "QUICK REFERENCE CARD", + "METACOGNITIVE CALIBRATION", + "SOURCE ARCHIVE", + ] + + @pytest.mark.parametrize("section", REQUIRED_SECTIONS) + def test_section_exists(self, template_content, section): + assert section in template_content, f"Missing required section: {section}" + + def test_sections_are_numbered(self, template_content): + """Sections should be numbered with Roman numerals.""" + for numeral in ["I.", "II.", "III.", "IV.", "V.", "VI.", "VII.", "VIII.", "IX.", "X."]: + assert numeral in template_content, f"Missing numeral: {numeral}" + + +class TestTemplateStructure: + """Validate key structural elements.""" + + def test_has_document_record(self, template_content): + assert "DOCUMENT RECORD" in template_content + + def test_has_concept_register_table(self, template_content): + assert "| Concept |" in template_content + assert "| Definition |" in template_content or "Definition" in template_content + + def test_has_mermaid_block(self, template_content): + assert "```mermaid" in template_content + + def test_has_cyberpunk_styling(self, template_content): + assert "classDef default" in template_content + assert "#bc13fe" in template_content # Neon purple + assert "#00f3ff" in template_content # Neon cyan + + def test_has_exam_questions(self, template_content): + assert "QUESTION 01" in template_content + assert "QUESTION 02" in template_content + assert "QUESTION 03" in template_content 
+ assert "APPLICATION" in template_content + assert "ANALYSIS" in template_content + assert "SYNTHESIS" in template_content + + def test_has_collapsible_answers(self, template_content): + assert "
" in template_content + assert "" in template_content + assert "
" in template_content + + def test_has_confidence_meter(self, template_content): + """Metacognitive section should have the confidence scale.""" + assert "🔴" in template_content + assert "🟡" in template_content + assert "🟢" in template_content + assert "🔵" in template_content + + def test_has_quick_reference_elements(self, template_content): + assert "Core Takeaways" in template_content + assert "Exam Traps" in template_content + assert "Pre-Exam Checklist" in template_content + + def test_has_study_prescriptions(self, template_content): + assert "Study Prescriptions" in template_content + + def test_has_source_archive(self, template_content): + assert "RAW_STUDENT_INPUT_VERBATIM" in template_content + + def test_has_footer(self, template_content): + assert "Knowledge Architecture System" in template_content + assert "v2.0" in template_content + + def test_has_starry_note_branding(self, template_content): + assert "S T A R R Y N O T E" in template_content + + +class TestTemplatePlaceholders: + """Validate that key placeholders exist for the AI to fill.""" + + REQUIRED_PLACEHOLDERS = [ + "{{NOTE_TITLE}}", + "{{SUBJECT_AREA}}", + "{{SPECIFIC_TOPIC}}", + "{{DATE_YYYY-MM-DD}}", + "{{DIFFICULTY_LEVEL}}", + "{{MERMAID_DIAGRAM_TYPE}}", + ] + + @pytest.mark.parametrize("placeholder", REQUIRED_PLACEHOLDERS) + def test_placeholder_exists(self, template_content, placeholder): + assert placeholder in template_content, f"Missing placeholder: {placeholder}" + + def test_minimum_template_length(self, template_content): + """Template should be substantial (500+ lines).""" + lines = template_content.strip().split("\n") + assert len(lines) >= 400, f"Template too short: {len(lines)} lines" diff --git a/tests/test_tui.py b/tests/test_tui.py new file mode 100644 index 0000000..ab5c61c --- /dev/null +++ b/tests/test_tui.py @@ -0,0 +1,134 @@ +""" +Tests for the TUI utility functions in main.py. +These are pure functions — no GPU, no model, no Rich rendering needed. 
+""" +import sys +import os +import pytest + +# Add project root to path for imports +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +from main import _icon, _sz, _density, _should_skip, SKIP, MIME_ICONS + + +class TestIconMapping: + """Validate MIME type → emoji icon mapping.""" + + def test_image_icon(self): + assert "🖼" in _icon("image/jpeg") + assert "🖼" in _icon("image/png") + + def test_pdf_icon(self): + assert "📄" in _icon("application/pdf") + + def test_python_icon(self): + assert "🐍" in _icon("text/x-python") + + def test_text_icon(self): + assert "📝" in _icon("text/plain") + + def test_markdown_icon(self): + assert "📘" in _icon("text/markdown") + + def test_unknown_mime_fallback(self): + assert "📦" in _icon("application/octet-stream") + assert "📦" in _icon("something/unknown") + + def test_all_mapped_types_have_icons(self): + for mime_key in MIME_ICONS: + result = _icon(mime_key) + assert result != "📦", f"'{mime_key}' should have a specific icon" + + +class TestSizeFormatting: + """Validate human-readable file size output.""" + + def test_bytes(self): + assert _sz(0) == "0 B" + assert _sz(512) == "512 B" + assert _sz(1023) == "1023 B" + + def test_kilobytes(self): + result = _sz(1024) + assert "KB" in result + assert "1.0" in result + + def test_megabytes(self): + result = _sz(1024 * 1024) + assert "MB" in result + + def test_gigabytes(self): + result = _sz(1024 ** 3) + assert "GB" in result + + def test_terabytes(self): + result = _sz(1024 ** 4) + assert "TB" in result + + def test_fractional(self): + result = _sz(1536) # 1.5 KB + assert "1.5 KB" == result + + +class TestDensityRating: + """Validate the Knowledge Density star rating system.""" + + def test_minimum_one_star(self): + result = _density(1000, 100) # ratio < 1 + assert "✦" in result + + def test_scales_with_ratio(self): + low = _density(1000, 500) # ratio ~0.5 + high = _density(100, 1000) # ratio ~10 + # High ratio should have more stars + assert 
high.count("✦") > low.count("✦") + + def test_max_five_stars(self): + result = _density(1, 100000) # huge ratio + assert result.count("✦") == 5 + + def test_zero_input_no_crash(self): + """Should handle zero input bytes without division error.""" + result = _density(0, 1000) + assert "✦" in result + + def test_zero_output(self): + result = _density(1000, 0) + assert "✦" in result + + +class TestSkipPatterns: + """Validate the directory/file skip logic.""" + + def test_skips_venv(self): + assert _should_skip("/project/.venv/lib/python3.11/site.py") + + def test_skips_pycache(self): + assert _should_skip("/project/__pycache__/module.cpython-311.pyc") + + def test_skips_git(self): + assert _should_skip("/project/.git/HEAD") + + def test_skips_instructions(self): + assert _should_skip("/project/Instructions/guide.md") + + def test_skips_ds_store(self): + assert _should_skip("/project/.DS_Store") + + def test_skips_idea(self): + assert _should_skip("/project/.idea/workspace.xml") + + def test_does_not_skip_source(self): + assert not _should_skip("/project/src/model_engine.py") + + def test_does_not_skip_notes(self): + assert not _should_skip("/study/lecture_notes.pdf") + + def test_does_not_skip_images(self): + assert not _should_skip("/study/diagram.png") + + def test_all_skip_patterns_defined(self): + """Ensure critical patterns are in the SKIP set.""" + for pattern in ["Instructions", ".venv", "__pycache__", ".git", ".DS_Store"]: + assert pattern in SKIP, f"'{pattern}' should be in SKIP set" diff --git a/tests/test_universal_scanner.py b/tests/test_universal_scanner.py new file mode 100644 index 0000000..cc7035c --- /dev/null +++ b/tests/test_universal_scanner.py @@ -0,0 +1,30 @@ +import sys +import os + +# Dynamic Path Mapping for Professional Project Structure +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from src.scanner import StarryScanner + + +def test_multimodal_scanner(): + """ + Logic: Ensure the scanner accurately 
identifies different file types + without relying solely on extensions. + """ + print("--- Multimodal Scanner: Logic Verification ---") + scanner = StarryScanner() + + # We will scan the current directory to see what it finds + found_items = scanner.scan_directory(".") + + for item in found_items: + # Professional Logging of detected MIME types + filename = os.path.basename(item.file_path) + print(f"File: {filename:<25} | Type: {item.mime_type}") + + print("\nScanner Status: OPERATIONAL") + + +if __name__ == "__main__": + test_multimodal_scanner() \ No newline at end of file