Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ jobs:

- name: Test with pytest
run: pytest tests -v
env:
PYTHONPATH: ${{ github.workspace }}
15 changes: 10 additions & 5 deletions src/model_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,13 @@

import fitz
from PIL import Image
from mlx_lm import load
from mlx_lm.generate import stream_generate
try:
    # mlx_lm is only importable on Apple Silicon; fall back to None so this
    # module still imports in CI / non-Apple environments, where the tests
    # mock these symbols.
    from mlx_lm import load
    from mlx_lm.generate import stream_generate
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    load = None
    stream_generate = None

from src.scanner import UniversalResource

Expand Down Expand Up @@ -39,7 +44,7 @@ def __init__(self, model_path="google/gemma-3-4b-it"):
self.master_template = "# S T A R R Y N O T E \n\n[Recovery Mode Active]"

# Pre-clean template: strip HTML comments to reduce prompt tokens
self._prompt_template = self._compact_template(self.master_template)
self._prompt_template = self._clean_template(self.master_template)
log.info("S T A R R Y N O T E Engine is fully operational (template: %d → %d chars).",
len(self.master_template), len(self._prompt_template))

Expand Down Expand Up @@ -99,7 +104,7 @@ def _stream(self, prompt, on_token=None, images=None):
for i, response in enumerate(stream_generate(
self.model, self.tokenizer, prompt=prompt, **kwargs
)):
text = response.text
text += response.text # v0.30+ yields per-segment, must accumulate
if on_token:
on_token(i + 1)

Expand Down Expand Up @@ -190,7 +195,7 @@ def _analyze_pdf(self, file_path: str, on_token=None) -> str:
for page in doc:
text_buffer += page.get_text() + "\n"

content = text_buffer.strip()[:8000]
content = text_buffer.strip()[:12000]

if len(content) < 100:
log.info("Image-based PDF detected — initializing Vision OCR…")
Expand Down
Loading