ramsani · ramsani · May 16, 2026 · May 16, 2026 · May 16, 2026
@@ -123,4 +123,4 @@ def main() -> int:
 
 
 if __name__ == "__main__":
-    raise SystemExit(main())
+    raise SystemExit(main())
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+"""Detect whether text is English or Spanish by matching marker words and characters."""
+
+from __future__ import annotations
+import re
+
+SPANISH_MARKERS = {"á","é","í","ó","ú","ñ","ü","¿","¡","los","las","del","que","es","en","con","para","por","su","una"}
+ENGLISH_MARKERS = {"the","and","of","to","is","in","it","you","that","was","for","are","as","be","this","have","from"}
+
+# Single-character markers almost never occur as standalone word tokens (and
+# "¿"/"¡" are dropped by the tokenizer entirely), so they are matched against
+# the raw text rather than against the token set.
+_SPANISH_CHAR_MARKERS = frozenset(m for m in SPANISH_MARKERS if len(m) == 1)
+_SPANISH_WORD_MARKERS = frozenset(SPANISH_MARKERS) - _SPANISH_CHAR_MARKERS
+
+def detect_language(text: str) -> str:
+    """Classify *text* as English or Spanish.
+
+    The Spanish score is the number of distinct Spanish marker words found
+    among the word tokens plus the number of distinct Spanish marker
+    characters (accented vowels, "ñ", "ü", "¿", "¡") found anywhere in the
+    text. The English score is the number of distinct English marker words.
+
+    Args:
+        text: The text to classify; matching is case-insensitive.
+
+    Returns:
+        "es" when the Spanish score is strictly greater than the English
+        score, otherwise "en" (including for empty text and ties).
+    """
+    if not text:
+        return "en"
+    text_lower = text.lower()
+    words = set(re.findall(r"[a-záéíóúñü]+", text_lower))
+    # Iterating the string yields its characters, so this counts which marker
+    # characters are present at least once.
+    char_hits = len(_SPANISH_CHAR_MARKERS.intersection(text_lower))
+    es_score = len(words & _SPANISH_WORD_MARKERS) + char_hits
+    en_score = len(words & ENGLISH_MARKERS)
+    return "es" if es_score > en_score else "en"
+
+def update_html_lang(html_path, lang: str) -> bool:
+    """Set the ``lang`` attribute of the first ``<html>`` tag in an HTML file.
+
+    Only the opening ``<html ...>`` tag is inspected, so ``lang=`` or
+    ``hreflang=`` attributes on other elements do not affect the result.
+    The tag name is matched case-insensitively.
+
+    Args:
+        html_path: Object providing ``read_text(encoding=...)`` and
+            ``write_text(text, encoding=...)`` (e.g. ``pathlib.Path``).
+        lang: Language code to store, written verbatim inside double quotes.
+
+    Returns:
+        True if the file was rewritten; False if the tag already carries
+        *lang* (compared case-insensitively) or no ``<html>`` tag exists.
+        The file is not written when False is returned.
+    """
+    html = html_path.read_text(encoding="utf-8")
+    tag_match = re.search(r"<html\b[^>]*>", html, re.IGNORECASE)
+    if tag_match is None:
+        return False  # nothing to update
+    old_tag = tag_match.group(0)
+    # Require whitespace before "lang" so "xml:lang" / "data-lang" are skipped;
+    # accept double-quoted, single-quoted, or unquoted values.
+    attr_match = re.search(
+        r'\slang\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+)', old_tag, re.IGNORECASE
+    )
+    if attr_match is None:
+        # old_tag[:5] is the "<html" prefix in its original letter case.
+        new_tag = f'{old_tag[:5]} lang="{lang}"{old_tag[5:]}'
+    else:
+        current = attr_match.group(1).strip("\"'")
+        if current.lower() == lang.lower():
+            return False  # already correct
+        value_start, value_end = attr_match.span(1)
+        new_tag = f'{old_tag[:value_start]}"{lang}"{old_tag[value_end:]}'
+    html = html[:tag_match.start()] + new_tag + html[tag_match.end():]
+    html_path.write_text(html, encoding="utf-8")
+    return True
+
+if __name__ == "__main__":
+    # CLI: classify the file named by the first argument, or stdin when no
+    # argument is given, and print the detected language code.
+    import sys
+
+    if len(sys.argv) < 2:
+        text = sys.stdin.read()
+    else:
+        # Read as UTF-8 (matching update_html_lang) and close the handle
+        # deterministically instead of leaking it.
+        with open(sys.argv[1], encoding="utf-8") as source:
+            text = source.read()
+    print(detect_language(text))
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+"""Test i18n language detection and phrase-table coverage."""
+import subprocess, sys, json
+from pathlib import Path
+
+def test_detect_english():
+    """Check that an English sentence is classified as 'en'.
+
+    Runs ``detect_language`` in a child interpreter and inspects its stdout.
+    """
+    # NOTE(review): the working directory is hard-coded to a /tmp checkout;
+    # confirm this matches where the suite is expected to run.
+    snippet = "from scripts.i18n_detect import detect_language; print(detect_language('The architecture consists of three main services'))"
+    r = subprocess.run([sys.executable, "-c", snippet], capture_output=True, text=True, cwd="/tmp/html-explainer-clean")
+    # Separate assertions so a failure shows whether the import/run broke
+    # (with stderr) or the classification was wrong.
+    assert r.returncode == 0, f"detector subprocess failed: {r.stderr}"
+    assert r.stdout.strip() == "en", f"expected 'en', got {r.stdout.strip()!r}"
+    print("PASS: English text detected as 'en'")
+
+def test_detect_spanish():
+    """Check that a Spanish sentence is classified as 'es'.
+
+    Runs ``detect_language`` in a child interpreter and inspects its stdout.
+    """
+    # NOTE(review): the working directory is hard-coded to a /tmp checkout;
+    # confirm this matches where the suite is expected to run.
+    snippet = "from scripts.i18n_detect import detect_language; print(detect_language('La arquitectura consiste en tres servicios principales'))"
+    r = subprocess.run([sys.executable, "-c", snippet], capture_output=True, text=True, cwd="/tmp/html-explainer-clean")
+    # Separate assertions so a failure shows whether the import/run broke
+    # (with stderr) or the classification was wrong.
+    assert r.returncode == 0, f"detector subprocess failed: {r.stderr}"
+    assert r.stdout.strip() == "es", f"expected 'es', got {r.stdout.strip()!r}"
+    print("PASS: Spanish text detected as 'es'")
+
+def test_phrases_coverage():
+    """Check that the phrase table has 20+ entries per language and matching keys.
+
+    Loads ``i18n-phrases.json`` and verifies that both the "en" and "es"
+    sections contain at least 20 phrases and define exactly the same keys.
+    """
+    # Explicit UTF-8 and a context manager: the JSON holds non-ASCII Spanish
+    # text, and the handle should not be left for the garbage collector.
+    with open("/tmp/html-explainer-clean/scripts/i18n-phrases.json", encoding="utf-8") as fh:
+        phrases = json.load(fh)
+    # Computed outside the f-strings: reusing double quotes inside a
+    # replacement field is a SyntaxError before Python 3.12.
+    en_count = len(phrases["en"])
+    assert en_count >= 20, f"Expected 20+ phrases, got {en_count}"
+    assert len(phrases["es"]) >= 20
+    assert set(phrases["en"].keys()) == set(phrases["es"].keys()), "ES and EN must have same keys"
+    print(f"PASS: i18n-phrases.json has {en_count} phrases in both EN and ES")
+
+if __name__ == "__main__":
+    # Run every check in order; the first failing assertion aborts the script.
+    for check in (test_detect_english, test_detect_spanish, test_phrases_coverage):
+        check()
+    print("\nAll i18n tests passed.")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -123,4 +123,4 @@ def main() -> int:


		if __name__ == "__main__":
		raise SystemExit(main())
		raise SystemExit(main())