-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_match_logged.py
More file actions
129 lines (107 loc) · 4.24 KB
/
run_match_logged.py
File metadata and controls
129 lines (107 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python
"""Run match command with detailed logging."""
import sys
sys.path.insert(0, '/c/Users/ryand/Documents/Claude/Projects/CornerFlash')
import json
from pathlib import Path
from difflib import SequenceMatcher
PROJECT_ROOT = Path(__file__).resolve().parent
FRAMES_DIR = PROJECT_ROOT / "frames"
TRACK_DATA = PROJECT_ROOT / "public" / "data" / "nordschleife.json"
MATCHES_FILE = PROJECT_ROOT / "scripts" / "corner_matches.json"
TESSERACT = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
def log(msg):
print(msg, flush=True)
with open(PROJECT_ROOT / "match_logged.log", "a") as f:
f.write(msg + "\n")
f.flush()
def ocr_frame(frame_path):
"""Run Tesseract OCR on a frame, return detected text."""
try:
import pytesseract
pytesseract.pytesseract.tesseract_cmd = TESSERACT
from PIL import Image
img = Image.open(frame_path)
text = pytesseract.image_to_string(img)
return text.strip()
except Exception as e:
return ""
def fuzzy_match(detected_text, corner_names, threshold=0.6):
"""Match detected text against known corner names."""
detected_lower = detected_text.lower()
best_match = None
best_score = 0
for name in corner_names:
if name in detected_lower:
return name, 1.0
for line in detected_lower.split("\n"):
line = line.strip()
if not line:
continue
score = SequenceMatcher(None, name, line).ratio()
if score > best_score and score >= threshold:
best_score = score
best_match = name
words = line.split()
for i in range(len(words)):
for j in range(i + 1, min(i + 5, len(words) + 1)):
chunk = " ".join(words[i:j])
score = SequenceMatcher(None, name, chunk).ratio()
if score > best_score and score >= threshold:
best_score = score
best_match = chunk if score < 0.8 else name
if best_match:
return best_match, best_score
return None, 0
# Main logic
log("=== Starting match with logging ===")
# Load corners
with open(TRACK_DATA, "r", encoding="utf-8") as f:
corners = json.load(f)
corner_names = {c["name"].lower(): c for c in corners}
log(f"Loaded {len(corner_names)} corner names")
# Load existing matches
matches = {}
# Get frames
frames = sorted(FRAMES_DIR.glob("*.jpg"))
log(f"OCR-ing {len(frames)} frames against {len(corner_names)} corner names...")
matched_count = 0
for i, frame in enumerate(frames):
if (i + 1) % 100 == 0:
log(f" Processed {i + 1}/{len(frames)}... (matched: {matched_count})")
text = ocr_frame(frame)
if not text:
continue
match_name, score = fuzzy_match(text, corner_names)
if match_name and match_name in corner_names:
corner = corner_names[match_name]
corner_id = corner["id"]
fname = frame.stem
parts = fname.rsplit("_", 1)
ts_str = parts[-1].replace("ms", "")
video_stem = parts[0]
ts_ms = int(ts_str)
existing = matches.get(corner_id)
if not existing or score > existing.get("score", 0):
matches[corner_id] = {
"corner_id": corner_id,
"corner_name": corner["name"],
"video_stem": video_stem,
"timestamp_ms": ts_ms,
"frame_file": frame.name,
"score": score,
"detected_text": text[:200],
}
matched_count += 1
log(f" [OK] Matched '{corner['name']}' (score={score:.2f}) from {frame.name}")
# Save matches
with open(MATCHES_FILE, "w", encoding="utf-8") as f:
json.dump(matches, f, indent=2)
log(f"\nMatched {matched_count} corners. {len(corner_names) - len(matches)} unmatched.")
if len(corner_names) - len(matches) > 0:
unmatched = set(corner_names.keys()) - {corner_names[m["corner_name"].lower()]["name"].lower()
for m in matches.values()
if m["corner_name"].lower() in corner_names}
if unmatched:
log(f" Unmatched corners: {', '.join(sorted(unmatched))}")
log("=== Match complete ===")