-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathretry-errors.py
More file actions
85 lines (66 loc) · 2.45 KB
/
retry-errors.py
File metadata and controls
85 lines (66 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import pickle
import numpy as np
from PIL import Image
from insightface.app import FaceAnalysis
from transformers import CLIPProcessor, CLIPModel
from tqdm import tqdm
import hashlib
# Files and directories used by this script.
ERROR_LOG = "index.err"  # one "<path> | <message>" entry per line
INDEX_FILE = "face_index.pkl"  # pickled list of indexed-image records
THUMBNAIL_DIR = "static/thumbnails"

# Face analysis model, restricted to the CPU execution provider.
face_model = FaceAnalysis(
    name="buffalo_l",
    providers=["CPUExecutionProvider"],
)
face_model.prepare(ctx_id=0)

# CLIP model and its preprocessor, both loaded from the same checkpoint.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
def extract_clip_embedding(image):
    """Compute a CLIP image embedding for ``image``.

    The image is preprocessed with the module-level ``clip_processor``
    (PyTorch tensors requested) and fed to ``clip_model``.

    Args:
        image: Image passed to the processor as ``images=``; the retry loop
            in this file passes an RGB PIL image.

    Returns:
        The first row of the model's image features, detached from the
        autograd graph and converted to a NumPy array.
    """
    model_inputs = clip_processor(images=image, return_tensors="pt", padding=True)
    image_features = clip_model.get_image_features(**model_inputs)
    first_row = image_features[0]
    return first_row.detach().numpy()
def extract_face_embedding(image_np):
    """Return the embedding of the first face found in ``image_np``.

    Args:
        image_np: Image array handed unchanged to ``face_model.get``.

    Returns:
        The ``embedding`` attribute of the first result, or ``None`` when
        the model reports no faces.
    """
    # NOTE(review): insightface examples feed BGR arrays (cv2.imread), while
    # the retry loop in this file passes an RGB array -- confirm this matches
    # how the existing index entries were produced before changing it.
    detections = face_model.get(image_np)
    if not detections:
        return None
    return detections[0].embedding
def hash_filename(path):
    """Build a file name from ``path``: MD5 hex digest plus original extension.

    The digest is taken over the path string itself (encoded with the
    ``str.encode()`` default), not over the file's contents. The extension
    is whatever ``os.path.splitext`` reports, including the leading dot, or
    an empty string when the path has none.
    """
    _, extension = os.path.splitext(path)
    digest = hashlib.md5(path.encode()).hexdigest()
    return f"{digest}{extension}"
# Retry every file listed in the error log and fold the successes into the
# existing index. Files that fail again are written back to the error log.

# Load the existing index, or start a new one when none has been written yet.
# NOTE(review): pickle.load can execute code embedded in the file; only run
# this against an index file produced by a trusted process.
if os.path.exists(INDEX_FILE):
    with open(INDEX_FILE, "rb") as f:
        face_db = pickle.load(f)
else:
    face_db = []

# Read the error log; without one there is nothing to retry.
if not os.path.exists(ERROR_LOG):
    print("No error log found.")
    # SystemExit works even where the `exit` helper injected by the `site`
    # module is unavailable (python -S, frozen or embedded interpreters).
    raise SystemExit(1)

with open(ERROR_LOG, "r") as ef:
    lines = [line.strip() for line in ef if line.strip()]

# Each entry is "<path> | <message>". Keep every path once, in log order, and
# only when the file still exists. Entries whose file is gone are not retried
# and therefore disappear from the rewritten log.
logged_paths = (line.split(" | ")[0] for line in lines)
retry_paths = [p for p in dict.fromkeys(logged_paths) if os.path.exists(p)]

# Saving a thumbnail fails when the target directory is missing, which would
# turn every retry into a fresh error.
os.makedirs(THUMBNAIL_DIR, exist_ok=True)

successful = 0
remaining_errors = []

for filepath in tqdm(retry_paths):
    try:
        # Release the file handle as soon as the pixel data has been copied.
        with Image.open(filepath) as source:
            img = source.convert("RGB")
        img_np = np.array(img)

        face_vec = extract_face_embedding(img_np)
        if face_vec is None:
            raise ValueError("No face detected.")

        bg_vec = extract_clip_embedding(img)
        if bg_vec is None:
            raise ValueError("Failed to compute background vector.")

        thumb_name = hash_filename(filepath)
        thumb_path = os.path.join(THUMBNAIL_DIR, thumb_name)
        # thumbnail() shrinks `img` in place, so it runs only after both
        # embeddings have been computed from the full-size image.
        img.thumbnail((160, 160))
        img.save(thumb_path)

        face_db.append({
            "path": filepath,
            "thumb_name": thumb_name,
            "face_vec": face_vec,
            "bg_vec": bg_vec,
        })
        successful += 1
    except Exception as e:
        # Any failure keeps the file in the error log for a later retry.
        remaining_errors.append(f"{filepath} | {str(e)}")

# Write the index to a temporary file and swap it in, so a failure part-way
# through pickling cannot leave a truncated index behind.
tmp_index_file = INDEX_FILE + ".tmp"
with open(tmp_index_file, "wb") as f:
    pickle.dump(face_db, f)
os.replace(tmp_index_file, INDEX_FILE)

with open(ERROR_LOG, "w") as ef:
    # Terminate every entry so anything appended later starts on its own line.
    ef.writelines(f"{entry}\n" for entry in remaining_errors)

print(f"✅ Recovered {successful} files. Remaining errors: {len(remaining_errors)}")