-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclustering.py
More file actions
46 lines (38 loc) · 1.35 KB
/
clustering.py
File metadata and controls
46 lines (38 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import pickle
import numpy as np
from sklearn.cluster import DBSCAN
from pathlib import Path
import os
import shutil
CLUSTER_OUTPUT_DIR = "static/clusters"
PKL_PATH = "face_index.pkl"
with open(PKL_PATH, "rb") as f:
face_db = pickle.load(f)
# Combine all features for clustering
vectors = []
valid_entries = []
for entry in face_db:
if entry["face_vec"] is not None and entry["bg_vec"] is not None:
combined = np.concatenate([entry["face_vec"], entry["bg_vec"]])
vectors.append(combined)
valid_entries.append(entry)
vectors = np.array(vectors).astype("float32")
# Use DBSCAN with automatic epsilon guess (can be tuned)
print("Clustering", len(vectors), "images...")
clustering = DBSCAN(eps=30, min_samples=3, metric='euclidean').fit(vectors)
labels = clustering.labels_
# Save clusters to folders
output_dir = Path(CLUSTER_OUTPUT_DIR)
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir(parents=True)
for label, entry in zip(labels, valid_entries):
cluster_name = f"cluster_{label}" if label >= 0 else "noise"
cluster_path = output_dir / cluster_name
cluster_path.mkdir(parents=True, exist_ok=True)
dest = cluster_path / Path(entry["path"]).name
try:
shutil.copy(entry["path"], dest)
except Exception as e:
print(f"Failed to copy {entry['path']}: {e}")
print("✅ Clustering complete.")