diff --git a/backend/app/config/settings.py b/backend/app/config/settings.py
index 912134a64..172b1d2da 100644
--- a/backend/app/config/settings.py
+++ b/backend/app/config/settings.py
@@ -1,8 +1,13 @@
+from __future__ import annotations
+
+import logging
import os
import sys
-
from platformdirs import user_data_dir
+logger = logging.getLogger(__name__)
+
+
if getattr(sys, "frozen", False):
MODEL_EXPORTS_PATH = os.path.join(user_data_dir("PictoPy"), "models")
else:
@@ -35,3 +40,99 @@
DATABASE_PATH = os.path.join(user_data_dir("PictoPy"), "database", "PictoPy.db")
THUMBNAIL_IMAGES_PATH = os.path.join(user_data_dir("PictoPy"), "thumbnails")
IMAGES_PATH = "./images"
+
+
+def _get_env_float(
+    name: str,
+    default: float,
+    min_value: float | None = None,
+    max_value: float | None = None,
+) -> float:
+    """Read a float setting from the environment, falling back to ``default``.
+
+    Args:
+        name: Environment variable to read.
+        default: Value returned when the variable is unset or unusable.
+        min_value: Inclusive lower bound, or ``None`` for no lower bound.
+        max_value: Inclusive upper bound, or ``None`` for no upper bound.
+
+    Returns:
+        The parsed value, or ``default`` (after logging a warning) when the
+        variable does not parse as a float, is NaN or infinite, or lies
+        outside ``[min_value, max_value]``.
+    """
+    # Function-scope import keeps this helper self-contained.
+    import math
+
+    raw = os.getenv(name)
+    if raw is None:
+        return default
+    try:
+        value = float(raw)
+    except ValueError:
+        logger.warning(
+            "Invalid value %r for %s (expected float); using default %s",
+            raw,
+            name,
+            default,
+        )
+        return default
+    # float() happily parses "nan" and "inf". NaN compares False against every
+    # bound, so without this guard it would pass the range check below.
+    if not math.isfinite(value):
+        logger.warning(
+            "Non-finite value %r for %s (expected a finite float); using default %s",
+            raw,
+            name,
+            default,
+        )
+        return default
+    if (min_value is not None and value < min_value) or (
+        max_value is not None and value > max_value
+    ):
+        logger.warning(
+            "Out-of-range value %s for %s (expected [%s, %s]); using default %s",
+            value,
+            name,
+            min_value,
+            max_value,
+            default,
+        )
+        return default
+    return value
+
+
+def _get_env_int(
+    name: str,
+    default: int,
+    min_value: int | None = None,
+    max_value: int | None = None,
+) -> int:
+    """Read an integer setting from the environment, falling back to ``default``.
+
+    Args:
+        name: Environment variable to read.
+        default: Value returned when the variable is unset or unusable.
+        min_value: Inclusive lower bound, or ``None`` for no lower bound.
+        max_value: Inclusive upper bound, or ``None`` for no upper bound.
+
+    Returns:
+        The parsed value, or ``default`` (after logging a warning) when the
+        variable does not parse as an int or lies outside
+        ``[min_value, max_value]``.
+    """
+    text = os.getenv(name)
+    if text is None:
+        return default
+
+    try:
+        parsed = int(text)
+    except ValueError:
+        logger.warning(
+            "Invalid value %r for %s (expected int); using default %s",
+            text,
+            name,
+            default,
+        )
+        return default
+
+    too_small = min_value is not None and parsed < min_value
+    too_large = max_value is not None and parsed > max_value
+    if not (too_small or too_large):
+        return parsed
+
+    logger.warning(
+        "Out-of-range value %s for %s (expected [%s, %s]); using default %s",
+        parsed,
+        name,
+        min_value,
+        max_value,
+        default,
+    )
+    return default
+
+
+# Clustering Configuration
+# All values can be overridden through environment variables; unusable values
+# fall back to the defaults given here.
+PICTO_CLUSTERING_EPS = _get_env_float("PICTO_CLUSTERING_EPS", 0.75, min_value=0.0)
+# DBSCAN requires eps to be strictly positive, but the helper's bounds are
+# inclusive, so 0.0 has to be rejected here. Written as "not > 0" so that NaN
+# (which fails every comparison) is rejected as well.
+if not PICTO_CLUSTERING_EPS > 0.0:
+    logger.warning(
+        "PICTO_CLUSTERING_EPS=%s is invalid (must be greater than 0); "
+        "using default %s",
+        PICTO_CLUSTERING_EPS,
+        0.75,
+    )
+    PICTO_CLUSTERING_EPS = 0.75
+PICTO_CLUSTERING_MIN_SAMPLES = _get_env_int(
+    "PICTO_CLUSTERING_MIN_SAMPLES", 2, min_value=1
+)
+if PICTO_CLUSTERING_MIN_SAMPLES < 2:
+    # Lazy %-style arguments, consistent with the helpers above.
+    logger.warning(
+        "PICTO_CLUSTERING_MIN_SAMPLES=%s is invalid "
+        "(minimum is 2). Resetting to 2 to prevent cluster chaining.",
+        PICTO_CLUSTERING_MIN_SAMPLES,
+    )
+    PICTO_CLUSTERING_MIN_SAMPLES = 2
+PICTO_CLUSTERING_SIMILARITY_THRESHOLD = _get_env_float(
+    "PICTO_CLUSTERING_SIMILARITY_THRESHOLD", 0.85, min_value=0.0, max_value=1.0
+)
+PICTO_CLUSTERING_MERGE_THRESHOLD = _get_env_float(
+    "PICTO_CLUSTERING_MERGE_THRESHOLD", 0.7, min_value=0.0, max_value=1.0
+)
+PICTO_CLUSTERING_CONF_THRESHOLD = _get_env_float(
+    "PICTO_CLUSTERING_CONF_THRESHOLD", 0.45, min_value=0.0, max_value=1.0
+)
+PICTO_CLUSTERING_BLUR_THRESHOLD = _get_env_float(
+    "PICTO_CLUSTERING_BLUR_THRESHOLD", 80.0, min_value=0.0
+)
+PICTO_CLUSTERING_MIN_FACE_SIZE = _get_env_int(
+    "PICTO_CLUSTERING_MIN_FACE_SIZE", 1600, min_value=1
+)
diff --git a/backend/app/models/FaceDetector.py b/backend/app/models/FaceDetector.py
index 9e10fd5fc..3d4a9f385 100644
--- a/backend/app/models/FaceDetector.py
+++ b/backend/app/models/FaceDetector.py
@@ -7,6 +7,12 @@
from app.models.YOLO import YOLO
from app.database.faces import db_insert_face_embeddings_by_image_id
from app.logging.setup_logging import get_logger
+from app.config.settings import (
+ PICTO_CLUSTERING_CONF_THRESHOLD,
+ PICTO_CLUSTERING_BLUR_THRESHOLD,
+ PICTO_CLUSTERING_MIN_FACE_SIZE,
+)
+from app.utils.face_quality import face_passes_quality_gate
# Initialize logger
logger = get_logger(__name__)
@@ -16,7 +22,7 @@ class FaceDetector:
def __init__(self):
self.yolo_detector = YOLO(
YOLO_util_get_model_path("face"),
- conf_threshold=0.45,
+ conf_threshold=PICTO_CLUSTERING_CONF_THRESHOLD,
iou_threshold=0.45,
)
self.facenet = FaceNet(FaceNet_util_get_model_path())
@@ -34,26 +40,38 @@ def detect_faces(self, image_id: str, image_path: str, forSearch: bool = False):
logger.info(f"Detected {len(boxes)} faces in image {image_id}.")
processed_faces, embeddings, bboxes, confidences = [], [], [], []
+ faces_skipped = 0
for box, score in zip(boxes, scores):
- if score > self.yolo_detector.conf_threshold:
- x1, y1, x2, y2 = map(int, box)
+ x1, y1, x2, y2 = map(int, box)
- # Create bounding box dictionary in JSON format
- bbox = {"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1}
- bboxes.append(bbox)
- confidences.append(float(score))
+ padding = 20
+ face_img = img[
+ max(0, y1 - padding) : min(img.shape[0], y2 + padding),
+ max(0, x1 - padding) : min(img.shape[1], x2 + padding),
+ ]
- padding = 20
- face_img = img[
- max(0, y1 - padding) : min(img.shape[0], y2 + padding),
- max(0, x1 - padding) : min(img.shape[1], x2 + padding),
- ]
- processed_face = FaceNet_util_preprocess_image(face_img)
- processed_faces.append(processed_face)
+ if not face_passes_quality_gate(
+ face_crop=face_img,
+ bbox=(x1, y1, x2, y2),
+ conf_score=float(score),
+ conf_threshold=self.yolo_detector.conf_threshold,
+ blur_threshold=PICTO_CLUSTERING_BLUR_THRESHOLD,
+ min_face_size=PICTO_CLUSTERING_MIN_FACE_SIZE,
+ ):
+ faces_skipped += 1
+ continue
- embedding = self.facenet.get_embedding(processed_face)
- embeddings.append(embedding)
+ # Create bounding box dictionary in JSON format
+ bbox = {"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1}
+ bboxes.append(bbox)
+ confidences.append(float(score))
+
+ processed_face = FaceNet_util_preprocess_image(face_img)
+ processed_faces.append(processed_face)
+
+ embedding = self.facenet.get_embedding(processed_face)
+ embeddings.append(embedding)
if not forSearch and embeddings:
db_insert_face_embeddings_by_image_id(
@@ -64,6 +82,7 @@ def detect_faces(self, image_id: str, image_path: str, forSearch: bool = False):
"ids": f"{class_ids}",
"processed_faces": processed_faces,
"num_faces": len(embeddings),
+ "faces_skipped": faces_skipped,
}
def close(self):
diff --git a/backend/app/routes/face_clusters.py b/backend/app/routes/face_clusters.py
index 78394df79..ca057638f 100644
--- a/backend/app/routes/face_clusters.py
+++ b/backend/app/routes/face_clusters.py
@@ -9,8 +9,9 @@
db_get_cluster_by_id,
db_update_cluster,
db_get_all_clusters_with_face_counts,
- db_get_images_by_cluster_id, # Add this import
+ db_get_images_by_cluster_id,
)
+from app.utils.face_clusters import cluster_util_face_clusters_sync
from app.schemas.face_clusters import (
RenameClusterRequest,
RenameClusterResponse,
@@ -313,16 +314,17 @@ def trigger_global_reclustering():
try:
logger.info("Starting manual global face reclustering...")
- # Use the smart clustering function with force flag set to True
- from app.utils.face_clusters import cluster_util_face_clusters_sync
-
- result = cluster_util_face_clusters_sync(force_full_reclustering=True)
+ result, total_faces_skipped = cluster_util_face_clusters_sync(
+ force_full_reclustering=True
+ )
if result == 0:
return GlobalReclusterResponse(
success=True,
message="No faces found to cluster",
- data=GlobalReclusterData(clusters_created=0),
+ data=GlobalReclusterData(
+ clusters_created=0, faces_skipped=total_faces_skipped
+ ),
)
logger.info("Global reclustering completed successfully")
@@ -330,7 +332,9 @@ def trigger_global_reclustering():
return GlobalReclusterResponse(
success=True,
message="Global reclustering completed successfully.",
- data=GlobalReclusterData(clusters_created=result),
+ data=GlobalReclusterData(
+ clusters_created=result, faces_skipped=total_faces_skipped
+ ),
)
except Exception as e:
diff --git a/backend/app/schemas/face_clusters.py b/backend/app/schemas/face_clusters.py
index 7744d91ce..69aebd4d1 100644
--- a/backend/app/schemas/face_clusters.py
+++ b/backend/app/schemas/face_clusters.py
@@ -76,6 +76,7 @@ class GetClusterImagesResponse(BaseModel):
class GlobalReclusterData(BaseModel):
clusters_created: Optional[int] = None
+ faces_skipped: Optional[int] = None
class GlobalReclusterResponse(BaseModel):
diff --git a/backend/app/utils/face_clusters.py b/backend/app/utils/face_clusters.py
index 4c373c981..7e4ea59d7 100644
--- a/backend/app/utils/face_clusters.py
+++ b/backend/app/utils/face_clusters.py
@@ -6,12 +6,13 @@
import sqlite3
from datetime import datetime
from sklearn.cluster import DBSCAN
+from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict, Counter
-from typing import List, Dict, Optional, Union
+from typing import List, Dict, Optional, Union, Tuple
from numpy.typing import NDArray
from app.database.connection import get_db_connection
@@ -26,7 +27,13 @@
db_get_metadata,
db_update_metadata,
)
-from app.config.settings import DATABASE_PATH
+from app.config.settings import (
+ DATABASE_PATH,
+ PICTO_CLUSTERING_EPS,
+ PICTO_CLUSTERING_MIN_SAMPLES,
+ PICTO_CLUSTERING_SIMILARITY_THRESHOLD,
+ PICTO_CLUSTERING_MERGE_THRESHOLD,
+)
from app.logging.setup_logging import get_logger
# Initialize logger
@@ -102,10 +109,10 @@ def cluster_util_face_clusters_sync(force_full_reclustering: bool = False):
metadata = db_get_metadata()
if force_full_reclustering or cluster_util_is_reclustering_needed(metadata):
# Perform clustering operation
- results = cluster_util_cluster_all_face_embeddings()
+ results, total_faces_skipped = cluster_util_cluster_all_face_embeddings()
if not results:
- return 0
+ return 0, total_faces_skipped
results = [result.to_dict() for result in results]
@@ -153,13 +160,15 @@ def cluster_util_face_clusters_sync(force_full_reclustering: bool = False):
current_metadata = metadata or {}
current_metadata["reclustering_time"] = datetime.now().timestamp()
db_update_metadata(current_metadata, cursor)
- return len(cluster_list)
+ return len(cluster_list), total_faces_skipped
else:
- face_cluster_mappings = cluster_util_assign_cluster_to_faces_without_clusterId()
+ face_cluster_mappings, total_faces_skipped = (
+ cluster_util_assign_cluster_to_faces_without_clusterId()
+ )
with get_db_connection() as conn:
cursor = conn.cursor()
db_update_face_cluster_ids_batch(face_cluster_mappings, cursor)
- return len(face_cluster_mappings)
+ return len(face_cluster_mappings), total_faces_skipped
def _validate_embedding(embedding: NDArray, min_norm: float = 1e-6) -> bool:
@@ -185,12 +194,26 @@ def _validate_embedding(embedding: NDArray, min_norm: float = 1e-6) -> bool:
return True
+def estimate_eps(embeddings: np.ndarray, k: int) -> Optional[float]:
+    """Estimate a DBSCAN ``eps`` from k-nearest-neighbour cosine distances.
+
+    For every embedding the cosine distance to its farthest neighbour among
+    the ``k + 1`` nearest is taken, and the 90th percentile of those
+    distances is returned.
+
+    Args:
+        embeddings: Array with one embedding per row.
+        k: Neighbour rank used for the estimate; must be at least 1.
+
+    Returns:
+        The estimated eps, or ``None`` when no usable estimate exists: too
+        few embeddings (``len(embeddings) <= k``), ``k`` below 1, or an
+        estimate that is not a finite positive number.
+    """
+    if k < 1 or len(embeddings) <= k:
+        return None
+
+    # k + 1 neighbours are requested because querying with the fitted points
+    # normally returns each point as its own nearest neighbour (distance 0).
+    nn = NearestNeighbors(n_neighbors=k + 1, metric="cosine")
+    nn.fit(embeddings)
+    distances, _ = nn.kneighbors(embeddings)
+
+    # np.percentile does not require sorted input, so no explicit sort.
+    estimated_eps = float(np.percentile(distances[:, -1], 90))
+
+    # DBSCAN rejects eps <= 0. Duplicate embeddings can drive the percentile
+    # to 0 and NaN distances would propagate, so report "no estimate" and let
+    # the caller fall back to its configured eps.
+    if not np.isfinite(estimated_eps) or estimated_eps <= 0.0:
+        return None
+    return estimated_eps
+
+
def cluster_util_cluster_all_face_embeddings(
- eps: float = 0.75,
- min_samples: int = 2,
- similarity_threshold: float = 0.85,
+ eps: float = PICTO_CLUSTERING_EPS,
+ min_samples: int = PICTO_CLUSTERING_MIN_SAMPLES,
+ similarity_threshold: float = PICTO_CLUSTERING_SIMILARITY_THRESHOLD,
merge_threshold: float = None,
-) -> List[ClusterResult]:
+) -> Tuple[List[ClusterResult], int]:
"""
Cluster face embeddings using DBSCAN with similarity validation.
@@ -232,9 +255,11 @@ def cluster_util_cluster_all_face_embeddings(
if invalid_count > 0:
logger.warning(f"Filtered out {invalid_count} invalid embeddings")
+ total_faces_skipped = invalid_count
+
if not embeddings:
logger.error("No valid embeddings found after validation")
- return []
+ return [], total_faces_skipped
logger.info(f"Total valid faces to cluster: {len(face_ids)}")
@@ -259,6 +284,15 @@ def cluster_util_cluster_all_face_embeddings(
f"Applied similarity threshold: {similarity_threshold} (max_distance: {max_distance:.3f})"
)
+ estimated_eps = estimate_eps(embeddings_array, k=min_samples)
+ if estimated_eps is not None:
+ logger.info(f"Adaptive eps estimated: {estimated_eps:.4f}")
+ eps = estimated_eps
+ else:
+ logger.warning(
+ f"Too few embeddings for eps estimation, using config default: {eps}"
+ )
+
# Perform DBSCAN clustering with precomputed distances
dbscan = DBSCAN(
eps=eps,
@@ -307,17 +341,21 @@ def cluster_util_cluster_all_face_embeddings(
# Post-clustering merge: merge similar clusters based on representative faces
# Use similarity_threshold if merge_threshold not explicitly provided
- effective_merge_threshold = merge_threshold if merge_threshold is not None else 0.7
+ effective_merge_threshold = (
+ merge_threshold
+ if merge_threshold is not None
+ else PICTO_CLUSTERING_MERGE_THRESHOLD
+ )
results = _merge_similar_clusters(
results, merge_threshold=effective_merge_threshold
)
- return results
+ return results, total_faces_skipped
def cluster_util_assign_cluster_to_faces_without_clusterId(
similarity_threshold: float = 0.8,
-) -> List[Dict]:
+) -> Tuple[List[Dict], int]:
"""
Assign cluster IDs to faces that don't have clusters using nearest mean method with similarity threshold.
@@ -339,13 +377,13 @@ def cluster_util_assign_cluster_to_faces_without_clusterId(
# Get faces without cluster assignments
unassigned_faces = db_get_faces_unassigned_clusters()
if not unassigned_faces:
- return []
+ return [], 0
# Get cluster mean embeddings
cluster_means = db_get_cluster_mean_embeddings()
if not cluster_means:
- return []
+ return [], 0
# Prepare data for nearest neighbor assignment with validation
cluster_ids = []
@@ -370,7 +408,7 @@ def cluster_util_assign_cluster_to_faces_without_clusterId(
if not mean_embeddings:
logger.error("No valid cluster means found after validation")
- return []
+ return [], 0
mean_embeddings_array = np.array(mean_embeddings)
@@ -415,7 +453,9 @@ def cluster_util_assign_cluster_to_faces_without_clusterId(
f"Skipped {skipped_invalid} faces with invalid embeddings during assignment"
)
- return face_cluster_mappings
+ total_faces_skipped = skipped_invalid
+
+ return face_cluster_mappings, total_faces_skipped
def _merge_similar_clusters(
diff --git a/backend/app/utils/face_quality.py b/backend/app/utils/face_quality.py
new file mode 100644
index 000000000..4b3c082dc
--- /dev/null
+++ b/backend/app/utils/face_quality.py
@@ -0,0 +1,40 @@
+import cv2
+import numpy as np
+
+
+def face_passes_quality_gate(
+    face_crop: np.ndarray,
+    bbox: tuple,  # (x1, y1, x2, y2)
+    conf_score: float,
+    conf_threshold: float,
+    blur_threshold: float,
+    min_face_size: int,
+) -> bool:
+    """
+    Decide whether a detected face is good enough to be embedded.
+
+    The face is rejected as soon as one check fails. Checks run in this order:
+    detector confidence below ``conf_threshold``, bounding-box area below
+    ``min_face_size``, empty crop, and variance of the Laplacian of the crop
+    below ``blur_threshold``.
+    """
+    # Confidence gate
+    if conf_score < conf_threshold:
+        return False
+
+    # Size gate: bounding-box area
+    left, top, right, bottom = bbox
+    if (right - left) * (bottom - top) < min_face_size:
+        return False
+
+    # Nothing to measure on an empty crop
+    if face_crop.size == 0:
+        return False
+
+    # Blur gate: a 3-dimensional crop is converted to grayscale first
+    is_multichannel = len(face_crop.shape) == 3
+    gray = (
+        cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY) if is_multichannel else face_crop
+    )
+    sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
+    return not sharpness < blur_threshold
diff --git a/backend/app/utils/images.py b/backend/app/utils/images.py
index e874ea267..59b46c663 100644
--- a/backend/app/utils/images.py
+++ b/backend/app/utils/images.py
@@ -106,10 +106,11 @@ def image_util_process_untagged_images() -> bool:
def image_util_classify_and_face_detect_images(
untagged_images: List[Dict[str, str]],
-) -> None:
+) -> int:
"""Classify untagged images and detect faces if applicable."""
object_classifier = ObjectClassifier()
face_detector = FaceDetector()
+ total_faces_skipped = 0
try:
for image in untagged_images:
image_path = image["path"]
@@ -129,7 +130,9 @@ def image_util_classify_and_face_detect_images(
# Step 3: Detect faces if "person" class is present
if classes and 0 in classes and 0 < classes.count(0) < 7:
- face_detector.detect_faces(image_id, image_path)
+ result = face_detector.detect_faces(image_id, image_path)
+ if result:
+ total_faces_skipped += result.get("faces_skipped", 0)
# Step 4: Update the image status in the database
db_update_image_tagged_status(image_id, True)
@@ -138,6 +141,8 @@ def image_util_classify_and_face_detect_images(
object_classifier.close()
face_detector.close()
+ return total_faces_skipped
+
def image_util_prepare_image_records(
image_files: List[str], folder_path_to_id: Dict[str, int]
diff --git a/backend/tests/test_face_clusters.py b/backend/tests/test_face_clusters.py
index 3ccd284d5..2c09795c9 100644
--- a/backend/tests/test_face_clusters.py
+++ b/backend/tests/test_face_clusters.py
@@ -1,6 +1,12 @@
import pytest
+import numpy as np
from unittest.mock import patch
from fastapi import FastAPI
+from app.utils.face_clusters import (
+ cluster_util_cluster_all_face_embeddings,
+ estimate_eps,
+)
+from app.utils.face_quality import face_passes_quality_gate
from fastapi.testclient import TestClient
from app.routes.face_clusters import router as face_clusters_router
@@ -410,3 +416,212 @@ def test_unsupported_http_methods(self, method, endpoint):
"""Test that unsupported HTTP methods return 405."""
response = client.request(method, endpoint)
assert response.status_code == 405
+
+
+# ============================================================================
+# Algorithmic Logic Tests
+# ============================================================================
+
+
+def generate_synthetic_embeddings(
+    num_identities=2, points_per_identity=10, dim=512, noise_std=0.005
+):
+    """Build tight, unit-norm clusters of synthetic embeddings.
+
+    Returns a pair of arrays: the embeddings (one row per point) and the
+    identity index each row belongs to.
+    """
+    np.random.seed(42)  # Fixed seed keeps the generated data reproducible
+
+    points = []
+    identities = []
+    for identity in range(num_identities):
+        # Each identity is centred on a random unit vector
+        centre = np.random.randn(dim)
+        centre = centre / np.linalg.norm(centre)
+
+        for _ in range(points_per_identity):
+            # Perturb the centre, then re-normalise because cosine distance
+            # works best with unit vectors
+            jittered = centre + np.random.randn(dim) * noise_std
+            points.append(jittered / np.linalg.norm(jittered))
+            identities.append(identity)
+
+    return np.array(points), np.array(identities)
+
+
+def generate_noise_embeddings(num_points=80, dim=512):
+    """Return ``num_points`` random unit-norm vectors of size ``dim``."""
+    np.random.seed(43)
+    samples = np.random.randn(num_points, dim)
+    return samples / np.linalg.norm(samples, axis=1, keepdims=True)
+
+
+def mock_faces_data(embeddings):
+    """Wrap embeddings in the row dicts the mocked database call returns."""
+    rows = []
+    for face_id, vector in enumerate(embeddings):
+        rows.append({"face_id": face_id, "embeddings": vector, "cluster_name": None})
+    return rows
+
+
+class TestFaceClusteringAlgo:
+    """Tests for the clustering algorithm, eps estimation and the quality gate."""
+
+    @patch("app.utils.face_clusters.db_get_all_faces_with_cluster_names")
+    def test_folder_size_regression(self, mock_db_get):
+        """Test 1: Folder-size regression (the original bug).
+
+        The same two identities must end up in two clusters whether they are
+        clustered alone or together with 80 unrelated noise embeddings.
+        """
+        # Generate 20 embeddings (2 identities, 10 points each)
+        identity_embs, identity_labels = generate_synthetic_embeddings(
+            num_identities=2, points_per_identity=10
+        )
+
+        # --- Run 1: Isolated run ---
+        mock_db_get.return_value = mock_faces_data(identity_embs)
+
+        # eps=0.75 is only a fallback here: the clustering function replaces it
+        # with the adaptive estimate from estimate_eps() whenever one is available.
+        results_isolated, _ = cluster_util_cluster_all_face_embeddings(
+            eps=0.75, min_samples=2, similarity_threshold=0.85
+        )
+
+        # Count clusters in isolated run
+        isolated_clusters = set(r.cluster_uuid for r in results_isolated)
+        assert (
+            len(isolated_clusters) == 2
+        ), f"The folder-size bug is present: expected 2 clusters, got {len(isolated_clusters)} in isolated run"
+
+        # Verify each cluster holds most of its identity's 10 points (at least 8)
+        cluster_counts = {}
+        for r in results_isolated:
+            cluster_counts[r.cluster_uuid] = cluster_counts.get(r.cluster_uuid, 0) + 1
+
+        for count in cluster_counts.values():
+            assert (
+                count >= 8
+            ), f"Identity cluster should contain majority of points, got {count}"
+
+        # --- Run 2: With noise ---
+        noise_embs = generate_noise_embeddings(num_points=80)
+        all_embs = np.vstack([identity_embs, noise_embs])
+
+        mock_db_get.return_value = mock_faces_data(all_embs)
+
+        results_noise, _ = cluster_util_cluster_all_face_embeddings(
+            eps=0.75, min_samples=2, similarity_threshold=0.85
+        )
+
+        # We need to find the clusters containing the original identity points (face_ids 0 to 19)
+        identity_results = [r for r in results_noise if r.face_id < 20]
+
+        noise_run_identity_clusters = set(r.cluster_uuid for r in identity_results)
+        assert (
+            len(noise_run_identity_clusters) == 2
+        ), f"Expected 2 clusters for identity points with noise, got {len(noise_run_identity_clusters)}"
+
+    def test_adaptive_eps_stability(self):
+        """Test 2: Adaptive eps stays within (0, 1) for datasets of 20, 50 and 100 points."""
+        identity_embs, _ = generate_synthetic_embeddings(
+            num_identities=2, points_per_identity=10
+        )
+
+        sizes = [20, 50, 100]
+
+        for size in sizes:
+            # Pad the 20 identity points with noise up to the requested size
+            num_noise = size - len(identity_embs)
+            if num_noise > 0:
+                noise_embs = generate_noise_embeddings(num_points=num_noise)
+                test_embs = np.vstack([identity_embs, noise_embs])
+            else:
+                test_embs = identity_embs
+
+            eps = estimate_eps(test_embs, k=2)
+
+            assert eps is not None
+            assert (
+                0.0 < eps < 1.0
+            ), f"eps value {eps} out of expected bounds for size {size}"
+
+    def test_estimate_eps_fallback(self):
+        """Test 3: estimate_eps() returns None when there are at most k embeddings."""
+        # Empty array
+        assert estimate_eps(np.array([]), k=2) is None
+
+        # 1 element
+        assert estimate_eps(np.random.randn(1, 512), k=2) is None
+
+        # 2 elements
+        assert estimate_eps(np.random.randn(2, 512), k=2) is None
+
+    def test_quality_gate(self):
+        """Test 4: Quality gate unit tests, one case per rejection reason plus a pass."""
+        # A sharp, large face crop should pass
+        # Random noise image has high variance (sharp)
+        np.random.seed(42)
+        sharp_crop = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
+
+        assert (
+            face_passes_quality_gate(
+                face_crop=sharp_crop,
+                bbox=(0, 0, 100, 100),
+                conf_score=0.9,
+                conf_threshold=0.45,
+                blur_threshold=10.0,  # Random noise will be well above this
+                min_face_size=400,
+            )
+            is True
+        )
+
+        # A blurred crop should fail
+        # Flat image has zero variance
+        blurred_crop = np.ones((100, 100, 3), dtype=np.uint8) * 128
+
+        assert (
+            face_passes_quality_gate(
+                face_crop=blurred_crop,
+                bbox=(0, 0, 100, 100),
+                conf_score=0.9,
+                conf_threshold=0.45,
+                blur_threshold=10.0,
+                min_face_size=400,
+            )
+            is False
+        )
+
+        # A small bbox should fail
+        assert (
+            face_passes_quality_gate(
+                face_crop=sharp_crop,
+                bbox=(0, 0, 10, 10),  # area = 100
+                conf_score=0.9,
+                conf_threshold=0.45,
+                blur_threshold=10.0,
+                min_face_size=400,
+            )
+            is False
+        )
+
+        # A low confidence score should fail
+        assert (
+            face_passes_quality_gate(
+                face_crop=sharp_crop,
+                bbox=(0, 0, 100, 100),
+                conf_score=0.4,  # < 0.45
+                conf_threshold=0.45,
+                blur_threshold=10.0,
+                min_face_size=400,
+            )
+            is False
+        )
+
+        # An empty crop should fail
+        empty_crop = np.zeros((0, 0, 3), dtype=np.uint8)
+        assert (
+            face_passes_quality_gate(
+                face_crop=empty_crop,
+                bbox=(0, 0, 500, 500),
+                conf_score=0.9,
+                conf_threshold=0.45,
+                blur_threshold=10.0,
+                min_face_size=400,
+            )
+            is False
+        )
diff --git a/docs/backend/backend_python/openapi.json b/docs/backend/backend_python/openapi.json
index 4ab3ffa44..f5298ffdc 100644
--- a/docs/backend/backend_python/openapi.json
+++ b/docs/backend/backend_python/openapi.json
@@ -2073,119 +2073,6 @@
],
"title": "DeleteFoldersResponse"
},
- "app__schemas__folders__ErrorResponse": {
- "properties": {
- "success": {
- "type": "boolean",
- "title": "Success",
- "default": false
- },
- "message": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Message"
- },
- "error": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Error"
- }
- },
- "type": "object",
- "title": "ErrorResponse"
- },
- "app__schemas__face_clusters__ErrorResponse": {
- "properties": {
- "success": {
- "type": "boolean",
- "title": "Success",
- "default": false
- },
- "message": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Message"
- },
- "error": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "title": "Error"
- }
- },
- "type": "object",
- "title": "ErrorResponse"
- },
- "app__schemas__images__ErrorResponse": {
- "properties": {
- "success": {
- "type": "boolean",
- "title": "Success",
- "default": false
- },
- "message": {
- "type": "string",
- "title": "Message"
- },
- "error": {
- "type": "string",
- "title": "Error"
- }
- },
- "type": "object",
- "required": [
- "message",
- "error"
- ],
- "title": "ErrorResponse"
- },
- "app__schemas__user_preferences__ErrorResponse": {
- "properties": {
- "success": {
- "type": "boolean",
- "title": "Success"
- },
- "error": {
- "type": "string",
- "title": "Error"
- },
- "message": {
- "type": "string",
- "title": "Message"
- }
- },
- "type": "object",
- "required": [
- "success",
- "error",
- "message"
- ],
- "title": "ErrorResponse",
- "description": "Error response model"
- },
"FaceSearchRequest": {
"properties": {
"path": {
@@ -2612,6 +2499,17 @@
}
],
"title": "Clusters Created"
+ },
+ "faces_skipped": {
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Faces Skipped"
}
},
"type": "object",
@@ -3407,6 +3305,119 @@
"type"
],
"title": "ValidationError"
+ },
+ "app__schemas__face_clusters__ErrorResponse": {
+ "properties": {
+ "success": {
+ "type": "boolean",
+ "title": "Success",
+ "default": false
+ },
+ "message": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Message"
+ },
+ "error": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Error"
+ }
+ },
+ "type": "object",
+ "title": "ErrorResponse"
+ },
+ "app__schemas__folders__ErrorResponse": {
+ "properties": {
+ "success": {
+ "type": "boolean",
+ "title": "Success",
+ "default": false
+ },
+ "message": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Message"
+ },
+ "error": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Error"
+ }
+ },
+ "type": "object",
+ "title": "ErrorResponse"
+ },
+ "app__schemas__images__ErrorResponse": {
+ "properties": {
+ "success": {
+ "type": "boolean",
+ "title": "Success",
+ "default": false
+ },
+ "message": {
+ "type": "string",
+ "title": "Message"
+ },
+ "error": {
+ "type": "string",
+ "title": "Error"
+ }
+ },
+ "type": "object",
+ "required": [
+ "message",
+ "error"
+ ],
+ "title": "ErrorResponse"
+ },
+ "app__schemas__user_preferences__ErrorResponse": {
+ "properties": {
+ "success": {
+ "type": "boolean",
+ "title": "Success"
+ },
+ "error": {
+ "type": "string",
+ "title": "Error"
+ },
+ "message": {
+ "type": "string",
+ "title": "Message"
+ }
+ },
+ "type": "object",
+ "required": [
+ "success",
+ "error",
+ "message"
+ ],
+ "title": "ErrorResponse",
+ "description": "Error response model"
}
}
}
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 2c5b6bddb..523d37a5f 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -6,6 +6,7 @@ import { ThemeProvider } from '@/contexts/ThemeContext';
import QueryClientProviders from '@/config/QueryClientProvider';
import { GlobalLoader } from './components/Loader/GlobalLoader';
import { InfoDialog } from './components/Dialog/InfoDialog';
+import GlobalAlert from './components/GlobalAlert/GlobalAlert';
import { useSelector } from 'react-redux';
import { RootState } from './app/store';
const App: React.FC = () => {
@@ -31,6 +32,7 @@ const App: React.FC = () => {
variant={variant}
showCloseButton={showCloseButton}
/>
+
);
diff --git a/frontend/src/api/api-functions/face_clusters.ts b/frontend/src/api/api-functions/face_clusters.ts
index 7decba4cd..d219f3b19 100644
--- a/frontend/src/api/api-functions/face_clusters.ts
+++ b/frontend/src/api/api-functions/face_clusters.ts
@@ -67,8 +67,15 @@ export const fetchSearchedFacesBase64 = async (
return response.data;
};
-export const triggerGlobalReclustering = async (): Promise => {
- const response = await apiClient.post(
+/** Data payload of the global reclustering response. */
+export interface GlobalReclusterData {
+  // Number of clusters reported by the backend; may be null.
+  clusters_created: number | null;
+  // Number of faces the backend skipped while clustering; may be null.
+  faces_skipped: number | null;
+}
+
+export const triggerGlobalReclustering = async (): Promise<
+ BackendRes
+> => {
+ const response = await apiClient.post>(
faceClustersEndpoints.globalRecluster,
);
return response.data;
diff --git a/frontend/src/app/store.ts b/frontend/src/app/store.ts
index 98f69e7a9..12fd7c9c4 100644
--- a/frontend/src/app/store.ts
+++ b/frontend/src/app/store.ts
@@ -5,6 +5,7 @@ import searchReducer from '@/features/searchSlice';
import imageReducer from '@/features/imageSlice';
import faceClustersReducer from '@/features/faceClustersSlice';
import infoDialogReducer from '@/features/infoDialogSlice';
+import globalAlertReducer from '@/features/globalAlertSlice';
import folderReducer from '@/features/folderSlice';
import memoriesReducer from '@/features/memoriesSlice';
@@ -15,6 +16,7 @@ export const store = configureStore({
images: imageReducer,
faceClusters: faceClustersReducer,
infoDialog: infoDialogReducer,
+ globalAlert: globalAlertReducer,
folders: folderReducer,
search: searchReducer,
memories: memoriesReducer,
diff --git a/frontend/src/components/GlobalAlert/GlobalAlert.tsx b/frontend/src/components/GlobalAlert/GlobalAlert.tsx
new file mode 100644
index 000000000..7f646470f
--- /dev/null
+++ b/frontend/src/components/GlobalAlert/GlobalAlert.tsx
@@ -0,0 +1,53 @@
+import React, { useEffect } from 'react';
+import { AlertCircle, X } from 'lucide-react';
+import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert';
+import { useDispatch, useSelector } from 'react-redux';
+import { RootState } from '@/app/store';
+import { hideGlobalAlert } from '@/features/globalAlertSlice';
+
+const GlobalAlert: React.FC = () => {
+ const dispatch = useDispatch();
+ const { isOpen, title, message } = useSelector(
+ (state: RootState) => state.globalAlert,
+ );
+
+ useEffect(() => {
+ if (!isOpen) {
+ return;
+ }
+
+ const timer = window.setTimeout(() => {
+ dispatch(hideGlobalAlert());
+ }, 5000);
+
+ return () => window.clearTimeout(timer);
+ }, [dispatch, isOpen, title, message]);
+
+ if (!isOpen) {
+ return null;
+ }
+
+ return (
+
+
+
+
+ {title}
+
+
+ {message}
+
+
+
+
+ );
+};
+
+export default GlobalAlert;
diff --git a/frontend/src/features/globalAlertSlice.ts b/frontend/src/features/globalAlertSlice.ts
new file mode 100644
index 000000000..58f1e3650
--- /dev/null
+++ b/frontend/src/features/globalAlertSlice.ts
@@ -0,0 +1,39 @@
+import { createSlice, PayloadAction } from '@reduxjs/toolkit';
+
+/** Shape of the Redux state backing the global alert. */
+interface GlobalAlertState {
+  // Whether the alert is currently shown.
+  isOpen: boolean;
+  // Heading text of the alert.
+  title: string;
+  // Body text of the alert.
+  message: string;
+}
+
+// The alert starts closed with no title or message.
+const initialState: GlobalAlertState = {
+  isOpen: false,
+  title: '',
+  message: '',
+};
+
+// Redux slice that owns the global alert state and its show/hide reducers.
+const globalAlertSlice = createSlice({
+  name: 'globalAlert',
+  initialState,
+  reducers: {
+    // Opens the alert and stores the title and message from the payload.
+    showGlobalAlert(
+      state,
+      action: PayloadAction<{
+        title: string;
+        message: string;
+      }>,
+    ) {
+      state.isOpen = true;
+      state.title = action.payload.title;
+      state.message = action.payload.message;
+    },
+    // Closes the alert and clears its title and message.
+    hideGlobalAlert(state) {
+      state.isOpen = false;
+      state.title = '';
+      state.message = '';
+    },
+  },
+});
+
+export const { showGlobalAlert, hideGlobalAlert } = globalAlertSlice.actions;
+export default globalAlertSlice.reducer;
diff --git a/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx b/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx
index bcecd3f08..dc81ab7d7 100644
--- a/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx
+++ b/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx
@@ -12,6 +12,7 @@ import { showInfoDialog } from '@/features/infoDialogSlice';
import { triggerGlobalReclustering } from '@/api/api-functions/face_clusters';
import { usePictoMutation } from '@/hooks/useQueryExtension';
import { useMutationFeedback } from '@/hooks/useMutationFeedback';
+import { showGlobalAlert } from '@/features/globalAlertSlice';
/**
* Component for application controls in settings
@@ -34,6 +35,18 @@ const ApplicationControlsCard: React.FC = () => {
const reclusterMutation = usePictoMutation({
mutationFn: triggerGlobalReclustering,
autoInvalidateTags: ['clusters'],
+ onSuccess: (data) => {
+ const facesSkipped = data.data?.faces_skipped;
+
+ if (facesSkipped != null && facesSkipped > 0) {
+ dispatch(
+ showGlobalAlert({
+ title: 'Faces Skipped',
+ message: `${facesSkipped} face(s) were skipped during clustering due to invalid embeddings.`,
+ }),
+ );
+ }
+ },
});
const feedbackOptions = React.useMemo(