From bebafe6899a019b2548a4ef0b5ae48751198085c Mon Sep 17 00:00:00 2001 From: ROHAN PANDEY <95585299+rohan-pandeyy@users.noreply.github.com> Date: Thu, 21 May 2026 23:48:30 +0530 Subject: [PATCH 1/6] feat(backend): implement face quality parsing with DBSCAN and centralize clustering parameters --- backend/app/config/settings.py | 19 +++++++++++ backend/app/models/FaceDetector.py | 51 ++++++++++++++++++++---------- backend/app/utils/face_clusters.py | 20 +++++++++--- backend/app/utils/face_quality.py | 37 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 21 deletions(-) create mode 100644 backend/app/utils/face_quality.py diff --git a/backend/app/config/settings.py b/backend/app/config/settings.py index 912134a64..1f0b295b6 100644 --- a/backend/app/config/settings.py +++ b/backend/app/config/settings.py @@ -35,3 +35,22 @@ DATABASE_PATH = os.path.join(user_data_dir("PictoPy"), "database", "PictoPy.db") THUMBNAIL_IMAGES_PATH = os.path.join(user_data_dir("PictoPy"), "thumbnails") IMAGES_PATH = "./images" + +# Clustering Configuration +PICTO_CLUSTERING_EPS = float(os.getenv("PICTO_CLUSTERING_EPS", "0.75")) +PICTO_CLUSTERING_MIN_SAMPLES = int(os.getenv("PICTO_CLUSTERING_MIN_SAMPLES", "2")) +PICTO_CLUSTERING_SIMILARITY_THRESHOLD = float( + os.getenv("PICTO_CLUSTERING_SIMILARITY_THRESHOLD", "0.85") +) +PICTO_CLUSTERING_MERGE_THRESHOLD = float( + os.getenv("PICTO_CLUSTERING_MERGE_THRESHOLD", "0.7") +) +PICTO_CLUSTERING_CONF_THRESHOLD = float( + os.getenv("PICTO_CLUSTERING_CONF_THRESHOLD", "0.45") +) +PICTO_CLUSTERING_BLUR_THRESHOLD = float( + os.getenv("PICTO_CLUSTERING_BLUR_THRESHOLD", "80.0") +) +PICTO_CLUSTERING_MIN_FACE_SIZE = int( + os.getenv("PICTO_CLUSTERING_MIN_FACE_SIZE", "1600") +) diff --git a/backend/app/models/FaceDetector.py b/backend/app/models/FaceDetector.py index 9e10fd5fc..3d4a9f385 100644 --- a/backend/app/models/FaceDetector.py +++ b/backend/app/models/FaceDetector.py @@ -7,6 +7,12 @@ from app.models.YOLO import YOLO from app.database.faces import db_insert_face_embeddings_by_image_id from app.logging.setup_logging import get_logger +from app.config.settings import ( + PICTO_CLUSTERING_CONF_THRESHOLD, + PICTO_CLUSTERING_BLUR_THRESHOLD, + PICTO_CLUSTERING_MIN_FACE_SIZE, +) +from app.utils.face_quality import face_passes_quality_gate # Initialize logger logger = get_logger(__name__) @@ -16,7 +22,7 @@ class FaceDetector: def __init__(self): self.yolo_detector = YOLO( YOLO_util_get_model_path("face"), - conf_threshold=0.45, + conf_threshold=PICTO_CLUSTERING_CONF_THRESHOLD, iou_threshold=0.45, ) self.facenet = FaceNet(FaceNet_util_get_model_path()) @@ -34,26 +40,38 @@ def detect_faces(self, image_id: str, image_path: str, forSearch: bool = False): logger.info(f"Detected {len(boxes)} faces in image {image_id}.") processed_faces, embeddings, bboxes, confidences = [], [], [], [] + faces_skipped = 0 for box, score in zip(boxes, scores): - if score > self.yolo_detector.conf_threshold: - x1, y1, x2, y2 = map(int, box) + x1, y1, x2, y2 = map(int, box) - # Create bounding box dictionary in JSON format - bbox = {"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1} - bboxes.append(bbox) - confidences.append(float(score)) + padding = 20 + face_img = img[ + max(0, y1 - padding) : min(img.shape[0], y2 + padding), + max(0, x1 - padding) : min(img.shape[1], x2 + padding), + ] - padding = 20 - face_img = img[ - max(0, y1 - padding) : min(img.shape[0], y2 + padding), - max(0, x1 - padding) : min(img.shape[1], x2 + padding), - ] - processed_face = FaceNet_util_preprocess_image(face_img) - processed_faces.append(processed_face) + if not face_passes_quality_gate( + face_crop=face_img, + bbox=(x1, y1, x2, y2), + conf_score=float(score), + conf_threshold=self.yolo_detector.conf_threshold, + blur_threshold=PICTO_CLUSTERING_BLUR_THRESHOLD, + min_face_size=PICTO_CLUSTERING_MIN_FACE_SIZE, + ): + faces_skipped += 1 + continue - embedding = self.facenet.get_embedding(processed_face) - embeddings.append(embedding) + # Create bounding box dictionary in JSON format + bbox = {"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1} + bboxes.append(bbox) + confidences.append(float(score)) + + processed_face = FaceNet_util_preprocess_image(face_img) + processed_faces.append(processed_face) + + embedding = self.facenet.get_embedding(processed_face) + embeddings.append(embedding) if not forSearch and embeddings: db_insert_face_embeddings_by_image_id( @@ -64,6 +82,7 @@ def detect_faces(self, image_id: str, image_path: str, forSearch: bool = False): "ids": f"{class_ids}", "processed_faces": processed_faces, "num_faces": len(embeddings), + "faces_skipped": faces_skipped, } def close(self): diff --git a/backend/app/utils/face_clusters.py b/backend/app/utils/face_clusters.py index 4c373c981..abde7aa6d 100644 --- a/backend/app/utils/face_clusters.py +++ b/backend/app/utils/face_clusters.py @@ -26,7 +26,13 @@ db_get_metadata, db_update_metadata, ) -from app.config.settings import DATABASE_PATH +from app.config.settings import ( + DATABASE_PATH, + PICTO_CLUSTERING_EPS, + PICTO_CLUSTERING_MIN_SAMPLES, + PICTO_CLUSTERING_SIMILARITY_THRESHOLD, + PICTO_CLUSTERING_MERGE_THRESHOLD, +) from app.logging.setup_logging import get_logger # Initialize logger @@ -186,9 +192,9 @@ def _validate_embedding(embedding: NDArray, min_norm: float = 1e-6) -> bool: def cluster_util_cluster_all_face_embeddings( - eps: float = 0.75, - min_samples: int = 2, - similarity_threshold: float = 0.85, + eps: float = PICTO_CLUSTERING_EPS, + min_samples: int = PICTO_CLUSTERING_MIN_SAMPLES, + similarity_threshold: float = PICTO_CLUSTERING_SIMILARITY_THRESHOLD, merge_threshold: float = None, ) -> List[ClusterResult]: """ @@ -307,7 +313,11 @@ def cluster_util_cluster_all_face_embeddings( # Post-clustering merge: merge similar clusters based on representative faces # Use similarity_threshold if merge_threshold not explicitly provided - effective_merge_threshold = merge_threshold if merge_threshold is not None else 0.7 + effective_merge_threshold = ( + merge_threshold + if merge_threshold is not None + else PICTO_CLUSTERING_MERGE_THRESHOLD + ) results = _merge_similar_clusters( results, merge_threshold=effective_merge_threshold ) diff --git a/backend/app/utils/face_quality.py b/backend/app/utils/face_quality.py new file mode 100644 index 000000000..7722230d2 --- /dev/null +++ b/backend/app/utils/face_quality.py @@ -0,0 +1,37 @@ +import cv2 +import numpy as np + + +def face_passes_quality_gate( + face_crop: np.ndarray, + bbox: tuple, # (x1, y1, x2, y2) + conf_score: float, + conf_threshold: float, + blur_threshold: float, + min_face_size: int, +) -> bool: + """ + Evaluates a detected face against quality thresholds before it proceeds + to embedding. All checks must pass for the face to be considered valid. + """ + # 1. Completeness check (Confidence) + if conf_score < conf_threshold: + return False + + # 2. Size check + x1, y1, x2, y2 = bbox + area = (x2 - x1) * (y2 - y1) + if area < min_face_size: + return False + + # 3. Blur check + if len(face_crop.shape) == 3: + gray = cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY) + else: + gray = face_crop + + variance = cv2.Laplacian(gray, cv2.CV_64F).var() + if variance < blur_threshold: + return False + + return True From fdccbbe6819ad9fff88a88ae12f562e349c4272d Mon Sep 17 00:00:00 2001 From: ROHAN PANDEY <95585299+rohan-pandeyy@users.noreply.github.com> Date: Sat, 23 May 2026 13:48:30 +0530 Subject: [PATCH 2/6] feat(backend): Include skipped face clusters images logic to backend --- backend/app/routes/face_clusters.py | 18 +++++---- backend/app/schemas/face_clusters.py | 1 + backend/app/utils/face_clusters.py | 58 +++++++++++++++++++++------- backend/app/utils/images.py | 9 ++++- 4 files changed, 63 insertions(+), 23 deletions(-) diff --git a/backend/app/routes/face_clusters.py b/backend/app/routes/face_clusters.py index 78394df79..ca057638f 100644 --- a/backend/app/routes/face_clusters.py +++ b/backend/app/routes/face_clusters.py @@ -9,8 +9,9 @@ db_get_cluster_by_id, db_update_cluster, db_get_all_clusters_with_face_counts, - db_get_images_by_cluster_id, # Add this import + db_get_images_by_cluster_id, ) +from app.utils.face_clusters import cluster_util_face_clusters_sync from app.schemas.face_clusters import ( RenameClusterRequest, RenameClusterResponse, @@ -313,16 +314,17 @@ def trigger_global_reclustering(): try: logger.info("Starting manual global face reclustering...") - # Use the smart clustering function with force flag set to True - from app.utils.face_clusters import cluster_util_face_clusters_sync - - result = cluster_util_face_clusters_sync(force_full_reclustering=True) + result, total_faces_skipped = cluster_util_face_clusters_sync( + force_full_reclustering=True + ) if result == 0: return GlobalReclusterResponse( success=True, message="No faces found to cluster", - data=GlobalReclusterData(clusters_created=0), + data=GlobalReclusterData( + clusters_created=0, faces_skipped=total_faces_skipped + ), ) logger.info("Global reclustering completed successfully") @@ -330,7 +332,9 @@ def trigger_global_reclustering(): return GlobalReclusterResponse( success=True, message="Global reclustering completed successfully.", - data=GlobalReclusterData(clusters_created=result), + data=GlobalReclusterData( + clusters_created=result, faces_skipped=total_faces_skipped + ), ) except Exception as e: diff --git a/backend/app/schemas/face_clusters.py b/backend/app/schemas/face_clusters.py index 7744d91ce..69aebd4d1 100644 --- a/backend/app/schemas/face_clusters.py +++ b/backend/app/schemas/face_clusters.py @@ -76,6 +76,7 @@ class GetClusterImagesResponse(BaseModel): class GlobalReclusterData(BaseModel): clusters_created: Optional[int] = None + faces_skipped: Optional[int] = None class GlobalReclusterResponse(BaseModel): diff --git a/backend/app/utils/face_clusters.py b/backend/app/utils/face_clusters.py index abde7aa6d..7e4ea59d7 100644 --- a/backend/app/utils/face_clusters.py +++ b/backend/app/utils/face_clusters.py @@ -6,12 +6,13 @@ import sqlite3 from datetime import datetime from sklearn.cluster import DBSCAN +from sklearn.neighbors import NearestNeighbors from sklearn.metrics.pairwise import cosine_distances from sklearn.metrics.pairwise import cosine_similarity from collections import defaultdict, Counter -from typing import List, Dict, Optional, Union +from typing import List, Dict, Optional, Union, Tuple from numpy.typing import NDArray from app.database.connection import get_db_connection @@ -108,10 +109,10 @@ def cluster_util_face_clusters_sync(force_full_reclustering: bool = False): metadata = db_get_metadata() if force_full_reclustering or cluster_util_is_reclustering_needed(metadata): # Perform clustering operation - results = cluster_util_cluster_all_face_embeddings() + results, total_faces_skipped = cluster_util_cluster_all_face_embeddings() if not results: - return 0 + return 0, total_faces_skipped results = [result.to_dict() for result in results] @@ -159,13 +160,15 @@ def cluster_util_face_clusters_sync(force_full_reclustering: bool = False): current_metadata = metadata or {} current_metadata["reclustering_time"] = datetime.now().timestamp() db_update_metadata(current_metadata, cursor) - return len(cluster_list) + return len(cluster_list), total_faces_skipped else: - face_cluster_mappings = cluster_util_assign_cluster_to_faces_without_clusterId() + face_cluster_mappings, total_faces_skipped = ( + cluster_util_assign_cluster_to_faces_without_clusterId() + ) with get_db_connection() as conn: cursor = conn.cursor() db_update_face_cluster_ids_batch(face_cluster_mappings, cursor) - return len(face_cluster_mappings) + return len(face_cluster_mappings), total_faces_skipped def _validate_embedding(embedding: NDArray, min_norm: float = 1e-6) -> bool: @@ -191,12 +194,26 @@ def _validate_embedding(embedding: NDArray, min_norm: float = 1e-6) -> bool: return True +def estimate_eps(embeddings: np.ndarray, k: int) -> Optional[float]: + if len(embeddings) <= k: + return None + + nn = NearestNeighbors(n_neighbors=k + 1, metric="cosine") + nn.fit(embeddings) + distances, _ = nn.kneighbors(embeddings) + + kth_distances = distances[:, -1] + kth_distances.sort() + estimated_eps = np.percentile(kth_distances, 90) + return float(estimated_eps) + + def cluster_util_cluster_all_face_embeddings( eps: float = PICTO_CLUSTERING_EPS, min_samples: int = PICTO_CLUSTERING_MIN_SAMPLES, similarity_threshold: float = PICTO_CLUSTERING_SIMILARITY_THRESHOLD, merge_threshold: float = None, -) -> List[ClusterResult]: +) -> Tuple[List[ClusterResult], int]: """ Cluster face embeddings using DBSCAN with similarity validation. @@ -238,9 +255,11 @@ def cluster_util_cluster_all_face_embeddings( if invalid_count > 0: logger.warning(f"Filtered out {invalid_count} invalid embeddings") + total_faces_skipped = invalid_count + if not embeddings: logger.error("No valid embeddings found after validation") - return [] + return [], total_faces_skipped logger.info(f"Total valid faces to cluster: {len(face_ids)}") @@ -265,6 +284,15 @@ def cluster_util_cluster_all_face_embeddings( f"Applied similarity threshold: {similarity_threshold} (max_distance: {max_distance:.3f})" ) + estimated_eps = estimate_eps(embeddings_array, k=min_samples) + if estimated_eps is not None: + logger.info(f"Adaptive eps estimated: {estimated_eps:.4f}") + eps = estimated_eps + else: + logger.warning( + f"Too few embeddings for eps estimation, using config default: {eps}" + ) + # Perform DBSCAN clustering with precomputed distances dbscan = DBSCAN( eps=eps, @@ -322,12 +350,12 @@ def cluster_util_cluster_all_face_embeddings( results, merge_threshold=effective_merge_threshold ) - return results + return results, total_faces_skipped def cluster_util_assign_cluster_to_faces_without_clusterId( similarity_threshold: float = 0.8, -) -> List[Dict]: +) -> Tuple[List[Dict], int]: """ Assign cluster IDs to faces that don't have clusters using nearest mean method with similarity threshold. @@ -349,13 +377,13 @@ def cluster_util_assign_cluster_to_faces_without_clusterId( # Get faces without cluster assignments unassigned_faces = db_get_faces_unassigned_clusters() if not unassigned_faces: - return [] + return [], 0 # Get cluster mean embeddings cluster_means = db_get_cluster_mean_embeddings() if not cluster_means: - return [] + return [], 0 # Prepare data for nearest neighbor assignment with validation cluster_ids = [] @@ -380,7 +408,7 @@ def cluster_util_assign_cluster_to_faces_without_clusterId( if not mean_embeddings: logger.error("No valid cluster means found after validation") - return [] + return [], 0 mean_embeddings_array = np.array(mean_embeddings) @@ -425,7 +453,9 @@ def cluster_util_assign_cluster_to_faces_without_clusterId( f"Skipped {skipped_invalid} faces with invalid embeddings during assignment" ) - return face_cluster_mappings + total_faces_skipped = skipped_invalid + + return face_cluster_mappings, total_faces_skipped def _merge_similar_clusters( diff --git a/backend/app/utils/images.py b/backend/app/utils/images.py index e874ea267..59b46c663 100644 --- a/backend/app/utils/images.py +++ b/backend/app/utils/images.py @@ -106,10 +106,11 @@ def image_util_process_untagged_images() -> bool: def image_util_classify_and_face_detect_images( untagged_images: List[Dict[str, str]], -) -> None: +) -> int: """Classify untagged images and detect faces if applicable.""" object_classifier = ObjectClassifier() face_detector = FaceDetector() + total_faces_skipped = 0 try: for image in untagged_images: image_path = image["path"] @@ -129,7 +130,9 @@ def image_util_classify_and_face_detect_images( # Step 3: Detect faces if "person" class is present if classes and 0 in classes and 0 < classes.count(0) < 7: - face_detector.detect_faces(image_id, image_path) + result = face_detector.detect_faces(image_id, image_path) + if result: + total_faces_skipped += result.get("faces_skipped", 0) # Step 4: Update the image status in the database db_update_image_tagged_status(image_id, True) @@ -138,6 +141,8 @@ def image_util_classify_and_face_detect_images( object_classifier.close() face_detector.close() + return total_faces_skipped + def image_util_prepare_image_records( image_files: List[str], folder_path_to_id: Dict[str, int] From 315699605f18c1742381159fcb695891cc7cd65d Mon Sep 17 00:00:00 2001 From: ROHAN PANDEY <95585299+rohan-pandeyy@users.noreply.github.com> Date: Sat, 23 May 2026 22:18:23 +0530 Subject: [PATCH 3/6] feat(frontend): add GlobalAlert and skipped face_clusters logic to frontend --- docs/backend/backend_python/openapi.json | 237 +++++++++--------- frontend/src/App.tsx | 2 + .../src/api/api-functions/face_clusters.ts | 11 +- frontend/src/app/store.ts | 2 + .../components/GlobalAlert/GlobalAlert.tsx | 53 ++++ frontend/src/features/globalAlertSlice.ts | 39 +++ .../components/ApplicationControlsCard.tsx | 13 + 7 files changed, 242 insertions(+), 115 deletions(-) create mode 100644 frontend/src/components/GlobalAlert/GlobalAlert.tsx create mode 100644 frontend/src/features/globalAlertSlice.ts diff --git a/docs/backend/backend_python/openapi.json b/docs/backend/backend_python/openapi.json index 4ab3ffa44..f5298ffdc 100644 --- a/docs/backend/backend_python/openapi.json +++ b/docs/backend/backend_python/openapi.json @@ -2073,119 +2073,6 @@ ], "title": "DeleteFoldersResponse" }, - "app__schemas__folders__ErrorResponse": { - "properties": { - "success": { - "type": "boolean", - "title": "Success", - "default": false - }, - "message": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Message" - }, - "error": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Error" - } - }, - "type": "object", - "title": "ErrorResponse" - }, - "app__schemas__face_clusters__ErrorResponse": { - "properties": { - "success": { - "type": "boolean", - "title": "Success", - "default": false - }, - "message": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Message" - }, - "error": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Error" - } - }, - "type": "object", - "title": "ErrorResponse" - }, - "app__schemas__images__ErrorResponse": { - "properties": { - "success": { - "type": "boolean", - "title": "Success", - "default": false - }, - "message": { - "type": "string", - "title": "Message" - }, - "error": { - "type": "string", - "title": "Error" - } - }, - "type": "object", - "required": [ - "message", - "error" - ], - "title": "ErrorResponse" - }, - "app__schemas__user_preferences__ErrorResponse": { - "properties": { - "success": { - "type": "boolean", - "title": "Success" - }, - "error": { - "type": "string", - "title": "Error" - }, - "message": { - "type": "string", - "title": "Message" - } - }, - "type": "object", - "required": [ - "success", - "error", - "message" - ], - "title": "ErrorResponse", - "description": "Error response model" - }, "FaceSearchRequest": { "properties": { "path": { @@ -2612,6 +2499,17 @@ } ], "title": "Clusters Created" + }, + "faces_skipped": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Faces Skipped" } }, "type": "object", @@ -3407,6 +3305,119 @@ "type" ], "title": "ValidationError" + }, + "app__schemas__face_clusters__ErrorResponse": { + "properties": { + "success": { + "type": "boolean", + "title": "Success", + "default": false + }, + "message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Message" + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error" + } + }, + "type": "object", + "title": "ErrorResponse" + }, + "app__schemas__folders__ErrorResponse": { + "properties": { + "success": { + "type": "boolean", + "title": "Success", + "default": false + }, + "message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Message" + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error" + } + }, + "type": "object", + "title": "ErrorResponse" + }, + "app__schemas__images__ErrorResponse": { + "properties": { + "success": { + "type": "boolean", + "title": "Success", + "default": false + }, + "message": { + "type": "string", + "title": "Message" + }, + "error": { + "type": "string", + "title": "Error" + } + }, + "type": "object", + "required": [ + "message", + "error" + ], + "title": "ErrorResponse" + }, + "app__schemas__user_preferences__ErrorResponse": { + "properties": { + "success": { + "type": "boolean", + "title": "Success" + }, + "error": { + "type": "string", + "title": "Error" + }, + "message": { + "type": "string", + "title": "Message" + } + }, + "type": "object", + "required": [ + "success", + "error", + "message" + ], + "title": "ErrorResponse", + "description": "Error response model" } } } diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 2c5b6bddb..523d37a5f 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -6,6 +6,7 @@ import { ThemeProvider } from '@/contexts/ThemeContext'; import QueryClientProviders from '@/config/QueryClientProvider'; import { GlobalLoader } from './components/Loader/GlobalLoader'; import { InfoDialog } from './components/Dialog/InfoDialog'; +import GlobalAlert from './components/GlobalAlert/GlobalAlert'; import { useSelector } from 'react-redux'; import { RootState } from './app/store'; const App: React.FC = () => { @@ -31,6 +32,7 @@ const App: React.FC = () => { variant={variant} showCloseButton={showCloseButton} /> + ); diff --git a/frontend/src/api/api-functions/face_clusters.ts b/frontend/src/api/api-functions/face_clusters.ts index 7decba4cd..d219f3b19 100644 --- a/frontend/src/api/api-functions/face_clusters.ts +++ b/frontend/src/api/api-functions/face_clusters.ts @@ -67,8 +67,15 @@ export const fetchSearchedFacesBase64 = async ( return response.data; }; -export const triggerGlobalReclustering = async (): Promise => { - const response = await apiClient.post( +export interface GlobalReclusterData { + clusters_created: number | null; + faces_skipped: number | null; +} + +export const triggerGlobalReclustering = async (): Promise< + BackendRes +> => { + const response = await apiClient.post>( faceClustersEndpoints.globalRecluster, ); return response.data; diff --git a/frontend/src/app/store.ts b/frontend/src/app/store.ts index 98f69e7a9..12fd7c9c4 100644 --- a/frontend/src/app/store.ts +++ b/frontend/src/app/store.ts @@ -5,6 +5,7 @@ import searchReducer from '@/features/searchSlice'; import imageReducer from '@/features/imageSlice'; import faceClustersReducer from '@/features/faceClustersSlice'; import infoDialogReducer from '@/features/infoDialogSlice'; +import globalAlertReducer from '@/features/globalAlertSlice'; import folderReducer from '@/features/folderSlice'; import memoriesReducer from '@/features/memoriesSlice'; @@ -15,6 +16,7 @@ export const store = configureStore({ images: imageReducer, faceClusters: faceClustersReducer, infoDialog: infoDialogReducer, + globalAlert: globalAlertReducer, folders: folderReducer, search: searchReducer, memories: memoriesReducer, diff --git a/frontend/src/components/GlobalAlert/GlobalAlert.tsx b/frontend/src/components/GlobalAlert/GlobalAlert.tsx new file mode 100644 index 000000000..7f646470f --- /dev/null +++ b/frontend/src/components/GlobalAlert/GlobalAlert.tsx @@ -0,0 +1,53 @@ +import React, { useEffect } from 'react'; +import { AlertCircle, X } from 'lucide-react'; +import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert'; +import { useDispatch, useSelector } from 'react-redux'; +import { RootState } from '@/app/store'; +import { hideGlobalAlert } from '@/features/globalAlertSlice'; + +const GlobalAlert: React.FC = () => { + const dispatch = useDispatch(); + const { isOpen, title, message } = useSelector( + (state: RootState) => state.globalAlert, + ); + + useEffect(() => { + if (!isOpen) { + return; + } + + const timer = window.setTimeout(() => { + dispatch(hideGlobalAlert()); + }, 5000); + + return () => window.clearTimeout(timer); + }, [dispatch, isOpen, title, message]); + + if (!isOpen) { + return null; + } + + return ( +
+ + + + {title} + + + {message} + + + +
+ ); +}; + +export default GlobalAlert; diff --git a/frontend/src/features/globalAlertSlice.ts b/frontend/src/features/globalAlertSlice.ts new file mode 100644 index 000000000..58f1e3650 --- /dev/null +++ b/frontend/src/features/globalAlertSlice.ts @@ -0,0 +1,39 @@ +import { createSlice, PayloadAction } from '@reduxjs/toolkit'; + +interface GlobalAlertState { + isOpen: boolean; + title: string; + message: string; +} + +const initialState: GlobalAlertState = { + isOpen: false, + title: '', + message: '', +}; + +const globalAlertSlice = createSlice({ + name: 'globalAlert', + initialState, + reducers: { + showGlobalAlert( + state, + action: PayloadAction<{ + title: string; + message: string; + }>, + ) { + state.isOpen = true; + state.title = action.payload.title; + state.message = action.payload.message; + }, + hideGlobalAlert(state) { + state.isOpen = false; + state.title = ''; + state.message = ''; + }, + }, +}); + +export const { showGlobalAlert, hideGlobalAlert } = globalAlertSlice.actions; +export default globalAlertSlice.reducer; diff --git a/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx b/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx index bcecd3f08..dc81ab7d7 100644 --- a/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx +++ b/frontend/src/pages/SettingsPage/components/ApplicationControlsCard.tsx @@ -12,6 +12,7 @@ import { showInfoDialog } from '@/features/infoDialogSlice'; import { triggerGlobalReclustering } from '@/api/api-functions/face_clusters'; import { usePictoMutation } from '@/hooks/useQueryExtension'; import { useMutationFeedback } from '@/hooks/useMutationFeedback'; +import { showGlobalAlert } from '@/features/globalAlertSlice'; /** * Component for application controls in settings @@ -34,6 +35,18 @@ const ApplicationControlsCard: React.FC = () => { const reclusterMutation = usePictoMutation({ mutationFn: triggerGlobalReclustering, autoInvalidateTags: ['clusters'], + onSuccess: (data) => { + const facesSkipped = data.data?.faces_skipped; + + if (facesSkipped != null && facesSkipped > 0) { + dispatch( + showGlobalAlert({ + title: 'Faces Skipped', + message: `${facesSkipped} face(s) were skipped during clustering due to invalid embeddings.`, + }), + ); + } + }, }); const feedbackOptions = React.useMemo( From e11cc6864c0da0d8015206c806246b580cc3e764 Mon Sep 17 00:00:00 2001 From: ROHAN PANDEY <95585299+rohan-pandeyy@users.noreply.github.com> Date: Mon, 25 May 2026 16:49:26 +0530 Subject: [PATCH 4/6] tests(backend): add algo check tests for quality check and adaptive eps stability --- backend/tests/test_face_clusters.py | 201 ++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/backend/tests/test_face_clusters.py b/backend/tests/test_face_clusters.py index 3ccd284d5..fc6d7c948 100644 --- a/backend/tests/test_face_clusters.py +++ b/backend/tests/test_face_clusters.py @@ -1,6 +1,12 @@ import pytest +import numpy as np from unittest.mock import patch from fastapi import FastAPI +from app.utils.face_clusters import ( + cluster_util_cluster_all_face_embeddings, + estimate_eps, +) +from app.utils.face_quality import face_passes_quality_gate from fastapi.testclient import TestClient from app.routes.face_clusters import router as face_clusters_router @@ -410,3 +416,198 @@ def test_unsupported_http_methods(self, method, endpoint): """Test that unsupported HTTP methods return 405.""" response = client.request(method, endpoint) assert response.status_code == 405 + + +# ============================================================================ +# Algorithmic Logic Tests +# ============================================================================ + + +def generate_synthetic_embeddings( + num_identities=2, points_per_identity=10, dim=512, noise_std=0.005 +): + """Helper to generate tight clusters of embeddings.""" + embeddings = [] + labels = [] + + np.random.seed(42) # For reproducibility + + for i in range(num_identities): + # Random unit vector as center + center = np.random.randn(dim) + center = center / np.linalg.norm(center) + + # Add points around center + for _ in range(points_per_identity): + noise = np.random.randn(dim) * noise_std + point = center + noise + # Re-normalize as cosine distance works best with unit vectors + point = point / np.linalg.norm(point) + embeddings.append(point) + labels.append(i) + + return np.array(embeddings), np.array(labels) + + +def generate_noise_embeddings(num_points=80, dim=512): + """Helper to generate random noise embeddings.""" + np.random.seed(43) + noise = np.random.randn(num_points, dim) + norms = np.linalg.norm(noise, axis=1, keepdims=True) + return noise / norms + + +def mock_faces_data(embeddings): + """Format embeddings into the expected database return format.""" + return [ + {"face_id": i, "embeddings": emb, "cluster_name": None} + for i, emb in enumerate(embeddings) + ] + + +class TestFaceClusteringAlgo: + @patch("app.utils.face_clusters.db_get_all_faces_with_cluster_names") + def test_folder_size_regression(self, mock_db_get): + """Test 1: Folder-size regression (the original bug)""" + # Generate 20 embeddings (2 identities, 10 points each) + identity_embs, identity_labels = generate_synthetic_embeddings( + num_identities=2, points_per_identity=10 + ) + + # --- Run 1: Isolated run --- + mock_db_get.return_value = mock_faces_data(identity_embs) + + # Run clustering with eps estimation disabled (by using fixed eps, though estimate_eps runs internally) + # and strict similarity threshold. + results_isolated, _ = cluster_util_cluster_all_face_embeddings( + eps=0.75, min_samples=2, similarity_threshold=0.85 + ) + + # Count clusters in isolated run + isolated_clusters = set(r.cluster_uuid for r in results_isolated) + assert ( + len(isolated_clusters) == 2 + ), f"The folder-size bug is present: expected 2 clusters, got {len(isolated_clusters)} in isolated run" + + # Verify points were assigned correctly (10 points per cluster) + cluster_counts = {} + for r in results_isolated: + cluster_counts[r.cluster_uuid] = cluster_counts.get(r.cluster_uuid, 0) + 1 + + for count in cluster_counts.values(): + assert ( + count >= 8 + ), f"Identity cluster should contain majority of points, got {count}" + + # --- Run 2: With noise --- + noise_embs = generate_noise_embeddings(num_points=80) + all_embs = np.vstack([identity_embs, noise_embs]) + + mock_db_get.return_value = mock_faces_data(all_embs) + + results_noise, _ = cluster_util_cluster_all_face_embeddings( + eps=0.75, min_samples=2, similarity_threshold=0.85 + ) + + # We need to find the clusters containing the original identity points (face_ids 0 to 19) + identity_results = [r for r in results_noise if r.face_id < 20] + + noise_run_identity_clusters = set(r.cluster_uuid for r in identity_results) + assert ( + len(noise_run_identity_clusters) == 2 + ), f"Expected 2 clusters for identity points with noise, got {len(noise_run_identity_clusters)}" + + def test_adaptive_eps_stability(self): + """Test 2: Adaptive eps stability""" + identity_embs, _ = generate_synthetic_embeddings( + num_identities=2, points_per_identity=10 + ) + + sizes = [20, 50, 100] + + for size in sizes: + num_noise = size - len(identity_embs) + if num_noise > 0: + noise_embs = generate_noise_embeddings(num_points=num_noise) + test_embs = np.vstack([identity_embs, noise_embs]) + else: + test_embs = identity_embs + + eps = estimate_eps(test_embs, k=2) + + assert eps is not None + assert ( + 0.0 < eps < 1.0 + ), f"eps value {eps} out of expected bounds for size {size}" + + def test_estimate_eps_fallback(self): + """Test 3: estimate_eps() fallback""" + # Empty array + assert estimate_eps(np.array([]), k=2) is None + + # 1 element + assert estimate_eps(np.random.randn(1, 512), k=2) is None + + # 2 elements + assert estimate_eps(np.random.randn(2, 512), k=2) is None + + def test_quality_gate(self): + """Test 4: Quality gate unit tests""" + # A sharp, large face crop should pass + # Random noise image has high variance (sharp) + np.random.seed(42) + sharp_crop = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8) + + assert ( + face_passes_quality_gate( + face_crop=sharp_crop, + bbox=(0, 0, 100, 100), + conf_score=0.9, + conf_threshold=0.45, + blur_threshold=10.0, # Random noise will be well above this + min_face_size=400, + ) + is True + ) + + # A blurred crop should fail + # Flat image has zero variance + blurred_crop = np.ones((100, 100, 3), dtype=np.uint8) * 128 + + assert ( + face_passes_quality_gate( + face_crop=blurred_crop, + bbox=(0, 0, 100, 100), + conf_score=0.9, + conf_threshold=0.45, + blur_threshold=10.0, + min_face_size=400, + ) + is False + ) + + # A small bbox should fail + assert ( + face_passes_quality_gate( + face_crop=sharp_crop, + bbox=(0, 0, 10, 10), # area = 100 + conf_score=0.9, + conf_threshold=0.45, + blur_threshold=10.0, + min_face_size=400, + ) + is False + ) + + # A low confidence score should fail + assert ( + face_passes_quality_gate( + face_crop=sharp_crop, + bbox=(0, 0, 100, 100), + conf_score=0.4, # < 0.45 + conf_threshold=0.45, + blur_threshold=10.0, + min_face_size=400, + ) + is False + ) From 7ad239f8c39a1ff87a9dba594775f317cb9e6ceb Mon Sep 17 00:00:00 2001 From: ROHAN PANDEY <95585299+rohan-pandeyy@users.noreply.github.com> Date: Tue, 26 May 2026 14:44:07 +0530 Subject: [PATCH 5/6] feat: add face quality validation utility and configure clustering parameters --- backend/app/config/settings.py | 102 ++++++++++++++++++++++++---- backend/app/utils/face_quality.py | 3 + backend/tests/test_face_clusters.py | 14 ++++ 3 files changed, 106 insertions(+), 13 deletions(-) diff --git a/backend/app/config/settings.py b/backend/app/config/settings.py index 1f0b295b6..54a595a5b 100644 --- a/backend/app/config/settings.py +++ b/backend/app/config/settings.py @@ -1,8 +1,13 @@ +from __future__ import annotations + +import logging import os import sys - from platformdirs import user_data_dir +logger = logging.getLogger(__name__) + + if getattr(sys, "frozen", False): MODEL_EXPORTS_PATH = os.path.join(user_data_dir("PictoPy"), "models") else: @@ -36,21 +41,92 @@ THUMBNAIL_IMAGES_PATH = os.path.join(user_data_dir("PictoPy"), "thumbnails") IMAGES_PATH = "./images" + +def _get_env_float( + name: str, + default: float, + min_value: float | None = None, + max_value: float | None = None, +) -> float: + raw = os.getenv(name) + if raw is None: + return default + try: + value = float(raw) + except ValueError: + logger.warning( + "Invalid value %r for %s (expected float); using default %s", + raw, + name, + default, + ) + return default + if (min_value is not None and value < min_value) or ( + max_value is not None and value > max_value + ): + logger.warning( + "Out-of-range value %s for %s (expected [%s, %s]); using default %s", + value, + name, + min_value, + max_value, + default, + ) + return default + return value + + +def _get_env_int( + name: str, + default: int, + min_value: int | None = None, + max_value: int | None = None, +) -> int: + raw = os.getenv(name) + if raw is None: + return default + try: + value = int(raw) + except ValueError: + logger.warning( + "Invalid value %r for %s (expected int); using default %s", + raw, + name, + default, + ) + return default + if (min_value is not None and value < min_value) or ( + max_value is not None and value > max_value + ): + logger.warning( + "Out-of-range value %s for %s (expected [%s, %s]); using default %s", + value, + name, + min_value, + max_value, + default, + ) + return default + return value + + # Clustering Configuration -PICTO_CLUSTERING_EPS = float(os.getenv("PICTO_CLUSTERING_EPS", "0.75")) -PICTO_CLUSTERING_MIN_SAMPLES = int(os.getenv("PICTO_CLUSTERING_MIN_SAMPLES", "2")) -PICTO_CLUSTERING_SIMILARITY_THRESHOLD = float( - os.getenv("PICTO_CLUSTERING_SIMILARITY_THRESHOLD", "0.85") +PICTO_CLUSTERING_EPS = _get_env_float("PICTO_CLUSTERING_EPS", 0.75, min_value=0.0) +PICTO_CLUSTERING_MIN_SAMPLES = _get_env_int( + "PICTO_CLUSTERING_MIN_SAMPLES", 2, min_value=1 +) +PICTO_CLUSTERING_SIMILARITY_THRESHOLD = _get_env_float( + "PICTO_CLUSTERING_SIMILARITY_THRESHOLD", 0.85, min_value=0.0, max_value=1.0 ) -PICTO_CLUSTERING_MERGE_THRESHOLD = float( - os.getenv("PICTO_CLUSTERING_MERGE_THRESHOLD", "0.7") +PICTO_CLUSTERING_MERGE_THRESHOLD = _get_env_float( + "PICTO_CLUSTERING_MERGE_THRESHOLD", 0.7, min_value=0.0, max_value=1.0 ) -PICTO_CLUSTERING_CONF_THRESHOLD = float( - os.getenv("PICTO_CLUSTERING_CONF_THRESHOLD", "0.45") +PICTO_CLUSTERING_CONF_THRESHOLD = _get_env_float( + "PICTO_CLUSTERING_CONF_THRESHOLD", 0.45, min_value=0.0, max_value=1.0 ) -PICTO_CLUSTERING_BLUR_THRESHOLD = float( - os.getenv("PICTO_CLUSTERING_BLUR_THRESHOLD", "80.0") +PICTO_CLUSTERING_BLUR_THRESHOLD = _get_env_float( + "PICTO_CLUSTERING_BLUR_THRESHOLD", 80.0, min_value=0.0 ) -PICTO_CLUSTERING_MIN_FACE_SIZE = int( - os.getenv("PICTO_CLUSTERING_MIN_FACE_SIZE", "1600") +PICTO_CLUSTERING_MIN_FACE_SIZE = _get_env_int( + "PICTO_CLUSTERING_MIN_FACE_SIZE", 1600, min_value=1 ) diff --git a/backend/app/utils/face_quality.py b/backend/app/utils/face_quality.py index 7722230d2..4b3c082dc 100644 --- a/backend/app/utils/face_quality.py +++ b/backend/app/utils/face_quality.py @@ -25,6 +25,9 @@ def face_passes_quality_gate( return False # 3. Blur check + if face_crop.size == 0: + return False + if len(face_crop.shape) == 3: gray = cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY) else: diff --git a/backend/tests/test_face_clusters.py b/backend/tests/test_face_clusters.py index fc6d7c948..7501e1280 100644 --- a/backend/tests/test_face_clusters.py +++ b/backend/tests/test_face_clusters.py @@ -611,3 +611,17 @@ def test_quality_gate(self): ) is False ) + + # An empty crop should fail + empty_crop = np.zeros((0, 0, 3), dtype=np.uint8) + assert ( + face_passes_quality_gate( + face_crop=empty_crop, + bbox=(0, 0, 0, 0), + conf_score=0.9, + conf_threshold=0.45, + blur_threshold=10.0, + min_face_size=400, + ) + is False + ) From 31b5b59f0b15ea187e3e11b0d741b94d6ab1d9ca Mon Sep 17 00:00:00 2001 From: ROHAN PANDEY <95585299+rohan-pandeyy@users.noreply.github.com> Date: Wed, 27 May 2026 11:41:07 +0530 Subject: [PATCH 6/6] update face clustering configuration settings and endpoint tests --- backend/app/config/settings.py | 6 ++++++ backend/tests/test_face_clusters.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/backend/app/config/settings.py b/backend/app/config/settings.py index 54a595a5b..172b1d2da 100644 --- a/backend/app/config/settings.py +++ b/backend/app/config/settings.py @@ -115,6 +115,12 @@ def _get_env_int( PICTO_CLUSTERING_MIN_SAMPLES = _get_env_int( "PICTO_CLUSTERING_MIN_SAMPLES", 2, min_value=1 ) +if PICTO_CLUSTERING_MIN_SAMPLES < 2: + logger.warning( + f"PICTO_CLUSTERING_MIN_SAMPLES={PICTO_CLUSTERING_MIN_SAMPLES} is invalid " + f"(minimum is 2). Resetting to 2 to prevent cluster chaining." + ) + PICTO_CLUSTERING_MIN_SAMPLES = 2 PICTO_CLUSTERING_SIMILARITY_THRESHOLD = _get_env_float( "PICTO_CLUSTERING_SIMILARITY_THRESHOLD", 0.85, min_value=0.0, max_value=1.0 ) diff --git a/backend/tests/test_face_clusters.py b/backend/tests/test_face_clusters.py index 7501e1280..2c09795c9 100644 --- a/backend/tests/test_face_clusters.py +++ b/backend/tests/test_face_clusters.py @@ -617,7 +617,7 @@ def test_quality_gate(self): assert ( face_passes_quality_gate( face_crop=empty_crop, - bbox=(0, 0, 0, 0), + bbox=(0, 0, 500, 500), conf_score=0.9, conf_threshold=0.45, blur_threshold=10.0,