diff --git a/.circleci/data b/.circleci/data
new file mode 120000
index 0000000000..b8e5c68cee
--- /dev/null
+++ b/.circleci/data
@@ -0,0 +1 @@
+${DATA_DIR:-./data}
\ No newline at end of file
diff --git a/deeplabcut/common_tools/download.py b/deeplabcut/common_tools/download.py
new file mode 100644
index 0000000000..68223bf24e
--- /dev/null
+++ b/deeplabcut/common_tools/download.py
@@ -0,0 +1,58 @@
+from huggingface_hub import hf_hub_download
+import os
+
+
+def download_from_huggingface_hub(target_folder_path, repo_id, filename, subfolder=None):
+    """
+    Download a file from the Hugging Face Hub to a specified local directory.
+
+    Parameters:
+        target_folder_path (str): Local directory path where the file will be saved
+        repo_id (str): Hugging Face repository ID (e.g., 'noahcao/sapiens-pose-coco')
+        filename (str): Name of the file to download
+        subfolder (str, optional): Path to the subfolder within the repository where the file is located
+
+    Returns:
+        str: Full path to the downloaded file
+
+    Examples:
+        >>> # Download a model file from noahcao/sapiens-pose-coco
+        >>> download_from_huggingface_hub(
+        ...     target_folder_path="./models/sapiens",
+        ...     repo_id="noahcao/sapiens-pose-coco",
+        ...     filename="sapiens_2b_coco_best_coco_AP_822_torchscript.pt2",
+        ...     subfolder="sapiens_lite_host/torchscript/pose/checkpoints/sapiens_2b"
+        ... )
+
+        >>> # Download a detection-results file from a different subfolder
+        >>> download_from_huggingface_hub(
+        ...     target_folder_path="./data",
+        ...     repo_id="noahcao/sapiens-pose-coco",
+        ...     filename="COCO_val2017_detections_AP_H_70_person.json",
+        ...     subfolder="sapiens_host/pose/person_detection_results"
+        ... )
+    """
+    # Create the target directory if it does not exist
+    os.makedirs(target_folder_path, exist_ok=True)
+
+    # Download the file from the Hugging Face Hub
+    return hf_hub_download(
+        repo_id=repo_id,
+        filename=filename,
+        subfolder=subfolder,
+        local_dir=target_folder_path,
+    )
+
+
+# Example usage:
+if __name__ == "__main__":
+    # Example: download the model from the noahcao/sapiens-pose-coco repository
+    target_dir = "xxxx/sapiens/sapiens_lite_host/torchscript/pose/checkpoints/sapiens_2b"
+
+    downloaded_file = download_from_huggingface_hub(
+        target_folder_path=target_dir,
+        repo_id="noahcao/sapiens-pose-coco",
+        filename="sapiens_2b_coco_best_coco_AP_822_torchscript.pt2",
+        subfolder="sapiens_lite_host/torchscript/pose/checkpoints/sapiens_2b",
+    )
+
+    print(f"File downloaded to: {downloaded_file}")
\ No newline at end of file
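A note on the helper above: with huggingface_hub's local_dir behavior, the downloaded file should land under target_folder_path with the repository's subfolder structure preserved, which matters when predicting the final path. A minimal sketch under that assumption (paths mirror the docstring example):

    # Hypothetical check of where the file ends up; assumes hf_hub_download
    # preserves the repo's subfolder layout under local_dir.
    path = download_from_huggingface_hub(
        target_folder_path="./models/sapiens",
        repo_id="noahcao/sapiens-pose-coco",
        filename="sapiens_2b_coco_best_coco_AP_822_torchscript.pt2",
        subfolder="sapiens_lite_host/torchscript/pose/checkpoints/sapiens_2b",
    )
    # Expected: ./models/sapiens/sapiens_lite_host/torchscript/pose/checkpoints/sapiens_2b/<filename>
    print(path)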
diff --git a/deeplabcut/common_tools/json_tools.py b/deeplabcut/common_tools/json_tools.py
new file mode 100644
index 0000000000..e338298c8a
--- /dev/null
+++ b/deeplabcut/common_tools/json_tools.py
@@ -0,0 +1,29 @@
+import json
+import os
+from pathlib import Path
+
+
+def convert_json_indent_folder_level(source_folder_path, target_folder_path):
+    """
+    Read JSON files from a folder and save them with proper indentation.
+
+    Args:
+        source_folder_path (str): Path to the folder containing the original JSON files
+        target_folder_path (str): Path to the folder where the formatted JSON files are saved
+    """
+    # Create the target folder if it doesn't exist
+    Path(target_folder_path).mkdir(parents=True, exist_ok=True)
+
+    # Get all JSON files from the source folder
+    json_files = [f for f in os.listdir(source_folder_path) if f.endswith('.json')]
+    print(f"Processing {len(json_files)} JSON files...")
+
+    for json_file in json_files:
+        source_path = os.path.join(source_folder_path, json_file)
+        target_path = os.path.join(target_folder_path, json_file)
+
+        # Read the source JSON file and save it with proper indentation
+        with open(source_path, 'r') as f:
+            data = json.load(f)
+
+        with open(target_path, 'w') as f:
+            json.dump(data, f, indent=4)
\ No newline at end of file
diff --git a/deeplabcut/modelzoo/video_inference.py b/deeplabcut/modelzoo/video_inference.py
index 5b597275b2..d82e91cd18 100644
--- a/deeplabcut/modelzoo/video_inference.py
+++ b/deeplabcut/modelzoo/video_inference.py
@@ -12,6 +12,7 @@
 import json
 import os
+import torch
 from pathlib import Path
 from typing import Optional, Union
@@ -34,6 +35,21 @@
     video_to_frames,
 )
+
+def get_checkpoint_epoch(checkpoint_path):
+    """
+    Load a PyTorch checkpoint and return the current epoch number.
+
+    Args:
+        checkpoint_path (str): Path to the checkpoint file
+
+    Returns:
+        int: Current epoch number, or 0 if not found
+    """
+    checkpoint = torch.load(checkpoint_path)
+    if 'metadata' in checkpoint and 'epoch' in checkpoint['metadata']:
+        return checkpoint['metadata']['epoch']
+    else:
+        return 0
@@ -517,7 +533,6 @@ def video_inference_superanimal(
     model_snapshot_prefix = f"snapshot-{model_name}"
     detector_snapshot_prefix = f"snapshot-{detector_name}"
-
     config["runner"]["snapshot_prefix"] = model_snapshot_prefix
     config["detector"]["runner"]["snapshot_prefix"] = detector_snapshot_prefix
@@ -599,6 +614,7 @@ def video_inference_superanimal(
         detector_batch_size=video_adapt_batch_size,
     )
+    # After video adaptation, point to the adapted checkpoint; if it does not exist, fall back to the best checkpoint
     adapted_detector_checkpoint = (
         model_folder
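For reference, a minimal sketch of how the new get_checkpoint_epoch helper behaves, assuming only the checkpoint layout it explicitly checks for (a 'metadata' dict carrying an 'epoch' key); the file names are hypothetical:

    import torch

    # Hypothetical checkpoint written in the layout the helper expects
    torch.save({"metadata": {"epoch": 42}, "model": {}}, "snapshot-042.pt")
    print(get_checkpoint_epoch("snapshot-042.pt"))  # -> 42

    # Checkpoints without metadata fall back to epoch 0
    torch.save({"model": {}}, "snapshot-raw.pt")
    print(get_checkpoint_epoch("snapshot-raw.pt"))  # -> 0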
diff --git a/deeplabcut/pose_estimation_pytorch/apis/evaluation.py b/deeplabcut/pose_estimation_pytorch/apis/evaluation.py
index f9b02e2c78..a02410f7e2 100755
--- a/deeplabcut/pose_estimation_pytorch/apis/evaluation.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/evaluation.py
@@ -48,7 +48,10 @@
     plot_evaluation_results,
     save_labeled_frame,
 )
-
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from typing import Optional, Union, List, Tuple, Dict
+import logging

 def predict(
     pose_runner: InferenceRunner,
@@ -462,6 +465,659 @@ def plot_gt_and_predictions(
     plt.close()

+
+def visualize_predictions_PFM(
+    predictions: Dict[str, Dict],
+    ground_truth: Dict[str, np.ndarray],
+    output_dir: Optional[Union[str, Path]] = None,
+    num_samples: Optional[int] = None,
+    random_select: bool = False,
+    plot_bboxes: bool = True,
+    skeleton: Optional[List[Tuple[int, int]]] = None,
+    keypoint_vis_mask: Optional[List[int]] = None,
+    keypoint_names: Optional[List[str]] = None,
+    confidence_threshold: float = 0.6
+) -> None:
+    """Visualize model predictions alongside ground truth keypoints with PFM-specific configurations."""
+    # Set up the output directory and logging
+    output_dir = Path(output_dir or "predictions_visualizations")
+    output_dir.mkdir(exist_ok=True, parents=True)
+
+    # Configure logging with a dedicated file handler
+    log_file = output_dir / "visualization.log"
+    handler = logging.FileHandler(log_file)
+    handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+    logger = logging.getLogger('PFM_visualization')
+    logger.setLevel(logging.INFO)
+    logger.addHandler(handler)
+    logger.info(f"Starting visualization process. Output directory: {output_dir}")
+
+    # Select the images to process
+    image_paths = list(predictions.keys())
+    if num_samples and num_samples < len(image_paths):
+        if random_select:
+            image_paths = np.random.choice(
+                image_paths, num_samples, replace=False
+            ).tolist()
+        else:
+            image_paths = image_paths[:num_samples]
+
+    # Process each selected image
+    for image_path in image_paths:
+        # Get the prediction and ground truth data
+        pred_data = predictions[image_path]
+        gt_keypoints = ground_truth[image_path]  # Shape: [N, num_keypoints, 3]
+
+        # Process the predicted keypoints
+        pred_keypoints = pred_data["bodyparts"]
+
+        if plot_bboxes:
+            bboxes = pred_data.get("bboxes", None)
+            bbox_scores = pred_data.get("bbox_scores", None)
+            # If no scores are provided, the bboxes are ground truth, so give them a score of 1
+            if bboxes is not None and bbox_scores is None:
+                bbox_scores = np.ones(len(bboxes))
+            bounding_boxes = (
+                (bboxes, bbox_scores)
+                if bbox_scores is not None and bboxes is not None
+                else None
+            )
+        else:
+            bounding_boxes = None
+
+        # Generate the visualization
+        plot_gt_and_predictions_PFM(
+            image_path=image_path,
+            output_dir=output_dir,
+            gt_bodyparts=gt_keypoints,
+            pred_bodyparts=pred_keypoints,
+            bounding_boxes=bounding_boxes,
+            skeleton=skeleton,
+            keypoint_names=keypoint_names,
+            p_cutoff=confidence_threshold,
+            keypoint_vis_mask=keypoint_vis_mask,  # Pass the mask to the plotting function
+        )
+        logger.info(f"Successfully visualized predictions for {image_path}")
+
+    # Clean up the logging handler
+    logger.removeHandler(handler)
+    handler.close()
+
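A minimal, hypothetical call illustrating the input structures the function expects (one image, one individual, three keypoints; the path, arrays, and names are made up, and img_000.png is assumed to exist on disk):

    import numpy as np

    preds = {
        "img_000.png": {
            "bodyparts": np.array([[[10.0, 20.0, 0.9], [30.0, 40.0, 0.4], [50.0, 60.0, 0.8]]]),
            "bboxes": np.array([[5.0, 10.0, 60.0, 70.0]]),  # (x, y, w, h)
            "bbox_scores": np.array([0.95]),
        }
    }
    gt = {"img_000.png": np.array([[[11.0, 21.0, 2], [31.0, 41.0, 2], [51.0, 61.0, 2]]])}

    visualize_predictions_PFM(preds, gt, output_dir="./viz", keypoint_names=["nose", "neck", "tail"])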
+
+def get_dynamic_skeleton(skeleton, keypoints, p_cutoff=0.6):
+    """
+    Modify skeleton connections based on keypoint confidence scores.
+
+    If certain keypoints have low confidence (below the threshold), alternative
+    skeleton connections are used instead of the original ones.
+
+    Args:
+        skeleton (list): List of tuples/lists representing skeleton connections as (start_idx, end_idx)
+        keypoints (numpy.ndarray): Array of shape (..., 3) where the last dimension contains
+            [x, y, confidence] for each keypoint
+        p_cutoff (float): Confidence threshold (0.0-1.0)
+
+    Returns:
+        list: Modified skeleton connections based on confidence scores
+    """
+    dynamic_skeleton = skeleton.copy()
+    confidences = keypoints[..., 2]  # Get the confidence scores
+
+    # Mapping from keypoint name to index
+    # dict_name_to_idx = {name: idx for idx, name in enumerate(keypoint_name_simplified)}
+    dict_name_to_idx = {
+        "L_Shoulder": 12, "R_Shoulder": 13, "L_Elbow": 18, "R_Elbow": 19, "neck": 11,
+        "L_Wrist": 20, "R_Wrist": 21, "L_Hand": 22, "R_Hand": 23, "L_Knee": 27,
+        "R_Knee": 28, "L_hip": 24, "R_hip": 25, "C_hip": 26,
+    }
+
+    # Rules for alternative connections.
+    # Format: point_to_check: [(original_connections), alternative_connection]
+    # e.g. if L_Shoulder falls below the threshold, its connections
+    # {neck-L_Shoulder, L_Shoulder-L_Elbow} are replaced by the direct connection neck-L_Elbow.
+    special_connections = {
+        "L_Shoulder": [(("neck", "L_Shoulder"), ("L_Shoulder", "L_Elbow")), ("neck", "L_Elbow")],
+        "R_Shoulder": [(("neck", "R_Shoulder"), ("R_Shoulder", "R_Elbow")), ("neck", "R_Elbow")],
+        "L_Wrist": [(("L_Hand", "L_Wrist"), ("L_Wrist", "L_Elbow")), ("L_Hand", "L_Elbow")],
+        "R_Wrist": [(("R_Hand", "R_Wrist"), ("R_Wrist", "R_Elbow")), ("R_Hand", "R_Elbow")],
+        "L_hip": [(("L_Knee", "L_hip"), ("L_hip", "C_hip")), ("L_Knee", "C_hip")],
+        "R_hip": [(("R_Knee", "R_hip"), ("R_hip", "C_hip")), ("R_Knee", "C_hip")],
+    }
+
+    # Process each keypoint in the special connections
+    for keypoint_name, (original_connections, alternative_connection) in special_connections.items():
+        # Get the index of the keypoint
+        keypoint_idx = dict_name_to_idx[keypoint_name]
+
+        # Check if the keypoint confidence is below the threshold
+        if confidences[keypoint_idx] < p_cutoff:
+            # Convert named connections to index-based connections
+            original_connections_idx = []
+            for conn1, conn2 in original_connections:
+                # Add the connection and its reverse to the list
+                original_connections_idx.append([dict_name_to_idx[conn1], dict_name_to_idx[conn2]])
+                original_connections_idx.append([dict_name_to_idx[conn2], dict_name_to_idx[conn1]])
+
+            # Convert the alternative connection to index-based form
+            # TODO: alternative_connection could also contain multiple connections
+            alt_conn_idx = [dict_name_to_idx[alternative_connection[0]], dict_name_to_idx[alternative_connection[1]]]
+
+            # Remove the original connections from the dynamic skeleton
+            for conn in original_connections_idx:
+                if conn in dynamic_skeleton:
+                    dynamic_skeleton.remove(conn)
+
+            # Add the alternative connection if it is not already in the skeleton
+            if alt_conn_idx not in dynamic_skeleton and [alt_conn_idx[1], alt_conn_idx[0]] not in dynamic_skeleton:
+                dynamic_skeleton.append(alt_conn_idx)
+    return dynamic_skeleton
+
+
+class DynamicSkeleton:
+    def __init__(self, pred_bodyparts, p_cutoff=0.6):
+        self.keypoints = [
+            "forehead",
+            "head",
+            "L_Eye",
+            "R_Eye",
+            "nose",
+            "L_Ear",
+            "R_Ear",
+            "mouth_front_top",
+            "mouth_front_bottom",
+            "mouth_B_L",
+            "mouth_B_R",
+            "neck",
+            "L_Shoulder",
+            "R_Shoulder",
+            "upper_B",
+            "torso_M_B",
+            "body_C",
+            "lower_B",
+            "L_Elbow",
+            "R_Elbow",
+            "L_Wrist",
+            "R_Wrist",
+            "L_Hand",
+            "R_Hand",
+            "L_hip",
+            "R_hip",
+            "C_hip",
+            "L_Knee",
+            "R_Knee",
+            "L_Ankle",
+            "R_Ankle",
+            "L_foot",
+            "R_foot",
+            "root_tail",
+            "M_tail",
+            "M_end_tail",
+            "end_tail"
+        ]
+        self.parent_mapping = {
+            # body parts
+            'head': 'neck',
+            'neck': None,  # root
+            'L_Shoulder': 'neck',  # Left Shoulder
+            'R_Shoulder': 'neck',  # Right Shoulder
+            'L_Elbow': 'L_Shoulder',
+            'R_Elbow': 'R_Shoulder',
+            'L_Wrist': 'L_Elbow',
+            'R_Wrist': 'R_Elbow',
+            'L_Hand': 'L_Wrist',
+            'R_Hand': 'R_Wrist',
+            'C_hip': None,  # hip root, connected to the lower body
+            'L_hip': 'C_hip',
+            'R_hip': 'C_hip',
+            'L_Knee': 'L_hip',
+            'R_Knee': 'R_hip',
+            'L_Ankle': 'L_Knee',
+            'R_Ankle': 'R_Knee',
+            'L_foot': 'L_Ankle',
+            'R_foot': 'R_Ankle',
+            'root_tail': 'C_hip',
+            'M_tail': 'root_tail',
+            'M_end_tail': 'M_tail',
+            'end_tail': 'M_end_tail',
+            # head/face parts
+            'L_Ear': 'L_Eye',
+            'R_Ear': 'R_Eye',
+            'L_Eye': 'nose',
+            'R_Eye': 'nose',
+            'nose': None,
+        }
+
+        confidence_dict = {}
+        for idx, keypoint in enumerate(self.keypoints):
+            confidence_dict[keypoint] = pred_bodyparts[idx, 2]
+        self.confidence_dict = confidence_dict
+        self.p_cutoff = p_cutoff
+        self.dynamic_skeleton = []
+
+        # If C_hip is unreliable, use root_tail in its place: re-parent its
+        # children to root_tail and make root_tail the root
+        if self.confidence_dict.get('C_hip') < self.p_cutoff:
+            self.parent_mapping['L_hip'] = 'root_tail'
+            self.parent_mapping['R_hip'] = 'root_tail'
+            self.parent_mapping['M_tail'] = 'root_tail'
+            self.parent_mapping['root_tail'] = None
+            self.dynamic_skeleton.append(('root_tail', 'C_hip'))
+
+    def change_name_to_idx_dynamic_skeleton(self):
+        """Convert the skeleton from keypoint names to indices."""
+        result = []
+        for from_node, end_node in self.dynamic_skeleton:
+            # from_node: the starting keypoint of the connection
+            # end_node: the ending keypoint of the connection
+            from_idx = self.keypoints.index(from_node)
+            end_idx = self.keypoints.index(end_node)
+            result.append((from_idx, end_idx))
+        return result
+
+    def find_nearest_ancestor(self, node):
+        current_node = self.parent_mapping.get(node)
+        while current_node is not None:
+            current_node_conf = self.confidence_dict[current_node]
+            if current_node_conf > self.p_cutoff:
+                return current_node
+            else:
+                current_node = self.parent_mapping.get(current_node)
+        return None
+
+    def get_dynamic_skeleton(self):
+        # Only consider the keypoints that are in the parent mapping
+        for keypoint in self.parent_mapping.keys():
+            keypoint_conf = self.confidence_dict[keypoint]
+            if keypoint_conf > self.p_cutoff:
+                ancestor = self.find_nearest_ancestor(keypoint)
+                if ancestor is not None:
+                    self.dynamic_skeleton.append((ancestor, keypoint))
+
+        # Add a connection between C_hip and neck
+        if self.confidence_dict.get('C_hip') > self.p_cutoff and self.confidence_dict.get('neck') > self.p_cutoff:
+            self.dynamic_skeleton.append(('C_hip', 'neck'))
+        # If conf[C_hip] <= p_cutoff, connect root_tail to neck instead
+        elif self.confidence_dict.get('neck') > self.p_cutoff and self.confidence_dict.get('root_tail') > self.p_cutoff:
+            self.dynamic_skeleton.append(('root_tail', 'neck'))
+
+        return self.change_name_to_idx_dynamic_skeleton()
+
+
+def plot_gt_and_predictions_PFM(
+    image_path: Union[str, Path],
+    output_dir: Union[str, Path],
+    gt_bodyparts: Optional[np.ndarray] = None,
+    pred_bodyparts: Optional[np.ndarray] = None,
+    mode: str = "bodypart",
+    colormap: str = "rainbow",
+    dot_size: int = 12,
+    alpha_value: float = 0.8,
+    p_cutoff: float = 0.6,
+    bounding_boxes: tuple[np.ndarray, np.ndarray] | None = None,
+    bounding_boxes_color="k",
+    bboxes_pcutoff: float = 0.6,
+    skeleton: Optional[List[Tuple[int, int]]] = None,
+    keypoint_names: Optional[List[str]] = None,
+    keypoint_vis_mask: Optional[List[int]] = None,
+    labels: List[str] = ["+", ".", "x"],
+) -> None:
+    """Plot ground truth and predictions on an image.
+
+    Args:
+        image_path: Path to the image file
+        output_dir: Directory to save the visualization
+        gt_bodyparts: Ground truth keypoints array [N, num_keypoints, 3] (x, y, vis_label)
+        pred_bodyparts: Predicted keypoints array [N, num_keypoints, 3] (x, y, confidence)
+        mode: How to color the points ("bodypart" or "individual")
+        colormap: Matplotlib colormap name
+        dot_size: Size of the keypoint markers
+        alpha_value: Transparency for points and lines
+        p_cutoff: Confidence threshold for predictions
+        bounding_boxes: Tuple of (boxes, scores) for bounding box visualization
+        bounding_boxes_color: Color for bounding boxes
+        bboxes_pcutoff: Confidence threshold for bounding boxes
+        skeleton: List of joint pairs for skeleton visualization
+        keypoint_names: List of keypoint names for labeling
+        keypoint_vis_mask: List of keypoint indices to show (default: all keypoints visible)
+        labels: Marker styles for [ground truth, reliable predictions, unreliable predictions]
+    """
+    # Set a default keypoint visibility mask if none is provided
+    if pred_bodyparts is not None and keypoint_vis_mask is None:
+        keypoint_vis_mask = [1] * pred_bodyparts.shape[1]  # All keypoints visible by default
+
+    # Read the image and compute an adaptive dot size from its dimensions,
+    # using a logarithmic scale to handle very large or small images
+    frame = auxfun_videos.imread(str(image_path), mode="skimage")
+    h, w = frame.shape[:2]
+    diagonal = np.sqrt(w * w + h * h)  # Image diagonal length
+    base_size = np.log10(diagonal) * 3  # Logarithmic scaling
+    # Fine-tune the dot size
+    if diagonal > 1200:  # High resolution
+        dot_size = base_size * 2.0
+    elif diagonal < 800:  # Low resolution
+        dot_size = base_size * 1.0
+    else:  # Medium resolution
+        dot_size = base_size
+
+    # Keep the dot size within reasonable bounds
+    dot_size = int(max(4, min(dot_size, 15))) * 0.8
+
+    # Filter out individuals whose bounding box score is below the cutoff
+    valid_individuals = []
+    if bounding_boxes is not None:
+        for idx, bbox_score in enumerate(bounding_boxes[1]):
+            if bbox_score > bboxes_pcutoff:
+                valid_individuals.append(idx)
+
+    if valid_individuals:
+        if gt_bodyparts is not None:
+            gt_bodyparts = gt_bodyparts[valid_individuals]
+        if pred_bodyparts is not None:
+            pred_bodyparts = pred_bodyparts[valid_individuals]
+        if bounding_boxes is not None:
+            bounding_boxes = (
+                bounding_boxes[0][valid_individuals],
+                bounding_boxes[1][valid_individuals]
+            )
+
+    num_pred, num_keypoints = pred_bodyparts.shape[:2]
+
+    # Create the figure with optimal settings
+    fig, ax = create_minimal_figure()
+    fig.set_size_inches(w / 100, h / 100)
+    ax.set_xlim(0, w)
+    ax.set_ylim(0, h)
+    ax.invert_yaxis()
+    ax.imshow(frame, "gray")
+
+    # Set up colors based on the coloring mode
+    if mode == "bodypart":
+        colors = get_cmap(num_keypoints, name=colormap)
+    elif mode == "individual":
+        colors = get_cmap(num_pred + 1, name=colormap)
+    else:
+        raise ValueError(f"Invalid mode: {mode}")
+
+    # Draw the bounding boxes if provided
+    if bounding_boxes is not None:
+        for bbox, bbox_score in zip(bounding_boxes[0], bounding_boxes[1]):
+            bbox_origin = (bbox[0], bbox[1])
+            (bbox_width, bbox_height) = (bbox[2], bbox[3])
+            rect = patches.Rectangle(
+                bbox_origin,
+                bbox_width,
+                bbox_height,
+                linewidth=2,
+                edgecolor=bounding_boxes_color,
+                facecolor='none',
+                linestyle="--" if bbox_score < bboxes_pcutoff else "-"
+            )
+            ax.add_patch(rect)
+
+    scale_factor = min(w, h) / 1000  # Normalize the scale factor based on image size
+
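The adaptive marker sizing above is easiest to see with numbers; a quick check of the heuristic for a hypothetical 1920x1080 frame (values rounded):

    import numpy as np

    w, h = 1920, 1080
    diagonal = np.sqrt(w * w + h * h)         # ~2203
    base_size = np.log10(diagonal) * 3        # ~10.0
    dot = base_size * 2.0 if diagonal > 1200 else base_size  # high resolution -> ~20.1
    dot = int(max(4, min(dot, 15))) * 0.8     # clamped to 15, scaled -> 12.0
    print(dot)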
+    plot_individual = False
+    if plot_individual:
+        # Save an individual plot for each animal
+        for idx_individual in range(num_pred):
+            # Create a new figure for each individual
+            fig_ind, ax_ind = create_minimal_figure()
+            fig_ind.set_size_inches(w / 100, h / 100)
+            ax_ind.set_xlim(0, w)
+            ax_ind.set_ylim(0, h)
+            ax_ind.invert_yaxis()
+            ax_ind.imshow(frame, "gray")
+
+            # Draw the bounding box for this individual if available
+            if bounding_boxes is not None:
+                bbox = bounding_boxes[0][idx_individual]
+                bbox_score = bounding_boxes[1][idx_individual]
+                bbox_origin = (bbox[0], bbox[1])
+                (bbox_width, bbox_height) = (bbox[2], bbox[3])
+                rect = patches.Rectangle(
+                    bbox_origin,
+                    bbox_width,
+                    bbox_height,
+                    linewidth=2,
+                    edgecolor=bounding_boxes_color,
+                    facecolor='none',
+                    linestyle="--" if bbox_score < bboxes_pcutoff else "-"
+                )
+                ax_ind.add_patch(rect)
+
+            # Reset the text positions for each individual
+            existing_text_positions = []
+
+            # Plot the keypoints for this individual
+            for idx_keypoint in range(num_keypoints):
+                if keypoint_vis_mask[idx_keypoint]:
+                    keypoint_confidence = pred_bodyparts[idx_individual, idx_keypoint, 2]
+                    if keypoint_confidence > p_cutoff:
+                        x_kp = pred_bodyparts[idx_individual, idx_keypoint, 0]
+                        y_kp = pred_bodyparts[idx_individual, idx_keypoint, 1]
+
+                        ax_ind.plot(
+                            x_kp,
+                            y_kp,
+                            labels[1],  # confidence is above p_cutoff here
+                            color=colors(idx_keypoint),
+                            alpha=alpha_value,
+                            markersize=dot_size
+                        )
+
+                        if keypoint_names is not None:
+                            # Calculate and adjust the text position
+                            x_text = x_kp - (10 * scale_factor)
+                            y_text = y_kp - (15 * scale_factor)
+                            x_text = min(max(0, x_text), w - 100)
+                            y_text = min(max(0, y_text), h - 10)
+
+                            while any(abs(x_text - existing_x) < 50 * scale_factor and abs(y_text - existing_y) < 30 * scale_factor
+                                      for existing_x, existing_y in existing_text_positions):
+                                y_text += 5 * scale_factor
+                                if y_text > h - 10:
+                                    y_text = y_kp
+                                    x_text += 50 * scale_factor
+
+                            existing_text_positions.append((x_text, y_text))
+
+                            ax_ind.text(
+                                x_text,
+                                y_text,
+                                keypoint_names[idx_keypoint],
+                                color=colors(idx_keypoint),
+                                alpha=alpha_value,
+                                fontsize=dot_size * 0.8
+                            )
+
+                # Plot the ground truth for this individual
+                if gt_bodyparts is not None:
+                    if gt_bodyparts[idx_individual, idx_keypoint, 2] != -1:
+                        ax_ind.plot(
+                            gt_bodyparts[idx_individual, idx_keypoint, 0],
+                            gt_bodyparts[idx_individual, idx_keypoint, 1],
+                            labels[0],
+                            color=colors(idx_keypoint),
+                            alpha=alpha_value,
+                            markersize=dot_size
+                        )
+
+            # Save the individual plot; add an index only for multi-animal images
+            if num_pred > 1:
+                output_path = Path(output_dir) / f"{Path(image_path).stem}_animal_{idx_individual}_predictions.png"
+            else:
+                output_path = Path(output_dir) / f"{Path(image_path).stem}_predictions.png"
+
+            plt.savefig(
+                output_path,
+                bbox_inches='tight',
+                pad_inches=0,
+                transparent=False
+            )
+            plt.close(fig_ind)
+
+    # Combined plot: track existing text positions to avoid overlap
+    existing_text_positions = []
+
+    for idx_individual in range(num_pred):
+        for idx_keypoint in range(num_keypoints):
+            if pred_bodyparts is not None and keypoint_vis_mask[idx_keypoint]:
+                # Choose the marker based on whether the prediction is reliable
+                keypoint_confidence = pred_bodyparts[idx_individual, idx_keypoint, 2]
+                if keypoint_confidence > p_cutoff:
+                    pred_label = labels[1]
+                else:
+                    pred_label = labels[2]
+                if keypoint_confidence > p_cutoff:
+                    x_kp = pred_bodyparts[idx_individual, idx_keypoint, 0]
+                    y_kp = pred_bodyparts[idx_individual, idx_keypoint, 1]
+
+                    ax.plot(
+                        x_kp,
+                        y_kp,
+                        pred_label,
+                        color=colors(idx_keypoint),
+                        alpha=alpha_value,
+                        markersize=dot_size
+                    )
+
+                    if keypoint_names is not None:
+                        # Calculate the initial text position and keep it within the image bounds
+                        x_text = x_kp - (10 * scale_factor)
+                        y_text = y_kp - (15 * scale_factor)
+                        x_text = min(max(0, x_text), w - 100)
+                        y_text = min(max(0, y_text), h - 10)
+
+                        # TODO: smarter dynamic text placement
+                        # Nudge the label until it no longer overlaps an existing one
+                        while any(abs(x_text - existing_x) <= 15 * scale_factor and abs(y_text - existing_y) <= 15 * scale_factor
+                                  for existing_x, existing_y in existing_text_positions):
+                            y_text += 7.5 * scale_factor
+                            x_text += 4 * scale_factor
+
+                        # Record this position
+                        existing_text_positions.append((x_text, y_text))
+
+                        ax.text(
+                            x_text,
+                            y_text,
+                            keypoint_names[idx_keypoint],
+                            color=colors(idx_keypoint),
+                            alpha=alpha_value,
+                            fontsize=dot_size * 0.5
+                        )
+
+            # Plot the ground truth
+            if gt_bodyparts is not None:
+                if gt_bodyparts[idx_individual, idx_keypoint, 2] != -1:
+                    ax.plot(
+                        gt_bodyparts[idx_individual, idx_keypoint, 0],
+                        gt_bodyparts[idx_individual, idx_keypoint, 1],
+                        labels[0],
+                        color=colors(idx_keypoint),
+                        alpha=alpha_value,
+                        markersize=dot_size * 0.5
+                    )
+
+        if skeleton is not None:
+            # Draw all valid skeleton connections; the passed skeleton only gates drawing,
+            # the actual connections come from the confidence-aware DynamicSkeleton
+            connection_pairs = []
+            dynamic_skeleton = DynamicSkeleton(pred_bodyparts[idx_individual], p_cutoff).get_dynamic_skeleton()
+
+            for idx1, idx2 in dynamic_skeleton:
+                # Only add the connection if both keypoints have confidence above the threshold
+                if (pred_bodyparts[idx_individual, idx1, 2] > p_cutoff and
+                        pred_bodyparts[idx_individual, idx2, 2] > p_cutoff):
+                    connection_pairs.append({
+                        'start': (pred_bodyparts[idx_individual, idx1, 0],
+                                  pred_bodyparts[idx_individual, idx1, 1]),
+                        'end': (pred_bodyparts[idx_individual, idx2, 0],
+                                pred_bodyparts[idx_individual, idx2, 1])
+                    })
+
+            for connection in connection_pairs:
+                ax.plot(
+                    [connection['start'][0], connection['end'][0]],
+                    [connection['start'][1], connection['end'][1]],
+                    'g',  # green solid line
+                    alpha=alpha_value * 0.8,  # slightly more transparent than the points
+                    linewidth=dot_size * 0.1  # scale the line width with the dot size
+                )
+
+    # Save the figure
+    output_path = Path(output_dir) / f"{Path(image_path).stem}_predictions.png"
+    plt.savefig(
+        output_path,
+        dpi=200,
+        bbox_inches='tight',
+        pad_inches=0,
+        transparent=False
+    )
+    erase_artists(ax)
+    plt.close()
+
+
 def evaluate_snapshot(
     cfg: dict,
     loader: DLCLoader,
diff --git a/deeplabcut/pose_estimation_pytorch/data/image.py b/deeplabcut/pose_estimation_pytorch/data/image.py
index f62548c2ed..a3fbc8f3a6 100644
--- a/deeplabcut/pose_estimation_pytorch/data/image.py
+++ b/deeplabcut/pose_estimation_pytorch/data/image.py
@@ -237,6 +237,14 @@ def top_down_crop(
     image_h, image_w, c = image.shape
     out_w, out_h = output_size
     x, y, w, h = bbox
+
+    # Safety check for zero bbox dimensions (currently disabled)
+    # if w <= 0:
+    #     print(f"ERROR: Zero width in bbox {bbox}, using default width=1")
+    #     w = 1
+    # if h <= 0:
+    #     print(f"ERROR: Zero height in bbox {bbox}, using default height=1")
+    #     h = 1
     cx = x + w / 2
     cy = y + h / 2
diff --git a/deeplabcut/pose_estimation_pytorch/data/utils.py b/deeplabcut/pose_estimation_pytorch/data/utils.py
index 5abf776f6f..7768754ffd 100644
--- a/deeplabcut/pose_estimation_pytorch/data/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/data/utils.py
@@ -50,6 +50,11 @@ def bbox_from_keypoints(
     # we do not estimate bbox on keypoints that have 0 or -1 flag
     keypoints = np.copy(keypoints)
+
+    # debug: warn when an annotation has no usable keypoints
+    if np.all(keypoints[..., -1] <= 0):
+        print(f"All keypoints: {keypoints}")
+
     keypoints[keypoints[..., -1] <= 0] = np.nan
     if len(keypoints.shape) == 2:
@@ -72,9 +77,14 @@
     )
     bboxes[..., 2] = bboxes[..., 2] - bboxes[..., 0]  # to width
     bboxes[..., 3] = bboxes[..., 3] - bboxes[..., 1]  # to height
+
+    # debug: warn when the first bbox degenerates to all zeros
+    if np.array_equal(bboxes[0], np.array([0, 0, 0, 0])):
+        print(f"All bboxes: {bboxes}")
+
     if squeeze:
         return bboxes[0]
-
+
     return bboxes
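The debug hooks above fire when an annotation has no usable keypoints; a tiny illustration of that degenerate input, using the same masking step as bbox_from_keypoints (the array is fabricated, with a visibility flag of 0 or -1 meaning "not labeled"):

    import numpy as np

    kpts = np.array([[10.0, 20.0, 0.0], [30.0, 40.0, -1.0]])  # flags <= 0: unlabeled
    kpts[kpts[..., -1] <= 0] = np.nan   # the masking step from bbox_from_keypoints
    print(np.all(np.isnan(kpts)))       # True -> the resulting bbox degenerates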
diff --git a/deeplabcut/pose_estimation_pytorch/runners/train.py b/deeplabcut/pose_estimation_pytorch/runners/train.py
index 2bfed99117..80249a65e7 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/train.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/train.py
@@ -241,7 +241,7 @@ def fit(
         line_length = max([len(name) for name in epoch_metrics.keys()]) + 2
         for name, score in epoch_metrics.items():
             logging.info(f" {(name + ':').ljust(line_length)}{score:6.2f}")
-
+
     def _epoch(
         self,
         loader: torch.utils.data.DataLoader,
@@ -273,6 +273,12 @@ def _epoch(
         epoch_loss = []
         loss_metrics = defaultdict(list)
         for i, batch in enumerate(loader):
+            # batch keys: 'image', 'image_id', 'path', 'original_size', 'offsets', 'scales', 'annotations'
+            # print("batch:", batch.keys())
+            # The dataset name can be recovered from the path, e.g.
+            # 'xxx/v8_coco/images/mbw_0_cam1_0149.jpg' -> dataset 'v8_coco'
+            # print("path", batch["path"])
+
             losses_dict = self.step(batch, mode)
             if "total_loss" in losses_dict:
                 epoch_loss.append(losses_dict["total_loss"])
@@ -433,7 +439,7 @@ def step(
         if mode == "train":
             self.optimizer.zero_grad()
-        inputs = batch["image"]
+        inputs = batch["image"]  # [batch_size, channels, height, width]
         inputs = inputs.to(self.device).float()
         if 'cond_keypoints' in batch['context']:
             cond_kpts = batch['context']['cond_keypoints']
@@ -445,7 +451,7 @@
             underlying_model = self.model.module
         else:
             underlying_model = self.model
-
+        # target has the same structure as the model outputs
         target = underlying_model.get_target(outputs, batch["annotations"])
         losses_dict = underlying_model.get_loss(outputs, target)
         if mode == "train":
diff --git a/deeplabcut/utils/make_labeled_video.py b/deeplabcut/utils/make_labeled_video.py
index a9cdb46825..52f3689788 100644
--- a/deeplabcut/utils/make_labeled_video.py
+++ b/deeplabcut/utils/make_labeled_video.py
@@ -883,6 +883,9 @@ def proc_video(
             df, filepath, _, _ = auxiliaryfunctions.load_analyzed_data(
                 destfolder, vname, DLCscorer, filtered, track_method
             )
+            full_data = auxiliaryfunctions.load_video_full_data(
+                destfolder, vname, DLCscorer
+            )
             metadata = auxiliaryfunctions.load_video_metadata(
                 destfolder, vname, DLCscorer
             )
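Since the _epoch comment notes that the dataset name can be recovered from each sample's path, a minimal sketch of that parsing (pure path manipulation; the '<root>/<dataset>/images/<frame>.jpg' layout is an assumption taken from the comment's example):

    from pathlib import Path

    def dataset_name_from_path(image_path: str) -> str:
        # '<root>/<dataset>/images/<frame>.jpg' -> '<dataset>'
        return Path(image_path).parent.parent.name

    print(dataset_name_from_path("xxx/v8_coco/images/mbw_0_cam1_0149.jpg"))  # -> 'v8_coco'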