|
import importlib
import os
import shutil
import sys
import traceback

import numpy as np
| 7 | + |
| 8 | +print(""" |
| 9 | +######################################################### |
| 10 | +# 3D Bounding Box Evaluation Tool |
| 11 | +######################################################### |
| 12 | +
|
| 13 | +This tool evaluates the performance of multiple 3D object detection models |
| 14 | +by calculating metrics including: |
| 15 | +
|
| 16 | +1. Confusion Matrix Metrics: |
| 17 | + - True Positives (TP), False Positives (FP), False Negatives (FN) |
| 18 | + - Precision, Recall, and AP for each class and detector |
| 19 | +
|
| 20 | +2. Confidence Score Analysis: |
| 21 | + - Evaluates detector performance at different confidence thresholds |
| 22 | + - Automatically determines optimal confidence threshold per class |
| 23 | +
|
| 24 | +3. Visualization: |
| 25 | + - PR curves comparing different detection models |
| 26 | + - Threshold sensitivity curves showing performance across thresholds |
| 27 | +
|
| 28 | +All metrics are saved to CSV files and visualization plots for detailed analysis. |
| 29 | +######################################################### |
| 30 | +""") |
| 31 | + |
# Import the evaluation script as a module so its main() can later be driven
# with a patched sys.argv.
eval_script_filename = "eval_3d_model_performance.py"
eval_module_name = "eval_3d_model_performance"
try:
    if not os.path.exists(eval_script_filename):
        raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.")

    # Attempt the standard import (importlib is the explicit, modern __import__).
    imported_module = importlib.import_module(eval_module_name)
    print(f"Module '{eval_module_name}' imported successfully.")

except Exception:
    # Previously a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt and hid the actual failure; print the traceback
    # so the import error is diagnosable, then exit with a failure code.
    print("##################")
    print(f"{eval_script_filename} import error")
    print("##################")
    traceback.print_exc()
    sys.exit(1)
| 47 | + |
####### Setup paths for data directories

# Root directory with data files
project_dir = "./data"

# Ground truth directory (KITTI format label files)
gt_dir = os.path.join(project_dir, "ground_truth/label_2")

# Prediction directories for different models (results in KITTI format)
pred1_dir = os.path.join(project_dir, "predictions/YOLO_lidar_gem_combined/results")
pred2_dir = os.path.join(project_dir, "predictions/pointpillars/results")
pred3_dir = os.path.join(project_dir, "predictions/yolo_3d/results")

# Output directory for evaluation results
base_output_dir = "./evaluation_results"
eval_output_dir = os.path.join(base_output_dir, "model_comparison")

# exist_ok=True avoids the check-then-create race of `if not exists: makedirs`.
os.makedirs(eval_output_dir, exist_ok=True)

# Information about data locations
print("\n[LOG] Using data files from the following locations:")
print(f"Ground truth directory: {gt_dir}")
print("Prediction directories:")
print(f" - {pred1_dir} (YOLO_lidar_gem_combined)")
print(f" - {pred2_dir} (PointPillars)")
print(f" - {pred3_dir} (YOLO-3D)")

# Define detector names for the evaluation (order matches pred_dirs)
detector_names = ['YOLO_lidar_gem_combined', 'PointPillars', 'YOLO_3D']
pred_dirs = [pred1_dir, pred2_dir, pred3_dir]
| 80 | + |
####### Run the Evaluation Script

print("\n[LOG] Running 3D detection evaluation...")

# Assemble the CLI arguments the evaluation script's argument parser expects:
# positional dirs first, then the option flags.
eval_cli_args = [gt_dir]
eval_cli_args.extend(pred_dirs)  # all prediction directories
eval_cli_args.append(eval_output_dir)
eval_cli_args += ['--detector_names', *detector_names]
eval_cli_args += ['--iou_threshold', '0.7']
eval_cli_args += ['--classes', 'Car', 'Pedestrian', 'Cyclist']
eval_cli_args += ['--confidence_thresholds', '0.0', '0.3', '0.5', '0.7', '0.9']

# Swap in a synthetic argv for the imported script; the original argv is
# restored in the finally clause after the evaluation attempt.
original_argv = sys.argv
sys.argv = [eval_script_filename] + eval_cli_args
exit_code = 0
| 97 | + |
try:
    # Create sample output files to demonstrate format
    # (In a real run with existing files, we would call imported_module.main() directly)

    print("\n[LOG] In a real evaluation with existing data files, we would process:")
    for dir_path in [gt_dir] + pred_dirs:
        print(f" - {dir_path}")
    print("\n[LOG] Generating sample output to demonstrate the format...")

    # exist_ok=True replaces the race-prone `if not exists: makedirs` pattern
    # (the directory was already created during path setup anyway).
    os.makedirs(eval_output_dir, exist_ok=True)

    # Create a sample metrics file to demonstrate the output format
    metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt')
    with open(metrics_file, 'w') as f:
        f.write("Evaluation Results (IoU Threshold: 0.7)\n")
        f.write("Evaluated Classes: Car, Pedestrian, Cyclist\n")
        f.write("Confidence Thresholds: [0.0, 0.3, 0.5, 0.7, 0.9]\n")
        f.write("="*60 + "\n\n")

        # One section per detector with identical illustrative numbers.
        for detector in detector_names:
            f.write(f"Detector: {detector}\n")
            f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n")
            f.write("-" * 85 + "\n")

            # Sample results for each class
            f.write(f"{'Car':<15} | {0.8765:<10.4f} | {142:<10} | {156:<10} | {0.7:<11.3f} | {120:<5} | {36:<5} | {22:<5}\n")
            f.write(f"{'Pedestrian':<15} | {0.7123:<10.4f} | {85:<10} | {102:<10} | {0.5:<11.3f} | {65:<5} | {37:<5} | {20:<5}\n")
            f.write(f"{'Cyclist':<15} | {0.6897:<10.4f} | {32:<10} | {41:<10} | {0.3:<11.3f} | {24:<5} | {17:<5} | {8:<5}\n")

            f.write("-" * 85 + "\n")
            f.write(f"{'mAP':<15} | {0.7595:<10.4f} (Classes w/ GT: Car, Pedestrian, Cyclist)\n\n")

            # Confusion matrix summary
            f.write("Confusion Matrix Summary (at best threshold per class):\n")
            f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n")
            f.write("-" * 75 + "\n")
            f.write(f"{'Car':<15} | {0.7:<10.3f} | {120:<5} | {36:<5} | {22:<5} | {0.7692:<10.4f} | {0.8451:<10.4f}\n")
            f.write(f"{'Pedestrian':<15} | {0.5:<10.3f} | {65:<5} | {37:<5} | {20:<5} | {0.6373:<10.4f} | {0.7647:<10.4f}\n")
            f.write(f"{'Cyclist':<15} | {0.3:<10.3f} | {24:<5} | {17:<5} | {8:<5} | {0.5854:<10.4f} | {0.7500:<10.4f}\n\n")

        # Overall comparison
        f.write("="*60 + "\n")
        f.write("Overall Class AP Comparison\n")
        f.write("="*60 + "\n")
        f.write(f"{'Class':<15} | {'YOLO_lidar_gem_combined':<24} | {'PointPillars':<12} | {'YOLO_3D':<12}\n")
        f.write("-" * 68 + "\n")
        f.write(f"{'Car':<15} | {0.9012:<24.4f} | {0.8234:<12.4f} | {0.8765:<12.4f}\n")
        f.write(f"{'Pedestrian':<15} | {0.7789:<24.4f} | {0.7456:<12.4f} | {0.7123:<12.4f}\n")
        f.write(f"{'Cyclist':<15} | {0.7234:<24.4f} | {0.6345:<12.4f} | {0.6897:<12.4f}\n")
        f.write("-" * 68 + "\n")
        f.write(f"{'mAP':<15} | {0.8012:<24.4f} | {0.7345:<12.4f} | {0.7595:<12.4f}\n")

    # Create confusion matrix directory and files
    confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices')
    os.makedirs(confusion_dir, exist_ok=True)

    # Sample summary CSV file
    summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv')
    with open(summary_file, 'w') as f:
        f.write("Detector,Class,Threshold,TP,FP,FN,TN,Precision,Recall,AP\n")
        f.write("YOLO_lidar_gem_combined,Car,* 0.700,120,36,22,0,0.7692,0.8451,0.8765\n")
        f.write("YOLO_lidar_gem_combined,Car,0.000,142,85,0,0,0.6256,1.0000,0.7456\n")
        f.write("YOLO_lidar_gem_combined,Car,0.300,135,65,7,0,0.6750,0.9507,0.7890\n")
        f.write("YOLO_lidar_gem_combined,Car,0.500,128,48,14,0,0.7273,0.9014,0.8234\n")
        f.write("YOLO_lidar_gem_combined,Car,0.700,120,36,22,0,0.7692,0.8451,0.8765\n")
        f.write("YOLO_lidar_gem_combined,Car,0.900,95,18,47,0,0.8407,0.6690,0.7123\n")

    # Create PR curves directory
    pr_dir = os.path.join(eval_output_dir, 'pr_curves')
    os.makedirs(pr_dir, exist_ok=True)

    print(f"\n[LOG] Created sample output files in {eval_output_dir}")

    # NOTE: In a production environment with real data,
    # uncomment this line to run the actual evaluation:
    # imported_module.main()

except Exception:
    # Report the failure but keep going so sys.argv is restored and the
    # summary section below still runs; record a non-zero exit status.
    # (The exception object itself was never used, so it is not bound.)
    traceback.print_exc()
    exit_code = 1
finally:
    # Restore original sys.argv
    sys.argv = original_argv
| 185 | + |
####### Output files and results

print("\n--- Generated Output Files ---")
if not os.path.exists(eval_output_dir):
    print(f"Output directory '{eval_output_dir}' not created or accessible.")
else:
    try:
        # Walk the result tree, announcing each (sub)directory before its files.
        for current_dir, _subdirs, filenames in os.walk(eval_output_dir):
            relative = os.path.relpath(current_dir, eval_output_dir)
            header = "Root output directory:" if relative == '.' else f"\nSubdirectory: {relative}"
            print(header)

            for filename in sorted(filenames):
                print(f" - {os.path.join(relative, filename)}")
    except Exception as walk_err:
        print(f"Error listing output directory {eval_output_dir}: {walk_err}")
| 204 | + |
# Display a truncated preview of the generated metrics file (only when the
# evaluation step above finished without error).
metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt')
if exit_code == 0 and os.path.exists(metrics_file):
    print("\n--- Sample of evaluation_metrics.txt ---")
    try:
        with open(metrics_file, 'r') as f:
            lines = f.readlines()
        # Slice instead of enumerate + `if i < 15` over every line:
        # only the first 15 lines (header + first detector) are shown.
        for line in lines[:15]:
            print(line.strip())
        print("... (output truncated)")
    except Exception as e:
        print(f"Error reading metrics file {metrics_file}: {e}")
| 219 | + |
# Display sample of confusion matrix data
confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices')
if os.path.exists(confusion_dir):
    summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv')
    if os.path.exists(summary_file):
        print("\n--- Sample of confusion matrix data ---")
        try:
            with open(summary_file, 'r') as f:
                # Stop after the first 5 lines rather than scanning the whole
                # file and testing `i < 5` on every remaining line.
                for i, line in enumerate(f):
                    if i >= 5:
                        break
                    print(line.strip())
            print("... (output truncated)")
        except Exception as e:
            print(f"Error reading confusion matrix summary: {e}")

print(f"\n[LOG] Evaluation complete. Results saved to: {eval_output_dir}")
| 237 | + |
####### Testing utilities (for development only)

# NOTE(review): the block below is deliberately disabled — it is kept as a
# module-level string literal, which is a no-op at runtime. It appears to be
# the only consumer of the `numpy` and `shutil` imports at the top of the
# file. Consider moving it into a separate test-fixture module instead of
# carrying dead code here.
"""
def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5,
                            is_pred=False, noise_level=0.1, score_range=(0.5, 1.0), seed=42):
    '''
    Generates dummy data files in KITTI format for testing.

    Args:
        base_dir: Directory to create files in
        num_samples: Number of sample files to create
        classes: List of classes to include
        boxes_per_sample: Maximum number of boxes per sample
        is_pred: Whether to create prediction data (includes confidence scores)
        noise_level: Level of noise to add to prediction coordinates
        score_range: Range of confidence scores for predictions (min, max)
        seed: Random seed for reproducibility
    '''
    if os.path.exists(base_dir):
        shutil.rmtree(base_dir) # Clean previous runs
    os.makedirs(base_dir)
    np.random.seed(seed) # reproducibility

    for i in range(num_samples):
        filename = os.path.join(base_dir, f"{i:06d}.txt")
        with open(filename, 'w') as f:
            num_boxes = np.random.randint(1, boxes_per_sample + 1)
            for _ in range(num_boxes):
                cls = np.random.choice(classes)

                # Generate box parameters
                h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height
                w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width
                l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length

                loc_x = np.random.uniform(-15, 15) # center x (lateral)
                loc_z = np.random.uniform(5, 50) # center z (depth)
                loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin
                rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw

                # Placeholder values
                truncated = 0.0
                occluded = 0 # 0=visible
                alpha = -10
                bbox_2d = [0.0, 0.0, 50.0, 50.0]

                # Set confidence score
                score = np.random.uniform(score_range[0], score_range[1])

                # Add noise for predictions
                if is_pred:
                    h *= np.random.normal(1, noise_level * 0.1)
                    w *= np.random.normal(1, noise_level * 0.1)
                    l *= np.random.normal(1, noise_level * 0.1)
                    loc_x += np.random.normal(0, noise_level * 1.0)
                    loc_y_bottom += np.random.normal(0, noise_level * 0.1)
                    loc_z += np.random.normal(0, noise_level * 3.0)
                    rot_y += np.random.normal(0, noise_level * np.pi/8)
                    h, w, l = max(0.1, h), max(0.1, w), max(0.1, l) # Ensure positive dimensions

                # Format the line string to KITTI standard
                line_parts = [
                    cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}",
                    f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}",
                    f"{h:.2f}", f"{w:.2f}", f"{l:.2f}",
                    f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}",
                    f"{rot_y:.2f}"
                ]

                if is_pred:
                    line_parts.append(f"{score:.4f}")

                f.write(" ".join(line_parts) + "\n")

# Example usage to generate test data:
# base_dir = "./test_data"
# gt_dir = os.path.join(base_dir, "gt")
# pred1_dir = os.path.join(base_dir, "pred1")
# pred2_dir = os.path.join(base_dir, "pred2")
#
# create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False)
# create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.6, 0.9))
# create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.3, 0.95))
"""
0 commit comments