From ff5338f45c7698ca490c8925bd7a2ea90faaec84 Mon Sep 17 00:00:00 2001 From: Bhavya Mehta <144762266+Bhavya1604@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:20:35 +0530 Subject: [PATCH 1/2] added doublecounting algorithm for review --- DoubleCounting.py | 279 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 DoubleCounting.py diff --git a/DoubleCounting.py b/DoubleCounting.py new file mode 100644 index 000000000..38b6c8059 --- /dev/null +++ b/DoubleCounting.py @@ -0,0 +1,279 @@ +import os +os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' + +import cv2 +import h5py +import numpy as np +import pandas as pd +import torch +import torchvision +import pycolmap +from pathlib import Path +from matplotlib import pyplot +import geopandas as gpd +from shapely.geometry import box + +from deepforest import main +from hloc import extract_features, match_features, pairs_from_exhaustive +from hloc.utils.io import get_matches + + +def get_matching_points(h5_file, image1_name, image2_name, min_score=None): + """Get matching points between two images from an h5 file.""" + matches, scores = get_matches(h5_file, image1_name, image2_name) + if min_score is not None: + matches = matches[scores > min_score] + match_index = pd.DataFrame(matches, columns=["image1", "image2"]) + + features_path = os.path.join(os.path.dirname(h5_file), "features.h5") + with h5py.File(features_path, 'r') as features_h5_f: + keypoints_image1 = pd.DataFrame(features_h5_f[image1_name]["keypoints"][:], columns=["x", "y"]) + keypoints_image2 = pd.DataFrame(features_h5_f[image2_name]["keypoints"][:], columns=["x", "y"]) + points1 = keypoints_image1.iloc[match_index["image1"].values].values + points2 = keypoints_image2.iloc[match_index["image2"].values].values + return points1, points2 + +def compute_homography_matrix(h5_file, image1_name, image2_name): + """Compute the homography matrix between two images.""" + points1, points2 = get_matching_points(h5_file, 
image1_name, image2_name) + if len(points1) < 4 or len(points2) < 4: + raise ValueError(f"Not enough matching points (<4) found between images {image1_name} and {image2_name}") + + ransac_options = pycolmap.RANSACOptions(max_error=4.0) + report = pycolmap.estimate_homography_matrix(points1, points2, ransac_options) + + if report is None: + raise ValueError(f"Homography matrix estimation failed for images {image1_name} and {image2_name}") + return report + +def warp_box(xmin, ymin, xmax, ymax, homography): + """Warp a bounding box using a homography matrix.""" + points = np.array([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype=np.float32) + reshaped_points = points.reshape(-1, 1, 2) + warped_points = cv2.perspectiveTransform(reshaped_points, homography).squeeze(1) + + warped_xmin, warped_ymin = warped_points.min(axis=0) + warped_xmax, warped_ymax = warped_points.max(axis=0) + return int(warped_xmin), int(warped_ymin), int(warped_xmax), int(warped_ymax) + +def align_predictions(predictions, homography_matrix): + """Aligns a DataFrame of predictions using a homography matrix.""" + transformed_predictions = predictions.copy() + for index, row in transformed_predictions.iterrows(): + xmin, ymin, xmax, ymax = warp_box(row['xmin'], row['ymin'], row['xmax'], row['ymax'], homography_matrix) + transformed_predictions.loc[index, ['xmin', 'ymin', 'xmax', 'ymax']] = xmin, ymin, xmax, ymax + return transformed_predictions + +def remove_predictions(src_predictions, dst_predictions, aligned_predictions, threshold, device, strategy='highest-score'): + """Remove overlapping predictions using different strategies.""" + if strategy == "highest-score": + dst_and_aligned_predictions = pd.concat([aligned_predictions, dst_predictions], ignore_index=True) + boxes = torch.tensor(dst_and_aligned_predictions[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float).to(device) + scores = torch.tensor(dst_and_aligned_predictions['score'].values, dtype=torch.float).to(device) 
+ + keep_indices = torchvision.ops.nms(boxes, scores, threshold) + indices_to_keep = dst_and_aligned_predictions.iloc[keep_indices.cpu()] + + src_filtered = src_predictions[src_predictions.box_id.isin(indices_to_keep.box_id)] + dst_filtered = dst_predictions[dst_predictions.box_id.isin(indices_to_keep.box_id)] + else: + aligned_predictions["geometry"] = aligned_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1) + dst_predictions["geometry"] = dst_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1) + aligned_gdf = gpd.GeoDataFrame(aligned_predictions, geometry="geometry") + dst_gdf = gpd.GeoDataFrame(dst_predictions, geometry='geometry') + + joined = gpd.sjoin(aligned_gdf, dst_gdf, how='inner', predicate='intersects') + + if strategy == "left-hand": + src_indices_to_keep = src_predictions.box_id + dst_indices_to_keep = dst_predictions[~dst_predictions.box_id.isin(joined.box_id_right)].box_id + elif strategy == "right-hand": + src_indices_to_keep = src_predictions[~src_predictions.box_id.isin(joined.box_id_left)].box_id + dst_indices_to_keep = dst_predictions.box_id + else: + raise ValueError(f"Unknown strategy: {strategy}. 
Choose from 'highest-score', 'left-hand', 'right-hand'.") + + src_filtered = src_predictions[src_predictions.box_id.isin(src_indices_to_keep)] + dst_filtered = dst_predictions[dst_predictions.box_id.isin(dst_indices_to_keep)] + + return src_filtered, dst_filtered + +def align_and_delete(matching_h5_file, predictions, device, threshold=0.325, strategy='highest-score'): + """Given predictions, align and delete overlapping boxes using a specified strategy.""" + image_names = sorted(predictions.image_path.unique()) + if len(image_names) < 2: + return predictions + + predictions["box_id"] = range(len(predictions)) + filtered_predictions = {name: predictions[predictions.image_path == name] for name in image_names} + + num_pairs = len(image_names) * (len(image_names) - 1) // 2 + pair_count = 0 + + for i in range(len(image_names)): + for j in range(i + 1, len(image_names)): + src_image_name, dst_image_name = image_names[i], image_names[j] + pair_count += 1 + print(f"Processing Pair {pair_count}/{num_pairs}: ({src_image_name}, {dst_image_name})") + + try: + homography = compute_homography_matrix(h5_file=matching_h5_file, image1_name=src_image_name, image2_name=dst_image_name) + except ValueError as e: + print(f"Skipping pair, could not compute homography: {e}") + continue + + src_preds, dst_preds = filtered_predictions[src_image_name], filtered_predictions[dst_image_name] + + if src_preds.empty or dst_preds.empty: + continue + + aligned_src_preds = align_predictions(predictions=src_preds, homography_matrix=homography["H"]) + + src_filtered, dst_filtered = remove_predictions( + src_predictions=src_preds, + dst_predictions=dst_preds, + aligned_predictions=aligned_src_preds, + threshold=threshold, + device=device, + strategy='left-hand' + ) + + filtered_predictions[src_image_name] = src_filtered + filtered_predictions[dst_image_name] = dst_filtered + + return pd.concat(filtered_predictions.values()).drop_duplicates(subset="box_id") + +def create_sfm_model(image_dir, 
output_path, references, overwrite=False): + """Generate SfM feature files needed for matching.""" + feature_conf = extract_features.confs["disk"] + matcher_conf = match_features.confs["disk+lightglue"] + + sfm_pairs, features, matches = output_path / 'pairs-sfm.txt', output_path / 'features.h5', output_path / 'matches.h5' + + extract_features.main(conf=feature_conf, image_dir=image_dir, image_list=references, feature_path=features, overwrite=overwrite) + pairs_from_exhaustive.main(sfm_pairs, image_list=references) + match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches, overwrite=overwrite) + + +# =============================MAIN FUNCTION==================================== +def unique_predictions_images(image_dir, save_dir, strategy='highest-score', visualization=True): + """ + High-level function to get unique predictions from a directory of overlapping images. + + Args: + image_dir (str): Path to the directory containing input images. + save_dir (str): Path to a directory for saving intermediate SfM files. + strategy (str, optional): The strategy for deduplication. + Options: 'highest-score', 'left-hand', 'right-hand'. Defaults to 'highest-score'. + visualization (bool, optional): If True, shows a plot comparing original and final predictions. Defaults to True. + + Returns: + pandas.DataFrame: A DataFrame containing the final, deduplicated predictions. + """ + # 1. SETUP + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + model = main.deepforest() + model.use_release() + model.to(device) + + # Ensure the save directory exists + os.makedirs(save_dir, exist_ok=True) + + image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.tif', '.png', '.jpg'))]) + + print(f"Found {len(image_files)} images to process: {image_files}") + if not image_files: + raise FileNotFoundError(f"No images found in directory: {image_dir}") + + # --- 2. 
PRE-PROCESSING: CREATE SFM FEATURES --- + print("\nStep 1: Creating SfM features...") + create_sfm_model( + image_dir=Path(image_dir), + output_path=Path(save_dir), + references=image_files, + overwrite=True + ) + print("SfM features created.") + + # 3. PREDICTION: GET INITIAL BOXES + print("\nStep 2: Running prediction on all images...") + all_predictions = [] + for image_file in image_files: + print(f"Predicting on: {image_file}") + image_path = os.path.join(image_dir, image_file) + preds = model.predict_image(path=image_path, return_plot=False) + if preds is not None and not preds.empty: + preds["image_path"] = os.path.basename(image_file) + all_predictions.append(preds) + + if not all_predictions: + raise ValueError("No predictions were made on any images. Cannot proceed.") + + predictions = pd.concat(all_predictions, ignore_index=True) + print(f"Found {len(predictions)} total predictions before filtering.") + + # 4. DEDUPLICATION + print("\nStep 3: Resolving overlaps using SfM...") + matching_file = os.path.join(save_dir, "matches.h5") + + final_predictions = align_and_delete( + predictions=predictions, + matching_h5_file=matching_file, + device=device, + strategy=strategy + ) + print(f"Overlap resolution complete. Final unique predictions: {len(final_predictions)}") + + # 5. 
VISUALIZATION + if visualization and not final_predictions.empty: + print("\nStep 4: Generating plots...") + num_images = len(image_files) + # Adjust subplot grid to fit all images + cols = int(np.ceil(np.sqrt(num_images))) + rows = int(np.ceil(num_images / cols)) + fig, axs = pyplot.subplots(rows, cols, figsize=(cols * 8, rows * 8)) + axs = axs.flatten() + + for i, image_path in enumerate(image_files): + full_image_path = os.path.join(image_dir, image_path) + image = cv2.imread(full_image_path) + + original_image_predictions = predictions[predictions["image_path"] == image_path] + for _, row in original_image_predictions.iterrows(): + cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (255, 0, 0), 7) # Blue for original + + final_image_predictions_plot = final_predictions[final_predictions["image_path"] == image_path] + for _, row in final_image_predictions_plot.iterrows(): + cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (182, 192, 255), 5) # Pink for final + + axs[i].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + axs[i].set_title(f"Final predictions for {image_path}") + axs[i].axis('off') + + # Hide any unused subplots + for j in range(i + 1, len(axs)): + axs[j].axis('off') + + pyplot.tight_layout() + pyplot.show() + + return final_predictions + + +if __name__ == "__main__": + + base_path = r"C:\Users\Bhavya\GSoC\Predict_&_delete" + image_directory = os.path.join(base_path, "Gregg1_2") + save_directory = os.path.join(base_path, "Save_dir_new") + + final_results = unique_predictions_images( + image_dir=image_directory, + save_dir=save_directory, + strategy='left-hand', + visualization=True + ) + + print("\nFinal deduplicated predictions DataFrame:") + print(final_results.head()) \ No newline at end of file From e0beed725ad3808756f0a7103be1b83f3b7a89c7 Mon Sep 17 00:00:00 2001 From: Bhavya Mehta <144762266+Bhavya1604@users.noreply.github.com> Date: Thu, 16 Oct 2025 
19:25:31 +0530 Subject: [PATCH 2/2] Refactored docs, updated evaluate.py and main.py, added Unique_predictions doc, removed DoubleCounting.py --- DoubleCounting.py | 279 ------------ docs/user_guide/16_prediction.md | 6 + docs/user_guide/17_Unique_predictions.md | 557 +++++++++++++++++++++++ pyproject.toml | 16 + src/deepforest/evaluate.py | 364 +++++++++++++++ src/deepforest/main.py | 119 +++++ 6 files changed, 1062 insertions(+), 279 deletions(-) delete mode 100644 DoubleCounting.py create mode 100644 docs/user_guide/17_Unique_predictions.md diff --git a/DoubleCounting.py b/DoubleCounting.py deleted file mode 100644 index 38b6c8059..000000000 --- a/DoubleCounting.py +++ /dev/null @@ -1,279 +0,0 @@ -import os -os.environ['KMP_DUPLICATE_LIB_OK']='TRUE' - -import cv2 -import h5py -import numpy as np -import pandas as pd -import torch -import torchvision -import pycolmap -from pathlib import Path -from matplotlib import pyplot -import geopandas as gpd -from shapely.geometry import box - -from deepforest import main -from hloc import extract_features, match_features, pairs_from_exhaustive -from hloc.utils.io import get_matches - - -def get_matching_points(h5_file, image1_name, image2_name, min_score=None): - """Get matching points between two images from an h5 file.""" - matches, scores = get_matches(h5_file, image1_name, image2_name) - if min_score is not None: - matches = matches[scores > min_score] - match_index = pd.DataFrame(matches, columns=["image1", "image2"]) - - features_path = os.path.join(os.path.dirname(h5_file), "features.h5") - with h5py.File(features_path, 'r') as features_h5_f: - keypoints_image1 = pd.DataFrame(features_h5_f[image1_name]["keypoints"][:], columns=["x", "y"]) - keypoints_image2 = pd.DataFrame(features_h5_f[image2_name]["keypoints"][:], columns=["x", "y"]) - points1 = keypoints_image1.iloc[match_index["image1"].values].values - points2 = keypoints_image2.iloc[match_index["image2"].values].values - return points1, points2 - -def 
compute_homography_matrix(h5_file, image1_name, image2_name): - """Compute the homography matrix between two images.""" - points1, points2 = get_matching_points(h5_file, image1_name, image2_name) - if len(points1) < 4 or len(points2) < 4: - raise ValueError(f"Not enough matching points (<4) found between images {image1_name} and {image2_name}") - - ransac_options = pycolmap.RANSACOptions(max_error=4.0) - report = pycolmap.estimate_homography_matrix(points1, points2, ransac_options) - - if report is None: - raise ValueError(f"Homography matrix estimation failed for images {image1_name} and {image2_name}") - return report - -def warp_box(xmin, ymin, xmax, ymax, homography): - """Warp a bounding box using a homography matrix.""" - points = np.array([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype=np.float32) - reshaped_points = points.reshape(-1, 1, 2) - warped_points = cv2.perspectiveTransform(reshaped_points, homography).squeeze(1) - - warped_xmin, warped_ymin = warped_points.min(axis=0) - warped_xmax, warped_ymax = warped_points.max(axis=0) - return int(warped_xmin), int(warped_ymin), int(warped_xmax), int(warped_ymax) - -def align_predictions(predictions, homography_matrix): - """Aligns a DataFrame of predictions using a homography matrix.""" - transformed_predictions = predictions.copy() - for index, row in transformed_predictions.iterrows(): - xmin, ymin, xmax, ymax = warp_box(row['xmin'], row['ymin'], row['xmax'], row['ymax'], homography_matrix) - transformed_predictions.loc[index, ['xmin', 'ymin', 'xmax', 'ymax']] = xmin, ymin, xmax, ymax - return transformed_predictions - -def remove_predictions(src_predictions, dst_predictions, aligned_predictions, threshold, device, strategy='highest-score'): - """Remove overlapping predictions using different strategies.""" - if strategy == "highest-score": - dst_and_aligned_predictions = pd.concat([aligned_predictions, dst_predictions], ignore_index=True) - boxes = 
torch.tensor(dst_and_aligned_predictions[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float).to(device) - scores = torch.tensor(dst_and_aligned_predictions['score'].values, dtype=torch.float).to(device) - - keep_indices = torchvision.ops.nms(boxes, scores, threshold) - indices_to_keep = dst_and_aligned_predictions.iloc[keep_indices.cpu()] - - src_filtered = src_predictions[src_predictions.box_id.isin(indices_to_keep.box_id)] - dst_filtered = dst_predictions[dst_predictions.box_id.isin(indices_to_keep.box_id)] - else: - aligned_predictions["geometry"] = aligned_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1) - dst_predictions["geometry"] = dst_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1) - aligned_gdf = gpd.GeoDataFrame(aligned_predictions, geometry="geometry") - dst_gdf = gpd.GeoDataFrame(dst_predictions, geometry='geometry') - - joined = gpd.sjoin(aligned_gdf, dst_gdf, how='inner', predicate='intersects') - - if strategy == "left-hand": - src_indices_to_keep = src_predictions.box_id - dst_indices_to_keep = dst_predictions[~dst_predictions.box_id.isin(joined.box_id_right)].box_id - elif strategy == "right-hand": - src_indices_to_keep = src_predictions[~src_predictions.box_id.isin(joined.box_id_left)].box_id - dst_indices_to_keep = dst_predictions.box_id - else: - raise ValueError(f"Unknown strategy: {strategy}. 
Choose from 'highest-score', 'left-hand', 'right-hand'.") - - src_filtered = src_predictions[src_predictions.box_id.isin(src_indices_to_keep)] - dst_filtered = dst_predictions[dst_predictions.box_id.isin(dst_indices_to_keep)] - - return src_filtered, dst_filtered - -def align_and_delete(matching_h5_file, predictions, device, threshold=0.325, strategy='highest-score'): - """Given predictions, align and delete overlapping boxes using a specified strategy.""" - image_names = sorted(predictions.image_path.unique()) - if len(image_names) < 2: - return predictions - - predictions["box_id"] = range(len(predictions)) - filtered_predictions = {name: predictions[predictions.image_path == name] for name in image_names} - - num_pairs = len(image_names) * (len(image_names) - 1) // 2 - pair_count = 0 - - for i in range(len(image_names)): - for j in range(i + 1, len(image_names)): - src_image_name, dst_image_name = image_names[i], image_names[j] - pair_count += 1 - print(f"Processing Pair {pair_count}/{num_pairs}: ({src_image_name}, {dst_image_name})") - - try: - homography = compute_homography_matrix(h5_file=matching_h5_file, image1_name=src_image_name, image2_name=dst_image_name) - except ValueError as e: - print(f"Skipping pair, could not compute homography: {e}") - continue - - src_preds, dst_preds = filtered_predictions[src_image_name], filtered_predictions[dst_image_name] - - if src_preds.empty or dst_preds.empty: - continue - - aligned_src_preds = align_predictions(predictions=src_preds, homography_matrix=homography["H"]) - - src_filtered, dst_filtered = remove_predictions( - src_predictions=src_preds, - dst_predictions=dst_preds, - aligned_predictions=aligned_src_preds, - threshold=threshold, - device=device, - strategy='left-hand' - ) - - filtered_predictions[src_image_name] = src_filtered - filtered_predictions[dst_image_name] = dst_filtered - - return pd.concat(filtered_predictions.values()).drop_duplicates(subset="box_id") - -def create_sfm_model(image_dir, 
output_path, references, overwrite=False): - """Generate SfM feature files needed for matching.""" - feature_conf = extract_features.confs["disk"] - matcher_conf = match_features.confs["disk+lightglue"] - - sfm_pairs, features, matches = output_path / 'pairs-sfm.txt', output_path / 'features.h5', output_path / 'matches.h5' - - extract_features.main(conf=feature_conf, image_dir=image_dir, image_list=references, feature_path=features, overwrite=overwrite) - pairs_from_exhaustive.main(sfm_pairs, image_list=references) - match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches, overwrite=overwrite) - - -# =============================MAIN FUNCTION==================================== -def unique_predictions_images(image_dir, save_dir, strategy='highest-score', visualization=True): - """ - High-level function to get unique predictions from a directory of overlapping images. - - Args: - image_dir (str): Path to the directory containing input images. - save_dir (str): Path to a directory for saving intermediate SfM files. - strategy (str, optional): The strategy for deduplication. - Options: 'highest-score', 'left-hand', 'right-hand'. Defaults to 'highest-score'. - visualization (bool, optional): If True, shows a plot comparing original and final predictions. Defaults to True. - - Returns: - pandas.DataFrame: A DataFrame containing the final, deduplicated predictions. - """ - # 1. SETUP - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print(f"Using device: {device}") - - model = main.deepforest() - model.use_release() - model.to(device) - - # Ensure the save directory exists - os.makedirs(save_dir, exist_ok=True) - - image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.tif', '.png', '.jpg'))]) - - print(f"Found {len(image_files)} images to process: {image_files}") - if not image_files: - raise FileNotFoundError(f"No images found in directory: {image_dir}") - - # --- 2. 
PRE-PROCESSING: CREATE SFM FEATURES --- - print("\nStep 1: Creating SfM features...") - create_sfm_model( - image_dir=Path(image_dir), - output_path=Path(save_dir), - references=image_files, - overwrite=True - ) - print("SfM features created.") - - # 3. PREDICTION: GET INITIAL BOXES - print("\nStep 2: Running prediction on all images...") - all_predictions = [] - for image_file in image_files: - print(f"Predicting on: {image_file}") - image_path = os.path.join(image_dir, image_file) - preds = model.predict_image(path=image_path, return_plot=False) - if preds is not None and not preds.empty: - preds["image_path"] = os.path.basename(image_file) - all_predictions.append(preds) - - if not all_predictions: - raise ValueError("No predictions were made on any images. Cannot proceed.") - - predictions = pd.concat(all_predictions, ignore_index=True) - print(f"Found {len(predictions)} total predictions before filtering.") - - # 4. DEDUPLICATION - print("\nStep 3: Resolving overlaps using SfM...") - matching_file = os.path.join(save_dir, "matches.h5") - - final_predictions = align_and_delete( - predictions=predictions, - matching_h5_file=matching_file, - device=device, - strategy=strategy - ) - print(f"Overlap resolution complete. Final unique predictions: {len(final_predictions)}") - - # 5. 
VISUALIZATION - if visualization and not final_predictions.empty: - print("\nStep 4: Generating plots...") - num_images = len(image_files) - # Adjust subplot grid to fit all images - cols = int(np.ceil(np.sqrt(num_images))) - rows = int(np.ceil(num_images / cols)) - fig, axs = pyplot.subplots(rows, cols, figsize=(cols * 8, rows * 8)) - axs = axs.flatten() - - for i, image_path in enumerate(image_files): - full_image_path = os.path.join(image_dir, image_path) - image = cv2.imread(full_image_path) - - original_image_predictions = predictions[predictions["image_path"] == image_path] - for _, row in original_image_predictions.iterrows(): - cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (255, 0, 0), 7) # Blue for original - - final_image_predictions_plot = final_predictions[final_predictions["image_path"] == image_path] - for _, row in final_image_predictions_plot.iterrows(): - cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (182, 192, 255), 5) # Pink for final - - axs[i].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) - axs[i].set_title(f"Final predictions for {image_path}") - axs[i].axis('off') - - # Hide any unused subplots - for j in range(i + 1, len(axs)): - axs[j].axis('off') - - pyplot.tight_layout() - pyplot.show() - - return final_predictions - - -if __name__ == "__main__": - - base_path = r"C:\Users\Bhavya\GSoC\Predict_&_delete" - image_directory = os.path.join(base_path, "Gregg1_2") - save_directory = os.path.join(base_path, "Save_dir_new") - - final_results = unique_predictions_images( - image_dir=image_directory, - save_dir=save_directory, - strategy='left-hand', - visualization=True - ) - - print("\nFinal deduplicated predictions DataFrame:") - print(final_results.head()) \ No newline at end of file diff --git a/docs/user_guide/16_prediction.md b/docs/user_guide/16_prediction.md index fc15038bf..76038a025 100644 --- a/docs/user_guide/16_prediction.md +++ 
b/docs/user_guide/16_prediction.md @@ -12,6 +12,12 @@ There are atleast four ways to make predictions with DeepForest. In general, during inference, for large images it is most common to use predict_tile. +```{tip} +Working with overlapping surveys and want to remove duplicate detections across images? +See the new guide: [Unique Predictions (Double-Counting Removal)](./17_Unique_predictions.md). +It covers the SfM-based pipeline, extra installation, and best practices. +``` + ## Predict an image using the command line We provide a basic utility script to run a prediction task with the ability to save and/or plot outputs. This command is called `deepforest predict` and is included as part of the standard installation. You can run the command without any arguments, or the `--help` flag to check that it's available. The script will run in tiled prediction mode by default. diff --git a/docs/user_guide/17_Unique_predictions.md b/docs/user_guide/17_Unique_predictions.md new file mode 100644 index 000000000..4224bd636 --- /dev/null +++ b/docs/user_guide/17_Unique_predictions.md @@ -0,0 +1,557 @@ +# Unique Predictions: Double-Counting Removal + +This guide covers DeepForest's advanced double-counting removal capabilities for overlapping imagery scenarios. The algorithm uses Structure-from-Motion (SfM) techniques to resolve duplicate detections across multiple images, ensuring accurate object counts in drone surveys. + +## Overview + +When analyzing overlapping aerial imagery (common in drone surveys), the same objects (e.g., tree crowns) often appear in multiple images, leading to inflated counts. Traditional approaches like simple IoU-based Non-Maximum Suppression fail because they don't account for the geometric relationship between images. + +DeepForest's double-counting removal algorithm solves this by: + +1. **Feature Extraction**: Creating SfM features for all images using DISK+LightGlue +2. 
**Geometric Alignment**: Computing homography matrices between image pairs +3. **Prediction Alignment**: Transforming predictions between coordinate systems +4. **Overlap Resolution**: Removing duplicates using configurable strategies +5. **Visualization**: Optional before/after comparison plots + +## Use Cases + +- **Drone Surveys**: Overlapping flight paths with redundant tree crown detections +- **Multi-Angle Detection**: Same objects viewed from different perspectives +- **Forest Inventory**: Accurate tree counts across overlapping imagery +- **Ecological Monitoring**: Precise wildlife or vegetation counts + +## Quick Start + +First, install DeepForest with the double-counting dependencies: + +```bash +pip install deepforest[double_counting] +``` + +Then use the double-counting functionality: + +```python +from deepforest import main + +# Initialize model +model = main.deepforest() +model.use_release() + +# Run double-counting removal +results = model.predict_unique( + image_dir="/path/to/overlapping/images", + save_dir="/path/to/sfm_output", + strategy='highest-score', + visualization=True +) + +print(f"Found {len(results)} unique tree crowns") +``` + +## Algorithm Details + +### Feature Extraction & Matching + +The algorithm uses DISK+LightGlue for robust feature extraction and matching: + +- **DISK descriptors**: Scale-invariant features effective for aerial imagery +- **LightGlue matcher**: High-quality correspondences with viewpoint robustness +- **Exhaustive pairing**: All possible image pairs are considered for matching + +### Homography Estimation + +For each image pair, a perspective transformation matrix is computed: + +- **RANSAC algorithm**: Robust estimation in presence of outliers +- **Minimum 4 points**: Required for reliable homography computation +- **Error threshold**: 4.0 pixels (tunable for different image types) + +### Prediction Alignment + +Bounding box predictions are transformed between coordinate systems: + +- **Corner 
transformation**: All four corners of each bounding box are transformed +- **Axis-aligned bounding**: Transformed quadrilaterals are converted to rectangles +- **Coordinate mapping**: Predictions aligned to common reference frame + +### Overlap Resolution Strategies + +Three strategies are available for removing overlapping detections: + +#### 1. Highest-Score Strategy (`'highest-score'`) + +**Recommended for most use cases** + +- Uses Non-Maximum Suppression (NMS) based on confidence scores +- Keeps predictions with highest confidence, removes lower-scoring overlaps +- Most sophisticated approach considering both spatial overlap and confidence +- Requires GPU for optimal performance + +```python +results = model.predict_unique( + image_dir="images/", + save_dir="output/", + strategy='highest-score' # Default +) +``` + +#### 2. Left-Hand Strategy (`'left-hand'`) + +- Keeps all predictions from the first image +- Removes overlapping predictions from the second image +- Simple but may not be optimal for all scenarios +- Faster than highest-score strategy + +```python +results = model.predict_unique( + image_dir="images/", + save_dir="output/", + strategy='left-hand' +) +``` + +#### 3. 
Right-Hand Strategy (`'right-hand'`) + +- Keeps all predictions from the second image +- Removes overlapping predictions from the first image +- Simple but may not be optimal for all scenarios +- Faster than highest-score strategy + +```python +results = model.predict_unique( + image_dir="images/", + save_dir="output/", + strategy='right-hand' +) +``` + +## Requirements + +### Image Requirements + +- **Overlap**: Images must have sufficient overlap (>30% recommended) +- **Viewpoints**: Similar viewpoints for reliable homography estimation +- **Quality**: Clear images with distinctive features for matching +- **Formats**: Supported formats include .tif, .png, .jpg + +### System Requirements + +- **Memory**: Scales with number of predictions per image +- **GPU**: Recommended for highest-score strategy (CUDA-compatible) +- **Storage**: Space for SfM feature files (typically 10-50MB per image) + +### Dependencies + +The double-counting functionality requires additional dependencies that are not included in the base DeepForest installation. Install them using: + +```bash +pip install deepforest[double_counting] +``` + +This installs, among others: +- `opencv-python==4.11.0.86`: OpenCV (GUI wheel) used by the double-counting pipeline +- `pycolmap>=0.6.0`: For robust homography estimation +- `hloc>=1.4.0`: For feature extraction and matching +- `kornia>=0.7.0` and `kornia-feature>=0.7.0`: For feature ops/matching helpers + +Important: Do not install `opencv-python` and `opencv-python-headless` in the same environment. + +Alternatively, you can install the dependencies manually: + +```bash +pip install opencv-python==4.11.0.86 pycolmap hloc kornia kornia-feature +``` + +## API Reference + +### `predict_unique()` + +High-level function for double-counting removal across multiple images. + +```python +def predict_unique(image_dir, save_dir, strategy='highest-score', visualization=True): + """ + Get unique predictions from overlapping images. 
+ + Args: + image_dir (str): Path to directory containing input images + save_dir (str): Path for saving intermediate SfM files + strategy (str): Deduplication strategy ('highest-score', 'left-hand', 'right-hand') + visualization (bool): Show before/after comparison plots + + Returns: + pandas.DataFrame: Deduplicated predictions with columns: + ['xmin', 'ymin', 'xmax', 'ymax', 'score', 'label', 'image_path'] + """ +``` + +### Core Functions + +#### `create_sfm_model()` + +Generate SfM feature files needed for geometric matching. + +```python +from deepforest.evaluate import create_sfm_model + +create_sfm_model( + image_dir=Path("images/"), + output_path=Path("sfm_output/"), + references=["image1.tif", "image2.tif"], + overwrite=True +) +``` + +#### `align_and_delete()` + +Main function for removing double-counting across image pairs. + +```python +from deepforest.evaluate import align_and_delete + +final_predictions = align_and_delete( + matching_h5_file="matches.h5", + predictions=initial_predictions, + device=device, + threshold=0.325, + strategy='highest-score' +) +``` + +## Performance Considerations + +### Computational Complexity + +- **Time Complexity**: O(N²) where N is the number of images +- **Pair Processing**: N*(N-1)/2 image pairs must be processed +- **Feature Extraction**: Most time-consuming step (scales with image size) +- **GPU Acceleration**: Available for NMS operations + +### Memory Usage + +- **Scales with predictions**: More predictions per image = more memory +- **SfM Features**: Feature files require additional storage +- **Batch Processing**: Consider processing large datasets in batches + +### Optimization Tips + +1. **Use GPU**: Enable CUDA for highest-score strategy +2. **Batch Processing**: Process large datasets in smaller batches +3. **Image Resolution**: Consider downsampling very large images +4. 
**Strategy Selection**: Use simpler strategies for faster processing + +## Troubleshooting + +### Common Issues + +#### Insufficient Matching Points + +**Error**: `"Not enough matching points (<4) found between images"` + +**Solutions**: +- Ensure images have sufficient overlap (>30%) +- Check image quality and clarity +- Verify images are from similar viewpoints +- Consider adjusting RANSAC parameters + +#### Homography Estimation Failure + +**Error**: `"Homography matrix estimation failed"` + +**Solutions**: +- Increase image overlap +- Improve image quality +- Check for motion blur or distortion +- Verify feature extraction succeeded + +#### Memory Issues + +**Symptoms**: Out of memory errors during processing + +**Solutions**: +- Reduce batch size +- Use CPU instead of GPU for NMS +- Process fewer images at once +- Consider image downsampling + +### Performance Optimization + +#### For Large Datasets + +```python +# Process in batches (the images of batch N are assumed to be staged in batch_N/) +image_batches = [images[i:i+5] for i in range(0, len(images), 5)] + +for batch_idx, batch in enumerate(image_batches): + results = model.predict_unique( + image_dir=f"batch_{batch_idx}/", + save_dir=f"output_{batch_idx}/", + strategy='highest-score' + ) +``` + +#### For Speed-Critical Applications + +```python +# Use faster strategy +results = model.predict_unique( + image_dir="images/", + save_dir="output/", + strategy='left-hand', # Faster than highest-score + visualization=False # Skip visualization +) +``` + +## Examples + +### Basic Usage + +```python +# First install: pip install deepforest[double_counting] +from deepforest import main + +# Initialize model +model = main.deepforest() +model.use_release() + +# Process overlapping images +results = model.predict_unique( + image_dir="drone_survey/images/", + save_dir="drone_survey/sfm_output/", + strategy='highest-score', + visualization=True +) + +# Save results +results.to_csv("unique_predictions.csv", index=False) +print(f"Processed {len(results)} unique tree crowns") +``` + +### Advanced
Configuration + +```python +# First install: pip install deepforest[double_counting] +import torch +from deepforest import main +from deepforest.evaluate import create_sfm_model, align_and_delete + +# Custom processing pipeline +model = main.deepforest() +model.use_release() + +# Create SfM features with custom settings +create_sfm_model( + image_dir=Path("images/"), + output_path=Path("sfm_output/"), + references=["img1.tif", "img2.tif", "img3.tif"], + overwrite=True +) + +# Run detection on individual images +all_predictions = [] +for image_file in ["img1.tif", "img2.tif", "img3.tif"]: + preds = model.predict_image(path=f"images/{image_file}") + preds["image_path"] = image_file + all_predictions.append(preds) + +predictions = pd.concat(all_predictions) + +# Custom double-counting removal +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +final_results = align_and_delete( + matching_h5_file="sfm_output/matches.h5", + predictions=predictions, + device=device, + threshold=0.4, # Custom IoU threshold + strategy='highest-score' +) +``` + +### Batch Processing + +```python +import os +from deepforest import main +import shutil +import pandas as pd + +def process_image_batch(image_dir, batch_size=5): + """Process images in batches to manage memory usage.""" + model = main.deepforest() + model.use_release() + + image_files = [f for f in os.listdir(image_dir) + if f.lower().endswith(('.tif', '.png', '.jpg'))] + + # Process in batches + batches = [image_files[i:i+batch_size] + for i in range(0, len(image_files), batch_size)] + + all_results = [] + + for i, batch in enumerate(batches): + print(f"Processing batch {i+1}/{len(batches)}") + + # Create temporary directory for batch + batch_dir = f"batch_{i}" + os.makedirs(batch_dir, exist_ok=True) + + # Copy batch images + for img_file in batch: + shutil.copy(f"{image_dir}/{img_file}", f"{batch_dir}/{img_file}") + + # Process batch + results = model.predict_unique( + image_dir=batch_dir, + 
save_dir=f"sfm_batch_{i}", + strategy='highest-score', + visualization=False + ) + + all_results.append(results) + + # Cleanup + shutil.rmtree(batch_dir) + + return pd.concat(all_results, ignore_index=True) + +# Usage +results = process_image_batch("large_dataset/", batch_size=3) +``` + +## Best Practices + +### Image Preparation + +1. **Ensure Sufficient Overlap**: Aim for >30% overlap between adjacent images +2. **Maintain Consistent Viewpoints**: Similar camera angles improve matching +3. **Optimize Image Quality**: Clear, well-lit images with distinctive features +4. **Consider Resolution**: Balance between detail and processing speed + +### Strategy Selection + +- **Highest-Score**: Best accuracy, GPU recommended (falls back to CPU), slower processing +- **Left/Right-Hand**: Faster processing, may be less accurate +- **Consider Use Case**: Choose based on accuracy vs. speed requirements + +### Performance Optimization + +1. **Use GPU**: Enable CUDA for NMS operations +2. **Batch Processing**: Process large datasets in manageable chunks (duplicates are only resolved within a batch, not across batches) +3. **Memory Management**: Monitor memory usage, adjust batch sizes +4.
**Storage Planning**: Allocate sufficient space for SfM files + +## Integration with Existing Workflows + +### With DeepForest Training + +```python +# Train model on non-overlapping data +model = main.deepforest() +model.train( + csv_file="training_data.csv", + root_dir="training_images/" +) + +# Apply to overlapping survey data +survey_results = model.predict_unique( + image_dir="survey_images/", + save_dir="survey_sfm/", + strategy='highest-score' +) +``` + +### With Evaluation Metrics + +```python +# Evaluate unique predictions +evaluation_results = model.evaluate( + csv_file="ground_truth.csv", + root_dir="ground_truth_images/", + predictions=survey_results +) + +print(f"Precision: {evaluation_results['box_precision']:.3f}") +print(f"Recall: {evaluation_results['box_recall']:.3f}") +``` + +## Conclusion + +DeepForest's double-counting removal algorithm provides a robust solution for accurate object counting in overlapping imagery scenarios. By leveraging SfM techniques and configurable strategies, users can achieve precise results while balancing accuracy and computational efficiency. + +For additional support or questions about the double-counting removal functionality, please refer to the DeepForest documentation or community forums. + +## Using only the double-counting tools (standalone) + +If you only need the SfM-based de-duplication (without running DeepForest inference), you can use the building blocks in `deepforest.evaluate` directly: `create_sfm_model()` and `align_and_delete()`. In that case, you must supply your own predictions DataFrame with columns `['xmin','ymin','xmax','ymax','score','label','image_path']`. + +### Why a separate environment is recommended + +- Different OpenCV wheels conflict (GUI `opencv-python` vs `opencv-python-headless`). The repo uses the headless build; mixing both breaks imports. +- Torch/TorchVision/CUDA builds must be matched; installing other CV stacks may overwrite them. 
+- Geo stack (`geopandas`, `shapely`) often pulls native libs; isolating avoids version pin clashes. + +Create a fresh environment to avoid these conflicts. + +### Minimal standalone environment (no DeepForest inference) + +Recommended with conda-forge (for clean Geo/CV binaries): + +```bash +conda create -n df-doublecounting -c conda-forge python=3.11 +conda activate df-doublecounting + +# Core numeric/geo/cv +pip install opencv-python==4.11.0.86 numpy pandas shapely geopandas h5py matplotlib + +# Torch + vision (pick the right CUDA/CPU build for your system if needed) +pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu + +# SfM and features +pip install pycolmap hloc kornia kornia-feature +``` + +Now you can run: + +```python +from pathlib import Path +import pandas as pd +from deepforest.evaluate import create_sfm_model, align_and_delete +import torch + +# 1) Build SfM artifacts once +create_sfm_model( + image_dir=Path("/path/to/images"), + output_path=Path("/path/to/sfm_output"), + references=sorted(["img1.tif","img2.tif","img3.tif"]), + overwrite=True, +) + +# 2) Load your own predictions (must include image_path per row) +predictions = pd.read_csv("/path/to/predictions.csv") + +# 3) De-duplicate +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +final_predictions = align_and_delete( + matching_h5_file=str(Path("/path/to/sfm_output")/"matches.h5"), + predictions=predictions, + device=device, + threshold=0.325, + strategy="highest-score", +) +``` + +### Using only the extras without DeepForest + +If you still prefer pip extras management but won’t use DeepForest inference, you can install just the extra group and your own stack: + +```bash +pip install deepforest[double_counting] +# Add your preferred torch build afterwards (to avoid pip picking an unintended one) +pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu +``` + +Notes: +- Ensure only one OpenCV wheel is present in the env. 
For double-counting, use `opencv-python==4.11.0.86` and avoid installing `opencv-python-headless` concurrently. If you hit conflicts, uninstall all OpenCV wheels and reinstall the one you need. + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ff61074f8..643f1e78f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,6 +100,22 @@ docs = [ "sphinx_rtd_theme", ] +double_counting = [ + "opencv-python==4.11.0.86", + "numpy<2.0", + "pandas", + "matplotlib", + "h5py", + "shapely>2.0.0", + "geopandas", + "torch>=2.2.0", + "torchvision>=0.17.0", + "pycolmap>=0.6.0", + "hloc>=1.4.0", + "kornia>=0.7.0", + "kornia-feature>=0.7.0", +] + [project.scripts] deepforest = "deepforest.scripts.cli:main" diff --git a/src/deepforest/evaluate.py b/src/deepforest/evaluate.py index d4f76a9af..5a84092d3 100644 --- a/src/deepforest/evaluate.py +++ b/src/deepforest/evaluate.py @@ -1,14 +1,23 @@ """Evaluation module.""" +import os import warnings +import cv2 import geopandas as gpd +import h5py import numpy as np import pandas as pd +import pycolmap import shapely +import torch +import torchvision +from shapely.geometry import box from deepforest import IoU from deepforest.utilities import determine_geometry_type +from hloc import extract_features, match_features, pairs_from_exhaustive +from hloc.utils.io import get_matches def evaluate_image_boxes(predictions, ground_df): @@ -398,3 +408,357 @@ def point_recall(predictions, ground_df): class_recall = compute_class_recall(matched_results) return {"results": results, "box_recall": box_recall, "class_recall": class_recall} + +def get_matching_points(h5_file, image1_name, image2_name, min_score=None): + """ + Extract matching feature points between two images from SfM feature files. + + This function retrieves corresponding feature points between two images that were + previously extracted and matched using Structure-from-Motion techniques.
The matching + points are essential for computing geometric transformations between images. + + Args: + h5_file (str): Path to the HDF5 file containing feature matches + image1_name (str): Name of the first image (as stored in the feature files) + image2_name (str): Name of the second image (as stored in the feature files) + min_score (float, optional): Matching score threshold; only matches scoring strictly + above it are kept. If None, all matches are returned. + + Returns: + tuple: (points1, points2) where each is a numpy array of shape (N, 2) + containing the (x, y) coordinates of matching points in each image + + Note: + Keypoints are read from a ``features.h5`` file located in the same directory as + ``h5_file``, so both files must already exist (e.g. written by create_sfm_model()). + """ + matches, scores = get_matches(h5_file, image1_name, image2_name) + if min_score is not None: + matches = matches[scores > min_score] + match_index = pd.DataFrame(matches, columns=["image1", "image2"]) + + features_path = os.path.join(os.path.dirname(h5_file), "features.h5") + with h5py.File(features_path, 'r') as features_h5_f: + keypoints_image1 = pd.DataFrame(features_h5_f[image1_name]["keypoints"][:], columns=["x", "y"]) + keypoints_image2 = pd.DataFrame(features_h5_f[image2_name]["keypoints"][:], columns=["x", "y"]) + points1 = keypoints_image1.iloc[match_index["image1"].values].values + points2 = keypoints_image2.iloc[match_index["image2"].values].values + return points1, points2 + +def compute_homography_matrix(h5_file, image1_name, image2_name): + """ + Compute the homography matrix between two images using RANSAC. + + A homography matrix is a 3x3 transformation matrix that maps points from one + image plane to another. This is essential for aligning predictions between + overlapping images in the double-counting removal algorithm. + + The function uses RANSAC (Random Sample Consensus) to robustly estimate the + homography matrix even in the presence of outliers and noise in the feature matches.
+ + Args: + h5_file (str): Path to the HDF5 file containing feature matches + image1_name (str): Name of the first image + image2_name (str): Name of the second image + + Returns: + dict: The report produced by ``pycolmap.estimate_homography_matrix``: + - 'H': The 3x3 homography matrix (the entry read by align_and_delete) + - NOTE(review): the remaining keys (inlier mask, inlier count, other RANSAC + statistics) are whatever the installed pycolmap version provides and are + not read by align_and_delete - confirm before relying on them + + Raises: + ValueError: If fewer than 4 matching points are found between the images + ValueError: If pycolmap returns no report (``None``) for the image pair + + Note: + The RANSAC parameters are set to max_error=4.0 pixels, which works well + for most aerial imagery scenarios. For different image types, these parameters + may need adjustment. + """ + points1, points2 = get_matching_points(h5_file, image1_name, image2_name) + if len(points1) < 4 or len(points2) < 4: + raise ValueError(f"Not enough matching points (<4) found between images {image1_name} and {image2_name}") + + ransac_options = pycolmap.RANSACOptions(max_error=4.0) + report = pycolmap.estimate_homography_matrix(points1, points2, ransac_options) + + if report is None: + raise ValueError(f"Homography matrix estimation failed for images {image1_name} and {image2_name}") + return report + +def warp_box(xmin, ymin, xmax, ymax, homography): + """ + Transform a bounding box using a homography matrix. + + This function applies a perspective transformation to a bounding box by transforming + its four corner points and then computing the axis-aligned bounding box of the + transformed corners. This is used to align predictions from one image coordinate + system to another.
+ + Args: + xmin (float): Left coordinate of the bounding box + ymin (float): Top coordinate of the bounding box + xmax (float): Right coordinate of the bounding box + ymax (float): Bottom coordinate of the bounding box + homography (numpy.ndarray): 3x3 homography matrix + + Returns: + tuple: (warped_xmin, warped_ymin, warped_xmax, warped_ymax) - the transformed + bounding box coordinates, truncated to integers with int() + + Note: + The transformation may result in non-rectangular shapes, so the function + computes the axis-aligned bounding box of the transformed quadrilateral. + This can lead to slight expansion of the bounding box area. + """ + points = np.array([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype=np.float32) + reshaped_points = points.reshape(-1, 1, 2) + warped_points = cv2.perspectiveTransform(reshaped_points, homography).squeeze(1) + + warped_xmin, warped_ymin = warped_points.min(axis=0) + warped_xmax, warped_ymax = warped_points.max(axis=0) + return int(warped_xmin), int(warped_ymin), int(warped_xmax), int(warped_ymax) + +def align_predictions(predictions, homography_matrix): + """ + Transform all bounding box predictions using a homography matrix. + + This function applies geometric transformation to align predictions from one image + coordinate system to another. It's a key step in the double-counting removal process, + allowing comparison of predictions between overlapping images. + + Args: + predictions (pandas.DataFrame): DataFrame containing bounding box predictions + with columns ['xmin', 'ymin', 'xmax', 'ymax', ...] + homography_matrix (numpy.ndarray): 3x3 homography matrix for coordinate transformation + + Returns: + pandas.DataFrame: A copy of the input DataFrame with transformed bounding box + coordinates. All other columns remain unchanged. + + Note: + The function creates a copy of the input DataFrame to avoid modifying the original. + The transformation is applied row-wise to each bounding box prediction.
+ """ + transformed_predictions = predictions.copy() + for index, row in transformed_predictions.iterrows(): + xmin, ymin, xmax, ymax = warp_box(row['xmin'], row['ymin'], row['xmax'], row['ymax'], homography_matrix) + transformed_predictions.loc[index, ['xmin', 'ymin', 'xmax', 'ymax']] = xmin, ymin, xmax, ymax + return transformed_predictions + +def remove_predictions(src_predictions, dst_predictions, aligned_predictions, threshold, device, strategy='highest-score'): + """ + Remove overlapping predictions between two images using specified strategies. + + This function implements the core logic for resolving double-counting by removing + overlapping detections between aligned image pairs. Three different strategies are + available, each with different trade-offs between accuracy and computational cost. + + **Strategies:** + + 1. **'highest-score'**: Uses Non-Maximum Suppression (NMS) to keep predictions with + the highest confidence scores. This is the most sophisticated approach that considers + both spatial overlap and prediction confidence. + + 2. **'left-hand'**: Keeps all predictions from the source image and removes destination + predictions that intersect any aligned source box. Simple but may not be optimal. + + 3. **'right-hand'**: Keeps all predictions from the destination image and removes source + predictions whose aligned box intersects any destination box. Simple but may not be optimal. + + Args: + src_predictions (pandas.DataFrame): Predictions from the source image + dst_predictions (pandas.DataFrame): Predictions from the destination image + aligned_predictions (pandas.DataFrame): Source predictions transformed to destination + coordinate system using homography matrix + threshold (float): IoU threshold passed to NMS; only used by 'highest-score' + device (torch.device): PyTorch device for tensor operations (CPU or GPU) + strategy (str): Strategy for removing overlaps. Options: 'highest-score', + 'left-hand', 'right-hand'. Defaults to 'highest-score'.
+ + Returns: + tuple: (src_filtered, dst_filtered) - DataFrames containing the filtered + predictions for source and destination images respectively + + Raises: + ValueError: If an unknown strategy is specified + + Note: + All strategies identify rows through a 'box_id' column; 'highest-score' also needs a + 'score' column for torchvision NMS. The geometric strategies ignore ``threshold``, treat + any intersection as overlap, and work on copies so the input frames are not modified. + """ + if strategy == "highest-score": + dst_and_aligned_predictions = pd.concat([aligned_predictions, dst_predictions], ignore_index=True) + boxes = torch.tensor(dst_and_aligned_predictions[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float).to(device) + scores = torch.tensor(dst_and_aligned_predictions['score'].values, dtype=torch.float).to(device) + + keep_indices = torchvision.ops.nms(boxes, scores, threshold) + indices_to_keep = dst_and_aligned_predictions.iloc[keep_indices.cpu().numpy()] + + src_filtered = src_predictions[src_predictions.box_id.isin(indices_to_keep.box_id)] + dst_filtered = dst_predictions[dst_predictions.box_id.isin(indices_to_keep.box_id)] + else: + aligned_geometry = aligned_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1) + dst_geometry = dst_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1) + aligned_gdf = gpd.GeoDataFrame(aligned_predictions.copy(), geometry=aligned_geometry) + dst_gdf = gpd.GeoDataFrame(dst_predictions.copy(), geometry=dst_geometry) + + joined = gpd.sjoin(aligned_gdf, dst_gdf, how='inner', predicate='intersects') + + if strategy == "left-hand": + src_indices_to_keep = src_predictions.box_id + dst_indices_to_keep = dst_predictions[~dst_predictions.box_id.isin(joined.box_id_right)].box_id + elif strategy == "right-hand": + src_indices_to_keep = src_predictions[~src_predictions.box_id.isin(joined.box_id_left)].box_id + dst_indices_to_keep =
dst_predictions.box_id + else: + raise ValueError(f"Unknown strategy: {strategy}. Choose from 'highest-score', 'left-hand', 'right-hand'.") + + src_filtered = src_predictions[src_predictions.box_id.isin(src_indices_to_keep)] + dst_filtered = dst_predictions[dst_predictions.box_id.isin(dst_indices_to_keep)] + + return src_filtered, dst_filtered + +def align_and_delete(matching_h5_file, predictions, device, threshold=0.325, strategy='highest-score'): + """ + Main function for removing double-counting across multiple overlapping images. + + This function implements the complete double-counting removal pipeline by processing + all pairs of images in the dataset. For each pair, it computes geometric alignment + and removes overlapping predictions using the specified strategy. + + **Algorithm Overview:** + + 1. **Pairwise Processing**: Processes all unique pairs of images (N*(N-1)/2 pairs) + 2. **Homography Estimation**: Computes transformation matrix between each image pair + 3. **Prediction Alignment**: Transforms predictions from source to destination coordinates + 4. **Overlap Resolution**: Removes overlapping detections using chosen strategy + 5. **Iterative Refinement**: Updates predictions after each pair processing + + **Performance Considerations:** + + - Computational complexity: O(N²) where N is the number of images + - Memory usage scales with the number of predictions per image + - NMS tensors are placed on the supplied ``device`` (CPU or GPU) + + Args: + matching_h5_file (str): Path to HDF5 file containing feature matches between images + predictions (pandas.DataFrame): Initial predictions from all images with columns: + ['xmin', 'ymin', 'xmax', 'ymax', 'score', 'label', 'image_path'] + device (torch.device): PyTorch device for tensor operations + threshold (float, optional): IoU threshold forwarded to remove_predictions. Defaults to 0.325 + strategy (str, optional): Strategy for removing overlaps. Options: 'highest-score', +
Defaults to 'highest-score' + + Returns: + pandas.DataFrame: Final predictions with double-counting removed. Contains the same + columns as input plus a 'box_id' column for tracking. + + Raises: + ValueError: If fewer than 2 images are provided + ValueError: If homography computation fails for image pairs + + Note: + The function processes images in sorted order by name for consistent results. + Images that cannot be geometrically aligned are skipped with a warning. + The threshold parameter should be tuned based on the expected overlap between images. + """ + image_names = sorted(predictions.image_path.unique()) + if len(image_names) < 2: + return predictions + + predictions["box_id"] = range(len(predictions)) + filtered_predictions = {name: predictions[predictions.image_path == name] for name in image_names} + + num_pairs = len(image_names) * (len(image_names) - 1) // 2 + pair_count = 0 + + for i in range(len(image_names)): + for j in range(i + 1, len(image_names)): + src_image_name, dst_image_name = image_names[i], image_names[j] + pair_count += 1 + print(f"Processing Pair {pair_count}/{num_pairs}: ({src_image_name}, {dst_image_name})") + + try: + homography = compute_homography_matrix(h5_file=matching_h5_file, image1_name=src_image_name, image2_name=dst_image_name) + except ValueError as e: + print(f"Skipping pair, could not compute homography: {e}") + continue + + src_preds, dst_preds = filtered_predictions[src_image_name], filtered_predictions[dst_image_name] + + if src_preds.empty or dst_preds.empty: + continue + + aligned_src_preds = align_predictions(predictions=src_preds, homography_matrix=homography["H"]) + + src_filtered, dst_filtered = remove_predictions( + src_predictions=src_preds, + dst_predictions=dst_preds, + aligned_predictions=aligned_src_preds, + threshold=threshold, + device=device, + strategy='left-hand' + ) + + filtered_predictions[src_image_name] = src_filtered + filtered_predictions[dst_image_name] = dst_filtered + + return 
pd.concat(filtered_predictions.values()).drop_duplicates(subset="box_id") + +def create_sfm_model(image_dir, output_path, references, overwrite=False): + """ + Generate Structure-from-Motion (SfM) feature files needed for geometric matching. + + This function creates the essential SfM infrastructure required for the double-counting + removal algorithm. It extracts distinctive features from images and establishes matches + between overlapping image pairs using state-of-the-art computer vision techniques. + + **Process Overview:** + + 1. **Feature Extraction**: Uses DISK descriptors to extract robust, scale-invariant + features from each image. DISK is particularly effective for aerial imagery. + + 2. **Pair Generation**: Creates exhaustive pairs between all images for matching. + This ensures all possible overlaps are considered. + + 3. **Feature Matching**: Uses LightGlue matcher to establish correspondences between + feature points in overlapping image pairs. LightGlue provides high-quality matches + with good robustness to viewpoint changes. + + **Output Files:** + + - `features.h5`: Contains extracted feature points and descriptors for each image + - `matches.h5`: Contains feature matches between all image pairs + - `pairs-sfm.txt`: Lists all image pairs to be matched + + Args: + image_dir (pathlib.Path): Directory containing input images + output_path (pathlib.Path): Directory for the SfM files; joined with ``/``, so not a plain str + references (list): List of image filenames to process + overwrite (bool, optional): If True, overwrite existing SfM files. + If False, skip processing if files already exist. Defaults to False. + + Raises: + Nothing directly; errors from the hloc extraction/matching steps propagate unchanged. + NOTE(review): confirm the exact exception types against the installed hloc version. + + Note: + This function requires the hloc library and can be computationally intensive + for large images or many images. Processing time scales roughly quadratically + with the number of images due to exhaustive pairwise matching.
+ + The DISK+LightGlue combination is chosen for its effectiveness on aerial imagery + and robustness to illumination changes, which are common in drone surveys. + """ + feature_conf = extract_features.confs["disk"] + matcher_conf = match_features.confs["disk+lightglue"] + + sfm_pairs, features, matches = output_path / 'pairs-sfm.txt', output_path / 'features.h5', output_path / 'matches.h5' + + extract_features.main(conf=feature_conf, image_dir=image_dir, image_list=references, feature_path=features, overwrite=overwrite) + pairs_from_exhaustive.main(sfm_pairs, image_list=references) + match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches, overwrite=overwrite) \ No newline at end of file diff --git a/src/deepforest/main.py b/src/deepforest/main.py index 3d53375cf..f98a24a59 100644 --- a/src/deepforest/main.py +++ b/src/deepforest/main.py @@ -2,8 +2,11 @@ import importlib import os import warnings +from pathlib import Path +import cv2 import geopandas as gpd +import matplotlib.pyplot as pyplot import numpy as np import pandas as pd import pytorch_lightning as pl @@ -19,6 +22,7 @@ from deepforest import evaluate as evaluate_iou from deepforest import predict, utilities from deepforest.datasets import prediction, training +from deepforest.evaluate import create_sfm_model, align_and_delete class deepforest(pl.LightningModule): @@ -1071,3 +1075,118 @@ def __evaluation_logs__(self, results): self.log(key, value) except MisconfigurationException: pass + + def predict_unique(self, image_dir, save_dir, strategy='highest-score', visualization=True): + """ + High-level method that uses this model to get unique predictions from a directory of overlapping images. + + Args: + image_dir (str): Path to the directory containing input images. + Supported formats: .tif, .png, .jpg + save_dir (str): Path to a directory for saving intermediate SfM files. + Will be created if it doesn't exist. + strategy (str, optional): The strategy for deduplication.
+ Options: 'highest-score', 'left-hand', 'right-hand'. + Defaults to 'highest-score'. + visualization (bool, optional): If True, shows a plot comparing original + and final predictions. Blue boxes = original, Pink boxes = final. + Defaults to True. + + Returns: + pandas.DataFrame: A DataFrame containing the final, deduplicated predictions + with at least the columns xmin, ymin, xmax, ymax, score, label, image_path (and box_id when several images are processed) + + Note: + This function requires additional dependencies: pycolmap, hloc, torchvision. + The SfM feature extraction can be computationally intensive for large images. + """ + # 1. SETUP + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + + # Predict with this instance so already loaded or fine-tuned weights are respected + model = self + model.to(device) + + # Ensure the save directory exists + os.makedirs(save_dir, exist_ok=True) + + image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.tif', '.png', '.jpg'))]) + + print(f"Found {len(image_files)} images to process: {image_files}") + if not image_files: + raise FileNotFoundError(f"No images found in directory: {image_dir}") + + # --- 2. PRE-PROCESSING: CREATE SFM FEATURES --- + print("\nStep 1: Creating SfM features...") + create_sfm_model( + image_dir=Path(image_dir), + output_path=Path(save_dir), + references=image_files, + overwrite=True + ) + print("SfM features created.") + + # 3. PREDICTION: GET INITIAL BOXES + print("\nStep 2: Running prediction on all images...") + all_predictions = [] + for image_file in image_files: + print(f"Predicting on: {image_file}") + image_path = os.path.join(image_dir, image_file) + preds = model.predict_image(path=image_path, return_plot=False) + if preds is not None and not preds.empty: + preds["image_path"] = os.path.basename(image_file) + all_predictions.append(preds) + + if not all_predictions: + raise ValueError("No predictions were made on any images.
Cannot proceed.") + + predictions = pd.concat(all_predictions, ignore_index=True) + print(f"Found {len(predictions)} total predictions before filtering.") + + # 4. DEDUPLICATION + print("\nStep 3: Resolving overlaps using SfM...") + matching_file = os.path.join(save_dir, "matches.h5") + + final_predictions = align_and_delete( + predictions=predictions, + matching_h5_file=matching_file, + device=device, + strategy=strategy + ) + print(f"Overlap resolution complete. Final unique predictions: {len(final_predictions)}") + + # 5. VISUALIZATION + if visualization and not final_predictions.empty: + print("\nStep 4: Generating plots...") + num_images = len(image_files) + # Adjust subplot grid to fit all images + cols = int(np.ceil(np.sqrt(num_images))) + rows = int(np.ceil(num_images / cols)) + fig, axs = pyplot.subplots(rows, cols, figsize=(cols * 8, rows * 8), squeeze=False)  # squeeze=False: always a 2-D array, even for a 1x1 grid + axs = axs.flatten() + + for i, image_path in enumerate(image_files): + full_image_path = os.path.join(image_dir, image_path) + image = cv2.imread(full_image_path) + + original_image_predictions = predictions[predictions["image_path"] == image_path] + for _, row in original_image_predictions.iterrows(): + cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (255, 0, 0), 7) # Blue for original + + final_image_predictions_plot = final_predictions[final_predictions["image_path"] == image_path] + for _, row in final_image_predictions_plot.iterrows(): + cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (182, 192, 255), 5) # Pink for final + + axs[i].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + axs[i].set_title(f"Final predictions for {image_path}") + axs[i].axis('off') + + # Hide any unused subplots + for j in range(i + 1, len(axs)): + axs[j].axis('off') + + pyplot.tight_layout() + pyplot.show() + + return final_predictions