From ff5338f45c7698ca490c8925bd7a2ea90faaec84 Mon Sep 17 00:00:00 2001
From: Bhavya Mehta <144762266+Bhavya1604@users.noreply.github.com>
Date: Tue, 29 Jul 2025 10:20:35 +0530
Subject: [PATCH 1/2] added doublecounting algorithm for review

---
 DoubleCounting.py | 279 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 279 insertions(+)
 create mode 100644 DoubleCounting.py

diff --git a/DoubleCounting.py b/DoubleCounting.py
new file mode 100644
index 000000000..38b6c8059
--- /dev/null
+++ b/DoubleCounting.py
@@ -0,0 +1,279 @@
+import os
+os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
+
+import cv2
+import h5py
+import numpy as np
+import pandas as pd
+import torch
+import torchvision
+import pycolmap
+from pathlib import Path
+from matplotlib import pyplot
+import geopandas as gpd
+from shapely.geometry import box
+
+from deepforest import main
+from hloc import extract_features, match_features, pairs_from_exhaustive
+from hloc.utils.io import get_matches
+
+
+def get_matching_points(h5_file, image1_name, image2_name, min_score=None):
+    """Get matching points between two images from an h5 file."""
+    matches, scores = get_matches(h5_file, image1_name, image2_name)
+    if min_score is not None:
+        matches = matches[scores > min_score]
+    match_index = pd.DataFrame(matches, columns=["image1", "image2"])
+    
+    features_path = os.path.join(os.path.dirname(h5_file), "features.h5")
+    with h5py.File(features_path, 'r') as features_h5_f:
+        keypoints_image1 = pd.DataFrame(features_h5_f[image1_name]["keypoints"][:], columns=["x", "y"])
+        keypoints_image2 = pd.DataFrame(features_h5_f[image2_name]["keypoints"][:], columns=["x", "y"])
+        points1 = keypoints_image1.iloc[match_index["image1"].values].values
+        points2 = keypoints_image2.iloc[match_index["image2"].values].values
+    return points1, points2
+
+def compute_homography_matrix(h5_file, image1_name, image2_name):
+    """Compute the homography matrix between two images."""
+    points1, points2 = get_matching_points(h5_file, image1_name, image2_name)
+    if len(points1) < 4 or len(points2) < 4:
+        raise ValueError(f"Not enough matching points (<4) found between images {image1_name} and {image2_name}")
+
+    ransac_options = pycolmap.RANSACOptions(max_error=4.0)
+    report = pycolmap.estimate_homography_matrix(points1, points2, ransac_options)
+
+    if report is None:
+        raise ValueError(f"Homography matrix estimation failed for images {image1_name} and {image2_name}")
+    return report
+
+def warp_box(xmin, ymin, xmax, ymax, homography):
+    """Warp a bounding box using a homography matrix."""
+    points = np.array([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype=np.float32)
+    reshaped_points = points.reshape(-1, 1, 2)
+    warped_points = cv2.perspectiveTransform(reshaped_points, homography).squeeze(1)
+    
+    warped_xmin, warped_ymin = warped_points.min(axis=0)
+    warped_xmax, warped_ymax = warped_points.max(axis=0)
+    return int(warped_xmin), int(warped_ymin), int(warped_xmax), int(warped_ymax)
+
+def align_predictions(predictions, homography_matrix):
+    """Aligns a DataFrame of predictions using a homography matrix."""
+    transformed_predictions = predictions.copy()
+    for index, row in transformed_predictions.iterrows():
+        xmin, ymin, xmax, ymax = warp_box(row['xmin'], row['ymin'], row['xmax'], row['ymax'], homography_matrix)
+        transformed_predictions.loc[index, ['xmin', 'ymin', 'xmax', 'ymax']] = xmin, ymin, xmax, ymax
+    return transformed_predictions
+
+def remove_predictions(src_predictions, dst_predictions, aligned_predictions, threshold, device, strategy='highest-score'):
+    """Remove overlapping predictions using different strategies."""
+    if strategy == "highest-score":
+        dst_and_aligned_predictions = pd.concat([aligned_predictions, dst_predictions], ignore_index=True)
+        boxes = torch.tensor(dst_and_aligned_predictions[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float).to(device)
+        scores = torch.tensor(dst_and_aligned_predictions['score'].values, dtype=torch.float).to(device)
+        
+        keep_indices = torchvision.ops.nms(boxes, scores, threshold)
+        indices_to_keep = dst_and_aligned_predictions.iloc[keep_indices.cpu()]
+        
+        src_filtered = src_predictions[src_predictions.box_id.isin(indices_to_keep.box_id)]
+        dst_filtered = dst_predictions[dst_predictions.box_id.isin(indices_to_keep.box_id)]
+    else:
+        aligned_predictions["geometry"] = aligned_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1)
+        dst_predictions["geometry"] = dst_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1)
+        aligned_gdf = gpd.GeoDataFrame(aligned_predictions, geometry="geometry")
+        dst_gdf = gpd.GeoDataFrame(dst_predictions, geometry='geometry')
+
+        joined = gpd.sjoin(aligned_gdf, dst_gdf, how='inner', predicate='intersects')
+
+        if strategy == "left-hand":
+            src_indices_to_keep = src_predictions.box_id
+            dst_indices_to_keep = dst_predictions[~dst_predictions.box_id.isin(joined.box_id_right)].box_id
+        elif strategy == "right-hand":
+            src_indices_to_keep = src_predictions[~src_predictions.box_id.isin(joined.box_id_left)].box_id
+            dst_indices_to_keep = dst_predictions.box_id
+        else:
+            raise ValueError(f"Unknown strategy: {strategy}. Choose from 'highest-score', 'left-hand', 'right-hand'.")
+
+        src_filtered = src_predictions[src_predictions.box_id.isin(src_indices_to_keep)]
+        dst_filtered = dst_predictions[dst_predictions.box_id.isin(dst_indices_to_keep)]
+
+    return src_filtered, dst_filtered
+
+def align_and_delete(matching_h5_file, predictions, device, threshold=0.325, strategy='highest-score'):
+    """Given predictions, align and delete overlapping boxes using a specified strategy."""
+    image_names = sorted(predictions.image_path.unique())
+    if len(image_names) < 2:
+        return predictions
+
+    predictions["box_id"] = range(len(predictions))
+    filtered_predictions = {name: predictions[predictions.image_path == name] for name in image_names}
+    
+    num_pairs = len(image_names) * (len(image_names) - 1) // 2
+    pair_count = 0
+
+    for i in range(len(image_names)):
+        for j in range(i + 1, len(image_names)):
+            src_image_name, dst_image_name = image_names[i], image_names[j]
+            pair_count += 1
+            print(f"Processing Pair {pair_count}/{num_pairs}: ({src_image_name}, {dst_image_name})")
+
+            try:
+                homography = compute_homography_matrix(h5_file=matching_h5_file, image1_name=src_image_name, image2_name=dst_image_name)
+            except ValueError as e:
+                print(f"Skipping pair, could not compute homography: {e}")
+                continue
+
+            src_preds, dst_preds = filtered_predictions[src_image_name], filtered_predictions[dst_image_name]
+            
+            if src_preds.empty or dst_preds.empty:
+                continue
+
+            aligned_src_preds = align_predictions(predictions=src_preds, homography_matrix=homography["H"])
+            
+            src_filtered, dst_filtered = remove_predictions(
+                src_predictions=src_preds,
+                dst_predictions=dst_preds,
+                aligned_predictions=aligned_src_preds,
+                threshold=threshold,
+                device=device,
+                strategy='left-hand'
+            )
+            
+            filtered_predictions[src_image_name] = src_filtered
+            filtered_predictions[dst_image_name] = dst_filtered
+            
+    return pd.concat(filtered_predictions.values()).drop_duplicates(subset="box_id")
+
+def create_sfm_model(image_dir, output_path, references, overwrite=False):
+    """Generate SfM feature files needed for matching."""
+    feature_conf = extract_features.confs["disk"]
+    matcher_conf = match_features.confs["disk+lightglue"]
+    
+    sfm_pairs, features, matches = output_path / 'pairs-sfm.txt', output_path / 'features.h5', output_path / 'matches.h5'
+    
+    extract_features.main(conf=feature_conf, image_dir=image_dir, image_list=references, feature_path=features, overwrite=overwrite)
+    pairs_from_exhaustive.main(sfm_pairs, image_list=references)
+    match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches, overwrite=overwrite)
+
+
+# =============================MAIN FUNCTION====================================
+def unique_predictions_images(image_dir, save_dir, strategy='highest-score', visualization=True):
+    """
+    High-level function to get unique predictions from a directory of overlapping images.
+
+    Args:
+        image_dir (str): Path to the directory containing input images.
+        save_dir (str): Path to a directory for saving intermediate SfM files.
+        strategy (str, optional): The strategy for deduplication. 
+            Options: 'highest-score', 'left-hand', 'right-hand'. Defaults to 'highest-score'.
+        visualization (bool, optional): If True, shows a plot comparing original and final predictions. Defaults to True.
+
+    Returns:
+        pandas.DataFrame: A DataFrame containing the final, deduplicated predictions.
+    """
+    # 1. SETUP
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+
+    model = main.deepforest()
+    model.use_release()
+    model.to(device)
+    
+    # Ensure the save directory exists
+    os.makedirs(save_dir, exist_ok=True)
+
+    image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.tif', '.png', '.jpg'))])
+    
+    print(f"Found {len(image_files)} images to process: {image_files}")
+    if not image_files:
+        raise FileNotFoundError(f"No images found in directory: {image_dir}")
+
+    # --- 2. PRE-PROCESSING: CREATE SFM FEATURES ---
+    print("\nStep 1: Creating SfM features...")
+    create_sfm_model(
+        image_dir=Path(image_dir),
+        output_path=Path(save_dir),
+        references=image_files,
+        overwrite=True
+    )
+    print("SfM features created.")
+
+    # 3. PREDICTION: GET INITIAL BOXES
+    print("\nStep 2: Running prediction on all images...")
+    all_predictions = []
+    for image_file in image_files:
+        print(f"Predicting on: {image_file}")
+        image_path = os.path.join(image_dir, image_file)
+        preds = model.predict_image(path=image_path, return_plot=False)
+        if preds is not None and not preds.empty:
+            preds["image_path"] = os.path.basename(image_file)
+            all_predictions.append(preds)
+
+    if not all_predictions:
+        raise ValueError("No predictions were made on any images. Cannot proceed.")
+        
+    predictions = pd.concat(all_predictions, ignore_index=True)
+    print(f"Found {len(predictions)} total predictions before filtering.")
+
+    # 4. DEDUPLICATION
+    print("\nStep 3: Resolving overlaps using SfM...")
+    matching_file = os.path.join(save_dir, "matches.h5")
+
+    final_predictions = align_and_delete(
+        predictions=predictions,
+        matching_h5_file=matching_file,
+        device=device,
+        strategy=strategy
+    )
+    print(f"Overlap resolution complete. Final unique predictions: {len(final_predictions)}")
+
+    # 5. VISUALIZATION
+    if visualization and not final_predictions.empty:
+        print("\nStep 4: Generating plots...")
+        num_images = len(image_files)
+        # Adjust subplot grid to fit all images
+        cols = int(np.ceil(np.sqrt(num_images)))
+        rows = int(np.ceil(num_images / cols))
+        fig, axs = pyplot.subplots(rows, cols, figsize=(cols * 8, rows * 8))
+        axs = axs.flatten()
+
+        for i, image_path in enumerate(image_files):
+            full_image_path = os.path.join(image_dir, image_path)
+            image = cv2.imread(full_image_path)
+            
+            original_image_predictions = predictions[predictions["image_path"] == image_path]
+            for _, row in original_image_predictions.iterrows():
+                cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (255, 0, 0), 7) # Blue for original
+            
+            final_image_predictions_plot = final_predictions[final_predictions["image_path"] == image_path]
+            for _, row in final_image_predictions_plot.iterrows():
+                cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (182, 192, 255), 5) # Pink for final
+                
+            axs[i].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+            axs[i].set_title(f"Final predictions for {image_path}")
+            axs[i].axis('off')
+        
+        # Hide any unused subplots
+        for j in range(i + 1, len(axs)):
+            axs[j].axis('off')
+
+        pyplot.tight_layout()
+        pyplot.show()
+
+    return final_predictions
+
+
+if __name__ == "__main__":
+    
+    base_path = r"C:\Users\Bhavya\GSoC\Predict_&_delete"
+    image_directory = os.path.join(base_path, "Gregg1_2")
+    save_directory = os.path.join(base_path, "Save_dir_new")
+
+    final_results = unique_predictions_images(
+        image_dir=image_directory,
+        save_dir=save_directory,
+        strategy='left-hand',
+        visualization=True
+    )
+
+    print("\nFinal deduplicated predictions DataFrame:")
+    print(final_results.head())
\ No newline at end of file

From e0beed725ad3808756f0a7103be1b83f3b7a89c7 Mon Sep 17 00:00:00 2001
From: Bhavya Mehta <144762266+Bhavya1604@users.noreply.github.com>
Date: Thu, 16 Oct 2025 19:25:31 +0530
Subject: [PATCH 2/2] Refactored docs, updated evaluate.py and main.py, added
 Unique_predictions doc, removed DoubleCounting.py

---
 DoubleCounting.py                        | 279 ------------
 docs/user_guide/16_prediction.md         |   6 +
 docs/user_guide/17_Unique_predictions.md | 557 +++++++++++++++++++++++
 pyproject.toml                           |  16 +
 src/deepforest/evaluate.py               | 364 +++++++++++++++
 src/deepforest/main.py                   | 119 +++++
 6 files changed, 1062 insertions(+), 279 deletions(-)
 delete mode 100644 DoubleCounting.py
 create mode 100644 docs/user_guide/17_Unique_predictions.md

diff --git a/DoubleCounting.py b/DoubleCounting.py
deleted file mode 100644
index 38b6c8059..000000000
--- a/DoubleCounting.py
+++ /dev/null
@@ -1,279 +0,0 @@
-import os
-os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
-
-import cv2
-import h5py
-import numpy as np
-import pandas as pd
-import torch
-import torchvision
-import pycolmap
-from pathlib import Path
-from matplotlib import pyplot
-import geopandas as gpd
-from shapely.geometry import box
-
-from deepforest import main
-from hloc import extract_features, match_features, pairs_from_exhaustive
-from hloc.utils.io import get_matches
-
-
-def get_matching_points(h5_file, image1_name, image2_name, min_score=None):
-    """Get matching points between two images from an h5 file."""
-    matches, scores = get_matches(h5_file, image1_name, image2_name)
-    if min_score is not None:
-        matches = matches[scores > min_score]
-    match_index = pd.DataFrame(matches, columns=["image1", "image2"])
-    
-    features_path = os.path.join(os.path.dirname(h5_file), "features.h5")
-    with h5py.File(features_path, 'r') as features_h5_f:
-        keypoints_image1 = pd.DataFrame(features_h5_f[image1_name]["keypoints"][:], columns=["x", "y"])
-        keypoints_image2 = pd.DataFrame(features_h5_f[image2_name]["keypoints"][:], columns=["x", "y"])
-        points1 = keypoints_image1.iloc[match_index["image1"].values].values
-        points2 = keypoints_image2.iloc[match_index["image2"].values].values
-    return points1, points2
-
-def compute_homography_matrix(h5_file, image1_name, image2_name):
-    """Compute the homography matrix between two images."""
-    points1, points2 = get_matching_points(h5_file, image1_name, image2_name)
-    if len(points1) < 4 or len(points2) < 4:
-        raise ValueError(f"Not enough matching points (<4) found between images {image1_name} and {image2_name}")
-
-    ransac_options = pycolmap.RANSACOptions(max_error=4.0)
-    report = pycolmap.estimate_homography_matrix(points1, points2, ransac_options)
-
-    if report is None:
-        raise ValueError(f"Homography matrix estimation failed for images {image1_name} and {image2_name}")
-    return report
-
-def warp_box(xmin, ymin, xmax, ymax, homography):
-    """Warp a bounding box using a homography matrix."""
-    points = np.array([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype=np.float32)
-    reshaped_points = points.reshape(-1, 1, 2)
-    warped_points = cv2.perspectiveTransform(reshaped_points, homography).squeeze(1)
-    
-    warped_xmin, warped_ymin = warped_points.min(axis=0)
-    warped_xmax, warped_ymax = warped_points.max(axis=0)
-    return int(warped_xmin), int(warped_ymin), int(warped_xmax), int(warped_ymax)
-
-def align_predictions(predictions, homography_matrix):
-    """Aligns a DataFrame of predictions using a homography matrix."""
-    transformed_predictions = predictions.copy()
-    for index, row in transformed_predictions.iterrows():
-        xmin, ymin, xmax, ymax = warp_box(row['xmin'], row['ymin'], row['xmax'], row['ymax'], homography_matrix)
-        transformed_predictions.loc[index, ['xmin', 'ymin', 'xmax', 'ymax']] = xmin, ymin, xmax, ymax
-    return transformed_predictions
-
-def remove_predictions(src_predictions, dst_predictions, aligned_predictions, threshold, device, strategy='highest-score'):
-    """Remove overlapping predictions using different strategies."""
-    if strategy == "highest-score":
-        dst_and_aligned_predictions = pd.concat([aligned_predictions, dst_predictions], ignore_index=True)
-        boxes = torch.tensor(dst_and_aligned_predictions[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float).to(device)
-        scores = torch.tensor(dst_and_aligned_predictions['score'].values, dtype=torch.float).to(device)
-        
-        keep_indices = torchvision.ops.nms(boxes, scores, threshold)
-        indices_to_keep = dst_and_aligned_predictions.iloc[keep_indices.cpu()]
-        
-        src_filtered = src_predictions[src_predictions.box_id.isin(indices_to_keep.box_id)]
-        dst_filtered = dst_predictions[dst_predictions.box_id.isin(indices_to_keep.box_id)]
-    else:
-        aligned_predictions["geometry"] = aligned_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1)
-        dst_predictions["geometry"] = dst_predictions.apply(lambda row: box(row['xmin'], row['ymin'], row['xmax'], row['ymax']), axis=1)
-        aligned_gdf = gpd.GeoDataFrame(aligned_predictions, geometry="geometry")
-        dst_gdf = gpd.GeoDataFrame(dst_predictions, geometry='geometry')
-
-        joined = gpd.sjoin(aligned_gdf, dst_gdf, how='inner', predicate='intersects')
-
-        if strategy == "left-hand":
-            src_indices_to_keep = src_predictions.box_id
-            dst_indices_to_keep = dst_predictions[~dst_predictions.box_id.isin(joined.box_id_right)].box_id
-        elif strategy == "right-hand":
-            src_indices_to_keep = src_predictions[~src_predictions.box_id.isin(joined.box_id_left)].box_id
-            dst_indices_to_keep = dst_predictions.box_id
-        else:
-            raise ValueError(f"Unknown strategy: {strategy}. Choose from 'highest-score', 'left-hand', 'right-hand'.")
-
-        src_filtered = src_predictions[src_predictions.box_id.isin(src_indices_to_keep)]
-        dst_filtered = dst_predictions[dst_predictions.box_id.isin(dst_indices_to_keep)]
-
-    return src_filtered, dst_filtered
-
-def align_and_delete(matching_h5_file, predictions, device, threshold=0.325, strategy='highest-score'):
-    """Given predictions, align and delete overlapping boxes using a specified strategy."""
-    image_names = sorted(predictions.image_path.unique())
-    if len(image_names) < 2:
-        return predictions
-
-    predictions["box_id"] = range(len(predictions))
-    filtered_predictions = {name: predictions[predictions.image_path == name] for name in image_names}
-    
-    num_pairs = len(image_names) * (len(image_names) - 1) // 2
-    pair_count = 0
-
-    for i in range(len(image_names)):
-        for j in range(i + 1, len(image_names)):
-            src_image_name, dst_image_name = image_names[i], image_names[j]
-            pair_count += 1
-            print(f"Processing Pair {pair_count}/{num_pairs}: ({src_image_name}, {dst_image_name})")
-
-            try:
-                homography = compute_homography_matrix(h5_file=matching_h5_file, image1_name=src_image_name, image2_name=dst_image_name)
-            except ValueError as e:
-                print(f"Skipping pair, could not compute homography: {e}")
-                continue
-
-            src_preds, dst_preds = filtered_predictions[src_image_name], filtered_predictions[dst_image_name]
-            
-            if src_preds.empty or dst_preds.empty:
-                continue
-
-            aligned_src_preds = align_predictions(predictions=src_preds, homography_matrix=homography["H"])
-            
-            src_filtered, dst_filtered = remove_predictions(
-                src_predictions=src_preds,
-                dst_predictions=dst_preds,
-                aligned_predictions=aligned_src_preds,
-                threshold=threshold,
-                device=device,
-                strategy='left-hand'
-            )
-            
-            filtered_predictions[src_image_name] = src_filtered
-            filtered_predictions[dst_image_name] = dst_filtered
-            
-    return pd.concat(filtered_predictions.values()).drop_duplicates(subset="box_id")
-
-def create_sfm_model(image_dir, output_path, references, overwrite=False):
-    """Generate SfM feature files needed for matching."""
-    feature_conf = extract_features.confs["disk"]
-    matcher_conf = match_features.confs["disk+lightglue"]
-    
-    sfm_pairs, features, matches = output_path / 'pairs-sfm.txt', output_path / 'features.h5', output_path / 'matches.h5'
-    
-    extract_features.main(conf=feature_conf, image_dir=image_dir, image_list=references, feature_path=features, overwrite=overwrite)
-    pairs_from_exhaustive.main(sfm_pairs, image_list=references)
-    match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches, overwrite=overwrite)
-
-
-# =============================MAIN FUNCTION====================================
-def unique_predictions_images(image_dir, save_dir, strategy='highest-score', visualization=True):
-    """
-    High-level function to get unique predictions from a directory of overlapping images.
-
-    Args:
-        image_dir (str): Path to the directory containing input images.
-        save_dir (str): Path to a directory for saving intermediate SfM files.
-        strategy (str, optional): The strategy for deduplication. 
-            Options: 'highest-score', 'left-hand', 'right-hand'. Defaults to 'highest-score'.
-        visualization (bool, optional): If True, shows a plot comparing original and final predictions. Defaults to True.
-
-    Returns:
-        pandas.DataFrame: A DataFrame containing the final, deduplicated predictions.
-    """
-    # 1. SETUP
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    print(f"Using device: {device}")
-
-    model = main.deepforest()
-    model.use_release()
-    model.to(device)
-    
-    # Ensure the save directory exists
-    os.makedirs(save_dir, exist_ok=True)
-
-    image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.tif', '.png', '.jpg'))])
-    
-    print(f"Found {len(image_files)} images to process: {image_files}")
-    if not image_files:
-        raise FileNotFoundError(f"No images found in directory: {image_dir}")
-
-    # --- 2. PRE-PROCESSING: CREATE SFM FEATURES ---
-    print("\nStep 1: Creating SfM features...")
-    create_sfm_model(
-        image_dir=Path(image_dir),
-        output_path=Path(save_dir),
-        references=image_files,
-        overwrite=True
-    )
-    print("SfM features created.")
-
-    # 3. PREDICTION: GET INITIAL BOXES
-    print("\nStep 2: Running prediction on all images...")
-    all_predictions = []
-    for image_file in image_files:
-        print(f"Predicting on: {image_file}")
-        image_path = os.path.join(image_dir, image_file)
-        preds = model.predict_image(path=image_path, return_plot=False)
-        if preds is not None and not preds.empty:
-            preds["image_path"] = os.path.basename(image_file)
-            all_predictions.append(preds)
-
-    if not all_predictions:
-        raise ValueError("No predictions were made on any images. Cannot proceed.")
-        
-    predictions = pd.concat(all_predictions, ignore_index=True)
-    print(f"Found {len(predictions)} total predictions before filtering.")
-
-    # 4. DEDUPLICATION
-    print("\nStep 3: Resolving overlaps using SfM...")
-    matching_file = os.path.join(save_dir, "matches.h5")
-
-    final_predictions = align_and_delete(
-        predictions=predictions,
-        matching_h5_file=matching_file,
-        device=device,
-        strategy=strategy
-    )
-    print(f"Overlap resolution complete. Final unique predictions: {len(final_predictions)}")
-
-    # 5. VISUALIZATION
-    if visualization and not final_predictions.empty:
-        print("\nStep 4: Generating plots...")
-        num_images = len(image_files)
-        # Adjust subplot grid to fit all images
-        cols = int(np.ceil(np.sqrt(num_images)))
-        rows = int(np.ceil(num_images / cols))
-        fig, axs = pyplot.subplots(rows, cols, figsize=(cols * 8, rows * 8))
-        axs = axs.flatten()
-
-        for i, image_path in enumerate(image_files):
-            full_image_path = os.path.join(image_dir, image_path)
-            image = cv2.imread(full_image_path)
-            
-            original_image_predictions = predictions[predictions["image_path"] == image_path]
-            for _, row in original_image_predictions.iterrows():
-                cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (255, 0, 0), 7) # Blue for original
-            
-            final_image_predictions_plot = final_predictions[final_predictions["image_path"] == image_path]
-            for _, row in final_image_predictions_plot.iterrows():
-                cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (182, 192, 255), 5) # Pink for final
-                
-            axs[i].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-            axs[i].set_title(f"Final predictions for {image_path}")
-            axs[i].axis('off')
-        
-        # Hide any unused subplots
-        for j in range(i + 1, len(axs)):
-            axs[j].axis('off')
-
-        pyplot.tight_layout()
-        pyplot.show()
-
-    return final_predictions
-
-
-if __name__ == "__main__":
-    
-    base_path = r"C:\Users\Bhavya\GSoC\Predict_&_delete"
-    image_directory = os.path.join(base_path, "Gregg1_2")
-    save_directory = os.path.join(base_path, "Save_dir_new")
-
-    final_results = unique_predictions_images(
-        image_dir=image_directory,
-        save_dir=save_directory,
-        strategy='left-hand',
-        visualization=True
-    )
-
-    print("\nFinal deduplicated predictions DataFrame:")
-    print(final_results.head())
\ No newline at end of file
diff --git a/docs/user_guide/16_prediction.md b/docs/user_guide/16_prediction.md
index fc15038bf..76038a025 100644
--- a/docs/user_guide/16_prediction.md
+++ b/docs/user_guide/16_prediction.md
@@ -12,6 +12,12 @@ There are atleast four ways to make predictions with DeepForest.
 
 In general, during inference, for large images it is most common to use predict_tile.
 
+```{tip}
+Working with overlapping surveys and want to remove duplicate detections across images?
+See the guide: [Unique Predictions (Double-Counting Removal)](./17_Unique_predictions.md).
+It covers the SfM-based pipeline, the extra installation steps, and best practices.
+```
+
 ## Predict an image using the command line
 
 We provide a basic utility script to run a prediction task with the ability to save and/or plot outputs. This command is called `deepforest predict` and is included as part of the standard installation. You can run the command without any arguments, or the `--help` flag to check that it's available. The script will run in tiled prediction mode by default.
diff --git a/docs/user_guide/17_Unique_predictions.md b/docs/user_guide/17_Unique_predictions.md
new file mode 100644
index 000000000..4224bd636
--- /dev/null
+++ b/docs/user_guide/17_Unique_predictions.md
@@ -0,0 +1,557 @@
+# Unique Predictions: Double-Counting Removal
+
+This guide covers DeepForest's advanced double-counting removal capabilities for overlapping imagery scenarios. The algorithm uses Structure-from-Motion (SfM) techniques to resolve duplicate detections across multiple images, ensuring accurate object counts in drone surveys.
+
+## Overview
+
+When analyzing overlapping aerial imagery (common in drone surveys), the same objects (e.g., tree crowns) often appear in multiple images, leading to inflated counts. Traditional approaches like simple IoU-based Non-Maximum Suppression fail because they don't account for the geometric relationship between images.
+
+DeepForest's double-counting removal algorithm solves this by:
+
+1. **Feature Extraction**: Creating SfM features for all images using DISK+LightGlue
+2. **Geometric Alignment**: Computing homography matrices between image pairs
+3. **Prediction Alignment**: Transforming predictions between coordinate systems
+4. **Overlap Resolution**: Removing duplicates using configurable strategies
+5. **Visualization**: Optional before/after comparison plots
+
+## Use Cases
+
+- **Drone Surveys**: Overlapping flight paths with redundant tree crown detections
+- **Multi-Angle Detection**: Same objects viewed from different perspectives
+- **Forest Inventory**: Accurate tree counts across overlapping imagery
+- **Ecological Monitoring**: Precise wildlife or vegetation counts
+
+## Quick Start
+
+First, install DeepForest with the double-counting dependencies:
+
+```bash
+pip install deepforest[double_counting]
+```
+
+Then use the double-counting functionality:
+
+```python
+from deepforest import main
+
+# Initialize model
+model = main.deepforest()
+model.use_release()
+
+# Run double-counting removal
+results = model.predict_unique(
+    image_dir="/path/to/overlapping/images",
+    save_dir="/path/to/sfm_output",
+    strategy='highest-score',
+    visualization=True
+)
+
+print(f"Found {len(results)} unique tree crowns")
+```
+
+## Algorithm Details
+
+### Feature Extraction & Matching
+
+The algorithm uses DISK+LightGlue for robust feature extraction and matching:
+
+- **DISK descriptors**: Scale-invariant features effective for aerial imagery
+- **LightGlue matcher**: High-quality correspondences with viewpoint robustness
+- **Exhaustive pairing**: All possible image pairs are considered for matching
+
+### Homography Estimation
+
+For each image pair, a perspective transformation matrix is computed:
+
+- **RANSAC algorithm**: Robust estimation in presence of outliers
+- **Minimum 4 points**: At least 4 matched points are required; image pairs with fewer are skipped
+- **Error threshold**: 4.0 pixels (tunable for different image types)
+
+### Prediction Alignment
+
+Bounding box predictions are transformed between coordinate systems:
+
+- **Corner transformation**: All four corners of each bounding box are transformed
+- **Axis-aligned bounding**: Transformed quadrilaterals are converted to rectangles
+- **Coordinate mapping**: Predictions aligned to common reference frame
+
+### Overlap Resolution Strategies
+
+Three strategies are available for removing overlapping detections:
+
+#### 1. Highest-Score Strategy (`'highest-score'`)
+
+**Recommended for most use cases**
+
+- Uses Non-Maximum Suppression (NMS) based on confidence scores
+- Keeps predictions with highest confidence, removes lower-scoring overlaps
+- Most sophisticated approach considering both spatial overlap and confidence
+- Runs on CPU or GPU; a CUDA device only accelerates the NMS step
+
+```python
+results = model.predict_unique(
+    image_dir="images/",
+    save_dir="output/",
+    strategy='highest-score'  # Default
+)
+```
+
+#### 2. Left-Hand Strategy (`'left-hand'`)
+
+- Keeps all predictions from the first image
+- Removes overlapping predictions from the second image
+- Simple but may not be optimal: any intersection counts as an overlap (the IoU threshold is not used)
+- Faster than highest-score strategy
+
+```python
+results = model.predict_unique(
+    image_dir="images/",
+    save_dir="output/",
+    strategy='left-hand'
+)
+```
+
+#### 3. Right-Hand Strategy (`'right-hand'`)
+
+- Keeps all predictions from the second image
+- Removes overlapping predictions from the first image
+- Simple but may not be optimal: any intersection counts as an overlap (the IoU threshold is not used)
+- Faster than highest-score strategy
+
+```python
+results = model.predict_unique(
+    image_dir="images/",
+    save_dir="output/",
+    strategy='right-hand'
+)
+```
+
+## Requirements
+
+### Image Requirements
+
+- **Overlap**: Images must have sufficient overlap (>30% recommended)
+- **Viewpoints**: Similar viewpoints for reliable homography estimation
+- **Quality**: Clear images with distinctive features for matching
+- **Formats**: Supported formats include .tif, .png, .jpg
+
+### System Requirements
+
+- **Memory**: Scales with number of predictions per image
+- **GPU**: Recommended for highest-score strategy (CUDA-compatible)
+- **Storage**: Space for SfM feature files (typically 10-50MB per image)
+
+### Dependencies
+
+The double-counting functionality requires additional dependencies that are not included in the base DeepForest installation. Install them using:
+
+```bash
+pip install deepforest[double_counting]
+```
+
+This installs, among others:
+- `opencv-python==4.11.0.86`: OpenCV (GUI wheel) used by the double-counting pipeline
+- `pycolmap>=0.6.0`: For robust homography estimation
+- `hloc>=1.4.0`: For feature extraction and matching
+- `kornia>=0.7.0` and `kornia-feature>=0.7.0`: For feature ops/matching helpers
+
+Important: Do not install `opencv-python` and `opencv-python-headless` in the same environment.
+
+Alternatively, you can install the dependencies manually:
+
+```bash
+pip install opencv-python==4.11.0.86 pycolmap hloc kornia kornia-feature
+```
+
+## API Reference
+
+### `predict_unique()`
+
+High-level function for double-counting removal across multiple images.
+
+```python
+def predict_unique(image_dir, save_dir, strategy='highest-score', visualization=True):
+    """
+    Get unique predictions from overlapping images.
+    
+    Args:
+        image_dir (str): Path to directory containing input images
+        save_dir (str): Path for saving intermediate SfM files
+        strategy (str): Deduplication strategy ('highest-score', 'left-hand', 'right-hand')
+        visualization (bool): Show before/after comparison plots
+        
+    Returns:
+        pandas.DataFrame: Deduplicated predictions with columns:
+            ['xmin', 'ymin', 'xmax', 'ymax', 'score', 'label', 'image_path']
+    """
+```
+
+### Core Functions
+
+#### `create_sfm_model()`
+
+Generate SfM feature files needed for geometric matching.
+
+```python
+from deepforest.evaluate import create_sfm_model
+
+create_sfm_model(
+    image_dir=Path("images/"),
+    output_path=Path("sfm_output/"),
+    references=["image1.tif", "image2.tif"],
+    overwrite=True
+)
+```
+
+#### `align_and_delete()`
+
+Main function for removing double-counting across image pairs.
+
+```python
+from deepforest.evaluate import align_and_delete
+
+final_predictions = align_and_delete(
+    matching_h5_file="matches.h5",
+    predictions=initial_predictions,
+    device=device,
+    threshold=0.325,
+    strategy='highest-score'
+)
+```
+
+## Performance Considerations
+
+### Computational Complexity
+
+- **Time Complexity**: O(N²) where N is the number of images
+- **Pair Processing**: N*(N-1)/2 image pairs must be processed
+- **Feature Extraction**: Most time-consuming step (scales with image size)
+- **GPU Acceleration**: Available for NMS operations
+
+### Memory Usage
+
+- **Scales with predictions**: More predictions per image = more memory
+- **SfM Features**: Feature files require additional storage
+- **Batch Processing**: Consider processing large datasets in batches
+
+### Optimization Tips
+
+1. **Use GPU**: Enable CUDA for highest-score strategy
+2. **Batch Processing**: Process large datasets in smaller batches
+3. **Image Resolution**: Consider downsampling very large images
+4. **Strategy Selection**: Use simpler strategies for faster processing
+
+## Troubleshooting
+
+### Common Issues
+
+#### Insufficient Matching Points
+
+**Message**: `"Skipping pair, could not compute homography: Not enough matching points (<4) found between images ..."` (the pair is skipped and processing continues)
+
+**Solutions**:
+- Ensure images have sufficient overlap (>30%)
+- Check image quality and clarity
+- Verify images are from similar viewpoints
+- Consider adjusting RANSAC parameters
+
+#### Homography Estimation Failure
+
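+**Message**: `"Skipping pair, could not compute homography: Homography matrix estimation failed for images ..."` (the pair is skipped and processing continues)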
+
+**Solutions**:
+- Increase image overlap
+- Improve image quality
+- Check for motion blur or distortion
+- Verify feature extraction succeeded
+
+#### Memory Issues
+
+**Symptoms**: Out of memory errors during processing
+
+**Solutions**:
+- Reduce batch size
+- Use CPU instead of GPU for NMS
+- Process fewer images at once
+- Consider image downsampling
+
+### Performance Optimization
+
+#### For Large Datasets
+
+```python
+# Process in batches (each batch's images must first be copied to batch_<n>/)
+image_batches = [images[i:i+5] for i in range(0, len(images), 5)]
+
+for batch_idx, batch in enumerate(image_batches):
+    results = model.predict_unique(
+        image_dir=f"batch_{batch_idx}/",
+        save_dir=f"output_{batch_idx}/",
+        strategy='highest-score'
+    )
+```
+
+#### For Speed-Critical Applications
+
+```python
+# Use faster strategy
+results = model.predict_unique(
+    image_dir="images/",
+    save_dir="output/",
+    strategy='left-hand',  # Faster than highest-score
+    visualization=False    # Skip visualization
+)
+```
+
+## Examples
+
+### Basic Usage
+
+```python
+# First install: pip install deepforest[double_counting]
+from deepforest import main
+
+# Initialize model
+model = main.deepforest()
+model.use_release()
+
+# Process overlapping images
+results = model.predict_unique(
+    image_dir="drone_survey/images/",
+    save_dir="drone_survey/sfm_output/",
+    strategy='highest-score',
+    visualization=True
+)
+
+# Save results
+results.to_csv("unique_predictions.csv", index=False)
+print(f"Processed {len(results)} unique tree crowns")
+```
+
+### Advanced Configuration
+
+```python
+from pathlib import Path  # first: pip install deepforest[double_counting]
+import pandas as pd
+import torch
+from deepforest import main
+from deepforest.evaluate import create_sfm_model, align_and_delete
+# Custom processing pipeline
+model = main.deepforest()
+model.use_release()
+
+# Create SfM features with custom settings
+create_sfm_model(
+    image_dir=Path("images/"),
+    output_path=Path("sfm_output/"),
+    references=["img1.tif", "img2.tif", "img3.tif"],
+    overwrite=True
+)
+
+# Run detection on individual images
+all_predictions = []
+for image_file in ["img1.tif", "img2.tif", "img3.tif"]:
+    preds = model.predict_image(path=f"images/{image_file}")
+    preds["image_path"] = image_file
+    all_predictions.append(preds)
+
+predictions = pd.concat(all_predictions)
+
+# Custom double-counting removal
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+final_results = align_and_delete(
+    matching_h5_file="sfm_output/matches.h5",
+    predictions=predictions,
+    device=device,
+    threshold=0.4,  # Custom IoU threshold
+    strategy='highest-score'
+)
+```
+
+### Batch Processing
+
+```python
+import os
+from deepforest import main
+import shutil
+import pandas as pd
+
+def process_image_batch(image_dir, batch_size=5):
+    """Process images in batches to manage memory usage."""
+    model = main.deepforest()
+    model.use_release()
+    
+    image_files = [f for f in os.listdir(image_dir) 
+                   if f.lower().endswith(('.tif', '.png', '.jpg'))]
+    
+    # Process in batches
+    batches = [image_files[i:i+batch_size] 
+               for i in range(0, len(image_files), batch_size)]
+    
+    all_results = []
+    
+    for i, batch in enumerate(batches):
+        print(f"Processing batch {i+1}/{len(batches)}")
+        
+        # Create temporary directory for batch
+        batch_dir = f"batch_{i}"
+        os.makedirs(batch_dir, exist_ok=True)
+        
+        # Copy batch images
+        for img_file in batch:
+            shutil.copy(f"{image_dir}/{img_file}", f"{batch_dir}/{img_file}")
+        
+        # Process batch
+        results = model.predict_unique(
+            image_dir=batch_dir,
+            save_dir=f"sfm_batch_{i}",
+            strategy='highest-score',
+            visualization=False
+        )
+        
+        all_results.append(results)
+        
+        # Cleanup
+        shutil.rmtree(batch_dir)
+    
+    return pd.concat(all_results, ignore_index=True)
+
+# Usage
+results = process_image_batch("large_dataset/", batch_size=3)
+```
+
+## Best Practices
+
+### Image Preparation
+
+1. **Ensure Sufficient Overlap**: Aim for >30% overlap between adjacent images
+2. **Maintain Consistent Viewpoints**: Similar camera angles improve matching
+3. **Optimize Image Quality**: Clear, well-lit images with distinctive features
+4. **Consider Resolution**: Balance between detail and processing speed
+
+### Strategy Selection
+
+- **Highest-Score**: Best accuracy, GPU optional (NMS also runs on CPU), slower processing
+- **Left/Right-Hand**: Faster processing, may be less accurate
+- **Consider Use Case**: Choose based on accuracy vs. speed requirements
+
+### Performance Optimization
+
+1. **Use GPU**: Enable CUDA for NMS operations
+2. **Batch Processing**: Process large datasets in manageable chunks
+3. **Memory Management**: Monitor memory usage, adjust batch sizes
+4. **Storage Planning**: Allocate sufficient space for SfM files
+
+## Integration with Existing Workflows
+
+### With DeepForest Training
+
+```python
+# Train model on non-overlapping data
+model = main.deepforest()
+model.train(
+    csv_file="training_data.csv",
+    root_dir="training_images/"
+)
+
+# Apply to overlapping survey data
+survey_results = model.predict_unique(
+    image_dir="survey_images/",
+    save_dir="survey_sfm/",
+    strategy='highest-score'
+)
+```
+
+### With Evaluation Metrics
+
+```python
+# Evaluate unique predictions
+evaluation_results = model.evaluate(
+    csv_file="ground_truth.csv",
+    root_dir="ground_truth_images/",
+    predictions=survey_results
+)
+
+print(f"Precision: {evaluation_results['box_precision']:.3f}")
+print(f"Recall: {evaluation_results['box_recall']:.3f}")
+```
+
+## Conclusion
+
+DeepForest's double-counting removal algorithm provides a robust solution for accurate object counting in overlapping imagery scenarios. By leveraging SfM techniques and configurable strategies, users can achieve precise results while balancing accuracy and computational efficiency.
+
+For additional support or questions about the double-counting removal functionality, please refer to the DeepForest documentation or community forums.
+
+## Using only the double-counting tools (standalone)
+
+If you only need the SfM-based de-duplication (without running DeepForest inference), you can use the building blocks in `deepforest.evaluate` directly: `create_sfm_model()` and `align_and_delete()`. In that case, you must supply your own predictions DataFrame with columns `['xmin','ymin','xmax','ymax','score','label','image_path']`.
+
+### Why a separate environment is recommended
+
+- Different OpenCV wheels conflict (GUI `opencv-python` vs `opencv-python-headless`). The repo uses the headless build; mixing both breaks imports.
+- Torch/TorchVision/CUDA builds must be matched; installing other CV stacks may overwrite them.
+- Geo stack (`geopandas`, `shapely`) often pulls native libs; isolating avoids version pin clashes.
+
+Create a fresh environment to avoid these conflicts.
+
+### Minimal standalone environment (no DeepForest inference)
+
+Recommended with conda-forge (for clean Geo/CV binaries):
+
+```bash
+conda create -n df-doublecounting -c conda-forge python=3.11
+conda activate df-doublecounting
+
+# Core numeric/geo/cv
+pip install opencv-python==4.11.0.86 numpy pandas shapely geopandas h5py matplotlib
+
+# Torch + vision (pick the right CUDA/CPU build for your system if needed)
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+
+# SfM and features
+pip install pycolmap hloc kornia kornia-feature
+```
+
+With DeepForest itself also installed (`pip install deepforest`, needed for `deepforest.evaluate`), you can run:
+
+```python
+from pathlib import Path
+import pandas as pd
+from deepforest.evaluate import create_sfm_model, align_and_delete
+import torch
+
+# 1) Build SfM artifacts once
+create_sfm_model(
+    image_dir=Path("/path/to/images"),
+    output_path=Path("/path/to/sfm_output"),
+    references=sorted(["img1.tif","img2.tif","img3.tif"]),
+    overwrite=True,
+)
+
+# 2) Load your own predictions (must include image_path per row)
+predictions = pd.read_csv("/path/to/predictions.csv")
+
+# 3) De-duplicate
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+final_predictions = align_and_delete(
+    matching_h5_file=str(Path("/path/to/sfm_output")/"matches.h5"),
+    predictions=predictions,
+    device=device,
+    threshold=0.325,
+    strategy="highest-score",
+)
+```
+
+### Using only the extras without DeepForest
+
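+If you still prefer pip extras management but won’t use DeepForest inference, note that pip cannot install an extra group on its own: the command below installs DeepForest together with the `double_counting` extras, after which you can add your own stack: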
+
+```bash
+pip install deepforest[double_counting]
+# Add your preferred torch build afterwards (to avoid pip picking an unintended one)
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+```
+
+Notes:
+- Ensure only one OpenCV wheel is present in the env. For double-counting, use `opencv-python==4.11.0.86` and avoid installing `opencv-python-headless` concurrently. If you hit conflicts, uninstall all OpenCV wheels and reinstall the one you need.
+ 
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index ff61074f8..643f1e78f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,6 +100,22 @@ docs = [
     "sphinx_rtd_theme",
 ]
 
+double_counting = [
+    "opencv-python==4.11.0.86",
+    "numpy<2.0",
+    "pandas",
+    "matplotlib",
+    "h5py",
+    "shapely>2.0.0",
+    "geopandas",
+    "torch>=2.2.0",
+    "torchvision>=0.17.0",
+    "pycolmap>=0.6.0",
+    "hloc>=1.4.0",
+    "kornia>=0.7.0",
+    "kornia-feature>=0.7.0",
+]
+
 [project.scripts]
 deepforest = "deepforest.scripts.cli:main"
 
diff --git a/src/deepforest/evaluate.py b/src/deepforest/evaluate.py
index d4f76a9af..5a84092d3 100644
--- a/src/deepforest/evaluate.py
+++ b/src/deepforest/evaluate.py
@@ -1,14 +1,24 @@
 """Evaluation module."""
 
+import os
 import warnings
 
+import cv2
 import geopandas as gpd
+import h5py
 import numpy as np
 import pandas as pd
+import pycolmap
 import shapely
+import torch
+import torchvision
+from shapely.geometry import box
 
 from deepforest import IoU
 from deepforest.utilities import determine_geometry_type
+from hloc import extract_features, match_features
+from hloc import pairs_from_exhaustive
+from hloc.utils.io import get_matches
 
 
 def evaluate_image_boxes(predictions, ground_df):
@@ -398,3 +408,357 @@ def point_recall(predictions, ground_df):
     class_recall = compute_class_recall(matched_results)
 
     return {"results": results, "box_recall": box_recall, "class_recall": class_recall}
+
+def get_matching_points(h5_file, image1_name, image2_name, min_score=None):
+    """
+    Return the matched keypoint coordinates shared by two images.
+
+    Match indices (and their scores) are read from ``h5_file`` via
+    ``get_matches``; the keypoint coordinates themselves are read from a
+    ``features.h5`` file located in the same directory as ``h5_file``.
+
+    Args:
+        h5_file (str): Path to the HDF5 file holding the pairwise matches.
+        image1_name (str): Key of the first image in both HDF5 files.
+        image2_name (str): Key of the second image in both HDF5 files.
+        min_score (float, optional): When given, only matches whose score is
+            strictly greater than this value are kept. ``None`` keeps all.
+
+    Returns:
+        tuple: ``(points1, points2)``, two arrays with one ``(x, y)`` row per
+            retained match; row ``k`` of each array describes the same match.
+
+    Note:
+        ``features.h5`` must already exist next to ``h5_file``; this function
+        only reads it.
+    """
+    pair_indices, pair_scores = get_matches(h5_file, image1_name, image2_name)
+    if min_score is not None:
+        pair_indices = pair_indices[pair_scores > min_score]
+    index_table = pd.DataFrame(pair_indices, columns=["image1", "image2"])
+
+    keypoint_file = os.path.join(os.path.dirname(h5_file), "features.h5")
+    with h5py.File(keypoint_file, 'r') as store:
+        first_kpts = pd.DataFrame(store[image1_name]["keypoints"][:], columns=["x", "y"])
+        second_kpts = pd.DataFrame(store[image2_name]["keypoints"][:], columns=["x", "y"])
+    points1 = first_kpts.iloc[index_table["image1"].values].values
+    points2 = second_kpts.iloc[index_table["image2"].values].values
+    return points1, points2
+
+def compute_homography_matrix(h5_file, image1_name, image2_name):
+    """
+    Estimate the homography relating two images from their matched keypoints.
+
+    Matched points are loaded with ``get_matching_points`` and handed to
+    ``pycolmap.estimate_homography_matrix`` together with RANSAC options whose
+    ``max_error`` is fixed at 4.0.
+
+    Args:
+        h5_file (str): Path to the HDF5 file holding the pairwise matches.
+        image1_name (str): Name of the first image.
+        image2_name (str): Name of the second image.
+
+    Returns:
+        The report object produced by ``pycolmap.estimate_homography_matrix``,
+        passed through unchanged. NOTE(review): callers presumably read the
+        3x3 matrix from its ``'H'`` entry -- confirm the available keys
+        against the installed pycolmap version.
+
+    Raises:
+        ValueError: If fewer than 4 matching points are available.
+        ValueError: If pycolmap returns ``None`` instead of a report.
+
+    Note:
+        The 4.0 ``max_error`` is hard-coded. It is presumably a reprojection
+        tolerance in pixels -- verify against the pycolmap documentation
+        before tuning it for other imagery.
+    """
+    src_points, dst_points = get_matching_points(h5_file, image1_name, image2_name)
+
+    # A homography has 8 degrees of freedom, so 4 point pairs are the minimum.
+    fewest = min(len(src_points), len(dst_points))
+    if fewest < 4:
+        raise ValueError(f"Not enough matching points (<4) found between images {image1_name} and {image2_name}")
+
+    options = pycolmap.RANSACOptions(max_error=4.0)
+    estimate = pycolmap.estimate_homography_matrix(src_points, dst_points, options)
+
+    # A None result is treated as an estimation failure.
+    if estimate is not None:
+        return estimate
+    raise ValueError(f"Homography matrix estimation failed for images {image1_name} and {image2_name}")
+
+def warp_box(xmin, ymin, xmax, ymax, homography):
+    """
+    Map an axis-aligned box through a homography and re-box the result.
+
+    The four corners are projected with ``cv2.perspectiveTransform``; the
+    returned box is the axis-aligned extent of those projected corners, with
+    every coordinate converted by ``int()`` (i.e. truncated toward zero).
+
+    Args:
+        xmin (float): Minimum x of the box.
+        ymin (float): Minimum y of the box.
+        xmax (float): Maximum x of the box.
+        ymax (float): Maximum y of the box.
+        homography (numpy.ndarray): 3x3 matrix passed to OpenCV as-is.
+
+    Returns:
+        tuple: ``(xmin, ymin, xmax, ymax)`` of the warped box, as Python ints.
+
+    Note:
+        The projected corners generally form a non-rectangular quadrilateral,
+        so the returned rectangle can cover more area than the quadrilateral.
+    """
+    corners = np.array(
+        [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]], dtype=np.float32
+    )
+
+    # OpenCV expects points shaped (N, 1, 2); drop the middle axis afterwards.
+    projected = cv2.perspectiveTransform(corners.reshape(-1, 1, 2), homography).squeeze(1)
+
+    lower = projected.min(axis=0)
+    upper = projected.max(axis=0)
+    return int(lower[0]), int(lower[1]), int(upper[0]), int(upper[1])
+
+def align_predictions(predictions, homography_matrix):
+    """
+    Warp every bounding box in ``predictions`` with a homography.
+
+    Boxes are warped one by one with ``warp_box`` and written back by row
+    position, so frames whose index labels repeat (for example after
+    ``pd.concat`` without ``ignore_index=True``) are handled correctly.
+
+    Args:
+        predictions (pandas.DataFrame): Boxes with 'xmin', 'ymin', 'xmax' and
+            'ymax' columns; any other columns are carried over untouched.
+        homography_matrix (numpy.ndarray): 3x3 matrix forwarded to ``warp_box``.
+
+    Returns:
+        pandas.DataFrame: A copy of ``predictions`` (same index, same column
+            dtypes) holding the warped coordinates; the input is not modified.
+    """
+    coord_columns = ['xmin', 'ymin', 'xmax', 'ymax']
+    transformed_predictions = predictions.copy()
+    if transformed_predictions.empty:
+        return transformed_predictions
+    rows = transformed_predictions[coord_columns].itertuples(index=False, name=None)
+    warped = np.array([warp_box(*row, homography_matrix) for row in rows])
+    for position, column in enumerate(coord_columns):
+        transformed_predictions[column] = warped[:, position].astype(transformed_predictions[column].dtype)
+    return transformed_predictions
+
+def remove_predictions(src_predictions, dst_predictions, aligned_predictions, threshold, device, strategy='highest-score'):
+    """
+    Drop duplicate detections shared by a source and a destination image.
+
+    ``aligned_predictions`` is expected to be ``src_predictions`` warped into
+    the destination image's coordinates while keeping the same ``box_id``
+    values, so a surviving aligned box identifies a surviving source box.
+
+    Strategies:
+
+    * ``'highest-score'``: run ``torchvision.ops.nms`` over the aligned source
+      boxes plus the destination boxes; a box is kept only if NMS keeps it.
+    * ``'left-hand'``: keep every source box and drop each destination box
+      that intersects any aligned source box.
+    * ``'right-hand'``: keep every destination box and drop each source box
+      whose aligned version intersects any destination box.
+
+    Args:
+        src_predictions (pandas.DataFrame): Source-image boxes; needs 'box_id'.
+        dst_predictions (pandas.DataFrame): Destination-image boxes with
+            'xmin', 'ymin', 'xmax', 'ymax', 'box_id' (plus 'score' for NMS).
+        aligned_predictions (pandas.DataFrame): Warped source boxes with the
+            same columns as ``dst_predictions``.
+        threshold (float): IoU threshold handed to NMS. Only 'highest-score'
+            uses it; the other strategies treat any geometric intersection
+            as an overlap.
+        device (torch.device): Device the NMS tensors are moved to.
+        strategy (str): 'highest-score' (default), 'left-hand' or 'right-hand'.
+
+    Returns:
+        tuple: ``(src_filtered, dst_filtered)``, row subsets of
+            ``src_predictions`` and ``dst_predictions`` with their original
+            columns. The input frames are never modified in place.
+
+    Raises:
+        ValueError: If ``strategy`` is not one of the three known names.
+    """
+    # Validate first so a misspelled strategy fails before any heavy work.
+    if strategy not in ("highest-score", "left-hand", "right-hand"):
+        raise ValueError(f"Unknown strategy: {strategy}. Choose from 'highest-score', 'left-hand', 'right-hand'.")
+
+    coord_columns = ['xmin', 'ymin', 'xmax', 'ymax']
+
+    if strategy == "highest-score":
+        candidates = pd.concat([aligned_predictions, dst_predictions], ignore_index=True)
+        boxes = torch.tensor(candidates[coord_columns].values, dtype=torch.float).to(device)
+        scores = torch.tensor(candidates['score'].values, dtype=torch.float).to(device)
+
+        # Convert to a NumPy array: positional indexing with a tensor is fragile.
+        kept_positions = torchvision.ops.nms(boxes, scores, threshold).cpu().numpy()
+        kept_ids = candidates.iloc[kept_positions].box_id
+
+        src_filtered = src_predictions[src_predictions.box_id.isin(kept_ids)]
+        dst_filtered = dst_predictions[dst_predictions.box_id.isin(kept_ids)]
+        return src_filtered, dst_filtered
+
+    def _as_geoframe(frame):
+        # Build a throwaway GeoDataFrame so the caller's frame never gains a
+        # 'geometry' column (it used to leak into ``dst_filtered``).
+        rows = frame[coord_columns].itertuples(index=False, name=None)
+        shapes = [box(*row) for row in rows]
+        return gpd.GeoDataFrame({"box_id": frame["box_id"].to_numpy()}, geometry=shapes)
+
+    joined = gpd.sjoin(_as_geoframe(aligned_predictions), _as_geoframe(dst_predictions),
+                       how='inner', predicate='intersects')
+
+    if strategy == "left-hand":
+        src_keep = np.ones(len(src_predictions), dtype=bool)
+        dst_keep = ~dst_predictions.box_id.isin(joined.box_id_right).to_numpy()
+    else:  # "right-hand"
+        src_keep = ~src_predictions.box_id.isin(joined.box_id_left).to_numpy()
+        dst_keep = np.ones(len(dst_predictions), dtype=bool)
+    return src_predictions[src_keep], dst_predictions[dst_keep]
+
+def align_and_delete(matching_h5_file, predictions, device, threshold=0.325, strategy='highest-score'):
+    """
+    Remove double-counted detections across every pair of overlapping images.
+
+    Images are taken in sorted order of ``image_path`` and every unique pair
+    (i < j) is visited once. For each pair the lower-sorted image is the source
+    and the other the destination: a homography is computed from the match
+    file, the source boxes are projected with it, and overlapping boxes are
+    pruned by ``remove_predictions`` using ``strategy``. The surviving boxes of
+    both images replace the stored ones, so later pairs only see boxes that
+    survived earlier pairs.
+
+    Args:
+        matching_h5_file (str): Path to the HDF5 file with feature matches
+            between images; forwarded to ``compute_homography_matrix``.
+        predictions (pandas.DataFrame): Predictions for all images. Must have
+            an ``image_path`` column; the box/score columns are consumed by
+            ``align_predictions`` and ``remove_predictions``. The frame passed
+            in is not modified.
+        device (torch.device): Forwarded to ``remove_predictions``.
+        threshold (float, optional): Overlap threshold forwarded to
+            ``remove_predictions``. Defaults to 0.325.
+        strategy (str, optional): Overlap-resolution strategy forwarded to
+            ``remove_predictions`` ('highest-score', 'left-hand' or
+            'right-hand'). Defaults to 'highest-score'.
+
+    Returns:
+        pandas.DataFrame: The surviving predictions, with an added ``box_id``
+            column numbering the input rows. With fewer than two distinct
+            images there is nothing to compare and all rows are returned.
+
+    Raises:
+        ValueError: May propagate from ``remove_predictions``, which rejects
+            an unknown ``strategy``.
+
+    Note:
+        A ``ValueError`` from ``compute_homography_matrix`` does not abort the
+        run: the pair is reported on stdout and skipped.
+        ``box_id`` is what lets the final concatenation drop any row that
+        appears more than once.
+        The number of pairs grows as N * (N - 1) / 2 with the number of images.
+    """
+    # Work on a copy so the caller's DataFrame does not gain a box_id column.
+    predictions = predictions.assign(box_id=range(len(predictions)))
+
+    image_names = sorted(predictions.image_path.unique())
+    if len(image_names) < 2:
+        return predictions
+
+    filtered_predictions = {name: predictions[predictions.image_path == name] for name in image_names}
+
+    num_pairs = len(image_names) * (len(image_names) - 1) // 2
+    pair_count = 0
+
+    for i, src_image_name in enumerate(image_names):
+        for dst_image_name in image_names[i + 1:]:
+            pair_count += 1
+            print(f"Processing Pair {pair_count}/{num_pairs}: ({src_image_name}, {dst_image_name})")
+
+            src_preds, dst_preds = filtered_predictions[src_image_name], filtered_predictions[dst_image_name]
+            # Nothing can overlap if either side has no boxes left; skip the homography work.
+            if src_preds.empty or dst_preds.empty:
+                continue
+
+            try:
+                homography = compute_homography_matrix(h5_file=matching_h5_file, image1_name=src_image_name, image2_name=dst_image_name)
+            except ValueError as e:
+                print(f"Skipping pair, could not compute homography: {e}")
+                continue
+
+            aligned_src_preds = align_predictions(predictions=src_preds, homography_matrix=homography["H"])
+
+            src_filtered, dst_filtered = remove_predictions(
+                src_predictions=src_preds,
+                dst_predictions=dst_preds,
+                aligned_predictions=aligned_src_preds,
+                threshold=threshold,
+                device=device,
+                # Honour the caller's choice; this was previously pinned to 'left-hand'.
+                strategy=strategy
+            )
+
+            filtered_predictions[src_image_name] = src_filtered
+            filtered_predictions[dst_image_name] = dst_filtered
+
+    return pd.concat(filtered_predictions.values()).drop_duplicates(subset="box_id")
+
+def create_sfm_model(image_dir, output_path, references, overwrite=False):
+    """
+    Build the hloc feature, pair and match files used for geometric matching.
+
+    Three hloc entry points are called in sequence:
+
+    1. ``extract_features.main`` with the "disk" configuration, writing
+       ``features.h5``.
+    2. ``pairs_from_exhaustive.main``, writing ``pairs-sfm.txt`` for the
+       images in ``references``.
+    3. ``match_features.main`` with the "disk+lightglue" configuration,
+       reading the two files above and writing ``matches.h5``.
+
+    All three files are placed directly inside ``output_path``.
+
+    Args:
+        image_dir (pathlib.Path): Directory containing the input images.
+        output_path (pathlib.Path): Directory that receives the files. It is
+            joined to the file names with ``/``, so a plain ``str`` is not
+            accepted.
+        references (list): Image file names to process.
+        overwrite (bool, optional): Forwarded to feature extraction and
+            matching. Defaults to False.
+
+    Note:
+        Pairing is exhaustive, so the number of pairs to match grows
+        quadratically with the number of images.
+    """
+    disk_conf = extract_features.confs["disk"]
+    lightglue_conf = match_features.confs["disk+lightglue"]
+
+    pairs_file = output_path / 'pairs-sfm.txt'
+    features_file = output_path / 'features.h5'
+    matches_file = output_path / 'matches.h5'
+
+    # Keypoints and descriptors for every referenced image.
+    extract_features.main(
+        conf=disk_conf,
+        image_dir=image_dir,
+        image_list=references,
+        feature_path=features_file,
+        overwrite=overwrite,
+    )
+    # Every referenced image is paired with every other one.
+    pairs_from_exhaustive.main(pairs_file, image_list=references)
+    # Match the extracted features across the listed pairs.
+    match_features.main(
+        lightglue_conf,
+        pairs_file,
+        features=features_file,
+        matches=matches_file,
+        overwrite=overwrite,
+    )
\ No newline at end of file
diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 3d53375cf..f98a24a59 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -2,8 +2,11 @@
 import importlib
 import os
 import warnings
+from pathlib import Path
 
+import cv2
 import geopandas as gpd
+import matplotlib.pyplot as pyplot
 import numpy as np
 import pandas as pd
 import pytorch_lightning as pl
@@ -19,6 +22,7 @@
 from deepforest import evaluate as evaluate_iou
 from deepforest import predict, utilities
 from deepforest.datasets import prediction, training
+from evaluate import create_sfm_model, align_and_delete
 
 
 class deepforest(pl.LightningModule):
@@ -1071,3 +1075,118 @@ def __evaluation_logs__(self, results):
                         self.log(key, value)
                     except MisconfigurationException:
                         pass
+
+    @staticmethod
+    def predict_unique(image_dir, save_dir, strategy='highest-score', visualization=True):
+        """
+        Predict on every image in a directory and drop detections counted twice.
+
+        Static method: it builds its own release model and takes no ``self``.
+
+        Args:
+            image_dir (str): Directory of input images; files ending in .tif, .png
+                or .jpg (case-insensitive) are used.
+            save_dir (str): Directory for the intermediate SfM files. Created if
+                missing; existing SfM files are overwritten.
+            strategy (str, optional): 'highest-score', 'left-hand' or 'right-hand';
+                forwarded to ``align_and_delete``. Defaults to 'highest-score'.
+            visualization (bool, optional): If True, plot each image with original
+                boxes in blue and retained boxes in pink. Defaults to True.
+
+        Returns:
+            pandas.DataFrame: Deduplicated predictions, one row per retained box.
+
+        Raises:
+            FileNotFoundError: If no image with a supported extension is found.
+            ValueError: If no image produced any prediction.
+        """
+        # 1. SETUP
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Using device: {device}")
+
+        model = deepforest()
+        model.use_release()
+        model.to(device)
+
+        # Ensure the save directory exists
+        os.makedirs(save_dir, exist_ok=True)
+
+        image_files = sorted(f for f in os.listdir(image_dir) if f.lower().endswith(('.tif', '.png', '.jpg')))
+        print(f"Found {len(image_files)} images to process: {image_files}")
+        if not image_files:
+            raise FileNotFoundError(f"No images found in directory: {image_dir}")
+
+        # --- 2. PRE-PROCESSING: CREATE SFM FEATURES ---
+        print("\nStep 1: Creating SfM features...")
+        create_sfm_model(
+            image_dir=Path(image_dir),
+            output_path=Path(save_dir),
+            references=image_files,
+            overwrite=True
+        )
+        print("SfM features created.")
+
+        # 3. PREDICTION: GET INITIAL BOXES
+        print("\nStep 2: Running prediction on all images...")
+        all_predictions = []
+        for image_file in image_files:
+            print(f"Predicting on: {image_file}")
+            image_path = os.path.join(image_dir, image_file)
+            preds = model.predict_image(path=image_path, return_plot=False)
+            if preds is not None and not preds.empty:
+                preds["image_path"] = os.path.basename(image_file)
+                all_predictions.append(preds)
+
+        if not all_predictions:
+            raise ValueError("No predictions were made on any images. Cannot proceed.")
+        predictions = pd.concat(all_predictions, ignore_index=True)
+        print(f"Found {len(predictions)} total predictions before filtering.")
+
+        # 4. DEDUPLICATION
+        print("\nStep 3: Resolving overlaps using SfM...")
+        matching_file = os.path.join(save_dir, "matches.h5")
+        final_predictions = align_and_delete(
+            predictions=predictions,
+            matching_h5_file=matching_file,
+            device=device,
+            strategy=strategy
+        )
+        print(f"Overlap resolution complete. Final unique predictions: {len(final_predictions)}")
+
+        # 5. VISUALIZATION
+        if visualization and not final_predictions.empty:
+            print("\nStep 4: Generating plots...")
+            num_images = len(image_files)
+            # Adjust subplot grid to fit all images
+            cols = int(np.ceil(np.sqrt(num_images)))
+            rows = int(np.ceil(num_images / cols))
+            # squeeze=False keeps axs a 2-D array even for a 1x1 grid, so .flatten() works.
+            _, axs = pyplot.subplots(rows, cols, figsize=(cols * 8, rows * 8), squeeze=False)
+            axs = axs.flatten()
+
+            for i, image_path in enumerate(image_files):
+                axs[i].axis('off')
+                full_image_path = os.path.join(image_dir, image_path)
+                image = cv2.imread(full_image_path)
+                if image is None:
+                    print(f"Skipping plot for {image_path}: cv2.imread could not read the file.")
+                    continue
+
+                original_image_predictions = predictions[predictions["image_path"] == image_path]
+                for _, row in original_image_predictions.iterrows():
+                    cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (255, 0, 0), 7) # Blue for original
+
+                final_image_predictions_plot = final_predictions[final_predictions["image_path"] == image_path]
+                for _, row in final_image_predictions_plot.iterrows():
+                    cv2.rectangle(image, (int(row["xmin"]), int(row["ymin"])), (int(row["xmax"]), int(row["ymax"])), (182, 192, 255), 5) # Pink for final
+                axs[i].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+                axs[i].set_title(f"Final predictions for {image_path}")
+
+            # Hide any unused subplots
+            for ax in axs[num_images:]:
+                ax.axis('off')
+
+            pyplot.tight_layout()
+            pyplot.show()
+
+        return final_predictions