From 711cca054b3375441b8a85735b706e18daa5fd28 Mon Sep 17 00:00:00 2001
From: "Subhransu Sekhar Bhattacharjee (Rudra)"
Date: Thu, 18 Jul 2024 13:05:47 +1000
Subject: [PATCH 1/5] Update depth_to_pointcloud.py

---
 metric_depth/depth_to_pointcloud.py | 96 +++++++++++++++++------
 1 file changed, 56 insertions(+), 40 deletions(-)

diff --git a/metric_depth/depth_to_pointcloud.py b/metric_depth/depth_to_pointcloud.py
index 9b81cbbf..a20ec64b 100644
--- a/metric_depth/depth_to_pointcloud.py
+++ b/metric_depth/depth_to_pointcloud.py
@@ -1,6 +1,7 @@
-# Born out of Depth Anything V1 Issue 36
+# Born out of Depth Anything V1 Issue 36: Code by @1ssb
 # Make sure you have the necessary libraries
-# Code by @1ssb
+# Note that this code is meant for batch processing; to make individual predictions with different parameters, rewrite the loop execution
+# Load the images you want to perform inference on in the input_images directory
 
 import argparse
 import cv2
@@ -13,28 +14,27 @@
 
 from depth_anything_v2.dpt import DepthAnythingV2
 
-
-if __name__ == '__main__':
+def parse_arguments():
     parser = argparse.ArgumentParser()
+
+    # Model Parameters
     parser.add_argument('--encoder', default='vitl', type=str, choices=['vits', 'vitb', 'vitl', 'vitg'])
-    parser.add_argument('--load-from', default='', type=str)
-    parser.add_argument('--max-depth', default=20, type=float)
+    parser.add_argument('--load-from', default='checkpoints/depth_anything_v2_metric_hypersim_vitl.pth', type=str)
+    parser.add_argument('--max-depth', default=10, type=float)
 
-    parser.add_argument('--img-path', type=str)
+    # I/O Information
+    parser.add_argument('--img-path', default='./input_images', type=str)
     parser.add_argument('--outdir', type=str, default='./vis_pointcloud')
 
-    args = parser.parse_args()
-
-    # Global settings
-    FL = 715.0873
-    FY = 784 * 0.6
-    FX = 784 * 0.6
-    NYU_DATA = False
-    FINAL_HEIGHT = 518
-    FINAL_WIDTH = 518
-
-    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
-
+    # Inference Parameters
+    parser.add_argument('--focal-length-x', default=470.4, type=float, help='Focal length along the x-axis.')
+    parser.add_argument('--focal-length-y', default=470.4, type=float, help='Focal length along the y-axis.')
+    parser.add_argument('--final_width', default=360, type=float, help='Final Width of the images.')
+    parser.add_argument('--final_height', default=640, type=float, help='Final Height of the images.')
+
+    return parser.parse_args()
+
+def initialize_model(args, DEVICE):
     model_configs = {
         'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
         'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
@@ -46,38 +46,54 @@
     depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu'))
     depth_anything = depth_anything.to(DEVICE).eval()
 
-    if os.path.isfile(args.img_path):
-        if args.img_path.endswith('txt'):
-            with open(args.img_path, 'r') as f:
+    return depth_anything
+
+def get_filenames(img_path):
+    if os.path.isfile(img_path):
+        if img_path.endswith('txt'):
+            with open(img_path, 'r') as f:
                 filenames = f.read().splitlines()
         else:
-            filenames = [args.img_path]
+            filenames = [img_path]
    else:
-        filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True)
-
-    os.makedirs(args.outdir, exist_ok=True)
+        filenames = glob.glob(os.path.join(img_path, '**/*'), recursive=True)
+    return filenames
+
+def process_images(filenames, depth_anything, args, DEVICE):
+    FX, FY = args.focal_length_x, args.focal_length_y
+    H, W = args.final_height, args.final_width
 
     for k, filename in enumerate(filenames):
         print(f'Progress {k+1}/{len(filenames)}: {filename}')
-
         color_image = Image.open(filename).convert('RGB')
-
         image = cv2.imread(filename)
-        pred = depth_anything.infer_image(image, FINAL_HEIGHT)
+        pred = depth_anything.infer_image(image, H)
 
-        # Resize color image and depth to final size
-        resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
-        resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)
+        resized_color_image = color_image.resize((W, H), Image.LANCZOS)
+        resized_pred = Image.fromarray(pred).resize((W, H), Image.NEAREST)
 
-        focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL)
-        x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
-        x = (x - FINAL_WIDTH / 2) / focal_length_x
-        y = (y - FINAL_HEIGHT / 2) / focal_length_y
+        focal_length_x, focal_length_y = (FX, FY)
+        x, y = np.meshgrid(np.arange(W), np.arange(H))
+        x = (x - W / 2) / focal_length_x
+        y = (y - H / 2) / focal_length_y
         z = np.array(resized_pred)
         points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
         colors = np.array(resized_color_image).reshape(-1, 3) / 255.0
 
-        pcd = o3d.geometry.PointCloud()
-        pcd.points = o3d.utility.Vector3dVector(points)
-        pcd.colors = o3d.utility.Vector3dVector(colors)
-        o3d.io.write_point_cloud(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + ".ply"), pcd)
\ No newline at end of file
+        save_point_cloud(points, colors, args.outdir, filename)
+
+def save_point_cloud(points, colors, outdir, filename):
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(points)
+    pcd.colors = o3d.utility.Vector3dVector(colors)
+    o3d.io.write_point_cloud(os.path.join(outdir, os.path.splitext(os.path.basename(filename))[0] + ".ply"), pcd)
+
+if __name__ == '__main__':
+    args = parse_arguments()
+
+    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
+
+    depth_anything = initialize_model(args, DEVICE)
+    filenames = get_filenames(args.img_path)
+    os.makedirs(args.outdir, exist_ok=True)
+    process_images(filenames, depth_anything, args, DEVICE)

From 25bfe515b88507622ee341ad5859605e7e6ca812 Mon Sep 17 00:00:00 2001
From: "Subhransu Sekhar Bhattacharjee (Rudra)"
Date: Thu, 18 Jul 2024 13:06:29 +1000
Subject: [PATCH 2/5] Create metric_checkpoint_downloader.py

---
 metric_depth/metric_checkpoint_downloader.py | 59 ++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 metric_depth/metric_checkpoint_downloader.py

diff --git a/metric_depth/metric_checkpoint_downloader.py b/metric_depth/metric_checkpoint_downloader.py
new file mode 100644
index 00000000..27594a98
--- /dev/null
+++ b/metric_depth/metric_checkpoint_downloader.py
@@ -0,0 +1,59 @@
+import os
+import requests
+import argparse
+from tqdm import tqdm
+
+def download_file(url, local_filename):
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        total_size = int(response.headers.get('content-length', 0))
+
+        with open(local_filename, 'wb') as file:
+            for data in tqdm(response.iter_content(chunk_size=8192), total=total_size // 8192, unit='KB', unit_scale=True):
+                file.write(data)
+
+        print(f"File downloaded successfully: {local_filename}")
+    except requests.exceptions.HTTPError as http_err:
+        print(f"HTTP error occurred: {http_err}")
+    except Exception as err:
+        print(f"Other error occurred: {err}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Download checkpoint files.")
+    parser.add_argument("--size", "-s", choices=["large", "small", "base", "l", "s", "b"],
+                        default="large", help="Specify the size of the file to download (large/l, small/s, base/b). Default is large.")
+    parser.add_argument("--environment", "-e", choices=["indoor", "outdoor", "i", "o"],
+                        required=True, help="Specify the environment type for the model (indoor/i or outdoor/o).")
+    args = parser.parse_args()
+
+    size_mapping = {
+        "l": "large",
+        "s": "small",
+        "b": "base"
+    }
+    environment_mapping = {
+        "i": "indoor",
+        "o": "outdoor"
+    }
+
+    normalized_size = size_mapping.get(args.size, args.size)
+    normalized_environment = environment_mapping.get(args.environment, args.environment)
+
+    urls = {
+        ("indoor", "large"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Large/resolve/main/depth_anything_v2_metric_hypersim_vitl.pth?download=true",
+        ("indoor", "small"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Small/resolve/main/depth_anything_v2_metric_hypersim_vits.pth?download=true",
+        ("indoor", "base"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Base/resolve/main/depth_anything_v2_metric_hypersim_vitb.pth?download=true",
+        ("outdoor", "large"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Large/resolve/main/depth_anything_v2_metric_vkitti_vitl.pth?download=true",
+        ("outdoor", "small"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Small/resolve/main/depth_anything_v2_metric_vkitti_vits.pth?download=true",
+        ("outdoor", "base"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Base/resolve/main/depth_anything_v2_metric_vkitti_vitb.pth?download=true"
+    }
+
+    url = urls[(normalized_environment, normalized_size)]
+    local_filename = os.path.join("checkpoints", url.split('/')[-1].split("?")[0])
+
+    os.makedirs("checkpoints", exist_ok=True)
+    download_file(url, local_filename)
+
+if __name__ == "__main__":
+    main()

From 363506adbc17cd016e40f863c741ed819f11650f Mon Sep 17 00:00:00 2001
From: "Subhransu Sekhar Bhattacharjee (Rudra)"
Date: Thu, 18 Jul 2024 13:07:05 +1000
Subject: [PATCH 3/5] Create checkpoint_downloader.py

---
 checkpoint_downloader.py | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 checkpoint_downloader.py

diff --git a/checkpoint_downloader.py b/checkpoint_downloader.py
new file mode 100644
index 00000000..bf5bcaa2
--- /dev/null
+++ b/checkpoint_downloader.py
@@ -0,0 +1,54 @@
+import os
+import requests
+import argparse
+from tqdm import tqdm
+
+def download_file(url, local_filename):
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+
+        total_size = int(response.headers.get('content-length', 0))
+
+        with open(local_filename, 'wb') as file:
+            for data in tqdm(response.iter_content(chunk_size=8192), total=total_size // 8192, unit='KB', unit_scale=True):
+                file.write(data)
+
+        print(f"File downloaded successfully: {local_filename}")
+    except requests.exceptions.HTTPError as http_err:
+        print(f"HTTP error occurred: {http_err}")
+    except Exception as err:
+        print(f"Other error occurred: {err}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Download checkpoint files.")
Default is large.") + args = parser.parse_args() + + urls = { + "large": "https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true", + "small": "https://huggingface.co/depth-anything/Depth-Anything-V2-Small/resolve/main/depth_anything_v2_vits.pth?download=true", + "base": "https://huggingface.co/depth-anything/Depth-Anything-V2-Base/resolve/main/depth_anything_v2_vitb.pth?download=true" + } + + size_mapping = { + "l": "large", + "s": "small", + "b": "base" + } + + # Normalize the input size to its full form (e.g., "l" to "large") + normalized_size = size_mapping.get(args.size, args.size) # Retrieve full form or use the default if mapping not found + + # Get the URL for the specified size + url = urls[normalized_size] + + checkpoints_dir = "checkpoints" + local_filename = os.path.join(checkpoints_dir, f"depth_anything_v2_vit{normalized_size[0]}.pth") # Using the first letter of the full size form + + os.makedirs(checkpoints_dir, exist_ok=True) + download_file(url, local_filename) + +if __name__ == "__main__": + main() From b7f0704ed063fca557e47f5e69dd9b6ddd34e4df Mon Sep 17 00:00:00 2001 From: "Subhransu Sekhar Bhattacharjee (Rudra)" Date: Thu, 18 Jul 2024 13:07:24 +1000 Subject: [PATCH 4/5] Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index e1c80e89..2b320ff2 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,11 @@ We provide **four models** of varying scales for robust relative depth estimatio | Depth-Anything-V2-Large | 335.3M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true) | | Depth-Anything-V2-Giant | 1.3B | Coming soon | +You may use the script to directly download as well, using the following instructions: + +``` +python checkpoint_downloader.py --size [small/s/base/b/large/l] +``` ## Usage @@ -173,6 +178,7 @@ We are sincerely grateful to the awesome Hugging Face team ([@Pedro Cuenca](http We also thank the [DINOv2](https://github.com/facebookresearch/dinov2) team for contributing such impressive models to our community. +The test.jpg image is from [here](https://rankcomfort.com/top-digital-cameras-for-travel-photography-2024/). ## LICENSE From d42daa19e10c405b67202bfad0aa32339fb996cd Mon Sep 17 00:00:00 2001 From: "Subhransu Sekhar Bhattacharjee (Rudra)" Date: Thu, 18 Jul 2024 13:07:57 +1000 Subject: [PATCH 5/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b320ff2..8689df14 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ We provide **four models** of varying scales for robust relative depth estimatio You may use the script to directly download as well, using the following instructions: -``` +```python python checkpoint_downloader.py --size [small/s/base/b/large/l] ```