@@ -61,10 +61,16 @@ def to_cpu(x): return todevice(x, 'cpu')
def to_cuda(x):
    '''
    Forward x to todevice with the 'cuda' device string and return the result.
    '''
    return todevice(x, 'cuda')
6262
def dms_to_decimal(d, m, s, ref):
    '''
    Convert degrees, minutes, seconds to decimal degrees.

    Inputs:
        d: degrees
        m: minutes
        s: seconds
        ref: hemisphere reference; 'S' or 'W' makes the result negative,
            anything else leaves it positive. Matching is case-insensitive
            and ignores surrounding whitespace / NUL padding; bytes are
            decoded as ASCII.
    Returns:
        decimal degrees, negated for the southern / western hemisphere
    '''
    # Normalize the reference so padded, lower-case, or bytes tags are not
    # silently treated as the positive hemisphere.
    if isinstance(ref, bytes):
        ref = ref.decode('ascii', errors='ignore')
    if isinstance(ref, str):
        ref = ref.strip(' \t\r\n\x00').upper()

    dd = d + m / 60 + s / 3600
    return -dd if ref in ('S', 'W') else dd
6669
6770def get_gps_from_exif (image_path ):
71+ '''
72+ Get GPS information from an image file.
73+ '''
6874 img = Image .open (image_path )
6975 exif = img ._getexif ()
7076 if not exif :
@@ -80,6 +86,9 @@ def get_gps_from_exif(image_path):
8086 return gps_info
8187
8288def parse_gps_info (image_path ):
89+ '''
90+ Parse GPS information from an image file.
91+ '''
8392 gps_info = get_gps_from_exif (image_path )
8493 # Latitude
8594 lat_ref = gps_info .get ('GPSLatitudeRef' , 'N' )
@@ -111,6 +120,9 @@ def parse_gps_info(image_path):
111120 }
112121
113122def gps_to_xyz (gps_lookup , crs_from , crs_to ):
123+ '''
124+ Convert GPS coordinates to xyz coordinates.
125+ '''
114126 xyz_lookup = {}
115127 transformer = Transformer .from_crs (crs_from , crs_to , always_xy = True ) # includes altitude
116128 for image_name , (lat , lon , alt ) in gps_lookup .items ():
@@ -125,6 +137,8 @@ def estimate_3d_scale_from_gps(camera_centers, gps_xyz, camera_image_names, min_
125137 Inputs:
126138 camera_centers: (N, 3) array in MASt3r units (arbitrary scale)
127139 gps_xyz: (N, 3) array in meters [x, y, z] from lat/lon/alt
140+ camera_image_names: list of camera image names
141+ min_dist_threshold: minimum distance threshold for valid GPS pairs
128142 Returns:
129143 scale: estimated meters-per-unit scale factor
130144 """
@@ -153,6 +167,19 @@ def estimate_3d_scale_from_gps(camera_centers, gps_xyz, camera_image_names, min_
153167
154168
155169def estimate_scale_ransac (camera_centers , gps_xyz , camera_image_names , threshold = 0.05 , iterations = 1000 , min_dist = 1.0 ):
170+ '''
171+ Estimate scale factor between MASt3r's camera centers and GPS 3D coordinates using RANSAC.
172+
173+ Inputs:
174+ camera_centers: (N, 3) array in MASt3r units (arbitrary scale)
175+ gps_xyz: (N, 3) array in meters [x, y, z] from lat/lon/alt
176+ camera_image_names: list of camera image names
177+ threshold: threshold for inliers
178+ iterations: number of RANSAC iterations
179+ min_dist: minimum distance threshold for valid GPS pairs
180+ Returns:
181+ (scale, num_inliers, num_samples): best meters-per-unit scale factor, number of inliers supporting it, and total number of scale samples
182+ '''
156183 scales = []
157184 pairs = []
158185
@@ -191,11 +218,17 @@ def estimate_scale_ransac(camera_centers, gps_xyz, camera_image_names, threshold
191218 return best_scale , len (best_inliers ), len (scales )
192219
def extract_image_names(image_paths):
    '''
    Extract image names from a list of image paths.

    Inputs:
        image_paths: iterable of paths (str or os.PathLike)
    Returns:
        list with the final path component of each input, in the same order
    '''
    # os.path.basename uses the platform's separators and accepts path-like
    # objects, unlike splitting on a hard-coded '/'.
    return [os.path.basename(path) for path in image_paths]
195225
196226
197227
198228def collect_gps_data (data_folder ):
229+ '''
230+ Collect GPS data with extra metadata from a folder of images.
231+ '''
199232 records = []
200233 for fname in sorted (os .listdir (data_folder )):
201234 if fname .lower ().endswith (('.jpg' , '.jpeg' , '.png' )):
@@ -218,6 +251,9 @@ def collect_gps_data(data_folder):
218251
219252
220253def run_mast3r (args ):
254+ '''
255+ Run MASt3R on a folder of images.
256+ '''
221257 if args .weights is not None :
222258 weights_path = args .weights
223259 else :
@@ -246,22 +282,14 @@ def add_parse_args(parser, is_scene_path=False):
246282 parser .add_argument ('--crs_from' , type = str , required = False , default = 'EPSG:4979' , help = 'EPSG code of the input CRS' )
247283 parser .add_argument ('--crs_to' , type = str , required = False , default = 'EPSG:32616' , help = 'EPSG code of the output CRS' )
248284 if not is_scene_path :
249- # parser.add_argument('--retrieval_model', type=str, required=False, default=None, help='Retrieval model weights path that is used to make image pairs')
250- # parser.add_argument('--device', type=str, required=False, default='cuda:0', help='Device to run the model on')
251- # parser.add_argument('--silent', type=bool, required=True, help='Whether to run the model silently')
252- # parser.add_argument('--image_size', type=int, required=True, help='Image size')
253285 parser .add_argument ('--optim_level' , type = str , required = False , default = 'refine+depth' , choices = ['coarse' , 'refine' , 'refine+depth' ], help = 'Optimization level' )
254286 parser .add_argument ('--lr1' , type = float , required = False , default = 0.07 , help = 'Learning rate for the first refinement iteration stage' )
255287 parser .add_argument ('--niter1' , type = int , required = False , default = 300 , help = 'Number of iterations for the first refinement iteration stage' )
256288 parser .add_argument ('--lr2' , type = float , required = False , default = 0.01 , help = 'Learning rate for the second refinement iteration stage' )
257289 parser .add_argument ('--niter2' , type = int , required = False , default = 300 , help = 'Number of iterations for the second refinement iteration stage' )
258290 parser .add_argument ('--min_conf_thr' , type = float , required = False , default = 1.5 , help = 'Minimum confidence threshold' )
259291 parser .add_argument ('--matching_conf_thr' , type = float , required = False , default = 0. , help = 'Matching confidence threshold' )
260- # parser.add_argument('--as_pointcloud', type=bool, required=True, help='Whether to output a pointcloud')
261- # parser.add_argument('--mask_sky', type=bool, required=True, help='Whether to mask the sky')
262292 parser .add_argument ('--clean_depth' , type = bool , required = False , default = True , help = 'Whether to clean the depth' )
263- # parser.add_argument('--transparent_cams', type=bool, required=True, help='Whether to make the cameras transparent')
264- # parser.add_argument('--cam_size', type=float, required=True, help='Camera size')
265293
266294 available_scenegraph_type = [
267295 ("complete: all possible image pairs" , "complete" ),
@@ -295,6 +323,9 @@ def add_parse_args(parser, is_scene_path=False):
295323 return parser
296324
297325def scale_pointcloud_based_on_geotag ():
326+ '''
327+ Scale a pointcloud based on GPS data. If no scene file is provided, MASt3R will be run to generate a scene file.
328+ '''
298329 parser = argparse .ArgumentParser ()
299330
300331 # Add known args
@@ -316,9 +347,11 @@ def scale_pointcloud_based_on_geotag():
316347 with open (args .scene_path , 'rb' ) as f :
317348 data = pickle .load (f )
318349
319-
350+ # Get camera centers
320351 cam2w = data .get_im_poses ()
321352 camera_centers = cam2w [:, :3 , 3 ] # Extract translation component from [R|t]
353+
354+ # Collect GPS data
322355 df = collect_gps_data (args .folder_path )
323356 image_gps_data = df .to_numpy ()
324357 gps_lookup = {
@@ -327,6 +360,8 @@ def scale_pointcloud_based_on_geotag():
327360 }
328361 image_names = extract_image_names (data .img_paths )
329362 xyz_lookup = gps_to_xyz (gps_lookup , args .crs_from , args .crs_to )
363+
364+ # Estimate scale
330365 scale = 1.0
331366 if args .scale_method == 'ransac' :
332367 scale , sfm_dists , gps_dists = estimate_scale_ransac (camera_centers .cpu ().numpy (), xyz_lookup , image_names )
@@ -335,6 +370,8 @@ def scale_pointcloud_based_on_geotag():
335370 else :
336371 raise ValueError (f"Invalid scale method: { args .scale_method } " )
337372 print (f"Estimated scale: { scale } " )
373+
374+ # Convert scene output to PLY
338375 convert_scene_output_to_ply (args .output_path , data , scale = scale , apply_y_flip = False , min_conf_thr = args .min_conf_thr , clean = args .clean_depth , TSDF_thresh = args .TSDF_thresh )
339376
340377if __name__ == "__main__" :
0 commit comments