diff --git a/.gitignore b/.gitignore
index d0aba0d..cbf775d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,8 @@ co3d_masks
 jay_work_in_progress/
 jay_imagenet_train_04_30_2025_dimensions.npy
 jay_imagenet_train_0.1_dimensions.npy
-*.png
\ No newline at end of file
+*.png
+clickme_datasets/
+*.csv
+temp/
+logs/
\ No newline at end of file
diff --git a/ceiling_floor_estimate.py b/ceiling_floor_estimate.py
index e8d0f1e..48a9197 100644
--- a/ceiling_floor_estimate.py
+++ b/ceiling_floor_estimate.py
@@ -1,4 +1,5 @@
 import os, sys
+import random
 import numpy as np
 from PIL import Image
 import json
@@ -11,11 +12,84 @@ import gc
 import torch
 from joblib import Parallel, delayed
-from scipy.stats import spearmanr
+from scipy.stats import spearmanr, pearsonr, rankdata, wasserstein_distance_nd
+from scipy.spatial.distance import cosine
 # import resource  # Add resource module for file descriptor limits
 from sklearn.metrics import average_precision_score
 from torchvision.transforms import functional as tvF
+from torchvision.transforms import InterpolationMode
+
+# def emd_2d(test_map, ref_map):
+#     test_map = (test_map - test_map.min()) / (test_map.max() - test_map.min()+1e-8)
+#     ref_map = (ref_map - ref_map.min()) / (ref_map.max() - ref_map.min()+1e-8)
+#     return wasserstein_distance_nd(test_map, ref_map)
+
+def emd_2d(test_map, ref_map, eps=0.2, iters=50):  # NOTE: the iters default was left blank; 50 is an assumed value
+    """
+    Sinkhorn EMD between standardized positive maps. Returns [B].
+    NOTE: zplus (z-scoring) and _grid_cost (pixel-grid ground cost) are helpers
+    that this patch neither defines nor imports.
+    """
+    test_map = torch.as_tensor(test_map, dtype=torch.float32)
+    ref_map = torch.as_tensor(ref_map, dtype=torch.float32)
+    if test_map.ndim == 2: test_map = test_map[None]
+    if ref_map.ndim == 2: ref_map = ref_map[None]
+    if test_map.ndim == 4: test_map = test_map.squeeze(1)
+    if ref_map.ndim == 4: ref_map = ref_map.squeeze(1)
+    s = zplus(test_map).clamp_min(0)
+    c = zplus(ref_map).clamp_min(0)
+
+    B, H, W = s.shape
+    s = s.view(B, -1); c = c.view(B, -1)
+    s = s / (s.sum(dim=1, keepdim=True).clamp_min(1e-8))
+    c = c / (c.sum(dim=1, keepdim=True).clamp_min(1e-8))
+
+    M = _grid_cost(H, W, s.device).unsqueeze(0).expand(B, -1, -1)
+    cost = _sinkhorn(s, c, M, eps=eps, iters=iters)
+    return cost.item() if cost.numel() == 1 else cost  # scalar for the single-map call sites
+
+def _sinkhorn(a, b, M, eps, iters):
+    """
+    a,b: [B,P] prob vectors; M: [B,P,P] ground cost.
+    Returns: [B] transport cost.
+ """ + K = torch.exp(-M / eps) + u = torch.full_like(a, 1.0 / a.size(1)) + v = torch.full_like(b, 1.0 / b.size(1)) + for _ in range(iters): + u = a / (K @ v).clamp_min(1e-8) + v = b / (K.transpose(1,2) @ u).clamp_min(1e-8) + P = torch.diag_embed(u) @ K @ torch.diag_embed(v) + return (P * M).sum(dim=(1,2)) + +def rank_cosine(test_map, ref_map): + ref_map = ref_map.flatten() + test_map = test_map.flatten() + + non_zero_pos = np.where(ref_map != 0)[0] + + ref_rank = rankdata(ref_map, method='average') + test_rank = rankdata(test_map, method='average') + + ref_rank = ref_rank[non_zero_pos] + test_rank = test_rank[non_zero_pos] + ref_rank = np.float64(ref_rank) + test_rank = np.float64(test_rank) + + if test_rank.size > 1 and ref_rank.size > 1: + cosine_score = cosine(ref_rank, test_rank) + return cosine_score + else: + return float('nan') + +def rank_pearson(test_map, ref_map): + ref_map = ref_map.flatten() + test_map = test_map.flatten() + + non_zero_pos = np.where(ref_map != 0)[0] + ref_rank = rankdata(ref_map, method='average') + test_rank = rankdata(test_map, method='average') + + ref_rank = ref_rank[non_zero_pos] + test_rank = test_rank[non_zero_pos] + if test_rank.size > 1 and ref_rank.size > 1: + pearson_score = pearsonr(ref_rank, test_rank) + return pearson_score.statistic + else: + return float('nan') def auc(test_map, reference_map, thresholds=10, metric="iou"): """Compute the area under the IOU curve for a test map and a reference map""" @@ -48,7 +122,7 @@ def auc(test_map, reference_map, thresholds=10, metric="iou"): # Return the area under the curve (trapezoidal integration) # We're integrating over normalized threshold range [0,1] - return np.trapezoid(scores, x=thresholds) if len(thresholds) > 1 else np.mean(scores) + return np.trapz(scores, x=thresholds) if len(thresholds) > 1 else np.mean(scores) def rankorder(test_map, reference_map, threshold=0.): @@ -132,7 +206,7 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri target_depth_map = target_data['depth_map'] level_scores = [] for k, clickmap_at_k in enumerate(clickmaps): - if metric == "spearman" and k < (len(clickmaps)-1): + if metric != "auc" and k < (len(clickmaps)-1): continue rand_scores = [] n = len(clickmap_at_k) @@ -141,12 +215,14 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri for iteration in range(n_iterations): test_rand_perm = np.random.permutation(n) fh = test_rand_perm[:(n//2)] + fh = list(fh) + random.choices(fh, k=n//2) test_map = clickmap_at_k[fh].mean(0) if not floor and target_img_name == img_name: target_rand_perm = test_rand_perm else: target_rand_perm = np.random.permutation(target_n) sh = target_rand_perm[(target_n//2):] + sh = list(sh) + random.choices(sh, k=target_n//2) reference_map = target_clickmap_at_k[sh].mean(0) # Save for visualization if k == (len(clickmaps)-1) and iteration == (n_iterations-1): @@ -159,7 +235,7 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri #Project before blurring if target_img_name != img_name: - test_map = utils.project_img_gpu(test_map, depth_map, w2c, target_w2c, Ks, target_Ks, device=device) + test_map = utils.project_img_gpu(test_map, depth_map, target_depth_map, w2c, target_w2c, Ks, target_Ks, device=device) blur_clickmaps = utils.blur_maps_for_cf( np.stack((test_map, reference_map), axis=0)[None], @@ -177,11 +253,11 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri image_shape = config['image_shape'] center_crop = 
                    if center_crop:
-                        test_map = torch.tensor(test_map)
-                        reference_map = torch.tensor(reference_map)
-                        test_map = tvF.resize(test_map, min(image_shape))
+                        test_map = torch.tensor(test_map)[None, :, :]
+                        reference_map = torch.tensor(reference_map)[None, :, :]
+                        test_map = tvF.resize(test_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT)
                         test_map = tvF.center_crop(test_map, center_crop)
-                        reference_map = tvF.resize(reference_map, min(image_shape))
+                        reference_map = tvF.resize(reference_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT)
                         reference_map = tvF.center_crop(reference_map, center_crop)
                         test_map = test_map.numpy().squeeze()
                         reference_map = reference_map.numpy().squeeze()
@@ -192,10 +268,16 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri
                         score = rankorder(test_map.flatten(), reference_map.flatten())
                     elif metric == "spearman":
                         score, _ = spearmanr(test_map.flatten(), reference_map.flatten())
-                        if np.isnan(score):
-                            continue
+                    elif metric == "rank_pearson":
+                        score = rank_pearson(test_map, reference_map)
+                    elif metric == "rank_cosine":
+                        score = rank_cosine(test_map, reference_map)
+                    elif metric == "emd":
+                        score = emd_2d(test_map, reference_map)
                     else:
                         raise ValueError(f"Invalid metric: {metric}")
+                    if np.isnan(score):
+                        continue
                     rand_scores.append(score)

                     # Explicitly free memory
@@ -206,7 +288,7 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri
                 else:
                     rand_scores = np.nanmean(np.asarray(rand_scores))
                 level_scores.append(rand_scores)
-                gc.collect()
+                # gc.collect()
             angle_score = np.nanmean(np.asarray(level_scores))
             batch_results[target_img_diff].append(angle_score)
             all_rotation_results[img_name][target_img_diff] = angle_score
@@ -219,7 +301,7 @@ def compute_rotation_correlation_batch(batch_indices, all_data, all_names, metri
                 projected_img = np.asarray(img)
                 if target_img_name != img_name:
                     projected_img = np.moveaxis(projected_img, -1, 0)
-                    projected_img = utils.project_img_gpu(projected_img, depth_map, w2c, target_w2c, Ks, target_Ks, device=device)
+                    projected_img = utils.project_img_gpu(projected_img, depth_map, target_depth_map, w2c, target_w2c, Ks, target_Ks, device=device)
                     projected_img = np.moveaxis(projected_img, 0, -1)
                 title = f"{img_name}_{round(angle_score, 3)}"
                 plot_infos.append({'output_name':image_output_name, 'title':title,
@@ -258,7 +340,7 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric="
             level_scores = []
             scale_diff = scale / target_scale
             for k, clickmap_at_k in enumerate(clickmaps):
-                if metric == "spearman" and k < (len(clickmaps)-1):
+                if (metric != 'auc') and k < (len(clickmaps)-1):
                     continue
                 rand_scores = []
                 n = len(clickmap_at_k)
@@ -267,6 +349,8 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric="
                 for iteration in range(n_iterations):
                     test_rand_perm = np.random.permutation(n)
                     fh = test_rand_perm[:(n // 2)]
+                    fh = list(fh) + random.choices(fh, k=n//2)  # bootstrap the half-split back up to n maps
                     # sh = test_rand_perm[(n//2):]
                     test_map = clickmap_at_k[fh].mean(0)
                     if not floor and target_img_name == img_name:
                         target_rand_perm = test_rand_perm
                     else:
                         target_rand_perm = np.random.permutation(target_n)
                     sh = target_rand_perm[(target_n//2):]
+                    sh = list(sh) + random.choices(sh, k=target_n//2)
+
                     reference_map = target_clickmap_at_k[sh].mean(0)
                     if k == (len(clickmaps)-1) and iteration == (n_iterations - 1):
                         unscaled_map = test_map.copy()
@@ -285,11 +371,16 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric=" # Scale before blurring, need to scale kernel size if scale_diff != 1: - test_map = utils.sparse_scale(test_map, scale_diff, device).cpu().numpy() + test_map = utils.sparse_scale(test_map, scale_diff, device).cpu().numpy().squeeze() + # if scale_diff > 1: + # reference_map = torch.tensor(reference_map) + # reference_map = tvF.center_crop(reference_map, test_map.shape) + # reference_map = reference_map.numpy() + blur_clickmaps = utils.blur_maps_for_cf( np.stack((test_map, reference_map), axis=0)[None], - int(blur_size/scale_diff), - int(blur_sigma/scale_diff), + int(blur_size), + int(blur_sigma), gpu_batch_size=2).squeeze() test_map = blur_clickmaps[0] @@ -300,12 +391,12 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric=" if config: image_shape = config['image_shape'] center_crop = config['center_crop'] - if center_crop: - test_map = torch.tensor(test_map) - reference_map = torch.tensor(reference_map) - test_map = tvF.resize(test_map, min(image_shape)) + if center_crop and test_map.shape[-1] > center_crop[-1]: + test_map = torch.tensor(test_map)[None] + reference_map = torch.tensor(reference_map)[None] + test_map = tvF.resize(test_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT) test_map = tvF.center_crop(test_map, center_crop) - reference_map = tvF.resize(reference_map, min(image_shape)) + reference_map = tvF.resize(reference_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT) reference_map = tvF.center_crop(reference_map, center_crop) test_map = test_map.numpy().squeeze() reference_map = reference_map.numpy().squeeze() @@ -318,6 +409,12 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric=" score = rankorder(test_map.flatten(), reference_map.flatten()) elif metric == "spearman": score, _ = spearmanr(test_map.flatten(), reference_map.flatten()) + elif metric == "rank_pearson": + score = rank_pearson(test_map, reference_map) + elif metric == "rank_cosine": + score = rank_cosine(test_map, reference_map) + elif metric == "emd": + score = emd_2d(test_map, reference_map) else: raise ValueError(f"Invalid metric: {metric}") rand_scores.append(score) @@ -327,7 +424,7 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric=" del blur_clickmaps rand_scores = np.nanmean(np.asarray(rand_scores)) level_scores.append(rand_scores) - gc.collect() + # gc.collect() scale_results[img_name][target_img_diff] = np.nanmean(np.asarray(level_scores)) batch_results[target_img_diff].append(np.nanmean(np.asarray(level_scores))) if not floor: @@ -348,59 +445,90 @@ def compute_scale_correlation_batch(batch_indices, all_data, all_names, metric=" 'reference_map':reference_map, 'test_map': test_map}) return batch_results, scale_results, plot_infos -def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="auc", n_iterations=10, device='cuda', blur_size=11, blur_sigma=1.5, floor=False, config=None): +def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="auc", n_iterations=10, device='cuda', blur_size=11, blur_sigma=1.5, floor=False, config=None, metadata=None): """Compute split-half correlations for a batch of clickmaps in parallel""" batch_results = [] all_scores = {} + max_kernel_size = config.get("max_kernel_size", 51) + blur_sigma_function = config.get("blur_sigma_function", lambda x: x) for i in tqdm(batch_indices, desc="Computing split-half 
correlations", total=len(batch_indices)): - clickmaps = all_clickmaps[i] img_name = all_names[i] + clickmaps = all_clickmaps[img_name] + level_corrs = [] + #TODO modify for speed up + if metadata and img_name in metadata: + native_size = metadata[img_name] + short_side = min(native_size) + scale = short_side / min(clickmaps[-1].shape[-2:]) + adj_blur_size = int(np.round(blur_size * scale)) + if not adj_blur_size % 2: + adj_blur_size += 1 + adj_blur_size = min(adj_blur_size, max_kernel_size) + adj_blur_sigma = blur_sigma_function(adj_blur_size) + else: + adj_blur_size = blur_size + adj_blur_sigma = blur_sigma + if floor: - rand_i = np.random.choice([j for j in range(len(all_clickmaps)) if j != i]) - for k, clickmap_at_k in enumerate(clickmaps): + rand_i = np.random.randint(len(all_clickmaps) - 1) + if rand_i >= i: + rand_i += 1 + rand_name = all_names[rand_i] + rand_clickmaps = all_clickmaps[rand_name] + + if metadata and rand_name in metadata: + native_size = metadata[rand_name] + short_side = min(native_size) + scale = short_side / min(rand_clickmaps[-1].shape[-2:]) + rand_adj_blur_size = int(np.round(blur_size * scale)) + if not adj_blur_size % 2: + rand_adj_blur_size += 1 + rand_adj_blur_size = min(rand_adj_blur_size, max_kernel_size) + rand_adj_blur_sigma = blur_sigma_function(rand_adj_blur_size) + else: + rand_adj_blur_size = blur_size + rand_adj_blur_sigma = blur_sigma + + for k , clickmap_at_k, in enumerate(clickmaps): + if metric != "auc" and k < (len(clickmaps)-1): + continue rand_corrs = [] n = len(clickmap_at_k) - for _ in range(n_iterations): + if floor: + rand_clickmap_at_k = rand_clickmaps[k] + rand_n = len(rand_clickmap_at_k) + for n_iter in range(n_iterations): rand_perm = np.random.permutation(n) fh = rand_perm[:(n // 2)] - sh = rand_perm[(n // 2):] - + # Add bootstrapping to max fh/sh size to original img + fh = list(fh) + random.choices(fh, k=n//2) # Create the test and reference maps test_map = clickmap_at_k[fh].mean(0) if floor: - rand_perm = np.random.permutation(len(all_clickmaps[rand_i][k])) - sh = rand_perm[(n // 2):] - reference_map = all_clickmaps[rand_i][k][sh].mean(0) # Take maps from the same level in a random other image - #TODO Add adjusted blur size for imagenet images, add resize and center crop - # Ensure reference_map has the same shape as test_map - if reference_map.shape != test_map.shape: - # Resize reference_map to match test_map's shape - reference_map_resized = np.zeros(test_map.shape, dtype=reference_map.dtype) - # Copy the smaller of the dimensions for each axis - min_height = min(reference_map.shape[0], test_map.shape[0]) - min_width = min(reference_map.shape[1], test_map.shape[1]) - reference_map_resized[:min_height, :min_width] = reference_map[:min_height, :min_width] - reference_map = reference_map_resized - + rand_perm = np.random.permutation(rand_n) + sh = rand_perm[(rand_n // 2):] + sh = list(sh) + random.choices(sh, k=rand_n//2) + reference_map = rand_clickmap_at_k[sh].mean(0) # Take maps from the same level in a random other image reference_map = utils.blur_maps_for_cf( reference_map[None, None], - blur_size, - blur_sigma, + rand_adj_blur_size, + rand_adj_blur_sigma, gpu_batch_size=1).squeeze() test_map = utils.blur_maps_for_cf( test_map[None, None], - blur_size, - blur_sigma, + adj_blur_size, + adj_blur_sigma, gpu_batch_size=1).squeeze() else: + sh = rand_perm[(n // 2):] + sh = random.choices(sh, k=n) reference_map = clickmap_at_k[sh].mean(0) - # Make maps for each blur_clickmaps = utils.blur_maps_for_cf( np.stack((test_map, 
reference_map), axis=0)[None], - blur_size, - blur_sigma, + adj_blur_size, + adj_blur_sigma, gpu_batch_size=2).squeeze() test_map = blur_clickmaps[0] reference_map = blur_clickmaps[1] @@ -408,11 +536,11 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a image_shape = config['image_shape'] center_crop = config['center_crop'] if center_crop: - test_map = torch.tensor(test_map) - reference_map = torch.tensor(reference_map) - test_map = tvF.resize(test_map, min(image_shape)) + test_map = torch.tensor(test_map)[None] + reference_map = torch.tensor(reference_map)[None] + test_map = tvF.resize(test_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT) test_map = tvF.center_crop(test_map, center_crop) - reference_map = tvF.resize(reference_map, min(image_shape)) + reference_map = tvF.resize(reference_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT) reference_map = tvF.center_crop(reference_map, center_crop) test_map = test_map.numpy().squeeze() reference_map = reference_map.numpy().squeeze() @@ -423,18 +551,23 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a score = rankorder(test_map.flatten(), reference_map.flatten()) elif metric == "spearman": score, _ = spearmanr(test_map.flatten(), reference_map.flatten()) + elif metric == "rank_pearson": + score = rank_pearson(test_map, reference_map) + elif metric == "rank_cosine": + score = rank_cosine(test_map, reference_map) + elif metric == "emd": + score = emd_2d(test_map, reference_map) else: raise ValueError(f"Invalid metric: {metric}") rand_corrs.append(score) - # Explicitly free memory if 'blur_clickmaps' in locals(): del blur_clickmaps - rand_corrs = np.asarray(rand_corrs).mean() # Take the mean of the random correlations + rand_corrs = np.nanmean(np.asarray(rand_corrs)) # Take the mean of the random correlations level_corrs.append(rand_corrs) # Free memory - gc.collect() + # gc.collect() batch_results.append(np.asarray(level_corrs).mean()) # Integrate over the levels all_scores[img_name] = batch_results[-1] return batch_results, all_scores @@ -495,6 +628,9 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a if "constancy" not in config: config["constancy"] = False + + if "max_subjects" not in config: + config["max_subjects"] = -1 if args.metric is not None: config["metric"] = args.metric @@ -517,7 +653,6 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a image_paths = clickme_data['image_path'].unique() total_unique_images = len(image_paths) print(f"Found {total_unique_images} unique images") - # Set up GPU configuration if args.gpu_batch_size: config["gpu_batch_size"] = args.gpu_batch_size @@ -553,14 +688,14 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a image_output_dir = config["example_image_output_dir"] os.makedirs(output_dir, exist_ok=True) os.makedirs(image_output_dir, exist_ok=True) - os.makedirs(os.path.join(output_dir, config["experiment_name"]), exist_ok=True) + # os.makedirs(os.path.join(output_dir, config["experiment_name"]), exist_ok=True) # Create dedicated directory for click counts - click_counts_dir = os.path.join(output_dir, f"{config['experiment_name']}_click_counts") - os.makedirs(click_counts_dir, exist_ok=True) + # click_counts_dir = os.path.join(output_dir, f"{config['experiment_name']}_click_counts") + # os.makedirs(click_counts_dir, exist_ok=True) # Original code for non-HDF5 format - hdf5_path = 
os.path.join(output_dir, f"{config['experiment_name']}.h5") + hdf5_path = os.path.join(output_dir, f"{config['experiment_name']}_ceiling_metadata.h5") print(f"Saving results to file: {hdf5_path}") with h5py.File(hdf5_path, 'w') as f: f.create_group("clickmaps") @@ -619,7 +754,6 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a min_clicks=config["min_clicks"], max_clicks=config["max_clicks"], n_jobs=config["n_jobs"]) - # Apply filters if necessary if config["class_filter_file"]: print("Filtering classes...") @@ -707,6 +841,7 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a # Process correlation batches in parallel if not config['constancy']: + print(f"number of imgs:{len(final_keep_index)}, number of maps:{len(all_clickmaps)}") ceiling_returns = Parallel(n_jobs=n_jobs, prefer="threads")( delayed(compute_correlation_batch)( batch_indices=batch, @@ -719,6 +854,7 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a blur_sigma=config.get("blur_sigma", config["blur_size"]), floor=False, config=config, + metadata=metadata, ) for batch in tqdm(batches, desc="Computing ceiling batches", total=len(batches)) ) ceiling_results, all_ceilings = zip(*ceiling_returns) @@ -736,6 +872,7 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a blur_sigma=config.get("blur_sigma", config["blur_size"]), floor=True, config=config, + metadata=metadata, ) for batch in tqdm(batches, desc="Computing floor batches", total=len(batches)) ) floor_results, all_floors = zip(*floor_returns) @@ -791,16 +928,9 @@ def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="a all_data = {} for i, name in enumerate(all_names): - clickmap = all_clickmaps[i] + clickmap = all_clickmaps[name] img_idx = int(name.split('.')[0].split('_')[-1]) zoom_level = img_idx % 3 - # if zoom_level != 2: - # target_img_idx = img_idx+(2-zoom_level) - # else: - # target_img_idx = img_idx - # target_img_name = f"{'_'.join(name.split('.')[0].split('_')[:-1])}_{str(target_img_idx).zfill(5)}.png" - # if target_img_name not in all_names: - # continue depth_path = os.path.join(depth_root, f"depth_{name.replace('.png', '.npy')}") depth_map = np.load(depth_path) all_data[name] = {"clickmap": clickmap, "scale":scales_dict[name], "zoom":zoom_level, 'img_idx':img_idx, diff --git a/ceiling_floor_estimate_large.py b/ceiling_floor_estimate_large.py new file mode 100644 index 0000000..37bd5e0 --- /dev/null +++ b/ceiling_floor_estimate_large.py @@ -0,0 +1,653 @@ +import os, sys +import random +import numpy as np +from PIL import Image +import json +import pandas as pd +import argparse +from matplotlib import pyplot as plt +from src import utils +from tqdm import tqdm +import h5py +import gc +import torch +from joblib import Parallel, delayed +from scipy.stats import spearmanr, pearsonr, rankdata, wasserstein_distance_nd +from scipy.spatial.distance import cosine +# import resource # Add resource module for file descriptor limits +from sklearn.metrics import average_precision_score +from torchvision.transforms import functional as tvF +from torchvision.transforms import InterpolationMode + +def emd_2d(test_map, ref_map): + test_map = (test_map - test_map.min()) / (test_map.max() - test_map.min()+1e-8) + ref_map = (ref_map - ref_map.min()) / (ref_map.max() - ref_map.min()+1e-8) + return wasserstein_distance_nd(test_map, ref_map) + +def rank_cosine(test_map, ref_map): + ref_map = ref_map.flatten() + test_map = 
test_map.flatten() + + non_zero_pos = np.where(ref_map != 0)[0] + + ref_rank = rankdata(ref_map, method='average') + test_rank = rankdata(test_map, method='average') + + ref_rank = ref_rank[non_zero_pos] + test_rank = test_rank[non_zero_pos] + ref_rank = np.float64(ref_rank) + test_rank = np.float64(test_rank) + + if test_rank.size > 1 and ref_rank.size > 1: + cosine_score = cosine(ref_rank, test_rank) + return cosine_score + else: + return float('nan') + +def rank_pearson(test_map, ref_map): + ref_map = ref_map.flatten() + test_map = test_map.flatten() + + non_zero_pos = np.where(ref_map != 0)[0] + ref_rank = rankdata(ref_map, method='average') + test_rank = rankdata(test_map, method='average') + + ref_rank = ref_rank[non_zero_pos] + test_rank = test_rank[non_zero_pos] + if test_rank.size > 1 and ref_rank.size > 1: + pearson_score = pearsonr(ref_rank, test_rank) + return pearson_score.statistic + else: + return float('nan') + +def auc(test_map, reference_map, thresholds=10, metric="iou"): + """Compute the area under the IOU curve for a test map and a reference map""" + scores = [] + + # Normalize each map to [0,1] + test_map = (test_map - test_map.min()) / (test_map.max() - test_map.min()+1e-8) + reference_map = (reference_map - reference_map.min()) / (reference_map.max() - reference_map.min()+1e-8) + + # Create evenly spaced thresholds from 0 to 1 + # if thresholds == 1: + # thresholds = [0] + # else: + # thresholds = np.linspace(0, 1, thresholds) + thresholds = np.arange(0.05, 1., 0.05) + + # Calculate IOU at each threshold pair + for threshold in thresholds: + ref_binary = reference_map > threshold + if metric.lower() == "map": + score = average_precision_score(ref_binary, test_map) + elif metric.lower() == "iou": + test_binary = test_map > threshold + intersection = np.sum(np.logical_and(test_binary, ref_binary)) + union = np.sum(np.logical_or(test_binary, ref_binary)) + score = intersection / union if union > 0 else 0.0 + else: + raise ValueError(f"Invalid metric: {metric}") + scores.append(score) + + # Return the area under the curve (trapezoidal integration) + # We're integrating over normalized threshold range [0,1] + return np.trapz(scores, x=thresholds) if len(thresholds) > 1 else np.mean(scores) + + +def rankorder(test_map, reference_map, threshold=0.): + """ + 1. Rank order the test map. + 2. Binarize the reference map to get a mask of locations that we look at + 3. 
Average test map ranks within the reference map
+
+    Parameters:
+    -----------
+    test_map : numpy.ndarray
+        The test map to be rank ordered
+    reference_map : numpy.ndarray
+        The reference map to be binarized
+    threshold : float, optional
+        Threshold to binarize the reference map, default is 0.
+
+    Returns:
+    --------
+    float
+        The average rank of test map values within the reference map mask
+    """
+    # Normalize the reference map
+    reference_map = reference_map / reference_map.max()
+
+    # Binarize the reference map to create a mask
+    mask = reference_map > threshold
+
+    # Get flat indices of non-zero elements in mask
+    mask_indices = np.where(mask.flatten())[0]
+
+    if mask_indices.size == 0:
+        return 0.0  # Return 0 if no pixels are in the mask
+
+    # Get the flattened test map
+    flat_test_map = test_map.flatten()
+
+    # Rank order the test map (higher values get higher ranks)
+    # First argsort finds positions in sorted order
+    # Second argsort converts those positions to ranks
+    # We use flat_test_map directly (not negated) to make higher values = higher ranks
+    ranks = np.argsort(np.argsort(flat_test_map))
+
+    # Normalize ranks to [0, 1] where 1 represents the highest value
+    normalized_ranks = ranks / (len(ranks) - 1) if len(ranks) > 1 else ranks
+
+    # Calculate mean rank within mask
+    mean_rank = normalized_ranks[mask_indices].mean()
+
+    return mean_rank
+
+
+def compute_correlation_batch(batch_indices, all_clickmaps, all_names, metric="auc", n_iterations=10, device='cuda', blur_size=11, blur_sigma=1.5, floor=False, config=None, metadata=None):
+    """Compute split-half correlations for a batch of clickmaps in parallel"""
+    batch_results = []
+    all_scores = {}
+    max_kernel_size = config.get("max_kernel_size", 51)
+    blur_sigma_function = config.get("blur_sigma_function", lambda x: x)
+    for i in tqdm(batch_indices, desc="Computing split-half correlations", total=len(batch_indices), mininterval=10):
+        img_name = all_names[i]
+        clickmaps = all_clickmaps[img_name.replace('/', '_')]
+        level_corrs = []
+        if metadata and img_name in metadata:
+            native_size = metadata[img_name]
+            short_side = min(native_size)
+            scale = short_side / min(clickmaps[-1].shape[-2:])
+            adj_blur_size = int(np.round(blur_size * scale))
+            if not adj_blur_size % 2:
+                adj_blur_size += 1
+            adj_blur_size = min(adj_blur_size, max_kernel_size)
+            adj_blur_sigma = blur_sigma_function(adj_blur_size)
+        else:
+            adj_blur_size = blur_size
+            adj_blur_sigma = blur_sigma
+            if metadata:
+                print("Missing in Metadata", img_name)
+        if floor:
+            rand_i = np.random.randint(len(all_clickmaps) - 1)
+            if rand_i >= i:
+                rand_i += 1
+            rand_name = all_names[rand_i]
+            random_map = all_clickmaps[rand_name.replace('/', '_')]
+            if metadata and rand_name in metadata:
+                native_size = metadata[rand_name]
+                short_side = min(native_size)
+                scale = short_side / min(random_map[-1].shape[-2:])
+                rand_adj_blur_size = int(np.round(blur_size * scale))
+                if not rand_adj_blur_size % 2:
+                    rand_adj_blur_size += 1
+                rand_adj_blur_size = min(rand_adj_blur_size, max_kernel_size)
+                rand_adj_blur_sigma = blur_sigma_function(rand_adj_blur_size)
+            else:
+                rand_adj_blur_size = blur_size
+                rand_adj_blur_sigma = blur_sigma
+                if metadata:
+                    print("Missing in Metadata", rand_name)
+
+        for k, clickmap_at_k in enumerate(clickmaps):
+            if metric != "auc" and k < (len(clickmaps)-1):
+                continue
+            rand_corrs = []
+            n = len(clickmap_at_k)
+            if floor:
+                rand_clickmap_at_k = random_map[k]
+                rand_n = len(rand_clickmap_at_k)
+            for _ in range(n_iterations):
+                rand_perm = np.random.permutation(n)
+                fh = 
rand_perm[:(n // 2)] + # Add bootstrapping to max fh/sh size to original img + fh = random.choices(fh, k=n) + # Create the test and reference maps + test_map = clickmap_at_k[fh].mean(0) + if floor: + rand_perm = np.random.permutation(rand_n) + sh = rand_perm[(rand_n // 2):] + sh = random.choices(sh, k=rand_n) + reference_map = rand_clickmap_at_k[sh].mean(0) # Take maps from the same level in a random other image + reference_map = utils.blur_maps_for_cf( + reference_map[None, None], + rand_adj_blur_size, + rand_adj_blur_sigma, + gpu_batch_size=1).squeeze() + test_map = utils.blur_maps_for_cf( + test_map[None, None], + adj_blur_size, + adj_blur_sigma, + gpu_batch_size=1).squeeze() + else: + sh = rand_perm[(n // 2):] + sh = random.choices(sh, k=n) + reference_map = clickmap_at_k[sh].mean(0) + + # Make maps for each + blur_clickmaps = utils.blur_maps_for_cf( + np.stack((test_map, reference_map), axis=0)[None], + adj_blur_size, + adj_blur_sigma, + gpu_batch_size=2).squeeze() + test_map = blur_clickmaps[0] + reference_map = blur_clickmaps[1] + if config: + image_shape = config['image_shape'] + center_crop = config['center_crop'] + if center_crop: + test_map = torch.tensor(test_map)[None, :, :] + reference_map = torch.tensor(reference_map)[None, :, :] + test_map = tvF.resize(test_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT) + test_map = tvF.center_crop(test_map, center_crop) + reference_map = tvF.resize(reference_map, min(image_shape), interpolation=InterpolationMode.NEAREST_EXACT) + reference_map = tvF.center_crop(reference_map, center_crop) + test_map = test_map.numpy().squeeze() + reference_map = reference_map.numpy().squeeze() + # Use scipy's spearman correlation + if metric == "auc": + score = auc(test_map.flatten(), reference_map.flatten()) + elif metric == "rankorder": + score = rankorder(test_map.flatten(), reference_map.flatten()) + elif metric == "spearman": + score, _ = spearmanr(test_map.flatten(), reference_map.flatten()) + elif metric == "rank_pearson": + score = rank_pearson(test_map, reference_map) + elif metric == "rank_cosine": + score = rank_cosine(test_map, reference_map) + elif metric == "emd": + score = emd_2d(test_map, reference_map) + else: + raise ValueError(f"Invalid metric: {metric}") + rand_corrs.append(score) + + # Explicitly free memory + if 'blur_clickmaps' in locals(): + del blur_clickmaps + + rand_corrs = np.nanmean(np.asarray(rand_corrs)) # Take the mean of the random correlations + level_corrs.append(rand_corrs) + # Free memory + # gc.collect() + batch_results.append(np.asarray(level_corrs).mean()) # Integrate over the levels + all_scores[img_name] = batch_results[-1] + return batch_results, all_scores + + +if __name__ == "__main__": + # Add command line arguments + parser = argparse.ArgumentParser(description="Process clickme data for modeling") + parser.add_argument('config', nargs='?', help='Path to config file') + parser.add_argument('--debug', action='store_true', help='Enable additional debug output') + parser.add_argument('--verbose', action='store_true', help='Show detailed progress for GPU processing') + parser.add_argument('--gpu-batch-size', type=int, default=None, help='Override GPU batch size') + parser.add_argument('--max-workers', type=int, default=None, help='Maximum number of CPU workers') + parser.add_argument('--profile', action='store_true', help='Enable performance profiling') + parser.add_argument('--filter-duplicates', action='store_false', help='Filter duplicate participant submissions, keeping only the first 
submission per image') + parser.add_argument('--max-open-files', type=int, default=4096, help='Maximum number of open files allowed') + parser.add_argument('--correlation-batch-size', type=int, default=None, help='Override correlation batch size') + parser.add_argument('--correlation-jobs', type=int, default=None, help='Override number of parallel jobs for correlation') + parser.add_argument('--metric', type=str, default=None, help='Metric to use for correlation') + parser.add_argument('--time_based_bins', action='store_true', help='Enable time based bin threshold instead of count based') + parser.add_argument('--save_json', default=False, action='store_true') + args = parser.parse_args() + + # Increase file descriptor limit + # try: + # soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + # print(f"Current file descriptor limits: soft={soft}, hard={hard}") + # new_soft = min(args.max_open_files, hard) + # resource.setrlimit(resource.RLIMIT_NOFILE, (new_soft, hard)) + # print(f"Increased file descriptor soft limit to {new_soft}") + # except (ValueError, resource.error) as e: + # print(f"Warning: Could not increase file descriptor limit: {e}") + + # Start profiling if requested + if args.profile: + import cProfile + profiler = cProfile.Profile() + profiler.enable() + + # Load config file + if args.config: + config_file = args.config if "configs" + os.path.sep in args.config else os.path.join("configs", args.config) + assert os.path.exists(config_file), f"Cannot find config file: {config_file}" + config = utils.process_config(config_file) + else: + config_file = utils.get_config(sys.argv) + config = utils.process_config(config_file) + if "max_subjects" not in config: + config["max_subjects"] = -1 + # Add filter_duplicates to config if not present + if "filter_duplicates" not in config: + config["filter_duplicates"] = args.filter_duplicates + if "save_json" not in config: + config["save_json"] = args.save_json + + # Add time_based_bins to config if not present + if "time_based_bins" not in config: + config["time_based_bins"] = args.time_based_bins + + if "constancy" not in config: + config["constancy"] = False + + if args.metric is not None: + config["metric"] = args.metric + print(f"Overwriting metric to {args.metric}") + + # Load clickme data + print(f"Loading clickme data...") + clickme_data = utils.process_clickme_data( + config["clickme_data"], + config["filter_mobile"]) + total_maps = len(clickme_data) + + # Apply duplicate filtering if requested + if config["filter_duplicates"] or args.filter_duplicates: + clickme_data = utils.filter_duplicate_participants(clickme_data) + total_maps = len(clickme_data) + + # Validate clickme data structure + print(f"Validating clickme data structure for {total_maps} maps...") + image_paths = clickme_data['image_path'].unique() + total_unique_images = len(image_paths) + print(f"Found {total_unique_images} unique images") + + # Set up GPU configuration + if args.gpu_batch_size: + config["gpu_batch_size"] = args.gpu_batch_size + else: + config["gpu_batch_size"] = 4096 + + # Optimize number of workers based on CPU count + cpu_count = os.cpu_count() + if args.max_workers: + config["n_jobs"] = min(args.max_workers, cpu_count) + else: + # Leave some cores free for system operations + config["n_jobs"] = max(1, min(cpu_count - 1, 8)) + + # Verify GPU is available + config["use_gpu_blurring"] = torch.cuda.is_available() + if config["use_gpu_blurring"]: + # Print GPU info + gpu_name = torch.cuda.get_device_name(0) + gpu_memory = 
torch.cuda.get_device_properties(0).total_memory / (1024**3) # GB + print(f"Using GPU: {gpu_name} with {gpu_memory:.2f} GB memory") + else: + print("GPU not available, exiting.") + sys.exit(1) + + # Set up output format + if "output_format" not in config or config["output_format"] == "auto": + config["output_format"] = "hdf5" if total_maps > 100000 else "numpy" + output_format = config["output_format"] + + # Ensure all directories exist + output_dir = config["assets"] + image_output_dir = config["example_image_output_dir"] + temp_dir = config["temp_dir"] + + os.makedirs(output_dir, exist_ok=True) + os.makedirs(image_output_dir, exist_ok=True) + # os.makedirs(os.path.join(output_dir, config["experiment_name"]), exist_ok=True) + # Create dedicated directory for click counts + click_counts_dir = os.path.join(output_dir, f"{config['experiment_name']}_click_counts") + # os.makedirs(click_counts_dir, exist_ok=True) + + # Original code for non-HDF5 format + hdf5_path = os.path.join(output_dir, f"{config['experiment_name']}.h5") + print(f"Saving results to file: {hdf5_path}") + with h5py.File(hdf5_path, 'w') as f: + f.create_group("clickmaps") + f.create_group("click_counts") # Add group for click counts + meta_grp = f.create_group("metadata") + meta_grp.attrs["total_unique_images"] = total_unique_images + meta_grp.attrs["total_maps"] = total_maps + meta_grp.attrs["filter_duplicates"] = np.bytes_("True" if config["filter_duplicates"] else "False") + meta_grp.attrs["creation_date"] = np.bytes_(pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")) + + # Print optimization settings + print("\nProcessing settings:") + print(f"- Dataset size: {total_maps} maps, {total_unique_images} images") + print(f"- GPU batch size: {config['gpu_batch_size']}") + print(f"- CPU workers: {config['n_jobs']}") + print(f"- Output format: {config['output_format']}") + print(f"- Filter duplicates: {config['filter_duplicates']}") + print(f"- Memory usage at start: {utils.get_memory_usage():.2f} MB\n") + + # Choose processing method (compiled Cython vs. Python) + use_cython = config.get("use_cython", True) + if use_cython: + try: + from src import cython_utils + create_clickmap_func = cython_utils.create_clickmap_fast + fast_duplicate_detection = cython_utils.fast_duplicate_detection + fast_ious_binary = cython_utils.fast_ious_binary + print("Using Cython-optimized functions") + except (ImportError, ModuleNotFoundError) as e: + use_cython = False + from src import python_utils + create_clickmap_func = python_utils.create_clickmap_fast + fast_duplicate_detection = python_utils.fast_duplicate_detection + fast_ious_binary = python_utils.fast_ious_binary + print(f"Cython modules not available: {e}") + print("Falling back to Python implementation. 
For best performance, run 'python compile_cython.py build_ext --inplace' first.") + else: + from src import python_utils + create_clickmap_func = python_utils.create_clickmap_fast + fast_duplicate_detection = python_utils.fast_duplicate_detection + fast_ious_binary = python_utils.fast_ious_binary + + # Load metadata + if config["metadata_file"]: + metadata = np.load(config["metadata_file"], allow_pickle=True).item() + else: + metadata = None + + print("Processing clickme data...") + # Always use parallel processing for large datasets + clickmaps, ccounts = utils.process_clickmap_files_parallel( + clickme_data=clickme_data, + image_path=config["image_path"], + file_inclusion_filter=config["file_inclusion_filter"], + file_exclusion_filter=config["file_exclusion_filter"], + min_clicks=config["min_clicks"], + max_clicks=config["max_clicks"], + n_jobs=config["n_jobs"]) + + # Apply filters if necessary + if config["class_filter_file"]: + print("Filtering classes...") + clickmaps = utils.filter_classes( + clickmaps=clickmaps, + class_filter_file=config["class_filter_file"]) + + if config["participant_filter"]: + print("Filtering participants...") + clickmaps = utils.filter_participants(clickmaps) + + # Process all maps with our new single-batch GPU function + print(f"Processing with GPU (batch size: {config['gpu_batch_size']})...") + final_clickmaps, all_clickmaps, categories, final_keep_index, click_counts, clickmap_bins = utils.process_all_maps_multi_thresh_gpu( + clickmaps=clickmaps, + config=config, + metadata=metadata, + create_clickmap_func=create_clickmap_func, + fast_duplicate_detection=fast_duplicate_detection, + return_before_blur=True, + average_maps=False, + time_based_bins=config['time_based_bins'], + save_to_disk=True, + maximum_length=5000, + ) + # Apply mask filtering if needed\ + # Not modified since it's not used for imagenet clickmaps + if final_keep_index and config["mask_dir"]: + print("Applying mask filtering...") + masks = utils.load_masks(config["mask_dir"]) + final_clickmaps, all_clickmaps, categories, final_keep_index = utils.filter_for_foreground_masks( + final_clickmaps=final_clickmaps, + all_clickmaps=all_clickmaps, + categories=categories, + masks=masks, + mask_threshold=config["mask_threshold"] + ) + # Update click counts to match filtered images + click_counts = {k: click_counts[k] for k in final_keep_index if k in click_counts} + + # Convert all_clickmaps to the format expected by the correlation code + image_shape = config["image_shape"] + correlation_batch_size = config["correlation_batch_size"] + null_iterations = config["null_iterations"] + metric = config["metric"] + n_jobs = config["n_jobs"] + gpu_batch_size = config["gpu_batch_size"] + + # Override configuration with command-line arguments if provided + if args.correlation_batch_size: + correlation_batch_size = args.correlation_batch_size + print(f"Overriding correlation batch size: {correlation_batch_size}") + else: + # Increase default batch size to speed up processing + correlation_batch_size = max(correlation_batch_size, 16) + + if args.correlation_jobs: + n_jobs = args.correlation_jobs + print(f"Overriding correlation jobs: {n_jobs}") + else: + # Increase default number of jobs + n_jobs = max(n_jobs, min(16, os.cpu_count())) + + # Check if GPU is available + if torch.cuda.is_available(): + device = 'cuda' + print(f"GPU detected: {torch.cuda.get_device_name(0)}") + print(f"Setting batch size to {gpu_batch_size} for GPU operations") + else: + device = 'cpu' + print("No GPU detected, using CPU for 
processing") + gpu_batch_size = 16 # Smaller batch size for CPU + print(f"Converting clickmaps for correlation analysis...") + + # Compute scores through split-halfs + # Optimize by processing in batches for better parallelization + print(f"Computing split-half correlations in parallel (n_jobs={n_jobs}, batch_size={correlation_batch_size})...") + temp_file = h5py.File(temp_dir, 'r') + temp_group = temp_file['clickmaps'] + all_clickmaps = temp_group + num_clickmaps = len(temp_group) + print(f"Num clickmaps {len(temp_group)}") + # Prepare batches for correlation computation + indices = list(range(num_clickmaps)) + batches = [indices[i:i+correlation_batch_size] for i in range(0, len(indices), correlation_batch_size)] + + # Process correlation batches in parallel + ceiling_returns = Parallel(n_jobs=n_jobs, prefer="threads")( + delayed(compute_correlation_batch)( + batch_indices=batch, + all_clickmaps=all_clickmaps, + all_names=final_keep_index, + metric=metric, + n_iterations=null_iterations, + device=device, + blur_size=config["blur_size"], + blur_sigma=config.get("blur_sigma", config["blur_size"]), + floor=False, + config=config, + metadata=metadata, + ) for batch in tqdm(batches, desc="Computing ceiling batches", total=len(batches)) + ) + ceiling_results, all_ceilings = zip(*ceiling_returns) + # Force garbage collection between major operations + gc.collect() + all_img_ceilings = {} + for img_ceilings in all_ceilings: + for img_name, score in img_ceilings.items(): + all_img_ceilings[img_name] = score + all_ceilings = np.concatenate(ceiling_results) + mean_ceiling = np.nanmean(all_ceilings) + if config['save_json']: + # Save as json + with open(os.path.join(output_dir, f"{config['experiment_name']}_{config['metric']}_ceiling_results.json"), 'w') as f: + output_json = {"all_imgs": final_keep_index, 'mean_ceiling':mean_ceiling, + 'all_ceilings':all_ceilings, 'all_img_ceilings':all_img_ceilings + } + for key, value in output_json.items(): + if isinstance(value, np.ndarray): + output_json[key] = value.tolist() + output_content = json.dumps(output_json, indent=4) + f.write(output_content) + + floor_returns = Parallel(n_jobs=n_jobs, prefer="threads")( + delayed(compute_correlation_batch)( + batch_indices=batch, + all_clickmaps=all_clickmaps, + all_names=final_keep_index, + metric=metric, + n_iterations=null_iterations, + device=device, + blur_size=config["blur_size"], + blur_sigma=config.get("blur_sigma", config["blur_size"]), + floor=True, + config=config, + metadata=metadata, + ) for batch in tqdm(batches, desc="Computing floor batches", total=len(batches)) + ) + floor_results, all_floors = zip(*floor_returns) + all_img_floors = {} + + for img_ceilings in all_floors: + for img_name, score in img_ceilings.items(): + all_img_floors[img_name] = score + # Flatten the results + all_floors = np.concatenate(floor_results) + + # Compute the mean of the ceilings and floors + mean_floor = np.nanmean(all_floors) + + # Compute the ratio of the mean of the ceilings to the mean of the floors + ratio = mean_ceiling / mean_floor + print(f"Mean ceiling: {mean_ceiling}, Mean floor: {mean_floor}, Ratio: {ratio}") + + # Save the results + np.savez( + os.path.join(output_dir, f"{config['experiment_name']}_{config['metric']}_ceiling_floor_results.npz"), + mean_ceiling=mean_ceiling, + mean_floor=mean_floor, + all_ceilings=all_ceilings, + all_floors=all_floors, + all_img_ceilings=all_img_ceilings, + all_img_floors=all_img_floors, + ratio=ratio) + if config['save_json']: + # Save as json + with 
open(os.path.join(output_dir, f"{config['experiment_name']}_{config['metric']}_ceiling_floor_results.json"), 'w') as f: + output_json = {"all_imgs": final_keep_index, 'mean_ceiling':mean_ceiling, 'mean_floor':mean_floor, + 'all_ceilings':all_ceilings, 'all_floors':all_floors, 'all_img_ceilings':all_img_ceilings, + 'all_img_floors':all_img_floors} + for key, value in output_json.items(): + if isinstance(value, np.ndarray): + output_json[key] = value.tolist() + output_content = json.dumps(output_json, indent=4) + f.write(output_content) + + # Delete temp file to save disk + if os.path.exists(temp_dir): + os.remove(temp_dir) + + # End profiling if it was enabled + if args.profile: + profiler.disable() + import pstats + from io import StringIO + s = StringIO() + ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative') + ps.print_stats(30) # Print top 30 functions by time + print(s.getvalue()) + + # Save profile results to file + ps.dump_stats(os.path.join(output_dir, "profile_results.prof")) + print(f"Profile results saved to {os.path.join(output_dir, 'profile_results.prof')}") + + print(f"\nProcessing complete! Final memory usage: {utils.get_memory_usage():.2f} MB") diff --git a/configs/balance_exp_configs/imagenet_val_oscar_max_10.yaml b/configs/balance_exp_configs/imagenet_val_oscar_max_10.yaml new file mode 100644 index 0000000..103f6b4 --- /dev/null +++ b/configs/balance_exp_configs/imagenet_val_oscar_max_10.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: clickme_datasets/sampled_imgnet_val.csv +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: balanced_exp_10_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 20 +max_subjects: 10 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/balance_exp_configs/imagenet_val_oscar_max_15.yaml b/configs/balance_exp_configs/imagenet_val_oscar_max_15.yaml new file mode 100644 index 0000000..78769b8 --- /dev/null +++ b/configs/balance_exp_configs/imagenet_val_oscar_max_15.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: clickme_datasets/sampled_imgnet_val.csv +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: balanced_exp_15_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: 
/gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 20 +max_subjects: 15 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/balance_exp_configs/imagenet_val_oscar_max_20.yaml b/configs/balance_exp_configs/imagenet_val_oscar_max_20.yaml new file mode 100644 index 0000000..3cd0703 --- /dev/null +++ b/configs/balance_exp_configs/imagenet_val_oscar_max_20.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: clickme_datasets/sampled_imgnet_val.csv +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: balanced_exp_20_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 20 +max_subjects: 20 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/balance_exp_configs/imagenet_val_oscar_max_5.yaml b/configs/balance_exp_configs/imagenet_val_oscar_max_5.yaml new file mode 100644 index 0000000..144fe95 --- /dev/null +++ b/configs/balance_exp_configs/imagenet_val_oscar_max_5.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: clickme_datasets/sampled_imgnet_val.csv +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: balanced_exp_5_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 20 +max_subjects: 5 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false 
+participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/co3d_train.yaml b/configs/co3d_configs/co3d_train.yaml similarity index 98% rename from configs/co3d_train.yaml rename to configs/co3d_configs/co3d_train.yaml index 2cd19f9..7403a11 100644 --- a/configs/co3d_train.yaml +++ b/configs/co3d_configs/co3d_train.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d blur_size: 21 center_crop: - 224 diff --git a/configs/co3d_train_oscar.yaml b/configs/co3d_configs/co3d_train_oscar.yaml similarity index 98% rename from configs/co3d_train_oscar.yaml rename to configs/co3d_configs/co3d_train_oscar.yaml index a0f2742..22d3e92 100644 --- a/configs/co3d_train_oscar.yaml +++ b/configs/co3d_configs/co3d_train_oscar.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d blur_size: 21 center_crop: - 224 diff --git a/configs/co3d_val.yaml b/configs/co3d_configs/co3d_val.yaml similarity index 98% rename from configs/co3d_val.yaml rename to configs/co3d_configs/co3d_val.yaml index ccf8ef2..c11d0b1 100644 --- a/configs/co3d_val.yaml +++ b/configs/co3d_configs/co3d_val.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d blur_size: 21 center_crop: - 224 diff --git a/configs/co3d_val_oscar.yaml b/configs/co3d_configs/co3d_val_auc_oscar.yaml similarity index 98% rename from configs/co3d_val_oscar.yaml rename to configs/co3d_configs/co3d_val_auc_oscar.yaml index ccd5d2f..1243ad2 100644 --- a/configs/co3d_val_oscar.yaml +++ b/configs/co3d_configs/co3d_val_auc_oscar.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d blur_size: 21 center_crop: - 224 diff --git a/configs/co3d_configs/co3d_val_spearman_oscar.yaml b/configs/co3d_configs/co3d_val_spearman_oscar.yaml new file mode 100644 index 0000000..1d29b1a --- /dev/null +++ b/configs/co3d_configs/co3d_val_spearman_oscar.yaml @@ -0,0 +1,41 @@ +assets: assets/co3d +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +# clickme_data: clickme_datasets/dump_co3d_combined_03_13_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/dump_co3d_05_28_2025_unique.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: co3d_val_images +experiment_name: co3d_val +file_exclusion_filter: ILSVRC2012_val +file_inclusion_filter: CO3D_ClickMe2 +filter_mobile: true +gpu_batch_size: 4096 +image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/CO3D_ClickMe2 +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0. 
+max_clicks: 1000000 # Set to an impossibly high number to keep all maps +metadata_file: false +metric: spearman +min_clicks: 1 +min_subjects: 6 +n_jobs: -1 +null_iterations: 100 +parallel_prepare_maps: true +parallel_save: true +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: false +processed_clickme_file: co3d_val_processed.npz +processed_clickmap_bins: co3d_val_clickmap_bins.npy +processed_medians: co3d_val_medians.json +remove_string: false +multi_thresh_gpu: true +time_based_bins: true \ No newline at end of file diff --git a/configs/co3d_constancy_val.yaml b/configs/co3d_constancy_configs/co3d_constancy_val.yaml similarity index 100% rename from configs/co3d_constancy_val.yaml rename to configs/co3d_constancy_configs/co3d_constancy_val.yaml diff --git a/configs/co3d_constancy_val_oscar.yaml b/configs/co3d_constancy_configs/co3d_constancy_val_oscar.yaml similarity index 97% rename from configs/co3d_constancy_val_oscar.yaml rename to configs/co3d_constancy_configs/co3d_constancy_val_oscar.yaml index 04e2503..85cb30a 100644 --- a/configs/co3d_constancy_val_oscar.yaml +++ b/configs/co3d_constancy_configs/co3d_constancy_val_oscar.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d_constancy blur_size: 21 center_crop: - 224 diff --git a/configs/constancy_ceiling_auc.yaml b/configs/co3d_constancy_configs/constancy_ceiling_auc.yaml similarity index 95% rename from configs/constancy_ceiling_auc.yaml rename to configs/co3d_constancy_configs/constancy_ceiling_auc.yaml index 45668c0..a0a70b9 100644 --- a/configs/constancy_ceiling_auc.yaml +++ b/configs/co3d_constancy_configs/constancy_ceiling_auc.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d_constancy blur_size: 21 center_crop: - 224 @@ -10,7 +10,7 @@ correlation_batch_size: 128 debug: false display_image_keys: auto example_image_output_dir: co3d_constancy_val_images -experiment_name: co3d_constancy_val +experiment_name: co3d_constancy_val_obs file_exclusion_filter: ILSVRC2012_val file_inclusion_filter: CO3D_Constancy # CO3D_ClickMe2 filter_mobile: true diff --git a/configs/co3d_constancy_configs/constancy_ceiling_emd.yaml b/configs/co3d_constancy_configs/constancy_ceiling_emd.yaml new file mode 100644 index 0000000..b7a5557 --- /dev/null +++ b/configs/co3d_constancy_configs/constancy_ceiling_emd.yaml @@ -0,0 +1,45 @@ +assets: assets +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +# clickme_data: clickme_datasets/dump_co3d_combined_03_13_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/dump_co3d_05_28_2025_unique.npz +correlation_batch_size: 128 +debug: false +display_image_keys: auto +example_image_output_dir: co3d_constancy_val_images +experiment_name: co3d_constancy_val_obs +file_exclusion_filter: ILSVRC2012_val +file_inclusion_filter: CO3D_Constancy # CO3D_ClickMe2 +filter_mobile: true +gpu_batch_size: 4096 +image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/PeRFception/data/co3d_v2/clickme_trajectories/ +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0. 
+max_clicks: 1000000 # Set to an impossibly high number to keep all maps +metadata_file: false +metric: emd +min_clicks: 1 +min_subjects: 4 +n_jobs: -1 +null_iterations: 20 # Potentially lower this number for speed up +parallel_prepare_maps: true +parallel_save: true +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: false +processed_clickme_file: co3d_constancy_val_processed.npz +processed_clickmap_bins: co3d_constancy_val_clickmap_bins.npy +processed_medians: co3d_constancy_val_medians.json +remove_string: false +multi_thresh_gpu: true +constancy: true +camera_path: clickme_datasets/constancy_params.json +depth_path: /oscar/data/tserre/Users/pzhou10/CVM/onevision/assets/constancy_depths +save_json: true +time_based_bins: true diff --git a/configs/co3d_constancy_configs/constancy_ceiling_rank_cosine.yaml b/configs/co3d_constancy_configs/constancy_ceiling_rank_cosine.yaml new file mode 100644 index 0000000..2848284 --- /dev/null +++ b/configs/co3d_constancy_configs/constancy_ceiling_rank_cosine.yaml @@ -0,0 +1,45 @@ +assets: assets +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +# clickme_data: clickme_datasets/dump_co3d_combined_03_13_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/dump_co3d_05_28_2025_unique.npz +correlation_batch_size: 128 +debug: false +display_image_keys: auto +example_image_output_dir: co3d_constancy_val_images +experiment_name: co3d_constancy_val_obs +file_exclusion_filter: ILSVRC2012_val +file_inclusion_filter: CO3D_Constancy # CO3D_ClickMe2 +filter_mobile: true +gpu_batch_size: 4096 +image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/PeRFception/data/co3d_v2/clickme_trajectories/ +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0. 
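+# metric: rank_cosine below is assumed to rank-transform both maps and score them with scipy.spatial.distance.cosine (1 - cosine similarity), so as with emd, lower = more similar.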
+max_clicks: 1000000 # Set to an impossibly high number to keep all maps +metadata_file: false +metric: rank_cosine +min_clicks: 1 +min_subjects: 4 +n_jobs: -1 +null_iterations: 20 # Potentially lower this number for speed up +parallel_prepare_maps: true +parallel_save: true +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: false +processed_clickme_file: co3d_constancy_val_processed.npz +processed_clickmap_bins: co3d_constancy_val_clickmap_bins.npy +processed_medians: co3d_constancy_val_medians.json +remove_string: false +multi_thresh_gpu: true +constancy: true +camera_path: clickme_datasets/constancy_params.json +depth_path: /oscar/data/tserre/Users/pzhou10/CVM/onevision/assets/constancy_depths +save_json: true +time_based_bins: true diff --git a/configs/co3d_constancy_configs/constancy_ceiling_rank_pearson.yaml b/configs/co3d_constancy_configs/constancy_ceiling_rank_pearson.yaml new file mode 100644 index 0000000..5c838d6 --- /dev/null +++ b/configs/co3d_constancy_configs/constancy_ceiling_rank_pearson.yaml @@ -0,0 +1,45 @@ +assets: assets +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +# clickme_data: clickme_datasets/dump_co3d_combined_03_13_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/dump_co3d_05_28_2025_unique.npz +correlation_batch_size: 128 +debug: false +display_image_keys: auto +example_image_output_dir: co3d_constancy_val_images +experiment_name: co3d_constancy_val_obs +file_exclusion_filter: ILSVRC2012_val +file_inclusion_filter: CO3D_Constancy # CO3D_ClickMe2 +filter_mobile: true +gpu_batch_size: 4096 +image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/PeRFception/data/co3d_v2/clickme_trajectories/ +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0. 
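+# metric: rank_pearson below is assumed to be a Pearson correlation computed on rank-transformed maps; when there are no ties this coincides with Spearman's rho.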
+max_clicks: 1000000 # Set to an impossibly high number to keep all maps +metadata_file: false +metric: rank_pearson +min_clicks: 1 +min_subjects: 4 +n_jobs: -1 +null_iterations: 20 # Potentially lower this number for speed up +parallel_prepare_maps: true +parallel_save: true +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: false +processed_clickme_file: co3d_constancy_val_processed.npz +processed_clickmap_bins: co3d_constancy_val_clickmap_bins.npy +processed_medians: co3d_constancy_val_medians.json +remove_string: false +multi_thresh_gpu: true +constancy: true +camera_path: clickme_datasets/constancy_params.json +depth_path: /oscar/data/tserre/Users/pzhou10/CVM/onevision/assets/constancy_depths +save_json: true +time_based_bins: true diff --git a/configs/constancy_ceiling_spearman.yaml b/configs/co3d_constancy_configs/constancy_ceiling_spearman.yaml similarity index 95% rename from configs/constancy_ceiling_spearman.yaml rename to configs/co3d_constancy_configs/constancy_ceiling_spearman.yaml index 06fff09..c3916a1 100644 --- a/configs/constancy_ceiling_spearman.yaml +++ b/configs/co3d_constancy_configs/constancy_ceiling_spearman.yaml @@ -1,4 +1,4 @@ -assets: assets +assets: assets/co3d_constancy blur_size: 21 center_crop: - 224 @@ -10,7 +10,7 @@ correlation_batch_size: 128 debug: false display_image_keys: auto example_image_output_dir: co3d_constancy_val_images -experiment_name: co3d_constancy_val +experiment_name: co3d_constancy_val_obs file_exclusion_filter: ILSVRC2012_val file_inclusion_filter: CO3D_Constancy # CO3D_ClickMe2 filter_mobile: true diff --git a/configs/debug/bottom_10.yaml b/configs/debug/bottom_10.yaml new file mode 100644 index 0000000..c475f45 --- /dev/null +++ b/configs/debug/bottom_10.yaml @@ -0,0 +1,46 @@ +assets: assets/debug +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: clickme_datasets/bottom_imgnet_val.csv +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: bottom_imgnet +experiment_name: bottom_imgnet +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 20 +max_subjects: 5 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: bottom_imgnet_processed.npz +processed_medians: bottom_imgnet_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: bottom_imgnet_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true diff --git a/configs/exp_configs/imagenet_val_oscar_max_10.yaml b/configs/exp_configs/imagenet_val_oscar_max_10.yaml new file mode 100644 index 0000000..4ab7659 --- /dev/null +++ b/configs/exp_configs/imagenet_val_oscar_max_10.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto 
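+# One point in the subject-count sweep: the six exp_configs files are identical except for experiment_name and max_subjects (5, 10, 15, 20, 25, 30); scripts/run_exp.sh runs them all in sequence.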
+example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: exp_10_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 30 +max_subjects: 10 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/exp_configs/imagenet_val_oscar_max_15.yaml b/configs/exp_configs/imagenet_val_oscar_max_15.yaml new file mode 100644 index 0000000..24d0469 --- /dev/null +++ b/configs/exp_configs/imagenet_val_oscar_max_15.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: exp_15_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 30 +max_subjects: 15 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/exp_configs/imagenet_val_oscar_max_20.yaml b/configs/exp_configs/imagenet_val_oscar_max_20.yaml new file mode 100644 index 0000000..741048c --- /dev/null +++ b/configs/exp_configs/imagenet_val_oscar_max_20.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: exp_20_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 
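+# min_subjects / max_subjects below: only images with at least min_subjects (30) subjects are kept, and max_subjects is assumed to cap how many of those subjects are actually used per image; prepare_clickmaps.py defaults max_subjects to -1 (no cap) when the key is absent.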
+image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 30 +max_subjects: 20 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/exp_configs/imagenet_val_oscar_max_25.yaml b/configs/exp_configs/imagenet_val_oscar_max_25.yaml new file mode 100644 index 0000000..33af46f --- /dev/null +++ b/configs/exp_configs/imagenet_val_oscar_max_25.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: exp_25_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 30 +max_subjects: 25 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/exp_configs/imagenet_val_oscar_max_30.yaml b/configs/exp_configs/imagenet_val_oscar_max_30.yaml new file mode 100644 index 0000000..df4e648 --- /dev/null +++ b/configs/exp_configs/imagenet_val_oscar_max_30.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: exp_30_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman 
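+# Ceiling sketch (assumed split-half scheme; names hypothetical): subjects for an image are split at random, the two half-mean maps are compared with the metric above, and null_iterations sets the number of random splits, e.g. rho = spearmanr(half_a.mean(0).ravel(), half_b.mean(0).ravel()).statistic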
+min_clicks: 1 +min_subjects: 30 +max_subjects: 30 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/exp_configs/imagenet_val_oscar_max_5.yaml b/configs/exp_configs/imagenet_val_oscar_max_5.yaml new file mode 100644 index 0000000..5b1b9df --- /dev/null +++ b/configs/exp_configs/imagenet_val_oscar_max_5.yaml @@ -0,0 +1,47 @@ +assets: assets/exp +temp_dir: temp +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz +correlation_batch_size: 1024 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_08_27_2025_images +experiment_name: exp_5_subjects_08_27_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 2048 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 30 +max_subjects: 5 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_08_27_2025_processed.npz +processed_medians: jay_imagenet_val_combined_08_27_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_08_27_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true diff --git a/configs/imagenet_co3d_val_oscar.yaml b/configs/imgnet_configs/imagenet_co3d_val_oscar.yaml similarity index 63% rename from configs/imagenet_co3d_val_oscar.yaml rename to configs/imgnet_configs/imagenet_co3d_val_oscar.yaml index 2dd1bfc..9837d19 100644 --- a/configs/imagenet_co3d_val_oscar.yaml +++ b/configs/imgnet_configs/imagenet_co3d_val_oscar.yaml @@ -1,20 +1,20 @@ -assets: assets +assets: assets/co3d blur_size: 21 center_crop: - 224 - 224 class_filter_file: category_maps/synset_to_co3d.npy -clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_07_09_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz correlation_batch_size: 1024 debug: false display_image_keys: auto -example_image_output_dir: jay_imagenet_co3d_val_04_30_2025_images -experiment_name: jay_imagenet_co3d_val_07_09_2025 -file_exclusion_filter: ILSVRC2012_train +example_image_output_dir: jay_imagenet_co3d_val_08_27_2025_images +experiment_name: jay_imagenet_co3d_val_08_27_2025 +file_exclusion_filter: null file_inclusion_filter: ILSVRC2012_val filter_mobile: true gpu_batch_size: 4096 
-image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val image_shape: - 256 - 256 @@ -32,9 +32,9 @@ parallel_save: true participant_filter: false percentile_thresh: 50 preprocess_db_data: true -processed_clickme_file: jay_imagenet_co3d_val_04_30_2025_processed.npz -processed_clickmap_bins: jay_imagenet_co3d_val_04_30_2025_clickmap_bins.npy -processed_medians: jay_imagenet_co3d_val_04_30_2025_medians.json +processed_clickme_file: jay_imagenet_co3d_val_08_27_2025_processed.npz +processed_clickmap_bins: jay_imagenet_co3d_val_08_27_2025_clickmap_bins.npy +processed_medians: jay_imagenet_co3d_val_08_27_2025_medians.json remove_string: imagenet/val/ output_format: "numpy" time_based_bins: true diff --git a/configs/imagenet_train_oscar.yaml b/configs/imgnet_configs/imagenet_train_oscar.yaml similarity index 67% rename from configs/imagenet_train_oscar.yaml rename to configs/imgnet_configs/imagenet_train_oscar.yaml index 9f3f985..21fd032 100644 --- a/configs/imagenet_train_oscar.yaml +++ b/configs/imgnet_configs/imagenet_train_oscar.yaml @@ -1,20 +1,20 @@ -assets: assets +assets: assets/imgnet_train_10_15 blur_size: 21 center_crop: - 224 - 224 class_filter_file: false -clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/train_combined_07_09_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/train_combined_10_15_2025.npz correlation_batch_size: 4000 debug: false display_image_keys: auto -example_image_output_dir: jay_imagenet_train_combined_07_09_2025_images -experiment_name: jay_imagenet_train_07_09_2025 +example_image_output_dir: jay_imagenet_train_combined_10_15_2025_images +experiment_name: jay_imagenet_train_10_15_2025 file_exclusion_filter: ILSVRC2012_val file_inclusion_filter: null filter_mobile: true gpu_batch_size: 4096 -image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/train image_shape: - 256 - 256 @@ -32,8 +32,8 @@ parallel_save: false participant_filter: false percentile_thresh: 50 preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_combined_07_09_2025_processed.npz -processed_medians: jay_imagenet_train_combined_07_09_2025_medians.json +processed_clickme_file: jay_imagenet_train_combined_10_15_2025_processed.npz +processed_medians: jay_imagenet_train_combined_10_15_2025_medians.json remove_string: imagenet/train/ output_format: "hdf5" use_cython: true @@ -41,4 +41,4 @@ chunk_size: 100000 batch_size: 14000 time_based_bins: true multi_thresh_gpu: multi_thresh_gpu -processed_clickmap_bins: jay_imagenet_train_combined_07_09_2025_clickmap_bins.npy +processed_clickmap_bins: jay_imagenet_train_combined_10_15_2025_clickmap_bins.npy diff --git a/configs/imagenet_val_oscar.yaml b/configs/imgnet_configs/imagenet_val_oscar.yaml similarity index 63% rename from configs/imagenet_val_oscar.yaml rename to configs/imgnet_configs/imagenet_val_oscar.yaml index a17b004..6dd7b4e 100644 --- a/configs/imagenet_val_oscar.yaml +++ b/configs/imgnet_configs/imagenet_val_oscar.yaml @@ -1,20 +1,21 @@ -assets: assets +assets: assets/imgnet_val_10_15 +temp_dir: temp blur_size: 21 center_crop: - 224 - 224 class_filter_file: false -clickme_data: 
/cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_07_09_2025.npz +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_10_15_2025.npz correlation_batch_size: 1024 debug: false display_image_keys: auto -example_image_output_dir: jay_imagenet_val_combined_07_09_2025_images -experiment_name: jay_imagenet_val_07_09_2025 -file_exclusion_filter: ILSVRC2012_train +example_image_output_dir: jay_imagenet_val_combined_10_15_2025_images +experiment_name: jay_imagenet_val_10_15_2025 +file_exclusion_filter: null file_inclusion_filter: ILSVRC2012_val filter_mobile: true gpu_batch_size: 4096 -image_path: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val image_shape: - 256 - 256 @@ -32,12 +33,12 @@ parallel_save: false participant_filter: false percentile_thresh: 50 preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_combined_07_09_2025_processed.npz -processed_medians: jay_imagenet_val_combined_07_09_2025_medians.json +processed_clickme_file: jay_imagenet_val_combined_10_15_2025_processed.npz +processed_medians: jay_imagenet_val_combined_10_15_2025_medians.json remove_string: imagenet/val/ time_based_bins: true multi_thresh_gpu: multi_thresh_gpu output_format: "hdf5" -processed_clickmap_bins: jay_imagenet_val_combined_07_09_2025_clickmap_bins.npy +processed_clickmap_bins: jay_imagenet_val_combined_10_15_2025_clickmap_bins.npy chunk_size: 100000 -batch_size: 14000 \ No newline at end of file +batch_size: 14000 diff --git a/configs/imgnet_configs/imagenet_val_spearman_oscar.yaml b/configs/imgnet_configs/imagenet_val_spearman_oscar.yaml new file mode 100644 index 0000000..5756e28 --- /dev/null +++ b/configs/imgnet_configs/imagenet_val_spearman_oscar.yaml @@ -0,0 +1,46 @@ +assets: assets/imgnet_val_10_15 +temp_dir: temp/imgnet_val_10_15.h5 +blur_size: 21 +center_crop: +- 224 +- 224 +class_filter_file: false +clickme_data: /cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_10_15_2025.npz +correlation_batch_size: 4096 +debug: false +display_image_keys: auto +example_image_output_dir: jay_imagenet_val_combined_10_15_2025_images +experiment_name: jay_imagenet_val_10_15_2025 +file_exclusion_filter: null +file_inclusion_filter: ILSVRC2012_val +filter_mobile: true +gpu_batch_size: 4096 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val +image_shape: +- 256 +- 256 +mask_dir: null +mask_threshold: 0 +max_clicks: 1000000 +metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy +metric: spearman +min_clicks: 1 +min_subjects: 5 +n_jobs: -1 +null_iterations: 50 +parallel_prepare_maps: true +parallel_save: false +participant_filter: false +percentile_thresh: 50 +preprocess_db_data: true +processed_clickme_file: jay_imagenet_val_combined_10_15_2025_processed.npz +processed_medians: jay_imagenet_val_combined_10_15_2025_medians.json +remove_string: imagenet/val/ +time_based_bins: true +multi_thresh_gpu: multi_thresh_gpu +output_format: "hdf5" +processed_clickmap_bins: jay_imagenet_val_combined_10_15_2025_clickmap_bins.npy +chunk_size: 100000 +batch_size: 14000 +save_json: true +filter_duplicates: true \ No newline at end of file diff --git a/configs/jay_imagenet_for_co3d_train_0.1.yaml b/configs/imgnet_configs/jay_imagenet_for_co3d_train_0.1.yaml similarity index 91% rename from 
configs/jay_imagenet_for_co3d_train_0.1.yaml rename to configs/imgnet_configs/jay_imagenet_for_co3d_train_0.1.yaml index 333feaf..fca5283 100644 --- a/configs/jay_imagenet_for_co3d_train_0.1.yaml +++ b/configs/imgnet_configs/jay_imagenet_for_co3d_train_0.1.yaml @@ -14,7 +14,7 @@ file_exclusion_filter: false file_inclusion_filter: ILSVRC2012_val filter_mobile: true gpu_batch_size: 1024 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val image_shape: - 256 - 256 diff --git a/configs/jay_imagenet_for_co3d_val_0.1.yaml b/configs/imgnet_configs/jay_imagenet_for_co3d_val_0.1.yaml similarity index 91% rename from configs/jay_imagenet_for_co3d_val_0.1.yaml rename to configs/imgnet_configs/jay_imagenet_for_co3d_val_0.1.yaml index 547433b..3ff3390 100644 --- a/configs/jay_imagenet_for_co3d_val_0.1.yaml +++ b/configs/imgnet_configs/jay_imagenet_for_co3d_val_0.1.yaml @@ -14,7 +14,7 @@ file_exclusion_filter: false file_inclusion_filter: ILSVRC2012_val filter_mobile: true gpu_batch_size: 4096 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 +image_path: /gpfs/data/shared/imagenet/ILSVRC2012/val image_shape: - 256 - 256 diff --git a/configs/jay_imagenet_co3d_val_04_02_2025.yaml b/configs/jay_imagenet_co3d_val_04_02_2025.yaml deleted file mode 100644 index 1d5ca78..0000000 --- a/configs/jay_imagenet_co3d_val_04_02_2025.yaml +++ /dev/null @@ -1,39 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: category_maps/synset_to_co3d.npy -clickme_data: clickme_datasets/val_combined_04_02_2025.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_co3d_val_04_02_2025_images -experiment_name: jay_imagenet_co3d_val_04_02_2025 -file_exclusion_filter: ILSVRC2012_train -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 16384 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_0.1_dimensions.npy -metric: spearman -min_clicks: 1 -min_subjects: 6 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_co3d_val_04_02_2025_processed.npz -processed_clickmap_bins: jay_imagenet_co3d_val_04_02_2025_clickmap_bins.npy -processed_medians: jay_imagenet_co3d_val_04_02_2025_medians.json -remove_string: imagenet/val/ -output_format: "numpy" diff --git a/configs/jay_imagenet_co3d_val_04_30_2025.yaml b/configs/jay_imagenet_co3d_val_04_30_2025.yaml deleted file mode 100644 index c8c9688..0000000 --- a/configs/jay_imagenet_co3d_val_04_30_2025.yaml +++ /dev/null @@ -1,39 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 224 -- 224 -class_filter_file: category_maps/synset_to_co3d.npy -clickme_data: clickme_datasets/val_combined_04_30_2025.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_co3d_val_04_30_2025_images -experiment_name: jay_imagenet_co3d_val_04_30_2025 -file_exclusion_filter: ILSVRC2012_train -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 16384 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: 
false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy -metric: spearman -min_clicks: 1 -min_subjects: 6 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_co3d_val_04_30_2025_processed.npz -processed_clickmap_bins: jay_imagenet_co3d_val_04_30_2025_clickmap_bins.npy -processed_medians: jay_imagenet_co3d_val_04_30_2025_medians.json -remove_string: imagenet/val/ -output_format: "numpy" diff --git a/configs/jay_imagenet_train_0.1.yaml b/configs/jay_imagenet_train_0.1.yaml deleted file mode 100644 index 80d168b..0000000 --- a/configs/jay_imagenet_train_0.1.yaml +++ /dev/null @@ -1,36 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 224 -- 224 -class_filter_file: false -clickme_data: clickme_datasets/train_imagenet_10_17_2024.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_train_0.1_images -experiment_name: jay_imagenet_train_0.1 -file_exclusion_filter: false -file_inclusion_filter: false -filter_mobile: true -gpu_batch_size: 1024 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 100000 -metadata_file: image_metadata/jay_imagenet_train_0.1_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 3 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_0.1_processed.npz -processed_medians: jay_imagenet_train_0.1_medians.json -remove_string: imagenet/train/ diff --git a/configs/jay_imagenet_train_02_19_2025.yaml b/configs/jay_imagenet_train_02_19_2025.yaml deleted file mode 100644 index 17208b6..0000000 --- a/configs/jay_imagenet_train_02_19_2025.yaml +++ /dev/null @@ -1,37 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: db_dumps/train_combined_02_26_2025.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_train_combined_02_19_2025_images -experiment_name: jay_imagenet_train_02_19_2025 -file_exclusion_filter: false -file_inclusion_filter: false -filter_mobile: true -gpu_batch_size: 1024 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 10000 -metadata_file: image_metadata/jay_imagenet_train_0.1_dimensions.npy -metric: spearman -min_clicks: 3 -min_subjects: 3 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: true -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_combined_02_19_2025_processed.npz -processed_medians: jay_imagenet_train_combined_02_19_2025_medians.json -remove_string: imagenet/train/ diff --git a/configs/jay_imagenet_train_04_02_2025.yaml b/configs/jay_imagenet_train_04_02_2025.yaml deleted file mode 100644 index bca5d3d..0000000 --- a/configs/jay_imagenet_train_04_02_2025.yaml +++ /dev/null @@ -1,43 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/train_combined_04_02_2025.npz -correlation_batch_size: 4096 -debug: false -display_image_keys: auto 
-example_image_output_dir: jay_imagenet_train_combined_04_02_2025_images -experiment_name: jay_imagenet_train_04_02_2025 -file_exclusion_filter: ILSVRC2012_val -file_inclusion_filter: null -filter_mobile: true -gpu_batch_size: 10000 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train -image_shape: -- 256 -- 256 -mask_dir: null -mask_threshold: 1 -max_clicks: 10000 -metadata_file: image_metadata/jay_imagenet_train_0.1_dimensions.npy -metric: spearman -min_clicks: 1 -min_subjects: 3 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_combined_04_02_2025_processed.npz -processed_clickmap_bins: jay_imagenet_train_combined_04_02_2025_clickmap_bins.npy -processed_medians: jay_imagenet_train_combined_04_02_2025_medians.json -remove_string: imagenet/train/ -output_format: "hdf5" -use_cython: true -chunk_size: 100000 -batch_size: 50000 -multi_thresh_gpu: true diff --git a/configs/jay_imagenet_train_04_23_2025.yaml b/configs/jay_imagenet_train_04_23_2025.yaml deleted file mode 100644 index b6c50a6..0000000 --- a/configs/jay_imagenet_train_04_23_2025.yaml +++ /dev/null @@ -1,41 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/train_combined_04_23_2025.npz -correlation_batch_size: 4000 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_train_combined_04_23_2025_images -experiment_name: jay_imagenet_train_04_23_2025 -file_exclusion_filter: ILSVRC2012_val -file_inclusion_filter: null -filter_mobile: true -gpu_batch_size: 5000 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train -image_shape: -- 256 -- 256 -mask_dir: null -mask_threshold: 1 -max_clicks: 10000 -metadata_file: image_metadata/jay_imagenet_train_0.1_dimensions.npy -metric: spearman -min_clicks: 3 -min_subjects: 3 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_combined_04_23_2025_processed.npz -processed_medians: jay_imagenet_train_combined_04_23_2025_medians.json -remove_string: imagenet/train/ -output_format: "hdf5" -use_cython: true -chunk_size: 100000 -batch_size: 14000 diff --git a/configs/jay_imagenet_train_04_30_2025.yaml b/configs/jay_imagenet_train_04_30_2025.yaml deleted file mode 100644 index 7a6ec84..0000000 --- a/configs/jay_imagenet_train_04_30_2025.yaml +++ /dev/null @@ -1,41 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 224 -- 224 -class_filter_file: false -clickme_data: clickme_datasets/train_combined_04_30_2025.npz -correlation_batch_size: 4000 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_train_combined_04_30_2025_images -experiment_name: jay_imagenet_train_04_30_2025 -file_exclusion_filter: ILSVRC2012_val -file_inclusion_filter: null -filter_mobile: true -gpu_batch_size: 5000 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train -image_shape: -- 256 -- 256 -mask_dir: null -mask_threshold: 1 -max_clicks: 10000 -metadata_file: image_metadata/jay_imagenet_train_04_30_2025_dimensions.npy -metric: spearman -min_clicks: 3 -min_subjects: 3 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false 
-percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_combined_04_30_2025_processed.npz -processed_medians: jay_imagenet_train_combined_04_30_2025_medians.json -remove_string: imagenet/train/ -output_format: "hdf5" -use_cython: true -chunk_size: 100000 -batch_size: 14000 diff --git a/configs/jay_imagenet_train_12_18_2024.yaml b/configs/jay_imagenet_train_12_18_2024.yaml deleted file mode 100644 index defb25c..0000000 --- a/configs/jay_imagenet_train_12_18_2024.yaml +++ /dev/null @@ -1,37 +0,0 @@ -assets: jay_work_in_progress -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/jay_imagenet_train_combined_12_18_2024.npz -correlation_batch_size: 4096 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_train_combined_12_18_2024_images -experiment_name: jay_imagenet_train_12_18_2024 -file_exclusion_filter: false -file_inclusion_filter: false -filter_mobile: true -gpu_batch_size: 4096 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/train -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_train_0.1_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 3 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: true -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_train_combined_12_18_2024_processed.npz -processed_medians: jay_imagenet_train_combined_12_18_2024_medians.json -remove_string: imagenet/train/ diff --git a/configs/jay_imagenet_val_0.1.yaml b/configs/jay_imagenet_val_0.1.yaml deleted file mode 100644 index c389888..0000000 --- a/configs/jay_imagenet_val_0.1.yaml +++ /dev/null @@ -1,37 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 224 -- 224 -class_filter_file: false -clickme_data: clickme_datasets/val_imagenet_10_17_2024.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_val_0.1_images -experiment_name: jay_imagenet_val_0.1 -file_exclusion_filter: false -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 1024 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_0.1_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 5 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: true -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_0.1_processed.npz -processed_medians: jay_imagenet_val_0.1_medians.json -remove_string: imagenet/val/ diff --git a/configs/jay_imagenet_val_02_19_2025.yaml b/configs/jay_imagenet_val_02_19_2025.yaml deleted file mode 100644 index 77b4972..0000000 --- a/configs/jay_imagenet_val_02_19_2025.yaml +++ /dev/null @@ -1,37 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/val_combined_04_02_2025.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_val_combined_04_02_2025_images -experiment_name: jay_imagenet_val_04_02_2025 -file_exclusion_filter: ILSVRC2012_train -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 4096 -image_path: 
/media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_0.1_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 5 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: true -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_combined_02_19_2025_processed.npz -processed_medians: jay_imagenet_val_combined_02_19_2025_medians.json -remove_string: imagenet/val/ diff --git a/configs/jay_imagenet_val_04_02_2025.yaml b/configs/jay_imagenet_val_04_02_2025.yaml deleted file mode 100644 index 6ce3c7b..0000000 --- a/configs/jay_imagenet_val_04_02_2025.yaml +++ /dev/null @@ -1,39 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/val_combined_04_02_2025.npz -correlation_batch_size: 8 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_val_combined_04_02_2025_images -experiment_name: jay_imagenet_val_04_02_2025 -file_exclusion_filter: ILSVRC2012_train -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 16384 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: null -mask_threshold: 0. -max_clicks: 100000 -metadata_file: image_metadata/jay_imagenet_val_0.1_dimensions.npy -metric: rankorder -min_clicks: 1 -min_subjects: 6 -n_jobs: -1 -null_iterations: 20 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_combined_04_02_2025_processed.npz -processed_clickmap_bins: jay_imagenet_val_combined_04_02_2025_clickmap_bins.npy -processed_medians: jay_imagenet_val_combined_04_02_2025_medians.json -remove_string: imagenet/val/ -multi_thresh_gpu: true diff --git a/configs/jay_imagenet_val_04_23_2025.yaml b/configs/jay_imagenet_val_04_23_2025.yaml deleted file mode 100644 index 43e37a3..0000000 --- a/configs/jay_imagenet_val_04_23_2025.yaml +++ /dev/null @@ -1,37 +0,0 @@ -assets: assets -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/val_combined_04_23_2025.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_val_combined_04_23_2025_images -experiment_name: jay_imagenet_val_04_23_2025 -file_exclusion_filter: ILSVRC2012_train -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 16384 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_0.1_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 5 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_combined_04_23_2025_processed.npz -processed_medians: jay_imagenet_val_combined_04_23_2025_medians.json -remove_string: imagenet/val/ diff --git a/configs/jay_imagenet_val_04_30_2025.yaml b/configs/jay_imagenet_val_04_30_2025.yaml deleted file mode 100644 index b9a99cf..0000000 --- a/configs/jay_imagenet_val_04_30_2025.yaml +++ /dev/null @@ -1,37 +0,0 @@ 
-assets: assets -blur_size: 21 -center_crop: -- 224 -- 224 -class_filter_file: false -clickme_data: clickme_datasets/val_combined_04_30_2025.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_val_combined_04_30_2025_images -experiment_name: jay_imagenet_val_04_30_2025 -file_exclusion_filter: ILSVRC2012_train -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 16384 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: false -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 5 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -parallel_save: false -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_combined_04_30_2025_processed.npz -processed_medians: jay_imagenet_val_combined_04_30_2025_medians.json -remove_string: imagenet/val/ diff --git a/configs/jay_imagenet_val_12_18_2024.yaml b/configs/jay_imagenet_val_12_18_2024.yaml deleted file mode 100644 index 41751cb..0000000 --- a/configs/jay_imagenet_val_12_18_2024.yaml +++ /dev/null @@ -1,36 +0,0 @@ -assets: jay_work_in_progress -blur_size: 21 -center_crop: -- 256 -- 256 -class_filter_file: false -clickme_data: clickme_datasets/jay_imagenet_val_combined_12_18_2024.npz -correlation_batch_size: 1024 -debug: false -display_image_keys: auto -example_image_output_dir: jay_imagenet_val_combined_12_18_2024_images -experiment_name: jay_imagenet_val_12_18_2024 -file_exclusion_filter: false -file_inclusion_filter: ILSVRC2012_val -filter_mobile: true -gpu_batch_size: 1024 -image_path: /media/data_cifs/projects/prj_video_imagenet/imagenet/ILSVRC/Data/CLS-LOC/val2 -image_shape: -- 256 -- 256 -mask_dir: null -mask_threshold: 1 -max_clicks: 75 -metadata_file: image_metadata/jay_imagenet_val_0.1_dimensions.npy -metric: spearman -min_clicks: 10 -min_subjects: 5 -n_jobs: -1 -null_iterations: 10 -parallel_prepare_maps: true -participant_filter: false -percentile_thresh: 50 -preprocess_db_data: true -processed_clickme_file: jay_imagenet_val_combined_12_18_2024_processed.npz -processed_medians: jay_imagenet_val_combined_12_18_2024_medians.json -remove_string: imagenet/val/ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00005143.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00005143.JPEG deleted file mode 100644 index d9000e7..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00005143.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00007012.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00007012.JPEG deleted file mode 100644 index 54e554a..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00007012.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00011048.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00011048.JPEG deleted file mode 100644 index 4046591..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00011048.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00014660.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00014660.JPEG deleted file mode 100644 index de1e797..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00014660.JPEG 
and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00017247.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00017247.JPEG deleted file mode 100644 index 994a403..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00017247.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00022850.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00022850.JPEG deleted file mode 100644 index 8277c3b..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00022850.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00027083.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00027083.JPEG deleted file mode 100644 index 36e858d..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00027083.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00029849.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00029849.JPEG deleted file mode 100644 index 7c6a2ee..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00029849.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00031840.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00031840.JPEG deleted file mode 100644 index e786c15..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00031840.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00032692.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00032692.JPEG deleted file mode 100644 index 9315328..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00032692.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00039433.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00039433.JPEG deleted file mode 100644 index b3f9ee0..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00039433.JPEG and /dev/null differ diff --git a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00042007.JPEG b/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00042007.JPEG deleted file mode 100644 index 907dedf..0000000 Binary files a/jay_imagenet_for_co3d_val_0.1_images/ILSVRC2012_val_00042007.JPEG and /dev/null differ diff --git a/missing_val.json b/missing_val.json new file mode 100644 index 0000000..9a88ce8 --- /dev/null +++ b/missing_val.json @@ -0,0 +1,3 @@ +{ + "val": [] +} \ No newline at end of file diff --git a/prepare_clickmaps.py b/prepare_clickmaps.py index c7921bc..5572c6b 100644 --- a/prepare_clickmaps.py +++ b/prepare_clickmaps.py @@ -47,6 +47,9 @@ if "time_based_bins" not in config: config["time_based_bins"] = args.time_based_bins + if "max_subjects" not in config: + config["max_subjects"] = -1 + # Load clickme data print(f"Loading clickme data...") clickme_data = utils.process_clickme_data( @@ -253,6 +256,7 @@ # Save results for this batch if batch_final_keep_index: print(f"Saving {len(batch_final_keep_index)} processed maps for batch {batch_num+1}...") + print(f"Total number of maps: {len(batch_all_clickmaps)}") processed_images_count += len(batch_final_keep_index) # Store click counts @@ -276,18 +280,6 @@ compression_level=config.get("hdf5_compression_level", 0), clickmap_bins=batch_clickmap_bins ) - - # Also save individual NPY files for compatibility - # print("Saving individual NPY files in addition to HDF5...") - # npy_saved_count 
= utils.save_clickmaps_parallel( - # all_clickmaps=batch_all_clickmaps, - # final_keep_index=batch_final_keep_index, - # output_dir=output_dir, - # experiment_name=f"{config['experiment_name']}{batch_suffix}", - # image_path=config["image_path"], - # n_jobs=config["n_jobs"], - # file_inclusion_filter=config.get("file_inclusion_filter") - # ) else: # Use optimized HDF5 saving with compression saved_count = utils.save_clickmaps_to_hdf5( @@ -410,7 +402,8 @@ # Save results if final_keep_index: print(f"Saving {len(final_keep_index)} processed maps...") - + print(f"Total number of maps: {len(all_clickmaps)}") + # Save click counts to HDF5 with h5py.File(hdf5_path, 'a') as f: for img_name, count in click_counts.items(): @@ -489,79 +482,79 @@ np.save(os.path.join(output_dir, config["processed_clickmap_bins"]), clickmap_bins) # Process visualization for display images if needed - if config["display_image_keys"]: - if config["display_image_keys"] == "auto": - sz_dict = {k: len(v) for k, v in final_clickmaps.items()} - arg = np.argsort(list(sz_dict.values())) - config["display_image_keys"] = np.asarray(list(sz_dict.keys()))[arg[-10:]] + # if config["display_image_keys"]: + # if config["display_image_keys"] == "auto": + # sz_dict = {k: len(v) for k, v in final_clickmaps.items()} + # arg = np.argsort(list(sz_dict.values())) + # config["display_image_keys"] = np.asarray(list(sz_dict.keys()))[arg[-10:]] - print("Generating visualizations for display images...") - for img_name in config["display_image_keys"]: - # Find the corresponding heatmap - try: - if output_format == "hdf5": - # Read from HDF5 file - with h5py.File(hdf5_path, 'r') as f: - dataset_name = img_name.replace('/', '_') - if dataset_name in f["clickmaps"]: - hmp = f["clickmaps"][dataset_name][:] - # Also read click count if available - count_path = os.path.join(output_dir, config["experiment_name"], f"{img_name.replace('/', '_')}_count.npy") - click_count = np.load(count_path) if os.path.exists(count_path) else None - else: - print(f"Heatmap not found for {img_name}") - continue - else: - # Read from numpy file - heatmap_path = os.path.join(output_dir, config["experiment_name"], f"{img_name.replace('/', '_')}.npy") - if not os.path.exists(heatmap_path): - print(f"Heatmap not found for {img_name}") - continue + # print("Generating visualizations for display images...") + # for img_name in config["display_image_keys"]: + # # Find the corresponding heatmap + # try: + # if output_format == "hdf5": + # # Read from HDF5 file + # with h5py.File(hdf5_path, 'r') as f: + # dataset_name = img_name.replace('/', '_') + # if dataset_name in f["clickmaps"]: + # hmp = f["clickmaps"][dataset_name][:] + # # Also read click count if available + # count_path = os.path.join(output_dir, config["experiment_name"], f"{img_name.replace('/', '_')}_count.npy") + # click_count = np.load(count_path) if os.path.exists(count_path) else None + # else: + # print(f"Heatmap not found for {img_name}") + # continue + # else: + # # Read from numpy file + # heatmap_path = os.path.join(output_dir, config["experiment_name"], f"{img_name.replace('/', '_')}.npy") + # if not os.path.exists(heatmap_path): + # print(f"Heatmap not found for {img_name}") + # continue - hmp = np.load(heatmap_path) - # Try to load click count - count_path = os.path.join(output_dir, config["experiment_name"], f"{img_name.replace('/', '_')}_count.npy") - click_count = np.load(count_path) if os.path.exists(count_path) else None + # hmp = np.load(heatmap_path) + # # Try to load click count + # count_path = 
os.path.join(output_dir, config["experiment_name"], f"{img_name.replace('/', '_')}_count.npy") + # click_count = np.load(count_path) if os.path.exists(count_path) else None - # If not found, try the dedicated click counts directory - if click_count is None: - count_path = os.path.join(click_counts_dir, f"{img_name.replace('/', '_')}.npy") - click_count = np.load(count_path) if os.path.exists(count_path) else None + # # If not found, try the dedicated click counts directory + # if click_count is None: + # count_path = os.path.join(click_counts_dir, f"{img_name.replace('/', '_')}.npy") + # click_count = np.load(count_path) if os.path.exists(count_path) else None - # Load image - if os.path.exists(os.path.join(config["image_path"], img_name)): - img = Image.open(os.path.join(config["image_path"], img_name)) - elif os.path.exists(os.path.join(config["image_path"].replace(config["file_inclusion_filter"] + os.path.sep, ""), img_name)): - img = Image.open(os.path.join(config["image_path"].replace(config["file_inclusion_filter"] + os.path.sep, ""), img_name)) - elif os.path.exists(os.path.join(config["image_path"].replace(config["file_inclusion_filter"], ""), img_name)): - img = Image.open(os.path.join(config["image_path"].replace(config["file_inclusion_filter"], ""), img_name)) - else: - print(f"Image not found for {img_name}") - continue + # # Load image + # if os.path.exists(os.path.join(config["image_path"], img_name)): + # img = Image.open(os.path.join(config["image_path"], img_name)) + # elif os.path.exists(os.path.join(config["image_path"].replace(config["file_inclusion_filter"] + os.path.sep, ""), img_name)): + # img = Image.open(os.path.join(config["image_path"].replace(config["file_inclusion_filter"] + os.path.sep, ""), img_name)) + # elif os.path.exists(os.path.join(config["image_path"].replace(config["file_inclusion_filter"], ""), img_name)): + # img = Image.open(os.path.join(config["image_path"].replace(config["file_inclusion_filter"], ""), img_name)) + # else: + # print(f"Image not found for {img_name}") + # continue - if metadata: - click_match = [k_ for k_ in final_clickmaps.keys() if img_name in k_] - if click_match: - metadata_size = metadata[click_match[0]] - img = img.resize(metadata_size) + # if metadata: + # click_match = [k_ for k_ in final_clickmaps.keys() if img_name in k_] + # if click_match: + # metadata_size = metadata[click_match[0]] + # img = img.resize(metadata_size) - # Save visualization - f = plt.figure() - plt.subplot(1, 2, 1) - plt.imshow(np.asarray(img)) - title = f"{img_name}" - if click_count is not None: - title += f"\nTotal clicks: {click_count}" - plt.title(title) - plt.axis("off") - plt.subplot(1, 2, 2) - plt.imshow(hmp.mean(0)) - plt.axis("off") - plt.savefig(os.path.join(image_output_dir, img_name.replace('/', '_'))) - plt.close() - except Exception as e: - print(f"Error processing {img_name}: {str(e)}") - continue + # # Save visualization + # f = plt.figure() + # plt.subplot(1, 2, 1) + # plt.imshow(np.asarray(img)) + # title = f"{img_name}" + # if click_count is not None: + # title += f"\nTotal clicks: {click_count}" + # plt.title(title) + # plt.axis("off") + # plt.subplot(1, 2, 2) + # plt.imshow(hmp.mean(0)) + # plt.axis("off") + # plt.savefig(os.path.join(image_output_dir, img_name.replace('/', '_'))) + # plt.close() + # except Exception as e: + # print(f"Error processing {img_name}: {str(e)}") + # continue # End profiling if it was enabled if args.profile: profiler.disable() diff --git a/scripts/compute_ceiling_floor.sh 
b/scripts/compute_ceiling_floor.sh new file mode 100644 index 0000000..8742968 --- /dev/null +++ b/scripts/compute_ceiling_floor.sh @@ -0,0 +1,10 @@ +#!/bin/bash +#SBATCH -J ImgNet_Ceiling_Floor +#SBATCH -N 1-1 +#SBATCH -n 16 +#SBATCH -t 96:00:00 +#SBATCH --gres=gpu:1 +#SBATCH --mem=512G +#SBATCH -p gpu-he +echo Starting execution at `date` +conda run -p ../../gs-perception/venv python ceiling_floor_estimate_large.py configs/imgnet_configs/imagenet_val_spearman_oscar.yaml \ No newline at end of file diff --git a/scripts/process_imgnet.sh b/scripts/process_imgnet.sh new file mode 100644 index 0000000..a451e71 --- /dev/null +++ b/scripts/process_imgnet.sh @@ -0,0 +1,2 @@ +python prepare_clickmaps.py configs/imgnet_configs/imagenet_train_oscar.yaml +python prepare_clickmaps.py configs/imgnet_configs/imagenet_val_oscar.yaml diff --git a/scripts/run_exp.sh b/scripts/run_exp.sh new file mode 100644 index 0000000..ca61a99 --- /dev/null +++ b/scripts/run_exp.sh @@ -0,0 +1,11 @@ +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_5.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_10.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_15.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_20.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_25.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_30.yaml + +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_5.yaml +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_10.yaml +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_15.yaml +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_20.yaml \ No newline at end of file diff --git a/scripts/run_exp_Jay.sh b/scripts/run_exp_Jay.sh new file mode 100644 index 0000000..461ef55 --- /dev/null +++ b/scripts/run_exp_Jay.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#SBATCH --time=48:00:00 +#SBATCH -n 8 +#SBATCH -N 1 +#SBATCH --mem=256G +#SBATCH -p gpu --gres=gpu:1 +#SBATCH --account=carney-tserre-condo +#SBATCH -J Jay-ClickMe-Processing +#SBATCH -o logs/log-ClickMe-Processing-%j.out + + +source /gpfs/data/tserre/jgopal/human_clickme_data_processing/jay-venv/bin/activate + +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_5.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_10.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_15.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_20.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_25.yaml +python ceiling_floor_estimate.py configs/exp_configs/imagenet_val_oscar_max_30.yaml + +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_5.yaml +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_10.yaml +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_15.yaml +# python ceiling_floor_estimate.py configs/balance_exp_configs/imagenet_val_oscar_max_20.yaml \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils.py b/src/utils.py index b54572b..1501b5c 100644 --- a/src/utils.py +++ b/src/utils.py @@ -12,10 +12,12 @@ from 
train_subject_classifier import RNN from accelerate import Accelerator from joblib import Parallel, delayed +import joblib import psutil +import h5py from PIL import Image from scipy.ndimage import maximum_filter - +import time # Near the top of the file (around line 10), add torch.cuda memory management functions try: @@ -162,6 +164,7 @@ def filter_for_foreground_masks( def process_clickme_data(data_file, filter_mobile, catch_thresh=0.95): if "csv" in data_file: - return pd.read_csv(data_file) + df = pd.read_csv(data_file) + return df elif "npz" in data_file: print("Load npz") @@ -338,8 +341,6 @@ def process_single_row(row): return None image_file_name = folder_image_file_name image_file_names.append(folder_image_file_name) - # elif file_inclusion_filter and file_inclusion_filter not in image_file_name: - # return None elif file_inclusion_filter and file_inclusion_filter not in image_file_name: return None @@ -351,9 +352,8 @@ def process_single_row(row): clickmap = row["clicks"] if isinstance(clickmap, str): - clean_string = re.sub(r'[{}"]', '', clickmap) - tuple_strings = clean_string.split(', ') - data_list = tuple_strings[0].strip("()").split("),(") + clean_string = re.sub(r'[{}"\[\]]', '', clickmap) + data_list = clean_string.strip("()").split("), (") if len(data_list) == 1: # Remove empty clickmaps return None tuples_list = [tuple(map(int, pair.split(','))) for pair in data_list] @@ -376,10 +378,15 @@ def process_single_row(row): return (image_file_name, tuples_list) # Process rows in parallel - results = Parallel(n_jobs=1)( - delayed(process_single_row)(row) - for _, row in tqdm(clickme_data.iterrows(), total=len(clickme_data), desc="Processing clickmaps") - ) + # results = Parallel(n_jobs=1)( + # delayed(process_single_row)(row) + # for _, row in tqdm(clickme_data.iterrows(), total=len(clickme_data), desc="Processing clickmaps") + # ) + + results = [] + for _, row in tqdm(clickme_data.iterrows(), total=len(clickme_data), desc="Processing clickmaps"): + single_row_result = process_single_row(row) + results.append(single_row_result) # Combine results proc_clickmaps = {} @@ -451,740 +458,6 @@ def convolve(heatmap, kernel, double_conv=False, device='cpu'): return blurred_heatmap.to(device) # [0] -def process_single_image(image_key, image_trials, image_shape, blur_size, blur_sigma, - min_pixels, min_subjects, center_crop, metadata, blur_sigma_function, - kernel_type, duplicate_thresh, max_kernel_size, blur_kernel, - create_clickmap_func, fast_duplicate_detection, device='cpu'): - """Helper function to process a single image for parallel processing""" - - # Process metadata and create clickmaps - if metadata is not None: - if image_key not in metadata: - # Use provided create_clickmap_func - clickmaps = np.asarray([create_clickmap_func([trials], image_shape) for trials in image_trials]) - clickmaps = torch.from_numpy(clickmaps).float().unsqueeze(1).to(device) - if kernel_type == "gaussian": - clickmaps = convolve(clickmaps, blur_kernel) - elif kernel_type == "circle": - clickmaps = convolve(clickmaps, blur_kernel, double_conv=True) - else: - native_size = metadata[image_key] - short_side = min(native_size) - scale = short_side / min(image_shape) - adj_blur_size = int(np.round(blur_size * scale)) - if not adj_blur_size % 2: - adj_blur_size += 1 - adj_blur_size = min(adj_blur_size, max_kernel_size) - adj_blur_sigma = blur_sigma_function(adj_blur_size) - # Use provided create_clickmap_func - clickmaps = 
np.asarray([create_clickmap_func([trials], native_size[::-1]) for trials in image_trials]) - clickmaps = torch.from_numpy(clickmaps).float().unsqueeze(1).to(device) - if kernel_type == "gaussian": - adj_blur_kernel = gaussian_kernel(adj_blur_size, adj_blur_sigma, device) - clickmaps = convolve(clickmaps, adj_blur_kernel) - elif kernel_type == "circle": - adj_blur_kernel = circle_kernel(adj_blur_size, adj_blur_sigma, device) - clickmaps = convolve(clickmaps, adj_blur_kernel, double_conv=True) - else: - # Use provided create_clickmap_func - clickmaps = np.asarray([create_clickmap_func([trials], image_shape) for trials in image_trials]) - clickmaps = torch.from_numpy(clickmaps).float().unsqueeze(1).to(device) - if kernel_type == "gaussian": - clickmaps = convolve(clickmaps, blur_kernel) - elif kernel_type == "circle": - clickmaps = convolve(clickmaps, blur_kernel, double_conv=True) - - if center_crop: - clickmaps = tvF.resize(clickmaps, min(image_shape)) - clickmaps = tvF.center_crop(clickmaps, center_crop) - clickmaps = clickmaps.squeeze().numpy() - - # Filter processing - if len(clickmaps.shape) == 2: # Single map - return None - - # Filter 1: Remove empties - empty_check = (clickmaps > 0).sum((1, 2)) > min_pixels - clickmaps = clickmaps[empty_check] - if len(clickmaps) < min_subjects: - return None - - # Filter 2: Remove duplicates using provided fast_duplicate_detection - clickmaps_vec = clickmaps.reshape(len(clickmaps), -1) - - # Use the function passed as argument - non_duplicate_indices = fast_duplicate_detection(clickmaps_vec, duplicate_thresh) - clickmaps = clickmaps[non_duplicate_indices] - - # dm = cdist(clickmaps_vec, clickmaps_vec) - # idx = np.tril_indices(len(dm), k=-1) - # lt_dm = dm[idx] - # if np.any(lt_dm < duplicate_thresh): - # remove = np.unique(np.where((dm + np.eye(len(dm)) == 0)))[0] - # rng = np.arange(len(dm)) - # dup_idx = rng[~np.in1d(rng, remove)] - # clickmaps = clickmaps[dup_idx] - - if len(clickmaps) >= min_subjects: - return (image_key, clickmaps) - return None - -def prepare_maps_batched_gpu( - final_clickmaps, - blur_size, - blur_sigma, - image_shape, - min_pixels, - min_subjects, - center_crop, - metadata=None, - blur_sigma_function=None, - kernel_type="circle", - duplicate_thresh=0.01, - max_kernel_size=51, - device='cuda', - batch_size=512, # Reduced from 1024 to 512 - n_jobs=-1, - timeout=600, # Add timeout parameter (10 minutes default) - verbose=True, # Add verbose parameter to control detailed logging - create_clickmap_func=None, - fast_duplicate_detection=None): - """ - Optimized version of prepare_maps that separates CPU and GPU work: - 1. Pre-processes clickmaps in parallel on CPU - 2. Processes batches of blurring on GPU - 3. Post-processes results in parallel on CPU - - Args: - final_clickmaps (list): List of dictionaries mapping image keys to clickmap trials - blur_size (int): Size of the blur kernel - blur_sigma (float): Sigma value for the blur kernel - image_shape (list/tuple): Shape of the images [height, width] - min_pixels (int): Minimum number of pixels for a valid map - min_subjects (int): Minimum number of subjects for a valid map - center_crop (list/tuple): Size for center cropping - metadata (dict, optional): Metadata dictionary. Defaults to None. - blur_sigma_function (function, optional): Function to calculate blur sigma. Required. - kernel_type (str, optional): Type of kernel to use. Defaults to "circle". - duplicate_thresh (float, optional): Threshold for duplicate detection. Defaults to 0.01. 
- max_kernel_size (int, optional): Maximum kernel size. Defaults to 51. - device (str, optional): Device to use for GPU operations. Defaults to 'cuda'. - batch_size (int, optional): Batch size for GPU processing. Defaults to 512. - n_jobs (int, optional): Number of parallel jobs for CPU operations. Defaults to -1. - timeout (int, optional): Timeout in seconds for parallel jobs. Defaults to 600. - verbose (bool): Whether to show detailed progress logging - create_clickmap_func (function): Function to create the initial clickmap - fast_duplicate_detection (function): Function for duplicate detection - - Returns: - tuple: (new_final_clickmaps, all_clickmaps, categories, keep_index) - """ - import torch - import torch.nn.functional as F - from joblib import Parallel, delayed - from scipy.spatial.distance import cdist - import numpy as np - from tqdm import tqdm - import torchvision.transforms.functional as tvF - - assert blur_sigma_function is not None, "Blur sigma function not passed." - # Check if functions were passed - assert create_clickmap_func is not None, "create_clickmap function must be provided." - assert fast_duplicate_detection is not None, "fast_duplicate_detection function must be provided." - - # Step 1: Create kernels on GPU - if kernel_type == "gaussian": - blur_kernel = gaussian_kernel(blur_size, blur_sigma, device) - elif kernel_type == "circle": - blur_kernel = circle_kernel(blur_size, blur_sigma, device) - else: - raise NotImplementedError(kernel_type) - - # We'll store all results here - all_final_results = { - 'all_clickmaps': [], - 'categories': [], - 'keep_index': [], - 'new_final_clickmaps': {} - } - - # FIX: More carefully merge dictionaries to avoid mixing maps from different images - # We use a dict to track the source of each clickmap to ensure we're not mixing maps - merged_clickmaps = {} - image_sources = {} # Track which dict each image came from - map_counts_before = {} # Track number of maps before merging - map_counts_after = {} # Track number of maps after merging - - for dict_idx, clickmap_dict in enumerate(final_clickmaps): - # Count maps in this dictionary - for image_key, maps in clickmap_dict.items(): - if image_key not in map_counts_before: - map_counts_before[image_key] = 0 - map_counts_before[image_key] += len(maps) - - for image_key, maps in clickmap_dict.items(): - if image_key in merged_clickmaps: - # If this image already exists, we need to make sure we're not - # accidentally mixing maps from different images - print(f"Warning: Image {image_key} found in multiple dictionaries. Combining maps.") - # Append maps while preserving the source tracking - merged_clickmaps[image_key].extend(maps) - if isinstance(image_sources[image_key], list): - image_sources[image_key].append(dict_idx) - else: - image_sources[image_key] = [image_sources[image_key], dict_idx] - else: - # First occurrence of this image - merged_clickmaps[image_key] = maps - image_sources[image_key] = dict_idx - - # Count maps after merging - for image_key, maps in merged_clickmaps.items(): - map_counts_after[image_key] = len(maps) - - # Log if we found any duplicate keys across dictionaries - duplicate_keys = [k for k, v in image_sources.items() if isinstance(v, list)] - if duplicate_keys: - print(f"Found {len(duplicate_keys)} images with maps in multiple dictionaries. 
These have been properly combined.") - - # Extra verification in verbose mode - if verbose: - print("\nVerification of map combining:") - for key in duplicate_keys: - if map_counts_before[key] != map_counts_after[key]: - print(f" ERROR: Map count mismatch for {key}: Before={map_counts_before[key]}, After={map_counts_after[key]}") - else: - print(f" OK: Successfully combined {map_counts_after[key]} maps for {key}") - print("") - - # Step 2: Get all keys and prepare for batch processing - all_keys = list(merged_clickmaps.keys()) - total_images = len(all_keys) - - # Calculate number of batches based on total unique images - # Set more conservative batch sizes for stability - # cpu_batch_size = min(batch_size, 5000) # Cap at 5000 for stability - cpu_batch_size = batch_size - num_cpu_batches = (total_images + cpu_batch_size - 1) // cpu_batch_size - effective_n_jobs = min(n_jobs if n_jobs > 0 else os.cpu_count(), os.cpu_count(), 16) # Cap at 16 workers - - print(f"Processing {total_images} unique images in {num_cpu_batches} CPU batches (GPU batch size: {batch_size})...") - print(f"Using {effective_n_jobs} parallel jobs for CPU pre/post-processing.") - - # Process each batch of images - processed_count = 0 - with tqdm(total=total_images, desc="Processing Image Batches") as pbar: - for cpu_batch_idx in range(num_cpu_batches): - batch_start = cpu_batch_idx * cpu_batch_size - batch_end = min(batch_start + cpu_batch_size, total_images) - - # print(f"\n│ ├─ Processing CPU batch {cpu_batch_idx+1}/{num_cpu_batches} (images {batch_start}-{batch_end})...") - - # Get keys for this batch - batch_keys = all_keys[batch_start:batch_end] - - # Step 3: Pre-process only this batch of clickmaps in parallel on CPU - # print(f"│ │ ├─ Pre-processing clickmaps on CPU (parallel, n_jobs={effective_n_jobs})...") - - def preprocess_clickmap(image_key, image_trials, image_shape, metadata=None): - """Helper function to pre-process a clickmap before GPU processing""" - # Process metadata and create clickmaps (creates binary maps, no blurring) - # Ensure image_shape is a tuple as required by create_clickmap_func - image_shape_tuple = tuple(image_shape) if isinstance(image_shape, list) else image_shape - - if metadata is not None and image_key in metadata: - native_size = metadata[image_key] - # Use provided create_clickmap_func - clickmaps = np.asarray([create_clickmap_func([trials], native_size[::-1]) for trials in image_trials]) - return { - 'key': image_key, - 'clickmaps': clickmaps, - 'native_size': native_size if image_key in metadata else None - } - else: - # Use provided create_clickmap_func - clickmaps = np.asarray([create_clickmap_func([trials], image_shape_tuple) for trials in image_trials]) - return { - 'key': image_key, - 'clickmaps': clickmaps, - 'native_size': None - } - - # Use parallel processing for pre-processing only this batch - # Set a timeout for workers to avoid indefinite hanging - preprocessed = Parallel(n_jobs=effective_n_jobs, timeout=timeout)( - delayed(preprocess_clickmap)( - key, - merged_clickmaps[key], - image_shape, - metadata - ) for key in tqdm(batch_keys, desc="Pre-processing", total=len(batch_keys), leave=False) - ) - - # Only keep non-empty preprocessed data - preprocessed = [p for p in preprocessed if p is not None and len(p['clickmaps']) > 0] - - # Step 4: Process GPU blurring - # print(f"│ │ ├─ Processing blurring on GPU (batch_size={batch_size})...") - - # Process in smaller GPU sub-batches to prevent OOM errors - gpu_batch_size = batch_size # min(batch_size, 256) # Cap at 256 to 
prevent OOM errors - batch_results = [] - - # Flatten the list of clickmaps for efficient GPU batching - gpu_processing_list = [] - for item in preprocessed: - # Each item in preprocessed has a list of clickmaps for one image - # We need to process each clickmap individually on the GPU eventually - gpu_processing_list.append(item) - - # Process GPU batches with a progress bar - total_gpu_batches = (len(gpu_processing_list) + gpu_batch_size - 1) // gpu_batch_size - if verbose: - print(f"Processing {len(gpu_processing_list)} images in {total_gpu_batches} GPU batches (size: {gpu_batch_size})...") - - with tqdm(total=total_gpu_batches, desc="GPU batches", leave=False) as gpu_batch_pbar: - for gpu_batch_idx in range(0, len(gpu_processing_list), gpu_batch_size): - - # Log current batch information - batch_start = gpu_batch_idx - batch_end = min(gpu_batch_idx + gpu_batch_size, len(gpu_processing_list)) - current_batch_size = batch_end - batch_start - - # Get smaller sub-batch to process - gpu_batch_items = gpu_processing_list[gpu_batch_idx : gpu_batch_idx + gpu_batch_size] - - # Skip empty batches - if not gpu_batch_items: - gpu_batch_pbar.update(1) - continue - - # Log tensor preparation step - if verbose: - print(f" │ ├─ Preparing tensors for {len(gpu_batch_items)} images...") - - # Prepare tensors for this GPU batch - tensors_to_blur = [] - metadata_for_batch = [] - keys_for_batch = [] - map_counts = [] # Track how many maps belong to each original image key - - for item in gpu_batch_items: - key = item['key'] - clickmaps_np = item['clickmaps'] - native_size = item['native_size'] - - # Convert numpy arrays to PyTorch tensors - # Important: Keep track of how many maps belong to this key - num_maps_for_key = len(clickmaps_np) - if num_maps_for_key > 0: - clickmaps_tensor = torch.from_numpy(clickmaps_np).float().unsqueeze(1).to(device) - tensors_to_blur.append(clickmaps_tensor) - # Repeat metadata for each map belonging to this key - metadata_for_batch.extend([(key, native_size)] * num_maps_for_key) - keys_for_batch.extend([key] * num_maps_for_key) - map_counts.append(num_maps_for_key) - - if not tensors_to_blur: - if verbose: - print(f" │ ├─ No valid tensors to process, skipping batch") - gpu_batch_pbar.update(1) - continue - - # Log batch tensor creation - if verbose: - print(f" │ ├─ Concatenating {len(tensors_to_blur)} tensors with {sum(map_counts)} total maps...") - - # Concatenate tensors for efficient batch processing - batch_tensor = torch.cat(tensors_to_blur, dim=0) - - # Log tensor shape for debugging - if verbose: - print(f" │ ├─ Batch tensor shape: {batch_tensor.shape}, processing blurring...") - - # Clear up memory - del tensors_to_blur - torch.cuda.empty_cache() - - # Apply blurring (needs to handle potential metadata variations within batch) - blurred_batch = torch.zeros_like(batch_tensor) - current_idx = 0 - - # Apply blurring with a more memory-efficient approach - sub_batch_size = 100 # Process in small sub-batches for stability - if verbose and len(gpu_batch_items) > 1: - print(f" │ ├─ Processing {len(gpu_batch_items)} image items in batches of {sub_batch_size}...") - - for item_idx, item in enumerate(tqdm(gpu_batch_items, desc="Blurring items", leave=False, disable=not verbose)): - # Apply blurring based on the specific item's metadata - key = item['key'] - num_maps = len(item['clickmaps']) - native_size = item['native_size'] - - if num_maps == 0: - continue - - item_tensor = batch_tensor[current_idx : current_idx + num_maps] - import pdb; pdb.set_trace() - try: - # Process 
with proper error handling - if native_size is not None: - short_side = min(native_size) - scale = short_side / min(image_shape) - adj_blur_size = int(np.round(blur_size * scale)) - if not adj_blur_size % 2: - adj_blur_size += 1 - adj_blur_size = min(adj_blur_size, max_kernel_size) - adj_blur_sigma = blur_sigma_function(adj_blur_size) - - if kernel_type == "gaussian": - adj_blur_kernel = gaussian_kernel(adj_blur_size, adj_blur_sigma, device) - blurred_item = convolve(item_tensor, adj_blur_kernel) - elif kernel_type == "circle": - adj_blur_kernel = circle_kernel(adj_blur_size, adj_blur_sigma, device) - blurred_item = convolve(item_tensor, adj_blur_kernel, double_conv=True) - - # Free memory for next iteration - if 'adj_blur_kernel' in locals(): - del adj_blur_kernel - else: - # Use the standard kernel - if kernel_type == "gaussian": - blurred_item = convolve(item_tensor, blur_kernel) - elif kernel_type == "circle": - blurred_item = convolve(item_tensor, blur_kernel, double_conv=True) - - blurred_batch[current_idx : current_idx + num_maps] = blurred_item - - # Free memory - del blurred_item - except Exception as e: - if verbose: - print(f" │ ├─ ERROR processing item {item_idx} (key: {key}): {e}") - # In case of error, just keep original - blurred_batch[current_idx : current_idx + num_maps] = item_tensor - - current_idx += num_maps - - # Periodically clear cache - if item_idx % 50 == 0: - torch.cuda.empty_cache() - - # Log center crop info if applicable - if center_crop and verbose: - print(f" │ ├─ Applying center crop from {blurred_batch.shape[-2:]} to {center_crop}...") - - # Apply center crop if needed (applied to the whole batch) - if center_crop: - # Resize first if dimensions are different - if blurred_batch.shape[-2:] != image_shape: - blurred_batch = tvF.resize(blurred_batch, list(image_shape), antialias=True) - blurred_batch = tvF.center_crop(blurred_batch, list(center_crop)) - - # Log conversion to numpy - if verbose: - print(f" │ ├─ Converting to numpy and organizing results...") - - # Convert back to numpy and store results indexed by key - processed_maps_np = blurred_batch.squeeze(1).cpu().numpy() - - # Reconstruct the results grouped by image key - start_idx = 0 - item_idx = 0 - while start_idx < len(processed_maps_np): - key = keys_for_batch[start_idx] - num_maps = map_counts[item_idx] - end_idx = start_idx + num_maps - batch_results.append({ - 'key': key, - 'clickmaps': processed_maps_np[start_idx:end_idx] - }) - start_idx = end_idx - item_idx += 1 - - # Log memory cleanup - if verbose: - print(f" │ └─ Cleaning up memory...") - - # Free GPU memory - del batch_tensor, blurred_batch, item_tensor - if 'adj_blur_kernel' in locals(): del adj_blur_kernel - # check_gpu_memory_usage(threshold=0.5, force_cleanup=True) - - # Add a small delay to allow system to stabilize - import time - time.sleep(0.1) - - # Update GPU batch progress bar - gpu_batch_pbar.update(1) - - - # Add post-processing progress logging - if verbose: - print(f"Post-processing {len(batch_results)} results...") - - # Use parallel processing for post-processing with timeout - post_results = Parallel(n_jobs=effective_n_jobs, timeout=timeout)( - delayed(postprocess_clickmap)( - batch_results[i], - min_pixels, - min_subjects, - duplicate_thresh - ) for i in tqdm(range(len(batch_results)), desc="Post-processing", disable=not verbose) - ) - - # Step 6: Compile final results for this batch - for result in post_results: - if result is not None: - image_key, clickmaps = result - category = image_key.split("/")[0] - - 
all_final_results['all_clickmaps'].append(clickmaps) - all_final_results['categories'].append(category) - all_final_results['keep_index'].append(image_key) - all_final_results['new_final_clickmaps'][image_key] = merged_clickmaps[image_key] - - # Update progress bar - pbar.update(len(batch_keys)) - processed_count += len(batch_keys) - - # Free memory - del preprocessed, batch_results, post_results - if 'gpu_processing_list' in locals(): del gpu_processing_list - torch.cuda.empty_cache() - - # # Final cleanup before returning - # check_gpu_memory_usage(threshold=0.0, force_cleanup=True) - - print(f"\nFinished processing {processed_count} images.") - # Return combined results - return ( - all_final_results['new_final_clickmaps'], - all_final_results['all_clickmaps'], - all_final_results['categories'], - all_final_results['keep_index'] - ) - -# Custom wrapper for prepare_maps_batched_gpu with progress display -def prepare_maps_with_gpu_batching(final_clickmaps, **kwargs): - """ - Wrapper for prepare_maps_batched_gpu that displays progress and follows - the same signature as prepare_maps_with_progress for easy swapping. - - This version optimizes processing by: - 1. Pre-processing clickmaps in parallel on CPU - 2. Processing batches of blurring operations on GPU - 3. Post-processing results in parallel on CPU - - Args: - final_clickmaps (list): List of dictionaries mapping image keys to clickmap trials - **kwargs: Additional arguments to pass to prepare_maps_batched_gpu - - Returns: - tuple: (new_final_clickmaps, all_clickmaps, categories, keep_index) - """ - batch_size = kwargs.pop('batch_size', 512) # Default batch size of 512 - verbose = kwargs.pop('verbose', True) # Add verbose parameter, default to True - - # Display more information if verbose - if verbose: - print(f"│ ├─ Processing with GPU-optimized batching (batch_size={batch_size})...") - - # Pass the required functions from kwargs - create_clickmap_func = kwargs.get('create_clickmap_func') - fast_duplicate_detection = kwargs.get('fast_duplicate_detection') - - return prepare_maps_batched_gpu( - final_clickmaps=final_clickmaps, - batch_size=batch_size, - verbose=verbose, # Pass verbose parameter - create_clickmap_func=create_clickmap_func, - fast_duplicate_detection=fast_duplicate_detection, - **{k: v for k, v in kwargs.items() if k not in ('create_clickmap_func', 'fast_duplicate_detection', 'batch_size', 'verbose')} - ) - -# GPU-accelerated correlation metrics -def compute_AUC_gpu(pred, target, device='cuda'): - """ - GPU-accelerated implementation of AUC score computation. 
- - Args: - pred (np.ndarray): Predicted heatmap - target (np.ndarray): Target heatmap - device (str): Device to run computation on ('cuda' or 'cpu') - - Returns: - float: AUC score - """ - import torch - from sklearn import metrics - - # Flatten arrays - pred_flat = pred.flatten() - target_flat = target.flatten() - - # Convert to PyTorch tensors - pred_tensor = torch.tensor(pred_flat, device=device) - target_tensor = torch.tensor(target_flat, device=device) - - # Create a binary mask of non-zero target pixels - mask = target_tensor > 0 - - # If no positive pixels, return 0.5 (random chance) - if not torch.any(mask): - return 0.5 - - # Get masked predictions and binary ground truth - masked_pred = pred_tensor[mask].cpu().numpy() - masked_target = torch.ones_like(target_tensor[mask]).cpu().numpy() - - # Get an equal number of negative samples - neg_mask = ~mask - if torch.sum(neg_mask) > 0: - # Select same number of negative samples as positive ones - num_pos = torch.sum(mask).item() - num_neg = min(num_pos, torch.sum(neg_mask).item()) - - # Get indices of negative samples - neg_indices = torch.nonzero(neg_mask).squeeze() - if neg_indices.numel() > 0: - if neg_indices.numel() > num_neg: - # Random sample negative indices if we have more than we need - perm = torch.randperm(neg_indices.numel(), device=device) - neg_indices = neg_indices[perm[:num_neg]] - - # Get predictions for negative samples and set target to 0 - neg_pred = pred_tensor[neg_indices].cpu().numpy() - neg_target = torch.zeros(neg_indices.numel()).numpy() - - # Combine positive and negative samples - masked_pred = np.concatenate([masked_pred, neg_pred]) - masked_target = np.concatenate([masked_target, neg_target]) - - # Compute AUC score - try: - return metrics.roc_auc_score(masked_target, masked_pred) - except ValueError: - # In case of errors, fallback to 0.5 - return 0.5 - -def compute_spearman_correlation_gpu(pred, target, device='cuda'): - """ - GPU-accelerated implementation of Spearman correlation computation. - - Args: - pred (np.ndarray): Predicted heatmap - target (np.ndarray): Target heatmap - device (str): Device to run computation on ('cuda' or 'cpu') - - Returns: - float: Spearman correlation coefficient - """ - import torch - - # Flatten arrays - pred_flat = pred.flatten() - target_flat = target.flatten() - - # Convert to PyTorch tensors - pred_tensor = torch.tensor(pred_flat, device=device) - target_tensor = torch.tensor(target_flat, device=device) - - # Compute ranks - pred_rank = torch.argsort(torch.argsort(pred_tensor)).float() - target_rank = torch.argsort(torch.argsort(target_tensor)).float() - - # Compute mean ranks - pred_mean = torch.mean(pred_rank) - target_mean = torch.mean(target_rank) - - # Compute numerator and denominator - numerator = torch.sum((pred_rank - pred_mean) * (target_rank - target_mean)) - denominator = torch.sqrt(torch.sum((pred_rank - pred_mean)**2) * torch.sum((target_rank - target_mean)**2)) - - # Compute correlation - if denominator > 0: - correlation = numerator / denominator - return correlation.cpu().item() - else: - return 0.0 - -def compute_crossentropy_gpu(pred, target, eps=1e-10, device='cuda'): - """ - GPU-accelerated implementation of cross-entropy computation. 
- - Args: - pred (np.ndarray): Predicted heatmap - target (np.ndarray): Target heatmap - eps (float): Small value to avoid numerical issues - device (str): Device to run computation on ('cuda' or 'cpu') - - Returns: - float: Cross-entropy loss - """ - import torch - import torch.nn.functional as F - - # Convert to PyTorch tensors - pred_tensor = torch.tensor(pred, device=device).float() - target_tensor = torch.tensor(target, device=device).float() - - # Normalize target to sum to 1 - target_sum = torch.sum(target_tensor) - if target_sum > 0: - target_tensor = target_tensor / target_sum - - # Normalize prediction to sum to 1 - pred_sum = torch.sum(pred_tensor) - if pred_sum > 0: - pred_tensor = pred_tensor / pred_sum - - # Add small epsilon to avoid log(0) - pred_tensor = torch.clamp(pred_tensor, min=eps) - - # Compute cross-entropy loss - loss = -torch.sum(target_tensor * torch.log(pred_tensor)) - - return loss.cpu().item() - -# Function to process a batch of correlation computations on GPU -def batch_compute_correlations_gpu(test_maps, reference_maps, metric='auc', device='cuda'): - """ - Process a batch of correlation computations on GPU for improved performance. - For Spearman correlation, uses scipy's implementation instead of the GPU version - - Args: - test_maps (list): List of test maps - reference_maps (list): List of reference maps - metric (str): Metric to use ('auc', 'spearman', 'crossentropy') - device (str): Device to run computation on ('cuda' or 'cpu') - - Returns: - list: Correlation scores for each pair of maps - """ - assert len(test_maps) == len(reference_maps), "Number of test and reference maps must match" - from scipy.stats import spearmanr - - results = [] - for test_map, reference_map in zip(test_maps, reference_maps): - # Normalize maps - test_map = (test_map - test_map.min()) / (test_map.max() - test_map.min() + 1e-10) - reference_map = (reference_map - reference_map.min()) / (reference_map.max() - reference_map.min() + 1e-10) - - # Compute correlation using appropriate function - if metric.lower() == 'auc': - score = compute_AUC_gpu(test_map, reference_map, device) - elif metric.lower() == 'spearman': - # Use scipy's spearman implementation instead of GPU version - score, _ = spearmanr(test_map.flatten(), reference_map.flatten()) - elif metric.lower() == 'crossentropy': - score = compute_crossentropy_gpu(test_map, reference_map, device) - else: - raise ValueError(f"Unsupported metric: {metric}") - - results.append(score) - - return results def save_single_clickmap(all_clickmaps, idx, img_name, image_path, file_inclusion_filter=None, save_dir=None): """Helper function to save a single clickmap""" @@ -1250,10 +523,6 @@ def save_clickmaps_parallel(all_clickmaps, final_keep_index, output_dir, experim int Number of successfully saved files """ - from joblib import Parallel, delayed - import os - import numpy as np - from tqdm import tqdm # Create output directory if it doesn't exist save_dir = os.path.join(output_dir, experiment_name) @@ -1309,11 +578,7 @@ def save_clickmaps_to_hdf5(all_clickmaps, final_keep_index, hdf5_path, clickmap_ int Number of successfully saved files """ - import os - import numpy as np - from tqdm import tqdm - import h5py - import time + # Ensure the directory exists os.makedirs(os.path.dirname(hdf5_path), exist_ok=True) @@ -1348,7 +613,7 @@ def save_clickmaps_to_hdf5(all_clickmaps, final_keep_index, hdf5_path, clickmap_ dataset_name = img_name.replace('/', '_') # Get the clickmap - hmp = all_clickmaps[i] + hmp = all_clickmaps[img_name] 
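Editor's note: with `all_clickmaps` now a dict, the HDF5 writer above looks maps up by `img_name` rather than by list position, which also avoids the positional-index mismatch the old `all_clickmaps[i]` could hit. A minimal reader sketch for the resulting layout, assuming the `clickmaps/<sanitized name>/clickmap` structure that `tools/find_top_bottom.py` later in this diff also reads; the file path and image name below are placeholders:

```python
import h5py

def load_mean_clickmap(h5_path, img_name):
    """Fetch one image's per-subject clickmaps and average over subjects."""
    dataset_name = img_name.replace('/', '_')  # same sanitization as the writer
    with h5py.File(h5_path, 'r') as f:
        group = f["clickmaps"]
        if dataset_name not in group:
            return None
        return group[dataset_name]['clickmap'][:].mean(0)

# Hypothetical usage; substitute a real batch file and image key
hmp = load_mean_clickmap("assets/imgnet/jay_imagenet_val_08_27_2025_batch001.h5",
                         "n01440764/ILSVRC2012_val_00000293.JPEG")
```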
bin_clickmaps = clickmap_bins[img_name] # Check if dataset already exists and delete it if it does if dataset_name in f["clickmaps"]: @@ -1662,19 +927,22 @@ def process_all_maps_multi_thresh_gpu( thresholds=10, return_before_blur=False, time_based_bins=False, + save_to_disk=False, + maximum_length=5000, ): """ Simplified function to blur clickmaps on GPU in batches with adaptive kernel sizing """ - import torch - from tqdm import tqdm - import numpy as np - + if save_to_disk: + assert return_before_blur + temp_file = h5py.File(config['temp_dir'], 'w') + temp_group = temp_file.create_group("clickmaps") # Extract basic parameters blur_size = config["blur_size"] blur_sigma = config.get("blur_sigma", blur_size) image_shape = config["image_shape"] min_subjects = config["min_subjects"] + max_subjects = config["max_subjects"] min_clicks = config["min_clicks"] max_kernel_size = config.get("max_kernel_size", 51) blur_sigma_function = config.get("blur_sigma_function", lambda x: x) @@ -1691,17 +959,19 @@ def process_all_maps_multi_thresh_gpu( print("Pre-processing clickmaps on CPU...") # Prepare data structures - all_clickmaps = [] + all_clickmaps = {} keep_index = [] categories = [] final_clickmaps = {} clickmap_bins = {} click_counts = {} # Track click counts for each image - + total_maps = 0 + if save_to_disk: + save_count = 0 # Preprocess all clickmaps first to binary maps - for key, trials in clickmaps.items(): + for clickmap_idx, (key, trials) in tqdm(enumerate(clickmaps.items()), "Pre-processing on CPU"): if len(trials) < min_subjects: - print("Not enough subjects", key, len(trials)) + # print("Not enough subjects", key, len(trials)) continue if time_based_bins: lens = [len(x) for x in trials] @@ -1709,9 +979,6 @@ def process_all_maps_multi_thresh_gpu( for trial in trials: max_count = len(trial) half_count = int(max_count/2) - #below_mean = np.linspace(max(half_count * .1, min_clicks), half_count, thresholds //2).astype(int) - #above_mean = np.linspace(half_count+1, max_count + 1, thresholds // 2).astype(int) - #trial_bin = np.concatenate([below_mean, above_mean]) trial_bin = np.linspace(max(half_count * .1, min_clicks), max_count, thresholds).astype(int) bins.append(trial_bin) bin_clickmaps = [] @@ -1732,11 +999,12 @@ def process_all_maps_multi_thresh_gpu( # org_number = binary_maps.sum((-2, -1)) binary_maps = binary_maps[mask] # If we have enough valid maps, average them and keep this image - # if len(binary_maps) >= min_subjects: if average_maps: bin_clickmaps.append(np.array(binary_maps).mean(0, keepdims=True)) else: bin_clickmaps.append(np.array(binary_maps)) + # else: + # print("Not enough subjects", key, len(binary_maps)) else: # Get max count then do thresholds from that @@ -1768,13 +1036,12 @@ def process_all_maps_multi_thresh_gpu( mask = binary_maps.sum((-2, -1)) >= min_clicks binary_maps = binary_maps[mask] # If we have enough valid maps, average them and keep this image - if len(binary_maps) >= min_subjects: - if average_maps: - bin_clickmaps.append(np.array(binary_maps).mean(0, keepdims=True)) - else: - bin_clickmaps.append(np.array(binary_maps)) + if average_maps: + bin_clickmaps.append(np.array(binary_maps).mean(0, keepdims=True)) else: - print("Not enough subjects", key, len(binary_maps)) + bin_clickmaps.append(np.array(binary_maps)) + # else: + # print("Not enough subjects", key, len(binary_maps)) # Skip if we don't have any valid bin_clickmaps if not bin_clickmaps: @@ -1799,13 +1066,24 @@ def process_all_maps_multi_thresh_gpu( click_counts[key] = len(trials) # Store total 
clicks for this image clickmap_bins[key] = np.asarray(bin_counts) # Add to all_clickmaps with the appropriate method - if return_before_blur: - all_clickmaps.append(np.stack(bin_clickmaps, axis=0)) + if save_to_disk: + key = key.replace('/', '_') + temp_group.create_dataset(key, data=np.stack(bin_clickmaps, axis=0)) + + elif return_before_blur: + bin_clickmaps = np.stack(bin_clickmaps, axis=0) + if max_subjects > 0: + n_keep = min(max_subjects, bin_clickmaps.shape[1]) + bin_clickmaps = bin_clickmaps[:, :n_keep, :, :] + all_clickmaps[key] = bin_clickmaps else: - all_clickmaps.append(np.concatenate(bin_clickmaps, axis=0)) - if not all_clickmaps: + all_clickmaps[key] = np.concatenate(bin_clickmaps, axis=0) + if save_to_disk: + temp_file.close() + + if not save_to_disk and not all_clickmaps: print("No valid clickmaps to process") - return {}, [], [], [], {} + return {}, {}, [], [], {}, {} if return_before_blur: return final_clickmaps, all_clickmaps, categories, keep_index, click_counts, clickmap_bins @@ -1815,7 +1093,6 @@ def process_all_maps_multi_thresh_gpu( print(f"Preparing to blur {total_maps} image clickmaps using GPU with adaptive kernel sizing...") # Convert all maps to tensors - all_tensors = [torch.from_numpy(maps).float() for maps in all_clickmaps] # Group images by their required kernel size to batch efficiently kernel_groups = {} @@ -1838,12 +1116,12 @@ def process_all_maps_multi_thresh_gpu( kernel_key = (adj_blur_size, adj_blur_sigma) if kernel_key not in kernel_groups: kernel_groups[kernel_key] = [] - kernel_groups[kernel_key].append(idx) + kernel_groups[kernel_key].append(key) print(f"Processing {len(kernel_groups)} different kernel sizes...") # Process each kernel group separately - for (kernel_size, kernel_sigma), image_indices in tqdm(kernel_groups.items(), desc="Processing kernel groups"): - print(f"Processing {len(image_indices)} images with kernel size {kernel_size}, sigma {kernel_sigma}") + for (kernel_size, kernel_sigma), image_keys in tqdm(kernel_groups.items(), desc="Processing kernel groups"): + print(f"Processing {len(image_keys)} images with kernel size {kernel_size}, sigma {kernel_sigma}") # print(f"Processing {len(image_indices)} images with kernel size {kernel_size}, sigma {kernel_sigma}") # Create kernel for this group @@ -1851,26 +1129,28 @@ def process_all_maps_multi_thresh_gpu( kernel = circle_kernel(kernel_size, kernel_sigma, 'cuda') # Process images in this group in batches - group_batch_size = min(gpu_batch_size, len(image_indices)) - num_batches = (len(image_indices) + group_batch_size - 1) // group_batch_size + group_batch_size = min(gpu_batch_size, len(image_keys)) + num_batches = (len(image_keys) + group_batch_size - 1) // group_batch_size for batch_idx in range(num_batches): # Get batch indices for this kernel group batch_start = batch_idx * group_batch_size - batch_end = min(batch_start + group_batch_size, len(image_indices)) - batch_image_indices = image_indices[batch_start:batch_end] + batch_end = min(batch_start + group_batch_size, len(image_keys)) + batch_image_keys = image_keys[batch_start:batch_end] # Get tensors for this batch - batch_tensors = [all_tensors[idx] for idx in batch_image_indices] - + batch_tensors = {} + for key in batch_image_keys: + single_tensor = torch.from_numpy(all_clickmaps[key]).float() + batch_tensors[key] = single_tensor # Group batch
tensors by shape to handle different dimensions within the same kernel group shape_groups = {} - for i, tensor in enumerate(batch_tensors): + for i, (key, tensor) in enumerate(batch_tensors.items()): shape_key = tuple(tensor.shape) if shape_key not in shape_groups: shape_groups[shape_key] = [] - shape_groups[shape_key].append((i, tensor, batch_image_indices[i])) + shape_groups[shape_key].append((i, tensor, key)) # Process each shape group separately for shape, tensor_data in shape_groups.items(): - indices, tensors, img_indices = zip(*tensor_data) + indices, tensors, img_keys = zip(*tensor_data) # Calculate memory-safe batch size for this shape group # Estimate memory usage: shape[0] * shape[1] * shape[2] * 4 bytes per float32 memory_per_tensor = shape[0] * shape[1] * shape[2] * 4 # bytes @@ -1889,7 +1170,7 @@ def process_all_maps_multi_thresh_gpu( end_idx = min(start_idx + safe_batch_size, len(tensors)) batch_tensors_subset = tensors[start_idx:end_idx] - batch_img_indices_subset = img_indices[start_idx:end_idx] + batch_img_keys_subset = img_keys[start_idx:end_idx] try: # Try to concatenate tensors of the same shape shape_batch_tensor = torch.cat(batch_tensors_subset, dim=0).unsqueeze(1).to('cuda') @@ -1898,8 +1179,9 @@ def process_all_maps_multi_thresh_gpu( blurred_tensor = convolve(shape_batch_tensor, kernel, double_conv=True) # Convert back to numpy and update results blurred_maps = blurred_tensor.squeeze(1).cpu().numpy() - for i, img_idx in enumerate(batch_img_indices_subset): - all_clickmaps[img_idx] = blurred_maps[i*thresholds:(i+1)*thresholds] # Keep the same shape with extra dimension + for i, img_key in enumerate(batch_img_keys_subset): + + all_clickmaps[img_key] = blurred_maps[i*thresholds:(i+1)*thresholds] # Keep the same shape with extra dimension # Clean up GPU memory for this shape batch del shape_batch_tensor, blurred_tensor torch.cuda.empty_cache() @@ -1907,10 +1189,10 @@ def process_all_maps_multi_thresh_gpu( except Exception as e: # If concatenation still fails, process individually print(f"Shape batch processing failed for shape {shape} (batch {shape_batch_idx+1}/{num_shape_batches}), processing {len(batch_tensors_subset)} images individually: {e}") - for i, (tensor, img_idx) in enumerate(zip(batch_tensors_subset, batch_img_indices_subset)): + for i, (tensor, img_key) in enumerate(zip(batch_tensors_subset, batch_img_keys_subset)): gpu_tensor = tensor.unsqueeze(1).to('cuda') blurred_tensor = convolve(gpu_tensor, kernel, double_conv=True) - all_clickmaps[img_idx] = blurred_tensor.squeeze(1).cpu().numpy() + all_clickmaps[img_key] = blurred_tensor.squeeze(1).cpu().numpy() # Clean up GPU memory del gpu_tensor, blurred_tensor @@ -1921,7 +1203,6 @@ def process_all_maps_multi_thresh_gpu( torch.cuda.empty_cache() return final_clickmaps, all_clickmaps, categories, keep_index, click_counts, clickmap_bins - def blur_maps_for_cf(all_clickmaps, blur_size, blur_sigma, gpu_batch_size, native_size=None): # Step 2: Prepare for batch blurring on GPU total_maps = len(all_clickmaps) @@ -2021,7 +1302,7 @@ def blur_maps_for_cf(all_clickmaps, blur_size, blur_sigma, gpu_batch_size, nativ torch.cuda.empty_cache() return all_clickmaps -def sparse_scale(img, scale, device='cpu'): +def sparse_scale(img, scale, device='cpu', pad=True): if isinstance(img, np.ndarray): img = torch.tensor(img).to(device) input_shape = img.shape @@ -2054,15 +1335,17 @@ def sparse_scale(img, scale, device='cpu'): count_img[count_img==0] = 1 scaled_img = scaled_img/count_img - - pad_h = (org_h - new_h) // 2 - pad_w = 
(org_w - new_w) // 2 - diff_h = org_h - new_h - pad_h*2 - diff_w = org_w - new_w - pad_w*2 - padded_img = F.pad(scaled_img, (pad_h, pad_h+diff_h, pad_w, pad_w+diff_w)) - padded_img = padded_img.reshape(input_shape) - return padded_img - + if pad: + pad_h = (org_h - new_h) // 2 + pad_w = (org_w - new_w) // 2 + diff_h = org_h - new_h - pad_h*2 + diff_w = org_w - new_w - pad_w*2 + padded_img = F.pad(scaled_img, (pad_h, pad_h+diff_h, pad_w, pad_w+diff_w)) + padded_img = padded_img.reshape(input_shape) + return padded_img + else: + return scaled_img + def scale_img(img, scale, device='cpu'): if isinstance(img, np.ndarray): img = torch.tensor(img) @@ -2101,17 +1384,19 @@ def to_torch(x, device, dtype): return x.to(device, dtype=dtype) return torch.as_tensor(x, device=device, dtype=dtype) -def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): - was_tensor = isinstance(img, torch.Tensor) - org_dtype = img.dtype if hasattr(img, 'dtype') else 'float32' +def project_img_gpu(img, depth, target_depth, w2c_s, w2c_t, K_s, K_t, device): + org_dtype = img.dtype if hasattr(img, 'dtype') else 'float32' + input_shape = img.shape + was_tensor = isinstance(img, torch.Tensor) + # Move everything to torch if isinstance(img, torch.Tensor): img = img else: img = torch.tensor(img).float() dtype = img.dtype - input_shape = img.shape + img = img.to(device) depth = to_torch(depth, device, dtype) # Convert to numpy to avoid lazy tensor operation in parallel @@ -2119,7 +1404,6 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): K_s = K_s.cpu().numpy() K_s_inv = np.linalg.inv(K_s) K_s_inv = to_torch(K_s_inv, device, dtype) - # K_s = to_torch(K_s, device, dtype) K_t = to_torch(K_t, device, dtype) w2c_s = to_torch(w2c_s, device, dtype) w2c_t = to_torch(w2c_t, device, dtype) @@ -2128,10 +1412,8 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): img = img.unsqueeze(0) # (1,H,W) elif img.ndim > 3: img = img.reshape(-1, img.shape[-2], img.shape[-1]) - C, H, W = img.shape assert depth.shape[-2:] == (H, W), "depth must match HxW of img" - R_s, T_s = w2c_s[:3, :3], w2c_s[:3, 3] R_t, T_t = w2c_t[:3, :3], w2c_t[:3, 3] @@ -2141,6 +1423,7 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): torch.arange(W, device=device, dtype=dtype), indexing='ij' ) + pixels_h = torch.stack([xs, ys, torch.ones_like(xs)], dim=0).reshape(3, -1) # (3, N) depth_flat = depth.reshape(-1) # (N,) @@ -2164,10 +1447,11 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): # ---- Valid mask ---- valid = ( (x_t >= 0) & (x_t < W) & - (y_t >= 0) & (y_t < H) & - (z_proj > 0) + (y_t >= 0) & (y_t < H) # & + # (z_proj > 0) ) + if not valid.any(): out = torch.zeros_like(img) if not was_tensor: @@ -2177,6 +1461,7 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): x_t = x_t[valid] y_t = y_t[valid] z_t = z_proj[valid] + img_flat = img.view(img.shape[0], -1)[:, valid] # (C, N_valid) # ---- Z-buffer via scatter_reduce (amin) ---- @@ -2184,7 +1469,9 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): num_pixels = H * W # Per-target-pixel min depth - z_min = torch.full((num_pixels,), float('inf'), device=device, dtype=z_t.dtype) + # z_min = torch.full((num_pixels,), float('inf'), device=device, dtype=z_t.dtype) + # Use this to mask any point that's behind the actual object + z_min = torch.tensor(target_depth*1.1).reshape(-1).to(device).to(z_t.dtype) # PyTorch >= 1.12: Tensor.scatter_reduce_ z_min = z_min.scatter_reduce(0, flat_indices, z_t, reduce='amin', 
include_self=True) @@ -2202,9 +1489,6 @@ def project_img_gpu(img, depth, w2c_s, w2c_t, K_s, K_t, device): # Restore original shape/dtype if not was_tensor: target = target.detach().cpu().numpy().astype(org_dtype) - else: - target = target.detach().cpu().astype(org_dtype) - target = target.reshape(input_shape) return target @@ -2230,4 +1514,4 @@ def get_rot_target(img_idx): target_img_ids.append(target_img_idx) target_img_diffs.append(i) - return target_img_ids, target_img_diffs \ No newline at end of file + return target_img_ids, target_img_diffs diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/find_missing_images.py b/tools/find_missing_images.py new file mode 100644 index 0000000..90151e5 --- /dev/null +++ b/tools/find_missing_images.py @@ -0,0 +1,16 @@ +import os +import json +from tools.find_top_bottom import get_num_subjects + +if __name__ == "__main__": + num_subjects = get_num_subjects() + missing_images = {"val": []} + print(len(num_subjects)) + for img_name, count in num_subjects.items(): + if count < 5: + missing_images["val"].append((img_name, count)) + print(img_name, count) + + with open("missing_val.json", 'w') as f: + json_content = json.dumps(missing_images, indent=4) + f.write(json_content) \ No newline at end of file diff --git a/tools/find_top_bottom.py b/tools/find_top_bottom.py new file mode 100644 index 0000000..0530610 --- /dev/null +++ b/tools/find_top_bottom.py @@ -0,0 +1,106 @@ +import os +import numpy as np +from src.utils import process_clickme_data +from tqdm import tqdm +import h5py +import json +from matplotlib import pyplot as plt +from PIL import Image + +def get_num_subjects(): + clickme_data_file = "/cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz" + clickme_data = process_clickme_data(clickme_data_file, True) + total_numbers = {} + for _, row in tqdm(clickme_data.iterrows(), total=len(clickme_data), desc="Processing clickmaps"): + image_path = row['image_path'] + image_file_name = row['image_path'].split(os.path.sep)[-1] + if "ILSVRC2012_val" not in image_path: + continue + if image_file_name not in total_numbers: + total_numbers[image_file_name] = 1 + else: + total_numbers[image_file_name] += 1 + return total_numbers + +def plot_clickmap(img, hmp, score, num_subjects, img_name, image_output_dir): + f = plt.figure() + plt.subplot(1, 2, 1) + plt.imshow(np.asarray(img)) + title = f"{img_name}\nSpearman: {score}\nNum Subjects: {num_subjects}" + plt.title(title) + plt.axis("off") + plt.subplot(1, 2, 2) + plt.imshow(hmp) + plt.axis("off") + plt.savefig(os.path.join(image_output_dir, img_name.replace('/', '_'))) + plt.close() + return + +if __name__ == "__main__": + scores_json = "assets/exp/exp_30_subjects_08_27_2025_spearman_ceiling_floor_results.json" + data_root = "/gpfs/data/shared/imagenet/ILSVRC2012/val" + image_output_dir = "temp/top_bot_imgs_30" + os.makedirs(image_output_dir, exist_ok=True) + with open(scores_json, 'r') as f: + scores_dict = json.load(f)['all_img_ceilings'] + metadata = np.load("image_metadata/jay_imagenet_val_04_30_2025_dimensions.npy", allow_pickle=True).item() + val_map_files = ['assets/imgnet/jay_imagenet_val_08_27_2025_batch001.h5', 'assets/imgnet/jay_imagenet_val_08_27_2025_batch002.h5', + 'assets/imgnet/jay_imagenet_val_08_27_2025_batch003.h5', 'assets/imgnet/jay_imagenet_val_08_27_2025_batch004.h5'] + top10 = 
dict(sorted(scores_dict.items(), key=lambda x: x[1], reverse=True)[:10]) + bot10 = dict(sorted(scores_dict.items(), key=lambda x: x[1], reverse=False)[:10]) + top10_maps = {} + bot10_maps = {} + for map_file in val_map_files: + map_content = h5py.File(map_file, 'r')['clickmaps'] + for img_name in top10: + img_key = img_name.split('/')[1] + img_name = img_name.replace('/', '_') + if img_name in map_content: + top10_maps[img_key] = map_content[img_name]['clickmap'][:].mean(0) + for bot_img_name in bot10: + print(bot_img_name) + img_key = bot_img_name.split('/')[1] + bot_img_name = bot_img_name.replace('/', '_') + if bot_img_name in map_content: + bot10_maps[img_key] = map_content[bot_img_name]['clickmap'][:].mean(0) + + top10_paths = [] + bot10_paths = [] + for img_name in top10: + img_name = img_name.split('/')[1] + img_path = os.path.join(data_root, f'{img_name}') + top10_paths.append(img_path) + + for img_name in bot10: + img_name = img_name.split('/')[1] + img_path = os.path.join(data_root, f'{img_name}') + bot10_paths.append(img_path) + + for i, img_name in enumerate(top10): + score = scores_dict[img_name] + metadata_shape = metadata[img_name] + + img_name = img_name.split('/')[1] + hmp = top10_maps[img_name] + img = Image.open(top10_paths[i]) + # num_subjects = num_subjects_dict[img_name] + num_subjects = 1 + img = np.asarray(img) + if img.shape[0] != hmp.shape[0]: + print(img_name, img.shape, hmp.shape, metadata_shape) + plot_clickmap(img, hmp, score, num_subjects, f"top_{img_name}", image_output_dir) + + + for i, img_name in enumerate(bot10): + score = scores_dict[img_name] + metadata_shape = metadata[img_name] + + img_name = img_name.split('/')[1] + hmp = bot10_maps[img_name] + img = Image.open(bot10_paths[i]) + num_subjects = 1 + # num_subjects = num_subjects_dict[img_name] + img = np.asarray(img) + if img.shape[0] != hmp.shape[0]: + print(img_name, img.shape, hmp.shape, metadata_shape) + plot_clickmap(img, hmp, score, num_subjects, f"bottom_{img_name}", image_output_dir) \ No newline at end of file diff --git a/tools/sample_clickmaps.py b/tools/sample_clickmaps.py new file mode 100644 index 0000000..3f0dd44 --- /dev/null +++ b/tools/sample_clickmaps.py @@ -0,0 +1,64 @@ +import os +import numpy as np +from src.utils import process_clickme_data +from tqdm import tqdm +# Sample clickmaps that have more than 30 subjects while maintaining class distribution +if __name__ == "__main__": + clickme_data_file = "/cifs/data/tserre_lrs/projects/projects/prj_video_imagenet/human_clickme_data_processing/clickme_datasets/val_combined_08_27_2025.npz" + clickme_data = process_clickme_data(clickme_data_file, True) + total_maps = len(clickme_data) + total_numbers = {} + map_file = 'assets/jay_imagenet_val_08_27_2025_batch001.h5' + + #target_img_names = ["ILSVRC2012_val_00008676.JPEG", "ILSVRC2012_val_00009263.JPEG", "ILSVRC2012_val_00009305.JPEG", "ILSVRC2012_val_00013865.JPEG" + # "ILSVRC2012_val_00021166.JPEG", "ILSVRC2012_val_00023616.JPEG", "ILSVRC2012_val_00024753.JPEG", "ILSVRC2012_val_00027647.JPEG", + # "ILSVRC2012_val_00034111.JPEG", "ILSVRC2012_val_00038455.JPEG"] + for _, row in tqdm(clickme_data.iterrows(), total=len(clickme_data), desc="Processing clickmaps"): + image_path = row['image_path'] + image_file_name = os.path.sep.join(row['image_path'].split(os.path.sep)[-2:]) + cls_name = image_file_name.split('/')[0] + if cls_name not in total_numbers: + total_numbers[cls_name] = {} + if "ILSVRC2012_val" not in image_file_name: + continue + if image_path not in total_numbers[cls_name]: + 
total_numbers[cls_name][image_path] = 1 + else: + total_numbers[cls_name][image_path] += 1 + sampled_img_paths = {} + total_counts = [] + for cls_name, image_paths in total_numbers.items(): + numbers = [] + sampled_img_paths[cls_name] = [] + sampled_names = [] + for img_path, number in image_paths.items(): + numbers.append(number) + img_name = img_path.split('/')[-1] + #if img_name in target_img_names: + if number > 20: + sampled_names.append(img_path) + sampled_img_paths[cls_name].append(img_path) + numbers = np.array(numbers) + larger_than = np.sum(numbers>20) + if larger_than > 0: + total_counts.append(larger_than) + + for cls_name, img_paths in sampled_img_paths.items(): + sampled_img_paths[cls_name] = img_paths[:5] + sampled_clickme_data = clickme_data.copy() + + allowed_files = { + f"{img_path}" for _, img_paths in sampled_img_paths.items() for img_path in img_paths + } + print(allowed_files) + print(sampled_clickme_data) + print(len(sampled_clickme_data)) + # Keep only rows whose file name is allowed + sampled_clickme_data = sampled_clickme_data[ + sampled_clickme_data["image_path"].isin(allowed_files) + ] + print(len(sampled_clickme_data)) + sampled_clickme_data.to_csv(os.path.join('clickme_datasets', 'bottom_imgnet_val.csv')) + + + \ No newline at end of file diff --git a/tools/validate_size.py b/tools/validate_size.py new file mode 100644 index 0000000..e8ff0ad --- /dev/null +++ b/tools/validate_size.py @@ -0,0 +1,26 @@ +import os +import numpy as np +import h5py +from PIL import Image +from tqdm import tqdm + +if __name__ == "__main__": + hdf_path = "assets/imgnet" + metadata = np.load("image_metadata/jay_imagenet_train_04_30_2025_dimensions.npy", allow_pickle=True).item() + data_root = "/gpfs/data/shared/imagenet/ILSVRC2012/train" + for i, hdf_file in tqdm(enumerate(os.listdir(hdf_path))): + if not hdf_file.endswith('.h5'): + continue + map_content = h5py.File(os.path.join(hdf_path, hdf_file), 'r')['clickmaps'] + for img_name in map_content: + metadata_img_name = img_name.replace('_', '/', 1) + img_cls = metadata_img_name.split('/')[0] + folder_img_name = metadata_img_name.split('/')[1] + if not os.path.exists(os.path.join(data_root, img_cls, folder_img_name)) or metadata_img_name not in metadata: + continue + img = Image.open(os.path.join(data_root, img_cls, folder_img_name)) + img = np.asarray(img) + metadata_shape = metadata[metadata_img_name][::-1] + hmp_shape = map_content[img_name]['clickmap'][:].mean(0).shape + if metadata_shape != hmp_shape or img.shape[:2] != hmp_shape: + print(i, img_name, img.shape[:2], metadata_shape, hmp_shape) \ No newline at end of file diff --git a/tools/verify_list.py b/tools/verify_list.py new file mode 100644 index 0000000..2c52c3a --- /dev/null +++ b/tools/verify_list.py @@ -0,0 +1,13 @@ +import json +import os + +if __name__ == "__main__": + img_list = "clickme_datasets/missing_imgnet_train.json" + imgnet_path = "/gpfs/data/shared/imagenet/ILSVRC2012/train" + with open(img_list, 'r') as f: + json_content = json.load(f) + + for img_cls, img_list in json_content.items(): + for img in img_list: + if not os.path.exists(os.path.join(imgnet_path, img_cls, img)): + print("Missing", img) \ No newline at end of file
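Editor's note: the most subtle change in this diff is in `project_img_gpu`, where the z-buffer is now seeded with `target_depth * 1.1` instead of `inf`, so reprojected points that land behind the target view's own surface are treated as occluded. A self-contained sketch of that masking step under the same assumptions (`z_t` holds candidate depths in the target camera, `flat_indices` their flattened target-pixel indices; the helper name and toy tensors are illustrative only):

```python
import torch

def zbuffer_mask(z_t, flat_indices, target_depth, slack=1.1):
    # Seed each target pixel's depth bound from the target view itself,
    # with a small slack factor, mirroring the diff's target_depth * 1.1
    z_min = (target_depth * slack).reshape(-1).to(z_t.dtype)
    # Keep the nearest candidate depth per target pixel (PyTorch >= 1.12)
    z_min = z_min.scatter_reduce(0, flat_indices, z_t, reduce='amin', include_self=True)
    # A point survives only if it is the front-most depth at its pixel
    return z_t <= z_min[flat_indices]

target_depth = torch.full((4, 4), 2.0)   # target view's own depth map
z_t = torch.tensor([1.5, 2.5, 1.0])      # candidate depths after projection
flat_indices = torch.tensor([0, 0, 5])   # target pixels they splat onto
print(zbuffer_mask(z_t, flat_indices, target_depth))
# tensor([ True, False,  True]): the 2.5 point loses to 1.5 at pixel 0
```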