diff --git a/README.md b/README.md index 2f4a870..4a1b214 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ZedProfiler -[![Coverage](https://img.shields.io/badge/coverage-94%25-brightgreen)](#quality-gates) +[![Coverage](https://img.shields.io/badge/coverage-96%25-brightgreen)](#quality-gates) CPU-first 3D image feature extraction toolkit for high-content and high-throughput image-based profiling. diff --git a/pyproject.toml b/pyproject.toml index ab4e244..f3f4e00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,13 +22,10 @@ dependencies = [ "bioio-tifffile>=1.3", "fire>=0.7.1", "jinja2>=3.1.6", - "numpy>=2.2.6", + "numpy>=2.2", "pandas>=3.0.2", - "scipy>=1.17.1", - "tqdm>=4.67.3", - "pandera>=0.31.1", - "scikit-image>=0.26", - + "scikit-image>=0.25", + "scipy>=1.15", ] scripts.ZedProfiler = "ZedProfiler.cli:trigger" diff --git a/src/zedprofiler/featurization/colocalization.py b/src/zedprofiler/featurization/colocalization.py index 3f798c8..2971f1e 100644 --- a/src/zedprofiler/featurization/colocalization.py +++ b/src/zedprofiler/featurization/colocalization.py @@ -1,10 +1,503 @@ -"""Colocalization featurization module scaffold.""" +"""Colocalization feature extraction utilities for 3D image objects. + +Computes per-object colocalization metrics (Pearson correlation, Manders +coefficients, overlap coefficient, K1/K2 coefficients) between pairs of +fluorescence channels using the Costes automatic thresholding method. +""" from __future__ import annotations -from zedprofiler.exceptions import ZedProfilerError +from typing import Dict, Tuple + +import numpy +import scipy.ndimage +import skimage + +from zedprofiler.image_utils.image_utils import ( + crop_3D_image, + new_crop_border, + select_objects_from_label, +) + +COSTES_R_FAR_THRESHOLD = 0.45 +COSTES_R_MID_THRESHOLD = 0.35 +COSTES_R_NEAR_THRESHOLD = 0.25 +MIN_PEARSON_POINTS = 2 +WIDE_BISECTION_WINDOW = 6 +UINT8_MAX = 255 +UINT16_MAX = 65535 + + +def _require_scipy() -> None: + if scipy is None: + raise ModuleNotFoundError( + "scipy is required for colocalization features. " + "Install zedprofiler with scipy." + ) + + +def _require_skimage() -> None: + if skimage is None: + raise ModuleNotFoundError( + "scikit-image is required for colocalization features. " + "Install zedprofiler with scikit-image." + ) + + +def linear_costes_threshold_calculation( + first_image: numpy.ndarray, + second_image: numpy.ndarray, + scale_max: int = 255, + fast_costes: str = "Accurate", +) -> Tuple[float, float]: + """ + Finds the Costes Automatic Threshold for colocalization using a linear algorithm. + Candidate thresholds are gradually decreased until Pearson R falls below 0. + If "Fast" mode is enabled the "steps" between tested thresholds will be increased + when Pearson R is much greater than 0. The other mode is "Accurate" which + will always step down by the same amount. + + Parameters + ---------- + first_image : numpy.ndarray + The first fluorescence image. + second_image : numpy.ndarray + The second fluorescence image. + scale_max : int, optional + The maximum value for the image scale, by default 255. + fast_costes : str, optional + The mode for the Costes threshold calculation, by default "Accurate". + + Returns + ------- + Tuple[float, float] + The calculated thresholds for the first and second images. + """ + _require_scipy() + i_step = 1 / scale_max # Step size for the threshold as a float + non_zero = (first_image > 0) | (second_image > 0) + xvar = numpy.var(first_image[non_zero], axis=0, ddof=1) + yvar = numpy.var(second_image[non_zero], axis=0, ddof=1) + + xmean = numpy.mean(first_image[non_zero], axis=0) + ymean = numpy.mean(second_image[non_zero], axis=0) + + z = first_image[non_zero] + second_image[non_zero] + zvar = numpy.var(z, axis=0, ddof=1) + + covar = 0.5 * (zvar - (xvar + yvar)) + + denom = 2 * covar + num = (yvar - xvar) + numpy.sqrt( + (yvar - xvar) * (yvar - xvar) + 4 * (covar * covar) + ) + a = num / denom + b = ymean - a * xmean + + # Start at 1 step above the maximum value + img_max = max(first_image.max(), second_image.max()) + i = i_step * ((img_max // i_step) + 1) + + num_true = None + first_image_max = first_image.max() + second_image_max = second_image.max() + + # Initialize without a threshold + costReg, _ = scipy.stats.pearsonr(first_image, second_image) + thr_first_image_c = i + thr_second_image_c = (a * i) + b + while i > first_image_max and (a * i) + b > second_image_max: + i -= i_step + while i > i_step: + thr_first_image_c = i + thr_second_image_c = (a * i) + b + combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c) + try: + # Only run pearsonr if the input has changed. + if (positives := numpy.count_nonzero(combt)) != num_true: + costReg, _ = scipy.stats.pearsonr( + first_image[combt], second_image[combt] + ) + num_true = positives + + if costReg <= 0: + break + elif fast_costes == "Accurate" or i < i_step * 10: + i -= i_step + elif costReg > COSTES_R_FAR_THRESHOLD: + # We're way off, step down 10x + i -= i_step * 10 + elif costReg > COSTES_R_MID_THRESHOLD: + # Still far from 0, step 5x + i -= i_step * 5 + elif costReg > COSTES_R_NEAR_THRESHOLD: + # Step 2x + i -= i_step * 2 + else: + i -= i_step + except ValueError: + break + return thr_first_image_c, thr_second_image_c + + +def bisection_costes_threshold_calculation( + first_image: numpy.ndarray, second_image: numpy.ndarray, scale_max: int = 255 +) -> tuple[float, float]: + """ + Finds the Costes Automatic Threshold for colocalization using a bisection algorithm. + Candidate thresholds are selected from within a window of possible intensities, + this window is narrowed based on the R value of each tested candidate. + We're looking for the first point at 0, and R value can become highly variable + at lower thresholds in some samples. Therefore the candidate tested in each + loop is 1/6th of the window size below the maximum value + (as opposed to the midpoint). + + Parameters + ---------- + first_image : numpy.ndarray + The first fluorescence image. + second_image : numpy.ndarray + The second fluorescence image. + scale_max : int, optional + The maximum value for the image scale, by default 255. + + Returns + ------- + Tuple[float, float] + The calculated thresholds for the first and second images. + """ + _require_scipy() + + non_zero = (first_image > 0) | (second_image > 0) + xvar = numpy.var(first_image[non_zero], axis=0, ddof=1) + yvar = numpy.var(second_image[non_zero], axis=0, ddof=1) + + xmean = numpy.mean(first_image[non_zero], axis=0) + ymean = numpy.mean(second_image[non_zero], axis=0) + + z = first_image[non_zero] + second_image[non_zero] + zvar = numpy.var(z, axis=0, ddof=1) + + covar = 0.5 * (zvar - (xvar + yvar)) + + denom = 2 * covar + num = (yvar - xvar) + numpy.sqrt((yvar - xvar) * (yvar - xvar) + 4 * (covar**2)) + a = num / denom + b = ymean - a * xmean + + # Initialize variables + left = 1 + right = scale_max + mid = ((right - left) // (6 / 5)) + left + lastmid = 0 + # Marks the value with the last positive R value. + valid = 1 + + while lastmid != mid: + thr_first_image_c = mid / scale_max + thr_second_image_c = (a * thr_first_image_c) + b + combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c) + if numpy.count_nonzero(combt) <= MIN_PEARSON_POINTS: + # Can't run meaningful Pearson with only a few values. + left = mid - 1 + else: + try: + costReg, _ = scipy.stats.pearsonr( + first_image[combt], second_image[combt] + ) + if costReg < 0: + left = mid - 1 + elif costReg >= 0: + right = mid + 1 + valid = mid + except ValueError: + # Catch misc Pearson errors with low sample numbers + left = mid - 1 + lastmid = mid + if right - left > WIDE_BISECTION_WINDOW: + mid = ((right - left) // (6 / 5)) + left + else: + mid = ((right - left) // 2) + left + + thr_first_image_c = (valid - 1) / scale_max + thr_second_image_c = (a * thr_first_image_c) + b + + return thr_first_image_c, thr_second_image_c + + +def prepare_two_images_for_colocalization( # noqa: PLR0913 + label_object1: numpy.ndarray, + label_object2: numpy.ndarray, + image_object1: numpy.ndarray, + image_object2: numpy.ndarray, + object_id1: int, + object_id2: int, +) -> Tuple[numpy.ndarray, numpy.ndarray]: + """ + Prepare two images for colocalization analysis by cropping to object bbox. + It selects objects from label images, calculates their bounding boxes, + and crops both images accordingly. + + Parameters + ---------- + label_object1 : numpy.ndarray + The segmented label image for the first object. + label_object2 : numpy.ndarray + The segmented label image for the second object. + image_object1 : numpy.ndarray + The spectral image to crop for the first object. + image_object2 : numpy.ndarray + The spectral image to crop for the second object. + object_id1 : int + The object index to select from the label image for the first object. + object_id2 : int + The object index to select from the label image for the second object. + + Returns + ------- + Tuple[numpy.ndarray, numpy.ndarray] + The two cropped images for colocalization analysis. + """ + _require_skimage() + label_object1 = select_objects_from_label(label_object1, object_id1) + label_object2 = select_objects_from_label(label_object2, object_id2) + # get the image bbox + props_image1 = skimage.measure.regionprops_table(label_object1, properties=["bbox"]) + bbox_image1 = ( + props_image1["bbox-0"][0], # z min + props_image1["bbox-1"][0], # y min + props_image1["bbox-2"][0], # x min + props_image1["bbox-3"][0], # z max + props_image1["bbox-4"][0], # y max + props_image1["bbox-5"][0], # x max + ) + + props_image2 = skimage.measure.regionprops_table(label_object2, properties=["bbox"]) + bbox_image2 = ( + props_image2["bbox-0"][0], # z min + props_image2["bbox-1"][0], # y min + props_image2["bbox-2"][0], # x min + props_image2["bbox-3"][0], # z max + props_image2["bbox-4"][0], # y max + props_image2["bbox-5"][0], # x max + ) + + new_bbox1, new_bbox2 = new_crop_border(bbox_image1, bbox_image2, image_object1) + + cropped_image_1 = crop_3D_image(image_object1, new_bbox1) + cropped_image_2 = crop_3D_image(image_object2, new_bbox2) + return cropped_image_1, cropped_image_2 + + +def compute_colocalization( # noqa: PLR0912, PLR0915 + cropped_image_1: numpy.ndarray, + cropped_image_2: numpy.ndarray, + thr: int = 15, + fast_costes: str = "Accurate", +) -> Dict[str, float]: + """ + This function calculates the colocalization coefficients between two images. + It computes the correlation coefficient, Manders' coefficients, overlap coefficient, + and Costes' coefficients. The results are returned as a dictionary. + + Parameters + ---------- + cropped_image_1 : numpy.ndarray + The first cropped image. + cropped_image_2 : numpy.ndarray + The second cropped image. + thr : int, optional + The threshold for the Manders' coefficients, by default 15 + fast_costes : str, optional + The mode for Costes' threshold calculation, by default "Accurate". + Options are "Accurate" or "Fast". + "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm. + The "Fast" mode is faster but less accurate. + + Returns + ------- + Dict[str, float] + The output features for colocalization analysis. + """ + _require_scipy() + results = {} + ################################################################################################ + # Calculate the correlation coefficient between the two images + # This is the Pearson correlation coefficient + # Pearson correlation coeffecient = cov(X, Y) / (std(X) * std(Y)) + # where cov(X, Y) is the covariance of X and Y + # where X and Y are the two images + # std(X) is the standard deviation of X + # std(Y) is the standard deviation of Y + # cov(X, Y) = sum((X - mean(X)) * (Y - mean(Y))) / (N - 1) + # std(X) = sqrt(sum((X - mean(X)) ** 2) / (N - 1)) + # thus N -1 cancels out in the calculation below + ################################################################################################ + mean1 = numpy.mean(cropped_image_1) + mean2 = numpy.mean(cropped_image_2) + std1 = numpy.sqrt(numpy.sum((cropped_image_1 - mean1) ** 2)) + std2 = numpy.sqrt(numpy.sum((cropped_image_2 - mean2) ** 2)) + x = cropped_image_1 - mean1 # x is not the same as the x dimension here + y = cropped_image_2 - mean2 # y is not the same as the y dimension here + corr = numpy.sum(x * y) / (std1 * std2) + + ################################################################################################ + # Calculate the Manders' coefficients + ################################################################################################ + + # Threshold as percentage of maximum intensity of objects in each channel + try: + tff = (thr / 100) * numpy.max(cropped_image_1) + tss = (thr / 100) * numpy.max(cropped_image_2) + # Ensure thresholds are at least 1 to avoid zero thresholding + # if an errors occurs this is probably due to empty images + # or images where the bbox is incredibly small and inconsistent + # or the bbox is on the border of the image + # in which case we want to remove anyway + except ValueError: + M1, M2 = 0.0, 0.0 + else: + # get the thresholds + combined_thresh = (cropped_image_1 >= tff) & (cropped_image_2 >= tss) + + first_image_thresh = cropped_image_1[combined_thresh] + second_image_thresh = cropped_image_2[combined_thresh] + + tot_first_image_thr = scipy.ndimage.sum( + cropped_image_1[cropped_image_1 >= tff], + ) + tot_second_image_thr = scipy.ndimage.sum( + cropped_image_2[cropped_image_2 >= tss] + ) + + if tot_first_image_thr > 0 and tot_second_image_thr > 0: + M1 = scipy.ndimage.sum(first_image_thresh) / tot_first_image_thr + M2 = scipy.ndimage.sum(second_image_thresh) / tot_second_image_thr + else: + M1, M2 = 0.0, 0.0 + ################################################################################################ + # Calculate the overlap coefficient + ################################################################################################ + + fpsq = scipy.ndimage.sum( + cropped_image_1[combined_thresh] ** 2, + ) + spsq = scipy.ndimage.sum( + cropped_image_2[combined_thresh] ** 2, + ) + pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq)) + overlap = ( + scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) + / pdt + ) + # leave in for now given they are not exported but still calculated + K1 = scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) / (numpy.array(fpsq)) + K2 = scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) / (numpy.array(spsq)) + if K1 == K2: + pass + + # first_pixels, second_pixels = flattened image arrays + # combined_thresh = boolean mask of pixels above threshold in both channels + # fi_thresh, si_thresh = thresholded intensities (same shape as pixels) + + # --- Rank computation --- + # Flatten images for ranking + img1_flat = cropped_image_1.flatten() + img2_flat = cropped_image_2.flatten() + + # --- Rank computation --- + sorted_idx_1 = numpy.argsort(img1_flat) + sorted_idx_2 = numpy.argsort(img2_flat) + + # Create rank arrays + rank_1_flat = numpy.empty_like(sorted_idx_1, dtype=float) + rank_2_flat = numpy.empty_like(sorted_idx_2, dtype=float) + rank_1_flat[sorted_idx_1] = numpy.arange(len(sorted_idx_1)) + rank_2_flat[sorted_idx_2] = numpy.arange(len(sorted_idx_2)) + + # Reshape back to original shape + rank_im1 = rank_1_flat.reshape(cropped_image_1.shape) + rank_im2 = rank_2_flat.reshape(cropped_image_2.shape) + + # --- Rank difference weight --- + R = max(rank_im1.max(), rank_im2.max()) + 1 + Di = numpy.abs(rank_im1 - rank_im2) + weight = (R - Di) / R + + # Get weights for thresholded pixels + weight_thresh = weight[combined_thresh] + + # Get thresholded values (no double-thresholding!) + first_image_thresh_final = first_image_thresh + second_image_thresh_final = second_image_thresh + + # --- Calculate weighted colocalization --- + if numpy.any(combined_thresh) and len(first_image_thresh_final) > 0: + weighted_sum_1 = numpy.sum(first_image_thresh_final * weight_thresh) + weighted_sum_2 = numpy.sum(second_image_thresh_final * weight_thresh) + + total_1 = numpy.sum(first_image_thresh_final) + total_2 = numpy.sum(second_image_thresh_final) + + RWC1 = weighted_sum_1 / total_1 if total_1 > 0 else 0.0 + RWC2 = weighted_sum_2 / total_2 if total_2 > 0 else 0.0 + else: + RWC1, RWC2 = 0.0, 0.0 + ################################################################################################ + # Calculate the Costes' coefficient + ################################################################################################ + + # Orthogonal Regression for Costes' automated threshold + if numpy.max(cropped_image_1) > UINT8_MAX or numpy.max(cropped_image_2) > UINT8_MAX: + scale = UINT16_MAX + else: + scale = UINT8_MAX + + if fast_costes == "Accurate": + thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( + cropped_image_1, cropped_image_2, scale + ) + else: + thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( + cropped_image_1, cropped_image_2, scale, fast_costes + ) + + # Costes' thershold for entire image is applied to each object + first_image_above_thr = cropped_image_1 > thr_first_image_c + second_image_above_thr = cropped_image_2 > thr_second_image_c + combined_thresh_c = first_image_above_thr & second_image_above_thr + first_image_thresh_c = cropped_image_1[combined_thresh_c] + second_image_thresh_c = cropped_image_2[combined_thresh_c] + + tot_first_image_thr_c = scipy.ndimage.sum( + cropped_image_1[cropped_image_1 >= thr_first_image_c], + ) + + tot_second_image_thr_c = scipy.ndimage.sum( + cropped_image_2[cropped_image_2 >= thr_second_image_c], + ) + if tot_first_image_thr_c > 0 and tot_second_image_thr_c > 0: + C1 = scipy.ndimage.sum(first_image_thresh_c) / tot_first_image_thr_c + C2 = scipy.ndimage.sum(second_image_thresh_c) / tot_second_image_thr_c + else: + C1, C2 = 0.0, 0.0 + ################################################################################################ + # write the results to the output dictionary + ################################################################################################ + results["Correlation"] = corr + results["MandersCoeffM1"] = M1 + results["MandersCoeffM2"] = M2 + results["OverlapCoeff"] = overlap + results["MandersCoeffCostesM1"] = C1 + results["MandersCoeffCostesM2"] = C2 + results["RankWeightedColocalizationCoeff1"] = RWC1 + results["RankWeightedColocalizationCoeff2"] = RWC2 -def compute() -> dict[str, list[float]]: - """Placeholder for colocalization computation implementation.""" - raise ZedProfilerError("colocalization.compute is not implemented yet") + return results diff --git a/src/zedprofiler/image_utils/image_utils.py b/src/zedprofiler/image_utils/image_utils.py new file mode 100644 index 0000000..f3dc58c --- /dev/null +++ b/src/zedprofiler/image_utils/image_utils.py @@ -0,0 +1,327 @@ +from typing import Tuple, Union + +import numpy + +BBoxCoord = Union[int, float] +BBox3D = tuple[BBoxCoord, BBoxCoord, BBoxCoord, BBoxCoord, BBoxCoord, BBoxCoord] + + +def select_objects_from_label( + label_image: numpy.ndarray, object_ids: list +) -> numpy.ndarray: + """ + Selects objects from a label image based on the provided object IDs. + + Parameters + ---------- + label_image : numpy.ndarray + The segmented label image. + object_ids : list + The object IDs to select. + + Returns + ------- + numpy.ndarray + The label image with only the selected objects. + """ + label_image = label_image.copy() + label_image[label_image != object_ids] = 0 + return label_image + + +def expand_box( + min_coor: int, max_coord: int, current_min: int, current_max: int, expand_by: int +) -> Union[Tuple[int, int], ValueError]: + """ + Expand the bounding box of an object in a 3D image. + + Parameters + ---------- + min_coor : int + The minimum coordinate of the image for any dimension. + max_coord : int + The maximum coordinate of the image for any dimension. + current_min : int + The current minimum coordinate of an object's bounding box + for any dimension. + current_max : int + The current maximum coordinate of an object's bounding box + for any dimension. + expand_by : int + The amount to expand the bounding box by. + + Returns + ------- + Union[Tuple[int, int], ValueError] + The new minimum and maximum coordinates of the bounding box. + Raises ValueError if the expansion is not possible. + """ + + if max_coord - min_coor - (current_max - current_min) < expand_by: + return ValueError("Cannot expand box by the requested amount") + while expand_by > 0: + if current_min > min_coor: + current_min -= 1 + expand_by -= 1 + elif current_max < max_coord: + current_max += 1 + expand_by -= 1 + + return current_min, current_max + + +def new_crop_border( + bbox1: BBox3D, + bbox2: BBox3D, + image: numpy.ndarray, +) -> tuple[BBox3D, BBox3D]: + """ + Expand the bounding boxes of two objects in a 3D image to match their sizes. + + Parameters + ---------- + bbox1 : BBox3D + The bounding box of the first object. + bbox2 : BBox3D + The bounding box of the second object. + image : numpy.ndarray + The image to crop for each of the bounding boxes. + + Returns + ------- + tuple[BBox3D, BBox3D] + The new bounding boxes of the two objects. + Raises + ValueError + If the expansion is not possible. + """ + i1z1, i1y1, i1x1, i1z2, i1y2, i1x2 = bbox1 + i2z1, i2y1, i2x1, i2z2, i2y2, i2x2 = bbox2 + z_range1 = i1z2 - i1z1 + y_range1 = i1y2 - i1y1 + x_range1 = i1x2 - i1x1 + z_range2 = i2z2 - i2z1 + y_range2 = i2y2 - i2y1 + x_range2 = i2x2 - i2x1 + z_diff = numpy.abs(z_range1 - z_range2) + y_diff = numpy.abs(y_range1 - y_range2) + x_diff = numpy.abs(x_range1 - x_range2) + min_z_coord = 0 + max_z_coord = image.shape[0] + min_y_coord = 0 + max_y_coord = image.shape[1] + min_x_coord = 0 + max_x_coord = image.shape[2] + if z_range1 < z_range2: + i1z1, i1z2 = expand_box( + min_coor=min_z_coord, + max_coord=max_z_coord, + current_min=i1z1, + current_max=i1z2, + expand_by=z_diff, + ) + elif z_range1 > z_range2: + i2z1, i2z2 = expand_box( + min_coor=min_z_coord, + max_coord=max_z_coord, + current_min=i2z1, + current_max=i2z2, + expand_by=z_diff, + ) + if y_range1 < y_range2: + i1y1, i1y2 = expand_box( + min_coor=min_y_coord, + max_coord=max_y_coord, + current_min=i1y1, + current_max=i1y2, + expand_by=y_diff, + ) + elif y_range1 > y_range2: + i2y1, i2y2 = expand_box( + min_coor=min_y_coord, + max_coord=max_y_coord, + current_min=i2y1, + current_max=i2y2, + expand_by=y_diff, + ) + if x_range1 < x_range2: + i1x1, i1x2 = expand_box( + min_coor=min_x_coord, + max_coord=max_x_coord, + current_min=i1x1, + current_max=i1x2, + expand_by=x_diff, + ) + elif x_range1 > x_range2: + i2x1, i2x2 = expand_box( + min_coor=min_x_coord, + max_coord=max_x_coord, + current_min=i2x1, + current_max=i2x2, + expand_by=x_diff, + ) + return (i1z1, i1y1, i1x1, i1z2, i1y2, i1x2), (i2z1, i2y1, i2x1, i2z2, i2y2, i2x2) + + +# crop the image to the bbox of the mask +def crop_3D_image( + image: numpy.ndarray, + bbox: BBox3D, +) -> numpy.ndarray: + """ + Crop a 3D image to the bounding box of a mask. + + Parameters + ---------- + image : numpy.ndarray + The image to crop. + bbox : BBox3D + The bounding box of the mask. + + Returns + ------- + numpy.ndarray + The cropped image. + """ + z1, y1, x1, z2, y2, x2 = bbox + return image[z1:z2, y1:y2, x1:x2] + + +def single_3D_image_expand_bbox( + image: numpy.ndarray, + bbox: tuple[int, int, int, int, int, int], + expand_pixels: int, + anisotropy_factor: int, +) -> tuple[int, int, int, int, int, int]: + """ + Expand the bbox in a way that keeps the crop within the + confines of the image volume + + Parameters + ---------- + image : numpy.ndarray + 3D image array from which the bbox was derived + bbox : tuple[int, int, int, int, int, int] + 3D bbox in the format (zmin, ymin, xmin, zmax, ymax, xmax) + expand_pixels : int + number of pixels to expand the bbox in each direction (z, y, x) + the coordinates become isotropic here so the expansion is + the same across dimensions, + but the anisotropy factor is used to adjust for the z dimension + anisotropy_factor : int + The ratio of "pixel" size in um between the z dimension and the x/y dimensions. + This is used to adjust the expansion of the bbox in the z dimension to account + for anisotropy in the image volume. + For example, if the z spacing is 5um and the x/y spacing is 1um, + then the anisotropy factor would be 5. + + Returns + ------- + tuple[int, int, int, int, int, int] + Updated bbox in the format (zmin, ymin, xmin, zmax, ymax, xmax) + after expansion and adjustment for anisotropy + """ + z1, y1, x1, z2, y2, x2 = bbox + zmin, ymin, xmin = 0, 0, 0 + zmax, ymax, xmax = image.shape + # adjust the anisotropy factor for the z dimension + z1, z2 = z1 * anisotropy_factor, z2 * anisotropy_factor + zmax = zmax * anisotropy_factor + # expand the bbox by the specified number of pixels in each direction + z1_expanded = z1 - expand_pixels + y1_expanded = y1 - expand_pixels + x1_expanded = x1 - expand_pixels + z2_expanded = z2 + expand_pixels + y2_expanded = y2 + expand_pixels + x2_expanded = x2 + expand_pixels + # convert the expanded bbox back to the original z dimension scale + z1_expanded = numpy.floor(z1_expanded / anisotropy_factor) + z2_expanded = numpy.ceil(z2_expanded / anisotropy_factor) + # ensure the expanded bbox does not go outside the image boundaries + z1_expanded, z2_expanded = ( + max(z1_expanded, numpy.floor(zmin / anisotropy_factor)).astype(int), + min(z2_expanded, numpy.ceil(zmax / anisotropy_factor)).astype(int), + ) + y1_expanded, y2_expanded = max(y1_expanded, ymin), min(y2_expanded, ymax) + x1_expanded, x2_expanded = max(x1_expanded, xmin), min(x2_expanded, xmax) + + return ( + z1_expanded, + y1_expanded, + x1_expanded, + z2_expanded, + y2_expanded, + x2_expanded, + ) + + +def check_for_xy_squareness(bbox: tuple[int, int, int, int, int, int]) -> float: + """ + This function returns the ratio of the x length to the y length + A value of 1 indicates a square bbox is present + + Parameters + ---------- + bbox : The bbox to check + (z_min, y_min, x_min, z_max, y_max, x_max) + Where each value is an int representing the pixel coordinate + of the bbox in that dimension + + Returns + ------- + float + The ratio of the y length to the x length of the bbox. + A value of 1 indicates a square bbox. + """ + _z_min, y_min, x_min, _z_max, y_max, x_max = bbox + x_length = x_max - x_min + if x_length == 0: + raise ValueError( + "Cannot compute xy squareness for bbox with zero width in x dimension " + f"(bbox={bbox})." + ) + xy_squareness = (y_max - y_min) / x_length + return xy_squareness + + +def square_off_xy_crop_bbox( + bbox: tuple[int, int, int, int, int, int], +) -> tuple[int, int, int, int, int, int]: + """ + Adjust the bbox to be square in the XY plane. + + The function computes the new bbox from the current X/Y dimensions. + + Parameters + ---------- + bbox : tuple[int, int, int, int, int, int] + The bbox to adjust: + (z_min, y_min, x_min, z_max, y_max, x_max) + + Each value is an integer pixel coordinate in that dimension. + + Returns + ------- + tuple[int, int, int, int, int, int] + The adjusted bbox that is square in the XY plane: + (z_min, new_y_min, new_x_min, z_max, new_y_max, new_x_max) + + Each value is an integer pixel coordinate in that dimension. + """ + zmin, ymin, xmin, zmax, ymax, xmax = bbox + # first find the larger dimension between x and y + x_size = xmax - xmin + y_size = ymax - ymin + if x_size > y_size: + # need to expand y dimension + new_ymin = int(ymin - (x_size - y_size) / 2) + new_ymax = int(ymax + (x_size - y_size) / 2) + return (zmin, new_ymin, xmin, zmax, new_ymax, xmax) + elif y_size > x_size: + # need to expand x dimension + new_xmin = int(xmin - (y_size - x_size) / 2) + new_xmax = int(xmax + (y_size - x_size) / 2) + return (zmin, ymin, new_xmin, zmax, ymax, new_xmax) + else: + # already square + return bbox diff --git a/tests/featurization/test_colocalization.py b/tests/featurization/test_colocalization.py new file mode 100644 index 0000000..d18cfb1 --- /dev/null +++ b/tests/featurization/test_colocalization.py @@ -0,0 +1,128 @@ +import numpy as np +import pytest +from _pytest.monkeypatch import MonkeyPatch + +from zedprofiler.featurization import colocalization as coloc + + +def test_linear_costes_threshold_calculation_returns_finite_thresholds() -> None: + x = np.linspace(0.01, 1.0, 200) + y = 0.85 * x + 0.05 * np.sin(np.arange(x.size)) + t1, t2 = coloc.linear_costes_threshold_calculation( + x, y, scale_max=255, fast_costes="Fast" + ) + assert np.isfinite(t1) + assert np.isfinite(t2) + + +def test_bisection_costes_threshold_calculation_returns_finite_thresholds() -> None: + x = np.linspace(0.01, 1.0, 200) + y = 0.9 * x + 0.03 * np.cos(np.arange(x.size)) + t1, t2 = coloc.bisection_costes_threshold_calculation(x, y, scale_max=255) + assert np.isfinite(t1) + assert np.isfinite(t2) + + +def test_prepare_two_images_for_colocalization_crops_expected_regions( + monkeypatch: MonkeyPatch, +) -> None: + label1 = np.zeros((4, 4, 4), dtype=int) + label2 = np.zeros((4, 4, 4), dtype=int) + label1[1:3, 1:3, 1:3] = 1 + label2[0:2, 0:2, 0:2] = 2 + + img1 = np.arange(64).reshape(4, 4, 4) + img2 = np.arange(100, 164).reshape(4, 4, 4) + + monkeypatch.setattr(coloc, "select_objects_from_label", lambda arr, _: arr) + monkeypatch.setattr(coloc, "new_crop_border", lambda b1, b2, _img: (b1, b2)) + monkeypatch.setattr( + coloc, + "crop_3D_image", + lambda img, bbox: img[bbox[0] : bbox[3], bbox[1] : bbox[4], bbox[2] : bbox[5]], + ) + + out1, out2 = coloc.prepare_two_images_for_colocalization( + label1, label2, img1, img2, object_id1=1, object_id2=2 + ) + + assert out1.shape == (2, 2, 2) + assert out2.shape == (2, 2, 2) + np.testing.assert_array_equal(out1, img1[1:3, 1:3, 1:3]) + np.testing.assert_array_equal(out2, img2[0:2, 0:2, 0:2]) + + +def test_compute_colocalization_identical_images_are_highly_colocalized() -> None: + arr = np.array( + [ + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + ], + dtype=float, + ) + res = coloc.compute_colocalization(arr, arr, thr=0, fast_costes="Fast") + + expected_keys = { + "Correlation", + "MandersCoeffM1", + "MandersCoeffM2", + "OverlapCoeff", + "MandersCoeffCostesM1", + "MandersCoeffCostesM2", + "RankWeightedColocalizationCoeff1", + "RankWeightedColocalizationCoeff2", + } + assert expected_keys.issubset(res.keys()) + assert res["Correlation"] == pytest.approx(1.0, rel=1e-6) + assert res["MandersCoeffM1"] == pytest.approx(1.0, rel=1e-6) + assert res["MandersCoeffM2"] == pytest.approx(1.0, rel=1e-6) + assert res["OverlapCoeff"] == pytest.approx(1.0, rel=1e-6) + + +def test_compute_colocalization_empty_combined_threshold_path() -> None: + a = np.ones((2, 2, 2), dtype=float) + b = np.ones((2, 2, 2), dtype=float) * 2.0 + + res = coloc.compute_colocalization(a, b, thr=200, fast_costes="Fast") + + assert res["MandersCoeffM1"] == 0.0 + assert res["MandersCoeffM2"] == 0.0 + assert res["RankWeightedColocalizationCoeff1"] == 0.0 + assert res["RankWeightedColocalizationCoeff2"] == 0.0 + assert np.isnan(res["OverlapCoeff"]) + + +def test_compute_colocalization_costes_dispatch(monkeypatch: MonkeyPatch) -> None: + calls = {"bisection": 0, "linear": 0} + + def fake_bisection( + _i1: np.ndarray, _i2: np.ndarray, _scale: int + ) -> tuple[float, float]: + calls["bisection"] += 1 + return 0.1, 0.1 + + def fake_linear( + _i1: np.ndarray, + _i2: np.ndarray, + _scale: int, + _mode: str, + ) -> tuple[float, float]: + calls["linear"] += 1 + return 0.1, 0.1 + + monkeypatch.setattr(coloc, "bisection_costes_threshold_calculation", fake_bisection) + monkeypatch.setattr(coloc, "linear_costes_threshold_calculation", fake_linear) + + img1 = np.arange(1, 9, dtype=float).reshape(2, 2, 2) + img2 = img1 + 1.0 + + coloc.compute_colocalization(img1, img2, fast_costes="Accurate") + coloc.compute_colocalization(img1, img2, fast_costes="Fast") + + assert calls["bisection"] == 1 + assert calls["linear"] == 1 + + +def test_compute_colocalization_empty_input_raises() -> None: + with pytest.raises(UnboundLocalError): + coloc.compute_colocalization(np.array([]), np.array([])) diff --git a/tests/test_image_utils.py b/tests/test_image_utils.py new file mode 100644 index 0000000..311c470 --- /dev/null +++ b/tests/test_image_utils.py @@ -0,0 +1,102 @@ +import numpy as np +import pytest + +from zedprofiler.image_utils.image_utils import ( + check_for_xy_squareness, + crop_3D_image, + expand_box, + new_crop_border, + select_objects_from_label, + single_3D_image_expand_bbox, + square_off_xy_crop_bbox, +) + + +def test_select_objects_from_label_filters_values() -> None: + label_image = np.array([[0, 1, 2], [2, 1, 3]]) + selected = select_objects_from_label(label_image, 2) + + np.testing.assert_array_equal(selected, np.array([[0, 0, 2], [2, 0, 0]])) + # Ensure the original array is unchanged. + np.testing.assert_array_equal(label_image, np.array([[0, 1, 2], [2, 1, 3]])) + + +def test_expand_box_expands_from_min_edge_first() -> None: + new_min, new_max = expand_box(0, 10, 3, 6, 2) + assert (new_min, new_max) == (1, 6) + + +def test_expand_box_returns_value_error_when_impossible() -> None: + result = expand_box(0, 5, 1, 4, 3) + assert isinstance(result, ValueError) + + +def test_new_crop_border_expands_first_bbox_when_second_is_larger() -> None: + bbox1 = (2, 2, 2, 4, 4, 4) + bbox2 = (1, 1, 1, 6, 6, 6) + image = np.zeros((10, 10, 10)) + + new_bbox1, new_bbox2 = new_crop_border(bbox1, bbox2, image) + + assert new_bbox2 == bbox2 + assert new_bbox1 == (0, 0, 0, 5, 5, 5) + + +def test_new_crop_border_expands_second_bbox_when_first_is_larger() -> None: + bbox1 = (1, 1, 1, 7, 7, 7) + bbox2 = (2, 2, 2, 4, 4, 4) + image = np.zeros((10, 10, 10)) + + new_bbox1, new_bbox2 = new_crop_border(bbox1, bbox2, image) + + assert new_bbox1 == bbox1 + assert new_bbox2 == (0, 0, 0, 6, 6, 6) + + +def test_crop_3d_image_returns_expected_subvolume() -> None: + image = np.arange(4 * 5 * 6).reshape(4, 5, 6) + cropped = crop_3D_image(image, (1, 2, 1, 3, 5, 5)) + + np.testing.assert_array_equal(cropped, image[1:3, 2:5, 1:5]) + + +def test_single_3d_image_expand_bbox_adjusts_for_anisotropy_and_bounds() -> None: + image = np.zeros((5, 10, 10)) + bbox = (1, 3, 3, 2, 5, 5) + + expanded = single_3D_image_expand_bbox( + image=image, + bbox=bbox, + expand_pixels=4, + anisotropy_factor=2, + ) + + assert expanded == (0, 0, 0, 4, 9, 9) + + +def test_check_for_xy_squareness_returns_ratio() -> None: + ratio = check_for_xy_squareness((0, 10, 20, 5, 30, 30)) + assert ratio == pytest.approx(2.0) + + +def test_check_for_xy_squareness_raises_for_zero_x_width() -> None: + with pytest.raises(ValueError, match="zero width"): + check_for_xy_squareness((0, 1, 5, 2, 6, 5)) + + +def test_square_off_xy_crop_bbox_expands_y_dimension() -> None: + bbox = (0, 10, 20, 4, 14, 30) + adjusted = square_off_xy_crop_bbox(bbox) + assert adjusted == (0, 7, 20, 4, 17, 30) + + +def test_square_off_xy_crop_bbox_expands_x_dimension() -> None: + bbox = (0, 10, 20, 4, 20, 24) + adjusted = square_off_xy_crop_bbox(bbox) + assert adjusted == (0, 10, 17, 4, 20, 27) + + +def test_square_off_xy_crop_bbox_keeps_square_bbox_unchanged() -> None: + bbox = (0, 1, 2, 5, 9, 10) + adjusted = square_off_xy_crop_bbox(bbox) + assert adjusted == bbox diff --git a/uv.lock b/uv.lock index 5075075..a2ad339 100644 --- a/uv.lock +++ b/uv.lock @@ -2793,7 +2793,6 @@ dependencies = [ { name = "pandas" }, { name = "scikit-image" }, { name = "scipy" }, - { name = "tqdm" }, ] [package.dev-dependencies] @@ -2823,11 +2822,10 @@ requires-dist = [ { name = "bioio-tifffile", specifier = ">=1.3.0" }, { name = "fire", specifier = ">=0.7.1" }, { name = "jinja2", specifier = ">=3.1.6" }, - { name = "numpy", specifier = ">=2.2.6" }, + { name = "numpy", specifier = ">=2.2" }, { name = "pandas", specifier = ">=3.0.2" }, - { name = "scikit-image", specifier = ">=0.26.0" }, - { name = "scipy", specifier = ">=1.17.1" }, - { name = "tqdm", specifier = ">=4.67.3" }, + { name = "scikit-image", specifier = ">=0.25" }, + { name = "scipy", specifier = ">=1.15" }, ] [package.metadata.requires-dev]