Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 42 additions & 12 deletions EMT_data_analysis/analysis_scripts/Analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,37 @@

warnings.filterwarnings("ignore")

rng = np.random.default_rng(42)

def bootstrap_corr(x, y, method="Pearson", confidence_interval=0.95, n_bootstraps=2000, seed=42, verbose=True):
x = np.asarray(x)
y = np.asarray(y)

corr_func = pearsonr
if method == "Spearman":
corr_func = spearmanr

r_observed, p_value = corr_func(x, y)
n_samples = len(x)

inds = np.arange(n_samples)
boot_corrs = []
for _ in range(n_bootstraps):
resampled_inds = rng.choice(inds, size=n_samples, replace=True)
rx = x[resampled_inds]
ry = y[resampled_inds]
boot_r, _ = corr_func(rx, ry)
boot_corrs.append(boot_r)
alpha = (1.0-confidence_interval)/2.0
ci_low = np.nanpercentile(boot_corrs, 100*alpha)
ci_high = np.nanpercentile(boot_corrs, 100*(1.0-alpha))

if verbose:
print(f'Number of samples: {n_samples}')
print(f'{method} Correlation: {r_observed:.3g} | 95% CI: [{ci_low:.2f}, {ci_high:.2f}] | p-value: {p_value:.3g}')

return r_observed, p_value, ci_low, ci_high

def run_all_analyses():
"""
Run all analysis functions
Expand All @@ -29,6 +60,8 @@ def run_all_analyses():

df = io.load_image_analysis_extracted_features()

print("Loaded image analysis extracted features and started running analyses...")

plot_area_at_glass_all_data(df, FIGS_DIR, OUT_TYPE)
plot_area_at_glass_h2b(df, FIGS_DIR, OUT_TYPE)
plot_migration_timing_all_data(df, FIGS_DIR, OUT_TYPE)
Expand Down Expand Up @@ -611,10 +644,8 @@ def plot_gene_expression_experiments(df, figs_dir, out_type):
migration = df_g['Migration Onset Time (Footprint Area Based)'][[cond in val for val in df_g['Experimental Condition'].values]].dropna()
metric = df_g['gene_metric'][[cond in val for val in df_g['Experimental Condition'].values]].dropna()

pearson, p_pvalue = pearsonr(migration, metric)
spearman, s_pvalue = spearmanr(migration, metric)
print(f'Pearson Correlation: {pearson:.3g} | p-value: {p_pvalue:.3g}')
print(f'Spearman Correlation: {spearman:.3g} | p-value: {s_pvalue:.3g}')
pearson, p_pvalue, _, _ = bootstrap_corr(migration, metric, "Pearson")
spearman, s_pvalue, _, _ = bootstrap_corr(migration, metric, "Spearman")

X = df_g['Migration Onset Time (Footprint Area Based)'][[cond in val for val in df_g['Experimental Condition'].values]].dropna()
Y = df_g['gene_metric'][[cond in val for val in df_g['Experimental Condition'].values]].dropna()
Expand All @@ -638,11 +669,9 @@ def plot_gene_expression_experiments(df, figs_dir, out_type):
migration = df_g['Migration Onset Time (Footprint Area Based)']
metric = df_g['gene_metric']

pearson, p_pvalue = pearsonr(migration, metric)
spearman, s_pvalue = spearmanr(migration, metric)
print(f'Pearson Correlation: {pearson:.3g} | p-value: {p_pvalue:.3g}')
print(f'Spearman Correlation: {spearman:.3g} | p-value: {s_pvalue:.3g}')

pearson, p_pvalue, _, _ = bootstrap_corr(migration, metric, "Pearson")
spearman, s_pvalue, _, _ = bootstrap_corr(migration, metric, "Spearman")

X = df_g['Migration Onset Time (Footprint Area Based)']
Y = df_g['gene_metric']

Expand Down Expand Up @@ -719,6 +748,8 @@ def plot_collagenase_analysis(df, figs_dir, out_type):
X = df_gene['Collagenease concentration (ug/mL)']
Y = df_gene['Migration Onset Time (Footprint Area Based)']

_ = bootstrap_corr(X, Y, "Spearman")

# It's important to add a constant (intercept) to the model
X = sm.add_constant(X)

Expand Down Expand Up @@ -747,7 +778,6 @@ def plot_collagenase_analysis(df, figs_dir, out_type):
else:
print("\nConclusion: The p-value for the slope is not less than 0.05, so we cannot conclude there is a significant linear relationship.")


def plot_mmp_inhibitor_migration(df, figs_dir, out_type):
"""
Parameters
Expand Down Expand Up @@ -1015,8 +1045,8 @@ def plot_inside_outside_migration_timing(df, figs_dir, out_type):
X = dfio_scatter['Migration Onset Time (Footprint Area Based)']
Y = dfio_scatter['Migration Onset Time (Inside/Outside Basement Membrane Based)']

p_results = pearsonr(X.values, Y.values)
r_results = spearmanr(X.values, Y.values)
p_results = bootstrap_corr(X.values, Y.values, "Pearson")
r_results = bootstrap_corr(X.values, Y.values, "Spearman")
print('n: {0:d}'.format(n_movies_io))
print('Pearson Correlation: {0:.3g} | p-Value: {1:.3g}'.format(p_results.statistic, p_results.pvalue))
print('Spearman Correlation: {0:.3g} | p-Value: {1:.3g}'.format(r_results.statistic, r_results.pvalue))
Expand Down
15 changes: 9 additions & 6 deletions EMT_data_analysis/tools/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@ def load_imaging_and_segmentation_dataset():
return df

def load_image_analysis_extracted_features(load_from_aws: bool = True):
path = "https://allencell.s3.amazonaws.com/aics/emt_timelapse_dataset/manifests/Image_analysis_extracted_features.csv?versionId=ehxRXxC0FpidcpgXU_z.51T.nkWB0Yuj"
if not load_from_aws:
# Or read from local if the user decides to run Metric_computation.py
metric_comp_results_dir = get_results_directory_name() / "metric_computation"
path = metric_comp_results_dir / "Image_analysis_extracted_features.csv"
df = pd.read_csv(path)
metric_comp_results_dir = get_results_directory_name() / "metric_computation"
path = metric_comp_results_dir / "Image_analysis_extracted_features.csv"
try:
print('Trying to load features from local path.')
df = pd.read_csv(path)
except Exception:
print(f'Features not found at {path}. Loading from AWS instead. This may take a while...')
path = "https://allencell.s3.amazonaws.com/aics/emt_timelapse_dataset/manifests/Image_analysis_extracted_features.csv?versionId=ehxRXxC0FpidcpgXU_z.51T.nkWB0Yuj"
df = pd.read_csv(path)
return df

def load_inside_outside_classification(load_from_aws: bool = True):
Expand Down