4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -4,14 +4,14 @@
repos:
  # Keep pre-commit itself up to date
  - repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
-    rev: v0.6.1
+    rev: v0.7.0
    hooks:
      - id: pre-commit-update
        args: ["--verbose"]

  # Ruff for linting Python files
  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.11.5
+    rev: v0.11.8
    hooks:
      - id: ruff
        args: ["--fix"]

Large diffs are not rendered by default.

@@ -0,0 +1,53 @@
#!/usr/bin/env python

# # Generating DMSO consensus profiles

# In[1]:


import pathlib
import sys

import pandas as pd
from pycytominer import consensus

sys.path.append("../../../")
from utils import data_utils

# In[2]:


# setting profile path
concat_profile_path = pathlib.Path(
    "../UMAP-aggregated-fs-profiles/results/concat_data/batch_1_concat_agg_fs.csv"
).resolve(strict=True)

# setting output path
# output_path = pathlib.Path("results/").resolve(strict=True)


# In[3]:


# load in aggregate profiles
agg_df = pd.read_csv(concat_profile_path)

# keep only the DMSO control wells (positive and negative controls)
dmso_agg_df = agg_df.loc[
    (agg_df["Metadata_control_type"] == "positive")
    | (agg_df["Metadata_control_type"] == "negative")
]

# split the metadata and morphology features
dmso_agg_meta, dmso_agg_feats = data_utils.split_meta_and_features(dmso_agg_df)

# display
print("Shape: ", dmso_agg_df.shape)
dmso_agg_df.head()


# In[4]:


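# Collapse the DMSO control wells into one consensus profile per unique
# combination of the replicate columns, taking the per-feature median
# (pycytominer's consensus with operation="median").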
consensus_df = consensus(
    profiles=dmso_agg_df,
    replicate_columns=["Metadata_plate_barcode", "Metadata_plate_name", "Metadata_treatment"],
    operation="median",
    features=dmso_agg_feats,
)
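
# A minimal sketch of persisting the result, assuming a local "results/" output
# directory and file name (both hypothetical; the output_path above is still
# commented out in this diff).
output_dir = pathlib.Path("results/")
output_dir.mkdir(parents=True, exist_ok=True)
consensus_df.to_csv(output_dir / "batch_1_dmso_consensus_profiles.csv", index=False)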
@@ -1,5 +1,9 @@
#!/usr/bin/env python

# # Comparing controls and treatments using pairwise compare
#
# This notebook employs pairwise comparison to quantify the similarity between cellular profiles. In this section, we assess the consistency of experimental replicates and evaluate the similarity between treated wells (containing failing cardiac fibroblast cells) and control wells.

# In[1]:


@@ -9,6 +13,7 @@
import pandas as pd
from comparators.PearsonsCorrelation import PearsonsCorrelation
from comparison_tools.PairwiseCompareManager import PairwiseCompareManager
from pycytominer import consensus
from pycytominer.cyto_utils import load_profiles

# loading project utils
@@ -37,7 +42,7 @@
# split the features:
metadata, features = split_meta_and_features(agg_profile)

-# now only select DMSO profiles that are DMSO-positive and DMSO-negative
+# now only select DMSO profiles that are DMSO_positive and DMSO-negative
dmso_profiles = agg_profile.loc[
    (agg_profile["Metadata_treatment"] == "DMSO-positive")
    | (agg_profile["Metadata_treatment"] == "DMSO-negative")
@@ -125,14 +130,144 @@
)


# ## Calculating pairwise comparisons across DMSO consensus profiles

# In[7]:


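# Build consensus profiles for the DMSO controls: one median profile per unique
# (plate barcode, plate name, treatment) combination.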
consensus_dmso_df = consensus(
    profiles=dmso_profiles,
    replicate_columns=[
        "Metadata_plate_barcode",
        "Metadata_plate_name",
        "Metadata_treatment",
    ],
    operation="median",
    features=features,
)

# split to positive and negative controls
consensus_dmso_pos_df = consensus_dmso_df.loc[
    consensus_dmso_df["Metadata_treatment"] == "DMSO-positive"
]
consensus_dmso_neg_df = consensus_dmso_df.loc[
    consensus_dmso_df["Metadata_treatment"] == "DMSO-negative"
]


# In[8]:


# comparing the consensus profiles of the positive controls
consensus_dmso_pos_cntrl_comparer = PairwiseCompareManager(
    _df=consensus_dmso_pos_df,
    _feat_cols=features,
    _different_columns=["Metadata_plate_name"],
    _same_columns=["Metadata_treatment"],
    _comparator=PearsonsCorrelation(),
)

# comparing the consensus profiles of the negative controls
consensus_dmso_neg_cntrl_comparer = PairwiseCompareManager(
    _df=consensus_dmso_neg_df,
    _feat_cols=features,
    _different_columns=["Metadata_plate_name"],
    _same_columns=["Metadata_treatment"],
    _comparator=PearsonsCorrelation(),
)

# collecting all pairwise scores
consensus_pos_cntrl_pairwise_scores = consensus_dmso_pos_cntrl_comparer()
consensus_neg_cntrl_pairwise_scores = consensus_dmso_neg_cntrl_comparer()


# In[9]:


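# Note on the columns selected below: each comparer call returns one row per
# compared pair of profiles, holding the pearsons_correlation value plus the
# grouping metadata; columns passed as _same_columns carry the
# "__antehoc_group*" suffix and columns passed as _different_columns carry the
# "__posthoc_group*" suffix.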
# selecting only relevant columns
consensus_pos_cntrl_scores = consensus_pos_cntrl_pairwise_scores[
    [
        "pearsons_correlation",
        "Metadata_treatment__antehoc_group0",
        "Metadata_plate_name__posthoc_group0",
        "Metadata_plate_name__posthoc_group1",
    ]
]
consensus_neg_cntrl_scores = consensus_neg_cntrl_pairwise_scores[
    [
        "pearsons_correlation",
        "Metadata_treatment__antehoc_group0",
        "Metadata_plate_name__posthoc_group0",
        "Metadata_plate_name__posthoc_group1",
    ]
]

# combine the positive and negative control scores into a single dataframe
final_consensus_pairwise_scores = (
    pd.concat([consensus_pos_cntrl_scores, consensus_neg_cntrl_scores])
    .rename(columns={"Metadata_treatment__antehoc_group0": "Metadata_treatment"})
    .reset_index(drop=True)
)

# saving the final consensus pairwise scores
final_consensus_pairwise_scores.to_csv(
    output_path / "final_dmso_consensus_pairwise_scores.csv", index=False
)


# ## Calculating pairwise comparisons within replicates
#
# In this section, we compute pairwise Pearson correlations between replicates of the same treatment. This helps identify poorly performing technical replicates—those with low correlation values—while high correlations indicate consistent and reliable measurements across replicates.
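
# A minimal, self-contained sketch of the metric used below (an illustration
# with made-up numbers, not part of the original notebook): the Pearson
# correlation between the feature vectors of two replicate profiles.
import numpy as np

_replicate_a = np.array([0.10, 0.52, 0.91, 0.33])  # hypothetical feature vector, replicate A
_replicate_b = np.array([0.12, 0.49, 0.88, 0.30])  # hypothetical feature vector, replicate B
_example_r = np.corrcoef(_replicate_a, _replicate_b)[0, 1]  # ~0.99, i.e. highly consistent replicates
print(f"Example replicate correlation: {_example_r:.3f}")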

# In[10]:


# selecting only the treated wells without the DMSO profiles
treated_wells_only_df = agg_profile.loc[
    (agg_profile["Metadata_treatment"] != "DMSO-positive")
    & (agg_profile["Metadata_treatment"] != "DMSO-negative")
].copy()

# reducing the metadata to only the relevant ones
treated_wells_only_df = treated_wells_only_df[["Metadata_plate_name", "Metadata_treatment"] + features]


# In[ ]:


# calculating the pairwise scores between replicates
replicate_pairwise_comparer = PairwiseCompareManager(
    _df=treated_wells_only_df,
    _feat_cols=features,
    _different_columns=["Metadata_plate_name"],
    _same_columns=["Metadata_treatment"],
    _comparator=PearsonsCorrelation(),
)

# collecting all pairwise scores
replicate_pairwise_scores = replicate_pairwise_comparer()


# In[12]:


# selecting only relevant columns
replicate_pairwise_scores = replicate_pairwise_scores[
    [
        "pearsons_correlation",
        "Metadata_treatment__antehoc_group0",
        "Metadata_plate_name__posthoc_group0",
        "Metadata_plate_name__posthoc_group1",
    ]
]

# renaming the columns
replicate_pairwise_scores.columns = [
    "pearsons_correlation",
    "Metadata_treatment",
    "plate_name_0",
    "plate_name_1",
]

# saving the final pairwise scores
replicate_pairwise_scores.to_csv(
    output_path / "final_replicate_pairwise_scores.csv", index=False
)



# ## Calculating pairwise comparisons across treatments
#
# In this section of the notebook, we conduct pairwise comparisons across all treatments and specific controls. Two data frames are created:
#
# - **healthy_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the healthy reference.
# - **failing_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the failing reference.
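#
# (One possible reading, not stated explicitly in this diff: contrasting the two
# score sets shows, per treatment, whether treated failing cells correlate more
# strongly with the healthy or the failing reference, i.e. whether a treatment
# appears to shift failing cells toward a healthy morphology.)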

-# In[7]:
+# In[13]:


# calculating pairwise correlation between healthy control and treated failing wells
Expand All @@ -159,25 +294,25 @@
failing_ref_trt_pairwise_scores = failing_ref_trt_pairwise_comparer()


-# In[8]:
+# In[14]:


# Select only the relevant columns and add a reference column for healthy controls
health_ref_pairwise_scores = healthy_ref_trt_pairwise_scores[
-    ["pearsons_correlation", "Metadata_treatment__antehoc_group1"]
+    ["pearsons_correlation", "Metadata_treatment__posthoc_group1"]
].copy()
health_ref_pairwise_scores["reference"] = "Healthy"

# Select only the relevant columns and add a reference column for failing controls
failing_ref_pairwise_scores = failing_ref_trt_pairwise_scores[
-    ["pearsons_correlation", "Metadata_treatment__antehoc_group1"]
+    ["pearsons_correlation", "Metadata_treatment__posthoc_group1"]
].copy()
failing_ref_pairwise_scores["reference"] = "Failing"

# Combine the healthy and failing control dataframes into a single dataframe
final_trt_pairwise_scores = (
    pd.concat([health_ref_pairwise_scores, failing_ref_pairwise_scores])
-    .rename(columns={"Metadata_treatment__antehoc_group1": "Metadata_treatment"})
+    .rename(columns={"Metadata_treatment__posthoc_group1": "Metadata_treatment"})
    .reset_index(drop=True)
)
