4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -4,14 +4,14 @@
repos:
  # Keep pre-commit itself up to date
  - repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
-    rev: v0.6.1
+    rev: v0.7.0
    hooks:
      - id: pre-commit-update
        args: ["--verbose"]

  # Ruff for linting Python files
  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.11.5
+    rev: v0.11.8
    hooks:
      - id: ruff
        args: ["--fix"]

Large diffs are not rendered by default.

@@ -0,0 +1,53 @@
#!/usr/bin/env python

# # Generating DMSO consensus profiles

# In[1]:


import pathlib
import sys

import pandas as pd
from pycytominer import consensus

sys.path.append("../../../")
from utils import data_utils

# In[2]:


# setting profile path
concat_profile_path = pathlib.Path(
    "../UMAP-aggregated-fs-profiles/results/concat_data/batch_1_concat_agg_fs.csv"
).resolve(strict=True)

# setting output path
# output_path = pathlib.Path("results/").resolve(strict=True)


# In[3]:


# load in aggregate profiles
agg_df = pd.read_csv(concat_profile_path)

# keep only the DMSO control wells (positive and negative controls)
dmso_agg_df = agg_df.loc[
    (agg_df["Metadata_control_type"] == "positive")
    | (agg_df["Metadata_control_type"] == "negative")
]

# split the metadata and morphology features
dmso_agg_meta, dmso_agg_feats = data_utils.split_meta_and_features(dmso_agg_df)

# display
print("Shape: ", dmso_agg_df.shape)
dmso_agg_df.head()


# In[4]:


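# Collapse the DMSO control wells into one consensus profile per unique
# combination of the replicate columns, taking the per-feature median
# (pycytominer's consensus with operation="median").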
consensus_df = consensus(
    profiles=dmso_agg_df,
    replicate_columns=["Metadata_plate_barcode", "Metadata_plate_name", "Metadata_treatment"],
    operation="median",
    features=dmso_agg_feats,
)
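
# A minimal sketch of persisting the result, assuming a local "results/" output
# directory and file name (both hypothetical; the output_path above is still
# commented out in this diff).
output_dir = pathlib.Path("results/")
output_dir.mkdir(parents=True, exist_ok=True)
consensus_df.to_csv(output_dir / "batch_1_dmso_consensus_profiles.csv", index=False)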
@@ -1,5 +1,9 @@
#!/usr/bin/env python

# # Comparing controls and treatments using pairwise compare
#
# This notebook employs pairwise comparison to quantify the similarity between cellular profiles. In this section, we assess the consistency of experimental replicates and evaluate the similarity between treated wells (containing failing cardiac fibroblast cells) and control wells.

# In[1]:


@@ -9,6 +13,7 @@
import pandas as pd
from comparators.PearsonsCorrelation import PearsonsCorrelation
from comparison_tools.PairwiseCompareManager import PairwiseCompareManager
from pycytominer import consensus
from pycytominer.cyto_utils import load_profiles

# loading project utils
@@ -37,7 +42,7 @@
# split the features:
metadata, features = split_meta_and_features(agg_profile)

-# now only select DMSO profiles that are DMSO-positive and DMSO-negative
+# now only select DMSO profiles that are DMSO_positive and DMSO-negative
dmso_profiles = agg_profile.loc[
    (agg_profile["Metadata_treatment"] == "DMSO-positive")
    | (agg_profile["Metadata_treatment"] == "DMSO-negative")
@@ -125,14 +130,144 @@
)


# ## Calculating pairwise comparisons across DMSO consensus profiles

# In[7]:


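# Build consensus profiles for the DMSO controls: one median profile per unique
# (plate barcode, plate name, treatment) combination.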
consensus_dmso_df = consensus(
    profiles=dmso_profiles,
    replicate_columns=[
        "Metadata_plate_barcode",
        "Metadata_plate_name",
        "Metadata_treatment",
    ],
    operation="median",
    features=features,
)

# split to positive and negative controls
consensus_dmso_pos_df = consensus_dmso_df.loc[
    consensus_dmso_df["Metadata_treatment"] == "DMSO-positive"
]
consensus_dmso_neg_df = consensus_dmso_df.loc[
    consensus_dmso_df["Metadata_treatment"] == "DMSO-negative"
]


# In[8]:


# comparing the consensus profiles of the positive controls
consensus_dmso_pos_cntrl_comparer = PairwiseCompareManager(
    _df=consensus_dmso_pos_df,
    _feat_cols=features,
    _different_columns=["Metadata_plate_name"],
    _same_columns=["Metadata_treatment"],
    _comparator=PearsonsCorrelation(),
)

# comparing the consensus profiles of the negative controls
consensus_dmso_neg_cntrl_comparer = PairwiseCompareManager(
    _df=consensus_dmso_neg_df,
    _feat_cols=features,
    _different_columns=["Metadata_plate_name"],
    _same_columns=["Metadata_treatment"],
    _comparator=PearsonsCorrelation(),
)

# collecting all pairwise scores
consensus_pos_cntrl_pairwise_scores = consensus_dmso_pos_cntrl_comparer()
consensus_neg_cntrl_pairwise_scores = consensus_dmso_neg_cntrl_comparer()


# In[9]:


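# Note on the columns selected below: each comparer call returns one row per
# compared pair of profiles, holding the pearsons_correlation value plus the
# grouping metadata; columns passed as _same_columns carry the
# "__antehoc_group*" suffix and columns passed as _different_columns carry the
# "__posthoc_group*" suffix.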
# selecting only relevant columns
consensus_pos_cntrl_scores = consensus_pos_cntrl_pairwise_scores[
    [
        "pearsons_correlation",
        "Metadata_treatment__antehoc_group0",
        "Metadata_plate_name__posthoc_group0",
        "Metadata_plate_name__posthoc_group1",
    ]
]
consensus_neg_cntrl_scores = consensus_neg_cntrl_pairwise_scores[
    [
        "pearsons_correlation",
        "Metadata_treatment__antehoc_group0",
        "Metadata_plate_name__posthoc_group0",
        "Metadata_plate_name__posthoc_group1",
    ]
]

# combine the positive and negative control scores into a single dataframe
final_consensus_pairwise_scores = (
    pd.concat([consensus_pos_cntrl_scores, consensus_neg_cntrl_scores])
    .rename(columns={"Metadata_treatment__antehoc_group0": "Metadata_treatment"})
    .reset_index(drop=True)
)

# saving the final consensus pairwise scores
final_consensus_pairwise_scores.to_csv(
    output_path / "final_dmso_consensus_pairwise_scores.csv", index=False
)


# ## Calculating pairwise comparisons within replicates
#
# In this section, we compute pairwise Pearson correlations between replicates of the same treatment. This helps identify poorly performing technical replicates—those with low correlation values—while high correlations indicate consistent and reliable measurements across replicates.
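
# A minimal, self-contained sketch of the metric used below (an illustration
# with made-up numbers, not part of the original notebook): the Pearson
# correlation between the feature vectors of two replicate profiles.
import numpy as np

_replicate_a = np.array([0.10, 0.52, 0.91, 0.33])  # hypothetical feature vector, replicate A
_replicate_b = np.array([0.12, 0.49, 0.88, 0.30])  # hypothetical feature vector, replicate B
_example_r = np.corrcoef(_replicate_a, _replicate_b)[0, 1]  # ~0.99, i.e. highly consistent replicates
print(f"Example replicate correlation: {_example_r:.3f}")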

# In[10]:


# selecting only the treated wells without the DMSO profiles
treated_wells_only_df = agg_profile.loc[
    (agg_profile["Metadata_treatment"] != "DMSO-positive")
    & (agg_profile["Metadata_treatment"] != "DMSO-negative")
].copy()

# reducing the metadata to only the relevant ones
treated_wells_only_df = treated_wells_only_df[["Metadata_plate_name", "Metadata_treatment"] + features]


# In[ ]:


# calculating the pairwise scores between replicates
replicate_pairwise_comparer = PairwiseCompareManager(
    _df=treated_wells_only_df,
    _feat_cols=features,
    _different_columns=["Metadata_plate_name"],
    _same_columns=["Metadata_treatment"],
    _comparator=PearsonsCorrelation(),
)

# collecting all pairwise scores
replicate_pairwise_scores = replicate_pairwise_comparer()


# In[12]:


# selecting only relevant columns
replicate_pairwise_scores = replicate_pairwise_scores[
    [
        "pearsons_correlation",
        "Metadata_treatment__antehoc_group0",
        "Metadata_plate_name__posthoc_group0",
        "Metadata_plate_name__posthoc_group1",
    ]
]

# renaming the columns
replicate_pairwise_scores.columns = [
    "pearsons_correlation",
    "Metadata_treatment",
    "plate_name_0",
    "plate_name_1",
]

# saving the final pairwise scores
replicate_pairwise_scores.to_csv(
    output_path / "final_replicate_pairwise_scores.csv", index=False
)



# ## Calculating pairwise comparisons across treatments
#
# In this section of the notebook, we conduct pairwise comparisons across all treatments and specific controls. Two data frames are created:
#
# - **healthy_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the healthy reference.
# - **failing_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the failing reference.
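#
# (One possible reading, not stated explicitly in this diff: contrasting the two
# score sets shows, per treatment, whether treated failing cells correlate more
# strongly with the healthy or the failing reference, i.e. whether a treatment
# appears to shift failing cells toward a healthy morphology.)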

-# In[7]:
+# In[13]:


# calculating pairwise correlation between healthy control and treated failing wells
Expand All @@ -159,25 +294,25 @@
failing_ref_trt_pairwise_scores = failing_ref_trt_pairwise_comparer()


-# In[8]:
+# In[14]:


# Select only the relevant columns and add a reference column for healthy controls
health_ref_pairwise_scores = healthy_ref_trt_pairwise_scores[
-    ["pearsons_correlation", "Metadata_treatment__antehoc_group1"]
+    ["pearsons_correlation", "Metadata_treatment__posthoc_group1"]
].copy()
health_ref_pairwise_scores["reference"] = "Healthy"

# Select only the relevant columns and add a reference column for failing controls
failing_ref_pairwise_scores = failing_ref_trt_pairwise_scores[
-    ["pearsons_correlation", "Metadata_treatment__antehoc_group1"]
+    ["pearsons_correlation", "Metadata_treatment__posthoc_group1"]
].copy()
failing_ref_pairwise_scores["reference"] = "Failing"

# Combine the healthy and failing control dataframes into a single dataframe
final_trt_pairwise_scores = (
    pd.concat([health_ref_pairwise_scores, failing_ref_pairwise_scores])
-    .rename(columns={"Metadata_treatment__antehoc_group1": "Metadata_treatment"})
+    .rename(columns={"Metadata_treatment__posthoc_group1": "Metadata_treatment"})
    .reset_index(drop=True)
)
