Skip to content

Commit cba9a1f

Browse files
authored
Merge pull request #13 from ChoBioLab/qa
Version 1?
2 parents 0426120 + 340c3f5 commit cba9a1f

40 files changed

Lines changed: 22426 additions & 13991 deletions

README.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,31 @@ Correspondence: elizabeth.aslinger@aya.yale.edu
1616
with desired environment name):
1717
`conda create -n corescpy python=3.10.4 # create python environment`
1818

19-
3. Activate the conda environment with `conda activate corescpy`.
19+
1. Activate the conda environment with `conda activate corescpy`.
2020

2121
4. Clone the repository to your local computer:
2222
`git clone git@github.com:ChoBioLab/corescpy.git`,
2323
`git clone https://github.com/ChoBioLab/corescpy.git`, or
2424
look above for the green "Code" button and press it for instructions.
2525

26-
5. Navigate to the repository directory (replace <DIRECTORY> with your path):
26+
1. Navigate to the repository directory (replace <DIRECTORY> with your path):
2727
`cd <DIRECTORY>`
2828

29-
6. Install the package with pip. (Ensure you have pip installed.)
29+
2. Install
30+
31+
1. Install the package with pip. (Ensure you have pip installed.)
3032
`pip install .`
3133

32-
7. If you have issues with resolving/finding the most up-to-date version of the `spatialdata` and/or `spatialdata-io` packages, try running:
34+
1. If you have issues with resolving/finding the most up-to-date version of the `spatialdata` and/or `spatialdata-io` packages, try running:
3335
```
3436
pip install git+https://github.com/scverse/spatialdata
3537
pip install git+https://github.com/scverse/spatialdata-io
3638
```
37-
in your terminal while in your conda environment, then re-try step (6).
39+
in your terminal while in your conda environment, then re-try step (6). If you have an M1 Mac, [see this thread about known compatibility issues](https://github.com/scverse/pertpy/issues/201#issuecomment-1431621313) with `pertpy` if you have issues with the install.
3840

3941
8. If you're planning to use this environment with Jupyter notebooks, run `conda install nb_conda_kernels`, then `pip install ipykernel`.
4042

43+
If you have issues importing modules or functions (particularly if it only happens if you don't run the import after launching `python` while you are in the `corescpy` directory), try `mv <CONDA_ENV_PATH>/site-packages/_corescpy.pth <CONDA_ENV_PATH>/_corescpy.pth.bak` (replacing <CONDA_ENV_PATH> with your conda site-packages path, e.g., `/home/elizabeth/elizabeth/miniconda3/envs/corescpy/lib/python3.10/site-packages`), then `pip uninstall corescpy` then `cd corescpy` (replace "corescpy" with path to your corescpy top-level directory if needed) then `pip install -e .`. Then try `cd` to return to your home directory, then `python -c "import corescpy; print(dir(corescpy))"` from your terminal. Make sure it prints out submodules (e.g., `analysis`), and not just the base attributes (e.g., `__doc__`).
4144

4245
** Note: To use GPU resources, use `conda install -c rapidsai -c nvidia -c conda-forge cugraph cuml cudf` and install the gpu version of coreSCpy (which should `pip install scanpy[rapids]`).
4346

corescpy/__init__.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,24 @@
22
# pylint: disable=unused-import
33

44
import sys
5+
from .constants import get_panel_constants, get_layer_dict
56
from .class_sc import Omics
67
from .class_crispr import Crispr
78
from .class_spatial import Spatial
89
from . import utils as tl
910
from . import processing as pp
1011
from . import analysis as ax
1112
from . import visualization as pl
12-
from . import class_crispr, class_sc, class_spatial
13+
from . import class_crispr, class_sc, class_spatial, constants
1314

1415
mod = ["ax", "pl", "pp", "tl", "Omics", "Crispr", "Spatial"]
1516
sys.modules.update({f"{__name__}.{m}": globals()[m] for m in mod})
1617

17-
18-
def get_panel_constants(**kwargs):
19-
from .constants import get_panel_constants as gpc # noqa: E402
20-
return gpc(**kwargs)
21-
18+
SPATIAL_KEY = "spatial"
2219

2320
__all__ = [
2421
"ax", "pl", "pp", "tl", "Omics", "Crispr", "Spatial",
2522
"processing", "analysis", "visualization", "utils",
26-
"class_sc", "class_crispr", "class_spatial", "defaults",
27-
"get_panel_constants"
23+
"class_sc", "class_crispr", "class_spatial", "constants",
24+
"get_panel_constants", "get_layer_dict", "SPATIAL_KEY"
2825
]

corescpy/analysis/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from .perturbations import (
66
perform_mixscape, perform_augur, perform_differential_prioritization,
77
compute_distance, perform_gsea, perform_gsea_pt,
8-
perform_pathway_interference, perform_dea, calculate_dea_deseq2)
8+
perform_pathway_interference, perform_dea, calculate_dea_deseq2,
9+
calculate_deg_covariates)
910
from .clustering import (cluster, find_marker_genes, make_marker_genes_df,
1011
perform_celltypist, annotate_by_markers,
1112
print_marker_info)
@@ -22,5 +23,6 @@
2223
"find_marker_genes", "make_marker_genes_df", "print_marker_info",
2324
"perform_celltypist", "annotate_by_markers", "analyze_composition",
2425
"analyze_receptor_ligand", "analyze_causal_network",
25-
"classify_gex_cells", "classify_coex_cells", "classify_tx"
26+
"classify_gex_cells", "classify_coex_cells", "classify_tx",
27+
"calculate_deg_covariates"
2628
]

corescpy/analysis/clustering.py

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
from warnings import warn
1111
import os
1212
import re
13+
import traceback
14+
import seaborn as sb
1315
from copy import deepcopy
14-
import seaborn as sns
1516
import celltypist
1617
from anndata import AnnData
1718
import scanpy as sc
@@ -92,6 +93,13 @@ def cluster(adata, layer=None, method_cluster="leiden", key_added=None,
9293
ann = cr.tl.merge_pca_subset(ann, ann_use, retain_cols=False)
9394
else: # if used full gene set
9495
ann = ann_use
96+
try:
97+
sc.pl.pca_variance_ratio(ann, n_pcs=kws_pca[
98+
"n_comps"] if "n_comps" in kws_pca and isinstance(
99+
kws_pca["n_comps"], (int, float)) else 50, log=True)
100+
except Exception:
101+
traceback.print_exc()
102+
warn("\nPlotting PCA variance ratio failed!")
95103

96104
# Neighborhood Graph
97105
print(f"\n\n<<< COMPUTING NEIGHBORHOOD GRAPH >>>\n{kws_neighbors}\n")
@@ -124,13 +132,17 @@ def cluster(adata, layer=None, method_cluster="leiden", key_added=None,
124132

125133

126134
def find_marker_genes(adata, assay=None, col_cell_type="leiden", n_genes=5,
127-
key_reference="rest", layer="log1p", p_threshold=None,
135+
key_reference="rest", layer="log1p",
136+
p_threshold=None, lfc_threshold=None,
128137
col_gene_symbols=None, method="wilcoxon", kws_plot=True,
129-
use_raw=False, key_added="rank_genes_groups", **kwargs):
138+
use_raw=False, key_added="rank_genes_groups",
139+
pts=True, **kwargs):
130140
"""Find cluster gene markers."""
131141
figs = {}
132142
if kws_plot is True:
133143
kws_plot = {}
144+
if lfc_threshold is None:
145+
lfc_threshold = [None, None]
134146
if assay:
135147
adata = adata[assay]
136148
if layer:
@@ -139,14 +151,16 @@ def find_marker_genes(adata, assay=None, col_cell_type="leiden", n_genes=5,
139151
col_cell_type].astype("category")
140152
sc.tl.rank_genes_groups(
141153
adata, col_cell_type, method=method, reference=key_reference,
142-
key_added=key_added, use_raw=use_raw, **kwargs) # rank
154+
key_added=key_added, use_raw=use_raw, pts=pts, **kwargs) # rank
143155
if isinstance(kws_plot, dict):
144156
figs["marker_rankings"] = cr.pl.plot_markers(
145-
adata, n_genes=n_genes, key_added=key_added, use_raw=use_raw,
146-
key_reference=key_reference, **{"col_wrap": 3, **kws_plot})
157+
adata, key_added=key_added, use_raw=use_raw,
158+
key_reference=key_reference, **{
159+
"col_wrap": 3, "n_genes": min([n_genes, 5]) if isinstance(
160+
n_genes, (int, float)) else 5, **kws_plot})
147161
ranks = make_marker_genes_df(
148162
adata, col_cell_type, key_added=key_added, p_threshold=p_threshold,
149-
log2fc_min=None, log2fc_max=None, gene_symbols=col_gene_symbols)
163+
lfc_threshold=lfc_threshold, gene_symbols=col_gene_symbols)
150164
return ranks, figs
151165

152166

@@ -156,8 +170,14 @@ def make_marker_genes_df(adata, col_cell_type, key_added="leiden",
156170
ranks = sc.get.rank_genes_groups_df(adata, None, key=key_added, **kwargs)
157171
ranks = ranks.rename({"group": col_cell_type}, axis=1).set_index(
158172
[col_cell_type, "names"]) # format ranking dataframe
159-
if lfc_threshold:
160-
ranks = ranks[ranks.logfoldchanges >= lfc_threshold] # filter ~ LFC
173+
if lfc_threshold is None:
174+
lfc_threshold = [None, None]
175+
if isinstance(lfc_threshold, int):
176+
lfc_threshold = [lfc_threshold, None] # assume if 1 # given, maximum
177+
if lfc_threshold[0] is not None:
178+
ranks = ranks[ranks.logfoldchanges >= lfc_threshold[0]] # minimum LFC
179+
if lfc_threshold[1]:
180+
ranks = ranks[ranks.logfoldchanges <= lfc_threshold[1]] # maximum LFC
161181
if p_threshold:
162182
ranks = ranks[ranks.pvals_adj <= p_threshold] # filter ~ LFC
163183
return ranks
@@ -259,21 +279,17 @@ def perform_celltypist(adata, model, col_cell_type=None,
259279
color=list(ccts), wspace=space) # all 1 plot
260280

261281
# Plot Confidence Scores
262-
if "majority_voting" in ann.obs: # if did over-clustering/majority voting
263-
conf = ann.obs[["majority_voting", "predicted_labels", "conf_score"
264-
]].set_index("conf_score").stack().rename_axis(
265-
["Confidence Score", "Annotation"]).to_frame(
266-
"Label").reset_index() # scores ~ label
267-
268-
aspect = int(len(conf[conf.Annotation == "predicted_labels"
269-
].Label.unique()) / 15) # aspect ratio
270-
figs["confidence"] = sns.catplot(
271-
data=conf, y="Confidence Score", row="Annotation", height=40,
272-
aspect=aspect, x="Label", hue="Label", kind="violin") # plot
273-
figs["confidence"].figure.suptitle("CellTypist Confidence Scores")
274-
for a in figs["confidence"].axes.flat:
275-
_ = a.set_xticklabels(a.get_xticklabels(), rotation=90)
276-
figs["confidence"].fig.show()
282+
if "majority_voting" in ann.obs and (
283+
"conf_score" in ann.obs.columns): # if did majority voting
284+
try:
285+
figs["confidence"] = sb.displot(
286+
ann.obs, x="conf_score", hue="majority_voting",
287+
kind="kde", fill=True, cut=0)
288+
print(f"\n\n\n{'=' * 80}\nConfidence Scores\n{'=' * 80}",
289+
"\n\n", ann.obs.groupby("majority_voting").apply(
290+
lambda x: x["conf_score"].describe()).round(2), "\n\n")
291+
except Exception:
292+
pass
277293
return ann, res, figs
278294

279295

@@ -296,6 +312,8 @@ def annotate_by_markers(adata, data_assignment, method="overlap_count",
296312
if isinstance(data_assignment, (str, os.PathLike)):
297313
data_assignment = pd.read_excel(data_assignment, index_col=0)
298314
assign = data_assignment.copy()
315+
if assign.shape[1] == 1:
316+
col_assignment = assign.columns[0]
299317
if renaming is True:
300318
sources = assign[col_assignment].unique()
301319
rename = dict(zip(sources, [" ".join([i.capitalize() if i and i[

corescpy/analysis/communication.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ def analyze_receptor_ligand(adata, method="liana", n_jobs=4, seed=1618,
2626
"""Perform receptor-ligand analysis."""
2727
if copy is True:
2828
adata = adata.copy()
29+
if figsize is None:
30+
figsize = (20, 20)
2931
res_keys = ["squidpy", "liana_res", "lr_dea_res", "dea_results", "dea_df"]
3032
figs, res = {}, dict(zip(res_keys, [None] * len(res_keys))) # for output
3133
kws_plot = {} if kws_plot is None else {**kws_plot}
@@ -72,7 +74,7 @@ def analyze_receptor_ligand(adata, method="liana", n_jobs=4, seed=1618,
7274
print(traceback.format_exc(), "Liana + DEA failed!\n\n",)
7375

7476
# Plotting
75-
if plot is True:
77+
if plot is True and method != "squidpy":
7678
try:
7779
figs["lr"] = plot_receptor_ligand(
7880
adata=adata, lr_dea_res=res["lr_dea_res"], **kws) # plots

corescpy/analysis/composition.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
"""
99

1010
import pertpy as pt
11-
from pertpy.plot._coda import CodaPlot as coda_plot
11+
# from pertpy.plot._coda import CodaPlot as pt.pl.coda.
1212
import arviz as az
1313
import traceback
1414
import warnings
1515
import matplotlib.pyplot as plt
1616

1717

18-
def analyze_composition(adata, col_condition, col_cell_type, assay=None,
18+
def analyze_composition(adata, col_condition, col_cell_type, assay=None,
1919
layer=None, copy=False, generate_sample_level=True,
2020
plot=True, reference_cell_type="automatic",
2121
key_reference_cell_type="automatic",
@@ -64,21 +64,21 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
6464
reference_cell_type="automatic",
6565
analysis_type="cell_level",
6666
generate_sample_level=True, sample_identifier="batch",
67-
covariates=None, est_fdr=0.05, plot=True, out_file=None):
67+
covariates=None, est_fdr=0.05, out_file=None, plot=True,
68+
plot_zero_covariate=True, plot_zero_cell_type=True):
6869
"""Perform SCCoda compositional analysis."""
6970
figs, results = {}, {}
71+
adata = adata.copy()
7072
if generate_sample_level is True and sample_identifier is None:
7173
warnings.warn(
7274
"Can't generate sample level if `sample_identifier`=None."
7375
" Setting `generate_sample_level` to False.")
7476
generate_sample_level = False
7577
mod, mod_o = "coda", assay if assay else "rna"
76-
# covariate_obs = [covariates] + col_condition if isinstance(
77-
# covariates, str) else covariates + [
78-
# col_condition] if covariates else [col_condition]
78+
if isinstance(covariates, str):
79+
covariates = [covariates]
7980
covariate_obs = [col_condition] + covariates if covariates else [
8081
col_condition]
81-
adata = adata.copy()
8282
adata = adata[~adata.obs[col_condition].isnull()].copy()
8383
adata.obs.index = [adata.obs.index.values[i] + "_" + str(
8484
adata.obs.iloc[i][col_condition]) + "_" + str(adata.obs.iloc[i][
@@ -96,11 +96,11 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
9696
# [col_cell_type, col_condition]])
9797
if plot is True:
9898
try:
99-
figs["barplot"] = coda_plot.boxplots(
99+
figs["barplot"] = model.plot_boxplots(
100100
scodata, modality_key=mod,
101101
feature_name=col_condition,
102-
figsize=(12, 5), add_dots=True,
103-
args_swarmplot={"palette": ["red"]})
102+
# args_swarmplot={"palette": ["red"]},
103+
figsize=(12, 5), add_dots=True)
104104
plt.show()
105105
except Exception as err:
106106
print(f"{err}\n\nFailed to plot boxplots. Ensure PyQt5 is "
@@ -111,14 +111,14 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
111111
if plot is True:
112112
try:
113113
figs[
114-
"find_reference"] = coda_plot.rel_abundance_dispersion_plot(
114+
"find_reference"] = model.plot_rel_abundance_dispersion_plot(
115115
scodata, modality_key=mod,
116116
abundant_threshold=0.9) # helps choose rference cell type
117117
except Exception as err:
118118
print(f"{err}\n\nFailed to plot reference cell type.\n\n")
119119
figs["find_reference"] = err
120120
try:
121-
figs["proportions"] = coda_plot.boxplots(
121+
figs["proportions"] = model.plot_boxplots(
122122
scodata, modality_key=mod,
123123
feature_name=col_condition, add_dots=True)
124124
except Exception as err:
@@ -150,7 +150,7 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
150150
scodata.write_h5mu(f"{out_file}_{est_fdr}_fdr")
151151
if plot is True:
152152
try:
153-
figs["proportions_stacked"] = coda_plot.stacked_barplot(
153+
figs["proportions_stacked"] = model.plot_stacked_barplot(
154154
scodata, modality_key=mod, feature_name=col_condition)
155155
plt.show()
156156
except Exception as err:
@@ -169,12 +169,10 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
169169
plt.tight_layout()
170170
plt.show()
171171
try:
172-
pzc = any((scodata.varm[f"effect_df_{x}"]["Final Parameter"].any(
173-
) for x in scodata.uns["scCODA_params"]["covariate_names"]
174-
)) is False # don't plot 0 effects if any non-0
175-
figs["effects"] = coda_plot.effects_barplot(
172+
figs["effects"] = model.plot_effects_barplot(
176173
scodata, modality_key=mod, parameter="Final Parameter",
177-
plot_zero_cell_type=pzc)
174+
plot_zero_cell_type=plot_zero_cell_type,
175+
plot_zero_covariate=plot_zero_covariate)
178176
except Exception as err:
179177
print(traceback.format_exc(), "\n\nFailed to plot effects.\n\n")
180178
figs["effects"] = err
@@ -239,12 +237,12 @@ def perform_tasccoda(adata, col_condition, col_cell_type,
239237
sample_identifier=col_sample_id,
240238
covariate_obs=covariates + [col_condition],
241239
levels_orig=col_list_lineage_tree, add_level_name=True) # load model
242-
coda_plot.draw_tree(ts_data["coda"])
240+
model.plot_draw_tree(ts_data["coda"])
243241
ts_data.mod["coda_subset"] = ts_data["coda"][ts_data["coda"].obs[
244242
col_condition].isin([key_control, key_treatment])] # subset if needed
245243
if plot is True:
246-
figs["tree"] = coda_plot.draw_tree(ts_data["coda"])
247-
figs["descriptives_abundance"] = coda_plot.boxplots(
244+
figs["tree"] = model.plot_draw_tree(ts_data["coda"])
245+
figs["descriptives_abundance"] = model.plot_boxplots(
248246
ts_data, modality_key="coda_subset", feature_name=col_condition,
249247
figsize=(20, 8))
250248
plt.show()
@@ -260,12 +258,12 @@ def perform_tasccoda(adata, col_condition, col_cell_type,
260258
results["credible_effects"] = model.credible_effects(
261259
ts_data, modality_key="coda_subset") # credible effects
262260
if plot:
263-
figs["credible_effects"] = coda_plot.draw_effects(
261+
figs["credible_effects"] = model.plot_draw_effects(
264262
ts_data, modality_key="coda_subset",
265263
tree=ts_data["coda_subset"].uns["tree"],
266264
covariate=f"{col_condition}[T.{key_treatment}]"
267265
) # effects as sizes/colors of nodes on the lineage tree
268-
figs["credible_effects_dual"] = coda_plot.draw_effects(
266+
figs["credible_effects_dual"] = model.plot_draw_effects(
269267
ts_data, modality_key="coda_subset",
270268
tree=ts_data["coda_subset"].uns["tree"],
271269
covariate=f"{col_condition}[T.{key_treatment}]",

0 commit comments

Comments
 (0)