ChoBioLab
diff --git a/‎README.md‎
Lines changed: 8 additions & 5 deletions b/‎README.md‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎corescpy/__init__.py‎
Lines changed: 5 additions & 8 deletions b/‎corescpy/__init__.py‎
Lines changed: 5 additions & 8 deletions
diff --git a/‎corescpy/analysis/__init__.py‎
Lines changed: 4 additions & 2 deletions b/‎corescpy/analysis/__init__.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎corescpy/analysis/clustering.py‎
Lines changed: 42 additions & 24 deletions b/‎corescpy/analysis/clustering.py‎
Lines changed: 42 additions & 24 deletions
diff --git a/‎corescpy/analysis/communication.py‎
Lines changed: 3 additions & 1 deletion b/‎corescpy/analysis/communication.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎corescpy/analysis/composition.py‎
Lines changed: 21 additions & 23 deletions b/‎corescpy/analysis/composition.py‎
Lines changed: 21 additions & 23 deletions
@@ -16,28 +16,31 @@ Correspondence: elizabeth.aslinger@aya.yale.edu
 with desired environment name):
 `conda create -n corescpy python=3.10.4  # create python environment`
 
-3. Activate the conda environment with `conda activate corescpy`.
+1. Activate the conda environment with `conda activate corescpy`.
 
 4. Clone the repository to your local computer:
 `git clone git@github.com:ChoBioLab/corescpy.git`,
 `git clone https://github.com/ChoBioLab/corescpy.git`, or
 look above for the green "Code" button and press it for instructions.
 
-5. Navigate to the repository directory (replace <DIRECTORY> with your path):
+1. Navigate to the repository directory (replace <DIRECTORY> with your path):
 `cd <DIRECTORY>`
 
-6. Install the package with pip. (Ensure you have pip installed.)
+2. Install 
+
+1. Install the package with pip. (Ensure you have pip installed.)
 `pip install .`
 
-7. If you have issues with resolving/finding the most up-to-date version of the `spatialdata` and/or `spatialdata-io` packages, try running:
+1. If you have issues with resolving/finding the most up-to-date version of the `spatialdata` and/or `spatialdata-io` packages, try running:
 ```
 pip install git+https://github.com/scverse/spatialdata
 pip install git+https://github.com/scverse/spatialdata-io
 ```
-in your terminal while in your conda environment, then re-try step (6).
+in your terminal while in your conda environment, then retry the `pip install .` step above. If you have an M1 Mac and the install fails, [see this thread about known compatibility issues](https://github.com/scverse/pertpy/issues/201#issuecomment-1431621313) with `pertpy`.
 
 8. If you're planning to use this environment with Jupyter notebooks, run `conda install nb_conda_kernels`, then `pip install ipykernel`.
 
+If you have issues importing modules or functions (particularly if the import only fails when you launch `python` from outside the `corescpy` directory), try `mv <CONDA_ENV_PATH>/site-packages/_corescpy.pth <CONDA_ENV_PATH>/_corescpy.pth.bak` (replacing `<CONDA_ENV_PATH>` with the directory that contains your conda environment's `site-packages` folder, e.g., `/home/elizabeth/elizabeth/miniconda3/envs/corescpy/lib/python3.10`), then `pip uninstall corescpy`, then `cd corescpy` (replace "corescpy" with the path to your corescpy top-level directory if needed), then `pip install -e .`. Afterwards, run `cd` to return to your home directory, then run `python -c "import corescpy; print(dir(corescpy))"` from your terminal. Make sure it prints submodules (e.g., `analysis`), and not just the base attributes (e.g., `__doc__`).
 
 ** Note: To use GPU resources, use `conda install -c rapidsai -c nvidia -c conda-forge cugraph cuml cudf` and install the gpu version of coreSCpy (which should `pip install scanpy[rapids]`).
 
 
@@ -2,27 +2,24 @@
 # pylint: disable=unused-import
 
 import sys
+from .constants import get_panel_constants, get_layer_dict
 from .class_sc import Omics
 from .class_crispr import Crispr
 from .class_spatial import Spatial
 from . import utils as tl
 from . import processing as pp
 from . import analysis as ax
 from . import visualization as pl
-from . import class_crispr, class_sc, class_spatial
+from . import class_crispr, class_sc, class_spatial, constants
 
 mod = ["ax", "pl", "pp", "tl", "Omics", "Crispr", "Spatial"]
 sys.modules.update({f"{__name__}.{m}": globals()[m] for m in mod})
 
-
-def get_panel_constants(**kwargs):
-    from .constants import get_panel_constants as gpc  # noqa: E402
-    return gpc(**kwargs)
-
+SPATIAL_KEY = "spatial"
 
 __all__ = [
     "ax", "pl", "pp", "tl", "Omics", "Crispr", "Spatial",
     "processing", "analysis", "visualization", "utils",
-    "class_sc", "class_crispr", "class_spatial", "defaults",
-    "get_panel_constants"
+    "class_sc", "class_crispr", "class_spatial", "constants",
+    "get_panel_constants", "get_layer_dict", "SPATIAL_KEY"
 ]
@@ -5,7 +5,8 @@
 from .perturbations import (
     perform_mixscape, perform_augur, perform_differential_prioritization,
     compute_distance, perform_gsea, perform_gsea_pt,
-    perform_pathway_interference, perform_dea, calculate_dea_deseq2)
+    perform_pathway_interference, perform_dea, calculate_dea_deseq2,
+    calculate_deg_covariates)
 from .clustering import (cluster, find_marker_genes, make_marker_genes_df,
                          perform_celltypist, annotate_by_markers,
                          print_marker_info)
@@ -22,5 +23,6 @@
     "find_marker_genes", "make_marker_genes_df", "print_marker_info",
     "perform_celltypist", "annotate_by_markers", "analyze_composition",
     "analyze_receptor_ligand", "analyze_causal_network",
-    "classify_gex_cells", "classify_coex_cells", "classify_tx"
+    "classify_gex_cells", "classify_coex_cells", "classify_tx",
+    "calculate_deg_covariates"
 ]
@@ -10,8 +10,9 @@
 from warnings import warn
 import os
 import re
+import traceback
+import seaborn as sb
 from copy import deepcopy
-import seaborn as sns
 import celltypist
 from anndata import AnnData
 import scanpy as sc
@@ -92,6 +93,13 @@ def cluster(adata, layer=None, method_cluster="leiden", key_added=None,
             ann = cr.tl.merge_pca_subset(ann, ann_use, retain_cols=False)
         else:  # if used full gene set
             ann = ann_use
+        try:
+            sc.pl.pca_variance_ratio(ann, n_pcs=kws_pca[
+                "n_comps"] if "n_comps" in kws_pca and isinstance(
+                    kws_pca["n_comps"], (int, float)) else 50, log=True)
+        except Exception:
+            traceback.print_exc()
+            warn("\nPlotting PCA variance ratio failed!")
 
     # Neighborhood Graph
     print(f"\n\n<<< COMPUTING NEIGHBORHOOD GRAPH >>>\n{kws_neighbors}\n")
@@ -124,13 +132,17 @@ def cluster(adata, layer=None, method_cluster="leiden", key_added=None,
 
 
 def find_marker_genes(adata, assay=None, col_cell_type="leiden", n_genes=5,
-                      key_reference="rest", layer="log1p", p_threshold=None,
+                      key_reference="rest", layer="log1p",
+                      p_threshold=None, lfc_threshold=None,
                       col_gene_symbols=None, method="wilcoxon", kws_plot=True,
-                      use_raw=False, key_added="rank_genes_groups", **kwargs):
+                      use_raw=False, key_added="rank_genes_groups",
+                      pts=True, **kwargs):
     """Find cluster gene markers."""
     figs = {}
     if kws_plot is True:
         kws_plot = {}
+    if lfc_threshold is None:
+        lfc_threshold = [None, None]
     if assay:
         adata = adata[assay]
     if layer:
@@ -139,14 +151,16 @@ def find_marker_genes(adata, assay=None, col_cell_type="leiden", n_genes=5,
         col_cell_type].astype("category")
     sc.tl.rank_genes_groups(
         adata, col_cell_type, method=method, reference=key_reference,
-        key_added=key_added, use_raw=use_raw, **kwargs)  # rank
+        key_added=key_added, use_raw=use_raw, pts=pts, **kwargs)  # rank
     if isinstance(kws_plot, dict):
         figs["marker_rankings"] = cr.pl.plot_markers(
-            adata, n_genes=n_genes, key_added=key_added, use_raw=use_raw,
-            key_reference=key_reference, **{"col_wrap": 3, **kws_plot})
+            adata, key_added=key_added, use_raw=use_raw,
+            key_reference=key_reference, **{
+                "col_wrap": 3, "n_genes": min([n_genes, 5]) if isinstance(
+                    n_genes, (int, float)) else 5, **kws_plot})
     ranks = make_marker_genes_df(
         adata, col_cell_type, key_added=key_added, p_threshold=p_threshold,
-        log2fc_min=None, log2fc_max=None, gene_symbols=col_gene_symbols)
+        lfc_threshold=lfc_threshold, gene_symbols=col_gene_symbols)
     return ranks, figs
 
 
@@ -156,8 +170,14 @@ def make_marker_genes_df(adata, col_cell_type, key_added="leiden",
     ranks = sc.get.rank_genes_groups_df(adata, None, key=key_added, **kwargs)
     ranks = ranks.rename({"group": col_cell_type}, axis=1).set_index(
         [col_cell_type, "names"])  # format ranking dataframe
-    if lfc_threshold:
-        ranks = ranks[ranks.logfoldchanges >= lfc_threshold]  # filter ~ LFC
+    if lfc_threshold is None:
+        lfc_threshold = [None, None]
+    if isinstance(lfc_threshold, int):
+        lfc_threshold = [lfc_threshold, None]  # assume if 1 # given, maximum
+    if lfc_threshold[0] is not None:
+        ranks = ranks[ranks.logfoldchanges >= lfc_threshold[0]]  # minimum LFC
+    if lfc_threshold[1]:
+        ranks = ranks[ranks.logfoldchanges <= lfc_threshold[1]]  # maximum LFC
     if p_threshold:
         ranks = ranks[ranks.pvals_adj <= p_threshold]  # filter ~ LFC
     return ranks
@@ -259,21 +279,17 @@ def perform_celltypist(adata, model, col_cell_type=None,
                              color=list(ccts), wspace=space)  # all 1 plot
 
     # Plot Confidence Scores
-    if "majority_voting" in ann.obs:  # if did over-clustering/majority voting
-        conf = ann.obs[["majority_voting", "predicted_labels", "conf_score"
-                        ]].set_index("conf_score").stack().rename_axis(
-                            ["Confidence Score", "Annotation"]).to_frame(
-                                "Label").reset_index()  # scores ~ label
-
-        aspect = int(len(conf[conf.Annotation == "predicted_labels"
-                              ].Label.unique()) / 15)  # aspect ratio
-        figs["confidence"] = sns.catplot(
-            data=conf, y="Confidence Score", row="Annotation", height=40,
-            aspect=aspect, x="Label", hue="Label", kind="violin")  # plot
-        figs["confidence"].figure.suptitle("CellTypist Confidence Scores")
-        for a in figs["confidence"].axes.flat:
-            _ = a.set_xticklabels(a.get_xticklabels(), rotation=90)
-        figs["confidence"].fig.show()
+    if "majority_voting" in ann.obs and (
+            "conf_score" in ann.obs.columns):  # if did majority voting
+        try:
+            figs["confidence"] = sb.displot(
+                ann.obs, x="conf_score", hue="majority_voting",
+                kind="kde", fill=True, cut=0)
+            print(f"\n\n\n{'=' * 80}\nConfidence Scores\n{'=' * 80}",
+                  "\n\n", ann.obs.groupby("majority_voting").apply(
+                      lambda x: x["conf_score"].describe()).round(2), "\n\n")
+        except Exception:
+            pass
     return ann, res, figs
 
 
@@ -296,6 +312,8 @@ def annotate_by_markers(adata, data_assignment, method="overlap_count",
     if isinstance(data_assignment, (str, os.PathLike)):
         data_assignment = pd.read_excel(data_assignment, index_col=0)
     assign = data_assignment.copy()
+    if assign.shape[1] == 1:
+        col_assignment = assign.columns[0]
     if renaming is True:
         sources = assign[col_assignment].unique()
         rename = dict(zip(sources, [" ".join([i.capitalize() if i and i[
 
@@ -26,6 +26,8 @@ def analyze_receptor_ligand(adata, method="liana", n_jobs=4, seed=1618,
     """Perform receptor-ligand analysis."""
     if copy is True:
         adata = adata.copy()
+    if figsize is None:
+        figsize = (20, 20)
     res_keys = ["squidpy", "liana_res", "lr_dea_res", "dea_results", "dea_df"]
     figs, res = {}, dict(zip(res_keys, [None] * len(res_keys)))  # for output
     kws_plot = {} if kws_plot is None else {**kws_plot}
@@ -72,7 +74,7 @@ def analyze_receptor_ligand(adata, method="liana", n_jobs=4, seed=1618,
             print(traceback.format_exc(), "Liana + DEA failed!\n\n",)
 
     # Plotting
-    if plot is True:
+    if plot is True and method != "squidpy":
         try:
             figs["lr"] = plot_receptor_ligand(
                 adata=adata, lr_dea_res=res["lr_dea_res"], **kws)  # plots
 
@@ -8,14 +8,14 @@
 """
 
 import pertpy as pt
-from pertpy.plot._coda import CodaPlot as coda_plot
+# from pertpy.plot._coda import CodaPlot as pt.pl.coda.
 import arviz as az
 import traceback
 import warnings
 import matplotlib.pyplot as plt
 
 
-def analyze_composition(adata, col_condition,  col_cell_type, assay=None,
+def analyze_composition(adata, col_condition, col_cell_type, assay=None,
                         layer=None, copy=False, generate_sample_level=True,
                         plot=True, reference_cell_type="automatic",
                         key_reference_cell_type="automatic",
@@ -64,21 +64,21 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
                    reference_cell_type="automatic",
                    analysis_type="cell_level",
                    generate_sample_level=True, sample_identifier="batch",
-                   covariates=None, est_fdr=0.05, plot=True, out_file=None):
+                   covariates=None, est_fdr=0.05, out_file=None, plot=True,
+                   plot_zero_covariate=True, plot_zero_cell_type=True):
     """Perform SCCoda compositional analysis."""
     figs, results = {}, {}
+    adata = adata.copy()
     if generate_sample_level is True and sample_identifier is None:
         warnings.warn(
             "Can't generate sample level if `sample_identifier`=None."
             " Setting `generate_sample_level` to False.")
         generate_sample_level = False
     mod, mod_o = "coda", assay if assay else "rna"
-    # covariate_obs = [covariates] + col_condition if isinstance(
-    #     covariates, str) else covariates + [
-    #         col_condition] if covariates else [col_condition]
+    if isinstance(covariates, str):
+        covariates = [covariates]
     covariate_obs = [col_condition] + covariates if covariates else [
         col_condition]
-    adata = adata.copy()
     adata = adata[~adata.obs[col_condition].isnull()].copy()
     adata.obs.index = [adata.obs.index.values[i] + "_" + str(
         adata.obs.iloc[i][col_condition]) + "_" + str(adata.obs.iloc[i][
@@ -96,11 +96,11 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
     #     [col_cell_type, col_condition]])
     if plot is True:
         try:
-            figs["barplot"] = coda_plot.boxplots(
+            figs["barplot"] = model.plot_boxplots(
                 scodata, modality_key=mod,
                 feature_name=col_condition,
-                figsize=(12, 5), add_dots=True,
-                args_swarmplot={"palette": ["red"]})
+                # args_swarmplot={"palette": ["red"]},
+                figsize=(12, 5), add_dots=True)
             plt.show()
         except Exception as err:
             print(f"{err}\n\nFailed to plot boxplots. Ensure PyQt5 is "
@@ -111,14 +111,14 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
     if plot is True:
         try:
             figs[
-                "find_reference"] = coda_plot.rel_abundance_dispersion_plot(
+                "find_reference"] = model.plot_rel_abundance_dispersion_plot(
                     scodata, modality_key=mod,
                     abundant_threshold=0.9)  # helps choose rference cell type
         except Exception as err:
             print(f"{err}\n\nFailed to plot reference cell type.\n\n")
             figs["find_reference"] = err
         try:
-            figs["proportions"] = coda_plot.boxplots(
+            figs["proportions"] = model.plot_boxplots(
                 scodata, modality_key=mod,
                 feature_name=col_condition, add_dots=True)
         except Exception as err:
@@ -150,7 +150,7 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
             scodata.write_h5mu(f"{out_file}_{est_fdr}_fdr")
     if plot is True:
         try:
-            figs["proportions_stacked"] = coda_plot.stacked_barplot(
+            figs["proportions_stacked"] = model.plot_stacked_barplot(
                 scodata, modality_key=mod, feature_name=col_condition)
             plt.show()
         except Exception as err:
@@ -169,12 +169,10 @@ def perform_sccoda(adata, col_condition, col_cell_type, assay=None,
         plt.tight_layout()
         plt.show()
         try:
-            pzc = any((scodata.varm[f"effect_df_{x}"]["Final Parameter"].any(
-                ) for x in scodata.uns["scCODA_params"]["covariate_names"]
-                       )) is False  # don't plot 0 effects if any non-0
-            figs["effects"] = coda_plot.effects_barplot(
+            figs["effects"] = model.plot_effects_barplot(
                 scodata, modality_key=mod, parameter="Final Parameter",
-                plot_zero_cell_type=pzc)
+                plot_zero_cell_type=plot_zero_cell_type,
+                plot_zero_covariate=plot_zero_covariate)
         except Exception as err:
             print(traceback.format_exc(), "\n\nFailed to plot effects.\n\n")
             figs["effects"] = err
@@ -239,12 +237,12 @@ def perform_tasccoda(adata, col_condition, col_cell_type,
         sample_identifier=col_sample_id,
         covariate_obs=covariates + [col_condition],
         levels_orig=col_list_lineage_tree, add_level_name=True)  # load model
-    coda_plot.draw_tree(ts_data["coda"])
+    model.plot_draw_tree(ts_data["coda"])
     ts_data.mod["coda_subset"] = ts_data["coda"][ts_data["coda"].obs[
         col_condition].isin([key_control, key_treatment])]  # subset if needed
     if plot is True:
-        figs["tree"] = coda_plot.draw_tree(ts_data["coda"])
-        figs["descriptives_abundance"] = coda_plot.boxplots(
+        figs["tree"] = model.plot_draw_tree(ts_data["coda"])
+        figs["descriptives_abundance"] = model.plot_boxplots(
             ts_data, modality_key="coda_subset", feature_name=col_condition,
             figsize=(20, 8))
         plt.show()
@@ -260,12 +258,12 @@ def perform_tasccoda(adata, col_condition, col_cell_type,
     results["credible_effects"] = model.credible_effects(
         ts_data, modality_key="coda_subset")  # credible effects
     if plot:
-        figs["credible_effects"] = coda_plot.draw_effects(
+        figs["credible_effects"] = model.plot_draw_effects(
             ts_data, modality_key="coda_subset",
             tree=ts_data["coda_subset"].uns["tree"],
             covariate=f"{col_condition}[T.{key_treatment}]"
             )  # effects as sizes/colors of nodes on the lineage tree
-        figs["credible_effects_dual"] = coda_plot.draw_effects(
+        figs["credible_effects_dual"] = model.plot_draw_effects(
             ts_data, modality_key="coda_subset",
             tree=ts_data["coda_subset"].uns["tree"],
             covariate=f"{col_condition}[T.{key_treatment}]",