2 changes: 1 addition & 1 deletion README.md
@@ -8,7 +8,7 @@
[link-tests]: https://github.com/TarikExner/CytoNormPy/actions/workflows/pytest.yml
[badge-docs]: https://img.shields.io/readthedocs/cytonormpy

A python port for the CytoNorm R library.
A python port for the CytoNorm (2.0) R library.

# Installation

62 changes: 33 additions & 29 deletions cytonormpy/__init__.py
@@ -1,57 +1,61 @@
import sys
from ._cytonorm import CytoNorm, example_cytonorm, example_anndata
from ._dataset import FCSFile
from ._clustering import (FlowSOM,
KMeans,
MeanShift,
AffinityPropagation)
from ._transformation import (AsinhTransformer,
HyperLogTransformer,
LogTransformer,
LogicleTransformer,
Transformer)
from ._plotting import Plotter
from ._clustering import FlowSOM, KMeans, MeanShift, AffinityPropagation
from ._transformation import (
AsinhTransformer,
HyperLogTransformer,
LogTransformer,
LogicleTransformer,
Transformer,
)
from ._cytonorm import read_model
from ._evaluation import (mad_from_fcs,
mad_comparison_from_fcs,
mad_from_anndata,
mad_comparison_from_anndata,
emd_from_fcs,
emd_comparison_from_fcs,
emd_from_anndata,
emd_comparison_from_anndata)

from ._evaluation import (
mad_from_fcs,
mad_comparison_from_fcs,
mad_from_anndata,
mad_comparison_from_anndata,
emd_from_fcs,
emd_comparison_from_fcs,
emd_from_anndata,
emd_comparison_from_anndata,
)
from . import _plotting as pl
from ._plotting import scatter, histogram, emd, mad, cv_heatmap, splineplot, Plotter

sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["pl"]})

__all__ = [
"CytoNorm",

"FlowSOM",
"KMeans",
"MeanShift",
"AffinityPropagation",

"example_anndata",
"example_cytonorm",

"Transformer",
"AsinhTransformer",
"HyperLogTransformer",
"LogTransformer",
"LogicleTransformer",

"Plotter",
"FCSFile",

"read_model",

"mad_from_fcs",
"mad_comparison_from_fcs",
"mad_from_anndata",
"mad_comparison_from_anndata",

"emd_from_fcs",
"emd_comparison_from_fcs",
"emd_from_anndata",
"emd_comparison_from_anndata"
"emd_comparison_from_anndata",
"pl",
"scatter",
"histogram",
"emd",
"mad",
"cv_heatmap",
"splineplot",
"Plotter",
]

__version__ = '0.0.3'
__version__ = "1.0.2"
12 changes: 2 additions & 10 deletions cytonormpy/_clustering/__init__.py
@@ -1,11 +1,3 @@
from ._cluster_algorithms import (FlowSOM,
KMeans,
MeanShift,
AffinityPropagation)
from ._cluster_algorithms import FlowSOM, KMeans, MeanShift, AffinityPropagation

__all__ = [
"FlowSOM",
"KMeans",
"MeanShift",
"AffinityPropagation"
]
__all__ = ["FlowSOM", "KMeans", "MeanShift", "AffinityPropagation"]
157 changes: 113 additions & 44 deletions cytonormpy/_clustering/_cluster_algorithms.py
@@ -1,14 +1,13 @@
import numpy as np
import warnings

from typing import Optional

from abc import abstractmethod
from flowsom.models import FlowSOMEstimator
from sklearn.base import clone
from sklearn.cluster import KMeans as knnclassifier
from sklearn.cluster import AffinityPropagation as affinitypropagationclassifier
from sklearn.cluster import MeanShift as meanshiftclassifier

from abc import abstractmethod


class ClusterBase:
"""\
@@ -20,15 +19,15 @@ def __init__(self):
pass

@abstractmethod
def train(self,
X: np.ndarray,
**kwargs) -> None:
def train(self, X: np.ndarray, **kwargs) -> None:
pass

@abstractmethod
def calculate_clusters(self,
X: np.ndarray,
**kwargs) -> np.ndarray:
def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray:
pass

@abstractmethod
def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]) -> np.ndarray:
pass
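
With calculate_clusters_multiple added to the abstract interface, every clustering backend plugged into CytoNorm is now expected to implement train, calculate_clusters and calculate_clusters_multiple. A hypothetical subclass sketch illustrating that contract (Birch is an assumed example backend and is not part of this PR):

import numpy as np
from sklearn.base import clone
from sklearn.cluster import Birch
from cytonormpy._clustering._cluster_algorithms import ClusterBase


class BirchClustering(ClusterBase):
    """Hypothetical backend, shown only to illustrate the interface."""

    def __init__(self, **kwargs):
        super().__init__()
        self.est = Birch(**kwargs)

    def train(self, X: np.ndarray, **kwargs) -> None:
        self.est.fit(X, **kwargs)

    def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray:
        return self.est.predict(X)

    def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]) -> np.ndarray:
        # one refit per requested cluster count, one column per entry
        out = np.empty((X.shape[0], len(n_clusters)), dtype=int)
        for j, k in enumerate(n_clusters):
            est = clone(self.est)
            est.set_params(n_clusters=k)
            out[:, j] = est.fit_predict(X)
        return out
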


@@ -48,8 +47,7 @@ class FlowSOM(ClusterBase):

"""

def __init__(self,
**kwargs):
def __init__(self, **kwargs):
super().__init__()
if not kwargs:
kwargs = {}
@@ -59,9 +57,7 @@ def __init__(self,
kwargs["seed"] = 187
self.est = FlowSOMEstimator(**kwargs)

def train(self,
X: np.ndarray,
**kwargs):
def train(self, X: np.ndarray, **kwargs):
"""\
Trains the SOM. Calls :class:`flowsom.FlowSOMEstimator.fit()` internally.

@@ -80,9 +76,7 @@ def train(self,
self.est.fit(X, **kwargs)
return

def calculate_clusters(self,
X: np.ndarray,
**kwargs) -> np.ndarray:
def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""\
Calculates the clusters. Calls :class:`flowsom.FlowSOMEstimator.predict()` internally.

@@ -100,6 +94,35 @@ def calculate_clusters(self,
"""
return self.est.predict(X, **kwargs)

def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]):
"""\
Calculates cluster assignments for multiple metacluster numbers. The estimator
calculates the SOM once, then fits the ConsensusCluster class for each of the
provided metacluster counts.

Parameters
----------
X
The data that are supposed to be predicted.
n_clusters
A list of integers specifying the number of metaclusters per run.

Returns
-------
Cluster annotations stored in a :class:`np.ndarray`, where each column
corresponds to one entry of ``n_clusters`` and each row to an individual cell.

"""
self.est.cluster_model.fit(X)
y_clusters = self.est.cluster_model.predict(X)
X_codes = self.est.cluster_model.codes
assignments = np.empty((X.shape[0], len(n_clusters)), dtype=np.int16)
for j, n_mc in enumerate(n_clusters):
self.est.set_n_clusters(n_mc)
y_codes = self.est.metacluster_model.fit_predict(X_codes)
assignments[:, j] = y_codes[y_clusters]
return assignments
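
A minimal usage sketch of the new multi-metacluster path (synthetic data; keyword arguments are forwarded to flowsom.models.FlowSOMEstimator and its defaults are assumed to be sufficient here):

import numpy as np
import cytonormpy as cnp

X = np.random.default_rng(187).normal(size=(5_000, 10))   # hypothetical cells x markers matrix

fs = cnp.FlowSOM()                 # seed defaults to 187 inside __init__
fs.train(X)                        # fits the SOM and the default metaclustering once
single = fs.calculate_clusters(X)  # one label per cell

# new in this PR: one assignment column per requested metacluster count
multi = fs.calculate_clusters_multiple(X, n_clusters=[5, 10, 15])
print(single.shape, multi.shape)   # expected: (5000,) and (5000, 3)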


class MeanShift(ClusterBase):
"""\
@@ -117,16 +140,11 @@ class MeanShift(ClusterBase):

"""

def __init__(self,
**kwargs):
def __init__(self, **kwargs):
super().__init__()
if "random_state" not in kwargs:
kwargs["random_state"] = 187
self.est = meanshiftclassifier(**kwargs)

def train(self,
X: np.ndarray,
**kwargs):
def train(self, X: np.ndarray, **kwargs):
"""\
Trains the classifier. Calls :class:`sklearn.cluster.MeanShift.fit()` internally.

@@ -145,9 +163,7 @@ def train(self,
self.est.fit(X, **kwargs)
return

def calculate_clusters(self,
X: np.ndarray,
**kwargs) -> np.ndarray:
def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""\
Calculates the clusters. Calls :class:`sklearn.cluster.MeanShift.predict()` internally.

@@ -165,6 +181,29 @@ def calculate_clusters(self,
"""
return self.est.predict(X, **kwargs)

def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]):
"""
MeanShift ignores n_clusters: warns if len(n_clusters)>1,
then returns the same assignment in each column.
"""
if len(n_clusters) > 1:
warnings.warn(
"MeanShift: ignoring requested n_clusters list, "
"producing identical assignments for each entry.",
UserWarning,
stacklevel=2,
)

n_samples = X.shape[0]
out = np.empty((n_samples, len(n_clusters)), dtype=int)

for j in range(len(n_clusters)):
est = clone(self.est)
est.fit(X)
out[:, j] = est.predict(X)

return out
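
Because MeanShift determines the number of clusters itself, the sketch below (synthetic data) only illustrates the emitted UserWarning and the replicated columns:

import numpy as np
import cytonormpy as cnp

X = np.random.default_rng(187).normal(size=(300, 4))          # small hypothetical matrix
ms = cnp.MeanShift()
out = ms.calculate_clusters_multiple(X, n_clusters=[5, 10])   # warns: the list is ignored
print(out.shape)                                              # (300, 2)
print(np.array_equal(out[:, 0], out[:, 1]))                   # True: each column is an identical refit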


class KMeans(ClusterBase):
"""\
@@ -182,16 +221,13 @@ class KMeans(ClusterBase):

"""

def __init__(self,
**kwargs):
def __init__(self, **kwargs):
super().__init__()
if "random_state" not in kwargs:
kwargs["random_state"] = 187
self.est = knnclassifier(**kwargs)

def train(self,
X: np.ndarray,
**kwargs):
def train(self, X: np.ndarray, **kwargs):
"""\
Trains the classifier. Calls :class:`sklearn.cluster.KMeans.fit()` internally.

@@ -210,9 +246,7 @@ def train(self,
self.est.fit(X, **kwargs)
return

def calculate_clusters(self,
X: np.ndarray,
**kwargs) -> np.ndarray:
def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""\
Calculates the clusters. Calls :class:`sklearn.cluster.KMeans.predict()` internally.

@@ -230,6 +264,23 @@ def calculate_clusters(self,
"""
return self.est.predict(X, **kwargs)

def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]):
"""
Returns an array of shape (n_samples, len(n_clusters)),
where each column i is the cluster-assignment vector
for KMeans(n_clusters=n_clusters[i]).
"""
n_samples = X.shape[0]
out = np.empty((n_samples, len(n_clusters)), dtype=int)

for j, k in enumerate(n_clusters):
est = clone(self.est)
est.set_params(n_clusters=k)
est.fit(X)
out[:, j] = est.predict(X)

return out
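
For KMeans the list is honoured by cloning the configured estimator and overriding n_clusters per entry; a short sketch with synthetic data:

import numpy as np
import cytonormpy as cnp

X = np.random.default_rng(187).normal(size=(1_000, 8))   # hypothetical data
km = cnp.KMeans()                                        # random_state defaults to 187 in __init__
out = km.calculate_clusters_multiple(X, n_clusters=[4, 8, 16])
print(out.shape)                                         # (1000, 3): one column per requested k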


class AffinityPropagation(ClusterBase):
"""\
@@ -247,16 +298,13 @@ class AffinityPropagation(ClusterBase):

"""

def __init__(self,
**kwargs):
def __init__(self, **kwargs):
super().__init__()
if "random_state" not in kwargs:
kwargs["random_state"] = 187
self.est = affinitypropagationclassifier(**kwargs)

def train(self,
X: np.ndarray,
**kwargs):
def train(self, X: np.ndarray, **kwargs):
"""\
Trains the classifier. Calls :class:`sklearn.cluster.AffinityPropagation.fit()` internally.

@@ -275,9 +323,7 @@ def train(self,
self.est.fit(X, **kwargs)
return

def calculate_clusters(self,
X: np.ndarray,
**kwargs) -> np.ndarray:
def calculate_clusters(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""\
Calculates the clusters. Calls :class:`sklearn.cluster.AffinityPropagation.predict()` internally.

@@ -294,3 +340,26 @@ def calculate_clusters(self,

"""
return self.est.predict(X, **kwargs)

def calculate_clusters_multiple(self, X: np.ndarray, n_clusters: list[int]):
"""
AffinityPropagation ignores n_clusters: warns if len(n_clusters)>1,
then returns the same assignment for each entry.
"""
if len(n_clusters) > 1:
warnings.warn(
"AffinityPropagation: ignoring requested n_clusters list, "
"producing identical assignments for each entry.",
UserWarning,
stacklevel=2,
)

n_samples = X.shape[0]
out = np.empty((n_samples, len(n_clusters)), dtype=int)

for j in range(len(n_clusters)):
est = clone(self.est)
est.fit(X)
out[:, j] = est.predict(X)

return out
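
AffinityPropagation behaves like MeanShift here, refitting once per entry and ignoring the requested counts; a short sketch with synthetic data:

import numpy as np
import cytonormpy as cnp

X = np.random.default_rng(187).normal(size=(200, 4))         # small hypothetical matrix
ap = cnp.AffinityPropagation()                               # random_state defaults to 187 in __init__
out = ap.calculate_clusters_multiple(X, n_clusters=[3, 6])   # warns: the list is ignored
print(out.shape)                                             # (200, 2)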